From: Ruben Beltran del Rio Date: Mon, 26 Feb 2024 19:08:05 +0000 (+0000) Subject: Add tokenizer X-Git-Tag: 7.0.0~45 X-Git-Url: https://git.r.bdr.sh/rbdr/blog/commitdiff_plain/2998247083406f914b3647cedd19abf5507bf2c6 Add tokenizer --- diff --git a/src/archiver/gemini.txt b/src/archiver/gemini.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/archiver/gopher.txt b/src/archiver/gopher.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/archiver/mod.rs b/src/archiver/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/command/generate.rs b/src/command/generate.rs index fac73ff..c8c5673 100644 --- a/src/command/generate.rs +++ b/src/command/generate.rs @@ -1,5 +1,12 @@ -use std::io::Result; +use std::fs::{create_dir_all, read_dir, remove_dir_all, File}; +use std::io::{Read, Result}; +use std::path::PathBuf; use crate::configuration::Configuration; +use crate::constants::METADATA_FILENAME; +use crate::gemini_parser::parse; +use crate::generator::generate; +use crate::metadata::Metadata; +use crate::post::Post; pub struct Generate; @@ -7,6 +14,53 @@ impl Generate { pub fn new() -> Self { Generate } + + fn read_posts(&self, posts_directory: &PathBuf, max_posts: u8) -> Vec { + let mut posts = Vec::new(); + + for i in 0..max_posts - 1 { + let post_directory = posts_directory.join(i.to_string()); + match self.read_post(&post_directory, i) { + Some(post) => posts.push(post), + None => continue + } + } + + posts + } + + fn find_blog_content(&self, post_directory: &PathBuf) -> Option { + let entries = read_dir(&post_directory).ok()?; + for entry in entries.filter_map(Result::ok) { + let entry_path = entry.path(); + match entry_path.extension() { + Some(extension) => { + if extension == "gmi" { + let mut file = File::open(entry_path).ok()?; + let mut contents = String::new(); + file.read_to_string(&mut contents).ok()?; + return Some(contents); + } + }, + None => continue + } + } + None + } + + fn read_post(&self, post_directory: &PathBuf, index: u8) -> Option { + let metadata_path = post_directory.join(METADATA_FILENAME); + let metadata = Metadata::read_or_create(&metadata_path); + let raw = self.find_blog_content(&post_directory)?; + let html = parse(&raw); + + Some(Post { + metadata, + index, + html, + raw + }) + } } impl super::Command for Generate { @@ -14,8 +68,20 @@ impl super::Command for Generate { vec![] } - fn execute(&self, input: Option<&String>, _: &Configuration, _: &String) -> Result<()> { - println!("GENERATE! {:?}", input); + fn execute(&self, _: Option<&String>, configuration: &Configuration, _: &String) -> Result<()> { + let _ = remove_dir_all(&configuration.blog_output_directory); + create_dir_all(&configuration.blog_output_directory)?; + + let posts = self.read_posts(&configuration.posts_directory, configuration.max_posts); + generate( + &configuration.static_directory, + &configuration.templates_directory, + &configuration.blog_output_directory, + &posts + )?; + + let _ = remove_dir_all(&configuration.archive_output_directory); + create_dir_all(&configuration.archive_output_directory)?; return Ok(()) } diff --git a/src/gemini_parser.rs b/src/gemini_parser.rs new file mode 100644 index 0000000..3414ea7 --- /dev/null +++ b/src/gemini_parser.rs @@ -0,0 +1,225 @@ +// TAKEN FROM PAGE. Need to move to a common source. + pub fn parse(source: &str) -> String { + + let lines = source.split("\n"); + let mut is_preformatted = false; + + let mut block_label: Option = None; + let mut html: String = "".to_owned(); + let mut current_line_type: Option = None; + + let mut heading_stack: Vec = Vec::new(); + for line in lines { + let mut line_type = LineType::Blank; + if line.char_indices().count() > 2 { + let mut end = line.len(); + if line.char_indices().count() > 3 { + end = line.char_indices().map(|(i, _)| i).nth(3).unwrap(); + } + line_type = identify_line(&line[..end], is_preformatted); + } + match line_type { + LineType::PreformattedToggle => { + is_preformatted = !is_preformatted; + if is_preformatted && line.char_indices().count() > 3 { + block_label = Some(get_partial_line_content(&line_type, line)); + } else { + block_label = None; + } + }, + _ => { + // Close previous block if needed + if let Some(line) = ¤t_line_type { + if line != &line_type && is_block(line) { + html.push_str(get_line_closer(line)); + } + } + + // Blocks + if is_block(&line_type) { + if let Some(line) = ¤t_line_type { + if line != &line_type { + html.push_str(&get_line_opener(&line_type, block_label.as_ref())); + } + } else { + html.push_str(&get_line_opener(&line_type, None)); + } + + let line_content = get_partial_line_content(&line_type, line); + html.push_str(&line_content); + } else { + html.push_str(&get_heading_wrapper(&mut heading_stack, &line_type)); + html.push_str(&get_full_line_content(&line_type, line)); + } + current_line_type = Some(line_type); + }, + } + } + if let Some(line) = ¤t_line_type { + if is_block(line) { + html.push_str(get_line_closer(line)); + } + } + html.push_str(&close_heading_wrapper(&mut heading_stack)); + html +} + +fn is_block(line_type: &LineType) -> bool { + return match line_type { + LineType::PreformattedText | LineType::ListItem | LineType::Quote => true, + _ => false, + } +} + +fn get_partial_line_content(line_type: &LineType, line: &str) -> String { + let encoded_line = line.replace("<", "<").replace(">", ">"); + return match line_type { + LineType::ListItem => format!("
  • {}
  • ", encoded_line[2..].trim()), + LineType::Quote => encoded_line[1..].trim().to_string(), + LineType::PreformattedText => format!("{}\n", encoded_line), + LineType::PreformattedToggle => encoded_line[3..].trim().to_string(), + _ => "".to_string(), + } +} + +fn get_full_line_content(line_type: &LineType, line: &str) -> String { + let encoded_line = line.replace("<", "<").replace(">", ">"); + match line_type { + LineType::Text => format!("

    {}

    \n", encoded_line.trim()), + LineType::Blank => "
    \n".to_string(), + LineType::Link => { + let url = get_link_address(line); + if url.starts_with("gemini:") { + format!("\n", url, get_link_content(line)) + } else { + format!("\n", url.replace(".gmi", ".html"), get_link_content(line)) + } + }, + LineType::Heading1 => format!("

    {}

    \n", encoded_line[1..].trim()), + LineType::Heading2 => format!("

    {}

    \n", encoded_line[2..].trim()), + LineType::Heading3 => format!("

    {}

    \n", encoded_line[3..].trim()), + _ => "".to_string(), + } +} + +fn get_heading_wrapper(heading_stack: &mut Vec, line_type: &LineType) -> String { + let mut string = String::new(); + let current_heading: u8 = match line_type { + LineType::Heading1 => 1, + LineType::Heading2 => 2, + LineType::Heading3 => 3, + _ => 255 + }; + + if current_heading < 255 { + while let Some(open_heading) = heading_stack.pop() { + // You just encountered a more important heading. + // Put it back. Desist. + if open_heading < current_heading { + heading_stack.push(open_heading); + break; + } + + string.push_str(""); + + if open_heading == current_heading { + break; + } + } + heading_stack.push(current_heading); + string.push_str(&format!("
    ", current_heading)); + } + + return string; +} + +fn close_heading_wrapper(heading_stack: &mut Vec) -> String { + let mut string = String::new(); + while let Some(_open_heading) = heading_stack.pop() { + string.push_str("
    "); + } + return string; +} + +fn get_line_opener(line_type: &LineType, block_label: Option<&String>) -> String { + match line_type { + LineType::ListItem => "
      ".to_string(), + LineType::Quote => "
      ".to_string(), + LineType::PreformattedText => { + if let Some(label) = &block_label { + return format!("
      ", label);
      +            } else {
      +                return "
      ".to_string();
      +            }
      +        },
      +        _ => "".to_string(),
      +    }
      +}
      +
      +fn get_line_closer(line_type: &LineType) -> &'static str {
      +    match line_type {
      +        LineType::ListItem => "
    \n", + LineType::Quote => "\n", + LineType::PreformattedText => "\n", + _ => "", + } +} + +fn get_link_content(line: &str) -> &str { + let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect(); + if components.len() > 1 { + return components[1].trim() + } + components[0].trim() +} + +fn get_link_address(line: &str) -> &str { + let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect(); + components[0].trim() +} + +fn identify_line(line: &str, is_preformatted: bool) -> LineType { + if line.starts_with("```") { + return LineType::PreformattedToggle; + } + if is_preformatted { + return LineType::PreformattedText; + } + if line.is_empty() { + return LineType::Blank; + } + if line.starts_with("=>") { + return LineType::Link; + } + if line.starts_with("* ") { + return LineType::ListItem; + } + if line.starts_with(">") { + return LineType::Quote; + } + if line.starts_with("###") { + return LineType::Heading3; + } + if line.starts_with("##") { + return LineType::Heading2; + } + if line.starts_with("#") { + return LineType::Heading1; + } + + LineType::Text +} + +#[derive(PartialEq, Eq)] +enum LineType { + Text, + Blank, + Link, + PreformattedToggle, + PreformattedText, + Heading1, + Heading2, + Heading3, + ListItem, + Quote +} diff --git a/src/generator/html.rs b/src/generator/html.rs new file mode 100644 index 0000000..0b258bf --- /dev/null +++ b/src/generator/html.rs @@ -0,0 +1,18 @@ +use std::io::Result; +use std::path::PathBuf; +use crate::post::Post; +use crate::template::{find, parse}; + +pub fn generate(_: &PathBuf, template_directory: &PathBuf, _: &PathBuf, _: &Vec) -> Result<()> { + println!("READING TEMP"); + match find(template_directory, "index.html") { + Some(template) => { + let parsed_template = parse(&template); + for token in parsed_template.tokens { + println!("TOKEN {}", token); + } + }, + None => {} + } + Ok(()) +} diff --git a/src/generator/mod.rs b/src/generator/mod.rs new file mode 100644 index 0000000..c4bf1e1 --- /dev/null +++ b/src/generator/mod.rs @@ -0,0 +1,26 @@ +mod static_files; +mod html; +mod rss; +mod txt; + +use std::io::Result; +use std::path::PathBuf; +use crate::post::Post; + +pub fn generate(static_directory: &PathBuf, template_directory: &PathBuf, output_directory: &PathBuf, posts: &Vec) -> Result<()> { + let generators = available_generators(); + for generator in generators { + generator(static_directory, template_directory, output_directory, posts)?; + } + Ok(()) +} + + +fn available_generators() -> Vec) -> Result<()>> { + vec![ + static_files::generate, + html::generate, + rss::generate, + txt::generate + ] +} diff --git a/src/generator/rss.rs b/src/generator/rss.rs new file mode 100644 index 0000000..dab7e50 --- /dev/null +++ b/src/generator/rss.rs @@ -0,0 +1,7 @@ +use std::io::Result; +use std::path::PathBuf; +use crate::post::Post; + +pub fn generate(_: &PathBuf, _: &PathBuf, _: &PathBuf, _: &Vec) -> Result<()> { + Ok(()) +} diff --git a/src/generator/static_files.rs b/src/generator/static_files.rs new file mode 100644 index 0000000..6804800 --- /dev/null +++ b/src/generator/static_files.rs @@ -0,0 +1,31 @@ +use std::fs::{copy, create_dir_all, read_dir}; +use std::io::Result; +use std::path::PathBuf; +use crate::post::Post; + +fn recursively_copy(source: &PathBuf, target: &PathBuf) -> Result<()> { + let entries = read_dir(source)?; + for entry in entries { + let entry = entry?; + let entry_type = entry.file_type()?; + let entry_name = entry.file_name(); + let entry_source = entry.path(); + let entry_target = target.join(entry_name); + + if entry_type.is_dir() { + create_dir_all(&entry_target)?; + recursively_copy(&entry_source, &entry_target)?; + } else { + copy(&entry_source, &entry_target)?; + } + } + + Ok(()) +} + +pub fn generate(source: &PathBuf, _: &PathBuf, target: &PathBuf, _: &Vec) -> Result<()> { + if source.exists() { + return recursively_copy(source, target) + } + Ok(()) +} diff --git a/src/generator/txt.rs b/src/generator/txt.rs new file mode 100644 index 0000000..dab7e50 --- /dev/null +++ b/src/generator/txt.rs @@ -0,0 +1,7 @@ +use std::io::Result; +use std::path::PathBuf; +use crate::post::Post; + +pub fn generate(_: &PathBuf, _: &PathBuf, _: &PathBuf, _: &Vec) -> Result<()> { + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 4cb8036..0de896b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,11 @@ mod configuration; mod command; mod constants; +mod gemini_parser; +mod generator; mod metadata; +mod post; +mod template; use std::iter::once; use std::env::args; diff --git a/src/post.rs b/src/post.rs new file mode 100644 index 0000000..548a5cb --- /dev/null +++ b/src/post.rs @@ -0,0 +1,8 @@ +use crate::metadata::Metadata; + +pub struct Post { + pub metadata: Metadata, + pub index: u8, + pub html: String, + pub raw: String +} diff --git a/src/remote/mod.rs b/src/remote/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/template.rs b/src/template.rs new file mode 100644 index 0000000..37906ed --- /dev/null +++ b/src/template.rs @@ -0,0 +1,140 @@ +use std::fs::File; +use std::path::PathBuf; +use std::io::Read; + +const TXT_TEMPLATE: &'static str = include_str!("../templates/index.txt"); +const HTML_TEMPLATE: &'static str = include_str!("../templates/index.html"); +const GMI_TEMPLATE: &'static str = include_str!("../templates/index.gmi"); +const RSS_TEMPLATE: &'static str = include_str!("../templates/feed.xml"); + +// Parse and Render + +pub enum Token { + Text(String), + DisplayDirective { content: String }, + ConditionalDirective { condition: String, children: Vec}, + IteratorDirective { collection: String, member_label: String, children: Vec } +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Token::Text(label) => write!(f, "Text {}", label), + Token::DisplayDirective{content} => write!(f, "DisplayDirective {}", content), + Token::ConditionalDirective{condition, children} => { + write!(f, "ConditionalDirective {} [[[\n", condition)?; + for child in children { + write!(f, "\t{}\n", child)?; + } + write!(f, "\n]]]") + }, + Token::IteratorDirective{collection, member_label, children} => { + write!(f, "IteratorDirective {}: {} [[[\n", collection, member_label)?; + for child in children { + write!(f, "\t{}\n", child)?; + } + write!(f, "\n]]]") + }, + } + } +} + +pub struct ParsedTemplate { + pub tokens: Vec +} + +pub fn parse(template: &str) -> ParsedTemplate { + let mut tokens = Vec::new(); + tokenize(template, &mut tokens); + ParsedTemplate { + tokens + } +} + +fn tokenize(template: &str, tokens: &mut Vec) { + let mut remaining_template = template; + + while !remaining_template.is_empty() && remaining_template.contains("{{") { + let directive_start_index = remaining_template.find("{{") + .expect("Was expecting at least one tag opener"); + if directive_start_index > 0 { + let text = remaining_template[..directive_start_index].to_string(); + tokens.push(Token::Text(text.to_string())); + } + remaining_template = &remaining_template[directive_start_index..]; + + let directive_end_index = remaining_template.find("}}") + .expect("Was expecting }} after {{") + 2; + let directive = &remaining_template[..directive_end_index]; + remaining_template = &remaining_template[directive_end_index..]; + + let directive_type = directive.chars().nth(2).unwrap(); + match directive_type { + // Simple Directives + '=' => { + let content = directive[3..directive.len() - 2].trim(); + tokens.push(Token::DisplayDirective{ + content: content.to_string() + }); + }, + // Block Directives + '?' | '~' => { + let content = directive[3..directive.len() - 2].trim(); + let mut children = Vec::new(); + + match directive_type { + '?' => { + let closing_block = remaining_template.find("{{?}}").unwrap(); + let directive_block = &remaining_template[..closing_block]; + remaining_template = &remaining_template[closing_block + 5..]; + tokenize(directive_block, &mut children); + tokens.push(Token::ConditionalDirective{ + condition: content.to_string(), + children + }); + }, + '~' => { + let parts: Vec<_> = content.splitn(2, ':').collect(); + let closing_block = remaining_template.find("{{~}}").unwrap(); + let directive_block = &remaining_template[..closing_block]; + remaining_template = &remaining_template[closing_block + 5..]; + tokenize(directive_block, &mut children); + if parts.len() == 2 { + tokens.push(Token::IteratorDirective { + collection: parts[0].trim().to_string(), + member_label: parts[1].trim().to_string(), + children + }); + } + }, + _ => unreachable!() + } + }, + _ => unreachable!() + } + } + tokens.push(Token::Text(remaining_template.to_string())); +} + +// File helpers. + +pub fn find(template_directory: &PathBuf, filename: &str) -> Option { + let template_path = template_directory.join(filename); + if template_path.exists() { + let mut contents = String::new(); + if File::open(template_path).ok()?.read_to_string(&mut contents).is_ok() { + return Some(contents); + } + } + find_default(filename) +} + +fn find_default(filename: &str) -> Option { + match filename { + "index.txt" => Some(TXT_TEMPLATE.to_string()), + "index.html" => Some(HTML_TEMPLATE.to_string()), + "index.gmi" => Some(GMI_TEMPLATE.to_string()), + "index.rss" => Some(RSS_TEMPLATE.to_string()), + &_ => None + } +}