git.r.bdr.sh - rbdr/blog/commitdiff
Add tokenizer
author Ruben Beltran del Rio <redacted>
Mon, 26 Feb 2024 19:08:05 +0000 (19:08 +0000)
committer Ruben Beltran del Rio <redacted>
Mon, 26 Feb 2024 19:08:05 +0000 (19:08 +0000)
14 files changed:
src/archiver/gemini.txt [new file with mode: 0644]
src/archiver/gopher.txt [new file with mode: 0644]
src/archiver/mod.rs [new file with mode: 0644]
src/command/generate.rs
src/gemini_parser.rs [new file with mode: 0644]
src/generator/html.rs [new file with mode: 0644]
src/generator/mod.rs [new file with mode: 0644]
src/generator/rss.rs [new file with mode: 0644]
src/generator/static_files.rs [new file with mode: 0644]
src/generator/txt.rs [new file with mode: 0644]
src/main.rs
src/post.rs [new file with mode: 0644]
src/remote/mod.rs [new file with mode: 0644]
src/template.rs [new file with mode: 0644]

diff --git a/src/archiver/gemini.txt b/src/archiver/gemini.txt
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/archiver/gopher.txt b/src/archiver/gopher.txt
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/archiver/mod.rs b/src/archiver/mod.rs
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/command/generate.rs b/src/command/generate.rs
index fac73ff37de30f57234eadde8118755e2fc8031c..c8c567392ea63ef1f7ff2ab4b12bc10337b759a0 100644 (file)
@@ -1,5 +1,12 @@
-use std::io::Result;
+use std::fs::{create_dir_all, read_dir, remove_dir_all, File};
+use std::io::{Read, Result};
+use std::path::PathBuf;
 use crate::configuration::Configuration;
+use crate::constants::METADATA_FILENAME;
+use crate::gemini_parser::parse;
+use crate::generator::generate;
+use crate::metadata::Metadata;
+use crate::post::Post;
 
+/// The `generate` command. It carries no state; see its `Command` implementation below.
 pub struct Generate;
 
@@ -7,6 +14,53 @@ impl Generate {
+    /// Creates the (stateless) `generate` command.
     pub fn new() -> Self {
         Generate
     }
+
+    /// Collects every post stored under `posts_directory`.
+    ///
+    /// Post slots are the subdirectories named `0`, `1`, … `max_posts - 1`.
+    /// A slot for which `read_post` returns `None` is skipped, so the result
+    /// may hold fewer than `max_posts` entries. Posts are returned in
+    /// ascending slot order.
+    fn read_posts(&self, posts_directory: &PathBuf, max_posts: u8) -> Vec<Post> {
+        // `0..max_posts` visits all `max_posts` slots. The earlier
+        // `0..max_posts - 1` never looked at the last slot and underflowed
+        // (panicking in debug builds) when `max_posts` was 0.
+        (0..max_posts)
+            .filter_map(|index| {
+                let post_directory = posts_directory.join(index.to_string());
+                self.read_post(&post_directory, index)
+            })
+            .collect()
+    }
+
+    /// Returns the text of the first `.gmi` file found directly inside
+    /// `post_directory`.
+    ///
+    /// Yields `None` when the directory cannot be listed, when it holds no
+    /// `.gmi` entry, or when that entry cannot be opened or read as UTF-8.
+    /// Entries that fail to enumerate are ignored.
+    fn find_blog_content(&self, post_directory: &PathBuf) -> Option<String> {
+        let gemtext_path = read_dir(post_directory)
+            .ok()?
+            .filter_map(Result::ok)
+            .map(|entry| entry.path())
+            .find(|path| path.extension().map_or(false, |extension| extension == "gmi"))?;
+
+        let mut text = String::new();
+        File::open(gemtext_path).ok()?.read_to_string(&mut text).ok()?;
+        Some(text)
+    }
+
+    /// Loads the post stored in `post_directory`, tagging it with `index`.
+    ///
+    /// Returns `None` when the directory has no readable `.gmi` content.
+    fn read_post(&self, post_directory: &PathBuf, index: u8) -> Option<Post> {
+        // The metadata call runs first, before the content lookup can bail
+        // out, exactly as the ordering of these statements implies.
+        let metadata = Metadata::read_or_create(&post_directory.join(METADATA_FILENAME));
+        let raw = self.find_blog_content(post_directory)?;
+        // Render before `raw` is moved into the struct.
+        let html = parse(&raw);
+
+        Some(Post { metadata, index, html, raw })
+    }
 }
 
 impl super::Command for Generate {
@@ -14,8 +68,20 @@ impl super::Command for Generate {
         vec![]
     }
 
-    fn execute(&self, input: Option<&String>, _: &Configuration, _: &String) -> Result<()> {
-        println!("GENERATE! {:?}", input);
+    /// Rebuilds the blog output from the posts on disk.
+    ///
+    /// The command input and the last argument are ignored. Errors from
+    /// creating the output directories or from any generator are returned
+    /// to the caller.
+    fn execute(&self, _: Option<&String>, configuration: &Configuration, _: &String) -> Result<()> {
+        // Start from a clean output directory. The removal result is
+        // deliberately ignored (`let _`), e.g. when the directory is absent.
+        let _ = remove_dir_all(&configuration.blog_output_directory);
+        create_dir_all(&configuration.blog_output_directory)?;
+
+        let posts = self.read_posts(&configuration.posts_directory, configuration.max_posts);
+        generate(
+            &configuration.static_directory,
+            &configuration.templates_directory,
+            &configuration.blog_output_directory,
+            &posts
+        )?;
+
+        // The archive output directory is only reset here; nothing in this
+        // function writes into it yet.
+        let _ = remove_dir_all(&configuration.archive_output_directory);
+        create_dir_all(&configuration.archive_output_directory)?;
         return Ok(())
     }
 
diff --git a/src/gemini_parser.rs b/src/gemini_parser.rs
new file mode 100644 (file)
index 0000000..3414ea7
--- /dev/null
@@ -0,0 +1,225 @@
+// Taken from page. Needs to move to a common source.
+
+/// Converts gemtext `source` into an HTML fragment.
+///
+/// Each line is classified by `identify_line` from its first three
+/// characters. Consecutive list items, quote lines and preformatted lines
+/// are grouped into one `<ul>`, `<blockquote>` or `<pre>`; every other line
+/// is rendered on its own. Headings additionally open a
+/// `<div class="hN">` wrapper, and all wrappers still open at the end of
+/// the input are closed.
+pub fn parse(source: &str) -> String {
+    let mut is_preformatted = false;
+    let mut block_label: Option<String> = None;
+    let mut html = String::new();
+    let mut current_line_type: Option<LineType> = None;
+    let mut heading_stack: Vec<u8> = Vec::new();
+
+    for line in source.split('\n') {
+        // Classify from (at most) the first three characters. Short lines
+        // are classified too: forcing them to `Blank` dropped short text
+        // and broke `<pre>` blocks on lines such as `}` or an empty line.
+        let prefix_end = line
+            .char_indices()
+            .nth(3)
+            .map_or(line.len(), |(index, _)| index);
+        let line_type = identify_line(&line[..prefix_end], is_preformatted);
+
+        if line_type == LineType::PreformattedToggle {
+            is_preformatted = !is_preformatted;
+            // Anything after the opening fence becomes the block's label.
+            block_label = if is_preformatted && line.len() > 3 {
+                Some(get_partial_line_content(&line_type, line))
+            } else {
+                None
+            };
+            continue;
+        }
+
+        let previous = current_line_type.as_ref();
+        let changed = previous != Some(&line_type);
+
+        // Close the previous block if this line does not continue it.
+        if let Some(previous) = previous {
+            if changed && is_block(previous) {
+                html.push_str(get_line_closer(previous));
+            }
+        }
+
+        if is_block(&line_type) {
+            if changed {
+                // The label is always forwarded, so a preformatted block
+                // that is the first rendered content keeps its label.
+                html.push_str(&get_line_opener(&line_type, block_label.as_ref()));
+            }
+            html.push_str(&get_partial_line_content(&line_type, line));
+        } else {
+            html.push_str(&get_heading_wrapper(&mut heading_stack, &line_type));
+            html.push_str(&get_full_line_content(&line_type, line));
+        }
+        current_line_type = Some(line_type);
+    }
+
+    if let Some(last) = &current_line_type {
+        if is_block(last) {
+            html.push_str(get_line_closer(last));
+        }
+    }
+    html.push_str(&close_heading_wrapper(&mut heading_stack));
+    html
+}
+
+/// Tells whether lines of this type are grouped into a shared wrapper
+/// element (preformatted text, list items and quotes).
+fn is_block(line_type: &LineType) -> bool {
+    matches!(
+        line_type,
+        LineType::PreformattedText | LineType::ListItem | LineType::Quote
+    )
+}
+
+/// Renders the inner content of a block line (list item, quote,
+/// preformatted text) or the label of a preformatted toggle.
+///
+/// The gemtext marker is stripped first and `<` / `>` are encoded
+/// afterwards. Encoding before stripping turned a quote's leading `>` into
+/// `&gt;`, of which only the `&` was removed, leaving `gt;` in the output.
+/// Other line types yield an empty string.
+fn get_partial_line_content(line_type: &LineType, line: &str) -> String {
+    let encode = |text: &str| text.replace("<", "&lt;").replace(">", "&gt;");
+    // `get` keeps a too-short line from panicking on the slice.
+    let after_marker = |width: usize| line.get(width..).unwrap_or("").trim();
+
+    match line_type {
+        LineType::ListItem => format!("<li>{}</li>", encode(after_marker(2))),
+        LineType::Quote => encode(after_marker(1)),
+        LineType::PreformattedText => format!("{}\n", encode(line)),
+        LineType::PreformattedToggle => encode(after_marker(3)),
+        _ => String::new(),
+    }
+}
+
+/// Renders a standalone line (text, blank, link or heading) as HTML.
+///
+/// `<` and `>` are encoded in all user-supplied text, including link
+/// labels and addresses, which were previously written out unescaped.
+/// Link addresses also have `"` encoded because they land inside a
+/// double-quoted `href`. Addresses that do not start with `gemini:` get
+/// `.gmi` rewritten to `.html`. Other line types yield an empty string.
+fn get_full_line_content(line_type: &LineType, line: &str) -> String {
+    let encode = |text: &str| text.replace("<", "&lt;").replace(">", "&gt;");
+    // `get` keeps a too-short line from panicking on the slice.
+    let after_marker = |width: usize| line.get(width..).unwrap_or("").trim();
+
+    match line_type {
+        LineType::Text => format!("<p>{}</p>\n", encode(line.trim())),
+        LineType::Blank => "<br>\n".to_string(),
+        LineType::Link => {
+            let address = get_link_address(line);
+            let target = if address.starts_with("gemini:") {
+                address.to_string()
+            } else {
+                address.replace(".gmi", ".html")
+            };
+            let href = encode(&target).replace("\"", "&quot;");
+            let label = encode(get_link_content(line));
+            format!("<div><a href=\"{}\">{}</a></div>\n", href, label)
+        },
+        LineType::Heading1 => format!("<h1>{}</h1>\n", encode(after_marker(1))),
+        LineType::Heading2 => format!("<h2>{}</h2>\n", encode(after_marker(2))),
+        LineType::Heading3 => format!("<h3>{}</h3>\n", encode(after_marker(3))),
+        _ => String::new(),
+    }
+}
+
+/// Produces the wrapper markup that must precede a heading line.
+///
+/// For a heading of level N, every open wrapper of level >= N is closed
+/// (stopping once a wrapper of exactly level N has been closed), then a
+/// new `<div class="hN">` is opened and recorded on `heading_stack`.
+/// Non-heading lines produce an empty string and leave the stack alone.
+fn get_heading_wrapper(heading_stack: &mut Vec<u8>, line_type: &LineType) -> String {
+    let level: u8 = match line_type {
+        LineType::Heading1 => 1,
+        LineType::Heading2 => 2,
+        LineType::Heading3 => 3,
+        _ => return String::new(),
+    };
+
+    let mut markup = String::new();
+    while let Some(&open_level) = heading_stack.last() {
+        // A more important heading stays open around the new one.
+        if open_level < level {
+            break;
+        }
+
+        heading_stack.pop();
+        markup.push_str("</div>");
+
+        if open_level == level {
+            break;
+        }
+    }
+
+    heading_stack.push(level);
+    markup.push_str(&format!("<div class=\"h{}\">", level));
+    markup
+}
+
+/// Closes every heading wrapper still open, emptying `heading_stack`.
+///
+/// Returns one `</div>` per entry that was on the stack.
+fn close_heading_wrapper(heading_stack: &mut Vec<u8>) -> String {
+    let closers = "</div>".repeat(heading_stack.len());
+    heading_stack.clear();
+    closers
+}
+
+/// Returns the opening tag for a block of the given line type.
+///
+/// A preformatted block with a label is announced as an image with that
+/// label as its `aria-label`. Double quotes in the label are encoded so
+/// the label cannot terminate the attribute early. Non-block line types
+/// yield an empty string.
+fn get_line_opener(line_type: &LineType, block_label: Option<&String>) -> String {
+    match line_type {
+        LineType::ListItem => "<ul>".to_string(),
+        LineType::Quote => "<blockquote>".to_string(),
+        LineType::PreformattedText => match block_label {
+            Some(label) => format!(
+                "<pre role=\"img\" aria-label=\"{}\">",
+                label.replace("\"", "&quot;")
+            ),
+            None => "<pre>".to_string(),
+        },
+        _ => String::new(),
+    }
+}
+
+/// Returns the closing tag (newline-terminated) for a block of the given
+/// line type, or an empty string for non-block line types.
+fn get_line_closer(line_type: &LineType) -> &'static str {
+    if *line_type == LineType::ListItem {
+        "</ul>\n"
+    } else if *line_type == LineType::Quote {
+        "</blockquote>\n"
+    } else if *line_type == LineType::PreformattedText {
+        "</pre>\n"
+    } else {
+        ""
+    }
+}
+
+/// Returns the human-readable label of a `=>` link line.
+///
+/// The label is everything after the address. When the line has no label,
+/// the address itself is returned. Address and label may be separated by
+/// any whitespace (spaces or tabs), not just a single space.
+fn get_link_content(line: &str) -> &str {
+    // `get` avoids a panic should a line shorter than the `=>` marker arrive.
+    let body = line.get(2..).unwrap_or("").trim();
+    let mut parts = body.splitn(2, char::is_whitespace);
+    let address = parts.next().unwrap_or("");
+    match parts.next().map(str::trim) {
+        Some(label) if !label.is_empty() => label,
+        _ => address,
+    }
+}
+
+/// Returns the address (first whitespace-delimited word) of a `=>` link
+/// line, or an empty string when the line has none.
+///
+/// The address may be followed by a space or a tab.
+fn get_link_address(line: &str) -> &str {
+    // `get` avoids a panic should a line shorter than the `=>` marker arrive.
+    let body = line.get(2..).unwrap_or("").trim();
+    body.split(char::is_whitespace).next().unwrap_or("")
+}
+
+/// Classifies a line from its leading characters.
+///
+/// The checks are ordered: a code fence always toggles preformatted mode,
+/// and while `is_preformatted` is set every other line (including an empty
+/// one) is preformatted text. Longer heading markers are tested before
+/// shorter ones.
+fn identify_line(line: &str, is_preformatted: bool) -> LineType {
+    match line {
+        _ if line.starts_with("```") => LineType::PreformattedToggle,
+        _ if is_preformatted => LineType::PreformattedText,
+        "" => LineType::Blank,
+        _ if line.starts_with("=>") => LineType::Link,
+        _ if line.starts_with("* ") => LineType::ListItem,
+        _ if line.starts_with(">") => LineType::Quote,
+        _ if line.starts_with("###") => LineType::Heading3,
+        _ if line.starts_with("##") => LineType::Heading2,
+        _ if line.starts_with("#") => LineType::Heading1,
+        _ => LineType::Text,
+    }
+}
+
+/// Classification of a single gemtext line, as produced by `identify_line`.
+#[derive(PartialEq, Eq)]
+enum LineType {
+    /// A line that matches none of the markers below.
+    Text,
+    /// An empty line.
+    Blank,
+    /// A line starting with `=>`.
+    Link,
+    /// A line starting with three backticks; flips preformatted mode.
+    PreformattedToggle,
+    /// Any non-fence line seen while preformatted mode is on.
+    PreformattedText,
+    /// A line starting with `#` (but not `##`).
+    Heading1,
+    /// A line starting with `##` (but not `###`).
+    Heading2,
+    /// A line starting with `###`.
+    Heading3,
+    /// A line starting with `* `.
+    ListItem,
+    /// A line starting with `>`.
+    Quote
+}
diff --git a/src/generator/html.rs b/src/generator/html.rs
new file mode 100644 (file)
index 0000000..0b258bf
--- /dev/null
@@ -0,0 +1,18 @@
+use std::io::Result;
+use std::path::PathBuf;
+use crate::post::Post;
+use crate::template::{find, parse};
+
+/// HTML generator (work in progress).
+///
+/// Looks up the `index.html` template, tokenizes it and prints each
+/// top-level token to stdout. Nothing is written to the output directory,
+/// and a missing template is not an error.
+pub fn generate(_: &PathBuf, template_directory: &PathBuf, _: &PathBuf, _: &Vec<Post>) -> Result<()> {
+    println!("READING TEMP");
+    if let Some(template) = find(template_directory, "index.html") {
+        parse(&template)
+            .tokens
+            .iter()
+            .for_each(|token| println!("TOKEN {}", token));
+    }
+    Ok(())
+}
diff --git a/src/generator/mod.rs b/src/generator/mod.rs
new file mode 100644 (file)
index 0000000..c4bf1e1
--- /dev/null
@@ -0,0 +1,26 @@
+mod static_files;
+mod html;
+mod rss;
+mod txt;
+
+use std::io::Result;
+use std::path::PathBuf;
+use crate::post::Post;
+
+/// Runs every available generator, in order, against the same inputs.
+///
+/// Stops at, and returns, the first error a generator reports.
+pub fn generate(static_directory: &PathBuf, template_directory: &PathBuf, output_directory: &PathBuf, posts: &Vec<Post>) -> Result<()> {
+    available_generators()
+        .into_iter()
+        .try_for_each(|run| run(static_directory, template_directory, output_directory, posts))
+}
+
+
+/// Lists the generator entry points in the order they must run:
+/// static files, HTML, RSS, then plain text.
+fn available_generators() -> Vec<fn(&PathBuf, &PathBuf, &PathBuf, &Vec<Post>) -> Result<()>> {
+    let generators: [fn(&PathBuf, &PathBuf, &PathBuf, &Vec<Post>) -> Result<()>; 4] = [
+        static_files::generate,
+        html::generate,
+        rss::generate,
+        txt::generate,
+    ];
+    generators.to_vec()
+}
diff --git a/src/generator/rss.rs b/src/generator/rss.rs
new file mode 100644 (file)
index 0000000..dab7e50
--- /dev/null
@@ -0,0 +1,7 @@
+use std::io::Result;
+use std::path::PathBuf;
+use crate::post::Post;
+
+/// RSS generator placeholder: ignores all of its arguments and succeeds
+/// without producing any output.
+pub fn generate(_: &PathBuf, _: &PathBuf, _: &PathBuf, _: &Vec<Post>) -> Result<()> {
+    Ok(())
+}
diff --git a/src/generator/static_files.rs b/src/generator/static_files.rs
new file mode 100644 (file)
index 0000000..6804800
--- /dev/null
@@ -0,0 +1,31 @@
+use std::fs::{copy, create_dir_all, read_dir};
+use std::io::Result;
+use std::path::PathBuf;
+use crate::post::Post;
+
+/// Copies the contents of `source` into `target`, descending into
+/// subdirectories.
+///
+/// `target` itself must already exist; nested directories are created as
+/// needed. The first I/O error aborts the copy and is returned.
+fn recursively_copy(source: &PathBuf, target: &PathBuf) -> Result<()> {
+    for entry in read_dir(source)? {
+        let entry = entry?;
+        let is_directory = entry.file_type()?.is_dir();
+        let origin = entry.path();
+        let destination = target.join(entry.file_name());
+
+        if is_directory {
+            create_dir_all(&destination)?;
+            recursively_copy(&origin, &destination)?;
+        } else {
+            copy(&origin, &destination)?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Static-file generator: mirrors the `source` directory into `target`.
+///
+/// A `source` that does not exist is not an error; there is simply
+/// nothing to copy.
+pub fn generate(source: &PathBuf, _: &PathBuf, target: &PathBuf, _: &Vec<Post>) -> Result<()> {
+    if !source.exists() {
+        return Ok(());
+    }
+    recursively_copy(source, target)
+}
diff --git a/src/generator/txt.rs b/src/generator/txt.rs
new file mode 100644 (file)
index 0000000..dab7e50
--- /dev/null
@@ -0,0 +1,7 @@
+use std::io::Result;
+use std::path::PathBuf;
+use crate::post::Post;
+
+/// Plain-text generator placeholder: ignores all of its arguments and
+/// succeeds without producing any output.
+pub fn generate(_: &PathBuf, _: &PathBuf, _: &PathBuf, _: &Vec<Post>) -> Result<()> {
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index 4cb8036f7fb92f803f007f05e0ee3ccf091c0ab5..0de896b45aa5a431b08d8733ad21971f4afd5d30 100644 (file)
@@ -2,7 +2,11 @@
 mod configuration;
 mod command;
 mod constants;
+mod gemini_parser;
+mod generator;
 mod metadata;
+mod post;
+mod template;
 
 use std::iter::once;
 use std::env::args;
diff --git a/src/post.rs b/src/post.rs
new file mode 100644 (file)
index 0000000..548a5cb
--- /dev/null
@@ -0,0 +1,8 @@
+use crate::metadata::Metadata;
+
+/// A single blog post, loaded from one numbered post directory.
+pub struct Post {
+    /// Metadata read (or created) from the post directory's metadata file.
+    pub metadata: Metadata,
+    /// Number of the post directory this post was read from.
+    pub index: u8,
+    /// The gemtext source rendered to HTML.
+    pub html: String,
+    /// The unmodified gemtext source.
+    pub raw: String
+}
diff --git a/src/remote/mod.rs b/src/remote/mod.rs
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/template.rs b/src/template.rs
new file mode 100644 (file)
index 0000000..37906ed
--- /dev/null
@@ -0,0 +1,140 @@
+use std::fs::File;
+use std::path::PathBuf;
+use std::io::Read;
+
+// Built-in templates, embedded into the binary at compile time. They are
+// handed out by `find_default` when no override is loaded from disk.
+const TXT_TEMPLATE: &'static str = include_str!("../templates/index.txt");
+const HTML_TEMPLATE: &'static str = include_str!("../templates/index.html");
+const GMI_TEMPLATE: &'static str = include_str!("../templates/index.gmi");
+// NOTE(review): embedded from `feed.xml` but looked up as "index.rss" in
+// `find_default` — confirm the intended lookup name.
+const RSS_TEMPLATE: &'static str = include_str!("../templates/feed.xml");
+
+// Parse and Render
+
+/// One node of a tokenized template.
+pub enum Token {
+    /// Literal text found outside of any `{{ … }}` directive.
+    Text(String),
+    /// A `{{= content }}` directive.
+    DisplayDirective { content: String },
+    /// A `{{? condition }} … {{?}}` block with its tokenized body.
+    ConditionalDirective { condition: String, children: Vec<Token>},
+    /// A `{{~ collection: member_label }} … {{~}}` block with its tokenized body.
+    IteratorDirective { collection: String, member_label: String, children: Vec<Token> }
+}
+
+/// Debug-style rendering of a token tree: leaf tokens print on one line,
+/// block directives print a header followed by their tab-prefixed
+/// children between `[[[` and `]]]`.
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        // Leaves return immediately; block directives write their header
+        // and hand back the children to be listed below.
+        let children = match self {
+            Token::Text(label) => return write!(f, "Text {}", label),
+            Token::DisplayDirective { content } => {
+                return write!(f, "DisplayDirective {}", content)
+            },
+            Token::ConditionalDirective { condition, children } => {
+                write!(f, "ConditionalDirective {} [[[\n", condition)?;
+                children
+            },
+            Token::IteratorDirective { collection, member_label, children } => {
+                write!(f, "IteratorDirective {}: {} [[[\n", collection, member_label)?;
+                children
+            },
+        };
+
+        for child in children {
+            write!(f, "\t{}\n", child)?;
+        }
+        write!(f, "\n]]]")
+    }
+}
+
+/// The result of tokenizing a template: its top-level tokens, in source order.
+pub struct ParsedTemplate {
+    pub tokens: Vec<Token>
+}
+
+/// Tokenizes `template` and wraps the resulting token list.
+pub fn parse(template: &str) -> ParsedTemplate {
+    let mut collected: Vec<Token> = Vec::new();
+    tokenize(template, &mut collected);
+    ParsedTemplate { tokens: collected }
+}
+
+/// Splits the body of a block directive from what follows its closer.
+///
+/// `template` starts right after the opening directive. `opener` is the
+/// common prefix of openers and closers of this block kind (for example
+/// `{{?`) and `closer` the exact closing directive (`{{?}}`). Nested blocks
+/// of the same kind are skipped over. Returns the body and the number of
+/// bytes consumed including the closer; with no matching closer the whole
+/// of `template` is the body.
+fn split_block<'a>(template: &'a str, opener: &str, closer: &str) -> (&'a str, usize) {
+    let mut depth = 0usize;
+    let mut cursor = 0;
+
+    while let Some(offset) = template[cursor..].find(opener) {
+        let position = cursor + offset;
+        if template[position..].starts_with(closer) {
+            if depth == 0 {
+                return (&template[..position], position + closer.len());
+            }
+            depth -= 1;
+            cursor = position + closer.len();
+        } else {
+            depth += 1;
+            cursor = position + opener.len();
+        }
+    }
+
+    (template, template.len())
+}
+
+/// Appends the tokens found in `template` to `tokens`.
+///
+/// Recognized directives:
+/// * `{{= content }}` — display directive.
+/// * `{{? condition }} … {{?}}` — conditional block.
+/// * `{{~ collection: member }} … {{~}}` — iterator block.
+///
+/// Block bodies are tokenized recursively and blocks of the same kind may
+/// nest. Malformed input never panics: an unterminated `{{`, an unknown
+/// directive, a conditional without a condition and an iterator without
+/// `collection: member` are all kept as literal text, and a block with no
+/// closer runs to the end of the template. Empty text tokens are not
+/// emitted.
+fn tokenize(template: &str, tokens: &mut Vec<Token>) {
+    let mut remaining = template;
+    // Everything in `remaining` before this offset has been examined and
+    // belongs to the pending literal text.
+    let mut scanned = 0;
+
+    while let Some(offset) = remaining[scanned..].find("{{") {
+        let start = scanned + offset;
+        let end = match remaining[start..].find("}}") {
+            Some(close) => start + close + 2,
+            // Unterminated directive: the rest is literal text.
+            None => break,
+        };
+
+        let inner = &remaining[start + 2..end - 2];
+        let content = inner.get(1..).unwrap_or("").trim();
+
+        // A recognized directive yields its token plus the offset in
+        // `remaining` at which tokenizing resumes.
+        let parsed = match inner.chars().next() {
+            Some('=') => Some((
+                Token::DisplayDirective { content: content.to_string() },
+                end,
+            )),
+            Some('?') if !content.is_empty() => {
+                let (block, consumed) = split_block(&remaining[end..], "{{?", "{{?}}");
+                let mut children = Vec::new();
+                tokenize(block, &mut children);
+                Some((
+                    Token::ConditionalDirective {
+                        condition: content.to_string(),
+                        children,
+                    },
+                    end + consumed,
+                ))
+            },
+            Some('~') => {
+                let mut parts = content.splitn(2, ':');
+                match (parts.next(), parts.next()) {
+                    (Some(collection), Some(member_label)) => {
+                        let (block, consumed) =
+                            split_block(&remaining[end..], "{{~", "{{~}}");
+                        let mut children = Vec::new();
+                        tokenize(block, &mut children);
+                        Some((
+                            Token::IteratorDirective {
+                                collection: collection.trim().to_string(),
+                                member_label: member_label.trim().to_string(),
+                                children,
+                            },
+                            end + consumed,
+                        ))
+                    },
+                    _ => None,
+                }
+            },
+            _ => None,
+        };
+
+        match parsed {
+            Some((token, resume)) => {
+                if start > 0 {
+                    tokens.push(Token::Text(remaining[..start].to_string()));
+                }
+                tokens.push(token);
+                remaining = &remaining[resume..];
+                scanned = 0;
+            },
+            // Not a directive we understand: keep it as literal text.
+            None => scanned = end,
+        }
+    }
+
+    if !remaining.is_empty() {
+        tokens.push(Token::Text(remaining.to_string()));
+    }
+}
+
+// File helpers.
+
+/// Loads the template called `filename`.
+///
+/// A file of that name inside `template_directory` takes precedence. If
+/// it does not exist, or exists but cannot be read into a string, the
+/// built-in default for `filename` is used instead. A file that exists but
+/// cannot be opened yields `None` without consulting the defaults.
+pub fn find(template_directory: &PathBuf, filename: &str) -> Option<String> {
+    let candidate = template_directory.join(filename);
+    if !candidate.exists() {
+        return find_default(filename);
+    }
+
+    let mut file = File::open(candidate).ok()?;
+    let mut contents = String::new();
+    match file.read_to_string(&mut contents) {
+        Ok(_) => Some(contents),
+        Err(_) => find_default(filename),
+    }
+}
+
+/// Returns the built-in template registered under `filename`, if any.
+fn find_default(filename: &str) -> Option<String> {
+    let defaults = [
+        ("index.txt", TXT_TEMPLATE),
+        ("index.html", HTML_TEMPLATE),
+        ("index.gmi", GMI_TEMPLATE),
+        ("index.rss", RSS_TEMPLATE),
+    ];
+
+    defaults
+        .iter()
+        .find(|(name, _)| *name == filename)
+        .map(|(_, template)| template.to_string())
+}