]> git.r.bdr.sh - rbdr/blog/blobdiff - src/gemini_parser.rs
Add tokenizer
[rbdr/blog] / src / gemini_parser.rs
diff --git a/src/gemini_parser.rs b/src/gemini_parser.rs
new file mode 100644 (file)
index 0000000..3414ea7
--- /dev/null
@@ -0,0 +1,225 @@
+// NOTE: this parser was taken from the page code. It should be moved to a
+// common source so that both places can share a single implementation.
+    pub fn parse(source: &str) -> String {
+
+    let lines = source.split("\n");
+    let mut is_preformatted = false;
+
+    let mut block_label: Option<String> = None;
+    let mut html: String = "".to_owned();
+    let mut current_line_type: Option<LineType> = None;
+
+    let mut heading_stack: Vec<u8> = Vec::new();
+    for line in lines {
+        let mut line_type = LineType::Blank;
+        if line.char_indices().count() > 2 {
+            let mut end = line.len();
+            if line.char_indices().count() > 3 {
+                end = line.char_indices().map(|(i, _)| i).nth(3).unwrap();
+            }
+            line_type = identify_line(&line[..end], is_preformatted);
+        }
+        match line_type {
+            LineType::PreformattedToggle => {
+                is_preformatted = !is_preformatted;
+                if is_preformatted && line.char_indices().count() > 3 {
+                    block_label = Some(get_partial_line_content(&line_type, line));
+                } else {
+                    block_label = None;
+                }
+            },
+            _ => {
+                // Close previous block if needed
+                if let Some(line) = &current_line_type {
+                    if line != &line_type && is_block(line) {
+                        html.push_str(get_line_closer(line));
+                    }
+                }
+
+                // Blocks
+                if is_block(&line_type) {
+                    if let Some(line) = &current_line_type {
+                        if line != &line_type  {
+                            html.push_str(&get_line_opener(&line_type, block_label.as_ref()));
+                        }
+                    } else {
+                        html.push_str(&get_line_opener(&line_type, None));
+                    }
+
+                    let line_content = get_partial_line_content(&line_type, line);
+                    html.push_str(&line_content);
+                } else {
+                    html.push_str(&get_heading_wrapper(&mut heading_stack, &line_type));
+                    html.push_str(&get_full_line_content(&line_type, line));
+                }
+                current_line_type = Some(line_type);
+            },
+        }
+    }
+    if let Some(line) = &current_line_type {
+        if is_block(line) {
+            html.push_str(get_line_closer(line));
+        }
+    }
+    html.push_str(&close_heading_wrapper(&mut heading_stack));
+    html
+}
+
+fn is_block(line_type: &LineType) -> bool {
+    return match line_type {
+        LineType::PreformattedText | LineType::ListItem | LineType::Quote => true,
+        _ => false,
+    }
+}
+
+fn get_partial_line_content(line_type: &LineType, line: &str) -> String {
+    let encoded_line = line.replace("<", "&lt;").replace(">", "&gt;");
+    return match line_type {
+        LineType::ListItem => format!("<li>{}</li>", encoded_line[2..].trim()),
+        LineType::Quote => encoded_line[1..].trim().to_string(),
+        LineType::PreformattedText => format!("{}\n", encoded_line),
+        LineType::PreformattedToggle => encoded_line[3..].trim().to_string(),
+        _ => "".to_string(),
+    }
+}
+
+fn get_full_line_content(line_type: &LineType, line: &str) -> String {
+    let encoded_line = line.replace("<", "&lt;").replace(">", "&gt;");
+     match line_type {
+        LineType::Text => format!("<p>{}</p>\n", encoded_line.trim()),
+        LineType::Blank => "<br>\n".to_string(),
+        LineType::Link => {
+            let url = get_link_address(line);
+            if url.starts_with("gemini:") {
+                format!("<div><a href=\"{}\">{}</a></div>\n", url, get_link_content(line))
+            } else {
+                format!("<div><a href=\"{}\">{}</a></div>\n", url.replace(".gmi", ".html"), get_link_content(line))
+            }
+        },
+        LineType::Heading1 => format!("<h1>{}</h1>\n", encoded_line[1..].trim()),
+        LineType::Heading2 => format!("<h2>{}</h2>\n", encoded_line[2..].trim()),
+        LineType::Heading3 => format!("<h3>{}</h3>\n", encoded_line[3..].trim()),
+        _ => "".to_string(),
+    }
+}
+
+fn get_heading_wrapper(heading_stack: &mut Vec<u8>, line_type: &LineType) -> String {
+    let mut string = String::new();
+    let current_heading: u8 = match line_type {
+        LineType::Heading1 => 1,
+        LineType::Heading2 => 2,
+        LineType::Heading3 => 3,
+        _ => 255
+    };
+
+    if current_heading < 255 {
+        while let Some(open_heading) = heading_stack.pop() {
+            // You just encountered a more important heading.
+            // Put it back. Desist.
+            if open_heading < current_heading {
+                heading_stack.push(open_heading);
+                break;
+            }
+
+            string.push_str("</div>");
+
+            if open_heading == current_heading {
+                break;
+            }
+        }
+        heading_stack.push(current_heading);
+        string.push_str(&format!("<div class=\"h{}\">", current_heading));
+    }
+
+    return string;
+}
+
+fn close_heading_wrapper(heading_stack: &mut Vec<u8>) -> String {
+    let mut string = String::new();
+    while let Some(_open_heading) = heading_stack.pop() {
+        string.push_str("</div>");
+    }
+    return string;
+}
+
+fn get_line_opener(line_type: &LineType, block_label: Option<&String>) -> String {
+    match line_type {
+        LineType::ListItem => "<ul>".to_string(),
+        LineType::Quote => "<blockquote>".to_string(),
+        LineType::PreformattedText => {
+            if let Some(label) = &block_label {
+                return format!("<pre role=\"img\" aria-label=\"{}\">", label);
+            } else {
+                return "<pre>".to_string();
+            }
+        },
+        _ => "".to_string(),
+    }
+}
+
+fn get_line_closer(line_type: &LineType) -> &'static str {
+    match line_type {
+        LineType::ListItem => "</ul>\n",
+        LineType::Quote => "</blockquote>\n",
+        LineType::PreformattedText => "</pre>\n",
+        _ => "",
+    }
+}
+
+fn get_link_content(line: &str) -> &str {
+    let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect();
+    if components.len() > 1 {
+        return components[1].trim()
+    }
+    components[0].trim()
+}
+
+fn get_link_address(line: &str) -> &str {
+    let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect();
+    components[0].trim()
+}
+
+fn identify_line(line: &str, is_preformatted: bool) -> LineType {
+    if line.starts_with("```") {
+        return LineType::PreformattedToggle;
+    }
+    if is_preformatted {
+        return LineType::PreformattedText;
+    }
+    if line.is_empty() {
+        return LineType::Blank;
+    }
+    if line.starts_with("=>") {
+        return LineType::Link;
+    }
+    if line.starts_with("* ") {
+        return LineType::ListItem;
+    }
+    if line.starts_with(">") {
+        return LineType::Quote;
+    }
+    if line.starts_with("###") {
+        return LineType::Heading3;
+    }
+    if line.starts_with("##") {
+        return LineType::Heading2;
+    }
+    if line.starts_with("#") {
+        return LineType::Heading1;
+    }
+
+    LineType::Text
+}
+
+#[derive(PartialEq, Eq)]
+enum LineType {
+    Text,
+    Blank,
+    Link,
+    PreformattedToggle,
+    PreformattedText,
+    Heading1,
+    Heading2,
+    Heading3,
+    ListItem,
+    Quote
+}