X-Git-Url: https://git.r.bdr.sh/rbdr/page/blobdiff_plain/f07392253d4db57964eae389dab219c963dd4c95..2cbae13cfd94f48dfe9a8c903e05aea49106b778:/src/gemini_parser.rs?ds=sidebyside

diff --git a/src/gemini_parser.rs b/src/gemini_parser.rs
index 14d74c6..418b758 100644
--- a/src/gemini_parser.rs
+++ b/src/gemini_parser.rs
@@ -1,224 +1,222 @@
-    pub fn parse(source: &str) -> String {
-
-    let lines = source.split("\n");
-    let mut is_preformatted = false;
-
-    let mut block_label: Option<String> = None;
-    let mut html: String = "".to_owned();
-    let mut current_line_type: Option<LineType> = None;
-
-    let mut heading_stack: Vec<u8> = Vec::new();
-    for line in lines {
-        let mut line_type = LineType::Blank;
-        if line.char_indices().count() > 2 {
-            let mut end = line.len();
-            if line.char_indices().count() > 3 {
-                end = line.char_indices().map(|(i, _)| i).nth(3).unwrap();
-            }
-            line_type = identify_line(&line[..end], is_preformatted);
-        }
-        match line_type {
-            LineType::PreformattedToggle => {
-                is_preformatted = !is_preformatted;
-                if is_preformatted && line.char_indices().count() > 3 {
-                    block_label = Some(get_partial_line_content(&line_type, line));
-                } else {
-                    block_label = None;
-                }
-            },
-            _ => {
-                // Close previous block if needed
-                if let Some(line) = &current_line_type {
-                    if line != &line_type && is_block(line) {
-                        html.push_str(get_line_closer(line));
-                    }
-                }
-
-                // Blocks
-                if is_block(&line_type) {
-                    if let Some(line) = &current_line_type {
-                        if line != &line_type  {
-                            html.push_str(&get_line_opener(&line_type, block_label.as_ref()));
-                        }
-                    } else {
-                        html.push_str(&get_line_opener(&line_type, None));
-                    }
-
-                    let line_content = get_partial_line_content(&line_type, line);
-                    html.push_str(&line_content);
-                } else {
-                    html.push_str(&get_heading_wrapper(&mut heading_stack, &line_type));
-                    html.push_str(&get_full_line_content(&line_type, line));
-                }
-                current_line_type = Some(line_type);
-            },
-        }
-    }
-    if let Some(line) = &current_line_type {
-        if is_block(line) {
-            html.push_str(get_line_closer(line));
-        }
-    }
-    html.push_str(&close_heading_wrapper(&mut heading_stack));
-    html
-}
-
-fn is_block(line_type: &LineType) -> bool {
-    return match line_type {
-        LineType::PreformattedText | LineType::ListItem | LineType::Quote => true,
-        _ => false,
-    }
-}
-
-fn get_partial_line_content(line_type: &LineType, line: &str) -> String {
-    let encoded_line = line.replace("<", "&lt;").replace(">", "&gt;");
-    return match line_type {
-        LineType::ListItem => format!("<li>{}</li>", encoded_line[2..].trim()),
-        LineType::Quote => encoded_line[1..].trim().to_string(),
-        LineType::PreformattedText => format!("{}\n", encoded_line),
-        LineType::PreformattedToggle => encoded_line[3..].trim().to_string(),
-        _ => "".to_string(),
-    }
-}
-
-fn get_full_line_content(line_type: &LineType, line: &str) -> String {
-    let encoded_line = line.replace("<", "&lt;").replace(">", "&gt;");
-     match line_type {
-        LineType::Text => format!("<p>{}</p>\n", encoded_line.trim()),
-        LineType::Blank => "<br/>\n".to_string(),
-        LineType::Link => {
-            let url = get_link_address(line);
-            if url.starts_with("gemini:") {
-                format!("<div><a href=\"{}\">{}</a></div>\n", url, get_link_content(line))
-            } else {
-                format!("<div><a href=\"{}\">{}</a></div>\n", url.replace(".gmi", ".html"), get_link_content(line))
-            }
-        },
-        LineType::Heading1 => format!("<h1>{}</h1>\n", encoded_line[1..].trim()),
-        LineType::Heading2 => format!("<h2>{}</h2>\n", encoded_line[2..].trim()),
-        LineType::Heading3 => format!("<h3>{}</h3>\n", encoded_line[3..].trim()),
-        _ => "".to_string(),
-    }
+#[derive(PartialEq, Eq, Debug)]
+pub enum GeminiLine {
+    Text(String, bool),
+    PreformattedToggle(bool, String),
+    Heading(u8, String),
+    Link(String, String),
+    Quote(String),
+    ListItem(String)
 }
 
-fn get_heading_wrapper(heading_stack: &mut Vec<u8>, line_type: &LineType) -> String {
-    let mut string = String::new();
-    let current_heading: u8 = match line_type {
-        LineType::Heading1 => 1,
-        LineType::Heading2 => 2,
-        LineType::Heading3 => 3,
-        _ => 255
-    };
-
-    if current_heading < 255 {
-        while let Some(open_heading) = heading_stack.pop() {
-            // You just encountered a more important heading.
-            // Put it back. Desist.
-            if open_heading < current_heading {
-                heading_stack.push(open_heading);
-                break;
-            }
+/// Parses gemtext source code into a vector of GeminiLine elements.
+/// 
+/// # Arguments
+/// * `source` - A string slice that contains the gemtext
+/// 
+/// # Returns
+/// A `Vec<GeminiLine>` holding one parsed element per input line, in source order.
+pub fn parse(source: &str) -> Vec<GeminiLine> {
+    source.lines()
+        .fold(
+            (Vec::new(), false),
+            |(mut lines, is_preformatted), line| {
+                let parsed = if is_preformatted {
+                    parse_preformatted_line(line)
+                } else {
+                    parse_line(line)
+                };
 
-            string.push_str("</div>");
+                let new_is_preformatted = match parsed {
+                    GeminiLine::PreformattedToggle(x, _) => x,
+                    _ => is_preformatted
+                };
 
-            if open_heading == current_heading {
-                break;
+                lines.push(parsed);
+                (lines, new_is_preformatted)
             }
-        }
-        heading_stack.push(current_heading);
-        string.push_str(&format!("<div class=\"h{}\">", current_heading));
-    }
-
-    return string;
+        )
+        .0
 }
 
-fn close_heading_wrapper(heading_stack: &mut Vec<u8>) -> String {
-    let mut string = String::new();
-    while let Some(_open_heading) = heading_stack.pop() {
-        string.push_str("</div>");
+fn parse_preformatted_line(line: &str) -> GeminiLine {
+    match line {
+        s if s.starts_with("```") => GeminiLine::PreformattedToggle(false, String::new()),
+        _ => GeminiLine::Text(line.to_string(), true),
     }
-    return string;
 }
 
-fn get_line_opener(line_type: &LineType, block_label: Option<&String>) -> String {
-    match line_type {
-        LineType::ListItem => "<ul>".to_string(),
-        LineType::Quote => "<blockquote>".to_string(),
-        LineType::PreformattedText => {
-            if let Some(label) = &block_label {
-                return format!("<pre role=\"img\" aria-label=\"h{}\">", label);
-            } else {
-                return "<pre>".to_string();
+fn parse_line(line: &str) -> GeminiLine {
+    match line {
+        s if s.starts_with("###") => GeminiLine::Heading(3, s[3..].to_string()),
+        s if s.starts_with("##") => GeminiLine::Heading(2, s[2..].to_string()),
+        s if s.starts_with("#") => GeminiLine::Heading(1, s[1..].to_string()),
+        s if s.starts_with("=>") => {
+            let content = s[2..].trim();
+            match content.split_once(char::is_whitespace) {
+                Some((url, text)) => GeminiLine::Link(url.trim().to_string(), text.trim().to_string()),
+                None => GeminiLine::Link(content.trim().to_string(), String::new()),
             }
         },
-        _ => "".to_string(),
+        s if s.starts_with("* ") => GeminiLine::ListItem(s[2..].to_string()),
+        s if s.starts_with(">") => GeminiLine::Quote(s[1..].to_string()),
+        s if s.starts_with("```") => GeminiLine::PreformattedToggle(true, s[3..].to_string()),
+        _ => GeminiLine::Text(line.to_string(), false),
     }
 }
 
-fn get_line_closer(line_type: &LineType) -> &'static str {
-    match line_type {
-        LineType::ListItem => "</ul>\n",
-        LineType::Quote => "</blockquote>\n",
-        LineType::PreformattedText => "</pre>\n",
-        _ => "",
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_headings() {
+        assert_eq!(parse_line("### Heading"), GeminiLine::Heading(3, " Heading".to_string()));
+        assert_eq!(parse_line("## Heading"), GeminiLine::Heading(2, " Heading".to_string()));
+        assert_eq!(parse_line("# Heading"), GeminiLine::Heading(1, " Heading".to_string()));
+        assert_eq!(parse_line("###"), GeminiLine::Heading(3, "".to_string()));
+        assert_eq!(parse_line("#####"), GeminiLine::Heading(3, "##".to_string()));
+        assert_eq!(parse_line("# "), GeminiLine::Heading(1, " ".to_string()));
+
+        assert_eq!(parse_preformatted_line("### Heading"), GeminiLine::Text("### Heading".to_string(), true));
+        assert_eq!(parse_preformatted_line("## Heading"), GeminiLine::Text("## Heading".to_string(), true));
+        assert_eq!(parse_preformatted_line("# Heading"), GeminiLine::Text("# Heading".to_string(), true));
+    }
+
+    #[test]
+    fn test_links() {
+        assert_eq!(
+            parse_line("=> https://example.com Link text"),
+            GeminiLine::Link("https://example.com".to_string(), "Link text".to_string())
+        );
+        assert_eq!(
+            parse_line("=> /local/path"),
+            GeminiLine::Link("/local/path".to_string(), "".to_string())
+        );
+
+        assert_eq!(
+            parse_line("=>"),
+            GeminiLine::Link("".to_string(), "".to_string())
+        );
+        assert_eq!(
+            parse_line("=> "),
+            GeminiLine::Link("".to_string(), "".to_string())
+        );
+        assert_eq!(
+            parse_line("=>  multiple    spaces  in   text"),
+            GeminiLine::Link("multiple".to_string(), "spaces  in   text".to_string())
+        );
+
+        assert_eq!(
+            parse_preformatted_line("=> https://example.com Link text"),
+            GeminiLine::Text("=> https://example.com Link text".to_string(), true)
+        );
+    }
+
+    #[test]
+    fn test_list_items() {
+        assert_eq!(
+            parse_line("* List item"),
+            GeminiLine::ListItem("List item".to_string())
+        );
+
+        assert_eq!(parse_line("* "), GeminiLine::ListItem("".to_string()));
+        assert_eq!(parse_line("*"), GeminiLine::Text("*".to_string(), false));
+        assert_eq!(parse_line("*WithText"), GeminiLine::Text("*WithText".to_string(), false));
+        assert_eq!(parse_line("*  Multiple spaces"), GeminiLine::ListItem(" Multiple spaces".to_string()));
+    }
+
+    #[test]
+    fn test_quotes() {
+        assert_eq!(
+            parse_line(">Quote text"),
+            GeminiLine::Quote("Quote text".to_string())
+        );
+
+        assert_eq!(parse_line(">"), GeminiLine::Quote("".to_string()));
+        assert_eq!(parse_line("> "), GeminiLine::Quote(" ".to_string()));
+        assert_eq!(parse_line(">>Nested"), GeminiLine::Quote(">Nested".to_string()));
+    }
+
+    #[test]
+    fn test_preformatted() {
+        assert_eq!(
+            parse_line("```alt-text"),
+            GeminiLine::PreformattedToggle(true, "alt-text".to_string())
+        );
+
+        assert_eq!(parse_line("```"), GeminiLine::PreformattedToggle(true, "".to_string()));
+        assert_eq!(parse_line("``` "), GeminiLine::PreformattedToggle(true, " ".to_string()));
+        assert_eq!(parse_line("````"), GeminiLine::PreformattedToggle(true, "`".to_string()));
+
+        assert_eq!(
+            parse_preformatted_line("```alt-text"),
+            GeminiLine::PreformattedToggle(false, "".to_string())
+        );
+        assert_eq!(
+            parse_preformatted_line("```"),
+            GeminiLine::PreformattedToggle(false, "".to_string())
+        );
+
+    }
+
+    #[test]
+    fn test_text() {
+        // Normal case
+        assert_eq!(
+            parse_line("Regular text"),
+            GeminiLine::Text("Regular text".to_string(), false)
+        );
+
+        // Edge cases
+        assert_eq!(parse_line(""), GeminiLine::Text("".to_string(), false));
+        assert_eq!(parse_line(" "), GeminiLine::Text(" ".to_string(), false));
+        assert_eq!(parse_line("  "), GeminiLine::Text("  ".to_string(), false));
+    }
+
+    #[test]
+    fn test_malformed_input() {
+        assert_eq!(parse_line("= >Not a link"), GeminiLine::Text("= >Not a link".to_string(), false));
+        assert_eq!(parse_line("``Not preformatted"), GeminiLine::Text("``Not preformatted".to_string(), false));
+        assert_eq!(parse_line("** Not a list"), GeminiLine::Text("** Not a list".to_string(), false));
+    }
+
+    #[test]
+    fn test_full_document() {
+        let input = "\
+# Heading 1
+## Heading 2
+### Heading 3
+Regular text
+=> https://example.com Link text
+* List item
+>Quote
+```alt
+code
+# Heading 1
+## Heading 2
+### Heading 3
+=> https://example.com Link text
+* List item
+>Quote
+```trailing alt";
+        let result = parse(input);
+        assert_eq!(result, vec![
+            GeminiLine::Heading(1, " Heading 1".to_string()),
+            GeminiLine::Heading(2, " Heading 2".to_string()),
+            GeminiLine::Heading(3, " Heading 3".to_string()),
+            GeminiLine::Text("Regular text".to_string(), false),
+            GeminiLine::Link("https://example.com".to_string(), "Link text".to_string()),
+            GeminiLine::ListItem("List item".to_string()),
+            GeminiLine::Quote("Quote".to_string()),
+            GeminiLine::PreformattedToggle(true, "alt".to_string()),
+            GeminiLine::Text("code".to_string(), true),
+            GeminiLine::Text("# Heading 1".to_string(), true),
+            GeminiLine::Text("## Heading 2".to_string(), true),
+            GeminiLine::Text("### Heading 3".to_string(), true),
+            GeminiLine::Text("=> https://example.com Link text".to_string(), true),
+            GeminiLine::Text("* List item".to_string(), true),
+            GeminiLine::Text(">Quote".to_string(), true),
+            GeminiLine::PreformattedToggle(false, "".to_string()),
+        ]);
     }
 }
-
-fn get_link_content(line: &str) -> &str {
-    let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect();
-    if components.len() > 1 {
-        return components[1].trim()
-    }
-    components[0].trim()
-}
-
-fn get_link_address(line: &str) -> &str {
-    let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect();
-    components[0].trim()
-}
-
-fn identify_line(line: &str, is_preformatted: bool) -> LineType {
-    if line.starts_with("```") {
-        return LineType::PreformattedToggle;
-    }
-    if is_preformatted {
-        return LineType::PreformattedText;
-    }
-    if line.is_empty() {
-        return LineType::Blank;
-    }
-    if line.starts_with("=>") {
-        return LineType::Link;
-    }
-    if line.starts_with("* ") {
-        return LineType::ListItem;
-    }
-    if line.starts_with(">") {
-        return LineType::Quote;
-    }
-    if line.starts_with("###") {
-        return LineType::Heading3;
-    }
-    if line.starts_with("##") {
-        return LineType::Heading2;
-    }
-    if line.starts_with("#") {
-        return LineType::Heading1;
-    }
-
-    LineType::Text
-}
-
-#[derive(PartialEq, Eq)]
-enum LineType {
-    Text,
-    Blank,
-    Link,
-    PreformattedToggle,
-    PreformattedText,
-    Heading1,
-    Heading2,
-    Heading3,
-    ListItem,
-    Quote
-}