git.r.bdr.sh - rbdr/page/blobdiff - src/gemini_parser.rs
Format and lint the code
[rbdr/page] / src / gemini_parser.rs
index e63ebd30344dccdfe22e06eaeba3f0cb4fefd2de..e6de4eceeeccc6425b133bb8b5ce9beec573c414 100644 (file)
-pub fn parse(source: &str) -> String {
-
-    let lines = source.split("\n");
-    let mut is_preformatted = false;
+#[derive(PartialEq, Eq, Debug)]
+pub enum GeminiLine {
+    Text(String, bool),
+    PreformattedToggle(bool, String),
+    Heading(u8, String),
+    Link(String, String),
+    Quote(String),
+    ListItem(String),
+}
 
-    let mut html:String = "".to_owned();
-    let mut current_line_type: Option<LineType> = None;
+/// Parses gemtext source code into a vector of GeminiLine elements.
+///
+/// # Arguments
+/// * `source` - A string slice that contains the gemtext
+///
+/// # Returns
+/// A `Vec<GeminiLine>` with one parsed element per input line, in source order.
+pub fn parse(source: &str) -> Vec<GeminiLine> {
+    source
+        .lines()
+        .fold((Vec::new(), false), |(mut lines, is_preformatted), line| {
+            let parsed = if is_preformatted {
+                parse_preformatted_line(line)
+            } else {
+                parse_line(line)
+            };
 
-    let mut heading_stack: Vec<u8> = Vec::new();
-    for line in lines {
-        let mut line_type = LineType::Blank;
-        if line.char_indices().count() > 2 {
-            let mut end = line.len();
-            if line.char_indices().count() > 3 {
-                end = line.char_indices().map(|(i, _)| i).nth(3).unwrap();
-            }
-            line_type = identify_line(&line[..end], is_preformatted);
-        }
-        match line_type {
-            LineType::PreformattedToggle => is_preformatted = !is_preformatted,
-            _ => {
-                // Close previous block if needed
-                if let Some(line) = &current_line_type {
-                    if line != &line_type && is_block(line) {
-                        html.push_str(get_line_closer(line));
-                    }
-                }
+            let new_is_preformatted = match parsed {
+                GeminiLine::PreformattedToggle(x, _) => x,
+                _ => is_preformatted,
+            };
 
-                // Blocks
-                if is_block(&line_type) {
-                    if let Some(line) = &current_line_type {
-                        if line != &line_type  {
-                            html.push_str(get_line_opener(&line_type));
-                        }
-                    } else {
-                        html.push_str(get_line_opener(&line_type));
-                    }
-
-                    let line_content = get_partial_line_content(&line_type, line);
-                    html.push_str(&line_content);
-                } else {
-                    html.push_str(&get_heading_wrapper(&mut heading_stack, &line_type));
-                    html.push_str(&get_full_line_content(&line_type, line));
-                }
-                current_line_type = Some(line_type);
-            },
-        }
-    }
-    if let Some(line) = &current_line_type {
-        if is_block(line) {
-            html.push_str(get_line_closer(line));
-        }
-    }
-    html.push_str(&close_heading_wrapper(&mut heading_stack));
-    html
+            lines.push(parsed);
+            (lines, new_is_preformatted)
+        })
+        .0
 }
 
-fn is_block(line_type: &LineType) -> bool {
-    return match line_type {
-        LineType::PreformattedText | LineType::ListItem | LineType::Quote => true,
-        _ => false,
+fn parse_preformatted_line(line: &str) -> GeminiLine {
+    match line {
+        s if s.starts_with("```") => GeminiLine::PreformattedToggle(false, String::new()),
+        _ => GeminiLine::Text(line.to_string(), true),
     }
 }
 
-fn get_partial_line_content(line_type: &LineType, line: &str) -> String {
-    let encoded_line = line.replace("<", "&lt;").replace(">", "&gt;");
-    return match line_type {
-        LineType::ListItem => format!("<li>{}</li>", encoded_line[2..].trim()),
-        LineType::Quote => encoded_line[1..].trim().to_string(),
-        LineType::PreformattedText => format!("{}\n", encoded_line),
-        _ => "".to_string(),
-    }
-}
-
-fn get_full_line_content(line_type: &LineType, line: &str) -> String {
-    let encoded_line = line.replace("<", "&lt;").replace(">", "&gt;");
-     match line_type {
-        LineType::Text => format!("<p>{}</p>\n", encoded_line.trim()),
-        LineType::Blank => "<br/>\n".to_string(),
-        LineType::Link => {
-            let url = get_link_address(line);
-            if url.starts_with("gemini:") {
-                format!("<div><a href=\"{}\">{}</a></div>\n", url, get_link_content(line))
-            } else {
-                format!("<div><a href=\"{}\">{}</a></div>\n", url.replace(".gmi", ".html"), get_link_content(line))
+fn parse_line(line: &str) -> GeminiLine {
+    match line {
+        s if s.starts_with("###") => GeminiLine::Heading(3, s[3..].to_string()),
+        s if s.starts_with("##") => GeminiLine::Heading(2, s[2..].to_string()),
+        s if s.starts_with("#") => GeminiLine::Heading(1, s[1..].to_string()),
+        s if s.starts_with("=>") => {
+            let content = s[2..].trim();
+            match content.split_once(char::is_whitespace) {
+                Some((url, text)) => {
+                    GeminiLine::Link(url.trim().to_string(), text.trim().to_string())
+                }
+                None => GeminiLine::Link(content.trim().to_string(), String::new()),
             }
-        },
-        LineType::Heading1 => format!("<h1>{}</h1>\n", encoded_line[1..].trim()),
-        LineType::Heading2 => format!("<h2>{}</h2>\n", encoded_line[2..].trim()),
-        LineType::Heading3 => format!("<h3>{}</h3>\n", encoded_line[3..].trim()),
-        _ => "".to_string(),
+        }
+        s if s.starts_with("* ") => GeminiLine::ListItem(s[2..].to_string()),
+        s if s.starts_with(">") => GeminiLine::Quote(s[1..].to_string()),
+        s if s.starts_with("```") => GeminiLine::PreformattedToggle(true, s[3..].to_string()),
+        _ => GeminiLine::Text(line.to_string(), false),
     }
 }
 
-fn get_heading_wrapper(heading_stack: &mut Vec<u8>, line_type: &LineType) -> String {
-    let mut string = String::new();
-    let current_heading: u8 = match line_type {
-        LineType::Heading1 => 1,
-        LineType::Heading2 => 2,
-        LineType::Heading3 => 3,
-        _ => 255
-    };
-
-    if current_heading < 255 {
-        while let Some(open_heading) = heading_stack.pop() {
-            // You just encountered a more important heading.
-            // Put it back. Desist.
-            if open_heading < current_heading {
-                heading_stack.push(open_heading);
-                break;
-            }
+#[cfg(test)]
+mod tests {
+    use super::*;
 
-            string.push_str("</div>");
+    #[test]
+    fn test_headings() {
+        assert_eq!(
+            parse_line("### Heading"),
+            GeminiLine::Heading(3, " Heading".to_string())
+        );
+        assert_eq!(
+            parse_line("## Heading"),
+            GeminiLine::Heading(2, " Heading".to_string())
+        );
+        assert_eq!(
+            parse_line("# Heading"),
+            GeminiLine::Heading(1, " Heading".to_string())
+        );
+        assert_eq!(parse_line("###"), GeminiLine::Heading(3, "".to_string()));
+        assert_eq!(
+            parse_line("#####"),
+            GeminiLine::Heading(3, "##".to_string())
+        );
+        assert_eq!(parse_line("# "), GeminiLine::Heading(1, " ".to_string()));
 
-            if open_heading == current_heading {
-                break;
-            }
-        }
-        heading_stack.push(current_heading);
-        string.push_str(&format!("<div class=\"h{}\">", current_heading));
+        assert_eq!(
+            parse_preformatted_line("### Heading"),
+            GeminiLine::Text("### Heading".to_string(), true)
+        );
+        assert_eq!(
+            parse_preformatted_line("## Heading"),
+            GeminiLine::Text("## Heading".to_string(), true)
+        );
+        assert_eq!(
+            parse_preformatted_line("# Heading"),
+            GeminiLine::Text("# Heading".to_string(), true)
+        );
     }
 
-    return string;
-}
+    #[test]
+    fn test_links() {
+        assert_eq!(
+            parse_line("=> https://example.com Link text"),
+            GeminiLine::Link("https://example.com".to_string(), "Link text".to_string())
+        );
+        assert_eq!(
+            parse_line("=> /local/path"),
+            GeminiLine::Link("/local/path".to_string(), "".to_string())
+        );
 
-fn close_heading_wrapper(heading_stack: &mut Vec<u8>) -> String {
-    let mut string = String::new();
-    while let Some(_open_heading) = heading_stack.pop() {
-        string.push_str("</div>");
-    }
-    return string;
-}
+        assert_eq!(
+            parse_line("=>"),
+            GeminiLine::Link("".to_string(), "".to_string())
+        );
+        assert_eq!(
+            parse_line("=> "),
+            GeminiLine::Link("".to_string(), "".to_string())
+        );
+        assert_eq!(
+            parse_line("=>  multiple    spaces  in   text"),
+            GeminiLine::Link("multiple".to_string(), "spaces  in   text".to_string())
+        );
 
-fn get_line_opener(line_type: &LineType) -> &'static str {
-    match line_type {
-        LineType::ListItem => "<ul>",
-        LineType::Quote => "<blockquote>",
-        LineType::PreformattedText => "<pre>",
-        _ => "",
+        assert_eq!(
+            parse_preformatted_line("=> https://example.com Link text"),
+            GeminiLine::Text("=> https://example.com Link text".to_string(), true)
+        );
     }
-}
 
-fn get_line_closer(line_type: &LineType) -> &'static str {
-    match line_type {
-        LineType::ListItem => "</ul>\n",
-        LineType::Quote => "</blockquote>\n",
-        LineType::PreformattedText => "</pre>\n",
-        _ => "",
-    }
-}
+    #[test]
+    fn test_list_items() {
+        assert_eq!(
+            parse_line("* List item"),
+            GeminiLine::ListItem("List item".to_string())
+        );
 
-fn get_link_content(line: &str) -> &str {
-    let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect();
-    if components.len() > 1 {
-        return components[1].trim()
+        assert_eq!(parse_line("* "), GeminiLine::ListItem("".to_string()));
+        assert_eq!(parse_line("*"), GeminiLine::Text("*".to_string(), false));
+        assert_eq!(
+            parse_line("*WithText"),
+            GeminiLine::Text("*WithText".to_string(), false)
+        );
+        assert_eq!(
+            parse_line("*  Multiple spaces"),
+            GeminiLine::ListItem(" Multiple spaces".to_string())
+        );
     }
-    components[0].trim()
-}
 
-fn get_link_address(line: &str) -> &str {
-    let components: Vec<&str> = line[2..].trim().splitn(2, " ").collect();
-    components[0].trim()
-}
+    #[test]
+    fn test_quotes() {
+        assert_eq!(
+            parse_line(">Quote text"),
+            GeminiLine::Quote("Quote text".to_string())
+        );
 
-fn identify_line(line: &str, is_preformatted: bool) -> LineType {
-    if line.starts_with("```") {
-        return LineType::PreformattedToggle;
-    }
-    if is_preformatted {
-        return LineType::PreformattedText;
-    }
-    if line.is_empty() {
-        return LineType::Blank;
-    }
-    if line.starts_with("=>") {
-        return LineType::Link;
-    }
-    if line.starts_with("* ") {
-        return LineType::ListItem;
+        assert_eq!(parse_line(">"), GeminiLine::Quote("".to_string()));
+        assert_eq!(parse_line("> "), GeminiLine::Quote(" ".to_string()));
+        assert_eq!(
+            parse_line(">>Nested"),
+            GeminiLine::Quote(">Nested".to_string())
+        );
     }
-    if line.starts_with(">") {
-        return LineType::Quote;
-    }
-    if line.starts_with("###") {
-        return LineType::Heading3;
-    }
-    if line.starts_with("##") {
-        return LineType::Heading2;
+
+    #[test]
+    fn test_preformatted() {
+        assert_eq!(
+            parse_line("```alt-text"),
+            GeminiLine::PreformattedToggle(true, "alt-text".to_string())
+        );
+
+        assert_eq!(
+            parse_line("```"),
+            GeminiLine::PreformattedToggle(true, "".to_string())
+        );
+        assert_eq!(
+            parse_line("``` "),
+            GeminiLine::PreformattedToggle(true, " ".to_string())
+        );
+        assert_eq!(
+            parse_line("````"),
+            GeminiLine::PreformattedToggle(true, "`".to_string())
+        );
+
+        assert_eq!(
+            parse_preformatted_line("```alt-text"),
+            GeminiLine::PreformattedToggle(false, "".to_string())
+        );
+        assert_eq!(
+            parse_preformatted_line("```"),
+            GeminiLine::PreformattedToggle(false, "".to_string())
+        );
     }
-    if line.starts_with("#") {
-        return LineType::Heading1;
+
+    #[test]
+    fn test_text() {
+        // Normal case
+        assert_eq!(
+            parse_line("Regular text"),
+            GeminiLine::Text("Regular text".to_string(), false)
+        );
+
+        // Edge cases
+        assert_eq!(parse_line(""), GeminiLine::Text("".to_string(), false));
+        assert_eq!(parse_line(" "), GeminiLine::Text(" ".to_string(), false));
+        assert_eq!(parse_line("  "), GeminiLine::Text("  ".to_string(), false));
     }
 
-    LineType::Text
-}
+    #[test]
+    fn test_malformed_input() {
+        assert_eq!(
+            parse_line("= >Not a link"),
+            GeminiLine::Text("= >Not a link".to_string(), false)
+        );
+        assert_eq!(
+            parse_line("``Not preformatted"),
+            GeminiLine::Text("``Not preformatted".to_string(), false)
+        );
+        assert_eq!(
+            parse_line("** Not a list"),
+            GeminiLine::Text("** Not a list".to_string(), false)
+        );
+    }
 
-#[derive(PartialEq, Eq)]
-enum LineType {
-    Text,
-    Blank,
-    Link,
-    PreformattedToggle,
-    PreformattedText,
-    Heading1,
-    Heading2,
-    Heading3,
-    ListItem,
-    Quote
+    #[test]
+    fn test_full_document() {
+        let input = "\
+# Heading 1
+## Heading 2
+### Heading 3
+Regular text
+=> https://example.com Link text
+* List item
+>Quote
+```alt
+code
+# Heading 1
+## Heading 2
+### Heading 3
+=> https://example.com Link text
+* List item
+>Quote
+```trailing alt";
+        let result = parse(input);
+        assert_eq!(
+            result,
+            vec![
+                GeminiLine::Heading(1, " Heading 1".to_string()),
+                GeminiLine::Heading(2, " Heading 2".to_string()),
+                GeminiLine::Heading(3, " Heading 3".to_string()),
+                GeminiLine::Text("Regular text".to_string(), false),
+                GeminiLine::Link("https://example.com".to_string(), "Link text".to_string()),
+                GeminiLine::ListItem("List item".to_string()),
+                GeminiLine::Quote("Quote".to_string()),
+                GeminiLine::PreformattedToggle(true, "alt".to_string()),
+                GeminiLine::Text("code".to_string(), true),
+                GeminiLine::Text("# Heading 1".to_string(), true),
+                GeminiLine::Text("## Heading 2".to_string(), true),
+                GeminiLine::Text("### Heading 3".to_string(), true),
+                GeminiLine::Text("=> https://example.com Link text".to_string(), true),
+                GeminiLine::Text("* List item".to_string(), true),
+                GeminiLine::Text(">Quote".to_string(), true),
+                GeminiLine::PreformattedToggle(false, "".to_string()),
+            ]
+        );
+    }
 }