// Line-oriented gemtext parser.
//
// Every input line is classified independently by its prefix; the only state
// carried between lines is whether a preformatted block is currently open.

/// A single parsed line of a gemtext document.
///
/// Unless noted otherwise, the stored text is everything that follows the
/// line's marker, exactly as written (it is *not* trimmed).
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum GeminiLine {
    /// A line of text: `(content, is_preformatted)`. The flag is `true` when
    /// the line was read while a preformatted block was open.
    Text(String, bool),
    /// A line starting with three backticks: `(opens_block, alt_text)`.
    /// `opens_block` is `true` when the line starts a preformatted block and
    /// `false` when it ends one; closing toggles always carry an empty string.
    PreformattedToggle(bool, String),
    /// A heading: `(level, text)`, where `level` is 1, 2 or 3.
    Heading(u8, String),
    /// A link line (`=>`): `(url, text)`. Both parts are trimmed and either
    /// may be empty.
    Link(String, String),
    /// A quote line (`>`).
    Quote(String),
    /// A list item (`* `).
    ListItem(String),
}

/// Parses gemtext source code into a vector of `GeminiLine` elements.
///
/// # Arguments
/// * `source` - A string slice that contains the gemtext
///
/// # Returns
/// A `Vec<GeminiLine>` with one element per line of `source`, in order.
/// Lines are split with [`str::lines`], so line terminators are not part of
/// any element.
pub fn parse(source: &str) -> Vec<GeminiLine> {
    let mut in_preformatted = false;

    source
        .lines()
        .map(|line| {
            let parsed = if in_preformatted {
                parse_preformatted_line(line)
            } else {
                parse_line(line)
            };

            // A toggle line decides how the *following* lines are read.
            if let GeminiLine::PreformattedToggle(opens_block, _) = &parsed {
                in_preformatted = *opens_block;
            }

            parsed
        })
        .collect()
}

/// Parses a line that was read while a preformatted block is open.
///
/// Only a line starting with three backticks is special here: it closes the
/// block, and anything after the backticks is discarded. Every other line is
/// returned verbatim as preformatted text.
fn parse_preformatted_line(line: &str) -> GeminiLine {
    if line.starts_with("```") {
        GeminiLine::PreformattedToggle(false, String::new())
    } else {
        GeminiLine::Text(line.to_string(), true)
    }
}

/// Parses a line that was read outside of a preformatted block.
///
/// Prefixes are tested in a fixed order and the first match wins, which is
/// why `###` is tested before `##` and `##` before `#`. A line matching no
/// prefix (including an empty line) becomes plain, non-preformatted text.
fn parse_line(line: &str) -> GeminiLine {
    if let Some(rest) = line.strip_prefix("###") {
        GeminiLine::Heading(3, rest.to_string())
    } else if let Some(rest) = line.strip_prefix("##") {
        GeminiLine::Heading(2, rest.to_string())
    } else if let Some(rest) = line.strip_prefix('#') {
        GeminiLine::Heading(1, rest.to_string())
    } else if let Some(rest) = line.strip_prefix("=>") {
        let content = rest.trim();
        // The URL is everything up to the first whitespace character; the
        // remainder, if any, is the human-readable link text.
        match content.split_once(char::is_whitespace) {
            Some((url, text)) => GeminiLine::Link(url.to_string(), text.trim().to_string()),
            None => GeminiLine::Link(content.to_string(), String::new()),
        }
    } else if let Some(rest) = line.strip_prefix("* ") {
        GeminiLine::ListItem(rest.to_string())
    } else if let Some(rest) = line.strip_prefix('>') {
        GeminiLine::Quote(rest.to_string())
    } else if let Some(rest) = line.strip_prefix("```") {
        GeminiLine::PreformattedToggle(true, rest.to_string())
    } else {
        GeminiLine::Text(line.to_string(), false)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_headings() {
        assert_eq!(
            parse_line("### Heading"),
            GeminiLine::Heading(3, " Heading".to_string())
        );
        assert_eq!(
            parse_line("## Heading"),
            GeminiLine::Heading(2, " Heading".to_string())
        );
        assert_eq!(
            parse_line("# Heading"),
            GeminiLine::Heading(1, " Heading".to_string())
        );
        assert_eq!(parse_line("###"), GeminiLine::Heading(3, String::new()));
        assert_eq!(
            parse_line("#####"),
            GeminiLine::Heading(3, "##".to_string())
        );
        assert_eq!(parse_line("# "), GeminiLine::Heading(1, " ".to_string()));

        assert_eq!(
            parse_preformatted_line("### Heading"),
            GeminiLine::Text("### Heading".to_string(), true)
        );
        assert_eq!(
            parse_preformatted_line("## Heading"),
            GeminiLine::Text("## Heading".to_string(), true)
        );
        assert_eq!(
            parse_preformatted_line("# Heading"),
            GeminiLine::Text("# Heading".to_string(), true)
        );
    }

    #[test]
    fn test_links() {
        assert_eq!(
            parse_line("=> https://example.com Link text"),
            GeminiLine::Link("https://example.com".to_string(), "Link text".to_string())
        );
        assert_eq!(
            parse_line("=> /local/path"),
            GeminiLine::Link("/local/path".to_string(), String::new())
        );

        assert_eq!(
            parse_line("=>"),
            GeminiLine::Link(String::new(), String::new())
        );
        assert_eq!(
            parse_line("=> "),
            GeminiLine::Link(String::new(), String::new())
        );
        // NOTE(review): the exact run of spaces in this input is a
        // reconstruction; confirm against the upstream test.
        assert_eq!(
            parse_line("=> multiple   spaces in text"),
            GeminiLine::Link("multiple".to_string(), "spaces in text".to_string())
        );

        assert_eq!(
            parse_preformatted_line("=> https://example.com Link text"),
            GeminiLine::Text("=> https://example.com Link text".to_string(), true)
        );
    }

    #[test]
    fn test_list_items() {
        assert_eq!(
            parse_line("* List item"),
            GeminiLine::ListItem("List item".to_string())
        );

        assert_eq!(parse_line("* "), GeminiLine::ListItem(String::new()));
        assert_eq!(parse_line("*"), GeminiLine::Text("*".to_string(), false));
        assert_eq!(
            parse_line("*WithText"),
            GeminiLine::Text("*WithText".to_string(), false)
        );
        // Only the first space belongs to the marker; the second is content.
        // NOTE(review): input spacing is a reconstruction; confirm upstream.
        assert_eq!(
            parse_line("*  Multiple spaces"),
            GeminiLine::ListItem(" Multiple spaces".to_string())
        );
    }

    #[test]
    fn test_quotes() {
        assert_eq!(
            parse_line(">Quote text"),
            GeminiLine::Quote("Quote text".to_string())
        );

        assert_eq!(parse_line(">"), GeminiLine::Quote(String::new()));
        assert_eq!(parse_line("> "), GeminiLine::Quote(" ".to_string()));
        assert_eq!(
            parse_line(">>Nested"),
            GeminiLine::Quote(">Nested".to_string())
        );
    }

    #[test]
    fn test_preformatted() {
        assert_eq!(
            parse_line("```alt-text"),
            GeminiLine::PreformattedToggle(true, "alt-text".to_string())
        );

        assert_eq!(
            parse_line("```"),
            GeminiLine::PreformattedToggle(true, String::new())
        );
        assert_eq!(
            parse_line("``` "),
            GeminiLine::PreformattedToggle(true, " ".to_string())
        );
        assert_eq!(
            parse_line("````"),
            GeminiLine::PreformattedToggle(true, "`".to_string())
        );

        assert_eq!(
            parse_preformatted_line("```alt-text"),
            GeminiLine::PreformattedToggle(false, String::new())
        );
        assert_eq!(
            parse_preformatted_line("```"),
            GeminiLine::PreformattedToggle(false, String::new())
        );
    }

    #[test]
    fn test_text() {
        // Normal case
        assert_eq!(
            parse_line("Regular text"),
            GeminiLine::Text("Regular text".to_string(), false)
        );

        // Edge cases: empty and whitespace-only lines are kept verbatim.
        assert_eq!(parse_line(""), GeminiLine::Text(String::new(), false));
        assert_eq!(parse_line(" "), GeminiLine::Text(" ".to_string(), false));
        assert_eq!(parse_line("  "), GeminiLine::Text("  ".to_string(), false));
    }

    #[test]
    fn test_malformed_input() {
        assert_eq!(
            parse_line("= >Not a link"),
            GeminiLine::Text("= >Not a link".to_string(), false)
        );
        assert_eq!(
            parse_line("``Not preformatted"),
            GeminiLine::Text("``Not preformatted".to_string(), false)
        );
        assert_eq!(
            parse_line("** Not a list"),
            GeminiLine::Text("** Not a list".to_string(), false)
        );
    }

    #[test]
    fn test_full_document() {
        let input = "\
# Heading 1
## Heading 2
### Heading 3
Regular text
=> https://example.com Link text
* List item
>Quote
```alt
code
# Heading 1
## Heading 2
### Heading 3
=> https://example.com Link text
* List item
>Quote
```trailing alt";
        let result = parse(input);
        assert_eq!(
            result,
            vec![
                GeminiLine::Heading(1, " Heading 1".to_string()),
                GeminiLine::Heading(2, " Heading 2".to_string()),
                GeminiLine::Heading(3, " Heading 3".to_string()),
                GeminiLine::Text("Regular text".to_string(), false),
                GeminiLine::Link("https://example.com".to_string(), "Link text".to_string()),
                GeminiLine::ListItem("List item".to_string()),
                GeminiLine::Quote("Quote".to_string()),
                GeminiLine::PreformattedToggle(true, "alt".to_string()),
                GeminiLine::Text("code".to_string(), true),
                GeminiLine::Text("# Heading 1".to_string(), true),
                GeminiLine::Text("## Heading 2".to_string(), true),
                GeminiLine::Text("### Heading 3".to_string(), true),
                GeminiLine::Text("=> https://example.com Link text".to_string(), true),
                GeminiLine::Text("* List item".to_string(), true),
                GeminiLine::Text(">Quote".to_string(), true),
                GeminiLine::PreformattedToggle(false, String::new()),
            ]
        );
    }
}