//! Line-oriented parser for gemtext, the markup format of the Gemini protocol.
//!
//! The parser only classifies lines; it performs no rendering. Every input
//! line becomes exactly one [`GeminiLine`], in source order.

/// A single classified line of a gemtext document.
///
/// Payload strings hold the text that follows the line marker. Except for
/// [`GeminiLine::Link`], whose parts are trimmed, the payload is kept verbatim
/// (including any leading whitespace after the marker).
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum GeminiLine {
    /// Plain text. The flag is `true` when the line was read while a
    /// preformatted block was open.
    Text(String, bool),
    /// A "```" line. The flag is `true` when the line opens a preformatted
    /// block and `false` when it closes one. The string is whatever followed
    /// the marker on an opening line; it is always empty on a closing line.
    PreformattedToggle(bool, String),
    /// A heading: level (1 to 3) and the text after the `#` markers.
    Heading(u8, String),
    /// A link: URL and label. Either part may be empty.
    Link(String, String),
    /// A quote line: the text after the leading `>`.
    Quote(String),
    /// A list item: the text after the leading `* `.
    ListItem(String),
}

/// Parses gemtext source code into a vector of `GeminiLine` elements.
///
/// Lines are split with [`str::lines`], so both `\n` and `\r\n` line endings
/// are accepted. While a preformatted block is open, every line other than the
/// closing "```" is returned as preformatted [`GeminiLine::Text`].
///
/// # Arguments
/// * `source` - A string slice that contains the gemtext
///
/// # Returns
/// A `Vec<GeminiLine>` with one entry per input line, in source order.
pub fn parse(source: &str) -> Vec<GeminiLine> {
    let mut lines = Vec::new();
    let mut is_preformatted = false;

    for line in source.lines() {
        let parsed = if is_preformatted {
            parse_preformatted_line(line)
        } else {
            parse_line(line)
        };

        // Only toggle lines change the mode; the flag they carry is the mode
        // that applies to the lines that follow.
        if let GeminiLine::PreformattedToggle(opens_block, _) = &parsed {
            is_preformatted = *opens_block;
        }

        lines.push(parsed);
    }

    lines
}

/// Classifies a line read while a preformatted block is open.
///
/// Any line starting with "```" closes the block (text after the marker is
/// discarded); every other line is kept verbatim as preformatted text.
fn parse_preformatted_line(line: &str) -> GeminiLine {
    if line.starts_with("```") {
        GeminiLine::PreformattedToggle(false, String::new())
    } else {
        GeminiLine::Text(line.to_string(), true)
    }
}

/// Classifies a line read outside of a preformatted block.
///
/// Markers are checked from most to least specific, so `###` is tested before
/// `##`, which is tested before `#`. Anything that matches no marker is plain
/// text.
fn parse_line(line: &str) -> GeminiLine {
    if let Some(rest) = line.strip_prefix("###") {
        GeminiLine::Heading(3, rest.to_string())
    } else if let Some(rest) = line.strip_prefix("##") {
        GeminiLine::Heading(2, rest.to_string())
    } else if let Some(rest) = line.strip_prefix('#') {
        GeminiLine::Heading(1, rest.to_string())
    } else if let Some(rest) = line.strip_prefix("=>") {
        // The URL is the first whitespace-delimited token; everything after it
        // is the label.
        let content = rest.trim();
        match content.split_once(char::is_whitespace) {
            Some((url, text)) => GeminiLine::Link(url.to_string(), text.trim().to_string()),
            None => GeminiLine::Link(content.to_string(), String::new()),
        }
    } else if let Some(rest) = line.strip_prefix("* ") {
        GeminiLine::ListItem(rest.to_string())
    } else if let Some(rest) = line.strip_prefix('>') {
        GeminiLine::Quote(rest.to_string())
    } else if let Some(rest) = line.strip_prefix("```") {
        GeminiLine::PreformattedToggle(true, rest.to_string())
    } else {
        GeminiLine::Text(line.to_string(), false)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_headings() {
        assert_eq!(parse_line("### Heading"), GeminiLine::Heading(3, " Heading".to_string()));
        assert_eq!(parse_line("## Heading"), GeminiLine::Heading(2, " Heading".to_string()));
        assert_eq!(parse_line("# Heading"), GeminiLine::Heading(1, " Heading".to_string()));
        assert_eq!(parse_line("###"), GeminiLine::Heading(3, "".to_string()));
        assert_eq!(parse_line("#####"), GeminiLine::Heading(3, "##".to_string()));
        assert_eq!(parse_line("# "), GeminiLine::Heading(1, " ".to_string()));

        assert_eq!(parse_preformatted_line("### Heading"), GeminiLine::Text("### Heading".to_string(), true));
        assert_eq!(parse_preformatted_line("## Heading"), GeminiLine::Text("## Heading".to_string(), true));
        assert_eq!(parse_preformatted_line("# Heading"), GeminiLine::Text("# Heading".to_string(), true));
    }

    #[test]
    fn test_links() {
        assert_eq!(
            parse_line("=> https://example.com Link text"),
            GeminiLine::Link("https://example.com".to_string(), "Link text".to_string())
        );
        assert_eq!(
            parse_line("=> /local/path"),
            GeminiLine::Link("/local/path".to_string(), "".to_string())
        );

        assert_eq!(
            parse_line("=>"),
            GeminiLine::Link("".to_string(), "".to_string())
        );
        assert_eq!(
            parse_line("=> "),
            GeminiLine::Link("".to_string(), "".to_string())
        );
        assert_eq!(
            parse_line("=>   multiple   spaces in text"),
            GeminiLine::Link("multiple".to_string(), "spaces in text".to_string())
        );

        assert_eq!(
            parse_preformatted_line("=> https://example.com Link text"),
            GeminiLine::Text("=> https://example.com Link text".to_string(), true)
        );
    }

    #[test]
    fn test_list_items() {
        assert_eq!(
            parse_line("* List item"),
            GeminiLine::ListItem("List item".to_string())
        );

        assert_eq!(parse_line("* "), GeminiLine::ListItem("".to_string()));
        assert_eq!(parse_line("*"), GeminiLine::Text("*".to_string(), false));
        assert_eq!(parse_line("*WithText"), GeminiLine::Text("*WithText".to_string(), false));
        assert_eq!(parse_line("*  Multiple spaces"), GeminiLine::ListItem(" Multiple spaces".to_string()));
    }

    #[test]
    fn test_quotes() {
        assert_eq!(
            parse_line(">Quote text"),
            GeminiLine::Quote("Quote text".to_string())
        );

        assert_eq!(parse_line(">"), GeminiLine::Quote("".to_string()));
        assert_eq!(parse_line("> "), GeminiLine::Quote(" ".to_string()));
        assert_eq!(parse_line(">>Nested"), GeminiLine::Quote(">Nested".to_string()));
    }

    #[test]
    fn test_preformatted() {
        assert_eq!(
            parse_line("```alt-text"),
            GeminiLine::PreformattedToggle(true, "alt-text".to_string())
        );

        assert_eq!(parse_line("```"), GeminiLine::PreformattedToggle(true, "".to_string()));
        assert_eq!(parse_line("``` "), GeminiLine::PreformattedToggle(true, " ".to_string()));
        assert_eq!(parse_line("````"), GeminiLine::PreformattedToggle(true, "`".to_string()));

        assert_eq!(
            parse_preformatted_line("```alt-text"),
            GeminiLine::PreformattedToggle(false, "".to_string())
        );
        assert_eq!(
            parse_preformatted_line("```"),
            GeminiLine::PreformattedToggle(false, "".to_string())
        );
    }

    #[test]
    fn test_text() {
        // Normal case
        assert_eq!(
            parse_line("Regular text"),
            GeminiLine::Text("Regular text".to_string(), false)
        );

        // Edge cases
        assert_eq!(parse_line(""), GeminiLine::Text("".to_string(), false));
        assert_eq!(parse_line(" "), GeminiLine::Text(" ".to_string(), false));
        assert_eq!(parse_line("  "), GeminiLine::Text("  ".to_string(), false));
    }

    #[test]
    fn test_malformed_input() {
        assert_eq!(parse_line("= >Not a link"), GeminiLine::Text("= >Not a link".to_string(), false));
        assert_eq!(parse_line("``Not preformatted"), GeminiLine::Text("``Not preformatted".to_string(), false));
        assert_eq!(parse_line("** Not a list"), GeminiLine::Text("** Not a list".to_string(), false));
    }

    #[test]
    fn test_full_document() {
        let input = "\
# Heading 1
## Heading 2
### Heading 3
Regular text
=> https://example.com Link text
* List item
>Quote
```alt
code
# Heading 1
## Heading 2
### Heading 3
=> https://example.com Link text
* List item
>Quote
```trailing alt";
        let result = parse(input);
        assert_eq!(result, vec![
            GeminiLine::Heading(1, " Heading 1".to_string()),
            GeminiLine::Heading(2, " Heading 2".to_string()),
            GeminiLine::Heading(3, " Heading 3".to_string()),
            GeminiLine::Text("Regular text".to_string(), false),
            GeminiLine::Link("https://example.com".to_string(), "Link text".to_string()),
            GeminiLine::ListItem("List item".to_string()),
            GeminiLine::Quote("Quote".to_string()),
            GeminiLine::PreformattedToggle(true, "alt".to_string()),
            GeminiLine::Text("code".to_string(), true),
            GeminiLine::Text("# Heading 1".to_string(), true),
            GeminiLine::Text("## Heading 2".to_string(), true),
            GeminiLine::Text("### Heading 3".to_string(), true),
            GeminiLine::Text("=> https://example.com Link text".to_string(), true),
            GeminiLine::Text("* List item".to_string(), true),
            GeminiLine::Text(">Quote".to_string(), true),
            GeminiLine::PreformattedToggle(false, "".to_string()),
        ]);
    }
}