From b7b3db32951bbb8ef424672e7bd2b9b4344f9c74 Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Wed, 27 Apr 2022 23:46:14 +0200 Subject: [PATCH] style: format code according to styleguide --- core/src/backend/inline.rs | 92 +- core/src/backend/loader.rs | 11 +- core/src/backend/mod.rs | 2 +- core/src/elements/heading_block.rs | 45 +- core/src/elements/paragraph_block.rs | 17 +- core/src/unimarkup_block.rs | 24 +- core/tests/backend/backend_run.rs | 11 +- inline/src/ast/collect.rs | 316 ++++--- inline/src/ast/mod.rs | 445 ++++----- inline/src/ast/substitutions.rs | 96 +- inline/src/error.rs | 10 +- inline/src/lib.rs | 14 +- inline/src/tokenizer/mod.rs | 1122 +++++++++++++---------- inline/src/tokenizer/tokens.rs | 294 +++--- inline/tests/ast/bold_italic.rs | 85 +- inline/tests/ast/escaping.rs | 111 +-- inline/tests/ast/mixed.rs | 1 + inline/tests/ast/mixed_nested.rs | 1 + inline/tests/ast/offseted.rs | 96 +- inline/tests/ast/substitutions.rs | 115 +-- inline/tests/ast/text_group.rs | 54 +- inline/tests/ast/verbatim.rs | 238 ++--- inline/tests/ast/whitespaces.rs | 73 +- inline/tests/tests.rs | 33 +- inline/tests/tokenizer/accent.rs | 145 ++- inline/tests/tokenizer/asterisk.rs | 1154 +++++++++++++++++++----- inline/tests/tokenizer/backslash.rs | 2 - inline/tests/tokenizer/mixed.rs | 1 + inline/tests/tokenizer/mixed_nested.rs | 1 + inline/tests/tokenizer/text_group.rs | 340 +++++-- inline/tests/tokenizer/whitespaces.rs | 31 +- system_tests/tests/cli.rs | 12 +- system_tests/tests/logging/cli_logs.rs | 17 +- 33 files changed, 3044 insertions(+), 1965 deletions(-) diff --git a/core/src/backend/inline.rs b/core/src/backend/inline.rs index 66505ed9..75eae58b 100644 --- a/core/src/backend/inline.rs +++ b/core/src/backend/inline.rs @@ -1,58 +1,54 @@ use unimarkup_inline::{Inline, InlineKind, NestedInline}; -use super::{Render, error::BackendError}; - +use super::{error::BackendError, Render}; impl Render for Inline { - fn render_html(&self) -> Result { - let mut output = String::new(); + fn render_html(&self) -> Result { + let mut output = String::new(); - for inline in self { - match inline { - InlineKind::Bold(bold) => { - output.push_str(""); - output.push_str(&bold.render_html()?); - output.push_str(""); - }, - InlineKind::Italic(italic) => { - output.push_str(""); - output.push_str(&italic.render_html()?); - output.push_str(""); - }, - InlineKind::BoldItalic(bold_italic) => { - output.push_str(""); - output.push_str(&bold_italic.render_html()?); - output.push_str(""); - }, - InlineKind::Verbatim(verbatim) => { - output.push_str(""); - output.push_str(&verbatim.render_html()?); - output.push_str(""); - }, - InlineKind::Plain(plain) - | InlineKind::PlainNewLine(plain) => { - output.push_str(&plain.content); - }, - InlineKind::EscapedNewLine(_) => { - output.push_str("
"); - }, - InlineKind::EscapedSpace(_) => { - output.push_str(" ") - }, - InlineKind::TextGroup(nested, _) => { - output.push_str(""); - output.push_str(&nested.content.render_html()?); - output.push_str(""); - }, - } - } + for inline in self { + match inline { + InlineKind::Bold(bold) => { + output.push_str(""); + output.push_str(&bold.render_html()?); + output.push_str(""); + } + InlineKind::Italic(italic) => { + output.push_str(""); + output.push_str(&italic.render_html()?); + output.push_str(""); + } + InlineKind::BoldItalic(bold_italic) => { + output.push_str(""); + output.push_str(&bold_italic.render_html()?); + output.push_str(""); + } + InlineKind::Verbatim(verbatim) => { + output.push_str(""); + output.push_str(&verbatim.render_html()?); + output.push_str(""); + } + InlineKind::Plain(plain) | InlineKind::PlainNewLine(plain) => { + output.push_str(&plain.content); + } + InlineKind::EscapedNewLine(_) => { + output.push_str("
"); + } + InlineKind::EscapedSpace(_) => output.push_str(" "), + InlineKind::TextGroup(nested, _) => { + output.push_str(""); + output.push_str(&nested.content.render_html()?); + output.push_str(""); + } + } + } - Ok(output) - } + Ok(output) + } } impl Render for NestedInline { - fn render_html(&self) -> Result { - self.content.render_html() - } + fn render_html(&self) -> Result { + self.content.render_html() + } } diff --git a/core/src/backend/loader.rs b/core/src/backend/loader.rs index f95833f2..68642267 100644 --- a/core/src/backend/loader.rs +++ b/core/src/backend/loader.rs @@ -6,7 +6,8 @@ use crate::{ backend::BackendError, elements::{types, types::UnimarkupType, HeadingBlock, ParagraphBlock, VerbatimBlock}, log_id::{LogId, SetLog}, - middleend::{self, ContentIrLine}, unimarkup_block::UnimarkupBlockKind, + middleend::{self, ContentIrLine}, + unimarkup_block::UnimarkupBlockKind, }; use super::log_id::LoaderErrLogId; @@ -33,7 +34,9 @@ pub trait ParseFromIr { /// # Arguments /// /// * `connection` - [`rusqlite::Connection`] used for interaction with IR -pub fn get_blocks_from_ir(connection: &mut Connection) -> Result, BackendError> { +pub fn get_blocks_from_ir( + connection: &mut Connection, +) -> Result, BackendError> { let mut blocks: Vec = vec![]; let mut content_lines: VecDeque = middleend::get_content_lines(connection)?.into(); @@ -43,7 +46,9 @@ pub fn get_blocks_from_ir(connection: &mut Connection) -> Result todo!(), - UnimarkupType::Heading => UnimarkupBlockKind::Heading(HeadingBlock::parse_from_ir(&mut content_lines)?), + UnimarkupType::Heading => { + UnimarkupBlockKind::Heading(HeadingBlock::parse_from_ir(&mut content_lines)?) + } UnimarkupType::Paragraph => { UnimarkupBlockKind::Paragraph(ParagraphBlock::parse_from_ir(&mut content_lines)?) 
} diff --git a/core/src/backend/mod.rs b/core/src/backend/mod.rs index e6aecd1b..e6147cdf 100644 --- a/core/src/backend/mod.rs +++ b/core/src/backend/mod.rs @@ -7,9 +7,9 @@ use crate::{config::Config, unimarkup::UnimarkupDocument, unimarkup_block::UnimarkupBlockKind}; use rusqlite::Connection; +mod inline; mod loader; mod renderer; -mod inline; pub use loader::ParseFromIr; pub use renderer::*; diff --git a/core/src/elements/heading_block.rs b/core/src/elements/heading_block.rs index 86295434..2999dffe 100644 --- a/core/src/elements/heading_block.rs +++ b/core/src/elements/heading_block.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, VecDeque}; use pest::iterators::{Pair, Pairs}; use pest::Span; use strum_macros::*; -use unimarkup_inline::{Inline, Position, parse_with_offset, flat_inline, FlattenInlineKind}; +use unimarkup_inline::{flat_inline, parse_with_offset, FlattenInlineKind, Inline, Position}; use crate::backend::{error::BackendError, ParseFromIr, Render}; use crate::elements::types::{self, UnimarkupBlocks, UnimarkupType}; @@ -159,7 +159,7 @@ impl HeadingBlock { Some(id) => id.to_lowercase(), None => format!("heading-{}-line-{}", level, line_nr), }; - + let id = match attributes { Some(ref attrs) if attrs.get("id").is_some() => attrs.get("id").unwrap().to_string(), _ => generated_id, @@ -273,8 +273,13 @@ impl ParseFromIr for HeadingBlock { ir_line.fallback_attributes }; - - let try_inline = parse_with_offset(&content, Position{line: ir_line.line_nr, column: get_column_offset_from_level(level)}); + let try_inline = parse_with_offset( + &content, + Position { + line: ir_line.line_nr, + column: get_column_offset_from_level(level), + }, + ); let parsed_inline; match try_inline { Ok(inline) => parsed_inline = inline, @@ -282,7 +287,7 @@ impl ParseFromIr for HeadingBlock { parsed_inline = flat_inline(&content); (InlineWarnLogId::InlineParsingFailed as LogId) .set_log(&format!("Inline parsing failed for heading-id {} => content taken as plain as fallback", ir_line.id), file!(), line!()); - }, + } } let block = HeadingBlock { @@ -354,7 +359,14 @@ mod tests { let highest_level = HeadingLevel::Level6 as usize; for level in lowest_level..=highest_level { - let heading_content = parse_with_offset("This is a heading", Position{line: 0, column: level + 1}).unwrap(); + let heading_content = parse_with_offset( + "This is a heading", + Position { + line: 0, + column: level + 1, + }, + ) + .unwrap(); let id = format!("heading-id-{}", level); let heading = HeadingBlock { @@ -378,7 +390,14 @@ mod tests { let highest_level = HeadingLevel::Level6 as usize; for level in lowest_level..=highest_level { - let heading_content = parse_with_offset("`This` *is a* **heading**", Position{line: 0, column: level + 1}).unwrap(); + let heading_content = parse_with_offset( + "`This` *is a* **heading**", + Position { + line: 0, + column: level + 1, + }, + ) + .unwrap(); let id = format!("heading-id-{}", level); let heading = HeadingBlock { @@ -446,7 +465,17 @@ mod tests { assert_eq!(id, String::from("some_id")); assert_eq!(level, HeadingLevel::from(iterations)); - assert_eq!(content, parse_with_offset("This is a heading", Position{line: block.line_nr, column: get_column_offset_from_level(level)}).unwrap()); + assert_eq!( + content, + parse_with_offset( + "This is a heading", + Position { + line: block.line_nr, + column: get_column_offset_from_level(level) + } + ) + .unwrap() + ); assert_eq!(attr, String::from("{}")); } } diff --git a/core/src/elements/paragraph_block.rs b/core/src/elements/paragraph_block.rs index 
7dd9afa2..f6d7b256 100644 --- a/core/src/elements/paragraph_block.rs +++ b/core/src/elements/paragraph_block.rs @@ -17,9 +17,12 @@ use crate::{ use pest::iterators::Pairs; use pest::Span; -use unimarkup_inline::{Inline, flat_inline, FlattenInlineKind, Position, parse_with_offset}; +use unimarkup_inline::{flat_inline, parse_with_offset, FlattenInlineKind, Inline, Position}; -use super::{error::ElementError, log_id::{GeneralErrLogId, InlineWarnLogId}}; +use super::{ + error::ElementError, + log_id::{GeneralErrLogId, InlineWarnLogId}, +}; /// Structure of a Unimarkup paragraph element. #[derive(Debug, Default, Clone)] @@ -133,7 +136,13 @@ impl ParseFromIr for ParagraphBlock { ir_line.fallback_attributes }; - let try_inline = parse_with_offset(&content, Position{line: ir_line.line_nr, ..Default::default()}); + let try_inline = parse_with_offset( + &content, + Position { + line: ir_line.line_nr, + ..Default::default() + }, + ); let parsed_inline; match try_inline { Ok(inline) => parsed_inline = inline, @@ -141,7 +150,7 @@ impl ParseFromIr for ParagraphBlock { parsed_inline = flat_inline(&content); (InlineWarnLogId::InlineParsingFailed as LogId) .set_log(&format!("Inline parsing failed for paragraph-id {} => content taken as plain as fallback", ir_line.id), file!(), line!()); - }, + } } let block = ParagraphBlock { diff --git a/core/src/unimarkup_block.rs b/core/src/unimarkup_block.rs index 6c951e21..657e5f27 100644 --- a/core/src/unimarkup_block.rs +++ b/core/src/unimarkup_block.rs @@ -8,20 +8,20 @@ use crate::elements::VerbatimBlock; /// Enum of supported Unimarkup block elements #[derive(Debug, Clone)] pub enum UnimarkupBlockKind { - /// Represents the heading block - Heading(HeadingBlock), - /// Represents the paragraph block - Paragraph(ParagraphBlock), - /// Represents the verbatim block - Verbatim(VerbatimBlock), + /// Represents the heading block + Heading(HeadingBlock), + /// Represents the paragraph block + Paragraph(ParagraphBlock), + /// Represents the verbatim block + Verbatim(VerbatimBlock), } impl Render for UnimarkupBlockKind { - fn render_html(&self) -> Result { - match self { - UnimarkupBlockKind::Heading(heading) => heading.render_html(), - UnimarkupBlockKind::Paragraph(paragraph) => paragraph.render_html(), - UnimarkupBlockKind::Verbatim(verbatim) => verbatim.render_html(), + fn render_html(&self) -> Result { + match self { + UnimarkupBlockKind::Heading(heading) => heading.render_html(), + UnimarkupBlockKind::Paragraph(paragraph) => paragraph.render_html(), + UnimarkupBlockKind::Verbatim(verbatim) => verbatim.render_html(), + } } - } } diff --git a/core/tests/backend/backend_run.rs b/core/tests/backend/backend_run.rs index 2c167089..e777348f 100644 --- a/core/tests/backend/backend_run.rs +++ b/core/tests/backend/backend_run.rs @@ -2,7 +2,7 @@ use clap::StructOpt; use unimarkup_core::{ backend::{self, Render}, config::Config, - elements::{HeadingBlock, HeadingLevel, get_column_offset_from_level}, + elements::{get_column_offset_from_level, HeadingBlock, HeadingLevel}, middleend::{self, AsIrLines, ContentIrLine}, }; use unimarkup_inline::{parse_with_offset, Position}; @@ -16,7 +16,14 @@ fn test__backend_run__heading_block() { let block = HeadingBlock { id: "some-id".into(), level: HeadingLevel::Level1, - content: parse_with_offset("This is a heading", Position{line: 0, column: get_column_offset_from_level(HeadingLevel::Level1)}).unwrap(), + content: parse_with_offset( + "This is a heading", + Position { + line: 0, + column: get_column_offset_from_level(HeadingLevel::Level1), + 
}, + ) + .unwrap(), attributes: "{}".into(), line_nr: 0, }; diff --git a/inline/src/ast/collect.rs b/inline/src/ast/collect.rs index 907eff54..7bc66915 100644 --- a/inline/src/ast/collect.rs +++ b/inline/src/ast/collect.rs @@ -1,161 +1,213 @@ //! This module provides functionality to create a Unimarkup inline AST out of a given list of tokens. -use crate::{tokenizer::{Position, TokenKind, Tokens, Newline}, TextGroupAttributes}; +use crate::{ + tokenizer::{Newline, Position, TokenKind, Tokens}, + TextGroupAttributes, +}; -use super::{Span, NestedInline, InlineKind, FlatInline, substitutions::DirectSubstitution, Inline, FlattenInlineKind}; +use super::{ + substitutions::DirectSubstitution, FlatInline, FlattenInlineKind, Inline, InlineKind, + NestedInline, Span, +}; /// Struct to store partial collected inline tokens. -/// +/// /// Needed for nested tokens. pub(crate) struct InlineSection { - /// Partially collected inline tokens. - pub(crate) content: Inline, - /// End position of the last inline token of the section. - pub(crate) end: Position, + /// Partially collected inline tokens. + pub(crate) content: Inline, + /// End position of the last inline token of the section. + pub(crate) end: Position, } /// Trait to create an inline AST. pub(crate) trait InlineAst { - /// Function to create an inline AST from a given input. - fn collect(self) -> Inline; + /// Function to create an inline AST from a given input. + fn collect(self) -> Inline; } impl InlineAst for Tokens { - fn collect(mut self) -> Inline { - self.reverse(); // needed to use .pop() - collect_until(&mut self, TokenKind::Eoi).content - } + fn collect(mut self) -> Inline { + self.reverse(); // needed to use .pop() + collect_until(&mut self, TokenKind::Eoi).content + } } /// Function to collect inline elements up until a certain token is reached. -/// +/// /// Note: The token of kind `token_kind` is the last token of the returned section, if it was found. /// Otherwise, the given list of tokens is fully emptied. 
pub(crate) fn collect_until(tokens: &mut Tokens, token_kind: TokenKind) -> InlineSection { - let mut inline = Vec::new(); - let mut end: Position = Position::default(); - let mut prev_token_kind: TokenKind = TokenKind::NewLine; // important to start with space or newline for substitutions - - while let Some(mut token) = tokens.pop() { - end = Position{ line: token.position.line, column: token.position.column + token.length() }; - - if token.kind == token_kind { - return InlineSection{ content: inline, end }; - } - - match token.kind { - TokenKind::BoldOpen => { - let InlineSection { content, end } = collect_until(tokens, TokenKind::BoldClose); - let nested = NestedInline{ - content, - span: Span { start: token.position, end } - }; - inline.push(InlineKind::Bold(nested)); - }, - TokenKind::ItalicOpen => { - let InlineSection { content, end } = collect_until(tokens, TokenKind::ItalicClose); - let nested = NestedInline{ - content, - span: Span { start: token.position, end } - }; - inline.push(InlineKind::Italic(nested)); - }, - TokenKind::BoldItalicOpen => { - let InlineSection { content, end } = collect_until(tokens, TokenKind::BoldItalicClose); - let nested = NestedInline{ - content, - span: Span { start: token.position, end } + let mut inline = Vec::new(); + let mut end: Position = Position::default(); + let mut prev_token_kind: TokenKind = TokenKind::NewLine; // important to start with space or newline for substitutions + + while let Some(mut token) = tokens.pop() { + end = Position { + line: token.position.line, + column: token.position.column + token.length(), }; - inline.push(InlineKind::BoldItalic(nested)); - }, - TokenKind::VerbatimOpen => { - let InlineSection { content, end } = collect_until(tokens, TokenKind::VerbatimClose); - let nested = NestedInline{ - content: content.flatten_for_verbatim(), - span: Span { start: token.position, end } - }; - inline.push(InlineKind::Verbatim(nested)); - }, - TokenKind::Plain => { - if prev_token_kind.is_space_or_newline() && - ((tokens.last().is_some() && tokens.last().unwrap().is_space_or_newline()) || tokens.last().is_none()) { - token.content = token.content.substitute_arrow().substitute_emoji(); + if token.kind == token_kind { + return InlineSection { + content: inline, + end, + }; } - let flat = FlatInline{ - content: token.content, - span: Span { start: token.position, end } - }; - - if let Some(InlineKind::Plain(plain)) = inline.last_mut() { - plain.content.push_str(&flat.content); - plain.span.end = flat.span.end; - } else { - inline.push(InlineKind::Plain(flat)); - } - }, - TokenKind::EscapedGrapheme => { - end.column += 1; // add backlash offset - - let flat = FlatInline{ - content: token.content, - span: Span { start: token.position, end } - }; - - if flat.content.is_newline() { - inline.push(InlineKind::EscapedNewLine(flat)); - } else if flat.content.contains(char::is_whitespace) { - inline.push(InlineKind::EscapedSpace(flat)); - } else if let Some(InlineKind::Plain(plain_flat)) = inline.last_mut() { - plain_flat.content.push_str(&flat.content); - plain_flat.span.end = flat.span.end; - } else { - inline.push(InlineKind::Plain(flat)); + match token.kind { + TokenKind::BoldOpen => { + let InlineSection { content, end } = collect_until(tokens, TokenKind::BoldClose); + let nested = NestedInline { + content, + span: Span { + start: token.position, + end, + }, + }; + inline.push(InlineKind::Bold(nested)); + } + TokenKind::ItalicOpen => { + let InlineSection { content, end } = collect_until(tokens, TokenKind::ItalicClose); + let nested = 
NestedInline { + content, + span: Span { + start: token.position, + end, + }, + }; + inline.push(InlineKind::Italic(nested)); + } + TokenKind::BoldItalicOpen => { + let InlineSection { content, end } = + collect_until(tokens, TokenKind::BoldItalicClose); + let nested = NestedInline { + content, + span: Span { + start: token.position, + end, + }, + }; + inline.push(InlineKind::BoldItalic(nested)); + } + TokenKind::VerbatimOpen => { + let InlineSection { content, end } = + collect_until(tokens, TokenKind::VerbatimClose); + let nested = NestedInline { + content: content.flatten_for_verbatim(), + span: Span { + start: token.position, + end, + }, + }; + inline.push(InlineKind::Verbatim(nested)); + } + TokenKind::Plain => { + if prev_token_kind.is_space_or_newline() + && ((tokens.last().is_some() && tokens.last().unwrap().is_space_or_newline()) + || tokens.last().is_none()) + { + token.content = token.content.substitute_arrow().substitute_emoji(); + } + + let flat = FlatInline { + content: token.content, + span: Span { + start: token.position, + end, + }, + }; + + if let Some(InlineKind::Plain(plain)) = inline.last_mut() { + plain.content.push_str(&flat.content); + plain.span.end = flat.span.end; + } else { + inline.push(InlineKind::Plain(flat)); + } + } + TokenKind::EscapedGrapheme => { + end.column += 1; // add backlash offset + + let flat = FlatInline { + content: token.content, + span: Span { + start: token.position, + end, + }, + }; + + if flat.content.is_newline() { + inline.push(InlineKind::EscapedNewLine(flat)); + } else if flat.content.contains(char::is_whitespace) { + inline.push(InlineKind::EscapedSpace(flat)); + } else if let Some(InlineKind::Plain(plain_flat)) = inline.last_mut() { + plain_flat.content.push_str(&flat.content); + plain_flat.span.end = flat.span.end; + } else { + inline.push(InlineKind::Plain(flat)); + } + } + TokenKind::NewLine => { + let flat = FlatInline { + content: " ".to_string(), + span: Span { + start: token.position, + end, + }, + }; + + inline.push(InlineKind::PlainNewLine(flat)); + } + TokenKind::Space => { + let flat = FlatInline { + content: " ".to_string(), + span: Span { + start: token.position, + end, + }, + }; + + if let Some(InlineKind::Plain(plain)) = inline.last_mut() { + plain.content.push_str(&flat.content); + plain.span.end = flat.span.end; + } else { + inline.push(InlineKind::Plain(flat)); + } + } + TokenKind::TextGroupOpen => { + let InlineSection { content, end } = + collect_until(tokens, TokenKind::TextGroupClose); + let nested = NestedInline { + content, + span: Span { + start: token.position, + end, + }, + }; + inline.push(InlineKind::TextGroup( + nested, + TextGroupAttributes { + ..Default::default() + }, + )); + } + unsupported_token => { + eprintln!("Not supported token: {:?}", token.kind); + inline.push(InlineKind::Plain(FlatInline { + content: unsupported_token.as_str().to_string(), + span: Span { + start: token.position, + end, + }, + })); + } } - }, - TokenKind::NewLine => { - let flat = FlatInline{ - content: " ".to_string(), - span: Span { start: token.position, end } - }; - inline.push(InlineKind::PlainNewLine(flat)); - }, - TokenKind::Space => { - let flat = FlatInline{ - content: " ".to_string(), - span: Span { start: token.position, end } - }; - - if let Some(InlineKind::Plain(plain)) = inline.last_mut() { - plain.content.push_str(&flat.content); - plain.span.end = flat.span.end; - } else { - inline.push(InlineKind::Plain(flat)); - } - }, - TokenKind::TextGroupOpen => { - let InlineSection { content, end } = 
collect_until(tokens, TokenKind::TextGroupClose); - let nested = NestedInline{ - content, - span: Span { start: token.position, end } - }; - inline.push(InlineKind::TextGroup(nested, TextGroupAttributes{ ..Default::default() })); - }, - unsupported_token => { - eprintln!("Not supported token: {:?}", token.kind); - inline.push(InlineKind::Plain( - FlatInline{ - content: unsupported_token.as_str().to_string(), - span: Span { start: token.position, end }, - } - )); - }, + prev_token_kind = token.kind; } - prev_token_kind = token.kind; - } - - InlineSection{ content: inline, end } + InlineSection { + content: inline, + end, + } } diff --git a/inline/src/ast/mod.rs b/inline/src/ast/mod.rs index e8544146..cc355bed 100644 --- a/inline/src/ast/mod.rs +++ b/inline/src/ast/mod.rs @@ -10,279 +10,302 @@ pub type Inline = Vec; /// Convenient function to convert a string into plain inline. pub fn flat_inline(s: &str) -> Inline { - vec![InlineKind::Plain(FlatInline{ content: s.to_string(), span: Span::default() })] + vec![InlineKind::Plain(FlatInline { + content: s.to_string(), + span: Span::default(), + })] } /// Struct to set the span of an inline element in a given input. -/// +/// /// Note: If the inline element only consists of one grapheme, start and end point to the same position. #[derive(Debug, Default, Clone, PartialEq, Copy)] pub struct Span { - /// The start position of an inline element. - pub start: Position, - /// The end position of an inline element. - pub end: Position, + /// The start position of an inline element. + pub start: Position, + /// The end position of an inline element. + pub end: Position, } /// Struct representing inline elements that allow nesting. #[derive(Debug, Default, Clone, PartialEq)] pub struct NestedInline { - pub content: Vec, - pub span: Span + pub content: Vec, + pub span: Span, } /// Struct representing inline elements that do not allow nesting. #[derive(Debug, Default, Clone, PartialEq)] pub struct FlatInline { - pub content: String, - pub span: Span, + pub content: String, + pub span: Span, } /// Struct representing possible attributes for the text group inline element. #[derive(Debug, Default, Clone, PartialEq)] pub struct TextGroupAttributes { - pub content: String, - pub span: Span, + pub content: String, + pub span: Span, } /// Enum representing all supported Unimarkup inline elements. #[derive(Debug, Clone, PartialEq)] pub enum InlineKind { - /// Representing the bold inline element. - Bold(NestedInline), - /// Representing the italic inline element. - Italic(NestedInline), - /// Representing the combined bold and italic inline element. - BoldItalic(NestedInline), - /// Representing the verbatim inline element. - Verbatim(NestedInline), - /// Representing plain text. - Plain(FlatInline), - /// Representing newline in the original content that is treated as normal whitespace. - PlainNewLine(FlatInline), - /// Representing excplicit newlines. - EscapedNewLine(FlatInline), - /// Representing explicit spaces. - EscapedSpace(FlatInline), - /// Representing the text group inline element - TextGroup(NestedInline, TextGroupAttributes), + /// Representing the bold inline element. + Bold(NestedInline), + /// Representing the italic inline element. + Italic(NestedInline), + /// Representing the combined bold and italic inline element. + BoldItalic(NestedInline), + /// Representing the verbatim inline element. + Verbatim(NestedInline), + /// Representing plain text. 
+ Plain(FlatInline), + /// Representing newline in the original content that is treated as normal whitespace. + PlainNewLine(FlatInline), + /// Representing excplicit newlines. + EscapedNewLine(FlatInline), + /// Representing explicit spaces. + EscapedSpace(FlatInline), + /// Representing the text group inline element + TextGroup(NestedInline, TextGroupAttributes), } /// Trait to flatten inline elements. pub trait FlattenInlineKind { - /// This function converts an inline element back into its original plain representation. - /// - /// e.g. `Bold(Plain(text))` --> `**text**` - fn flatten(self) -> String; + /// This function converts an inline element back into its original plain representation. + /// + /// e.g. `Bold(Plain(text))` --> `**text**` + fn flatten(self) -> String; - /// This function converts an inline element in its verbatim representation. - /// - /// e.g. Verbatim(Bold(Plain(b),EscapedSpace(),Plain(b))) --> `` `**b\ b**` `` - fn flatten_for_verbatim(self) -> Vec; + /// This function converts an inline element in its verbatim representation. + /// + /// e.g. Verbatim(Bold(Plain(b),EscapedSpace(),Plain(b))) --> `` `**b\ b**` `` + fn flatten_for_verbatim(self) -> Vec; } impl FlattenInlineKind for Vec { - fn flatten(self) -> String { - let mut s: String = String::new(); + fn flatten(self) -> String { + let mut s: String = String::new(); - for inline in self { - s.push_str(&inline.flatten()); - } + for inline in self { + s.push_str(&inline.flatten()); + } - s - } + s + } - fn flatten_for_verbatim(self) -> Vec { - let mut flattened: Vec = Vec::new(); + fn flatten_for_verbatim(self) -> Vec { + let mut flattened: Vec = Vec::new(); - for inline in self { - let mut inner = inline.flatten_for_verbatim(); - if let Some(InlineKind::Plain(last_outer)) = flattened.last_mut() { - if let Some(InlineKind::Plain(first_inner)) = inner.first() { - last_outer.content.push_str(&first_inner.content); - last_outer.span.end = first_inner.span.end; - flattened.append(&mut inner[1..].into()); - } else { - flattened.append(&mut inner); + for inline in self { + let mut inner = inline.flatten_for_verbatim(); + if let Some(InlineKind::Plain(last_outer)) = flattened.last_mut() { + if let Some(InlineKind::Plain(first_inner)) = inner.first() { + last_outer.content.push_str(&first_inner.content); + last_outer.span.end = first_inner.span.end; + flattened.append(&mut inner[1..].into()); + } else { + flattened.append(&mut inner); + } + } else { + flattened.append(&mut inner); + } } - } else { - flattened.append(&mut inner); - } - } - flattened - } + flattened + } } impl FlattenInlineKind for InlineKind { - fn flatten(self) -> String { - match self { - InlineKind::Bold(nested) => { - let mut s = String::from(TokenKind::BoldOpen.as_str()); - s.push_str(&nested.content.flatten()); - s.push_str(TokenKind::BoldClose.as_str()); - s - }, - InlineKind::Italic(nested) => { - let mut s = String::from(TokenKind::ItalicOpen.as_str()); - s.push_str(&nested.content.flatten()); - s.push_str(TokenKind::ItalicClose.as_str()); - s - }, - InlineKind::BoldItalic(nested) => { - let mut s = String::from(TokenKind::BoldItalicOpen.as_str()); - s.push_str(&nested.content.flatten()); - s.push_str(TokenKind::BoldItalicClose.as_str()); - s - }, - InlineKind::Verbatim(flat) => { - let mut s = String::from(TokenKind::VerbatimOpen.as_str()); - s.push_str(&flat.content.flatten()); - s.push_str(TokenKind::VerbatimClose.as_str()); - s - }, - InlineKind::Plain(flat) - | InlineKind::PlainNewLine(flat) - | InlineKind::EscapedNewLine(flat) - | 
InlineKind::EscapedSpace(flat) => { - flat.content - }, - InlineKind::TextGroup(nested, attributes) => { - let mut s = String::from(TokenKind::TextGroupOpen.as_str()); - s.push_str(&nested.content.flatten()); - s.push_str(TokenKind::TextGroupClose.as_str()); - s.push_str(&attributes.content); - s - }, + fn flatten(self) -> String { + match self { + InlineKind::Bold(nested) => { + let mut s = String::from(TokenKind::BoldOpen.as_str()); + s.push_str(&nested.content.flatten()); + s.push_str(TokenKind::BoldClose.as_str()); + s + } + InlineKind::Italic(nested) => { + let mut s = String::from(TokenKind::ItalicOpen.as_str()); + s.push_str(&nested.content.flatten()); + s.push_str(TokenKind::ItalicClose.as_str()); + s + } + InlineKind::BoldItalic(nested) => { + let mut s = String::from(TokenKind::BoldItalicOpen.as_str()); + s.push_str(&nested.content.flatten()); + s.push_str(TokenKind::BoldItalicClose.as_str()); + s + } + InlineKind::Verbatim(flat) => { + let mut s = String::from(TokenKind::VerbatimOpen.as_str()); + s.push_str(&flat.content.flatten()); + s.push_str(TokenKind::VerbatimClose.as_str()); + s + } + InlineKind::Plain(flat) + | InlineKind::PlainNewLine(flat) + | InlineKind::EscapedNewLine(flat) + | InlineKind::EscapedSpace(flat) => flat.content, + InlineKind::TextGroup(nested, attributes) => { + let mut s = String::from(TokenKind::TextGroupOpen.as_str()); + s.push_str(&nested.content.flatten()); + s.push_str(TokenKind::TextGroupClose.as_str()); + s.push_str(&attributes.content); + s + } + } } - } - fn flatten_for_verbatim(self) -> Vec { - match self { - InlineKind::Bold(nested) => { - let mut inner = nested.content.flatten_for_verbatim(); - merge_flattend_verbatim(&mut inner, TokenKind::BoldOpen.as_str(), - TokenKind::BoldClose.as_str(), nested.span); - inner - }, - InlineKind::Italic(nested) => { - let mut inner = nested.content.flatten_for_verbatim(); - merge_flattend_verbatim(&mut inner, TokenKind::ItalicOpen.as_str(), - TokenKind::ItalicClose.as_str(), nested.span); - inner - }, - InlineKind::BoldItalic(nested) => { - let mut inner = nested.content.flatten_for_verbatim(); - merge_flattend_verbatim(&mut inner, TokenKind::BoldItalicOpen.as_str(), - TokenKind::BoldItalicClose.as_str(), nested.span); - inner - }, - InlineKind::TextGroup(nested, attributes) => { - let mut inner = nested.content.flatten_for_verbatim(); - merge_flattend_verbatim(&mut inner, TokenKind::TextGroupOpen.as_str(), - TokenKind::TextGroupClose.as_str(), nested.span); - - if let Some(InlineKind::Plain(last)) = inner.last_mut() { - last.content.push_str(&attributes.content); - } - - inner + fn flatten_for_verbatim(self) -> Vec { + match self { + InlineKind::Bold(nested) => { + let mut inner = nested.content.flatten_for_verbatim(); + merge_flattend_verbatim( + &mut inner, + TokenKind::BoldOpen.as_str(), + TokenKind::BoldClose.as_str(), + nested.span, + ); + inner + } + InlineKind::Italic(nested) => { + let mut inner = nested.content.flatten_for_verbatim(); + merge_flattend_verbatim( + &mut inner, + TokenKind::ItalicOpen.as_str(), + TokenKind::ItalicClose.as_str(), + nested.span, + ); + inner + } + InlineKind::BoldItalic(nested) => { + let mut inner = nested.content.flatten_for_verbatim(); + merge_flattend_verbatim( + &mut inner, + TokenKind::BoldItalicOpen.as_str(), + TokenKind::BoldItalicClose.as_str(), + nested.span, + ); + inner + } + InlineKind::TextGroup(nested, attributes) => { + let mut inner = nested.content.flatten_for_verbatim(); + merge_flattend_verbatim( + &mut inner, + TokenKind::TextGroupOpen.as_str(), + 
TokenKind::TextGroupClose.as_str(), + nested.span, + ); + + if let Some(InlineKind::Plain(last)) = inner.last_mut() { + last.content.push_str(&attributes.content); + } + + inner + } + _ => { + vec![self] + } } - _ => { - vec![self] - }, } - } } /// This function merges nested inlines into `Plain` kinds -fn merge_flattend_verbatim(inner: &mut Vec, outer_start: &str, outer_end: &str, outer_span: Span) { - if let Some(first) = inner.first_mut() { - match first { - InlineKind::Plain(plain) => { - plain.content.insert_str(0, outer_start); - plain.span.start = outer_span.start; - }, - _ => { - inner.insert(0, - InlineKind::Plain(FlatInline{ - content: outer_start.to_string(), - span: Span { start: outer_span.start, - end: Position { line: outer_span.start.line, - column: outer_span.start.column + outer_start.len() - } - }, - }) - ); - } +fn merge_flattend_verbatim( + inner: &mut Vec, + outer_start: &str, + outer_end: &str, + outer_span: Span, +) { + if let Some(first) = inner.first_mut() { + match first { + InlineKind::Plain(plain) => { + plain.content.insert_str(0, outer_start); + plain.span.start = outer_span.start; + } + _ => { + inner.insert( + 0, + InlineKind::Plain(FlatInline { + content: outer_start.to_string(), + span: Span { + start: outer_span.start, + end: Position { + line: outer_span.start.line, + column: outer_span.start.column + outer_start.len(), + }, + }, + }), + ); + } + } } - } - if let Some(last) = inner.last_mut() { - match last { - InlineKind::Plain(plain) => { - plain.content.push_str(outer_end); - plain.span.end = outer_span.end; - }, - _ => { - inner.push( - InlineKind::Plain(FlatInline{ - content: outer_end.to_string(), - span: Span { - start: Position { line: outer_span.end.line, - column: outer_span.end.column - outer_end.len() - }, - end: outer_span.end, - }, - }) - ); - } + if let Some(last) = inner.last_mut() { + match last { + InlineKind::Plain(plain) => { + plain.content.push_str(outer_end); + plain.span.end = outer_span.end; + } + _ => { + inner.push(InlineKind::Plain(FlatInline { + content: outer_end.to_string(), + span: Span { + start: Position { + line: outer_span.end.line, + column: outer_span.end.column - outer_end.len(), + }, + end: outer_span.end, + }, + })); + } + } } - } } pub struct TokenIdentifier { - pub start: String, - pub end: String, + pub start: String, + pub end: String, } pub trait InlineIdentifiers { - fn get_identifier(&self) -> TokenIdentifier; + fn get_identifier(&self) -> TokenIdentifier; } impl InlineIdentifiers for InlineKind { - fn get_identifier(&self) -> TokenIdentifier { - match self { - InlineKind::Bold(_) => TokenIdentifier{ - start: TokenKind::BoldOpen.as_str().to_string(), - end: TokenKind::BoldClose.as_str().to_string(), - }, - InlineKind::Italic(_) => TokenIdentifier{ - start: TokenKind::ItalicOpen.as_str().to_string(), - end: TokenKind::ItalicClose.as_str().to_string(), - }, - InlineKind::BoldItalic(_) => TokenIdentifier{ - start: TokenKind::BoldItalicOpen.as_str().to_string(), - end: TokenKind::BoldItalicClose.as_str().to_string(), - }, - InlineKind::Verbatim(_) => TokenIdentifier{ - start: TokenKind::VerbatimOpen.as_str().to_string(), - end: TokenKind::VerbatimClose.as_str().to_string(), - }, - InlineKind::EscapedNewLine(_) - | InlineKind::EscapedSpace(_) => TokenIdentifier{ - start: "\\".to_string(), - end: "".to_string(), - }, - InlineKind::TextGroup(_, _) => TokenIdentifier{ - start: TokenKind::TextGroupOpen.as_str().to_string(), - end: TokenKind::TextGroupClose.as_str().to_string(), - }, - _ => TokenIdentifier{ - start: 
"".to_string(), - end: "".to_string(), - } + fn get_identifier(&self) -> TokenIdentifier { + match self { + InlineKind::Bold(_) => TokenIdentifier { + start: TokenKind::BoldOpen.as_str().to_string(), + end: TokenKind::BoldClose.as_str().to_string(), + }, + InlineKind::Italic(_) => TokenIdentifier { + start: TokenKind::ItalicOpen.as_str().to_string(), + end: TokenKind::ItalicClose.as_str().to_string(), + }, + InlineKind::BoldItalic(_) => TokenIdentifier { + start: TokenKind::BoldItalicOpen.as_str().to_string(), + end: TokenKind::BoldItalicClose.as_str().to_string(), + }, + InlineKind::Verbatim(_) => TokenIdentifier { + start: TokenKind::VerbatimOpen.as_str().to_string(), + end: TokenKind::VerbatimClose.as_str().to_string(), + }, + InlineKind::EscapedNewLine(_) | InlineKind::EscapedSpace(_) => TokenIdentifier { + start: "\\".to_string(), + end: "".to_string(), + }, + InlineKind::TextGroup(_, _) => TokenIdentifier { + start: TokenKind::TextGroupOpen.as_str().to_string(), + end: TokenKind::TextGroupClose.as_str().to_string(), + }, + _ => TokenIdentifier { + start: "".to_string(), + end: "".to_string(), + }, + } } - } } diff --git a/inline/src/ast/substitutions.rs b/inline/src/ast/substitutions.rs index d9d1784c..90ab507b 100644 --- a/inline/src/ast/substitutions.rs +++ b/inline/src/ast/substitutions.rs @@ -2,59 +2,59 @@ /// Trait for direct substitution pub trait DirectSubstitution { - /// Substitutes supported arrows or leaves given input unchanged, if no supported arrow matched. - fn substitute_arrow(self) -> Self; + /// Substitutes supported arrows or leaves given input unchanged, if no supported arrow matched. + fn substitute_arrow(self) -> Self; - /// Substitutes supported emojis or leaves given input unchanged, if no supported emoji matched. - fn substitute_emoji(self) -> Self; + /// Substitutes supported emojis or leaves given input unchanged, if no supported emoji matched. 
+ fn substitute_emoji(self) -> Self; } impl DirectSubstitution for String { - fn substitute_arrow(self) -> Self { - match self.as_str() { - "-->" => "๐Ÿ –".to_string(), - "|-->" => "โ†ฆ".to_string(), - "---->" => "โŸถ".to_string(), - "|---->" => "โŸผ".to_string(), - "==>" => "โ‡’".to_string(), - "|==>" => "โค‡".to_string(), - "====>" => "โŸน".to_string(), - "|====>" => "โŸพ".to_string(), - "<--" => "๐Ÿ ”".to_string(), - "<--|" => "โ†ค".to_string(), - "<----" => "โŸต".to_string(), - "<----|" => "โŸป".to_string(), - "<==" => "โ‡".to_string(), - "<==|" => "โค†".to_string(), - "<====" => "โŸธ".to_string(), - "<====|" => "โŸฝ".to_string(), - "<-->" => "โŸท".to_string(), - "<==>" => "โ‡”".to_string(), - _ => self, + fn substitute_arrow(self) -> Self { + match self.as_str() { + "-->" => "๐Ÿ –".to_string(), + "|-->" => "โ†ฆ".to_string(), + "---->" => "โŸถ".to_string(), + "|---->" => "โŸผ".to_string(), + "==>" => "โ‡’".to_string(), + "|==>" => "โค‡".to_string(), + "====>" => "โŸน".to_string(), + "|====>" => "โŸพ".to_string(), + "<--" => "๐Ÿ ”".to_string(), + "<--|" => "โ†ค".to_string(), + "<----" => "โŸต".to_string(), + "<----|" => "โŸป".to_string(), + "<==" => "โ‡".to_string(), + "<==|" => "โค†".to_string(), + "<====" => "โŸธ".to_string(), + "<====|" => "โŸฝ".to_string(), + "<-->" => "โŸท".to_string(), + "<==>" => "โ‡”".to_string(), + _ => self, + } } - } - fn substitute_emoji(self) -> Self { - match self.as_str() { - ":)" => "๐Ÿ™‚".to_string(), - ";)" => "๐Ÿ˜‰".to_string(), - ":D" => "๐Ÿ˜ƒ".to_string(), - "^^" => "๐Ÿ˜„".to_string(), - "=)" => "๐Ÿ˜Š".to_string(), - ":(" => "๐Ÿ™".to_string(), - ";(" => "๐Ÿ˜ข".to_string(), - ":P" => "๐Ÿ˜›".to_string(), - ";P" => "๐Ÿ˜œ".to_string(), - "O:)" => "๐Ÿ˜‡".to_string(), - ":O" => "๐Ÿ˜จ".to_string(), - ">:(" => "๐Ÿคฌ".to_string(), - ":/" => "๐Ÿ˜•".to_string(), - "3:)" => "๐Ÿ˜ˆ".to_string(), - "--" => "๐Ÿ˜‘".to_string(), - "<3" => "โค".to_string(), - "(Y)" => "๐Ÿ‘".to_string(), - "(N)" => "๐Ÿ‘Ž".to_string(), - _ => self, + fn substitute_emoji(self) -> Self { + match self.as_str() { + ":)" => "๐Ÿ™‚".to_string(), + ";)" => "๐Ÿ˜‰".to_string(), + ":D" => "๐Ÿ˜ƒ".to_string(), + "^^" => "๐Ÿ˜„".to_string(), + "=)" => "๐Ÿ˜Š".to_string(), + ":(" => "๐Ÿ™".to_string(), + ";(" => "๐Ÿ˜ข".to_string(), + ":P" => "๐Ÿ˜›".to_string(), + ";P" => "๐Ÿ˜œ".to_string(), + "O:)" => "๐Ÿ˜‡".to_string(), + ":O" => "๐Ÿ˜จ".to_string(), + ">:(" => "๐Ÿคฌ".to_string(), + ":/" => "๐Ÿ˜•".to_string(), + "3:)" => "๐Ÿ˜ˆ".to_string(), + "--" => "๐Ÿ˜‘".to_string(), + "<3" => "โค".to_string(), + "(Y)" => "๐Ÿ‘".to_string(), + "(N)" => "๐Ÿ‘Ž".to_string(), + _ => self, + } } - } } diff --git a/inline/src/error.rs b/inline/src/error.rs index d33b856c..cbfc52c9 100644 --- a/inline/src/error.rs +++ b/inline/src/error.rs @@ -1,10 +1,8 @@ - /// Error enum for possible inline errors -/// +/// /// Note: Temporary solution until log_id is separated from core #[derive(Debug)] -pub enum InlineError{ - /// Set if either text group, uri or attribute block is not closed properly - ClosingViolation, +pub enum InlineError { + /// Set if either text group, uri or attribute block is not closed properly + ClosingViolation, } - diff --git a/inline/src/lib.rs b/inline/src/lib.rs index a4f07e93..c52caa84 100644 --- a/inline/src/lib.rs +++ b/inline/src/lib.rs @@ -1,4 +1,4 @@ -//! This library provides functionality to get a Unimarkup inline AST from a given string +//! 
This library provides functionality to get a Unimarkup inline AST from a given string use ast::collect::InlineAst; use error::InlineError; @@ -12,21 +12,21 @@ pub use ast::*; pub use tokenizer::*; /// Function to transform a given string into an AST of Unimarkup inline elements. -/// +/// /// **Note:** The string must not contain blank lines! It is not checked, but will probably lead to false results. -/// +/// /// Returns `InlineError`, if inline constraints are violated by the given string. pub fn parse(content: &str) -> Result<Inline, InlineError> { - Ok(content.tokenize()?.collect()) + Ok(content.tokenize()?.collect()) } /// Function to transform a given string into an AST of Unimarkup inline elements. /// The additional offset is used to set the start position of the first inline element. /// This function is useful to get correct element positions inside a Unimarkup document. -/// +/// /// **Note:** The string must not contain blank lines! It is not checked, but will probably lead to false results. -/// +/// /// Returns `InlineError`, if inline constraints are violated by the given string. pub fn parse_with_offset(content: &str, offset: Position) -> Result<Inline, InlineError> { - Ok(content.tokenize_with_offset(offset)?.collect()) + Ok(content.tokenize_with_offset(offset)?.collect()) } diff --git a/inline/src/tokenizer/mod.rs b/inline/src/tokenizer/mod.rs index 1a898ef0..ca7382b9 100644 --- a/inline/src/tokenizer/mod.rs +++ b/inline/src/tokenizer/mod.rs @@ -1,9 +1,12 @@ //! This module provides functionality to tokenize a given &str input. //! The resulting list of tokens is a flat tokenized representation. -//! +//! //! e.g. `*text*` --> `[ItalicOpen][Plain][ItalicClose]` -use std::{collections::{HashMap, hash_map::Entry::Vacant}, cmp::min}; +use std::{ + cmp::min, + collections::{hash_map::Entry::Vacant, HashMap}, +}; use unicode_segmentation::{Graphemes, UnicodeSegmentation}; @@ -15,298 +18,361 @@ use crate::error::InlineError; /// Struct to link to the grapheme position of a token in the given input. #[derive(Debug, Default, Clone, PartialEq, Copy)] pub struct Position { - /// Line number in the given input. - pub line: usize, - /// Column in the given input. - pub column: usize, + /// Line number in the given input. + pub line: usize, + /// Column in the given input. + pub column: usize, } /// Trait to convert a given input into a list of tokens. pub trait Tokenizer { - /// Takes an input and converts it into a list of tokens. - /// - /// Returns an error if inline constraints are violated. - fn tokenize(self) -> Result<Tokens, InlineError>; - - /// Takes an input and an offset to convert the input into a list of tokens, - /// where the first token starts at the given offset. - /// - /// Returns an error if inline constraints are violated. + /// Takes an input and converts it into a list of tokens. + /// + /// Returns an error if inline constraints are violated. + fn tokenize(self) -> Result<Tokens, InlineError>; + + /// Takes an input and an offset to convert the input into a list of tokens, + /// where the first token starts at the given offset. + /// + /// Returns an error if inline constraints are violated. 
+ fn tokenize_with_offset(self, offset: Position) -> Result<Tokens, InlineError>; } impl Tokenizer for &str { - fn tokenize(self) -> Result<Tokens, InlineError> { - self.tokenize_with_offset(Position::default()) - } - - fn tokenize_with_offset(self, offset: Position) -> Result<Tokens, InlineError> { - let mut tokenized = Tokenized::from((self, offset)); - tokenize_until(&mut tokenized, TokenKind::Eoi)?; - // EOI is treated as newline - update_open_map(&mut tokenized, true); - try_closing_fixated_token(&mut tokenized, true); - cleanup_loose_open_tokens(&mut tokenized); - - Ok(tokenized.tokens) - } + fn tokenize(self) -> Result<Tokens, InlineError> { + self.tokenize_with_offset(Position::default()) + } + + fn tokenize_with_offset(self, offset: Position) -> Result<Tokens, InlineError> { + let mut tokenized = Tokenized::from((self, offset)); + tokenize_until(&mut tokenized, TokenKind::Eoi)?; + // EOI is treated as newline + update_open_map(&mut tokenized, true); + try_closing_fixated_token(&mut tokenized, true); + cleanup_loose_open_tokens(&mut tokenized); + + Ok(tokenized.tokens) + } } /// Internal structure to keep track of the tokenization process. #[derive(Debug)] struct Tokenized<'a> { - /// Input converted to a grapheme iterator. - graphemes: Graphemes<'a>, - /// List of tokens that were tokenized so far. - tokens: Vec::<Token>, - /// Map of open tokens that were not yet closed - open_tokens: HashMap::<TokenKind, usize>, - /// The position inside the input of the current token being tokenized. - cur_pos: Position, - /// Flag indicating that a grapheme must be escaped. - escape_active: bool, + /// Input converted to a grapheme iterator. + graphemes: Graphemes<'a>, + /// List of tokens that were tokenized so far. + tokens: Vec<Token>, + /// Map of open tokens that were not yet closed + open_tokens: HashMap<TokenKind, usize>, + /// The position inside the input of the current token being tokenized. + cur_pos: Position, + /// Flag indicating that a grapheme must be escaped. + escape_active: bool, } impl<'a> From<(&'a str, Position)> for Tokenized<'a> { - fn from((content, offset): (&'a str, Position)) -> Self { - Tokenized { - graphemes: content.graphemes(true), - tokens: Default::default(), - open_tokens: Default::default(), - cur_pos: offset, - escape_active: false, + fn from((content, offset): (&'a str, Position)) -> Self { + Tokenized { + graphemes: content.graphemes(true), + tokens: Default::default(), + open_tokens: Default::default(), + cur_pos: offset, + escape_active: false, + } } - } } /// Function creates tokens until `token_kind` is matched, or end of input is reached. -/// +/// /// Note: The token of kind `token_kind` is also included in the resulting tokens vector. 
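As a sketch of the flat token stream described above (a hypothetical test, assuming `Tokens` behaves as a `Vec<Token>` with a readable `kind` field, as the crate's tokenizer tests suggest):

    use unimarkup_inline::{TokenKind, Tokenizer};

    fn main() {
        let tokens = "*text*".tokenize().unwrap();
        // matches the module docs: [ItalicOpen][Plain][ItalicClose]
        assert!(matches!(tokens[0].kind, TokenKind::ItalicOpen));
        assert!(matches!(tokens[1].kind, TokenKind::Plain));
        assert!(matches!(tokens[2].kind, TokenKind::ItalicClose));
    }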
fn tokenize_until(tokenized: &mut Tokenized, token_kind: TokenKind) -> Result<(), InlineError> { - let mut prev_tokens_len = tokenized.tokens.len(); - while let Some(grapheme) = tokenized.graphemes.next() { - update_tokens(tokenized, grapheme)?; - - if tokenized.tokens.len() != prev_tokens_len && !tokenized.tokens.is_empty() { - // Last token excluded, since it is not fixated yet - let last = tokenized.tokens.pop().unwrap(); - if !last.closes_scope() { - update_open_map(tokenized, last.is_space_or_newline()); - try_closing_fixated_token(tokenized, last.is_space_or_newline()); - } - - let last_kind = last.kind; - tokenized.tokens.push(last); + let mut prev_tokens_len = tokenized.tokens.len(); + while let Some(grapheme) = tokenized.graphemes.next() { + update_tokens(tokenized, grapheme)?; + + if tokenized.tokens.len() != prev_tokens_len && !tokenized.tokens.is_empty() { + // Last token excluded, since it is not fixated yet + let last = tokenized.tokens.pop().unwrap(); + if !last.closes_scope() { + update_open_map(tokenized, last.is_space_or_newline()); + try_closing_fixated_token(tokenized, last.is_space_or_newline()); + } - if last_kind == token_kind { - return Ok(()); - } + let last_kind = last.kind; + tokenized.tokens.push(last); + + if last_kind == token_kind { + return Ok(()); + } + } + prev_tokens_len = tokenized.tokens.len(); } - prev_tokens_len = tokenized.tokens.len(); - } - // Brackets must close - if let Some(last) = tokenized.tokens.last() { - if token_kind != TokenKind::Eoi && last.kind != token_kind { - return Err(InlineError::ClosingViolation); + // Brackets must close + if let Some(last) = tokenized.tokens.last() { + if token_kind != TokenKind::Eoi && last.kind != token_kind { + return Err(InlineError::ClosingViolation); + } } - } - Ok(()) + Ok(()) } /// Handles verbatim tokens. fn update_accent(tokenized: &mut Tokenized, grapheme: &str) { - if let Some(last) = tokenized.tokens.last() { - tokenized.cur_pos.column += last.length(); - } - - match tokenized.open_tokens.contains_key(&TokenKind::VerbatimOpen) { - true => { - let new_token = Token{ kind: TokenKind::VerbatimClose, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - }, - false => { - let new_token = Token{ kind: TokenKind::VerbatimOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - }, - } + if let Some(last) = tokenized.tokens.last() { + tokenized.cur_pos.column += last.length(); + } + + match tokenized.open_tokens.contains_key(&TokenKind::VerbatimOpen) { + true => { + let new_token = Token { + kind: TokenKind::VerbatimClose, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } + false => { + let new_token = Token { + kind: TokenKind::VerbatimOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } + } } /// Updates the list of tokens by handling the next grapheme of the input. 
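A sketch of the escape path handled below: a backslash marks the next grapheme as escaped, which strips its formatting meaning (hypothetical test, not part of this patch):

    use unimarkup_inline::{parse, InlineKind};

    fn main() {
        let ast = parse(r"\*not italic\*").unwrap();
        // both asterisks are escaped, so no Italic element may appear
        assert!(!ast.iter().any(|kind| matches!(kind, InlineKind::Italic(_))));
    }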
fn update_tokens(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), InlineError> { - if tokenized.escape_active { - update_escaped(tokenized, grapheme); - tokenized.escape_active = false; - } else { - let single_token_kind = grapheme.as_single_token_kind(); - // Only single grapheme tokens need to be handled here, because only single grapheme is handled per update - match single_token_kind { - SingleTokenKind::Plain => update_plain(tokenized, grapheme), - SingleTokenKind::Newline => update_newline(tokenized, grapheme), - SingleTokenKind::Space => update_space(tokenized, grapheme), - SingleTokenKind::Backslash => { - tokenized.escape_active = true; - }, - // SingleTokenKind::ExclamationMark => todo!(), - // SingleTokenKind::Ampersand => todo!(), - // SingleTokenKind::Colon => todo!(), - // SingleTokenKind::Caret => todo!(), - // SingleTokenKind::Underscore => todo!(), - SingleTokenKind::Asterisk => update_asterisk(tokenized, grapheme), - // SingleTokenKind::Plus => todo!(), - SingleTokenKind::Accent => update_accent(tokenized, grapheme), - SingleTokenKind::LeftSquareBracket => open_text_group(tokenized, grapheme)?, - SingleTokenKind::RightSquareBracket => try_closing_text_group(tokenized, grapheme), + if tokenized.escape_active { + update_escaped(tokenized, grapheme); + tokenized.escape_active = false; + } else { + let single_token_kind = grapheme.as_single_token_kind(); + // Only single grapheme tokens need to be handled here, because only single grapheme is handled per update + match single_token_kind { + SingleTokenKind::Plain => update_plain(tokenized, grapheme), + SingleTokenKind::Newline => update_newline(tokenized, grapheme), + SingleTokenKind::Space => update_space(tokenized, grapheme), + SingleTokenKind::Backslash => { + tokenized.escape_active = true; + } + // SingleTokenKind::ExclamationMark => todo!(), + // SingleTokenKind::Ampersand => todo!(), + // SingleTokenKind::Colon => todo!(), + // SingleTokenKind::Caret => todo!(), + // SingleTokenKind::Underscore => todo!(), + SingleTokenKind::Asterisk => update_asterisk(tokenized, grapheme), + // SingleTokenKind::Plus => todo!(), + SingleTokenKind::Accent => update_accent(tokenized, grapheme), + SingleTokenKind::LeftSquareBracket => open_text_group(tokenized, grapheme)?, + SingleTokenKind::RightSquareBracket => try_closing_text_group(tokenized, grapheme), + } } - } - Ok(()) + Ok(()) } /// Handles text group tokenization by taking precedence over inline formattings. /// This is achieved by recursive tokenization expecting text group close token. -/// +/// /// Note: The recursive approach enforces the closing constraint. 
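A sketch of the closing constraint this recursion enforces (hypothetical test, not part of this patch):

    use unimarkup_inline::{parse, InlineKind};

    fn main() {
        // a closed text group becomes its own nested element
        assert!(matches!(parse("[group]").unwrap()[0], InlineKind::TextGroup(_, _)));
        // an unclosed text group violates the closing constraint
        assert!(parse("[unclosed").is_err());
    }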
fn open_text_group(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), InlineError> { - if let Some(last) = tokenized.tokens.last() { - tokenized.cur_pos.column += last.length(); - } + if let Some(last) = tokenized.tokens.last() { + tokenized.cur_pos.column += last.length(); + } - update_open_map(tokenized, false); - try_closing_fixated_token(tokenized, false); - - // Makes sure to not have formattings over text group borders - let outer_open_tokens = tokenized.open_tokens.clone(); - tokenized.open_tokens = HashMap::default(); + update_open_map(tokenized, false); + try_closing_fixated_token(tokenized, false); - let new_token = Token{ kind: TokenKind::TextGroupOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); + // Makes sure to not have formattings over text group borders + let outer_open_tokens = tokenized.open_tokens.clone(); + tokenized.open_tokens = HashMap::default(); - tokenize_until(tokenized, TokenKind::TextGroupClose)?; + let new_token = Token { + kind: TokenKind::TextGroupOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); - let closing_token = tokenized.tokens.pop().unwrap(); - try_closing_fixated_token(tokenized, true); - cleanup_loose_open_tokens(tokenized); - tokenized.tokens.push(closing_token); + tokenize_until(tokenized, TokenKind::TextGroupClose)?; - tokenized.open_tokens = outer_open_tokens; + let closing_token = tokenized.tokens.pop().unwrap(); + try_closing_fixated_token(tokenized, true); + cleanup_loose_open_tokens(tokenized); + tokenized.tokens.push(closing_token); - Ok(()) + tokenized.open_tokens = outer_open_tokens; + + Ok(()) } /// Function to close a text group if possible. fn try_closing_text_group(tokenized: &mut Tokenized, grapheme: &str) { - if tokenized.open_tokens.remove(&TokenKind::TextGroupOpen).is_some() { - if let Some(last) = tokenized.tokens.last() { - tokenized.cur_pos.column += last.length(); - } - tokenized.tokens.push(Token{ kind: TokenKind::TextGroupClose, content: grapheme.to_string(), position: tokenized.cur_pos }); - } else if let Some(last) = tokenized.tokens.last_mut() { - tokenized.cur_pos.column += last.length(); - let new_token = Token{ kind: TokenKind::Plain, content: grapheme.to_string(), position: tokenized.cur_pos }; - - if last.kind == TokenKind::Plain { - last.content.push_str(&new_token.content); - } else { - tokenized.tokens.push(new_token); + if tokenized + .open_tokens + .remove(&TokenKind::TextGroupOpen) + .is_some() + { + if let Some(last) = tokenized.tokens.last() { + tokenized.cur_pos.column += last.length(); + } + tokenized.tokens.push(Token { + kind: TokenKind::TextGroupClose, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }); + } else if let Some(last) = tokenized.tokens.last_mut() { + tokenized.cur_pos.column += last.length(); + let new_token = Token { + kind: TokenKind::Plain, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + + if last.kind == TokenKind::Plain { + last.content.push_str(&new_token.content); + } else { + tokenized.tokens.push(new_token); + } } - } } /// Function removes any dangling open token between open/close tokens of the last fix token, if it is a closing one. 
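A sketch of the ambiguity this function resolves: `***` opens bold and italic at once, and the matching close collapses into a single element (hypothetical test; behavior inferred from the token kinds, not shown in this hunk):

    use unimarkup_inline::{parse, InlineKind};

    fn main() {
        let ast = parse("***both***").unwrap();
        assert!(matches!(ast[0], InlineKind::BoldItalic(_)));
    }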
fn try_closing_fixated_token(tokenized: &mut Tokenized, next_token_is_space_or_newline: bool) { - if let Some(mut last) = tokenized.tokens.pop() { - let open_index; - let mut updated_open_tokens = HashMap::new(); - match last.kind { - TokenKind::BoldClose => { - if let Some(index) = tokenized.open_tokens.remove(&TokenKind::BoldOpen) { - open_index = index; - } else { - open_index = tokenized.open_tokens.remove(&TokenKind::BoldItalicOpen).expect("Closing token requires open token"); - let open_token = tokenized.tokens.get_mut(open_index).expect("Got token index from hashmap"); - open_token.kind = TokenKind::ItalicOpen; - open_token.content = TokenKind::ItalicOpen.as_str().to_string(); - updated_open_tokens.insert(open_token.kind, open_index); - let new_pos = Position { line: open_token.position.line, column: open_token.position.column + open_token.length() }; - // +1 because the inner token gets closed first - tokenized.tokens.insert(open_index + 1, Token { - kind: TokenKind::BoldOpen, content: TokenKind::BoldOpen.as_str().to_string(), position: new_pos - }); - } - }, - TokenKind::ItalicClose => { - if let Some(index) = tokenized.open_tokens.remove(&TokenKind::ItalicOpen) { - open_index = index; - } else if let Some(index) = tokenized.open_tokens.remove(&TokenKind::BoldItalicOpen) { - open_index = index; - let open_token = tokenized.tokens.get_mut(open_index).expect("Got token index from hashmap"); - open_token.kind = TokenKind::BoldOpen; - open_token.content = TokenKind::BoldOpen.as_str().to_string(); - updated_open_tokens.insert(open_token.kind, open_index); - let new_pos = Position { line: open_token.position.line, column: open_token.position.column + open_token.length() }; - // +1 because the inner token gets closed first - tokenized.tokens.insert(open_index + 1, Token { - kind: TokenKind::ItalicOpen, content: TokenKind::ItalicOpen.as_str().to_string(), position: new_pos - }); - } else { - // ItalicClose kept open for possible BoldClose, but stayed at ItalicClose - if next_token_is_space_or_newline { - last.kind = TokenKind::Plain; - if let Some(prev) = tokenized.tokens.last_mut() { - if prev.kind == TokenKind::Plain { - prev.content.push_str(&last.content); - return; + if let Some(mut last) = tokenized.tokens.pop() { + let open_index; + let mut updated_open_tokens = HashMap::new(); + match last.kind { + TokenKind::BoldClose => { + if let Some(index) = tokenized.open_tokens.remove(&TokenKind::BoldOpen) { + open_index = index; + } else { + open_index = tokenized + .open_tokens + .remove(&TokenKind::BoldItalicOpen) + .expect("Closing token requires open token"); + let open_token = tokenized + .tokens + .get_mut(open_index) + .expect("Got token index from hashmap"); + open_token.kind = TokenKind::ItalicOpen; + open_token.content = TokenKind::ItalicOpen.as_str().to_string(); + updated_open_tokens.insert(open_token.kind, open_index); + let new_pos = Position { + line: open_token.position.line, + column: open_token.position.column + open_token.length(), + }; + // +1 because the inner token gets closed first + tokenized.tokens.insert( + open_index + 1, + Token { + kind: TokenKind::BoldOpen, + content: TokenKind::BoldOpen.as_str().to_string(), + position: new_pos, + }, + ); } - } - } else { - last.kind = TokenKind::ItalicOpen; - tokenized.open_tokens.insert(last.kind, tokenized.tokens.len()); } - tokenized.tokens.push(last); - return; - } - }, - TokenKind::BoldItalicClose => { - if let Some(index) = tokenized.open_tokens.remove(&TokenKind::BoldItalicOpen) { - open_index = index; - } else { - 
let bold_index = tokenized.open_tokens.remove(&TokenKind::BoldOpen).expect("Bold open must exist for bold-italic closing"); - let italic_index = tokenized.open_tokens.remove(&TokenKind::ItalicOpen).expect("Italic open must exist for bold-italic closing"); - open_index = min(bold_index, italic_index); - } - }, - TokenKind::VerbatimClose => { open_index = tokenized.open_tokens.remove(&TokenKind::VerbatimOpen).unwrap(); }, - // TokenKind::EmojiClose => { open_index = tokenized.open_tokens.remove(&TokenKind::EmojiOpen).unwrap(); }, - // TokenKind::CommentClose => { open_index = tokenized.open_tokens.remove(&TokenKind::CommentOpen).unwrap(); }, - _ => { - tokenized.tokens.push(last); - return; - }, - } + TokenKind::ItalicClose => { + if let Some(index) = tokenized.open_tokens.remove(&TokenKind::ItalicOpen) { + open_index = index; + } else if let Some(index) = tokenized.open_tokens.remove(&TokenKind::BoldItalicOpen) + { + open_index = index; + let open_token = tokenized + .tokens + .get_mut(open_index) + .expect("Got token index from hashmap"); + open_token.kind = TokenKind::BoldOpen; + open_token.content = TokenKind::BoldOpen.as_str().to_string(); + updated_open_tokens.insert(open_token.kind, open_index); + let new_pos = Position { + line: open_token.position.line, + column: open_token.position.column + open_token.length(), + }; + // +1 because the inner token gets closed first + tokenized.tokens.insert( + open_index + 1, + Token { + kind: TokenKind::ItalicOpen, + content: TokenKind::ItalicOpen.as_str().to_string(), + position: new_pos, + }, + ); + } else { + // ItalicClose kept open for possible BoldClose, but stayed at ItalicClose + if next_token_is_space_or_newline { + last.kind = TokenKind::Plain; + if let Some(prev) = tokenized.tokens.last_mut() { + if prev.kind == TokenKind::Plain { + prev.content.push_str(&last.content); + return; + } + } + } else { + last.kind = TokenKind::ItalicOpen; + tokenized + .open_tokens + .insert(last.kind, tokenized.tokens.len()); + } + tokenized.tokens.push(last); + return; + } + } + TokenKind::BoldItalicClose => { + if let Some(index) = tokenized.open_tokens.remove(&TokenKind::BoldItalicOpen) { + open_index = index; + } else { + let bold_index = tokenized + .open_tokens + .remove(&TokenKind::BoldOpen) + .expect("Bold open must exist for bold-italic closing"); + let italic_index = tokenized + .open_tokens + .remove(&TokenKind::ItalicOpen) + .expect("Italic open must exist for bold-italic closing"); + open_index = min(bold_index, italic_index); + } + } + TokenKind::VerbatimClose => { + open_index = tokenized + .open_tokens + .remove(&TokenKind::VerbatimOpen) + .unwrap(); + } + // TokenKind::EmojiClose => { open_index = tokenized.open_tokens.remove(&TokenKind::EmojiOpen).unwrap(); }, + // TokenKind::CommentClose => { open_index = tokenized.open_tokens.remove(&TokenKind::CommentOpen).unwrap(); }, + _ => { + tokenized.tokens.push(last); + return; + } + } - tokenized.tokens.push(last); + tokenized.tokens.push(last); - for (kind, index) in &tokenized.open_tokens.clone() { - if *index < open_index { - updated_open_tokens.insert(*kind, *index); - } else if tokenized.tokens.len() > *index { - try_plain_token_merge(tokenized, *index); - } + for (kind, index) in &tokenized.open_tokens.clone() { + if *index < open_index { + updated_open_tokens.insert(*kind, *index); + } else if tokenized.tokens.len() > *index { + try_plain_token_merge(tokenized, *index); + } + } + tokenized.open_tokens = updated_open_tokens; } - tokenized.open_tokens = updated_open_tokens; - } } /// 
Enteres the last fixed token into the open token hashmap, if it is an open token. -/// +/// /// Note: Enforces open token contraints, changing a token to plain if a constraint is violated. fn update_open_map(tokenized: &mut Tokenized, next_token_is_space_or_newline: bool) { - if let Some(mut prev) = tokenized.tokens.pop() { - // Makes sure that no two open tokens of the same kind are before one closing one - if let Vacant(e) = tokenized.open_tokens.entry(prev.kind) { - match prev.kind { + if let Some(mut prev) = tokenized.tokens.pop() { + // Makes sure that no two open tokens of the same kind are before one closing one + if let Vacant(e) = tokenized.open_tokens.entry(prev.kind) { + match prev.kind { TokenKind::BoldOpen | TokenKind::ItalicOpen | TokenKind::BoldItalicOpen @@ -322,306 +388,408 @@ fn update_open_map(tokenized: &mut Tokenized, next_token_is_space_or_newline: bo | TokenKind::TextGroupOpen => { e.insert(tokenized.tokens.len()); }, _ => { }, } - } else { - prev.kind = TokenKind::Plain; - } + } else { + prev.kind = TokenKind::Plain; + } - // Try plain merge - if let Some(prev_prev) = tokenized.tokens.last_mut() { - if prev_prev.kind == TokenKind::Plain && prev.kind == TokenKind::Plain { - prev_prev.content.push_str(&prev.content); - } else { - tokenized.tokens.push(prev); - } - } else { - tokenized.tokens.push(prev); + // Try plain merge + if let Some(prev_prev) = tokenized.tokens.last_mut() { + if prev_prev.kind == TokenKind::Plain && prev.kind == TokenKind::Plain { + prev_prev.content.push_str(&prev.content); + } else { + tokenized.tokens.push(prev); + } + } else { + tokenized.tokens.push(prev); + } } - } } /// Handles plain text. fn update_plain(tokenized: &mut Tokenized, grapheme: &str) { - if let Some(last) = tokenized.tokens.last_mut() { - if last.kind == TokenKind::Plain { - last.content.push_str(grapheme); + if let Some(last) = tokenized.tokens.last_mut() { + if last.kind == TokenKind::Plain { + last.content.push_str(grapheme); + } else { + tokenized.cur_pos.column += last.length(); + let new_token = Token { + kind: TokenKind::Plain, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } } else { - tokenized.cur_pos.column += last.length(); - let new_token = Token{ kind: TokenKind::Plain, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); + let new_token = Token { + kind: TokenKind::Plain, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); } - } else { - let new_token = Token{ kind: TokenKind::Plain, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - } } /// Handles escaped graphemes. fn update_escaped(tokenized: &mut Tokenized, grapheme: &str) { - if let Some(last) = tokenized.tokens.last() { - tokenized.cur_pos.column += last.length(); - } - tokenized.tokens.push(Token{ kind: TokenKind::EscapedGrapheme, content: grapheme.to_string(), position: tokenized.cur_pos }); - tokenized.cur_pos.column += 1; // add backslash length offset for next token start + if let Some(last) = tokenized.tokens.last() { + tokenized.cur_pos.column += last.length(); + } + tokenized.tokens.push(Token { + kind: TokenKind::EscapedGrapheme, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }); + tokenized.cur_pos.column += 1; // add backslash length offset for next token start } /// Handles graphemes with Unicode whitespace property that are not a newline. 
fn update_space(tokenized: &mut Tokenized, grapheme: &str) { - if let Some(last) = tokenized.tokens.last_mut() { - if last.kind == TokenKind::Space { - last.content.push_str(grapheme); + if let Some(last) = tokenized.tokens.last_mut() { + if last.kind == TokenKind::Space { + last.content.push_str(grapheme); + } else { + tokenized.cur_pos.column += last.length(); + let new_token = Token { + kind: TokenKind::Space, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } } else { - tokenized.cur_pos.column += last.length(); - let new_token = Token{ kind: TokenKind::Space, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); + let new_token = Token { + kind: TokenKind::Space, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); } - } else { - let new_token = Token{ kind: TokenKind::Space, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - } } /// Handles newlines. fn update_newline(tokenized: &mut Tokenized, grapheme: &str) { - if let Some(last) = tokenized.tokens.last() { - tokenized.cur_pos.column += last.length(); - } - - let new_token = Token{ kind: TokenKind::NewLine, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - tokenized.cur_pos.line += 1; - tokenized.cur_pos.column = 0; + if let Some(last) = tokenized.tokens.last() { + tokenized.cur_pos.column += last.length(); + } + + let new_token = Token { + kind: TokenKind::NewLine, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + tokenized.cur_pos.line += 1; + tokenized.cur_pos.column = 0; } /// Handles bold, italic and any combination of them. 
fn update_asterisk(tokenized: &mut Tokenized, grapheme: &str) { - match tokenized.tokens.pop() { - Some(mut last) => { - if last.kind == TokenKind::ItalicOpen { - last.content.push_str(grapheme); - - if tokenized.open_tokens.get(&TokenKind::BoldOpen).is_some() { - let preceding_token = tokenized.tokens.last().expect("Tokens must not be empty, because open token exists"); - if preceding_token.is_space_or_newline() { - // Close after space is not allowed - last.kind = TokenKind::Plain; - } else { - last.kind = TokenKind::BoldClose; - } - } else { - last.kind = TokenKind::BoldOpen; - } - tokenized.tokens.push(last); - } else if last.kind == TokenKind::BoldOpen { - if tokenized.open_tokens.get(&TokenKind::ItalicOpen).is_some() { - // Handles cases like `*italic***bold**` - let preceding_token = tokenized.tokens.last().expect("Tokens must not be empty, because open token exists"); - if preceding_token.is_space_or_newline() { - // If Space is before `***`, it is split into [plain|italicClose|italicOpen] -> `*before ***after*` = `[io]before *[ic][io]after[ic] - last.kind = TokenKind::Plain; - last.content = TokenKind::ItalicOpen.as_str().to_string(); - tokenized.cur_pos.column += last.length(); - tokenized.tokens.push(last); - - let italic_close_token = Token { kind: TokenKind::ItalicClose, content: TokenKind::ItalicClose.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.cur_pos.column += italic_close_token.length(); - tokenized.tokens.push(italic_close_token); - - let italic_open_token = Token { kind: TokenKind::ItalicOpen, content: TokenKind::ItalicClose.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(italic_open_token); - } else { - last.kind = TokenKind::ItalicClose; - last.content = TokenKind::ItalicClose.as_str().to_string(); - tokenized.cur_pos.column += last.length(); - tokenized.tokens.push(last); - - let bold_open_token = Token { kind: TokenKind::BoldOpen, content: TokenKind::BoldOpen.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(bold_open_token); - } - } else { - last.kind = TokenKind::BoldItalicOpen; - last.content.push_str(grapheme); - tokenized.tokens.push(last); - } - } else if last.kind == TokenKind::BoldItalicOpen { - // Handles `****` by converting the leftmost `*` to plain. - // If no italic, bold or bolditalic open token is present before, bolditalicopen is kept as is. - // Otherwise, italic, bold or bolditalic closing tokens are taken from the remaining three `*`. 
- last.kind = TokenKind::Plain; - last.content = TokenKind::ItalicOpen.as_str().to_string(); - tokenized.cur_pos.column += last.length(); - - if (tokenized.open_tokens.contains_key(&TokenKind::ItalicOpen) && tokenized.open_tokens.contains_key(&TokenKind::BoldOpen)) - || tokenized.open_tokens.contains_key(&TokenKind::BoldItalicOpen) { - - tokenized.tokens.push(last); - - let combined_close_token = Token { kind: TokenKind::BoldItalicClose, content: TokenKind::BoldItalicClose.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(combined_close_token); - } else if tokenized.open_tokens.contains_key(&TokenKind::ItalicOpen) { - tokenized.tokens.push(last); - - let italic_close_token = Token { kind: TokenKind::ItalicClose, content: TokenKind::ItalicClose.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(italic_close_token); - - let bold_open_token = Token { kind: TokenKind::BoldOpen, content: TokenKind::BoldOpen.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(bold_open_token); - } else if tokenized.open_tokens.contains_key(&TokenKind::BoldOpen) { - tokenized.tokens.push(last); - - let bold_close_token = Token { kind: TokenKind::BoldClose, content: TokenKind::BoldClose.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.cur_pos.column += bold_close_token.length(); - tokenized.tokens.push(bold_close_token); - - let italic_open_token = Token { kind: TokenKind::ItalicOpen, content: TokenKind::ItalicOpen.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(italic_open_token); - } else { - match tokenized.tokens.last_mut() { - Some(prev) => { - if prev.kind == TokenKind::Plain { - prev.content.push_str(&last.content); - } else { + match tokenized.tokens.pop() { + Some(mut last) => { + if last.kind == TokenKind::ItalicOpen { + last.content.push_str(grapheme); + + if tokenized.open_tokens.get(&TokenKind::BoldOpen).is_some() { + let preceding_token = tokenized + .tokens + .last() + .expect("Tokens must not be empty, because open token exists"); + if preceding_token.is_space_or_newline() { + // Close after space is not allowed + last.kind = TokenKind::Plain; + } else { + last.kind = TokenKind::BoldClose; + } + } else { + last.kind = TokenKind::BoldOpen; + } tokenized.tokens.push(last); - } - }, - None => { - tokenized.tokens.push(last); - }, - } + } else if last.kind == TokenKind::BoldOpen { + if tokenized.open_tokens.get(&TokenKind::ItalicOpen).is_some() { + // Handles cases like `*italic***bold**` + let preceding_token = tokenized + .tokens + .last() + .expect("Tokens must not be empty, because open token exists"); + if preceding_token.is_space_or_newline() { + // If Space is before `***`, it is split into [plain|italicClose|italicOpen] -> `*before ***after*` = `[io]before *[ic][io]after[ic] + last.kind = TokenKind::Plain; + last.content = TokenKind::ItalicOpen.as_str().to_string(); + tokenized.cur_pos.column += last.length(); + tokenized.tokens.push(last); + + let italic_close_token = Token { + kind: TokenKind::ItalicClose, + content: TokenKind::ItalicClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.cur_pos.column += italic_close_token.length(); + tokenized.tokens.push(italic_close_token); + + let italic_open_token = Token { + kind: TokenKind::ItalicOpen, + content: TokenKind::ItalicClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(italic_open_token); + } else { + last.kind = TokenKind::ItalicClose; + last.content = 
TokenKind::ItalicClose.as_str().to_string(); + tokenized.cur_pos.column += last.length(); + tokenized.tokens.push(last); + + let bold_open_token = Token { + kind: TokenKind::BoldOpen, + content: TokenKind::BoldOpen.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(bold_open_token); + } + } else { + last.kind = TokenKind::BoldItalicOpen; + last.content.push_str(grapheme); + tokenized.tokens.push(last); + } + } else if last.kind == TokenKind::BoldItalicOpen { + // Handles `****` by converting the leftmost `*` to plain. + // If no italic, bold or bolditalic open token is present before, bolditalicopen is kept as is. + // Otherwise, italic, bold or bolditalic closing tokens are taken from the remaining three `*`. + last.kind = TokenKind::Plain; + last.content = TokenKind::ItalicOpen.as_str().to_string(); + tokenized.cur_pos.column += last.length(); - let combined_open_token = Token { kind: TokenKind::BoldItalicOpen, content: TokenKind::BoldItalicOpen.as_str().to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(combined_open_token); - } - } else if last.kind == TokenKind::ItalicClose { - if tokenized.open_tokens.contains_key(&TokenKind::BoldItalicOpen) - || tokenized.open_tokens.contains_key(&TokenKind::BoldOpen) { - last.kind = TokenKind::BoldClose; - last.content.push_str(grapheme); - tokenized.tokens.push(last); - } else { - last.kind = TokenKind::BoldOpen; - last.content.push_str(grapheme); - tokenized.tokens.push(last); - } - } else if last.kind == TokenKind::BoldClose { - if tokenized.open_tokens.contains_key(&TokenKind::BoldItalicOpen) { - last.content.push_str(grapheme); - last.kind = TokenKind::BoldItalicClose; - tokenized.tokens.push(last); - } else { - match tokenized.open_tokens.get(&TokenKind::ItalicOpen) { - Some(italic_index) => { - let bold_index = tokenized.open_tokens.get(&TokenKind::BoldOpen).unwrap(); - if italic_index < bold_index { - last.kind = TokenKind::BoldClose; - last.content = TokenKind::BoldClose.as_str().to_string(); + if (tokenized.open_tokens.contains_key(&TokenKind::ItalicOpen) + && tokenized.open_tokens.contains_key(&TokenKind::BoldOpen)) + || tokenized + .open_tokens + .contains_key(&TokenKind::BoldItalicOpen) + { + tokenized.tokens.push(last); + + let combined_close_token = Token { + kind: TokenKind::BoldItalicClose, + content: TokenKind::BoldItalicClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(combined_close_token); + } else if tokenized.open_tokens.contains_key(&TokenKind::ItalicOpen) { + tokenized.tokens.push(last); + + let italic_close_token = Token { + kind: TokenKind::ItalicClose, + content: TokenKind::ItalicClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(italic_close_token); + + let bold_open_token = Token { + kind: TokenKind::BoldOpen, + content: TokenKind::BoldOpen.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(bold_open_token); + } else if tokenized.open_tokens.contains_key(&TokenKind::BoldOpen) { + tokenized.tokens.push(last); + + let bold_close_token = Token { + kind: TokenKind::BoldClose, + content: TokenKind::BoldClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.cur_pos.column += bold_close_token.length(); + tokenized.tokens.push(bold_close_token); + + let italic_open_token = Token { + kind: TokenKind::ItalicOpen, + content: TokenKind::ItalicOpen.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(italic_open_token); + } else { 
+ match tokenized.tokens.last_mut() { + Some(prev) => { + if prev.kind == TokenKind::Plain { + prev.content.push_str(&last.content); + } else { + tokenized.tokens.push(last); + } + } + None => { + tokenized.tokens.push(last); + } + } + + let combined_open_token = Token { + kind: TokenKind::BoldItalicOpen, + content: TokenKind::BoldItalicOpen.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(combined_open_token); + } + } else if last.kind == TokenKind::ItalicClose { + if tokenized + .open_tokens + .contains_key(&TokenKind::BoldItalicOpen) + || tokenized.open_tokens.contains_key(&TokenKind::BoldOpen) + { + last.kind = TokenKind::BoldClose; + last.content.push_str(grapheme); + tokenized.tokens.push(last); + } else { + last.kind = TokenKind::BoldOpen; + last.content.push_str(grapheme); + tokenized.tokens.push(last); + } + } else if last.kind == TokenKind::BoldClose { + if tokenized + .open_tokens + .contains_key(&TokenKind::BoldItalicOpen) + { + last.content.push_str(grapheme); + last.kind = TokenKind::BoldItalicClose; + tokenized.tokens.push(last); + } else { + match tokenized.open_tokens.get(&TokenKind::ItalicOpen) { + Some(italic_index) => { + let bold_index = + tokenized.open_tokens.get(&TokenKind::BoldOpen).unwrap(); + if italic_index < bold_index { + last.kind = TokenKind::BoldClose; + last.content = TokenKind::BoldClose.as_str().to_string(); + tokenized.cur_pos.column += last.length(); + tokenized.tokens.push(last); + let new_token = Token { + kind: TokenKind::ItalicClose, + content: TokenKind::ItalicClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } else { + last.kind = TokenKind::ItalicClose; + last.content = TokenKind::ItalicClose.as_str().to_string(); + tokenized.cur_pos.column += last.length(); + tokenized.tokens.push(last); + let new_token = Token { + kind: TokenKind::BoldClose, + content: TokenKind::BoldClose.as_str().to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } + } + None => { + // Handles `**bold***italic*` -> [bo]bold[bc][io]italic[ic] + tokenized.cur_pos.column += last.length(); + tokenized.tokens.push(last); + let new_token = Token { + kind: TokenKind::ItalicOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); + } + } + } + } else if last.kind == TokenKind::BoldItalicClose { + // Handles `***bold & italic****italic*` -> [bio]bold & italic[bic][io]italic[ic] tokenized.cur_pos.column += last.length(); tokenized.tokens.push(last); - let new_token = Token{ - kind: TokenKind::ItalicClose, - content: TokenKind::ItalicClose.as_str().to_string(), - position: tokenized.cur_pos + let new_token = Token { + kind: TokenKind::ItalicOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, }; tokenized.tokens.push(new_token); - } else { - last.kind = TokenKind::ItalicClose; - last.content = TokenKind::ItalicClose.as_str().to_string(); + } else { + let new_token; tokenized.cur_pos.column += last.length(); + if tokenized.open_tokens.contains_key(&TokenKind::ItalicOpen) + || tokenized.open_tokens.contains_key(&TokenKind::BoldOpen) + || tokenized + .open_tokens + .contains_key(&TokenKind::BoldItalicOpen) + { + if last.is_space_or_newline() { + // Closing not allowed after space + new_token = Token { + kind: TokenKind::ItalicOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + } else { + new_token = Token { + kind: TokenKind::ItalicClose, + content: grapheme.to_string(), + 
position: tokenized.cur_pos, + }; + } + } else { + new_token = Token { + kind: TokenKind::ItalicOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + } + tokenized.tokens.push(last); - let new_token = Token{ - kind: TokenKind::BoldClose, - content: TokenKind::BoldClose.as_str().to_string(), - position: tokenized.cur_pos - }; tokenized.tokens.push(new_token); - } - }, - None => { - // Handles `**bold***italic*` -> [bo]bold[bc][io]italic[ic] - tokenized.cur_pos.column += last.length(); - tokenized.tokens.push(last); - let new_token = Token{ kind: TokenKind::ItalicOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); } - } } - } else if last.kind == TokenKind::BoldItalicClose { - // Handles `***bold & italic****italic*` -> [bio]bold & italic[bic][io]italic[ic] - tokenized.cur_pos.column += last.length(); - tokenized.tokens.push(last); - let new_token = Token{ kind: TokenKind::ItalicOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - } else { - let new_token; - tokenized.cur_pos.column += last.length(); - if tokenized.open_tokens.contains_key(&TokenKind::ItalicOpen) - || tokenized.open_tokens.contains_key(&TokenKind::BoldOpen) - || tokenized.open_tokens.contains_key(&TokenKind::BoldItalicOpen) { - - if last.is_space_or_newline() { - // Closing not allowed after space - new_token = Token{ kind: TokenKind::ItalicOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; - } else { - new_token = Token{ kind: TokenKind::ItalicClose, content: grapheme.to_string(), position: tokenized.cur_pos }; - } - } else { - new_token = Token{ kind: TokenKind::ItalicOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; + None => { + let new_token = Token { + kind: TokenKind::ItalicOpen, + content: grapheme.to_string(), + position: tokenized.cur_pos, + }; + tokenized.tokens.push(new_token); } - - tokenized.tokens.push(last); - tokenized.tokens.push(new_token); - } - }, - None => { - let new_token = Token{ kind: TokenKind::ItalicOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; - tokenized.tokens.push(new_token); - }, - } + } } /// Cleans up open tokens. -/// +/// /// Remaining open tokens that have no matching close token get converted to plain. -/// Neighboring plain tokens get merged with the open token. +/// Neighboring plain tokens get merged with the open token. fn cleanup_loose_open_tokens(tokenized: &mut Tokenized) { - let open_tokens = tokenized.open_tokens.clone(); - let mut open_indizes: Vec<_> = open_tokens.values().collect(); - open_indizes.sort(); - open_indizes.reverse(); - - for index in open_indizes { - try_plain_token_merge(tokenized, *index); - } + let open_tokens = tokenized.open_tokens.clone(); + let mut open_indizes: Vec<_> = open_tokens.values().collect(); + open_indizes.sort(); + open_indizes.reverse(); + + for index in open_indizes { + try_plain_token_merge(tokenized, *index); + } } /// Function that tries to convert a token to `Plain` /// and merge it with previous and/or next token, if they are also `Plain`. 
fn try_plain_token_merge(tokenized: &mut Tokenized, index: usize) { - if index >= tokenized.tokens.len() { - return; - } - - let mut token = tokenized.tokens.remove(index); - token.kind = TokenKind::Plain; - if index < tokenized.tokens.len() { - let next_token = tokenized.tokens.remove(index); - if next_token.kind == TokenKind::Plain { - token.content.push_str(&next_token.content); - } else { - tokenized.tokens.insert(index, next_token); + if index >= tokenized.tokens.len() { + return; } - } - if index > 0 { - if let Some(prev_token) = tokenized.tokens.get_mut(index - 1) { - if prev_token.kind == TokenKind::Plain { - prev_token.content.push_str(&token.content); - } else { - tokenized.tokens.insert(index, token); - } + let mut token = tokenized.tokens.remove(index); + token.kind = TokenKind::Plain; + if index < tokenized.tokens.len() { + let next_token = tokenized.tokens.remove(index); + if next_token.kind == TokenKind::Plain { + token.content.push_str(&next_token.content); + } else { + tokenized.tokens.insert(index, next_token); + } + } + + if index > 0 { + if let Some(prev_token) = tokenized.tokens.get_mut(index - 1) { + if prev_token.kind == TokenKind::Plain { + prev_token.content.push_str(&token.content); + } else { + tokenized.tokens.insert(index, token); + } + } else { + tokenized.tokens.insert(index, token); + } } else { - tokenized.tokens.insert(index, token); + tokenized.tokens.insert(index, token); } - } else { - tokenized.tokens.insert(index, token); - } } diff --git a/inline/src/tokenizer/tokens.rs b/inline/src/tokenizer/tokens.rs index 33dafe0a..77572f94 100644 --- a/inline/src/tokenizer/tokens.rs +++ b/inline/src/tokenizer/tokens.rs @@ -11,191 +11,191 @@ pub type Tokens = Vec; /// content and position inside a given input. #[derive(Debug, Default, Clone, PartialEq)] pub struct Token { - /// The token kind identifies the token parts of an Unimarkup inline element - pub kind: TokenKind, - /// The content of the token - pub content: String, - /// The starting position of this token inside a given input - pub position: Position, + /// The token kind identifies the token parts of an Unimarkup inline element + pub kind: TokenKind, + /// The content of the token + pub content: String, + /// The starting position of this token inside a given input + pub position: Position, } impl Token { - /// Returns the content length of a token. - /// The length is the number of Unicode graphemes inside the content. - pub fn length(&self) -> usize { - if self.kind == TokenKind::NewLine { - return 0; + /// Returns the content length of a token. + /// The length is the number of Unicode graphemes inside the content. + pub fn length(&self) -> usize { + if self.kind == TokenKind::NewLine { + return 0; + } + self.content.graphemes(true).count() + } + + /// Shows if a token is of kind space or newline. + pub fn is_space_or_newline(&self) -> bool { + self.kind.is_space_or_newline() + } + + /// Shows if a token is closing a scope inside a given input. + /// Closing scopes may be closing text groups, closing attribute blocks, ... + pub fn closes_scope(&self) -> bool { + self.kind == TokenKind::TextGroupClose } - self.content.graphemes(true).count() - } - - /// Shows if a token is of kind space or newline. - pub fn is_space_or_newline(&self) -> bool { - self.kind.is_space_or_newline() - } - - /// Shows if a token is closing a scope inside a given input. - /// Closing scopes may be closing text groups, closing attribute blocks, ... 
- pub fn closes_scope(&self) -> bool { - self.kind == TokenKind::TextGroupClose - } } -/// Enum defining all special single graphemes understood by Unimarkup. +/// Enum defining all special single graphemes understood by Unimarkup. #[derive(Debug, Clone, PartialEq)] pub enum SingleTokenKind { - /// Default kind for all non-special graphemes. - Plain, - /// Represents a newline grapheme. - Newline, - /// Represents a grapheme that has the Unicode whitespace property and is not a newline. - Space, - /// Represents `\`. - Backslash, - // ExclamationMark, - // Ampersand, - // Colon, - // Caret, - // Underscore, - /// Represents `*`. - Asterisk, - // Plus, - /// Represents `` ` ``. - Accent, - /// Represents `[`. - LeftSquareBracket, - /// Represents `]`. - RightSquareBracket, + /// Default kind for all non-special graphemes. + Plain, + /// Represents a newline grapheme. + Newline, + /// Represents a grapheme that has the Unicode whitespace property and is not a newline. + Space, + /// Represents `\`. + Backslash, + // ExclamationMark, + // Ampersand, + // Colon, + // Caret, + // Underscore, + /// Represents `*`. + Asterisk, + // Plus, + /// Represents `` ` ``. + Accent, + /// Represents `[`. + LeftSquareBracket, + /// Represents `]`. + RightSquareBracket, } /// Enum representing tokens that are part of Unimarkup inline elements. #[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] pub enum TokenKind { - /// Represents the open part of bold inline formatting. - BoldOpen, - /// Represents the closing part of bold inline formatting. - BoldClose, - /// Represents the open part of italic inline formatting. - ItalicOpen, - /// Represents the closing part of italic inline formatting. - ItalicClose, - /// Represents the combined open part of bold and italic inline formatting. - BoldItalicOpen, - /// Represents the combined closing part of bold and italic inline formatting. - BoldItalicClose, - /// Represents the open part of verbatim inline formatting. - VerbatimOpen, - /// Represents the closing part of verbatim inline formatting. - VerbatimClose, - /// Represents a plain text part. - Plain, - /// Represents the open part of an inline emoji shortcut. - EmojiOpen, - // EmojiClose, - /// Represents a grapheme that is escaped by a backslash. - EscapedGrapheme, - /// Represents a newline as defined by `is_newline()`. - NewLine, - /// Represents a grapheme that has the Unicode whitespace property and is not a newline. - Space, - // CommentOpen, - // CommentClose, - // DirectUnicode, - /// Represents the open part of an inline text group. - TextGroupOpen, - /// Represents the closing part of an inline text group. - TextGroupClose, - /// Represents the end of a given input. - Eoi, + /// Represents the open part of bold inline formatting. + BoldOpen, + /// Represents the closing part of bold inline formatting. + BoldClose, + /// Represents the open part of italic inline formatting. + ItalicOpen, + /// Represents the closing part of italic inline formatting. + ItalicClose, + /// Represents the combined open part of bold and italic inline formatting. + BoldItalicOpen, + /// Represents the combined closing part of bold and italic inline formatting. + BoldItalicClose, + /// Represents the open part of verbatim inline formatting. + VerbatimOpen, + /// Represents the closing part of verbatim inline formatting. + VerbatimClose, + /// Represents a plain text part. + Plain, + /// Represents the open part of an inline emoji shortcut. 
+ EmojiOpen, + // EmojiClose, + /// Represents a grapheme that is escaped by a backslash. + EscapedGrapheme, + /// Represents a newline as defined by `is_newline()`. + NewLine, + /// Represents a grapheme that has the Unicode whitespace property and is not a newline. + Space, + // CommentOpen, + // CommentClose, + // DirectUnicode, + /// Represents the open part of an inline text group. + TextGroupOpen, + /// Represents the closing part of an inline text group. + TextGroupClose, + /// Represents the end of a given input. + Eoi, } impl Default for TokenKind { - /// Returns `Plain` as default token. - fn default() -> Self { - TokenKind::Plain - } + /// Returns `Plain` as default token. + fn default() -> Self { + TokenKind::Plain + } } impl TokenKind { - /// Returns the string representation for a token. - /// - /// e.g. `**` for BoldOpen and BoldClose. - pub fn as_str(&self) -> &'static str { - match *self { - TokenKind::BoldOpen => "**", - TokenKind::BoldClose => TokenKind::BoldOpen.as_str(), - TokenKind::ItalicOpen => "*", - TokenKind::ItalicClose => TokenKind::ItalicOpen.as_str(), - TokenKind::BoldItalicOpen => "***", - TokenKind::BoldItalicClose => TokenKind::BoldItalicOpen.as_str(), - TokenKind::VerbatimOpen => "`", - TokenKind::VerbatimClose => TokenKind::VerbatimOpen.as_str(), - TokenKind::EmojiOpen => "::", - // TokenKind::EmojiClose => TokenKind::EmojiOpen.as_str(), - // TokenKind::CommentOpen => ";;", - // TokenKind::CommentClose => TokenKind::CommentOpen.as_str(), - TokenKind::TextGroupOpen => "[", - TokenKind::TextGroupClose => "]", - - // Note: Below are only placeholder valus - TokenKind::Plain => "", - TokenKind::EscapedGrapheme => "\\", - TokenKind::NewLine => "\n", - TokenKind::Space => " ", - // TokenKind::DirectUnicode => "&U+1F816;", - TokenKind::Eoi => "", + /// Returns the string representation for a token. + /// + /// e.g. `**` for BoldOpen and BoldClose. + pub fn as_str(&self) -> &'static str { + match *self { + TokenKind::BoldOpen => "**", + TokenKind::BoldClose => TokenKind::BoldOpen.as_str(), + TokenKind::ItalicOpen => "*", + TokenKind::ItalicClose => TokenKind::ItalicOpen.as_str(), + TokenKind::BoldItalicOpen => "***", + TokenKind::BoldItalicClose => TokenKind::BoldItalicOpen.as_str(), + TokenKind::VerbatimOpen => "`", + TokenKind::VerbatimClose => TokenKind::VerbatimOpen.as_str(), + TokenKind::EmojiOpen => "::", + // TokenKind::EmojiClose => TokenKind::EmojiOpen.as_str(), + // TokenKind::CommentOpen => ";;", + // TokenKind::CommentClose => TokenKind::CommentOpen.as_str(), + TokenKind::TextGroupOpen => "[", + TokenKind::TextGroupClose => "]", + + // Note: Below are only placeholder valus + TokenKind::Plain => "", + TokenKind::EscapedGrapheme => "\\", + TokenKind::NewLine => "\n", + TokenKind::Space => " ", + // TokenKind::DirectUnicode => "&U+1F816;", + TokenKind::Eoi => "", + } } - } - /// Shows if a token is either a space or newline. - pub fn is_space_or_newline(&self) -> bool { - self == &TokenKind::Space || self == &TokenKind::NewLine - } + /// Shows if a token is either a space or newline. + pub fn is_space_or_newline(&self) -> bool { + self == &TokenKind::Space || self == &TokenKind::NewLine + } } /// Trait to convert a type into a single token. pub trait AsSingleTokenKind { - /// Converts given type into a SingleTokenKind. - /// - /// e.g. `*` --> `SingleTokenKind::Asterisk` - fn as_single_token_kind(&self) -> SingleTokenKind; + /// Converts given type into a SingleTokenKind. + /// + /// e.g. 
`*` --> `SingleTokenKind::Asterisk` + fn as_single_token_kind(&self) -> SingleTokenKind; } impl AsSingleTokenKind for &str { fn as_single_token_kind(&self) -> SingleTokenKind { - match *self { - "*" => { SingleTokenKind::Asterisk }, - "\\" => { SingleTokenKind::Backslash }, - "`" => { SingleTokenKind::Accent }, - "[" => { SingleTokenKind::LeftSquareBracket }, - "]" => { SingleTokenKind::RightSquareBracket }, - grapheme => { - if grapheme.is_newline() { - return SingleTokenKind::Newline; - } else if grapheme.trim().is_empty() { - return SingleTokenKind::Space; - } - SingleTokenKind::Plain + match *self { + "*" => SingleTokenKind::Asterisk, + "\\" => SingleTokenKind::Backslash, + "`" => SingleTokenKind::Accent, + "[" => SingleTokenKind::LeftSquareBracket, + "]" => SingleTokenKind::RightSquareBracket, + grapheme => { + if grapheme.is_newline() { + return SingleTokenKind::Newline; + } else if grapheme.trim().is_empty() { + return SingleTokenKind::Space; + } + SingleTokenKind::Plain + } } - } } } pub trait Newline { - /// Note: Only temporary solution until rust supports is_newline() per default. - fn is_newline(&self) -> bool; + /// Note: Only temporary solution until rust supports is_newline() per default. + fn is_newline(&self) -> bool; } impl Newline for &str { - /// Treats `\n`, `\r\n` and `\r` as one newline. - fn is_newline(&self) -> bool { - let s = *self; - s == "\n" || s == "\r\n" || s == "\r" - } + /// Treats `\n`, `\r\n` and `\r` as one newline. + fn is_newline(&self) -> bool { + let s = *self; + s == "\n" || s == "\r\n" || s == "\r" + } } impl Newline for String { - fn is_newline(&self) -> bool { - self.as_str().is_newline() - } + fn is_newline(&self) -> bool { + self.as_str().is_newline() + } } diff --git a/inline/tests/ast/bold_italic.rs b/inline/tests/ast/bold_italic.rs index 71e2863d..11c019e7 100644 --- a/inline/tests/ast/bold_italic.rs +++ b/inline/tests/ast/bold_italic.rs @@ -1,52 +1,49 @@ -use unimarkup_inline::{parse, InlineKind, FlatInline, Span, Position, NestedInline}; +use unimarkup_inline::{parse, FlatInline, InlineKind, NestedInline, Position, Span}; use crate::EXPECTED_MSG; #[test] pub fn test_parser__plain_before_italic() { - let input = "plain text *italic*"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "plain text ".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 - }, - end: Position{ - line: 0, - column: 11 - } - } - }), - InlineKind::Italic(NestedInline{ - content: vec![InlineKind::Plain(FlatInline{ - content: "italic".to_string(), - span: Span { - start: Position{ - line: 0, - column: 12 - }, - end: Position{ - line: 0, - column: 18 - } - } - })], - span: Span { - start: Position{ - line: 0, - column: 11 - }, - end: Position{ - line: 0, - column: 19 - } - } - }) - ]; + let input = "plain text *italic*"; + let expected = [ + InlineKind::Plain(FlatInline { + content: "plain text ".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { + line: 0, + column: 11, + }, + }, + }), + InlineKind::Italic(NestedInline { + content: vec![InlineKind::Plain(FlatInline { + content: "italic".to_string(), + span: Span { + start: Position { + line: 0, + column: 12, + }, + end: Position { + line: 0, + column: 18, + }, + }, + })], + span: Span { + start: Position { + line: 0, + column: 11, + }, + end: Position { + line: 0, + column: 19, + }, + }, + }), + ]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, 
expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/ast/escaping.rs b/inline/tests/ast/escaping.rs index c6f36009..7edc9abf 100644 --- a/inline/tests/ast/escaping.rs +++ b/inline/tests/ast/escaping.rs @@ -1,88 +1,63 @@ -use unimarkup_inline::{parse, InlineKind, FlatInline, Span, Position}; +use unimarkup_inline::{parse, FlatInline, InlineKind, Position, Span}; use crate::EXPECTED_MSG; #[test] pub fn test_parser__escape_space() { - let input = "\\ "; - let expected = [ - InlineKind::EscapedSpace(FlatInline{ - content: " ".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "\\ "; + let expected = [InlineKind::EscapedSpace(FlatInline { + content: " ".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 0, column: 2 }, }, - end: Position{ - line: 0, - column: 2 - } - } - }), - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__escape_plain() { - let input = "\\plain"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "plain".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "\\plain"; + let expected = [InlineKind::Plain(FlatInline { + content: "plain".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { + line: 0, + column: 6, // note that the backslash is taken into account + }, }, - end: Position{ - line: 0, - column: 6 // note that the backslash is taken into account - } - } - }), - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__escape_newline_after_plain() { - let input = "plain\\\n"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "plain".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 - }, - end: Position{ - line: 0, - column: 5 - } - } - }), - InlineKind::EscapedNewLine(FlatInline{ - content: "\n".to_string(), - span: Span { - start: Position{ - line: 0, - column: 5 - }, - end: Position{ - line: 0, - column: 7 - } - } - }), - ]; - - let actual = parse(input).unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "plain\\\n"; + let expected = [ + InlineKind::Plain(FlatInline { + content: "plain".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 0, column: 5 }, + }, + }), + InlineKind::EscapedNewLine(FlatInline { + content: "\n".to_string(), + span: Span { + start: Position { line: 0, column: 5 }, + end: Position { line: 0, column: 7 }, + }, + }), + ]; + + let actual = parse(input).unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/ast/mixed.rs b/inline/tests/ast/mixed.rs index e69de29b..8b137891 100644 --- a/inline/tests/ast/mixed.rs +++ b/inline/tests/ast/mixed.rs @@ -0,0 +1 @@ + diff --git a/inline/tests/ast/mixed_nested.rs b/inline/tests/ast/mixed_nested.rs index e69de29b..8b137891 100644 --- a/inline/tests/ast/mixed_nested.rs +++ b/inline/tests/ast/mixed_nested.rs @@ -0,0 +1 @@ + diff --git a/inline/tests/ast/offseted.rs b/inline/tests/ast/offseted.rs index a09ba3a4..c077f624 100644 --- a/inline/tests/ast/offseted.rs +++ b/inline/tests/ast/offseted.rs @@ -1,55 +1,57 @@ -use unimarkup_inline::{InlineKind, FlatInline, Span, Position, 
parse_with_offset}; +use unimarkup_inline::{parse_with_offset, FlatInline, InlineKind, Position, Span}; use crate::EXPECTED_MSG; - #[test] pub fn test_parser_with_offset__newline_between_plain() { - let offset = Position{ line: 100, column: 2 }; - let input = "line1\nline2"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "line1".to_string(), - span: Span { - start: Position{ - line: offset.line, - column: offset.column - }, - end: Position{ - line: offset.line, - column: offset.column + 5 - } - } - }), - InlineKind::PlainNewLine(FlatInline{ - content: " ".to_string(), - span: Span { - start: Position{ - line: offset.line, - column: offset.column + 5 - }, - end: Position{ - line: offset.line, - column: offset.column + 5 - } - } - }), - InlineKind::Plain(FlatInline{ - content: "line2".to_string(), - span: Span { - start: Position{ - line: offset.line + 1, - column: 0 - }, - end: Position{ - line: offset.line + 1, - column: 5 - } - } - }), - ]; + let offset = Position { + line: 100, + column: 2, + }; + let input = "line1\nline2"; + let expected = [ + InlineKind::Plain(FlatInline { + content: "line1".to_string(), + span: Span { + start: Position { + line: offset.line, + column: offset.column, + }, + end: Position { + line: offset.line, + column: offset.column + 5, + }, + }, + }), + InlineKind::PlainNewLine(FlatInline { + content: " ".to_string(), + span: Span { + start: Position { + line: offset.line, + column: offset.column + 5, + }, + end: Position { + line: offset.line, + column: offset.column + 5, + }, + }, + }), + InlineKind::Plain(FlatInline { + content: "line2".to_string(), + span: Span { + start: Position { + line: offset.line + 1, + column: 0, + }, + end: Position { + line: offset.line + 1, + column: 5, + }, + }, + }), + ]; - let actual = parse_with_offset(input, offset).unwrap(); + let actual = parse_with_offset(input, offset).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/ast/substitutions.rs b/inline/tests/ast/substitutions.rs index ff6a6bb3..38afc4ad 100644 --- a/inline/tests/ast/substitutions.rs +++ b/inline/tests/ast/substitutions.rs @@ -1,99 +1,76 @@ -use unimarkup_inline::{parse, InlineKind, FlatInline, Span, Position}; +use unimarkup_inline::{parse, FlatInline, InlineKind, Position, Span}; use crate::EXPECTED_MSG; #[test] pub fn test_parser__arrow_substitution() { - let input = "-->"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "๐Ÿ –".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "-->"; + let expected = [InlineKind::Plain(FlatInline { + content: "๐Ÿ –".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 0, column: 3 }, }, - end: Position{ - line: 0, - column: 3 - } - } - }), - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__emoji_substitution_inside_text() { - let input = "substituted :D smiley"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "substituted ๐Ÿ˜ƒ smiley".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "substituted :D smiley"; + let expected = [InlineKind::Plain(FlatInline { + content: "substituted ๐Ÿ˜ƒ smiley".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { + line: 0, + column: 21, + }, }, - end: 
Position{ - line: 0, - column: 21 - } - } - }), - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__smile_emoji_substitution() { - let input = "substituted ^^ smile"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "substituted ๐Ÿ˜„ smile".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "substituted ^^ smile"; + let expected = [InlineKind::Plain(FlatInline { + content: "substituted ๐Ÿ˜„ smile".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { + line: 0, + column: 20, + }, }, - end: Position{ - line: 0, - column: 20 - } - } - }), - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__expressionless_emoji_substitution() { - let input = "substituted -- expressionless"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "substituted ๐Ÿ˜‘ expressionless".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "substituted -- expressionless"; + let expected = [InlineKind::Plain(FlatInline { + content: "substituted ๐Ÿ˜‘ expressionless".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { + line: 0, + column: 29, + }, }, - end: Position{ - line: 0, - column: 29 - } - } - }), - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/ast/text_group.rs b/inline/tests/ast/text_group.rs index eb8e4e69..182d1426 100644 --- a/inline/tests/ast/text_group.rs +++ b/inline/tests/ast/text_group.rs @@ -1,40 +1,32 @@ -use unimarkup_inline::{parse, InlineKind, FlatInline, Span, Position, NestedInline, TextGroupAttributes}; +use unimarkup_inline::{ + parse, FlatInline, InlineKind, NestedInline, Position, Span, TextGroupAttributes, +}; use crate::EXPECTED_MSG; #[test] pub fn test_parser__plain_text_group() { - let input = "[group]"; - let expected = [ - InlineKind::TextGroup(NestedInline{ - content: vec![InlineKind::Plain(FlatInline{ - content: "group".to_string(), - span: Span { - start: Position{ - line: 0, - column: 1 - }, - end: Position{ - line: 0, - column: 6 - } - } - })], - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "[group]"; + let expected = [InlineKind::TextGroup( + NestedInline { + content: vec![InlineKind::Plain(FlatInline { + content: "group".to_string(), + span: Span { + start: Position { line: 0, column: 1 }, + end: Position { line: 0, column: 6 }, + }, + })], + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 0, column: 7 }, + }, }, - end: Position{ - line: 0, - column: 7 - } - } - }, - TextGroupAttributes{ ..Default::default() }) - ]; + TextGroupAttributes { + ..Default::default() + }, + )]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/ast/verbatim.rs b/inline/tests/ast/verbatim.rs index 5da8a76a..3a1f3e6b 100644 --- a/inline/tests/ast/verbatim.rs +++ b/inline/tests/ast/verbatim.rs @@ -1,169 +1,119 @@ -use unimarkup_inline::{parse, 
InlineKind, FlatInline, Span, Position, NestedInline}; +use unimarkup_inline::{parse, FlatInline, InlineKind, NestedInline, Position, Span}; use crate::EXPECTED_MSG; #[test] pub fn test_parser__verbatim_with_escaped_words_and_spaces() { - let input = "`es*ca*ping\\ in\\ner`"; - let expected = [ - InlineKind::Verbatim(NestedInline{ - content: vec![ - InlineKind::Plain(FlatInline{ - content: "es*ca*ping".to_string(), - span: Span { - start: Position{ - line: 0, - column: 1 + let input = "`es*ca*ping\\ in\\ner`"; + let expected = [InlineKind::Verbatim(NestedInline { + content: vec![ + InlineKind::Plain(FlatInline { + content: "es*ca*ping".to_string(), + span: Span { + start: Position { line: 0, column: 1 }, + end: Position { + line: 0, + column: 11, + }, + }, + }), + InlineKind::EscapedSpace(FlatInline { + content: " ".to_string(), + span: Span { + start: Position { + line: 0, + column: 11, + }, + end: Position { + line: 0, + column: 13, + }, + }, + }), + InlineKind::Plain(FlatInline { + content: "inner".to_string(), + span: Span { + start: Position { + line: 0, + column: 13, + }, + end: Position { + line: 0, + column: 19, + }, + }, + }), + ], + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { + line: 0, + column: 20, }, - end: Position{ - line: 0, - column: 11 - } - } - }), - InlineKind::EscapedSpace(FlatInline{ - content: " ".to_string(), - span: Span { - start: Position{ - line: 0, - column: 11 - }, - end: Position{ - line: 0, - column: 13 - } - } - }), - InlineKind::Plain(FlatInline{ - content: "inner".to_string(), - span: Span { - start: Position{ - line: 0, - column: 13 - }, - end: Position{ - line: 0, - column: 19 - } - } - }) - ], - span: Span { - start: Position{ - line: 0, - column: 0 }, - end: Position{ - line: 0, - column: 20 - } - } - }) - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__verbatim_with_plain_newline() { - let input = "`plain\nnewline`"; - let expected = [ - InlineKind::Verbatim(NestedInline{ - content: vec![ - InlineKind::Plain(FlatInline{ - content: "plain".to_string(), - span: Span { - start: Position{ - line: 0, - column: 1 - }, - end: Position{ - line: 0, - column: 6 - } - } - }), - InlineKind::PlainNewLine(FlatInline{ - content: " ".to_string(), - span: Span { - start: Position{ - line: 0, - column: 6 - }, - end: Position{ - line: 0, - column: 6 - } - } - }), - InlineKind::Plain(FlatInline{ - content: "newline".to_string(), - span: Span { - start: Position{ - line: 1, - column: 0 - }, - end: Position{ - line: 1, - column: 7 - } - } - }) - ], - span: Span { - start: Position{ - line: 0, - column: 0 + let input = "`plain\nnewline`"; + let expected = [InlineKind::Verbatim(NestedInline { + content: vec![ + InlineKind::Plain(FlatInline { + content: "plain".to_string(), + span: Span { + start: Position { line: 0, column: 1 }, + end: Position { line: 0, column: 6 }, + }, + }), + InlineKind::PlainNewLine(FlatInline { + content: " ".to_string(), + span: Span { + start: Position { line: 0, column: 6 }, + end: Position { line: 0, column: 6 }, + }, + }), + InlineKind::Plain(FlatInline { + content: "newline".to_string(), + span: Span { + start: Position { line: 1, column: 0 }, + end: Position { line: 1, column: 7 }, + }, + }), + ], + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 1, column: 8 }, }, - end: Position{ - line: 1, - column: 8 - } - } - 
}) - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_parser__verbatim_with_inner_text_group() { - let input = "`[group]`"; - let expected = [ - InlineKind::Verbatim(NestedInline{ - content: vec![ - InlineKind::Plain(FlatInline{ - content: "[group]".to_string(), - span: Span { - start: Position{ - line: 0, - column: 1 + let input = "`[group]`"; + let expected = [InlineKind::Verbatim(NestedInline { + content: vec![InlineKind::Plain(FlatInline { + content: "[group]".to_string(), + span: Span { + start: Position { line: 0, column: 1 }, + end: Position { line: 0, column: 8 }, }, - end: Position{ - line: 0, - column: 8 - } - } - }) - ], - span: Span { - start: Position{ - line: 0, - column: 0 + })], + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 0, column: 9 }, }, - end: Position{ - line: 0, - column: 9 - } - } - }) - ]; + })]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/ast/whitespaces.rs b/inline/tests/ast/whitespaces.rs index 154e6f0d..a8f4fc45 100644 --- a/inline/tests/ast/whitespaces.rs +++ b/inline/tests/ast/whitespaces.rs @@ -1,54 +1,35 @@ -use unimarkup_inline::{parse, InlineKind, FlatInline, Span, Position}; +use unimarkup_inline::{parse, FlatInline, InlineKind, Position, Span}; use crate::EXPECTED_MSG; - #[test] pub fn test_parser__newline_between_plain() { - let input = "line1\nline2"; - let expected = [ - InlineKind::Plain(FlatInline{ - content: "line1".to_string(), - span: Span { - start: Position{ - line: 0, - column: 0 - }, - end: Position{ - line: 0, - column: 5 - } - } - }), - InlineKind::PlainNewLine(FlatInline{ - content: " ".to_string(), - span: Span { - start: Position{ - line: 0, - column: 5 - }, - end: Position{ - line: 0, - column: 5 - } - } - }), - InlineKind::Plain(FlatInline{ - content: "line2".to_string(), - span: Span { - start: Position{ - line: 1, - column: 0 - }, - end: Position{ - line: 1, - column: 5 - } - } - }), - ]; + let input = "line1\nline2"; + let expected = [ + InlineKind::Plain(FlatInline { + content: "line1".to_string(), + span: Span { + start: Position { line: 0, column: 0 }, + end: Position { line: 0, column: 5 }, + }, + }), + InlineKind::PlainNewLine(FlatInline { + content: " ".to_string(), + span: Span { + start: Position { line: 0, column: 5 }, + end: Position { line: 0, column: 5 }, + }, + }), + InlineKind::Plain(FlatInline { + content: "line2".to_string(), + span: Span { + start: Position { line: 1, column: 0 }, + end: Position { line: 1, column: 5 }, + }, + }), + ]; - let actual = parse(input).unwrap(); + let actual = parse(input).unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/tests.rs b/inline/tests/tests.rs index bd65ad98..95afb1b1 100644 --- a/inline/tests/tests.rs +++ b/inline/tests/tests.rs @@ -1,26 +1,25 @@ - pub const EXPECTED_MSG: &str = "actual(left) != expected(right)"; #[allow(non_snake_case)] mod ast { - mod bold_italic; - mod escaping; - mod mixed_nested; - mod mixed; - mod offseted; - mod substitutions; - mod text_group; - mod verbatim; - mod whitespaces; + mod bold_italic; + mod escaping; + mod mixed; + mod mixed_nested; + mod offseted; + mod substitutions; + mod text_group; + mod 
verbatim; + mod whitespaces; } #[allow(non_snake_case)] mod tokenizer { - mod accent; - mod asterisk; - mod backslash; - mod mixed_nested; - mod mixed; - mod text_group; - mod whitespaces; + mod accent; + mod asterisk; + mod backslash; + mod mixed; + mod mixed_nested; + mod text_group; + mod whitespaces; } diff --git a/inline/tests/tokenizer/accent.rs b/inline/tests/tokenizer/accent.rs index f0dd5d7f..c39efb48 100644 --- a/inline/tests/tokenizer/accent.rs +++ b/inline/tests/tokenizer/accent.rs @@ -1,51 +1,126 @@ -use unimarkup_inline::{Token, TokenKind, Position, Tokenizer}; +use unimarkup_inline::{Position, Token, TokenKind, Tokenizer}; use crate::EXPECTED_MSG; #[test] pub fn test_tokenize__verbatim() { - let input = "`verbatim`"; - let expected = [ - Token{ kind: TokenKind::VerbatimOpen, content: "`".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "verbatim".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::VerbatimClose, content: "`".to_string(), position: Position { line: 0, column: 9 } }, - ]; + let input = "`verbatim`"; + let expected = [ + Token { + kind: TokenKind::VerbatimOpen, + content: "`".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "verbatim".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::VerbatimClose, + content: "`".to_string(), + position: Position { line: 0, column: 9 }, + }, + ]; - let actual = input.tokenize().unwrap(); + let actual = input.tokenize().unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__verbatim_escaped_close() { - let input = "`verbatim\\`still verbatim`"; - let expected = [ - Token{ kind: TokenKind::VerbatimOpen, content: "`".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "verbatim".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::EscapedGrapheme, content: "`".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::Plain, content: "still".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 16 } }, - Token{ kind: TokenKind::Plain, content: "verbatim".to_string(), position: Position { line: 0, column: 17 } }, - Token{ kind: TokenKind::VerbatimClose, content: "`".to_string(), position: Position { line: 0, column: 25 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "`verbatim\\`still verbatim`"; + let expected = [ + Token { + kind: TokenKind::VerbatimOpen, + content: "`".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "verbatim".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::EscapedGrapheme, + content: "`".to_string(), + position: Position { line: 0, column: 9 }, + }, + Token { + kind: TokenKind::Plain, + content: "still".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 16, + }, + }, + Token { + kind: TokenKind::Plain, + content: "verbatim".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, + Token { + kind: 
TokenKind::VerbatimClose, + content: "`".to_string(), + position: Position { + line: 0, + column: 25, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__verbatim_with_escaped_char() { - let input = "`text\\&text`"; - let expected = [ - Token{ kind: TokenKind::VerbatimOpen, content: "`".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::EscapedGrapheme, content: "&".to_string(), position: Position { line: 0, column: 5 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::VerbatimClose, content: "`".to_string(), position: Position { line: 0, column: 11 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "`text\\&text`"; + let expected = [ + Token { + kind: TokenKind::VerbatimOpen, + content: "`".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::EscapedGrapheme, + content: "&".to_string(), + position: Position { line: 0, column: 5 }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::VerbatimClose, + content: "`".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/tokenizer/asterisk.rs b/inline/tests/tokenizer/asterisk.rs index abfe0c1a..9476b653 100644 --- a/inline/tests/tokenizer/asterisk.rs +++ b/inline/tests/tokenizer/asterisk.rs @@ -1,388 +1,1048 @@ -use unimarkup_inline::{Token, TokenKind, Position, Tokenizer}; +use unimarkup_inline::{Position, Token, TokenKind, Tokenizer}; use crate::EXPECTED_MSG; #[test] pub fn test_tokenize__plain_before_italic() { - let input = "plain text *italic*"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "plain".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 5 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 12 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 18 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "plain text *italic*"; + let expected = [ + Token { + kind: TokenKind::Plain, + content: "plain".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 5 }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Space, + content: " 
".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { + line: 0, + column: 12, + }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 18, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__plain_after_bold() { - let input = "**bold** plain text"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::Plain, content: "plain".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 14 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 15 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "**bold** plain text"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::Plain, + content: "plain".to_string(), + position: Position { line: 0, column: 9 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 14, + }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { + line: 0, + column: 15, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__right_side_nested() { - let input = "**bold and *italic***"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Plain, content: "and".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 12 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 18 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), 
position: Position { line: 0, column: 19 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "**bold and *italic***"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Plain, + content: "and".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { + line: 0, + column: 12, + }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 18, + }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { + line: 0, + column: 19, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__bold_with_unopened_italic() { - let input = "**bold no-italic* bold**"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Plain, content: "no-italic*".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 17 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 18 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 22 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "**bold no-italic* bold**"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Plain, + content: "no-italic*".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { + line: 0, + column: 18, + }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { + line: 0, + column: 22, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__italic_with_unopened_bold() { - let input = "*italic no-bold** italic*"; - let expected = [ - Token{ kind: 
TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Plain, content: "no-bold**".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 17 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 18 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 24 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "*italic no-bold** italic*"; + let expected = [ + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Plain, + content: "no-bold**".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { + line: 0, + column: 18, + }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 24, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__left_side_nested() { - let input = "***italic* and bold**"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::Plain, content: "and".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 14 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 15 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 19 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "***italic* and bold**"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position 
{ line: 0, column: 9 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::Plain, + content: "and".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 14, + }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { + line: 0, + column: 15, + }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { + line: 0, + column: 19, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__left_side_nested_with_plain_ending() { - let input = "***italic* and bold** plain"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::Plain, content: "and".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 14 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 15 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 19 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 21 } }, - Token{ kind: TokenKind::Plain, content: "plain".to_string(), position: Position { line: 0, column: 22 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "***italic* and bold** plain"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { line: 0, column: 9 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::Plain, + content: "and".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 14, + }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { + line: 0, + column: 15, + }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { + line: 0, + column: 19, + }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 21, + }, + }, + Token { + kind: TokenKind::Plain, + content: "plain".to_string(), + position: Position { + 
line: 0, + column: 22, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] - pub fn test_tokenize__bold_directly_after_italic() { +pub fn test_tokenize__bold_directly_after_italic() { let input = "*italic***bold**"; let expected = [ - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 14 } }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { + line: 0, + column: 14, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } - - #[test] - pub fn test_tokenize__split_bold_italic_combined_close_due_to_space() { +} + +#[test] +pub fn test_tokenize__split_bold_italic_combined_close_due_to_space() { let input = "*before ***after*"; let expected = [ - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "before".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Plain, content: "*".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::Plain, content: "after".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 16 } }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "before".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Plain, + content: "*".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { line: 0, column: 9 }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { + 
line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::Plain, + content: "after".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 16, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__asterisks_as_plain() { +#[test] +pub fn test_tokenize__asterisks_as_plain() { let input = "before****after"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "before****after".to_string(), position: Position { line: 0, column: 0 } }, - ]; + let expected = [Token { + kind: TokenKind::Plain, + content: "before****after".to_string(), + position: Position { line: 0, column: 0 }, + }]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__asterisks_as_plain_surrounded_by_space() { +#[test] +pub fn test_tokenize__asterisks_as_plain_surrounded_by_space() { let input = "before **** after"; let expected = [ - Token{ kind: TokenKind::Plain, content: "before".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Plain, content: "****".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Plain, content: "after".to_string(), position: Position { line: 0, column: 12 } }, + Token { + kind: TokenKind::Plain, + content: "before".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Plain, + content: "****".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Plain, + content: "after".to_string(), + position: Position { + line: 0, + column: 12, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } - - #[test] - pub fn test_tokenize__italic_directly_after_bold() { +} + +#[test] +pub fn test_tokenize__italic_directly_after_bold() { let input = "**bold***italic*"; let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 15 } }, + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: 
TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { line: 0, column: 9 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 15, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } - - #[test] - pub fn test_tokenize__italic_directly_after_combined_bold_italic() { +} + +#[test] +pub fn test_tokenize__italic_directly_after_combined_bold_italic() { let input = "***bold & italic****italic*"; let expected = [ - Token{ kind: TokenKind::BoldItalicOpen, content: "***".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Plain, content: "&".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 9 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 10 } }, - Token{ kind: TokenKind::BoldItalicClose, content: "***".to_string(), position: Position { line: 0, column: 16 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 19 } }, - Token{ kind: TokenKind::Plain, content: "italic".to_string(), position: Position { line: 0, column: 20 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 26 } }, + Token { + kind: TokenKind::BoldItalicOpen, + content: "***".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Plain, + content: "&".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 9 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, + Token { + kind: TokenKind::BoldItalicClose, + content: "***".to_string(), + position: Position { + line: 0, + column: 16, + }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { + line: 0, + column: 19, + }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { + line: 0, + column: 20, + }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 26, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__italic_directly_after_plain_asterisks() { +#[test] +pub fn test_tokenize__italic_directly_after_plain_asterisks() { let input = "****italic*"; let expected = [ - Token{ kind: TokenKind::Plain, content: "***".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::Plain, content: 
"italic".to_string(), position: Position { line: 0, column: 4 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 10 } }, + Token { + kind: TokenKind::Plain, + content: "***".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic".to_string(), + position: Position { line: 0, column: 4 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { + line: 0, + column: 10, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__bold_directly_after_plain_asterisks() { +#[test] +pub fn test_tokenize__bold_directly_after_plain_asterisks() { let input = "*****bold**"; let expected = [ - Token{ kind: TokenKind::Plain, content: "***".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 5 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 9 } }, + Token { + kind: TokenKind::Plain, + content: "***".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 5 }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 9 }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__combined_directly_after_plain_asterisks() { +#[test] +pub fn test_tokenize__combined_directly_after_plain_asterisks() { let input = "******bold-italic***"; let expected = [ - Token{ kind: TokenKind::Plain, content: "***".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::BoldItalicOpen, content: "***".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::Plain, content: "bold-italic".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::BoldItalicClose, content: "***".to_string(), position: Position { line: 0, column: 17 } }, + Token { + kind: TokenKind::Plain, + content: "***".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::BoldItalicOpen, + content: "***".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold-italic".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::BoldItalicClose, + content: "***".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__plain_asterisks() { +#[test] +pub fn test_tokenize__plain_asterisks() { let input = "*********"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "*********".to_string(), position: Position { line: 0, column: 0 } }, - ]; + let expected = [Token { + kind: TokenKind::Plain, + content: 
"*********".to_string(), + position: Position { line: 0, column: 0 }, + }]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__invalid_italic_open() { +#[test] +pub fn test_tokenize__invalid_italic_open() { let input = "* no italic*"; let expected = [ - Token{ kind: TokenKind::Plain, content: "*".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Plain, content: "no".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 4 } }, - Token{ kind: TokenKind::Plain, content: "italic*".to_string(), position: Position { line: 0, column: 5 } }, + Token { + kind: TokenKind::Plain, + content: "*".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Plain, + content: "no".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 4 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic*".to_string(), + position: Position { line: 0, column: 5 }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] - pub fn test_tokenize__invalid_bold_open() { +#[test] +pub fn test_tokenize__invalid_bold_open() { let input = "plain** still plain**"; let expected = [ - Token{ kind: TokenKind::Plain, content: "plain**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Plain, content: "still".to_string(), position: Position { line: 0, column: 8 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 13 } }, - Token{ kind: TokenKind::Plain, content: "plain**".to_string(), position: Position { line: 0, column: 14 } }, + Token { + kind: TokenKind::Plain, + content: "plain**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Plain, + content: "still".to_string(), + position: Position { line: 0, column: 8 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 13, + }, + }, + Token { + kind: TokenKind::Plain, + content: "plain**".to_string(), + position: Position { + line: 0, + column: 14, + }, + }, ]; let actual = input.tokenize().unwrap(); assert_eq!(actual, expected, "{}", EXPECTED_MSG); - } +} - #[test] +#[test] pub fn test_tokenize__escape_open_italic() { - let input = "\\*not italic*"; - let expected = [ - Token{ kind: TokenKind::EscapedGrapheme, content: "*".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "not".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 5 } }, - Token{ kind: TokenKind::Plain, content: "italic*".to_string(), position: Position { line: 0, column: 6 } }, - ]; + let input = "\\*not italic*"; + let expected = [ + Token { 
+ kind: TokenKind::EscapedGrapheme, + content: "*".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "not".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 5 }, + }, + Token { + kind: TokenKind::Plain, + content: "italic*".to_string(), + position: Position { line: 0, column: 6 }, + }, + ]; - let actual = input.tokenize().unwrap(); + let actual = input.tokenize().unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__inner_italic_after_word() { - let input = "**b*it*b**"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "b".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::ItalicOpen, content: "*".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::Plain, content: "it".to_string(), position: Position { line: 0, column: 4 } }, - Token{ kind: TokenKind::ItalicClose, content: "*".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Plain, content: "b".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 8 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "**b*it*b**"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "b".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::ItalicOpen, + content: "*".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::Plain, + content: "it".to_string(), + position: Position { line: 0, column: 4 }, + }, + Token { + kind: TokenKind::ItalicClose, + content: "*".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Plain, + content: "b".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 8 }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__bold_after_word_with_inner_asterisk() { - let input = "p**b*b**"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "p".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Plain, content: "b*b".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 6 } }, - ]; + let input = "p**b*b**"; + let expected = [ + Token { + kind: TokenKind::Plain, + content: "p".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Plain, + content: "b*b".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: 
TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 6 }, + }, + ]; - let actual = input.tokenize().unwrap(); + let actual = input.tokenize().unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__open_bold_after_italic() { - let input = "*i**b"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "*i**b".to_string(), position: Position { line: 0, column: 0 } }, - ]; + let input = "*i**b"; + let expected = [Token { + kind: TokenKind::Plain, + content: "*i**b".to_string(), + position: Position { line: 0, column: 0 }, + }]; - let actual = input.tokenize().unwrap(); + let actual = input.tokenize().unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/tokenizer/backslash.rs b/inline/tests/tokenizer/backslash.rs index b28b04f6..8b137891 100644 --- a/inline/tests/tokenizer/backslash.rs +++ b/inline/tests/tokenizer/backslash.rs @@ -1,3 +1 @@ - - diff --git a/inline/tests/tokenizer/mixed.rs b/inline/tests/tokenizer/mixed.rs index e69de29b..8b137891 100644 --- a/inline/tests/tokenizer/mixed.rs +++ b/inline/tests/tokenizer/mixed.rs @@ -0,0 +1 @@ + diff --git a/inline/tests/tokenizer/mixed_nested.rs b/inline/tests/tokenizer/mixed_nested.rs index e69de29b..8b137891 100644 --- a/inline/tests/tokenizer/mixed_nested.rs +++ b/inline/tests/tokenizer/mixed_nested.rs @@ -0,0 +1 @@ + diff --git a/inline/tests/tokenizer/text_group.rs b/inline/tests/tokenizer/text_group.rs index 302c0f5e..0bb91eaa 100644 --- a/inline/tests/tokenizer/text_group.rs +++ b/inline/tests/tokenizer/text_group.rs @@ -1,106 +1,290 @@ -use unimarkup_inline::{Token, TokenKind, Position, Tokenizer}; +use unimarkup_inline::{Position, Token, TokenKind, Tokenizer}; use crate::EXPECTED_MSG; #[test] pub fn test_tokenize__simple_text_group() { - let input = "[valid text group]"; - let expected = [ - Token{ kind: TokenKind::TextGroupOpen, content: "[".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "valid".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Plain, content: "group".to_string(), position: Position { line: 0, column: 12 } }, - Token{ kind: TokenKind::TextGroupClose, content: "]".to_string(), position: Position { line: 0, column: 17 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "[valid text group]"; + let expected = [ + Token { + kind: TokenKind::TextGroupOpen, + content: "[".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "valid".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Plain, + content: 
"group".to_string(), + position: Position { + line: 0, + column: 12, + }, + }, + Token { + kind: TokenKind::TextGroupClose, + content: "]".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__plain_before_text_group() { - let input = "plain[valid text group]"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "plain".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::TextGroupOpen, content: "[".to_string(), position: Position { line: 0, column: 5 } }, - Token{ kind: TokenKind::Plain, content: "valid".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 12 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 16 } }, - Token{ kind: TokenKind::Plain, content: "group".to_string(), position: Position { line: 0, column: 17 } }, - Token{ kind: TokenKind::TextGroupClose, content: "]".to_string(), position: Position { line: 0, column: 22 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "plain[valid text group]"; + let expected = [ + Token { + kind: TokenKind::Plain, + content: "plain".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::TextGroupOpen, + content: "[".to_string(), + position: Position { line: 0, column: 5 }, + }, + Token { + kind: TokenKind::Plain, + content: "valid".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { + line: 0, + column: 12, + }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 16, + }, + }, + Token { + kind: TokenKind::Plain, + content: "group".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, + Token { + kind: TokenKind::TextGroupClose, + content: "]".to_string(), + position: Position { + line: 0, + column: 22, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__plain_after_text_group() { - let input = "[valid text group]plain"; - let expected = [ - Token{ kind: TokenKind::TextGroupOpen, content: "[".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "valid".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 6 } }, - Token{ kind: TokenKind::Plain, content: "text".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Space, content: " ".to_string(), position: Position { line: 0, column: 11 } }, - Token{ kind: TokenKind::Plain, content: "group".to_string(), position: Position { line: 0, column: 12 } }, - Token{ kind: TokenKind::TextGroupClose, content: "]".to_string(), position: Position { line: 0, column: 17 } }, - Token{ kind: TokenKind::Plain, content: "plain".to_string(), position: Position { line: 0, column: 18 } }, - ]; - - let 
actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "[valid text group]plain"; + let expected = [ + Token { + kind: TokenKind::TextGroupOpen, + content: "[".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "valid".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { line: 0, column: 6 }, + }, + Token { + kind: TokenKind::Plain, + content: "text".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Space, + content: " ".to_string(), + position: Position { + line: 0, + column: 11, + }, + }, + Token { + kind: TokenKind::Plain, + content: "group".to_string(), + position: Position { + line: 0, + column: 12, + }, + }, + Token { + kind: TokenKind::TextGroupClose, + content: "]".to_string(), + position: Position { + line: 0, + column: 17, + }, + }, + Token { + kind: TokenKind::Plain, + content: "plain".to_string(), + position: Position { + line: 0, + column: 18, + }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__formatting_inside_text_group() { - let input = "[**bold**]"; - let expected = [ - Token{ kind: TokenKind::TextGroupOpen, content: "[".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::TextGroupClose, content: "]".to_string(), position: Position { line: 0, column: 9 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "[**bold**]"; + let expected = [ + Token { + kind: TokenKind::TextGroupOpen, + content: "[".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::TextGroupClose, + content: "]".to_string(), + position: Position { line: 0, column: 9 }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__invalid_formatting_over_text_group_borders() { - let input = "[**bold]**"; - let expected = [ - Token{ kind: TokenKind::TextGroupOpen, content: "[".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::Plain, content: "**bold".to_string(), position: Position { line: 0, column: 1 } }, - Token{ kind: TokenKind::TextGroupClose, content: "]".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::Plain, content: "**".to_string(), position: Position { line: 0, column: 8 } }, - ]; + let input = "[**bold]**"; + let expected = [ + Token { + kind: TokenKind::TextGroupOpen, + content: "[".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::Plain, + content: "**bold".to_string(), + 
position: Position { line: 0, column: 1 }, + }, + Token { + kind: TokenKind::TextGroupClose, + content: "]".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::Plain, + content: "**".to_string(), + position: Position { line: 0, column: 8 }, + }, + ]; - let actual = input.tokenize().unwrap(); + let actual = input.tokenize().unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } #[test] pub fn test_tokenize__formatting_outside_text_group() { - let input = "**[bold]**"; - let expected = [ - Token{ kind: TokenKind::BoldOpen, content: "**".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::TextGroupOpen, content: "[".to_string(), position: Position { line: 0, column: 2 } }, - Token{ kind: TokenKind::Plain, content: "bold".to_string(), position: Position { line: 0, column: 3 } }, - Token{ kind: TokenKind::TextGroupClose, content: "]".to_string(), position: Position { line: 0, column: 7 } }, - Token{ kind: TokenKind::BoldClose, content: "**".to_string(), position: Position { line: 0, column: 8 } }, - ]; - - let actual = input.tokenize().unwrap(); - - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + let input = "**[bold]**"; + let expected = [ + Token { + kind: TokenKind::BoldOpen, + content: "**".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::TextGroupOpen, + content: "[".to_string(), + position: Position { line: 0, column: 2 }, + }, + Token { + kind: TokenKind::Plain, + content: "bold".to_string(), + position: Position { line: 0, column: 3 }, + }, + Token { + kind: TokenKind::TextGroupClose, + content: "]".to_string(), + position: Position { line: 0, column: 7 }, + }, + Token { + kind: TokenKind::BoldClose, + content: "**".to_string(), + position: Position { line: 0, column: 8 }, + }, + ]; + + let actual = input.tokenize().unwrap(); + + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/inline/tests/tokenizer/whitespaces.rs b/inline/tests/tokenizer/whitespaces.rs index 0379584a..25bf5ba0 100644 --- a/inline/tests/tokenizer/whitespaces.rs +++ b/inline/tests/tokenizer/whitespaces.rs @@ -1,18 +1,29 @@ -use unimarkup_inline::{Token, TokenKind, Position, Tokenizer}; +use unimarkup_inline::{Position, Token, TokenKind, Tokenizer}; use crate::EXPECTED_MSG; - #[test] pub fn test_tokenize__two_plain_lines() { - let input = "line1\nline2"; - let expected = [ - Token{ kind: TokenKind::Plain, content: "line1".to_string(), position: Position { line: 0, column: 0 } }, - Token{ kind: TokenKind::NewLine, content: "\n".to_string(), position: Position { line: 0, column: 5 } }, - Token{ kind: TokenKind::Plain, content: "line2".to_string(), position: Position { line: 1, column: 0 } }, - ]; + let input = "line1\nline2"; + let expected = [ + Token { + kind: TokenKind::Plain, + content: "line1".to_string(), + position: Position { line: 0, column: 0 }, + }, + Token { + kind: TokenKind::NewLine, + content: "\n".to_string(), + position: Position { line: 0, column: 5 }, + }, + Token { + kind: TokenKind::Plain, + content: "line2".to_string(), + position: Position { line: 1, column: 0 }, + }, + ]; - let actual = input.tokenize().unwrap(); + let actual = input.tokenize().unwrap(); - assert_eq!(actual, expected, "{}", EXPECTED_MSG); + assert_eq!(actual, expected, "{}", EXPECTED_MSG); } diff --git a/system_tests/tests/cli.rs b/system_tests/tests/cli.rs index 2c7860ea..18800de4 100644 --- a/system_tests/tests/cli.rs +++ 
b/system_tests/tests/cli.rs @@ -183,11 +183,7 @@ fn test__config_parse__enable_elements_option_set() { let um_filename = "file.um"; let elements = vec![UnimarkupType::VerbatimBlock, UnimarkupType::DefinitionList]; - let options = format!( - "--enable-elements={},{}", - elements[0], - elements[1] - ); + let options = format!("--enable-elements={},{}", elements[0], elements[1]); let args = get_args(&options, um_filename); let cfg: Config = Config::parse_from(args); @@ -208,11 +204,7 @@ fn test__config_parse__disable_elements_option_set() { let um_filename = "file.um"; let elements = vec![UnimarkupType::VerbatimBlock, UnimarkupType::DefinitionList]; - let options = format!( - "--disable-elements={},{}", - elements[0], - elements[1] - ); + let options = format!("--disable-elements={},{}", elements[0], elements[1]); let args = get_args(&options, um_filename); let cfg: Config = Config::parse_from(args); diff --git a/system_tests/tests/logging/cli_logs.rs b/system_tests/tests/logging/cli_logs.rs index 8224f459..5cb657ba 100644 --- a/system_tests/tests/logging/cli_logs.rs +++ b/system_tests/tests/logging/cli_logs.rs @@ -16,24 +16,19 @@ fn test__main_log_trace__attributes_file() { .spawn() .expect("Failed to spawn 'cargo run'"); - let output = cli_proc.wait_with_output().expect("Failed to execute 'cargo run'"); + let output = cli_proc + .wait_with_output() + .expect("Failed to execute 'cargo run'"); let logs = String::from_utf8_lossy(&output.stderr); assert!(logs.contains("INFO : 536936448: Writing to file: \"")); - assert!(logs.contains(&format!( - "{}\"", - TEST_FILE.replace(".um", ".html") - ))); + assert!(logs.contains(&format!("{}\"", TEST_FILE.replace(".um", ".html")))); assert!(logs.contains("TRACE: 536936448: Occured in file")); assert!(logs.contains("INFO : 536936449: Finished compiling: \"")); - assert!(logs.contains(&format!( - "{}\"", - TEST_FILE - ))); + assert!(logs.contains(&format!("{}\"", TEST_FILE))); assert!(logs.contains("TRACE: 536936449: Occured in file")); } - // Note: Functions below needed to get the test running in 'run' and 'debug' mode fn get_path() -> PathBuf { @@ -45,7 +40,7 @@ fn get_path() -> PathBuf { Err(_) => { let path = PathBuf::from("tests/test_files/".to_owned() + TEST_FILE); path.canonicalize().unwrap() - }, + } } }