doc(inline): add documentation for the inline crate
mhatzl committed Mar 23, 2022
1 parent ba88b48 commit 57fb41d
Showing 6 changed files with 130 additions and 26 deletions.
14 changes: 13 additions & 1 deletion inline/src/ast/collect.rs
@@ -1,14 +1,22 @@
//! This module provides functionality to create a Unimarkup inline AST out of a given list of tokens.

use crate::tokenizer::{Position, TokenKind, Tokens, Newline};

use super::{Span, NestedInline, InlineKind, FlatInline, substitutions::DirectSubstitution, Inline, FlattenInlineKind};


/// Struct to store partial collected inline tokens.
///
/// Needed for nested tokens.
pub(crate) struct InlineSection {
/// Partially collected inline tokens.
pub(crate) content: Inline,
/// End position of the last inline token of the section.
pub(crate) end: Position,
}

/// Trait to create an inline AST.
pub(crate) trait InlineAst {
/// Function to create an inline AST from a given input.
fn collect(self) -> Inline;
}

@@ -19,6 +27,10 @@ impl InlineAst for Tokens {
}
}

/// Function to collect inline elements up until a certain token is reached.
///
/// Note: The token of kind `token_kind` is the last token of the returned section, if it was found.
/// Otherwise, the given list of tokens is fully emptied.
pub(crate) fn collect_until(tokens: &mut Tokens, token_kind: TokenKind) -> InlineSection {
let mut inline = Vec::new();
let mut end: Position = Position::default();
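A hedged usage sketch for `collect_until`, following the doc comment above: `ItalicClose` is borrowed from the tokenizer module's `*text*` example, and `tokens` is assumed to be a mutable `Tokens` list.

    // Drain `tokens` up to and including the closing italic token.
    let InlineSection { content, end } = collect_until(&mut tokens, TokenKind::ItalicClose);
    // `content` holds the collected inline kinds; `end` is the end position of the
    // last inline token and can serve as the span end of the enclosing element.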
28 changes: 24 additions & 4 deletions inline/src/ast/mod.rs
@@ -1,3 +1,5 @@
//! This module provides types and functionality to create a Unimarkup inline AST out of a given list of tokens.

use crate::tokenizer::{Position, TokenKind};

pub(crate) mod collect;
@@ -6,45 +8,63 @@ mod substitutions;
/// Represents an AST of Unimarkup inline elements.
pub type Inline = Vec<InlineKind>;


/// Convenience function to convert a string into a plain inline element.
pub fn flat_inline(s: &str) -> Inline {
vec![InlineKind::Plain(FlatInline{ content: s.to_string(), span: Span::default() })]
}



/// Struct storing the span of an inline element in a given input.
///
/// Note: If the inline element only consists of one grapheme, start and end point to the same position.
#[derive(Debug, Default, Clone, PartialEq, Copy)]
pub struct Span {
/// The start position of an inline element.
pub start: Position,
/// The end position of an inline element.
pub end: Position,
}

/// Struct representing inline elements that allow nesting.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct NestedInline {
pub content: Vec<InlineKind>,
pub span: Span
}

/// Struct representing inline elements that do not allow nesting.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct FlatInline {
pub content: String,
pub span: Span,
}

/// Enum representing all supported Unimarkup inline elements.
#[derive(Debug, Clone, PartialEq)]
pub enum InlineKind {
/// Representing the bold inline element.
Bold(NestedInline),
/// Representing the italic inline element.
Italic(NestedInline),
/// Representing the combined bold and italic inline element.
BoldItalic(NestedInline),
/// Representing the verbatim inline element.
Verbatim(FlatInline),
/// Representing plain text.
Plain(FlatInline),
/// Representing explicit newlines.
EscapedNewLine(FlatInline),
/// Representing explicit spaces.
EscapedSpace(FlatInline),
}

/// Trait to flatten inline elements.
pub trait FlattenInlineKind {
/// This function converts an inline element back into its original plain representation.
///
/// e.g. `Bold(Plain(text))` --> `**text**`
fn flatten(self) -> String;
}

impl FlattenInlineKind for Vec<InlineKind> {
fn flatten(self) -> String {
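A short sketch of both conversion directions defined above, assuming the types from this diff; the expected `flatten` output follows the `Bold(Plain(text))` --> `**text**` example in the doc comment.

    // Wrap plain text into an inline AST.
    let inline: Inline = flat_inline("text");
    assert_eq!(
        inline,
        vec![InlineKind::Plain(FlatInline { content: "text".to_string(), span: Span::default() })]
    );

    // Flatten a bold element back into its original plain markup.
    let bold = vec![InlineKind::Bold(NestedInline { content: flat_inline("text"), span: Span::default() })];
    assert_eq!(bold.flatten(), "**text**");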
6 changes: 1 addition & 5 deletions inline/src/ast/substitutions.rs
@@ -1,15 +1,11 @@

//! Defines possible direct substitutions.

/// Trait for direct substitutions.
pub trait DirectSubstitution {
/// Substitutes supported arrows, or leaves the given input unchanged if no supported arrow matches.
///
/// - `possible_arrow` ... the string for which substitution is attempted
fn substitute_arrow(self) -> Self;

/// Substitutes supported emojis, or leaves the given input unchanged if no supported emoji matches.
///
/// - `possible_emoji` ... the string for which substitution is attempted
fn substitute_emoji(self) -> Self;
}

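A sketch of what an implementation of `DirectSubstitution` for `String` could look like; the concrete arrow and emoji mappings here are illustrative assumptions, not taken from this commit.

    impl DirectSubstitution for String {
        fn substitute_arrow(self) -> Self {
            match self.as_str() {
                // Hypothetical mappings; the supported arrows are defined elsewhere in the crate.
                "-->" => "\u{27F6}".to_string(), // long rightwards arrow
                "<--" => "\u{27F5}".to_string(), // long leftwards arrow
                _ => self, // no supported arrow matched: input stays unchanged
            }
        }

        fn substitute_emoji(self) -> Self {
            match self.as_str() {
                // Hypothetical mapping; the supported emojis are defined elsewhere in the crate.
                ":)" => "\u{1F642}".to_string(), // slightly smiling face
                _ => self, // no supported emoji matched: input stays unchanged
            }
        }
    }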
2 changes: 2 additions & 0 deletions inline/src/lib.rs
@@ -1,3 +1,5 @@
//! This library provides functionality to create a Unimarkup inline AST from a given string.

use ast::collect::InlineAst;
use error::InlineError;

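Combining the pieces from this commit, the crate-internal flow from input string to inline AST could look roughly like this. `tokenize` and `collect` are taken from the diff; the wrapping function is an assumed entry point, not the crate's confirmed public API.

    use ast::collect::InlineAst;
    use ast::Inline;
    use error::InlineError;
    use tokenizer::Tokenizer;

    /// Assumed entry point (a sketch for illustration).
    pub fn parse_inline(input: &str) -> Result<Inline, InlineError> {
        // Tokenize the input into a flat token list, then fold the tokens into the AST.
        let tokens = input.tokenize()?;
        Ok(tokens.collect())
    }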
48 changes: 38 additions & 10 deletions inline/src/tokenizer/mod.rs
@@ -1,3 +1,8 @@
//! This module provides functionality to tokenize a given &str input.
//! The resulting list of tokens is a flat tokenized representation.
//!
//! e.g. `*text*` --> `[ItalicOpen][Plain][ItalicClose]`

use std::{collections::{HashMap, hash_map::Entry::Vacant}, cmp::min};

use unicode_segmentation::{Graphemes, UnicodeSegmentation};
@@ -7,17 +12,26 @@ pub use tokens::*;

use crate::error::InlineError;


/// Struct pointing to the grapheme position of a token in the given input.
#[derive(Debug, Default, Clone, PartialEq, Copy)]
pub struct Position {
/// Line number in the given input.
pub line: usize,
/// Column in the given input.
pub column: usize,
}


/// Trait to convert a given input into a list of tokens.
pub trait Tokenizer {
/// Takes an input and converts it into a list of tokens.
///
/// Returns an error if inline constraints are violated.
fn tokenize(self) -> Result<Tokens, InlineError>;

/// Takes an input and an offset to convert the input into a list of tokens,
/// where the first token starts at the given offset.
///
/// Returns an error if inline constraints are violated.
fn tokenize_with_offset(self, offset: Position) -> Result<Tokens, InlineError>;
}

@@ -38,14 +52,19 @@ impl Tokenizer for &str {
}
}

/// Internal structure to keep track of the tokenization process.
#[derive(Debug)]
struct Tokenized<'a> {
/// Input converted to a grapheme iterator.
graphemes: Graphemes<'a>,
/// List of tokens that were tokenized so far.
tokens: Vec::<Token>,
/// Map of open tokens that have not been closed yet.
open_tokens: HashMap::<TokenKind, usize>,
/// The position inside the input of the current token being tokenized.
cur_pos: Position,
/// Flag indicating that a grapheme must be escaped.
escape_active: bool,
open_verbatim: bool,
}

impl<'a> From<(&'a str, Position)> for Tokenized<'a> {
@@ -56,7 +75,6 @@ impl<'a> From<(&'a str, Position)> for Tokenized<'a> {
open_tokens: Default::default(),
cur_pos: offset,
escape_active: false,
open_verbatim: false,
}
}
}
@@ -97,6 +115,7 @@ fn tokenize_until(tokenized: &mut Tokenized, token_kind: TokenKind) -> Result<()
Ok(())
}

/// Handles the accent grapheme that opens or closes verbatim tokens.
fn update_accent(tokenized: &mut Tokenized, grapheme: &str) {
if let Some(last) = tokenized.tokens.last() {
tokenized.cur_pos.column += last.length();
@@ -106,17 +125,15 @@ fn update_accent(tokenized: &mut Tokenized, grapheme: &str) {
true => {
let new_token = Token{ kind: TokenKind::VerbatimClose, content: grapheme.to_string(), position: tokenized.cur_pos };
tokenized.tokens.push(new_token);
tokenized.open_verbatim = false;
},
false => {
let new_token = Token{ kind: TokenKind::VerbatimOpen, content: grapheme.to_string(), position: tokenized.cur_pos };
tokenized.tokens.push(new_token);
tokenized.open_verbatim = true;
},
}
}


/// Updates the list of tokens by handling the next grapheme of the input.
fn update_tokens(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), InlineError> {
if tokenized.escape_active {
update_escaped(tokenized, grapheme);
@@ -147,6 +164,10 @@ fn update_tokens(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), Inline
Ok(())
}

/// Handles text group tokenization, which takes precedence over inline formatting.
/// This is achieved by tokenizing recursively while expecting the text group close token.
///
/// Note: The recursive approach enforces the closing constraint.
fn open_text_group(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), InlineError> {
if let Some(last) = tokenized.tokens.last() {
tokenized.cur_pos.column += last.length();
@@ -174,6 +195,7 @@ fn open_text_group(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), Inli
Ok(())
}

/// Function to close a text group if possible.
fn try_closing_text_group(tokenized: &mut Tokenized, grapheme: &str) {
if tokenized.open_tokens.remove(&TokenKind::TextGroupOpen).is_some() {
if let Some(last) = tokenized.tokens.last() {
@@ -192,8 +214,7 @@ }
}
}


/// Function removes any dangling open token between open/close tokens of the last fix token, if it is a closing one.
fn try_closing_fixated_token(tokenized: &mut Tokenized) {
if let Some(last) = tokenized.tokens.last() {
let open_index;
@@ -257,7 +278,7 @@

/// Enters the last fixed token into the open token hashmap, if it is an open token.
///
/// Note: Enforces open token constraints, changing a token to plain if a constraint is violated.
fn update_open_map(tokenized: &mut Tokenized, next_token_is_space_or_newline: bool) {
if let Some(mut prev) = tokenized.tokens.pop() {
// Makes sure that no two open tokens of the same kind are before one closing one
@@ -295,6 +316,7 @@ }
}
}

/// Handles plain text.
fn update_plain(tokenized: &mut Tokenized, grapheme: &str) {
if let Some(last) = tokenized.tokens.last_mut() {
if last.kind == TokenKind::Plain {
@@ -310,6 +332,7 @@ }
}
}

/// Handles escaped graphemes.
fn update_escaped(tokenized: &mut Tokenized, grapheme: &str) {
if let Some(last) = tokenized.tokens.last() {
tokenized.cur_pos.column += last.length();
@@ -318,6 +341,7 @@ }
tokenized.cur_pos.column += 1; // add backslash length offset for next token start
}

/// Handles graphemes that have the Unicode whitespace property but are not newlines.
fn update_space(tokenized: &mut Tokenized, grapheme: &str) {
if let Some(last) = tokenized.tokens.last_mut() {
if last.kind == TokenKind::Space {
@@ -333,6 +357,7 @@ }
}
}

/// Handles newlines.
fn update_newline(tokenized: &mut Tokenized, grapheme: &str) {
if let Some(last) = tokenized.tokens.last() {
tokenized.cur_pos.column += last.length();
@@ -344,6 +369,7 @@ }
tokenized.cur_pos.column = 0;
}

/// Handles bold, italic and any combination of them.
fn update_asterisk(tokenized: &mut Tokenized, grapheme: &str) {
match tokenized.tokens.pop() {
Some(mut last) => {
@@ -520,6 +546,8 @@ }
}
}

/// Cleans up open tokens.
///
/// Remaining open tokens that have no matching close token get converted to plain.
/// Neighboring plain tokens get merged with the open token.
fn cleanup_loose_open_tokens(tokenized: &mut Tokenized) {
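Following the module doc's `*text*` --> `[ItalicOpen][Plain][ItalicClose]` example, a hedged sketch of the resulting flat token list; it assumes `Tokens` is a plain `Vec<Token>` with the `kind` and `content` fields shown in this diff.

    let tokens = "*text*".tokenize()?;
    // Expected flat token sequence per the module doc: [ItalicOpen][Plain][ItalicClose]
    assert_eq!(tokens[0].kind, TokenKind::ItalicOpen);
    assert_eq!(tokens[1].kind, TokenKind::Plain);
    assert_eq!(tokens[1].content, "text");
    assert_eq!(tokens[2].kind, TokenKind::ItalicClose);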