From 57fb41d41a15db6e39a0414cea249bc29dc3836c Mon Sep 17 00:00:00 2001 From: Manuel Hatzl Date: Wed, 23 Mar 2022 12:46:00 +0100 Subject: [PATCH] doc(inline): add documentation for the inline crate --- inline/src/ast/collect.rs | 14 +++++++- inline/src/ast/mod.rs | 28 +++++++++++++--- inline/src/ast/substitutions.rs | 6 +--- inline/src/lib.rs | 2 ++ inline/src/tokenizer/mod.rs | 48 +++++++++++++++++++++------ inline/src/tokenizer/tokens.rs | 58 +++++++++++++++++++++++++++++---- 6 files changed, 130 insertions(+), 26 deletions(-) diff --git a/inline/src/ast/collect.rs b/inline/src/ast/collect.rs index 5015e764..7be89f15 100644 --- a/inline/src/ast/collect.rs +++ b/inline/src/ast/collect.rs @@ -1,14 +1,22 @@ +//! This module provides functionality to create a Unimarkup inline AST out of a given list of tokens. + use crate::tokenizer::{Position, TokenKind, Tokens, Newline}; use super::{Span, NestedInline, InlineKind, FlatInline, substitutions::DirectSubstitution, Inline, FlattenInlineKind}; - +/// Struct to store partial collected inline tokens. +/// +/// Needed for nested tokens. pub(crate) struct InlineSection { + /// Partially collected inline tokens. pub(crate) content: Inline, + /// End position of the last inline token of the section. pub(crate) end: Position, } +/// Trait to create an inline AST. pub(crate) trait InlineAst { + /// Function to create an inline AST from a given input. fn collect(self) -> Inline; } @@ -19,6 +27,10 @@ impl InlineAst for Tokens { } } +/// Function to collect inline elements up until a certain token is reached. +/// +/// Note: The token of kind `token_kind` is the last token of the returned section, if it was found. +/// Otherwise, the given list of tokens is fully emptied. 
pub(crate) fn collect_until(tokens: &mut Tokens, token_kind: TokenKind) -> InlineSection { let mut inline = Vec::new(); let mut end: Position = Position::default(); diff --git a/inline/src/ast/mod.rs b/inline/src/ast/mod.rs index 6745df30..aa6751a0 100644 --- a/inline/src/ast/mod.rs +++ b/inline/src/ast/mod.rs @@ -1,3 +1,5 @@ +//! This module provides types and functionality to create a Unimarkup inline AST out of a given list of tokens. + use crate::tokenizer::{Position, TokenKind}; pub(crate) mod collect; @@ -6,45 +8,63 @@ mod substitutions; /// Represents an AST of Unimarkup inline elements pub type Inline = Vec; - +/// Convenient function to convert a string into plain inline. pub fn flat_inline(s: &str) -> Inline { vec![InlineKind::Plain(FlatInline{ content: s.to_string(), span: Span::default() })] } - - +/// Struct to set the span of an inline element in a given input. +/// +/// Note: If the inline element only consists of one grapheme, start and end point to the same position. #[derive(Debug, Default, Clone, PartialEq, Copy)] pub struct Span { + /// The start position of an inline element. pub start: Position, + /// The end position of an inline element. pub end: Position, } +/// Struct representing inline elements that allow nesting. #[derive(Debug, Default, Clone, PartialEq)] pub struct NestedInline { pub content: Vec, pub span: Span } +/// Struct representing inline elements that do not allow nesting. #[derive(Debug, Default, Clone, PartialEq)] pub struct FlatInline { pub content: String, pub span: Span, } +/// Enum representing all supported Unimarkup inline elements. #[derive(Debug, Clone, PartialEq)] pub enum InlineKind { + /// Representing the bold inline element. Bold(NestedInline), + /// Representing the italic inline element. Italic(NestedInline), + /// Representing the combined bold and italic inline element. BoldItalic(NestedInline), + /// Representing the verbatim inline element. Verbatim(FlatInline), + /// Representing plain text. 
Plain(FlatInline), + /// Representing explicit newlines. EscapedNewLine(FlatInline), + /// Representing explicit spaces. EscapedSpace(FlatInline), } +/// Trait to flatten inline elements. + pub trait FlattenInlineKind { + /// This function converts an inline element back into its original plain representation. + /// + /// e.g. `Bold(Plain(text))` --> `**text**` fn flatten(self) -> String; -} +} impl FlattenInlineKind for Vec { fn flatten(self) -> String { diff --git a/inline/src/ast/substitutions.rs b/inline/src/ast/substitutions.rs index 1d5c517e..d9d1784c 100644 --- a/inline/src/ast/substitutions.rs +++ b/inline/src/ast/substitutions.rs @@ -1,15 +1,11 @@ - +//! Defines possible direct substitutions. /// Trait for direct substitution pub trait DirectSubstitution { /// Substitutes supported arrows or leaves given input unchanged, if no supported arrow matched. - /// - /// - `possible_arrow` ... String that is tried to be substituted fn substitute_arrow(self) -> Self; /// Substitutes supported emojis or leaves given input unchanged, if no supported emoji matched. - /// - /// - `possible_emoji` ... String that is tried to be substituted fn substitute_emoji(self) -> Self; } diff --git a/inline/src/lib.rs b/inline/src/lib.rs index fd8b5f9b..a4f07e93 100644 --- a/inline/src/lib.rs +++ b/inline/src/lib.rs @@ -1,3 +1,5 @@ +//! This library provides functionality to get a Unimarkup inline AST from a given string + use ast::collect::InlineAst; use error::InlineError; diff --git a/inline/src/tokenizer/mod.rs b/inline/src/tokenizer/mod.rs index 5e4d6362..070a7462 100644 --- a/inline/src/tokenizer/mod.rs +++ b/inline/src/tokenizer/mod.rs @@ -1,3 +1,8 @@ +//! This module provides functionality to tokenize a given &str input. +//! The resulting list of tokens is a flat tokenized representation. +//! +//! e.g. 
`*text*` --> `[ItalicOpen][Plain][ItalicClose]` + use std::{collections::{HashMap, hash_map::Entry::Vacant}, cmp::min}; use unicode_segmentation::{Graphemes, UnicodeSegmentation}; @@ -7,17 +12,26 @@ pub use tokens::*; use crate::error::InlineError; - +/// Struct to link to the grapheme position of a token in the given input. #[derive(Debug, Default, Clone, PartialEq, Copy)] pub struct Position { + /// Line number in the given input. pub line: usize, + /// Column in the given input. pub column: usize, } - +/// Trait to convert a given input into a list of tokens. pub trait Tokenizer { + /// Takes an input and converts it into a list of tokens. + /// + /// Returns an error if inline constraints are violated. fn tokenize(self) -> Result; + /// Takes an input and an offset to convert the input into a list of tokens, + /// where the first token starts at the given offset. + /// + /// Returns an error if inline constraints are violated. fn tokenize_with_offset(self, offset: Position) -> Result; } @@ -38,14 +52,19 @@ impl Tokenizer for &str { } } +/// Internal structure to keep track of the tokenization process. #[derive(Debug)] struct Tokenized<'a> { + /// Input converted to a grapheme iterator. graphemes: Graphemes<'a>, + /// List of tokens that were tokenized so far. tokens: Vec::, + /// Map of open tokens that were not yet closed open_tokens: HashMap::, + /// The position inside the input of the current token being tokenized. cur_pos: Position, + /// Flag indicating that a grapheme must be escaped. escape_active: bool, - open_verbatim: bool, } impl<'a> From<(&'a str, Position)> for Tokenized<'a> { @@ -56,7 +75,6 @@ impl<'a> From<(&'a str, Position)> for Tokenized<'a> { open_tokens: Default::default(), cur_pos: offset, escape_active: false, - open_verbatim: false, } } } @@ -97,6 +115,7 @@ fn tokenize_until(tokenized: &mut Tokenized, token_kind: TokenKind) -> Result<() Ok(()) } +/// Handles verbatim tokens. 
fn update_accent(tokenized: &mut Tokenized, grapheme: &str) { if let Some(last) = tokenized.tokens.last() { tokenized.cur_pos.column += last.length(); @@ -106,17 +125,15 @@ fn update_accent(tokenized: &mut Tokenized, grapheme: &str) { true => { let new_token = Token{ kind: TokenKind::VerbatimClose, content: grapheme.to_string(), position: tokenized.cur_pos }; tokenized.tokens.push(new_token); - tokenized.open_verbatim = false; }, false => { let new_token = Token{ kind: TokenKind::VerbatimOpen, content: grapheme.to_string(), position: tokenized.cur_pos }; tokenized.tokens.push(new_token); - tokenized.open_verbatim = true; }, } } - +/// Updates the list of tokens by handling the next grapheme of the input. fn update_tokens(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), InlineError> { if tokenized.escape_active { update_escaped(tokenized, grapheme); @@ -147,6 +164,10 @@ fn update_tokens(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), Inline Ok(()) } +/// Handles text group tokenization by taking precedence over inline formattings. +/// This is achieved by recursive tokenization expecting text group close token. +/// +/// Note: The recursive approach enforces the closing constraint. fn open_text_group(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), InlineError> { if let Some(last) = tokenized.tokens.last() { tokenized.cur_pos.column += last.length(); @@ -174,6 +195,7 @@ fn open_text_group(tokenized: &mut Tokenized, grapheme: &str) -> Result<(), Inli Ok(()) } +/// Function to close a text group if possible. 
fn try_closing_text_group(tokenized: &mut Tokenized, grapheme: &str) { if tokenized.open_tokens.remove(&TokenKind::TextGroupOpen).is_some() { if let Some(last) = tokenized.tokens.last() { tokenized.cur_pos.column += last.length(); } @@ -192,8 +214,7 @@ fn try_closing_text_group(tokenized: &mut Tokenized, grapheme: &str) { } } - -/// Function removes any dangling open token between open/close tokens of the last fix token, if it is a closing one +/// Function removes any dangling open token between open/close tokens of the last fix token, if it is a closing one. fn try_closing_fixated_token(tokenized: &mut Tokenized) { if let Some(last) = tokenized.tokens.last() { let open_index; @@ -257,7 +278,7 @@ fn try_closing_fixated_token(tokenized: &mut Tokenized) { /// Enteres the last fixed token into the open token hashmap, if it is an open token. /// -/// Note: Enforces open token contraints, changing a token to plain if a constraint is violated +/// Note: Enforces open token constraints, changing a token to plain if a constraint is violated. fn update_open_map(tokenized: &mut Tokenized, next_token_is_space_or_newline: bool) { if let Some(mut prev) = tokenized.tokens.pop() { // Makes sure that no two open tokens of the same kind are before one closing one @@ -295,6 +316,7 @@ fn update_open_map(tokenized: &mut Tokenized, next_token_is_space_or_newline: bo } } +/// Handles plain text. fn update_plain(tokenized: &mut Tokenized, grapheme: &str) { if let Some(last) = tokenized.tokens.last_mut() { if last.kind == TokenKind::Plain { @@ -310,6 +332,7 @@ fn update_plain(tokenized: &mut Tokenized, grapheme: &str) { } } +/// Handles escaped graphemes. 
fn update_escaped(tokenized: &mut Tokenized, grapheme: &str) { if let Some(last) = tokenized.tokens.last() { tokenized.cur_pos.column += last.length(); @@ -318,6 +341,7 @@ fn update_escaped(tokenized: &mut Tokenized, grapheme: &str) { tokenized.cur_pos.column += 1; // add backslash length offset for next token start } +/// Handles graphemes with Unicode whitespace property that are not a newline. fn update_space(tokenized: &mut Tokenized, grapheme: &str) { if let Some(last) = tokenized.tokens.last_mut() { if last.kind == TokenKind::Space { @@ -333,6 +357,7 @@ fn update_space(tokenized: &mut Tokenized, grapheme: &str) { } } +/// Handles newlines. fn update_newline(tokenized: &mut Tokenized, grapheme: &str) { if let Some(last) = tokenized.tokens.last() { tokenized.cur_pos.column += last.length(); @@ -344,6 +369,7 @@ fn update_newline(tokenized: &mut Tokenized, grapheme: &str) { tokenized.cur_pos.column = 0; } +/// Handles bold, italic and any combination of them. fn update_asterisk(tokenized: &mut Tokenized, grapheme: &str) { match tokenized.tokens.pop() { Some(mut last) => { @@ -520,6 +546,8 @@ fn update_asterisk(tokenized: &mut Tokenized, grapheme: &str) { } } +/// Cleans up open tokens. +/// /// Remaining open tokens that have no matching close token get converted to plain. /// Neighboring plain tokens get merged with the open token. fn cleanup_loose_open_tokens(tokenized: &mut Tokenized) { diff --git a/inline/src/tokenizer/tokens.rs b/inline/src/tokenizer/tokens.rs index d748cffe..affb43b9 100644 --- a/inline/src/tokenizer/tokens.rs +++ b/inline/src/tokenizer/tokens.rs @@ -1,17 +1,27 @@ +//! Defines all tokens used for tokenization. + use unicode_segmentation::UnicodeSegmentation; use super::Position; +/// Type representing a list of tokens pub type Tokens = Vec; +/// Token structure representing all supported inline elements with their +/// content and position inside a given input. 
#[derive(Debug, Default, Clone, PartialEq)] pub struct Token { + /// The token kind identifies the token parts of an Unimarkup inline element pub kind: TokenKind, + /// The content of the token pub content: String, + /// The starting position of this token inside a given input pub position: Position, } impl Token { + /// Returns the content length of a token. + /// The length is the number of Unicode graphemes inside the content. pub fn length(&self) -> usize { if self.kind == TokenKind::NewLine { return 0; @@ -19,64 +29,97 @@ impl Token { self.content.graphemes(true).count() } + /// Shows if a token is of kind space or newline. pub fn is_space_or_newline(&self) -> bool { self.kind.is_space_or_newline() } + /// Shows if a token is closing a scope inside a given input. + /// Closing scopes may be closing text groups, closing attribute blocks, ... pub fn closes_scope(&self) -> bool { self.kind == TokenKind::TextGroupClose } } +/// Enum defining all special single graphemes understood by Unimarkup. #[derive(Debug, Clone, PartialEq)] pub enum SingleTokenKind { + /// Default kind for all non-special graphemes. Plain, + /// Represents a newline grapheme. Newline, + /// Represents a grapheme that has the Unicode whitespace property and is not a newline. Space, + /// Represents `\`. Backslash, // ExclamationMark, // Ampersand, // Colon, // Caret, // Underscore, + /// Represents `*`. Asterisk, // Plus, + /// Represents `` ` ``. Accent, + /// Represents `[`. LeftSquareBracket, + /// Represents `]`. RightSquareBracket, } +/// Enum representing tokens that are part of Unimarkup inline elements. #[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] pub enum TokenKind { + /// Represents the open part of bold inline formatting. BoldOpen, + /// Represents the closing part of bold inline formatting. BoldClose, + /// Represents the open part of italic inline formatting. ItalicOpen, + /// Represents the closing part of italic inline formatting. 
ItalicClose, + /// Represents the combined open part of bold and italic inline formatting. BoldItalicOpen, + /// Represents the combined closing part of bold and italic inline formatting. BoldItalicClose, + /// Represents the open part of verbatim inline formatting. VerbatimOpen, + /// Represents the closing part of verbatim inline formatting. VerbatimClose, + /// Represents a plain text part. Plain, + /// Represents the open part of an inline emoji shortcut. EmojiOpen, // EmojiClose, + /// Represents a grapheme that is escaped by a backslash. EscapedGrapheme, + /// Represents a newline as defined by `is_newline()`. NewLine, + /// Represents a grapheme that has the Unicode whitespace property and is not a newline. Space, // CommentOpen, // CommentClose, // DirectUnicode, + /// Represents the open part of an inline text group. TextGroupOpen, + /// Represents the closing part of an inline text group. TextGroupClose, + /// Represents the end of a given input. Eoi, } impl Default for TokenKind { - fn default() -> Self { - TokenKind::Plain - } + /// Returns `Plain` as default token. + fn default() -> Self { + TokenKind::Plain + } } impl TokenKind { + /// Returns the string representation for a token. + /// + /// e.g. `**` for BoldOpen and BoldClose. pub fn as_str(&self) -> &'static str { match *self { TokenKind::BoldOpen => "**", @@ -104,13 +147,17 @@ impl TokenKind { } } + /// Shows if a token is either a space or newline. pub fn is_space_or_newline(&self) -> bool { self == &TokenKind::Space || self == &TokenKind::NewLine } } - +/// Trait to convert a type into a single token. pub trait AsSingleTokenKind { + /// Converts given type into a SingleTokenKind. + /// + /// e.g. `*` --> `SingleTokenKind::Asterisk` fn as_single_token_kind(&self) -> SingleTokenKind; } @@ -135,12 +182,11 @@ impl AsSingleTokenKind for &str { } pub trait Newline { + /// Note: Only temporary solution until rust supports is_newline() per default. 
fn is_newline(&self) -> bool; } impl Newline for &str { - /// Note: Only temporary solution until rust supports is_newline() per default. - /// /// Treats `\n`, `\r\n` and `\r` as one newline. fn is_newline(&self) -> bool { let s = *self;