Skip to content

Commit

Permalink
feat: implement basics of MainParser for Unimarkup
Browse files Browse the repository at this point in the history
  • Loading branch information
nfejzic committed Apr 16, 2023
1 parent e39588a commit e69ab67
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 5 deletions.
69 changes: 68 additions & 1 deletion core/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ pub mod symbol;

use symbol::Symbol;

use crate::elements::Blocks;
use crate::elements::{
atomic::{Heading, Paragraph},
enclosed::Verbatim,
Blocks,
};

use self::symbol::IntoSymbols;

/// Parser as function that can parse Unimarkup content.
///
/// A parser receives a slice of symbols and returns `None` when it produces no
/// result for that input. Otherwise it returns the parsed blocks together with
/// a slice of symbols, which `MainParser::parse` uses as the input that is
/// still left to parse.
pub type ParserFn = for<'i> fn(&'i [Symbol<'i>]) -> Option<(Blocks, &'i [Symbol<'i>])>;
Expand Down Expand Up @@ -59,3 +65,64 @@ where
}
}
}

/// Parser of unimarkup content.
///
/// Holds a list of registered parser functions and one default parser
/// function. Both are used by `MainParser::parse` to turn symbols into blocks.
#[derive(Clone)]
pub struct MainParser {
/// Parsers added through `register_parser`; `parse` tries them in the order
/// in which they were registered.
parsers: Vec<ParserFn>,
/// Default parser invoked by `parse`; `parse` panics if it returns `None`.
default: ParserFn,
}

impl Default for MainParser {
fn default() -> Self {
let default = Paragraph::generate_parser();

let mut parser = Self {
parsers: Vec::with_capacity(2),
default,
};

parser.register_parser(Heading::generate_parser());
parser.register_parser(Verbatim::generate_parser());

parser
}
}

impl MainParser {
    /// Appends `parser` to the list of registered parsers.
    ///
    /// Registered parsers are tried by [`MainParser::parse`] in the order in
    /// which they were registered.
    fn register_parser(&mut self, parser: ParserFn) {
        self.parsers.push(parser);
    }

    /// Parses Unimarkup content and produces Unimarkup blocks.
    ///
    /// While input remains, the registered parsers are tried in registration
    /// order and the first one that returns `Some` handles the current
    /// position. The default parser is used only when none of the registered
    /// parsers produced a result, so input that follows a block handled by a
    /// registered parser is offered to the registered parsers again instead of
    /// being handed straight to the default parser.
    ///
    /// # Panics
    ///
    /// Panics if no registered parser and not even the default parser can
    /// parse the remaining input. In debug builds it additionally panics if an
    /// iteration leaves the length of the remaining input unchanged, since
    /// that would otherwise loop forever.
    pub fn parse<'s>(&self, input: impl IntoSymbols<'s, &'s [Symbol<'s>]>) -> Blocks {
        let mut input = input.into_symbols();
        let mut blocks = Vec::default();

        #[cfg(debug_assertions)]
        let mut input_len = input.len();

        while !input.is_empty() {
            // First registered parser that matches wins; the default parser is
            // consulted lazily, only if every registered parser returned `None`.
            let (mut inner_blocks, rest_of_input) = self
                .parsers
                .iter()
                .find_map(|parser| parser(input))
                .or_else(|| (self.default)(input))
                .expect("Default parser could not parse content!");

            blocks.append(&mut inner_blocks);
            input = rest_of_input;

            // Guard against parsers that report success without consuming input.
            #[cfg(debug_assertions)]
            {
                assert_ne!(input.len(), input_len);
                input_len = input.len();
            }
        }

        blocks
    }
}
20 changes: 16 additions & 4 deletions core/src/parser/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,23 +134,35 @@ impl From<&str> for SymbolKind {
}

/// Trait for conversion of input into Unimarkup symbols.
pub trait IntoSymbols<'s> {
pub trait IntoSymbols<'s, T> {
/// Converts input into Unimarkup symbols.
fn into_symbols(self) -> Vec<Symbol<'s>>;
fn into_symbols(self) -> T;
}

impl<'s> IntoSymbols<'s> for &'s str {
impl<'s> IntoSymbols<'s, Vec<Symbol<'s>>> for &'s str {
fn into_symbols(self) -> Vec<Symbol<'s>> {
word_split(self)
}
}

impl<'s> IntoSymbols<'s> for Vec<Symbol<'s>> {
impl<'s> IntoSymbols<'s, Vec<Symbol<'s>>> for Vec<Symbol<'s>> {
fn into_symbols(self) -> Vec<Symbol<'s>> {
self
}
}

/// Lets a borrowed vector of symbols be used wherever a symbol slice is
/// expected; the vector is only borrowed, no symbols are copied.
impl<'s> IntoSymbols<'s, &'s [Symbol<'s>]> for &'s Vec<Symbol<'s>> {
    fn into_symbols(self) -> &'s [Symbol<'s>] {
        self.as_slice()
    }
}

/// Identity conversion: a slice of symbols is returned unchanged.
impl<'s> IntoSymbols<'s, &'s [Symbol<'s>]> for &'s [Symbol<'s>] {
fn into_symbols(self) -> &'s [Symbol<'s>] {
// Already the target type, nothing to convert.
self
}
}

fn word_split(input: &str) -> Vec<Symbol> {
let segmenter =
WordSegmenter::try_new_unstable(&icu_testdata::unstable()).expect("Data exists");
Expand Down

0 comments on commit e69ab67

Please sign in to comment.