refactor: Move Token next to Expr (#4521)
max-sixty committed Jun 5, 2024
1 parent 6f711de commit 61dcf39
Showing 18 changed files with 283 additions and 285 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
@@ -48,6 +48,7 @@ consolidate-commits = true

[workspace.dependencies]
anyhow = "1.0.86"
+enum-as-inner = "0.6.0"
insta = {version = "1.39.0", features = ["colors", "glob", "yaml"]}
insta-cmd = "0.6.0"
itertools = "0.12.0"
2 changes: 1 addition & 1 deletion prqlc/prqlc-ast/Cargo.toml
@@ -12,7 +12,7 @@ version.workspace = true
doctest = false

[dependencies]
-enum-as-inner = "0.6.0"
+enum-as-inner = {workspace = true}
semver = {version = "1.0.23", features = ["serde"]}
serde = {workspace = true}
serde_yaml = {workspace = true, optional = true}
10 changes: 5 additions & 5 deletions prqlc/prqlc-ast/src/expr.rs
@@ -1,6 +1,5 @@
pub mod generic;
mod ident;
-mod literal;
mod ops;

use std::collections::HashMap;
@@ -9,8 +8,9 @@ use enum_as_inner::EnumAsInner;
use serde::{Deserialize, Serialize};

pub use self::ident::Ident;
-pub use self::literal::{Literal, ValueAndUnit};
pub use self::ops::{BinOp, UnOp};
+pub use self::token::{Literal, ValueAndUnit};
+use super::token;
use crate::span::Span;
use crate::Ty;

@@ -51,7 +51,7 @@ pub enum ExprKind {
        feature = "serde_yaml",
        serde(with = "serde_yaml::with::singleton_map")
    )]
-    Literal(Literal),
+    Literal(token::Literal),
    Pipeline(Pipeline),

    Tuple(Vec<Expr>),
@@ -153,8 +153,8 @@ pub type Range = generic::Range<Box<Expr>>;
pub type InterpolateItem = generic::InterpolateItem<Expr>;
pub type SwitchCase = generic::SwitchCase<Box<Expr>>;

-impl From<Literal> for ExprKind {
-    fn from(value: Literal) -> Self {
+impl From<token::Literal> for ExprKind {
+    fn from(value: token::Literal) -> Self {
        ExprKind::Literal(value)
    }
}
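
For orientation, a minimal usage sketch of the re-pointed conversion, assuming the crate is consumed as prqlc_ast and that a test like this lives downstream (the test name is hypothetical, not part of the commit):

use prqlc_ast::expr::ExprKind;
use prqlc_ast::token::Literal;

#[test]
fn literal_into_expr_kind() {
    // The From<token::Literal> impl lifts a lexed literal straight into an expression kind.
    let kind: ExprKind = Literal::Integer(42).into();
    assert!(matches!(kind, ExprKind::Literal(Literal::Integer(42))));
}
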
81 changes: 0 additions & 81 deletions prqlc/prqlc-ast/src/expr/literal.rs

This file was deleted.

2 changes: 2 additions & 0 deletions prqlc/prqlc-ast/src/lib.rs
@@ -1,9 +1,11 @@
pub mod expr;
pub mod span;
pub mod stmt;
+pub mod token;
mod types;

pub use expr::*;
pub use span::*;
pub use stmt::*;
+pub use token::*;
pub use types::*;
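
A tiny sketch (not in the commit) of what the new re-export buys: the same type is reachable through both the module path and the crate root, which is what the parse_error.rs change below relies on. The function name is illustrative only:

fn kinds_match(a: &prqlc_ast::TokenKind, b: &prqlc_ast::token::TokenKind) -> bool {
    // Both paths name the same TokenKind, so the two references compare directly.
    a == b
}
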
232 changes: 232 additions & 0 deletions prqlc/prqlc-ast/src/token.rs
@@ -0,0 +1,232 @@
use enum_as_inner::EnumAsInner;
use serde::{Deserialize, Serialize};

#[derive(Clone, PartialEq, Serialize, Deserialize, Eq)]
pub struct Token {
    pub kind: TokenKind,
    pub span: std::ops::Range<usize>,
}

#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
pub enum TokenKind {
    NewLine,

    Ident(String),
    Keyword(String),
    #[cfg_attr(
        feature = "serde_yaml",
        serde(with = "serde_yaml::with::singleton_map")
    )]
    Literal(Literal),
    Param(String),

    Range {
        /// Whether the left side of the range is bound by the previous token
        /// (but it's not contained in this token)
        bind_left: bool,
        bind_right: bool,
    },
    Interpolation(char, String),

    /// single-char control tokens
    Control(char),

    ArrowThin, // ->
    ArrowFat, // =>
    Eq, // ==
    Ne, // !=
    Gte, // >=
    Lte, // <=
    RegexSearch, // ~=
    And, // &&
    Or, // ||
    Coalesce, // ??
    DivInt, // //
    // Pow, // **
    Annotate, // @

    // Aesthetics only
    Comment(String),
    DocComment(String),
    /// Vec containing comments between the newline and the line wrap
    // Currently we include the comments with the LineWrap token. This isn't
    // ideal, but I'm not sure of an easy way of having them be separate.
    // - The line wrap span technically includes the comments — on a newline,
    //   we need to look ahead to _after_ the comments to see if there's a
    //   line wrap, and exclude the newline if there is.
    // - We can only pass one token back
    //
    // Alternatives:
    // - Post-process the stream, removing the newline prior to a line wrap.
    //   But requires a whole extra pass.
    // - Change the functionality. But it's very nice to be able to comment
    //   something out and have line-wraps still work.
    LineWrap(Vec<TokenKind>),
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr)]
pub enum Literal {
    Null,
    Integer(i64),
    Float(f64),
    Boolean(bool),
    String(String),
    Date(String),
    Time(String),
    Timestamp(String),
    ValueAndUnit(ValueAndUnit),
}

impl TokenKind {
    pub fn range(bind_left: bool, bind_right: bool) -> Self {
        TokenKind::Range {
            bind_left,
            bind_right,
        }
    }
}
// Compound units, such as "2 days 3 hours" can be represented as `2days + 3hours`
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ValueAndUnit {
    pub n: i64, // Do any DBs use floats or decimals for this?
    pub unit: String, // Could be an enum IntervalType,
}

impl std::fmt::Display for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Literal::Null => write!(f, "null")?,
            Literal::Integer(i) => write!(f, "{i}")?,
            Literal::Float(i) => write!(f, "{i}")?,

            Literal::String(s) => {
                quote_string(s, f)?;
            }

            Literal::Boolean(b) => {
                f.write_str(if *b { "true" } else { "false" })?;
            }

            Literal::Date(inner) | Literal::Time(inner) | Literal::Timestamp(inner) => {
                write!(f, "@{inner}")?;
            }

            Literal::ValueAndUnit(i) => {
                write!(f, "{}{}", i.n, i.unit)?;
            }
        }
        Ok(())
    }
}

fn quote_string(s: &str, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    let s = escape_all_except_quotes(s);

    if !s.contains('"') {
        return write!(f, r#""{s}""#);
    }

    if !s.contains('\'') {
        return write!(f, "'{s}'");
    }

    // when string contains both single and double quotes
    // find minimum number of double quotes
    let mut quotes = "\"\"".to_string();
    while s.contains(&quotes) {
        quotes += "\"";
    }
    write!(f, "{quotes}{s}{quotes}")
}

fn escape_all_except_quotes(s: &str) -> String {
    let mut result = String::new();
    for ch in s.chars() {
        if ch == '"' || ch == '\'' {
            result.push(ch);
        } else {
            result.extend(ch.escape_default());
        }
    }
    result
}

// This is here because Literal::Float(f64) does not implement Hash, so we cannot simply derive it.
// There are reasons for that, but chumsky::Error needs Hash for the TokenKind, so it can deduplicate
// tokens in error.
// So this hack could lead to duplicated tokens in error messages. Oh no.
#[allow(clippy::derived_hash_with_manual_eq)]
impl std::hash::Hash for TokenKind {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        core::mem::discriminant(self).hash(state);
    }
}

impl std::cmp::Eq for TokenKind {}

impl std::fmt::Display for TokenKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TokenKind::NewLine => write!(f, "new line"),
            TokenKind::Ident(s) => {
                if s.is_empty() {
                    // FYI this shows up in errors
                    write!(f, "an identifier")
                } else {
                    write!(f, "{s}")
                }
            }
            TokenKind::Keyword(s) => write!(f, "keyword {s}"),
            TokenKind::Literal(lit) => write!(f, "{}", lit),
            TokenKind::Control(c) => write!(f, "{c}"),

            TokenKind::ArrowThin => f.write_str("->"),
            TokenKind::ArrowFat => f.write_str("=>"),
            TokenKind::Eq => f.write_str("=="),
            TokenKind::Ne => f.write_str("!="),
            TokenKind::Gte => f.write_str(">="),
            TokenKind::Lte => f.write_str("<="),
            TokenKind::RegexSearch => f.write_str("~="),
            TokenKind::And => f.write_str("&&"),
            TokenKind::Or => f.write_str("||"),
            TokenKind::Coalesce => f.write_str("??"),
            TokenKind::DivInt => f.write_str("//"),
            // TokenKind::Pow => f.write_str("**"),
            TokenKind::Annotate => f.write_str("@{"),

            TokenKind::Param(id) => write!(f, "${id}"),

            TokenKind::Range {
                bind_left,
                bind_right,
            } => write!(
                f,
                "'{}..{}'",
                if *bind_left { "" } else { " " },
                if *bind_right { "" } else { " " }
            ),
            TokenKind::Interpolation(c, s) => {
                write!(f, "{c}\"{}\"", s)
            }
            TokenKind::Comment(s) => {
                writeln!(f, "#{}", s)
            }
            TokenKind::DocComment(s) => {
                writeln!(f, "#!{}", s)
            }
            TokenKind::LineWrap(comments) => {
                write!(f, "\n\\ ")?;
                for comment in comments {
                    write!(f, "{}", comment)?;
                }
                Ok(())
            }
        }
    }
}

impl std::fmt::Debug for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "{}..{}: {:?}", self.span.start, self.span.end, self.kind)
    }
}
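
To make the Display rules above concrete, a short sketch (not part of the commit; the test name is hypothetical, and the expected strings follow from quote_string and the Display impls as written):

use prqlc_ast::token::{Literal, TokenKind, ValueAndUnit};

#[test]
fn display_examples() {
    // Plain strings take double quotes; a string already containing `"` falls back to single quotes.
    assert_eq!(Literal::String("hello".to_string()).to_string(), r#""hello""#);
    assert_eq!(Literal::String(r#"say "hi""#.to_string()).to_string(), r#"'say "hi"'"#);
    // Dates, times and timestamps render with a leading `@`.
    assert_eq!(Literal::Date("2024-06-05".to_string()).to_string(), "@2024-06-05");
    // Compound units print as the value immediately followed by the unit.
    let dur = Literal::ValueAndUnit(ValueAndUnit { n: 2, unit: "days".to_string() });
    assert_eq!(dur.to_string(), "2days");
    // TokenKind's Display is what surfaces in parser error messages.
    assert_eq!(TokenKind::Coalesce.to_string(), "??");
}
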
3 changes: 2 additions & 1 deletion prqlc/prqlc-parser/src/err/parse_error.rs
@@ -3,9 +3,10 @@ use std::collections::HashSet;
use std::fmt::Display;
use std::hash::Hash;

+use prqlc_ast::TokenKind;

use crate::ast::Span;
use crate::err::error::{Error, ErrorSource, Reason, WithErrorInfo};
-use crate::lexer::TokenKind;

#[derive(Clone, Debug)]
pub struct ChumError<T: Hash + Eq> {