diff --git a/core/odin/frontend/ast.odin b/core/odin/frontend/ast.odin new file mode 100644 index 00000000000..2fedbf5e1c4 --- /dev/null +++ b/core/odin/frontend/ast.odin @@ -0,0 +1,1050 @@ +package odin_frontend + +import "core:sync" +import "core:intrinsics" + +Proc_Tag :: enum { + Bounds_Check, + No_Bounds_Check, + Optional_Ok, + Optional_Allocator_Error, +} +Proc_Tags :: distinct bit_set[Proc_Tag; u32] + +Proc_Inlining :: enum u32 { + None = 0, + Inline = 1, + No_Inline = 2, +} + +Proc_Calling_Convention_Extra :: enum i32 { + Foreign_Block_Default, +} +Proc_Calling_Convention :: union { + string, + Proc_Calling_Convention_Extra, +} + +Node_State_Flag :: enum { + Bounds_Check, + No_Bounds_Check, +} +Node_State_Flags :: distinct bit_set[Node_State_Flag] + +Node :: struct { + pos: Pos, + end: Pos, + state_flags: Node_State_Flags, + derived: Any_Node, +} + +Comment_Group :: struct { + using node: Node, + list: []Token, +} + + + + +/*c++ +struct AstFile { + i32 id; + u32 flags; + AstPackage * pkg; + Scope * scope; + + Ast * pkg_decl; + + String fullpath; + String filename; + String directory; + + Tokenizer tokenizer; + Array tokens; + isize curr_token_index; + isize prev_token_index; + Token curr_token; + Token prev_token; // previous non-comment + Token package_token; + String package_name; + + u64 vet_flags; + bool vet_flags_set; + + // >= 0: In Expression + // < 0: In Control Clause + // NOTE(bill): Used to prevent type literals in control clauses + isize expr_level; + bool allow_newline; // Only valid for expr_level == 0 + bool allow_range; // NOTE(bill): Ranges are only allowed in certain cases + bool allow_in_expr; // NOTE(bill): in expression are only allowed in certain cases + bool in_foreign_block; + bool allow_type; + bool in_when_statement; + + isize total_file_decl_count; + isize delayed_decl_count; + Slice decls; + Array imports; // 'import' + isize directive_count; + + Ast * curr_proc; + isize error_count; + ParseFileError last_error; + f64 
time_to_tokenize; // seconds + f64 time_to_parse; // seconds + + CommentGroup *lead_comment; // Comment (block) before the decl + CommentGroup *line_comment; // Comment after the semicolon + CommentGroup *docs; // current docs + Array comments; // All the comments! + + // This is effectively a queue but does not require any multi-threading capabilities + Array delayed_decls_queues[AstDelayQueue_COUNT]; + +#define PARSER_MAX_FIX_COUNT 6 + isize fix_count; + TokenPos fix_prev_pos; + + struct LLVMOpaqueMetadata *llvm_metadata; + struct LLVMOpaqueMetadata *llvm_metadata_scope; +}; +*/ + + + + + + +// Base Types + +Expr :: struct { + using expr_base: Node, + derived_expr: Any_Expr, +} +Stmt :: struct { + using stmt_base: Node, + derived_stmt: Any_Stmt, +} +Decl :: struct { + using decl_base: Stmt, +} + +// Expressions + +Bad_Expr :: struct { + using node: Expr, +} + +Ident :: struct { + using node: Expr, + name: string, +} + +Implicit :: struct { + using node: Expr, + tok: Token, +} + + +Undef :: struct { + using node: Expr, + tok: Token_Kind, +} + +Basic_Lit :: struct { + using node: Expr, + tok: Token, +} + +Basic_Directive :: struct { + using node: Expr, + tok: Token, + name: string, +} + +Ellipsis :: struct { + using node: Expr, + tok: Token_Kind, + expr: ^Expr, +} + +Proc_Lit :: struct { + using node: Expr, + type: ^Proc_Type, + body: ^Stmt, + tags: Proc_Tags, + inlining: Proc_Inlining, + where_token: Token, + where_clauses: []^Expr, +} + +Comp_Lit :: struct { + using node: Expr, + type: ^Expr, + open: Pos, + elems: []^Expr, + close: Pos, + tag: ^Expr, +} + + +Tag_Expr :: struct { + using node: Expr, + op: Token, + name: string, + expr: ^Expr, +} + +Unary_Expr :: struct { + using node: Expr, + op: Token, + expr: ^Expr, +} + +Binary_Expr :: struct { + using node: Expr, + left: ^Expr, + op: Token, + right: ^Expr, +} + +Paren_Expr :: struct { + using node: Expr, + open: Pos, + expr: ^Expr, + close: Pos, +} + +Selector_Expr :: struct { + using node: Expr, + expr: 
^Expr, + op: Token, + field: ^Ident, +} + +Implicit_Selector_Expr :: struct { + using node: Expr, + field: ^Ident, +} + +Selector_Call_Expr :: struct { + using node: Expr, + expr: ^Expr, + call: ^Call_Expr, + modified_call: bool, +} + +Index_Expr :: struct { + using node: Expr, + expr: ^Expr, + open: Pos, + index: ^Expr, + close: Pos, +} + +Deref_Expr :: struct { + using node: Expr, + expr: ^Expr, + op: Token, +} + +Slice_Expr :: struct { + using node: Expr, + expr: ^Expr, + open: Pos, + low: ^Expr, + interval: Token, + high: ^Expr, + close: Pos, +} + +Matrix_Index_Expr :: struct { + using node: Expr, + expr: ^Expr, + open: Pos, + row_index: ^Expr, + column_index: ^Expr, + close: Pos, +} + +Call_Expr :: struct { + using node: Expr, + inlining: Proc_Inlining, + expr: ^Expr, + open: Pos, + args: []^Expr, + ellipsis: Token, + close: Pos, +} + +Field_Value :: struct { + using node: Expr, + field: ^Expr, + sep: Pos, + value: ^Expr, +} + +Ternary_If_Expr :: struct { + using node: Expr, + x: ^Expr, + op1: Token, + cond: ^Expr, + op2: Token, + y: ^Expr, +} + +Ternary_When_Expr :: struct { + using node: Expr, + x: ^Expr, + op1: Token, + cond: ^Expr, + op2: Token, + y: ^Expr, +} + +Or_Else_Expr :: struct { + using node: Expr, + x: ^Expr, + token: Token, + y: ^Expr, +} + +Or_Return_Expr :: struct { + using node: Expr, + expr: ^Expr, + token: Token, +} + +Or_Branch_Expr :: struct { + using node: Expr, + expr: ^Expr, + token: Token, + label: ^Expr, +} + +Type_Assertion :: struct { + using node: Expr, + expr: ^Expr, + dot: Pos, + open: Pos, + type: ^Expr, + close: Pos, +} + +Type_Cast :: struct { + using node: Expr, + tok: Token, + open: Pos, + type: ^Expr, + close: Pos, + expr: ^Expr, +} + +Auto_Cast :: struct { + using node: Expr, + op: Token, + expr: ^Expr, +} + +Inline_Asm_Dialect :: enum u8 { + Default = 0, + ATT = 1, + Intel = 2, +} + + +Inline_Asm_Expr :: struct { + using node: Expr, + tok: Token, + param_types: []^Expr, + return_type: ^Expr, + has_side_effects: bool, + 
is_align_stack: bool, + dialect: Inline_Asm_Dialect, + open: Pos, + constraints_string: ^Expr, + asm_string: ^Expr, + close: Pos, +} + + + + +// Statements + +Bad_Stmt :: struct { + using node: Stmt, +} + +Empty_Stmt :: struct { + using node: Stmt, + semicolon: Pos, // Position of the following ';' +} + +Expr_Stmt :: struct { + using node: Stmt, + expr: ^Expr, +} + +Tag_Stmt :: struct { + using node: Stmt, + op: Token, + name: string, + stmt: ^Stmt, +} + +Assign_Stmt :: struct { + using node: Stmt, + lhs: []^Expr, + op: Token, + rhs: []^Expr, +} + + +Block_Stmt :: struct { + using node: Stmt, + label: ^Expr, + open: Pos, + stmts: []^Stmt, + close: Pos, + uses_do: bool, +} + +If_Stmt :: struct { + using node: Stmt, + label: ^Expr, + if_pos: Pos, + init: ^Stmt, + cond: ^Expr, + body: ^Stmt, + else_pos: Pos, + else_stmt: ^Stmt, +} + +When_Stmt :: struct { + using node: Stmt, + when_pos: Pos, + cond: ^Expr, + body: ^Stmt, + else_stmt: ^Stmt, +} + +Return_Stmt :: struct { + using node: Stmt, + results: []^Expr, +} + +Defer_Stmt :: struct { + using node: Stmt, + stmt: ^Stmt, +} + +For_Stmt :: struct { + using node: Stmt, + label: ^Expr, + for_pos: Pos, + init: ^Stmt, + cond: ^Expr, + post: ^Stmt, + body: ^Stmt, +} + +Range_Stmt :: struct { + using node: Stmt, + label: ^Expr, + for_pos: Pos, + vals: []^Expr, + in_pos: Pos, + expr: ^Expr, + body: ^Stmt, + reverse: bool, +} + +Inline_Range_Stmt :: struct { + using node: Stmt, + label: ^Expr, + inline_pos: Pos, + for_pos: Pos, + val0: ^Expr, + val1: ^Expr, + in_pos: Pos, + expr: ^Expr, + body: ^Stmt, +} + +Case_Clause :: struct { + using node: Stmt, + case_pos: Pos, + list: []^Expr, + terminator: Token, + body: []^Stmt, +} + +Switch_Stmt :: struct { + using node: Stmt, + label: ^Expr, + switch_pos: Pos, + init: ^Stmt, + cond: ^Expr, + body: ^Stmt, + partial: bool, +} + +Type_Switch_Stmt :: struct { + using node: Stmt, + label: ^Expr, + switch_pos: Pos, + tag: ^Stmt, + expr: ^Expr, + body: ^Stmt, + partial: bool, +} + 
+Branch_Stmt :: struct { + using node: Stmt, + tok: Token, + label: ^Ident, +} + +Using_Stmt :: struct { + using node: Stmt, + list: []^Expr, +} + + +// Declarations + +Bad_Decl :: struct { + using node: Decl, +} + +Value_Decl :: struct { + using node: Decl, + docs: ^Comment_Group, + attributes: [dynamic]^Attribute, // dynamic as parsing will add to them lazily + names: []^Expr, + type: ^Expr, + values: []^Expr, + comment: ^Comment_Group, + is_using: bool, + is_mutable: bool, +} + +Package_Decl :: struct { + using node: Decl, + docs: ^Comment_Group, + token: Token, + name: string, + comment: ^Comment_Group, +} + +Import_Decl :: struct { + using node: Decl, + docs: ^Comment_Group, + is_using: bool, + import_tok: Token, + name: Token, + relpath: Token, + fullpath: string, + comment: ^Comment_Group, +} + +Foreign_Block_Decl :: struct { + using node: Decl, + docs: ^Comment_Group, + attributes: [dynamic]^Attribute, // dynamic as parsing will add to them lazily + tok: Token, + foreign_library: ^Expr, + body: ^Stmt, +} + +Foreign_Import_Decl :: struct { + using node: Decl, + docs: ^Comment_Group, + attributes: [dynamic]^Attribute, // dynamic as parsing will add to them lazily + foreign_tok: Token, + import_tok: Token, + name: ^Ident, + collection_name: string, + fullpaths: []string, + comment: ^Comment_Group, +} + + + +// Other things +unparen_expr :: proc(expr: ^Expr) -> (val: ^Expr) { + val = expr + if expr == nil { + return + } + for { + e := val.derived.(^Paren_Expr) or_break + if e.expr == nil { + break + } + val = e.expr + } + return +} + +strip_or_return_expr :: proc(expr: ^Expr) -> (val: ^Expr) { + val = expr + if expr == nil { + return + } + for { + inner: ^Expr + #partial switch e in val.derived { + case ^Or_Return_Expr: + inner = e.expr + case ^Or_Branch_Expr: + inner = e.expr + case ^Paren_Expr: + inner = e.expr + } + if inner == nil { + break + } + val = inner + } + return +} + +Field_Flags :: distinct bit_set[Field_Flag] + +Field_Flag :: enum { + Invalid, + 
Unknown, + + Ellipsis, + Using, + No_Alias, + C_Vararg, + Const, + Any_Int, + Subtype, + By_Ptr, + + Results, + Tags, + Default_Parameters, + Typeid_Token, +} + +field_flag_strings := [Field_Flag]string{ + .Invalid = "", + .Unknown = "", + + .Ellipsis = "..", + .Using = "using", + .No_Alias = "#no_alias", + .C_Vararg = "#c_vararg", + .Const = "#const", + .Any_Int = "#any_int", + .Subtype = "#subtype", + .By_Ptr = "#by_ptr", + + .Results = "results", + .Tags = "field tag", + .Default_Parameters = "default parameters", + .Typeid_Token = "typeid", +} + +field_hash_flag_strings := []struct{key: string, flag: Field_Flag}{ + {"no_alias", .No_Alias}, + {"c_vararg", .C_Vararg}, + {"const", .Const}, + {"any_int", .Any_Int}, + {"subtype", .Subtype}, + {"by_ptr", .By_Ptr}, +} + + +Field_Flags_Struct :: Field_Flags{ + .Using, + .Tags, + .Subtype, +} +Field_Flags_Record_Poly_Params :: Field_Flags{ + .Typeid_Token, + .Default_Parameters, +} +Field_Flags_Signature :: Field_Flags{ + .Ellipsis, + .Using, + .No_Alias, + .C_Vararg, + .Const, + .Any_Int, + .By_Ptr, + .Default_Parameters, +} + +Field_Flags_Signature_Params :: Field_Flags_Signature | {Field_Flag.Typeid_Token} +Field_Flags_Signature_Results :: Field_Flags_Signature + + +Proc_Group :: struct { + using node: Expr, + tok: Token, + open: Pos, + args: []^Expr, + close: Pos, +} + +Attribute :: struct { + using node: Node, + tok: Token_Kind, + open: Pos, + elems: []^Expr, + close: Pos, +} + +Field :: struct { + using node: Node, + docs: ^Comment_Group, + names: []^Expr, // Could be polymorphic + type: ^Expr, + default_value: ^Expr, + tag: Token, + flags: Field_Flags, + comment: ^Comment_Group, +} + +Field_List :: struct { + using node: Node, + open: Pos, + list: []^Field, + close: Pos, +} + + +// Types +Typeid_Type :: struct { + using node: Expr, + tok: Token_Kind, + specialization: ^Expr, +} + +Helper_Type :: struct { + using node: Expr, + tok: Token_Kind, + type: ^Expr, +} + +Distinct_Type :: struct { + using node: Expr, + 
tok: Token_Kind, + type: ^Expr, +} + +Poly_Type :: struct { + using node: Expr, + dollar: Pos, + type: ^Ident, + specialization: ^Expr, +} + +Proc_Type :: struct { + using node: Expr, + tok: Token, + calling_convention: Proc_Calling_Convention, + params: ^Field_List, + arrow: Pos, + results: ^Field_List, + tags: Proc_Tags, + generic: bool, + diverging: bool, +} + +Pointer_Type :: struct { + using node: Expr, + tag: ^Expr, + pointer: Pos, + elem: ^Expr, +} + +Multi_Pointer_Type :: struct { + using node: Expr, + open: Pos, + pointer: Pos, + close: Pos, + elem: ^Expr, +} + +Array_Type :: struct { + using node: Expr, + open: Pos, + tag: ^Expr, + len: ^Expr, // Ellipsis node for [?]T arrray types, nil for slice types + close: Pos, + elem: ^Expr, +} + +Dynamic_Array_Type :: struct { + using node: Expr, + tag: ^Expr, + open: Pos, + dynamic_pos: Pos, + close: Pos, + elem: ^Expr, +} + +Struct_Type :: struct { + using node: Expr, + tok_pos: Pos, + poly_params: ^Field_List, + align: ^Expr, + where_token: Token, + where_clauses: []^Expr, + is_packed: bool, + is_raw_union: bool, + is_no_copy: bool, + fields: ^Field_List, + name_count: int, +} + +Union_Type_Kind :: enum u8 { + Normal, + maybe, + no_nil, + shared_nil, +} + +Union_Type :: struct { + using node: Expr, + tok_pos: Pos, + poly_params: ^Field_List, + align: ^Expr, + kind: Union_Type_Kind, + where_token: Token, + where_clauses: []^Expr, + variants: []^Expr, +} + +Enum_Type :: struct { + using node: Expr, + tok_pos: Pos, + base_type: ^Expr, + open: Pos, + fields: []^Expr, + close: Pos, + + is_using: bool, +} + +Bit_Set_Type :: struct { + using node: Expr, + tok_pos: Pos, + open: Pos, + elem: ^Expr, + underlying: ^Expr, + close: Pos, +} + +Map_Type :: struct { + using node: Expr, + tok_pos: Pos, + key: ^Expr, + value: ^Expr, +} + + +Relative_Type :: struct { + using node: Expr, + tag: ^Expr, + type: ^Expr, +} + +Matrix_Type :: struct { + using node: Expr, + tok_pos: Pos, + row_count: ^Expr, + column_count: ^Expr, + elem: 
^Expr, +} + + +Any_Node :: union { + ^Comment_Group, + ^Bad_Expr, + ^Ident, + ^Implicit, + ^Undef, + ^Basic_Lit, + ^Basic_Directive, + ^Ellipsis, + ^Proc_Lit, + ^Comp_Lit, + ^Tag_Expr, + ^Unary_Expr, + ^Binary_Expr, + ^Paren_Expr, + ^Selector_Expr, + ^Implicit_Selector_Expr, + ^Selector_Call_Expr, + ^Index_Expr, + ^Deref_Expr, + ^Slice_Expr, + ^Matrix_Index_Expr, + ^Call_Expr, + ^Field_Value, + ^Ternary_If_Expr, + ^Ternary_When_Expr, + ^Or_Else_Expr, + ^Or_Return_Expr, + ^Or_Branch_Expr, + ^Type_Assertion, + ^Type_Cast, + ^Auto_Cast, + ^Inline_Asm_Expr, + + ^Proc_Group, + + ^Typeid_Type, + ^Helper_Type, + ^Distinct_Type, + ^Poly_Type, + ^Proc_Type, + ^Pointer_Type, + ^Multi_Pointer_Type, + ^Array_Type, + ^Dynamic_Array_Type, + ^Struct_Type, + ^Union_Type, + ^Enum_Type, + ^Bit_Set_Type, + ^Map_Type, + ^Relative_Type, + ^Matrix_Type, + + ^Bad_Stmt, + ^Empty_Stmt, + ^Expr_Stmt, + ^Tag_Stmt, + ^Assign_Stmt, + ^Block_Stmt, + ^If_Stmt, + ^When_Stmt, + ^Return_Stmt, + ^Defer_Stmt, + ^For_Stmt, + ^Range_Stmt, + ^Inline_Range_Stmt, + ^Case_Clause, + ^Switch_Stmt, + ^Type_Switch_Stmt, + ^Branch_Stmt, + ^Using_Stmt, + + ^Bad_Decl, + ^Value_Decl, + ^Package_Decl, + ^Import_Decl, + ^Foreign_Block_Decl, + ^Foreign_Import_Decl, + + ^Attribute, + ^Field, + ^Field_List, +} + + +Any_Expr :: union { + ^Bad_Expr, + ^Ident, + ^Implicit, + ^Undef, + ^Basic_Lit, + ^Basic_Directive, + ^Ellipsis, + ^Proc_Lit, + ^Comp_Lit, + ^Tag_Expr, + ^Unary_Expr, + ^Binary_Expr, + ^Paren_Expr, + ^Selector_Expr, + ^Implicit_Selector_Expr, + ^Selector_Call_Expr, + ^Index_Expr, + ^Deref_Expr, + ^Slice_Expr, + ^Matrix_Index_Expr, + ^Call_Expr, + ^Field_Value, + ^Ternary_If_Expr, + ^Ternary_When_Expr, + ^Or_Else_Expr, + ^Or_Return_Expr, + ^Or_Branch_Expr, + ^Type_Assertion, + ^Type_Cast, + ^Auto_Cast, + ^Inline_Asm_Expr, + + ^Proc_Group, + + ^Typeid_Type, + ^Helper_Type, + ^Distinct_Type, + ^Poly_Type, + ^Proc_Type, + ^Pointer_Type, + ^Multi_Pointer_Type, + ^Array_Type, + ^Dynamic_Array_Type, + ^Struct_Type, 
+ ^Union_Type, + ^Enum_Type, + ^Bit_Set_Type, + ^Map_Type, + ^Relative_Type, + ^Matrix_Type, +} + +Any_Stmt :: union { + ^Bad_Stmt, + ^Empty_Stmt, + ^Expr_Stmt, + ^Tag_Stmt, + ^Assign_Stmt, + ^Block_Stmt, + ^If_Stmt, + ^When_Stmt, + ^Return_Stmt, + ^Defer_Stmt, + ^For_Stmt, + ^Range_Stmt, + ^Inline_Range_Stmt, + ^Case_Clause, + ^Switch_Stmt, + ^Type_Switch_Stmt, + ^Branch_Stmt, + ^Using_Stmt, + + ^Bad_Decl, + ^Value_Decl, + ^Package_Decl, + ^Import_Decl, + ^Foreign_Block_Decl, + ^Foreign_Import_Decl, +} + +new_node :: proc($T: typeid, pos, end: Pos, allocator := context.allocator) -> ^T { + n, _ := new(T, allocator) + n.pos = pos + n.end = end + n.derived = n + base: ^Node = n // Dummy check + _ = base // make -vet happy + when intrinsics.type_has_field(T, "derived_expr") { + n.derived_expr = n + } + when intrinsics.type_has_field(T, "derived_stmt") { + n.derived_stmt = n + } + return n +} \ No newline at end of file diff --git a/core/odin/frontend/checker.odin b/core/odin/frontend/checker.odin new file mode 100644 index 00000000000..561d3c8d83d --- /dev/null +++ b/core/odin/frontend/checker.odin @@ -0,0 +1,30 @@ +package odin_frontend + +import "core:sync" + +Scope_Flag :: enum i32 { + +} + +Scope_Flags :: distinct bit_set[Scope_Flag; i32] + +Scope :: struct { + node : ^Node, + parent : ^Scope, + next : ^Scope, + head_child: ^Scope, + + mutex: sync.RW_Mutex, + elements: map[string]^Entity, + + imported: map[^Scope]bool, + + flags: Scope_Flags, + + variant: union { + ^Package, + ^File, + ^Entity, // procedure_entry + } +} + diff --git a/core/odin/frontend/directives.odin b/core/odin/frontend/directives.odin new file mode 100644 index 00000000000..4e13cfa6304 --- /dev/null +++ b/core/odin/frontend/directives.odin @@ -0,0 +1,33 @@ +package odin_frontend + +Directive_Kind :: enum { + // Record memory layout + Packed, + Raw_Union, + Align, + No_Nil, + // Control statements + Partial, + // Procedure parameters + No_Alias, + Any_Int, + Caller_Location, + C_Vararg, + 
By_Ptr, + Optional_Ok, + // Expressions + Type, + // Statements + Bounds_Check, + No_Bounds_Check, + // Built-in procedures + Assert, + Panic, + Config, // (, default) + Defined, // (identifier) + File, Line, Procedure, + Location, // () + Load, + Load_Or, + Load_Hash, +} \ No newline at end of file diff --git a/core/odin/frontend/entity.odin b/core/odin/frontend/entity.odin new file mode 100644 index 00000000000..e0249a60c08 --- /dev/null +++ b/core/odin/frontend/entity.odin @@ -0,0 +1,5 @@ +package odin_frontend + +Entity :: struct { + +} \ No newline at end of file diff --git a/core/odin/frontend/errors.odin b/core/odin/frontend/errors.odin new file mode 100644 index 00000000000..9fef2d7a218 --- /dev/null +++ b/core/odin/frontend/errors.odin @@ -0,0 +1,52 @@ +package odin_frontend + +import "core:fmt" + +Warning_Handler :: #type proc(pos: Pos, format: string, args: ..any) +Error_Handler :: #type proc(pos: Pos, format: string, args: ..any) + +default_warning_handler :: proc(pos: Pos, msg: string, args: ..any) { + fmt.eprintf("%s(%d:%d): Warning: ", pos.file, pos.line, pos.column) + fmt.eprintf(msg, ..args) + fmt.eprintf("\n") +} + +default_error_handler :: proc(pos: Pos, msg: string, args: ..any) { + fmt.eprintf("%s(%d:%d): ", pos.file, pos.line, pos.column) + fmt.eprintf(msg, ..args) + fmt.eprintf("\n") +} + +tokenizer_error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) { + pos := offset_to_pos(t, offset) + if t.err != nil { + t.err(pos, msg, ..args) + } + t.error_count += 1 +} + +parser_error :: proc(p: ^Parser, pos: Pos, msg: string, args: ..any) { + if p.err != nil { + p.err(pos, msg, ..args) + } + //p.file.syntax_error_count += 1 + //p.error_count += 1 + // TODO(Dragos): Modify this +} + +parser_warn :: proc(p: ^Parser, pos: Pos, msg: string, args: ..any) { + if p.warn != nil { + p.warn(pos, msg, ..args) + } + //p.file.syntax_warning_count += 1 +} + +error :: proc { + tokenizer_error, + parser_error, +} + +warn :: proc { + parser_warn, +} + 
diff --git a/core/odin/frontend/frontend.odin b/core/odin/frontend/frontend.odin new file mode 100644 index 00000000000..211651b615e --- /dev/null +++ b/core/odin/frontend/frontend.odin @@ -0,0 +1,2 @@ +package odin_frontend + diff --git a/core/odin/frontend/frontend_test.odin b/core/odin/frontend/frontend_test.odin new file mode 100644 index 00000000000..9baa5e2b7f2 --- /dev/null +++ b/core/odin/frontend/frontend_test.odin @@ -0,0 +1,55 @@ +package odin_frontend + +import "core:os" +import "core:testing" +import "core:fmt" +import "core:strings" +import "core:path/filepath" + +@test +test_tokenizer :: proc(T: ^testing.T) { + sb := strings.builder_make() + defer strings.builder_destroy(&sb) + tokenizer: Tokenizer + src_path := "examples/demo/demo.odin" + src, src_ok := os.read_entire_file(src_path) + testing.expect(T, src_ok, "Failed to read the input file") + tokenizer_init(&tokenizer, string(src), src_path) + + for tok := scan(&tokenizer); tok.kind != .EOF; tok = scan(&tokenizer) { + fmt.sbprintf(&sb, "[%v](%d:%d):%v\n", tok.kind, tok.pos.line, tok.pos.column, tok.text) + } + str := strings.to_string(sb) + out_ok := os.write_entire_file("demo_tokens.txt", transmute([]byte)str) + testing.expect(T, out_ok, "Failed to write demo_tokens.txt") + testing.expect(T, tokenizer.error_count == 0, "Tokenization failed with errors") +} + +@test +test_paths :: proc(T: ^testing.T) { + ok: bool + parser := default_parser() + // Note(Dragos): Parser doesn't need collections. 
Only the type checker does + ok = parser_add_collection(&parser, "core", filepath.join({ODIN_ROOT, "core"}, context.temp_allocator)) + ok = parser_add_collection(&parser, "vendor", filepath.join({ODIN_ROOT, "vendor"}, context.temp_allocator)) + ok = parser_add_collection(&parser, "shared", filepath.join({ODIN_ROOT, "shared"}, context.temp_allocator)) + testing.expect(T, ok) +} + +@test +test_file_loading :: proc(T: ^testing.T) { + ok: bool + pkg: ^Package + pkg, ok = read_package("examples/demo", context.allocator, context.allocator) + testing.expect(T, ok, "Failed to read package") + testing.expect(T, len(pkg.files) == 1, "Failed to read the files") + for path, file in pkg.files { + fmt.printf("Read file %s\n", path) + + } +} + +@test +test_parser :: proc(T: ^testing.T) { + +} \ No newline at end of file diff --git a/core/odin/frontend/package.odin b/core/odin/frontend/package.odin new file mode 100644 index 00000000000..5d484f2c820 --- /dev/null +++ b/core/odin/frontend/package.odin @@ -0,0 +1,157 @@ +package odin_frontend + +import "core:path/filepath" +import "core:os" +import "core:sync" +import "core:fmt" +import "core:strings" +import "core:runtime" +import "core:mem" + +/* +File :: struct { + using node: Node, + id: int, + pkg: ^Package, + + fullpath: string, + src: string, + + docs: ^Comment_Group, + + pkg_decl: ^Package_Decl, + pkg_token: Token, + pkg_name: string, + + decls: [dynamic]^Stmt, + imports: [dynamic]^Import_Decl, + directive_count: int, + + comments: [dynamic]^Comment_Group, + + syntax_warning_count: int, + syntax_error_count: int, +} +*/ + +Package_Kind :: enum { + Normal, + Runtime, + Init, +} + + +Foreign_File_Kind :: enum { + Invalid, + Source, +} + +Foreign_File :: struct { + kind: Foreign_File_Kind, + source: string, +} + +File_Flag :: enum u32 { + Is_Private_Pkg = 1<<0, + Is_Private_File = 1<<1, + + Is_Test = 1<<3, + Is_Lazy = 1<<4, +} +File_Flags :: bit_set[File_Flag] + + +File :: struct { + id: int, + + pkg: ^Package, + pkg_decl: 
^Node, + + src: string, + fullpath: string, + filename: string, + directory: string, + + tokens: [dynamic]Token, + + docs: ^Comment_Group, +} + +Package :: struct { + kind: Package_Kind, + id: int, + name: string, + fullpath: string, + files: map[string]^File, + + is_single_file: bool, + order: int, + + file_allocator: mem.Allocator, +} + +read_file :: proc(pkg: ^Package, path: string) -> (file: ^File) { + context.allocator = pkg.file_allocator + fullpath, fullpath_ok := filepath.abs(path) + if !fullpath_ok { + return nil + } + fmt.assertf(fullpath not_in pkg.files, "File %s already part of the package\n", fullpath) + src, src_ok := os.read_entire_file(fullpath) + if !src_ok { + return nil + } + file = new(File) + file.fullpath = fullpath + file.filename = filepath.base(file.fullpath) + file.directory = filepath.dir(file.fullpath) // was filepath.dir(file.filename): dir of a bare base name is always "." + file.src = string(src) + file.tokens = make([dynamic]Token) // Note(Dragos): Maybe this can have a different allocator + file.pkg = pkg + pkg.files[file.fullpath] = file + return file +} + +delete_file :: proc(file: ^File) { + fmt.assertf(file.fullpath in file.pkg.files, "File %s is not part of the package\n", file.fullpath) + context.allocator = file.pkg.file_allocator + delete_key(&file.pkg.files, file.fullpath) + delete(file.fullpath) + delete(file.directory) + free(file) +} + +read_package :: proc(path: string, file_allocator: mem.Allocator, allocator := context.allocator) -> (pkg: ^Package, ok: bool) { + context.allocator = allocator + pkg_path, pkg_path_ok := filepath.abs(path) + if !pkg_path_ok { + return nil, false + } + path_pattern := fmt.tprintf("%s/*.odin", pkg_path) + matches, matches_err := filepath.glob(path_pattern, context.temp_allocator) + if matches_err != nil { + return nil, false + } + + pkg = new(Package) + pkg.fullpath = pkg_path + pkg.files = make(map[string]^File) + pkg.file_allocator = file_allocator + defer if !ok { + delete(pkg.fullpath) + delete(pkg.files) + free(pkg) + } + for match in matches { + file := 
read_file(pkg, match) + if file == nil { + return nil, false + } + } + + return pkg, true +} + +delete_package :: proc(pkg: ^Package, allocator := context.allocator) { + context.allocator = allocator +} \ No newline at end of file diff --git a/core/odin/frontend/parser.odin b/core/odin/frontend/parser.odin new file mode 100644 index 00000000000..7203901a5b4 --- /dev/null +++ b/core/odin/frontend/parser.odin @@ -0,0 +1,174 @@ +package odin_frontend + +import "core:sync" +import "core:fmt" +import "core:strings" + +import "core:intrinsics" + +import "core:path/filepath" +import "core:os" + +/*c++ +struct Parser { + String init_fullpath; + + StringSet imported_files; // fullpath + BlockingMutex imported_files_mutex; + + Array packages; + BlockingMutex packages_mutex; + + std::atomic file_to_process_count; + std::atomic total_token_count; + std::atomic total_line_count; + + // TODO(bill): What should this mutex be per? + // * Parser + // * Package + // * File + BlockingMutex file_decl_mutex; + + BlockingMutex file_error_mutex; + ParseFileErrorNode * file_error_head; + ParseFileErrorNode * file_error_tail; +}; +*/ + +Imported_File :: struct { + pkg: ^Package, + fi: os.File_Info, + pos: Pos, // import + index: int, +} + +Collection :: struct { + name: string, + path: string, +} + +Parser :: struct { + init_fullpath: string, + + imported_files: map[string]bool, + imported_files_mutex: sync.Mutex, + + collections: [dynamic]Collection, + + packages: [dynamic]^Package, + packages_mutex: sync.Mutex, + + file_to_process_count: int, + total_token_count: int, + total_line_count: int, + + file_decl_mutex: sync.Mutex, + + err: Error_Handler, + warn: Warning_Handler, +} + +default_parser :: proc() -> Parser { + return Parser { + err = default_error_handler, + warn = default_warning_handler, + } +} + +Stmt_Allow_Flag :: enum { + In, + Label, +} +Stmt_Allow_Flags :: distinct bit_set[Stmt_Allow_Flag] + +Import_Decl_Kind :: enum { + Standard, + Using, +} + +Parse_File_Error :: enum { + 
None, + Wrong_Extension, + Invalid_File, + Empty_File, + Permission, + Not_Found, + Invalid_Token, + General_Error, + File_Too_Large, + Directory_Already_Exists, +} + +collection_path :: proc(p: ^Parser, name: string) -> (path: Maybe(string)) { + for collection in p.collections { + if collection.name == name { + return collection.path + } + } + return nil +} + +import_path_to_fullpath :: proc(p: ^Parser, pkg: ^Package, path: string, allocator := context.allocator) -> (fullpath: Maybe(string)) { + collection_and_relpath := strings.split(path, ":", context.temp_allocator) + switch len(collection_and_relpath) { + case 1: // Relative to the package path + return filepath.join({pkg.fullpath, collection_and_relpath[0]}, allocator) + + case 2: + col_path := collection_path(p, collection_and_relpath[0]) + if col_path, is_valid := col_path.?; is_valid { + return filepath.join({col_path, collection_and_relpath[1]}, allocator) + } + } + return nil +} + +parser_add_collection :: proc(p: ^Parser, name: string, path: string) -> bool { + old_path := collection_path(p, name) + if old_path, is_valid := old_path.?; is_valid { + error(p, NO_POS, "Redeclaration of collection %s to %s. 
Was %s", name, path, old_path) + return false + } + append(&p.collections, Collection{name, path}) + return true +} + + +parser_add_file_to_process :: proc(p: ^Parser, pkg: ^Package, fi: os.File_Info, pos: Pos) { + f := Imported_File{pkg, fi, pos, p.file_to_process_count + 1} + p.file_to_process_count += 1 + // Todo(Dragos): add worker to pool, or process directly +} + +process_imported_file :: proc(p: ^Parser, imported_file: Imported_File) -> bool { + pkg := imported_file.pkg + fi := imported_file.fi + pos := imported_file.pos + + file := new(File, context.allocator) + + file.pkg = pkg + file.id = imported_file.index + 1 + + err_pos, file_ok := file_init(file, fi.fullpath) + + return true +} + +file_init :: proc(file: ^File, fullpath: string) -> (err_pos: Pos, ok: bool) { + unimplemented() +} + +parser_add_package :: proc(p: ^Parser, pkg: ^Package) { + pkg.id = len(p.packages) + 1 + append(&p.packages, pkg) +} + +parse_packages :: proc(p: ^Parser, init_filename: string) -> bool { + if init_fullpath, ok := filepath.abs(init_filename, context.allocator); !ok { // was `ok`: only report when abs() fails + error(p, Pos{}, "Failed to get the fullpath of %s", init_filename) + return false + } + + return true +} \ No newline at end of file diff --git a/core/odin/frontend/token.odin b/core/odin/frontend/token.odin new file mode 100644 index 00000000000..ea22dc390be --- /dev/null +++ b/core/odin/frontend/token.odin @@ -0,0 +1,348 @@ +package odin_frontend + +import "core:strings" + +Token :: struct { + kind: Token_Kind, + text: string, + pos: Pos, +} + +Pos :: struct { + file: string, + offset: int, // starting at 0 + line: int, // starting at 1 + column: int, // starting at 1 +} + +NO_POS :: Pos{} + +pos_compare :: proc(lhs, rhs: Pos) -> int { + if lhs.offset != rhs.offset { + return -1 if (lhs.offset < rhs.offset) else +1 + } + if lhs.line != rhs.line { + return -1 if (lhs.line < rhs.line) else +1 + } + if lhs.column != rhs.column { + return -1 if (lhs.column < rhs.column) else +1 + } + return 
strings.compare(lhs.file, rhs.file)
}

// Token_Kind enumerates every kind of token the tokenizer can produce.
// The `B_*_Begin`/`B_*_End` members are sentinels used only for range
// classification (see `is_literal`, `is_operator`, `is_keyword`); they are
// never emitted as the kind of an actual token. Order here MUST stay in
// sync with the `tokens` spelling table below.
Token_Kind :: enum u32 {
	Invalid,
	EOF,
	Comment,

	B_Literal_Begin,
	Ident,   // main
	Integer, // 12345
	Float,   // 123.45
	Imag,    // 123.45i
	Rune,    // 'a'
	String,  // "abc"
	B_Literal_End,

	B_Operator_Begin,
	Eq,       // =
	Not,      // !
	Hash,     // #
	At,       // @
	Dollar,   // $
	Pointer,  // ^
	Question, // ?
	Add,      // +
	Sub,      // -
	Mul,      // *
	Quo,      // /
	Mod,      // %
	Mod_Mod,  // %%
	And,      // &
	Or,       // |
	Xor,      // ~
	And_Not,  // &~
	Shl,      // <<
	Shr,      // >>

	Cmp_And, // &&
	Cmp_Or,  // ||

	B_Assign_Op_Begin,
	Add_Eq,     // +=
	Sub_Eq,     // -=
	Mul_Eq,     // *=
	Quo_Eq,     // /=
	Mod_Eq,     // %=
	Mod_Mod_Eq, // %%=
	And_Eq,     // &=
	Or_Eq,      // |=
	Xor_Eq,     // ~=
	And_Not_Eq, // &~=
	Shl_Eq,     // <<=
	Shr_Eq,     // >>=
	Cmp_And_Eq, // &&=
	Cmp_Or_Eq,  // ||=
	B_Assign_Op_End,

	Increment,   // ++
	Decrement,   // --
	Arrow_Right, // ->
	Undef,       // ---

	B_Comparison_Begin,
	Cmp_Eq, // ==
	Not_Eq, // !=
	Lt,     // <
	Gt,     // >
	Lt_Eq,  // <=
	Gt_Eq,  // >=
	B_Comparison_End,

	Open_Paren,    // (
	Close_Paren,   // )
	Open_Bracket,  // [
	Close_Bracket, // ]
	Open_Brace,    // {
	Close_Brace,   // }
	Colon,         // :
	Semicolon,     // ;
	Period,        // .
	Comma,         // ,
	Ellipsis,      // ..
	Range_Half,    // ..<
	Range_Full,    // ..=
	B_Operator_End,

	B_Keyword_Begin,
	Import,      // import
	Foreign,     // foreign
	Package,     // package
	Typeid,      // typeid
	When,        // when
	Where,       // where
	If,          // if
	Else,        // else
	For,         // for
	Switch,      // switch
	In,          // in
	Not_In,      // not_in
	Do,          // do
	Case,        // case
	Break,       // break
	Continue,    // continue
	Fallthrough, // fallthrough
	Defer,       // defer
	Return,      // return
	Proc,        // proc
	Struct,      // struct
	Union,       // union
	Enum,        // enum
	Bit_Set,     // bit_set
	Map,         // map
	Dynamic,     // dynamic
	Auto_Cast,   // auto_cast
	Cast,        // cast
	Transmute,   // transmute
	Distinct,    // distinct
	Using,       // using
	Context,     // context
	Or_Else,     // or_else
	Or_Return,   // or_return
	Or_Break,    // or_break
	Or_Continue, // or_continue
	Asm,         // asm
	Inline,      // inline
	No_Inline,   // no_inline
	Matrix,      // matrix
	B_Keyword_End,

	COUNT,

	// Custom (user-registered) keywords live past COUNT: keyword index i
	// is encoded as B_Custom_Keyword_Begin + 1 + i (see `scan`).
	B_Custom_Keyword_Begin = COUNT+1,
	// ... Custom keywords
}

// Spelling for each built-in Token_Kind, indexed by kind.
// Sentinels map to "" and literal kinds map to a descriptive word.
tokens := [Token_Kind.COUNT]string {
	"Invalid",
	"EOF",
	"Comment",

	"",
	"identifier",
	"integer",
	"float",
	"imaginary",
	"rune",
	"string",
	"",

	"",
	"=",
	"!",
	"#",
	"@",
	"$",
	"^",
	"?",
	"+",
	"-",
	"*",
	"/",
	"%",
	"%%",
	"&",
	"|",
	"~",
	"&~",
	"<<",
	">>",

	"&&",
	"||",

	"",
	"+=",
	"-=",
	"*=",
	"/=",
	"%=",
	"%%=",
	"&=",
	"|=",
	"~=",
	"&~=",
	"<<=",
	">>=",
	"&&=",
	"||=",
	"",

	"++",
	"--",
	"->",
	"---",

	"",
	"==",
	"!=",
	"<",
	">",
	"<=",
	">=",
	"",

	"(",
	")",
	"[",
	"]",
	"{",
	"}",
	":",
	";",
	".",
	",",
	"..",
	"..<",
	"..=",
	"",

	"",
	"import",
	"foreign",
	"package",
	"typeid",
	"when",
	"where",
	"if",
	"else",
	"for",
	"switch",
	"in",
	"not_in",
	"do",
	"case",
	"break",
	"continue",
	"fallthrough",
	"defer",
	"return",
	"proc",
	"struct",
	"union",
	"enum",
	"bit_set",
	"map",
	"dynamic",
	"auto_cast",
	"cast",
	"transmute",
	"distinct",
	"using",
	"context",
	"or_else",
	"or_return",
	"or_break",
	"or_continue",
	"asm",
	"inline",
	"no_inline",
	"matrix",
	"",
}

// Spellings for custom keywords; index i corresponds to the token kind
// B_Custom_Keyword_Begin + 1 + i (the encoding used by `scan`).
custom_keyword_tokens: []string


// is_newline reports whether `tok` is an implicit semicolon that the
// tokenizer synthesized from a newline (kind .Semicolon, text "\n").
is_newline :: proc(tok: Token) -> bool {
	return tok.kind == .Semicolon && tok.text == "\n"
}


// token_to_string returns a printable description of a token, naming
// synthesized newline-semicolons "newline" rather than ";".
token_to_string :: proc(tok: Token) -> string {
	if is_newline(tok) {
		return "newline"
	}
	return token_kind_to_string(tok.kind)
}

// token_kind_to_string returns the spelling of a token kind, including
// custom keywords registered in `custom_keyword_tokens`.
token_kind_to_string :: proc(kind: Token_Kind) -> string {
	if .Invalid <= kind && kind < .COUNT {
		return tokens[kind]
	}
	if .B_Custom_Keyword_Begin < kind {
		// `scan` encodes custom keyword index i as
		// B_Custom_Keyword_Begin + 1 + i, so subtract the extra 1 to
		// recover i (previously this was off by one and returned the
		// *next* keyword's spelling).
		n := int(u16(kind)-u16(Token_Kind.B_Custom_Keyword_Begin)) - 1
		if n < len(custom_keyword_tokens) {
			return custom_keyword_tokens[n]
		}
	}

	return "Invalid"
}

// is_literal reports whether `kind` is a literal token (identifier,
// integer, float, imaginary, rune, or string).
is_literal :: proc(kind: Token_Kind) -> bool {
	return .B_Literal_Begin < kind && kind < .B_Literal_End
}
// is_operator reports whether `kind` can act as an operator. `in`,
// `not_in`, and `if` are keywords but are included because they appear in
// expression position (binary operators / ternary `if` expressions).
is_operator :: proc(kind: Token_Kind) -> bool {
	#partial switch kind {
	case .B_Operator_Begin ..= .B_Operator_End:
		return true
	case .In, .Not_In:
		return true
	case .If:
		return true
	}
	return false
}
// is_assignment_operator reports whether `kind` is `=` or any compound
// assignment operator (`+=`, `&&=`, ...).
is_assignment_operator :: proc(kind: Token_Kind) -> bool {
	return .B_Assign_Op_Begin < kind && kind < .B_Assign_Op_End || kind == .Eq
}
// is_keyword reports whether `kind` is a built-in or custom keyword.
is_keyword :: proc(kind: Token_Kind) -> bool {
	switch {
	case .B_Keyword_Begin < kind && kind < .B_Keyword_End:
		return true
	case .B_Custom_Keyword_Begin < kind:
		return true
	}
	return false
}
diff --git a/core/odin/frontend/tokenizer.odin b/core/odin/frontend/tokenizer.odin
new file mode 100644
index 00000000000..a705022c5b3
--- /dev/null
+++ b/core/odin/frontend/tokenizer.odin
@@ -0,0 +1,728 @@
package odin_frontend

import "core:fmt"
import "core:unicode"
import "core:unicode/utf8"



Flag :: enum {
	// When set, the tokenizer synthesizes semicolons at newlines after
	// statement-terminating tokens (see the epilogue of `scan`).
	Insert_Semicolon,
}
Flags :: distinct bit_set[Flag; u32]

Tokenizer :: struct {
	// Immutable data
	path: string,
	src:  string,
	err:  Error_Handler,

	flags: Flags,

	// Tokenizing state
	ch:               rune, // current rune; -1 once EOF is reached
	offset:           int,  // byte offset of `ch`
	read_offset:      int,  // byte offset of the rune after `ch`
	line_offset:      int,  // byte offset where the current line starts
	line_count:       int,  // 1-based line number of the current position
	insert_semicolon: bool, // a newline should become an implicit semicolon

	// Mutable data
	error_count: int,
}

// tokenizer_init prepares `t` to scan `src`, priming the first rune and
// skipping a leading byte-order mark if present.
tokenizer_init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) {
	t.src = src
	t.err = err
	t.ch = ' '
	t.offset = 0
	t.read_offset = 0
	t.line_offset = 0
	t.line_count = len(src) > 0 ? 1 : 0
	t.error_count = 0
	t.path = path

	advance_rune(t)
	if t.ch == utf8.RUNE_BOM {
		advance_rune(t)
	}
}

// offset_to_pos converts a byte offset into a Pos using the tokenizer's
// current line bookkeeping. Only valid for offsets on the current line.
@(private)
offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos {
	line := t.line_count
	column := offset - t.line_offset + 1

	return Pos {
		file = t.path,
		offset = offset,
		line = line,
		column = column,
	}
}



// advance_rune consumes one rune: it decodes the rune at `read_offset`
// into `ch`, maintains line counters, and reports NUL bytes, bad UTF-8,
// and interior byte-order marks. At end of input `ch` becomes -1.
advance_rune :: proc(t: ^Tokenizer) {
	if t.read_offset < len(t.src) {
		t.offset = t.read_offset
		if t.ch == '\n' {
			t.line_offset = t.offset
			t.line_count += 1
		}
		r, w := rune(t.src[t.read_offset]), 1
		switch {
		case r == 0:
			error(t, t.offset, "illegal character NUL")
		case r >= utf8.RUNE_SELF:
			// Multi-byte sequence: decode it properly.
			r, w = utf8.decode_rune_in_string(t.src[t.read_offset:])
			if r == utf8.RUNE_ERROR && w == 1 {
				error(t, t.offset, "illegal UTF-8 encoding")
			} else if r == utf8.RUNE_BOM && t.offset > 0 {
				// A BOM is only permitted as the very first rune.
				error(t, t.offset, "illegal byte order mark")
			}
		}
		t.read_offset += w
		t.ch = r
	} else {
		t.offset = len(t.src)
		if t.ch == '\n' {
			t.line_offset = t.offset
			t.line_count += 1
		}
		t.ch = -1 // EOF marker
	}
}

// peek_byte returns the byte `offset` bytes past the current rune without
// consuming anything; 0 when past the end of the source.
peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
	if t.read_offset+offset < len(t.src) {
		return t.src[t.read_offset+offset]
	}
	return 0
}

// skip_whitespace consumes spaces, tabs, and carriage returns. Newlines
// are only consumed when no semicolon insertion is pending — otherwise
// `scan` must see the '\n' to turn it into an implicit semicolon.
skip_whitespace :: proc(t: ^Tokenizer) {
	if t.insert_semicolon {
		for {
			switch t.ch {
			case ' ', '\t', '\r':
				advance_rune(t)
			case:
				return
			}
		}
	} else {
		for {
			switch t.ch {
			case ' ', '\t', '\r', '\n':
				advance_rune(t)
			case:
				return
			}
		}
	}
}

// is_letter reports whether `r` may start or continue an identifier:
// '_', ASCII letters, or any Unicode letter.
is_letter :: proc(r: rune) -> bool {
	if r < utf8.RUNE_SELF {
		switch r {
		case '_':
			return true
		case 'A'..='Z', 'a'..='z':
			return true
		}
	}
	return unicode.is_letter(r)
}
// is_digit reports whether `r` is a decimal digit (ASCII fast path first).
is_digit :: proc(r: rune) -> bool {
	if '0' <= r && r <= '9' {
		return true
	}
	return unicode.is_digit(r)
}


// scan_comment scans a comment whose first character ('/' or '#') has
// already been consumed: `//...`/`#!...` line comments, or nested
// `/* ... */` block comments. Returns the comment text with any trailing
// '\r' stripped from line comments.
scan_comment :: proc(t: ^Tokenizer) -> string {
	offset := t.offset-1
	general: {
		if t.ch == '/' || t.ch == '!' { // "//" and "#!" line comments
			advance_rune(t)
			for t.ch != '\n' && t.ch >= 0 {
				advance_rune(t)
			}
			break general
		}

		/* style comment — may nest */
		advance_rune(t)
		nest := 1
		for t.ch >= 0 && nest > 0 {
			ch := t.ch
			advance_rune(t)
			if ch == '/' && t.ch == '*' {
				nest += 1
			}

			if ch == '*' && t.ch == '/' {
				nest -= 1
				advance_rune(t)
				if nest == 0 {
					break general
				}
			}
		}

		error(t, offset, "comment not terminated")
	}

	lit := t.src[offset : t.offset]

	// NOTE(bill): Strip CR for line comments
	for len(lit) > 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
		lit = lit[:len(lit)-1]
	}


	return string(lit)
}

// scan_identifier consumes an identifier starting at the current rune and
// returns its text.
scan_identifier :: proc(t: ^Tokenizer) -> string {
	offset := t.offset

	for is_letter(t.ch) || is_digit(t.ch) {
		advance_rune(t)
	}

	return string(t.src[offset : t.offset])
}

// scan_string scans a double-quoted string literal whose opening '"' has
// already been consumed; the returned text includes both quotes.
scan_string :: proc(t: ^Tokenizer) -> string {
	offset := t.offset-1

	for {
		ch := t.ch
		if ch == '\n' || ch < 0 {
			// Strings may not span lines.
			error(t, offset, "string literal was not terminated")
			break
		}
		advance_rune(t)
		if ch == '"' {
			break
		}
		if ch == '\\' {
			scan_escape(t)
		}
	}

	return string(t.src[offset : t.offset])
}

// scan_raw_string scans a backtick-quoted raw string whose opening '`'
// has already been consumed; raw strings may span lines and contain no
// escape sequences.
scan_raw_string :: proc(t: ^Tokenizer) -> string {
	offset := t.offset-1

	for {
		ch := t.ch
		if ch == utf8.RUNE_EOF {
			error(t, offset, "raw string literal was not terminated")
			break
		}
		advance_rune(t)
		if ch == '`' {
			break
		}
	}

	return string(t.src[offset : t.offset])
}

// digit_val returns the numeric value of a hexadecimal digit, or 16 as a
// sentinel meaning "not a digit in any base <= 16".
digit_val :: proc(r: rune) -> int {
	switch r {
	case '0'..='9':
		return int(r-'0')
	case 'A'..='F':
		return int(r-'A' + 10)
	case 'a'..='f':
		return int(r-'a' + 10)
	}
	return 16
}

// scan_escape validates one escape sequence inside a rune or string
// literal; the leading '\\' has already been consumed. Returns false
// (after reporting an error) for unknown escapes, short digit runs, and
// values that are not valid Unicode code points.
scan_escape :: proc(t: ^Tokenizer) -> bool {
	offset := t.offset

	n: int
	base, max: u32
	switch t.ch {
	case 'a', 'b', 'e', 'f', 'n', 't', 'v', 'r', '\\', '\'', '\"':
		advance_rune(t)
		return true

	case '0'..='7':
		n, base, max = 3, 8, 255 // \ooo — current rune is the first digit
	case 'x':
		advance_rune(t)
		n, base, max = 2, 16, 255
	case 'u':
		advance_rune(t)
		n, base, max = 4, 16, utf8.MAX_RUNE
	case 'U':
		advance_rune(t)
		n, base, max = 8, 16, utf8.MAX_RUNE
	case:
		if t.ch < 0 {
			error(t, offset, "escape sequence was not terminated")
		} else {
			error(t, offset, "unknown escape sequence")
		}
		return false
	}

	x: u32
	for n > 0 {
		d := u32(digit_val(t.ch))
		// Was `for d >= base`, an `if` written as a loop whose body always
		// returns — same behavior, but `if` states the intent.
		if d >= base {
			if t.ch < 0 {
				error(t, t.offset, "escape sequence was not terminated")
			} else {
				error(t, t.offset, "illegal character %d in escape sequence", t.ch)
			}
			return false
		}

		x = x*base + d
		advance_rune(t)
		n -= 1
	}

	// Reject values above the range maximum and UTF-16 surrogates
	// (U+D800..U+DFFF). The previous bound `x <= 0xE000` wrongly rejected
	// U+E000, which is a valid (private-use) code point.
	if x > max || 0xD800 <= x && x < 0xE000 {
		error(t, offset, "escape sequence is an invalid Unicode code point")
		return false
	}
	return true
}

// scan_rune scans a rune literal whose opening '\'' has already been
// consumed; the returned text includes both quotes. Reports literals that
// are unterminated or contain other than exactly one rune.
scan_rune :: proc(t: ^Tokenizer) -> string {
	offset := t.offset-1
	valid := true
	n := 0 // number of runes between the quotes; must end up as 1
	for {
		ch := t.ch
		if ch == '\n' || ch < 0 {
			if valid {
				error(t, offset, "rune literal not terminated")
				valid = false
			}
			break
		}
		advance_rune(t)
		if ch == '\'' {
			break
		}
		n += 1
		if ch == '\\' {
			if !scan_escape(t) {
				valid = false
			}
		}
	}

	if valid && n != 1 {
		error(t, offset, "illegal rune literal")
	}

	return string(t.src[offset : t.offset])
}

// scan_number scans an integer, float, or imaginary literal and returns
// its kind and text. `seen_decimal_point` is true when the caller already
// consumed a leading '.' (e.g. ".5"). Handles the prefixed bases
// 0b/0o/0d/0z/0x, the 0h hexadecimal-float form, '_' digit separators,
// exponents, and the i/j/k imaginary suffixes.
scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Token_Kind, string) {
	// Consume digits of `base` (plus '_' separators).
	scan_mantissa :: proc(t: ^Tokenizer, base: int) {
		for digit_val(t.ch) < base || t.ch == '_' {
			advance_rune(t)
		}
	}
	// Consume an optional e/E exponent, then an optional imaginary suffix.
	scan_exponent :: proc(t: ^Tokenizer, kind: ^Token_Kind) {
		if t.ch == 'e' || t.ch == 'E' {
			kind^ = .Float
			advance_rune(t)
			if t.ch == '-' || t.ch == '+' {
				advance_rune(t)
			}
			if digit_val(t.ch) < 10 {
				scan_mantissa(t, 10)
			} else {
				error(t, t.offset, "illegal floating-point exponent")
			}
		}

		// NOTE(bill): This needs to be here for sanity's sake
		switch t.ch {
		case 'i', 'j', 'k':
			kind^ = .Imag
			advance_rune(t)
		}
	}
	// Consume an optional fractional part. Returns true ("early exit")
	// when the '.' actually begins a ".."/"..<"/"..=" range operator.
	scan_fraction :: proc(t: ^Tokenizer, kind: ^Token_Kind) -> (early_exit: bool) {
		if t.ch == '.' && peek_byte(t) == '.' {
			return true
		}
		if t.ch == '.' {
			kind^ = .Float
			advance_rune(t)
			scan_mantissa(t, 10)
		}
		return false
	}


	offset := t.offset
	kind := Token_Kind.Integer
	seen_point := seen_decimal_point

	if seen_point {
		offset -= 1 // include the already-consumed '.'
		kind = .Float
		scan_mantissa(t, 10)
		scan_exponent(t, &kind)
	} else {
		if t.ch == '0' {
			// Scan a base-prefixed integer body, requiring at least one digit.
			int_base :: proc(t: ^Tokenizer, kind: ^Token_Kind, base: int, msg: string) {
				prev := t.offset
				advance_rune(t)
				scan_mantissa(t, base)
				if t.offset - prev <= 1 {
					kind^ = .Invalid
					error(t, t.offset, msg)
				}
			}

			advance_rune(t)
			switch t.ch {
			case 'b': int_base(t, &kind, 2, "illegal binary integer")
			case 'o': int_base(t, &kind, 8, "illegal octal integer")
			case 'd': int_base(t, &kind, 10, "illegal decimal integer")
			case 'z': int_base(t, &kind, 12, "illegal dozenal integer")
			case 'x': int_base(t, &kind, 16, "illegal hexadecimal integer")
			case 'h':
				prev := t.offset
				advance_rune(t)
				scan_mantissa(t, 16)
				if t.offset - prev <= 1 {
					kind = .Invalid
					error(t, t.offset, "illegal hexadecimal floating-point number")
				} else {
					// 0h floats must spell out a whole f16/f32/f64 bit
					// pattern: exactly 4, 8, or 16 hex digits ('_' ignored).
					sub := t.src[prev+1 : t.offset]
					digit_count := 0
					for d in sub {
						if d != '_' {
							digit_count += 1
						}
					}

					switch digit_count {
					case 4, 8, 16: break
					case:
						error(t, t.offset, "invalid hexadecimal floating-point number, expected 4, 8, or 16 digits, got %d", digit_count)
					}
				}

			case:
				// Plain number with a leading 0.
				seen_point = false
				scan_mantissa(t, 10)
				if t.ch == '.' {
					seen_point = true
					if scan_fraction(t, &kind) {
						return kind, string(t.src[offset : t.offset])
					}
				}
				scan_exponent(t, &kind)
				return kind, string(t.src[offset : t.offset])
			}
		}
	}

	scan_mantissa(t, 10)

	if scan_fraction(t, &kind) {
		return kind, string(t.src[offset : t.offset])
	}

	scan_exponent(t, &kind)

	return kind, string(t.src[offset : t.offset])
}


// scan returns the next token from the source: it skips whitespace,
// dispatches on the first rune (identifier/keyword, number, literal, or
// operator — resolving maximal-munch forms such as "..<", "&~=", "%%="),
// synthesizes implicit semicolons at newlines/EOF when one is pending,
// and finally updates the pending-semicolon state for the token just
// produced (when .Insert_Semicolon is enabled).
scan :: proc(t: ^Tokenizer) -> Token {
	skip_whitespace(t)

	offset := t.offset

	kind: Token_Kind
	lit: string
	pos := offset_to_pos(t, offset)

	switch ch := t.ch; true {
	case is_letter(ch):
		lit = scan_identifier(t)
		kind = .Ident
		check_keyword: if len(lit) > 1 {
			// TODO(bill): Maybe have a hash table lookup rather than this linear search
			for i in Token_Kind.B_Keyword_Begin ..= Token_Kind.B_Keyword_End {
				if lit == tokens[i] {
					kind = Token_Kind(i)
					break check_keyword
				}
			}
			// Custom keyword i is encoded as B_Custom_Keyword_Begin + 1 + i
			// (token_kind_to_string reverses this).
			for keyword, i in custom_keyword_tokens {
				if lit == keyword {
					kind = Token_Kind(i+1) + .B_Custom_Keyword_Begin
					break check_keyword
				}
			}
			break check_keyword
		}
	case '0' <= ch && ch <= '9':
		kind, lit = scan_number(t, false)
	case:
		advance_rune(t)
		switch ch {
		case -1:
			kind = .EOF
			if t.insert_semicolon {
				// Emit one final implicit semicolon before EOF.
				t.insert_semicolon = false
				kind = .Semicolon
				lit = "\n"
				return Token{kind, lit, pos}
			}
		case '\n':
			// Only reachable when a semicolon was pending (otherwise
			// skip_whitespace consumes newlines).
			t.insert_semicolon = false
			kind = .Semicolon
			lit = "\n"
		case '\\':
			// Line continuation: suppress the pending semicolon and scan past it.
			if .Insert_Semicolon in t.flags {
				t.insert_semicolon = false
			}
			token := scan(t)
			if token.pos.line == pos.line {
				error(t, token.pos.offset, "expected a newline after \\")
			}
			return token

		case '\'':
			kind = .Rune
			lit = scan_rune(t)
		case '"':
			kind = .String
			lit = scan_string(t)
		case '`':
			kind = .String
			lit = scan_raw_string(t)
		case '.':
			kind = .Period
			switch t.ch {
			case '0'..='9':
				// ".5" style float; scan_number re-includes the '.'.
				kind, lit = scan_number(t, true)
			case '.':
				advance_rune(t)
				kind = .Ellipsis
				switch t.ch {
				case '<':
					advance_rune(t)
					kind = .Range_Half
				case '=':
					advance_rune(t)
					kind = .Range_Full
				}
			}
		case '@': kind = .At
		case '$': kind = .Dollar
		case '?': kind = .Question
		case '^': kind = .Pointer
		case ';': kind = .Semicolon
		case ',': kind = .Comma
		case ':': kind = .Colon
		case '(': kind = .Open_Paren
		case ')': kind = .Close_Paren
		case '[': kind = .Open_Bracket
		case ']': kind = .Close_Bracket
		case '{': kind = .Open_Brace
		case '}': kind = .Close_Brace
		case '%':
			kind = .Mod
			switch t.ch {
			case '=':
				advance_rune(t)
				kind = .Mod_Eq
			case '%':
				advance_rune(t)
				kind = .Mod_Mod
				if t.ch == '=' {
					advance_rune(t)
					kind = .Mod_Mod_Eq
				}
			}
		case '*':
			kind = .Mul
			if t.ch == '=' {
				advance_rune(t)
				kind = .Mul_Eq
			}
		case '=':
			kind = .Eq
			if t.ch == '=' {
				advance_rune(t)
				kind = .Cmp_Eq
			}
		case '~':
			kind = .Xor
			if t.ch == '=' {
				advance_rune(t)
				kind = .Xor_Eq
			}
		case '!':
			kind = .Not
			if t.ch == '=' {
				advance_rune(t)
				kind = .Not_Eq
			}
		case '+':
			kind = .Add
			switch t.ch {
			case '=':
				advance_rune(t)
				kind = .Add_Eq
			case '+':
				advance_rune(t)
				kind = .Increment
			}
		case '-':
			kind = .Sub
			switch t.ch {
			case '-':
				advance_rune(t)
				kind = .Decrement
				if t.ch == '-' {
					advance_rune(t)
					kind = .Undef
				}
			case '>':
				advance_rune(t)
				kind = .Arrow_Right
			case '=':
				advance_rune(t)
				kind = .Sub_Eq
			}
		case '#':
			kind = .Hash
			if t.ch == '!' {
				// "#!" shebang-style line comment.
				kind = .Comment
				lit = scan_comment(t)
			}
		case '/':
			kind = .Quo
			switch t.ch {
			case '/', '*':
				kind = .Comment
				lit = scan_comment(t)
			case '=':
				advance_rune(t)
				kind = .Quo_Eq
			}
		case '<':
			kind = .Lt
			switch t.ch {
			case '=':
				advance_rune(t)
				kind = .Lt_Eq
			case '<':
				advance_rune(t)
				kind = .Shl
				if t.ch == '=' {
					advance_rune(t)
					kind = .Shl_Eq
				}
			}
		case '>':
			kind = .Gt
			switch t.ch {
			case '=':
				advance_rune(t)
				kind = .Gt_Eq
			case '>':
				advance_rune(t)
				kind = .Shr
				if t.ch == '=' {
					advance_rune(t)
					kind = .Shr_Eq
				}
			}
		case '&':
			kind = .And
			switch t.ch {
			case '~':
				advance_rune(t)
				kind = .And_Not
				if t.ch == '=' {
					advance_rune(t)
					kind = .And_Not_Eq
				}
			case '=':
				advance_rune(t)
				kind = .And_Eq
			case '&':
				advance_rune(t)
				kind = .Cmp_And
				if t.ch == '=' {
					advance_rune(t)
					kind = .Cmp_And_Eq
				}
			}
		case '|':
			kind = .Or
			switch t.ch {
			case '=':
				advance_rune(t)
				kind = .Or_Eq
			case '|':
				advance_rune(t)
				kind = .Cmp_Or
				if t.ch == '=' {
					advance_rune(t)
					kind = .Cmp_Or_Eq
				}
			}
		case:
			if ch != utf8.RUNE_BOM {
				error(t, t.offset, "illegal character '%r': %d", ch, ch)
			}
			kind = .Invalid
		}
	}

	if .Insert_Semicolon in t.flags {
		#partial switch kind {
		case .Invalid, .Comment:
			// Preserve insert_semicolon info
		case .Ident, .Context, .Typeid, .Break, .Continue, .Fallthrough, .Return,
		     .Integer, .Float, .Imag, .Rune, .String, .Undef,
		     .Question, .Pointer, .Close_Paren, .Close_Bracket, .Close_Brace,
		     .Increment, .Decrement, .Or_Return, .Or_Break, .Or_Continue:
			// A token that can end a statement: a following newline
			// becomes an implicit semicolon.
			t.insert_semicolon = true
		case:
			t.insert_semicolon = false
		}
	}

	if lit == "" {
		// Operators and keywords: the literal is the consumed source slice.
		lit = string(t.src[offset : t.offset])
	}
	return Token{kind, lit, pos}
}
diff --git a/core/odin/frontend/unknown.odin b/core/odin/frontend/unknown.odin
new file mode 100644
index 00000000000..e733ed0a582
--- /dev/null
+++ b/core/odin/frontend/unknown.odin
@@ -0,0 +1,3
@@ +package odin_frontend + +// NOTE(dragos): This is the place where I'm gonna put things that I'm yet to figure out \ No newline at end of file