Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add core:text/regex #3962

Merged
merged 24 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cb0704d
Add `core:text/regex`
Feoramund Jul 22, 2024
730e10b
Support printing `Regular_Expression` in `fmt`
Feoramund Jul 22, 2024
3e49ceb
Add tests for `core:text/regex`
Feoramund Jul 22, 2024
be38ba6
Add benchmarks for `core:text/regex`
Feoramund Jul 22, 2024
b8f3d0f
Add `core:text/regex` to `examples/all`
Feoramund Jul 22, 2024
e642be8
Fix handling of unclosed `regex` classes and repetitions
Feoramund Jul 24, 2024
e8537a3
Add test cases for unclosed classes and repetition
Feoramund Jul 24, 2024
16b644a
Use `slice.zero` instead
Feoramund Jul 24, 2024
c52a8a5
Allow configuring of `MAX_CAPTURE_GROUPS` for `n` > 10
Feoramund Jul 24, 2024
042f6de
Remove printing facilities for `Regular_Expression`
Feoramund Jul 24, 2024
ff492e6
Use `unaligned_load` for `regex` virtual machine
Feoramund Jul 24, 2024
90f1f7f
Use `unaligned_store` in `regex` too
Feoramund Jul 24, 2024
6252712
Add missing features to `regex` package documentation
Feoramund Aug 4, 2024
cd82725
Test that a RegEx Capture `pos` corresponds to its `groups`
Feoramund Aug 4, 2024
d3a51e2
Hide `Regular_Expression` values
Feoramund Aug 4, 2024
babdc43
Move `Flag_To_Letter` to `core:text/regex/common`
Feoramund Aug 4, 2024
1ccb0b2
Remove unused code
Feoramund Aug 4, 2024
743480b
Use `regex.destroy` for test captures
Feoramund Aug 4, 2024
ca7e46d
Add explicit test case for Capture `pos`
Feoramund Aug 4, 2024
dde42f0
Add more documentation for `core:text/regex` API
Feoramund Aug 4, 2024
e17fc82
Document rationale behind RegEx shorthand classes
Feoramund Aug 4, 2024
1485830
Add explicit license info to `core:text/regex`
Feoramund Aug 4, 2024
8f5b838
Review manual `for` loops in `core:text/regex`
Feoramund Aug 5, 2024
d0d4f19
Remove debug line from test
Feoramund Aug 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions core/fmt/fmt.odin
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import "core:io"
import "core:reflect"
import "core:strconv"
import "core:strings"
import "core:text/regex"
import "core:time"
import "core:unicode/utf8"

Expand Down Expand Up @@ -2405,6 +2406,21 @@ fmt_named :: proc(fi: ^Info, v: any, verb: rune, info: runtime.Type_Info_Named)
write_padded_number(fi, (ns), 9)
io.write_string(fi.writer, " +0000 UTC", &fi.n)
return

case regex.Regular_Expression:
Feoramund marked this conversation as resolved.
Show resolved Hide resolved
io.write_byte(fi.writer, '/')
for r in a.original_pattern {
if r == '/' {
io.write_string(fi.writer, `\/`)
} else {
io.write_rune(fi.writer, r)
}
}
io.write_byte(fi.writer, '/')
for flag in a.flags {
io.write_byte(fi.writer, regex.Flag_To_Letter[flag])
}
return
}
}

Expand Down
27 changes: 27 additions & 0 deletions core/text/regex/common/common.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// This package helps break dependency cycles.
package regex_common

// VM limitations
//
// Fixed limit of 10 on capture groups.
// NOTE(review): presumably enforced by the regex compiler/VM, which are not
// visible from this file - confirm where the limit is actually checked.
MAX_CAPTURE_GROUPS :: 10
Feoramund marked this conversation as resolved.
Show resolved Hide resolved
// The largest value an `i16` can hold (32767), converted to `int`.
// NOTE(review): the unit (bytes vs. instructions) is not visible here - confirm.
MAX_PROGRAM_SIZE :: int(max(i16))
// The largest value a `u8` can hold (255), converted to `int`.
MAX_CLASSES :: int(max(u8))

// Flag enumerates the options that can be set on a regular expression.
// It is backed by a `u8`; a set of these is stored as `Flags`.
Flag :: enum u8 {
// Global: try to match the pattern anywhere in the string.
Global,
// Multiline: treat `^` and `$` as if they also match newlines.
Multiline,
// Case Insensitive: treat `a-z` as if it was also `A-Z`.
Case_Insensitive,
// Ignore Whitespace: bypass unescaped whitespace outside of classes.
Ignore_Whitespace,
// Unicode: let the compiler and virtual machine know to expect Unicode strings.
Unicode,

// No Capture: avoid saving capture group data entirely.
No_Capture,
// No Optimization: do not pass the pattern through the optimizer; for debugging.
No_Optimization,
}

// Flags is a set of `Flag` values packed into a single `u8`.
// The seven flags declared above fit within its eight bits.
Flags :: bit_set[Flag; u8]
25 changes: 25 additions & 0 deletions core/text/regex/common/debugging.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package regex_common

@require import "core:os"
import "core:io"
import "core:strings"

// Compile-time switch read via `#config`; defaults to `false` when the
// `ODIN_DEBUG_REGEX` define is not supplied to the build.
ODIN_DEBUG_REGEX :: #config(ODIN_DEBUG_REGEX, false)

// `debug_stream` is only declared when debugging is enabled.
// It is a stream created from the process's standard error handle.
when ODIN_DEBUG_REGEX {
debug_stream := os.stream_from_handle(os.stderr)
}

// write_padded_hex writes `n` to `w` in hexadecimal with a leading "0x",
// left-padding the digits with '0' so that at least `zeroes` digit
// characters follow the prefix. Values that already need `zeroes` or more
// characters are written without padding.
//
// The digits are first rendered into a temporary string builder so that
// their length is known before any padding is emitted.
write_padded_hex :: proc(w: io.Writer, #any_int n, zeroes: int) {
	scratch := strings.builder_make()
	defer strings.builder_destroy(&scratch)

	// Render the digits once; the buffered text is reused for the final write.
	io.write_int(strings.to_writer(&scratch), n, 0x10)
	digits := strings.to_string(scratch)

	io.write_string(w, "0x")

	// A non-positive remainder means no padding is required.
	padding := zeroes - len(digits)
	for padding > 0 {
		io.write_byte(w, '0')
		padding -= 1
	}

	io.write_string(w, digits)
}
Loading