diff --git a/cmd/internal/jlog/jlog.go b/cmd/internal/jlog/jlog.go index be66032..ff43948 100644 --- a/cmd/internal/jlog/jlog.go +++ b/cmd/internal/jlog/jlog.go @@ -28,13 +28,14 @@ type Output struct { } type General struct { - MatchRegex string `short:"g" long:"regex" description:"A regular expression that removes lines from the output that don't match, like grep."` - NoMatchRegex string `short:"G" long:"no-regex" description:"A regular expression that removes lines from the output that DO match, like 'grep -v'."` - JQ string `short:"e" long:"jq" description:"A jq program to run on each record in the processed input; use this to ignore certain lines, add fields, etc. Hint: 'select(condition)' will remove lines that don't match 'condition'."` - JQSearchPath []string `long:"jq-search-path" env:"JLOG_JQ_SEARCH_PATH" description:"A list of directories in which to search for JQ modules. A path entry named (not merely ending in) .jq is automatically loaded. When set through the environment, use ':' as the delimiter (like $PATH)." default:"~/.jq" default:"~/.jlog/jq/.jq" default:"~/.jlog/jq" env-delim:":"` //nolint - NoColor bool `short:"M" long:"no-color" description:"Disable the use of color." env:"JLOG_FORCE_MONOCHROME"` - NoMonochrome bool `short:"c" long:"no-monochrome" description:"Force the use of color." env:"JLOG_FORCE_COLOR"` - Profile string `long:"profile" description:"If set, collect a CPU profile and write it to this file."` + MatchRegex string `short:"g" long:"regex" description:"A regular expression that removes lines from the output that don't match, like grep."` + NoMatchRegex string `short:"G" long:"no-regex" description:"A regular expression that removes lines from the output that DO match, like 'grep -v'."` + RegexpScope *parse.RegexpScope `short:"S" long:"regex-scope" description:"Where to apply the provided regex; (m)essage, (k)eys, or (v)alues. 'kmv' looks in all scopes, 'k' only searches keys, etc." default:"kmv"` + JQ string `short:"e" long:"jq" description:"A jq program to run on each record in the processed input; use this to ignore certain lines, add fields, etc. Hint: 'select(condition)' will remove lines that don't match 'condition'."` + JQSearchPath []string `long:"jq-search-path" env:"JLOG_JQ_SEARCH_PATH" description:"A list of directories in which to search for JQ modules. A path entry named (not merely ending in) .jq is automatically loaded. When set through the environment, use ':' as the delimiter (like $PATH)." default:"~/.jq" default:"~/.jlog/jq/.jq" default:"~/.jlog/jq" env-delim:":"` //nolint + NoColor bool `short:"M" long:"no-color" description:"Disable the use of color." env:"JLOG_FORCE_MONOCHROME"` + NoMonochrome bool `short:"c" long:"no-monochrome" description:"Force the use of color." env:"JLOG_FORCE_COLOR"` + Profile string `long:"profile" description:"If set, collect a CPU profile and write it to this file."` Version bool `short:"v" long:"version" description:"Print version information and exit."` } @@ -182,6 +183,9 @@ func NewFilterScheme(gen General) (*parse.FilterScheme, error) { //nolint if err := fsch.AddJQ(gen.JQ, &parse.JQOptions{SearchPath: gen.JQSearchPath}); err != nil { return nil, fmt.Errorf("adding JQ: %v", err) } + if gen.RegexpScope != nil { + fsch.Scope = *gen.RegexpScope + } return fsch, nil } diff --git a/pkg/parse/filter.go b/pkg/parse/filter.go index bb19a86..628f909 100644 --- a/pkg/parse/filter.go +++ b/pkg/parse/filter.go @@ -1,10 +1,12 @@ package parse import ( + "encoding/json" "errors" "fmt" "os" "regexp" + "strings" "github.com/itchyny/gojq" ) @@ -14,6 +16,7 @@ type FilterScheme struct { JQ *gojq.Code MatchRegex *regexp.Regexp NoMatchRegex *regexp.Regexp + Scope RegexpScope } // DefaultVariables are variables available to JQ programs. @@ -123,24 +126,87 @@ func (f *FilterScheme) runJQ(l *line) (bool, error) { return filtered, nil } -// regexpScope determines what fields a regexp should run against. Not implemented yet. -type regexpScope int +// RegexpScope determines what fields a regexp should run against. +type RegexpScope int const ( - regexpScopeUnknown regexpScope = iota - regexpScopeMessage + RegexpScopeMessage = 1 << iota + RegexpScopeKeys + RegexpScopeValues ) +func (s RegexpScope) String() string { + var parts []string + if s&RegexpScopeMessage > 0 { + parts = append(parts, "m") + } + if s&RegexpScopeKeys > 0 { + parts = append(parts, "k") + } + if s&RegexpScopeValues > 0 { + parts = append(parts, "v") + } + return strings.Join(parts, "") +} + +func (s *RegexpScope) UnmarshalText(text []byte) error { + for _, b := range text { + switch b { + case 'm': + *s |= RegexpScopeMessage + case 'k': + *s |= RegexpScopeKeys + case 'v': + *s |= RegexpScopeValues + default: + return fmt.Errorf("unrecognized scope character '%c'; [kmv] are recognized", b) + } + } + return nil +} + +func (s RegexpScope) MarshalFlag() string { return s.String() } +func (s *RegexpScope) UnmarshalFlag(text string) error { return s.UnmarshalText([]byte(text)) } + // runRegexp runs the regexp, returning whether or not it matched. -func runRegexp(rx *regexp.Regexp, l *line, scope regexpScope) bool { - var input string - switch scope { - case regexpScopeUnknown: - panic("unknown regexp scope") - case regexpScopeMessage: - input = l.msg +func runRegexp(rx *regexp.Regexp, l *line, scope RegexpScope) bool { + if scope&RegexpScopeMessage > 0 { + if applyRegexp(rx, l, l.msg) { + return true + } + } + if scope&RegexpScopeKeys > 0 { + for k := range l.fields { + if applyRegexp(rx, l, k) { + return true + } + } } + if scope&RegexpScopeValues > 0 { + var addErr error + for _, v := range l.fields { + j, err := json.Marshal(v) + if err != nil { + // This is very unlikely to happen, but Go code can mutate l.fields + // to produce something unmarshalable. + addErr = err + continue + } + if applyRegexp(rx, l, string(j)) { + return true + } + } + // This is done so that regexps can't match the error message we generate here. + if addErr != nil { + l.fields["jlog_match_marshal_error"] = addErr.Error() + } + } + return false +} +// applyRegexp runs the regexp against the provided input, modifying the line in-place and retruning +// whether the regexp matched. +func applyRegexp(rx *regexp.Regexp, l *line, input string) bool { fields := rx.FindStringSubmatch(input) if len(fields) == 0 { return false @@ -162,12 +228,12 @@ func runRegexp(rx *regexp.Regexp, l *line, scope regexpScope) bool { func (f *FilterScheme) Run(l *line) (bool, error) { rxFiltered := false if rx := f.NoMatchRegex; rx != nil { - if found := runRegexp(rx, l, regexpScopeMessage); found { + if found := runRegexp(rx, l, f.Scope); found { rxFiltered = true } } if rx := f.MatchRegex; rx != nil { - if found := runRegexp(rx, l, regexpScopeMessage); !found { + if found := runRegexp(rx, l, f.Scope); !found { rxFiltered = true } } diff --git a/pkg/parse/filter_test.go b/pkg/parse/filter_test.go index 8366f86..d4eb61b 100644 --- a/pkg/parse/filter_test.go +++ b/pkg/parse/filter_test.go @@ -3,7 +3,9 @@ package parse import ( "os" "path/filepath" + "regexp" "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -283,3 +285,179 @@ func TestAdds(t *testing.T) { }) } } + +func TestRegexp(t *testing.T) { + defaultFields := func(add map[string]string) map[string]any { + result := map[string]any{ + "string": "string", + "map": map[string]any{ + "string": "bar", + "bytes": []byte("baz"), + "map": map[string]any{ + "text": "quux", + "number": 2, + }, + "slice": []any{"text", []byte("binary"), 21, 4.321}, + }, + "slice": []any{"string", []byte("bytes"), 42, 1.234}, + } + for k, v := range add { + result[k] = v + } + return result + } + testData := []struct { + name string + pattern string + scope RegexpScope + wantMatch bool + wantFields map[string]any + }{ + { + name: "message scope", + pattern: `here is a (?Pf(o+)[^[:space:]]+)$`, + scope: RegexpScopeMessage, + wantMatch: true, + wantFields: defaultFields(map[string]string{ + "what": "foobar", + "$2": "oo", + }), + }, + { + name: "message scope, no match", + pattern: `this does not match anything`, + scope: RegexpScopeMessage, + }, + { + name: "key scope", + pattern: `^(st.+)$`, + scope: RegexpScopeKeys, + wantMatch: true, + wantFields: defaultFields(map[string]string{ + "$1": "string", + }), + }, + { + name: "key scope, no match", + pattern: `no key with this name`, + scope: RegexpScopeKeys, + }, + { + name: "value scope", + pattern: `":\["([^"]+)"`, + scope: RegexpScopeValues, + wantMatch: true, + wantFields: defaultFields(map[string]string{ + "$1": "text", + }), + }, + { + name: "value scope, no match", + pattern: `no values that match this`, + scope: RegexpScopeValues, + }, + { + name: "all scopes, targeting message", + pattern: `^(here is a foobar)$`, + scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues, + wantMatch: true, + wantFields: defaultFields(map[string]string{ + "$1": "here is a foobar", + }), + }, + { + name: "all scopes, targeting keys", + pattern: `^(map)$`, + scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues, + wantMatch: true, + wantFields: defaultFields(map[string]string{ + "$1": "map", + }), + }, + { + name: "all scopes, targeting values", + pattern: `(\["text",[^\]]+\])`, + scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues, + wantMatch: true, + wantFields: defaultFields(map[string]string{ + "$1": `["text","YmluYXJ5",21,4.321]`, + }), + }, + { + name: "all scopes, no match", + pattern: `this does not appear anywhere`, + scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues, + }, + } + + for _, test := range testData { + t.Run(test.name, func(t *testing.T) { + rx := regexp.MustCompile(test.pattern) + var l line + l.time = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) + l.lvl = LevelInfo + l.msg = "here is a foobar" + l.raw = []byte(`{"msg":"here is a foobar"}`) + l.fields = defaultFields(nil) + if test.wantFields == nil { + test.wantFields = defaultFields(nil) + } + + matched := runRegexp(rx, &l, test.scope) + if got, want := matched, test.wantMatch; got != want { + t.Errorf("matches?\n got: %v\n want: %v", got, want) + } + if diff := cmp.Diff(l.fields, test.wantFields, cmpopts.EquateEmpty()); diff != "" { + t.Errorf("fields:\n%s", diff) + } + }) + } +} + +func TestInvalidValues(t *testing.T) { + rx := regexp.MustCompile(`.`) + var l line + l.time = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) + l.lvl = LevelInfo + l.msg = "here is a foobar" + l.raw = []byte(`{"msg":"here is a foobar"}`) + l.fields = map[string]any{ + "foo": func() {}, + } + if got, want := runRegexp(rx, &l, RegexpScopeValues), false; got != want { + t.Errorf("matches with only 'foo'?\n got: %v\n want: %v", got, want) + } + delete(l.fields, "foo") + wantFields := map[string]any{ + "jlog_match_marshal_error": "json: unsupported type: func()", + } + if diff := cmp.Diff(l.fields, wantFields); diff != "" { + t.Errorf("fields:\n%s", diff) + } + + l.fields = map[string]any{ + "foo": func() {}, + "real thing": "this matches", + } + if got, want := runRegexp(rx, &l, RegexpScopeValues), true; got != want { + t.Errorf("matches with 'real thing'?\n got: %v\n want: %v", got, want) + } +} + +func TestScopeParsing(t *testing.T) { + for want := 0; want < RegexpScopeKeys|RegexpScopeValues|RegexpScopeMessage; want++ { + var got RegexpScope + s := RegexpScope(want).MarshalFlag() + if err := got.UnmarshalFlag(s); err != nil { + t.Fatalf("unmarshal scope %q (%v): %v", s, want, err) + } + if got != RegexpScope(want) { + t.Errorf("scope %q (%v): unmarshals to %v", s, want, got) + } + } + + var x RegexpScope + if err := x.UnmarshalFlag("this is invalid"); err == nil { + t.Errorf("expected error") + } +} diff --git a/pkg/parse/parse_test.go b/pkg/parse/parse_test.go index f2260d4..c56cc0a 100644 --- a/pkg/parse/parse_test.go +++ b/pkg/parse/parse_test.go @@ -1052,6 +1052,7 @@ func TestFullLog(t *testing.T) { testData := []struct { name string jq, matchregex, nomatchregex string + scope RegexpScope beforecontext, aftercontext int input []string wantOutput []string @@ -1151,6 +1152,7 @@ func TestFullLog(t *testing.T) { name: "regex match", input: testLog, matchregex: `(?Pstarted|finished) (?Pincoming|outgoing)`, + scope: RegexpScopeMessage, wantOutput: []string{ "DEBUG Jan 1 00:00:10.000000 started incoming request dir:incoming request_id:1234 route:/example state:started", "DEBUG .010000 started incoming request dir:↑ request_id:4321 route:/test state:↑", @@ -1165,6 +1167,7 @@ func TestFullLog(t *testing.T) { input: testLog, beforecontext: 2, matchregex: `(?Pstarted|finished) (?Pincoming|outgoing)`, + scope: RegexpScopeMessage, wantOutput: []string{ "DEBUG Jan 1 00:00:01.000002 reading config file:/tmp/config-overlay.json", "INFO .002000 serving port:8080", @@ -1181,6 +1184,7 @@ func TestFullLog(t *testing.T) { name: "regex match with jq", input: testLog, matchregex: `(?Pstarted|finished) (?Pincoming|outgoing)`, + scope: RegexpScopeMessage, jq: `select(.state == "started") | {state, dir}`, wantOutput: []string{ "DEBUG Jan 1 00:00:10.000000 started incoming request dir:incoming state:started", @@ -1192,6 +1196,7 @@ func TestFullLog(t *testing.T) { name: "regex match with jq, with context", input: testLog, matchregex: `(?Pstarted|finished) (?Pincoming|outgoing)`, + scope: RegexpScopeMessage, jq: `select(.state == "started") | {}`, beforecontext: 1, aftercontext: 1, @@ -1212,6 +1217,7 @@ func TestFullLog(t *testing.T) { beforecontext: 1, aftercontext: 1, nomatchregex: `(started|finished) incoming request`, + scope: RegexpScopeMessage, wantOutput: []string{ "INFO Jan 1 00:00:01.000000 start", "DEBUG .000001 reading config file:/tmp/config.json", @@ -1234,6 +1240,7 @@ func TestFullLog(t *testing.T) { beforecontext: 1, aftercontext: 1, nomatchregex: `(started|finished) incoming request`, + scope: RegexpScopeMessage, jq: `if ."$1" != null then {"$1"} else {} end`, wantOutput: []string{ "INFO Jan 1 00:00:01.000000 start", @@ -1251,12 +1258,23 @@ func TestFullLog(t *testing.T) { "INFO Jan 1 00:01:55.000000 connections drained", }, }, + { + name: "regex match in fields", + input: testLog, + matchregex: `200`, + scope: RegexpScopeValues, + wantOutput: []string{ + "DEBUG Jan 1 00:00:10.020000 finished incoming request request_id:1234 response_code:200 route:/example", + "DEBUG .031000 finished incoming request request_id:5432 response_code:↑ route:↑", + }, + }, { name: "no output, regex", input: testLog, beforecontext: 1, aftercontext: 1, matchregex: "this matches nothing", + scope: RegexpScopeMessage, }, { name: "no output, regex nomatch", @@ -1264,6 +1282,7 @@ func TestFullLog(t *testing.T) { beforecontext: 1, aftercontext: 1, nomatchregex: ".*", + scope: RegexpScopeMessage, }, { name: "jq", @@ -1286,6 +1305,7 @@ func TestFullLog(t *testing.T) { if err := fs.AddNoMatchRegex(test.nomatchregex); err != nil { t.Fatal(err) } + fs.Scope = test.scope r := strings.NewReader(strings.Join(test.input, "\n")) w := new(bytes.Buffer)