Skip to content

Commit

Permalink
filter: allow regexps to match any combination of the message, field …
Browse files Browse the repository at this point in the history
…keys, and field values
  • Loading branch information
jrockway committed Jul 9, 2022
1 parent 06a87fc commit cae9e59
Show file tree
Hide file tree
Showing 4 changed files with 288 additions and 20 deletions.
18 changes: 11 additions & 7 deletions cmd/internal/jlog/jlog.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ type Output struct {
}

type General struct {
MatchRegex string `short:"g" long:"regex" description:"A regular expression that removes lines from the output that don't match, like grep."`
NoMatchRegex string `short:"G" long:"no-regex" description:"A regular expression that removes lines from the output that DO match, like 'grep -v'."`
JQ string `short:"e" long:"jq" description:"A jq program to run on each record in the processed input; use this to ignore certain lines, add fields, etc. Hint: 'select(condition)' will remove lines that don't match 'condition'."`
JQSearchPath []string `long:"jq-search-path" env:"JLOG_JQ_SEARCH_PATH" description:"A list of directories in which to search for JQ modules. A path entry named (not merely ending in) .jq is automatically loaded. When set through the environment, use ':' as the delimiter (like $PATH)." default:"~/.jq" default:"~/.jlog/jq/.jq" default:"~/.jlog/jq" env-delim:":"` //nolint
NoColor bool `short:"M" long:"no-color" description:"Disable the use of color." env:"JLOG_FORCE_MONOCHROME"`
NoMonochrome bool `short:"c" long:"no-monochrome" description:"Force the use of color." env:"JLOG_FORCE_COLOR"`
Profile string `long:"profile" description:"If set, collect a CPU profile and write it to this file."`
MatchRegex string `short:"g" long:"regex" description:"A regular expression that removes lines from the output that don't match, like grep."`
NoMatchRegex string `short:"G" long:"no-regex" description:"A regular expression that removes lines from the output that DO match, like 'grep -v'."`
RegexpScope *parse.RegexpScope `short:"S" long:"regex-scope" description:"Where to apply the provided regex; (m)essage, (k)eys, or (v)alues. 'kmv' looks in all scopes, 'k' only searches keys, etc." default:"kmv"`
JQ string `short:"e" long:"jq" description:"A jq program to run on each record in the processed input; use this to ignore certain lines, add fields, etc. Hint: 'select(condition)' will remove lines that don't match 'condition'."`
JQSearchPath []string `long:"jq-search-path" env:"JLOG_JQ_SEARCH_PATH" description:"A list of directories in which to search for JQ modules. A path entry named (not merely ending in) .jq is automatically loaded. When set through the environment, use ':' as the delimiter (like $PATH)." default:"~/.jq" default:"~/.jlog/jq/.jq" default:"~/.jlog/jq" env-delim:":"` //nolint
NoColor bool `short:"M" long:"no-color" description:"Disable the use of color." env:"JLOG_FORCE_MONOCHROME"`
NoMonochrome bool `short:"c" long:"no-monochrome" description:"Force the use of color." env:"JLOG_FORCE_COLOR"`
Profile string `long:"profile" description:"If set, collect a CPU profile and write it to this file."`

Version bool `short:"v" long:"version" description:"Print version information and exit."`
}
Expand Down Expand Up @@ -182,6 +183,9 @@ func NewFilterScheme(gen General) (*parse.FilterScheme, error) { //nolint
if err := fsch.AddJQ(gen.JQ, &parse.JQOptions{SearchPath: gen.JQSearchPath}); err != nil {
return nil, fmt.Errorf("adding JQ: %v", err)
}
if gen.RegexpScope != nil {
fsch.Scope = *gen.RegexpScope
}
return fsch, nil
}

Expand Down
92 changes: 79 additions & 13 deletions pkg/parse/filter.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package parse

import (
"encoding/json"
"errors"
"fmt"
"os"
"regexp"
"strings"

"github.com/itchyny/gojq"
)
Expand All @@ -14,6 +16,7 @@ type FilterScheme struct {
JQ *gojq.Code
MatchRegex *regexp.Regexp
NoMatchRegex *regexp.Regexp
Scope RegexpScope
}

// DefaultVariables are variables available to JQ programs.
Expand Down Expand Up @@ -123,24 +126,87 @@ func (f *FilterScheme) runJQ(l *line) (bool, error) {
return filtered, nil
}

// regexpScope determines what fields a regexp should run against. Not implemented yet.
type regexpScope int
// RegexpScope determines what fields a regexp should run against.
type RegexpScope int

const (
regexpScopeUnknown regexpScope = iota
regexpScopeMessage
RegexpScopeMessage = 1 << iota
RegexpScopeKeys
RegexpScopeValues
)

func (s RegexpScope) String() string {
var parts []string
if s&RegexpScopeMessage > 0 {
parts = append(parts, "m")
}
if s&RegexpScopeKeys > 0 {
parts = append(parts, "k")
}
if s&RegexpScopeValues > 0 {
parts = append(parts, "v")
}
return strings.Join(parts, "")
}

func (s *RegexpScope) UnmarshalText(text []byte) error {
for _, b := range text {
switch b {
case 'm':
*s |= RegexpScopeMessage
case 'k':
*s |= RegexpScopeKeys
case 'v':
*s |= RegexpScopeValues
default:
return fmt.Errorf("unrecognized scope character '%c'; [kmv] are recognized", b)
}
}
return nil
}

func (s RegexpScope) MarshalFlag() string { return s.String() }
func (s *RegexpScope) UnmarshalFlag(text string) error { return s.UnmarshalText([]byte(text)) }

// runRegexp runs the regexp, returning whether or not it matched.
func runRegexp(rx *regexp.Regexp, l *line, scope regexpScope) bool {
var input string
switch scope {
case regexpScopeUnknown:
panic("unknown regexp scope")
case regexpScopeMessage:
input = l.msg
func runRegexp(rx *regexp.Regexp, l *line, scope RegexpScope) bool {
if scope&RegexpScopeMessage > 0 {
if applyRegexp(rx, l, l.msg) {
return true
}
}
if scope&RegexpScopeKeys > 0 {
for k := range l.fields {
if applyRegexp(rx, l, k) {
return true
}
}
}
if scope&RegexpScopeValues > 0 {
var addErr error
for _, v := range l.fields {
j, err := json.Marshal(v)
if err != nil {
// This is very unlikely to happen, but Go code can mutate l.fields
// to produce something unmarshalable.
addErr = err
continue
}
if applyRegexp(rx, l, string(j)) {
return true
}
}
// This is done so that regexps can't match the error message we generate here.
if addErr != nil {
l.fields["jlog_match_marshal_error"] = addErr.Error()
}
}
return false
}

// applyRegexp runs the regexp against the provided input, modifying the line in-place and retruning
// whether the regexp matched.
func applyRegexp(rx *regexp.Regexp, l *line, input string) bool {
fields := rx.FindStringSubmatch(input)
if len(fields) == 0 {
return false
Expand All @@ -162,12 +228,12 @@ func runRegexp(rx *regexp.Regexp, l *line, scope regexpScope) bool {
func (f *FilterScheme) Run(l *line) (bool, error) {
rxFiltered := false
if rx := f.NoMatchRegex; rx != nil {
if found := runRegexp(rx, l, regexpScopeMessage); found {
if found := runRegexp(rx, l, f.Scope); found {
rxFiltered = true
}
}
if rx := f.MatchRegex; rx != nil {
if found := runRegexp(rx, l, regexpScopeMessage); !found {
if found := runRegexp(rx, l, f.Scope); !found {
rxFiltered = true
}
}
Expand Down
178 changes: 178 additions & 0 deletions pkg/parse/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package parse
import (
"os"
"path/filepath"
"regexp"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
Expand Down Expand Up @@ -283,3 +285,179 @@ func TestAdds(t *testing.T) {
})
}
}

func TestRegexp(t *testing.T) {
defaultFields := func(add map[string]string) map[string]any {
result := map[string]any{
"string": "string",
"map": map[string]any{
"string": "bar",
"bytes": []byte("baz"),
"map": map[string]any{
"text": "quux",
"number": 2,
},
"slice": []any{"text", []byte("binary"), 21, 4.321},
},
"slice": []any{"string", []byte("bytes"), 42, 1.234},
}
for k, v := range add {
result[k] = v
}
return result
}
testData := []struct {
name string
pattern string
scope RegexpScope
wantMatch bool
wantFields map[string]any
}{
{
name: "message scope",
pattern: `here is a (?P<what>f(o+)[^[:space:]]+)$`,
scope: RegexpScopeMessage,
wantMatch: true,
wantFields: defaultFields(map[string]string{
"what": "foobar",
"$2": "oo",
}),
},
{
name: "message scope, no match",
pattern: `this does not match anything`,
scope: RegexpScopeMessage,
},
{
name: "key scope",
pattern: `^(st.+)$`,
scope: RegexpScopeKeys,
wantMatch: true,
wantFields: defaultFields(map[string]string{
"$1": "string",
}),
},
{
name: "key scope, no match",
pattern: `no key with this name`,
scope: RegexpScopeKeys,
},
{
name: "value scope",
pattern: `":\["([^"]+)"`,
scope: RegexpScopeValues,
wantMatch: true,
wantFields: defaultFields(map[string]string{
"$1": "text",
}),
},
{
name: "value scope, no match",
pattern: `no values that match this`,
scope: RegexpScopeValues,
},
{
name: "all scopes, targeting message",
pattern: `^(here is a foobar)$`,
scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues,
wantMatch: true,
wantFields: defaultFields(map[string]string{
"$1": "here is a foobar",
}),
},
{
name: "all scopes, targeting keys",
pattern: `^(map)$`,
scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues,
wantMatch: true,
wantFields: defaultFields(map[string]string{
"$1": "map",
}),
},
{
name: "all scopes, targeting values",
pattern: `(\["text",[^\]]+\])`,
scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues,
wantMatch: true,
wantFields: defaultFields(map[string]string{
"$1": `["text","YmluYXJ5",21,4.321]`,
}),
},
{
name: "all scopes, no match",
pattern: `this does not appear anywhere`,
scope: RegexpScopeMessage | RegexpScopeKeys | RegexpScopeValues,
},
}

for _, test := range testData {
t.Run(test.name, func(t *testing.T) {
rx := regexp.MustCompile(test.pattern)
var l line
l.time = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
l.lvl = LevelInfo
l.msg = "here is a foobar"
l.raw = []byte(`{"msg":"here is a foobar"}`)
l.fields = defaultFields(nil)
if test.wantFields == nil {
test.wantFields = defaultFields(nil)
}

matched := runRegexp(rx, &l, test.scope)
if got, want := matched, test.wantMatch; got != want {
t.Errorf("matches?\n got: %v\n want: %v", got, want)
}
if diff := cmp.Diff(l.fields, test.wantFields, cmpopts.EquateEmpty()); diff != "" {
t.Errorf("fields:\n%s", diff)
}
})
}
}

func TestInvalidValues(t *testing.T) {
rx := regexp.MustCompile(`.`)
var l line
l.time = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
l.lvl = LevelInfo
l.msg = "here is a foobar"
l.raw = []byte(`{"msg":"here is a foobar"}`)
l.fields = map[string]any{
"foo": func() {},
}
if got, want := runRegexp(rx, &l, RegexpScopeValues), false; got != want {
t.Errorf("matches with only 'foo'?\n got: %v\n want: %v", got, want)
}
delete(l.fields, "foo")
wantFields := map[string]any{
"jlog_match_marshal_error": "json: unsupported type: func()",
}
if diff := cmp.Diff(l.fields, wantFields); diff != "" {
t.Errorf("fields:\n%s", diff)
}

l.fields = map[string]any{
"foo": func() {},
"real thing": "this matches",
}
if got, want := runRegexp(rx, &l, RegexpScopeValues), true; got != want {
t.Errorf("matches with 'real thing'?\n got: %v\n want: %v", got, want)
}
}

func TestScopeParsing(t *testing.T) {
for want := 0; want < RegexpScopeKeys|RegexpScopeValues|RegexpScopeMessage; want++ {
var got RegexpScope
s := RegexpScope(want).MarshalFlag()
if err := got.UnmarshalFlag(s); err != nil {
t.Fatalf("unmarshal scope %q (%v): %v", s, want, err)
}
if got != RegexpScope(want) {
t.Errorf("scope %q (%v): unmarshals to %v", s, want, got)
}
}

var x RegexpScope
if err := x.UnmarshalFlag("this is invalid"); err == nil {
t.Errorf("expected error")
}
}
Loading

0 comments on commit cae9e59

Please sign in to comment.