diff --git a/arshal.go b/arshal.go index 04d3bb3..a3613d4 100644 --- a/arshal.go +++ b/arshal.go @@ -9,12 +9,19 @@ import ( "errors" "io" "reflect" + "slices" "sync" + "github.com/go-json-experiment/json/internal" "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" + "github.com/go-json-experiment/json/jsontext" ) +// export exposes internal functionality of the "jsontext" package. +var export = jsontext.Internal.Export(&internal.AllowInternalUse) + var structOptionsPool = &sync.Pool{New: func() any { return new(jsonopts.Struct) }} func getStructOptions() *jsonopts.Struct { @@ -146,11 +153,12 @@ func putStructOptions(o *jsonopts.Struct) { // JSON cannot represent cyclic data structures and Marshal does not handle them. // Passing cyclic structures will result in an error. func Marshal(in any, opts ...Options) (out []byte, err error) { - enc := getBufferedEncoder(opts...) - defer putBufferedEncoder(enc) - enc.options.Flags.Set(jsonflags.OmitTopLevelNewline | 1) - err = marshalEncode(enc, in, &enc.options) - return bytes.Clone(enc.buf), err + enc := export.GetBufferedEncoder(opts...) + defer export.PutBufferedEncoder(enc) + xe := export.Encoder(enc) + xe.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + err = marshalEncode(enc, in, &xe.Struct) + return bytes.Clone(xe.Buf), err } // MarshalWrite serializes a Go value into an [io.Writer] according to the provided @@ -158,10 +166,11 @@ func Marshal(in any, opts ...Options) (out []byte, err error) { // It does not terminate the output with a newline. // See [Marshal] for details about the conversion of a Go value into JSON. func MarshalWrite(out io.Writer, in any, opts ...Options) (err error) { - enc := getStreamingEncoder(out, opts...) 
- defer putStreamingEncoder(enc) - enc.options.Flags.Set(jsonflags.OmitTopLevelNewline | 1) - return marshalEncode(enc, in, &enc.options) + enc := export.GetStreamingEncoder(out, opts...) + defer export.PutStreamingEncoder(enc) + xe := export.Encoder(enc) + xe.Flags.Set(jsonflags.OmitTopLevelNewline | 1) + return marshalEncode(enc, in, &xe.Struct) } // MarshalEncode serializes a Go value into an [Encoder] according to the provided @@ -173,7 +182,8 @@ func MarshalEncode(out *Encoder, in any, opts ...Options) (err error) { mo := getStructOptions() defer putStructOptions(mo) mo.Join(opts...) - mo.CopyCoderOptions(&out.options) + xe := export.Encoder(out) + mo.CopyCoderOptions(&xe.Struct) return marshalEncode(out, in, mo) } @@ -198,8 +208,9 @@ func marshalEncode(out *Encoder, in any, mo *jsonopts.Struct) (err error) { marshal, _ = mo.Marshalers.(*Marshalers).lookup(marshal, t) } if err := marshal(out, va, mo); err != nil { - if !out.options.Flags.Get(jsonflags.AllowDuplicateNames) { - out.tokens.invalidateDisabledNamespaces() + xe := export.Encoder(out) + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { + xe.Tokens.InvalidateDisabledNamespaces() } return err } @@ -354,9 +365,10 @@ func marshalEncode(out *Encoder, in any, mo *jsonopts.Struct) (err error) { // For JSON objects, the input object is merged into the destination value // where matching object members recursively apply merge semantics. func Unmarshal(in []byte, out any, opts ...Options) (err error) { - dec := getBufferedDecoder(in, opts...) - defer putBufferedDecoder(dec) - return unmarshalFull(dec, out, &dec.options) + dec := export.GetBufferedDecoder(in, opts...) + defer export.PutBufferedDecoder(dec) + xd := export.Decoder(dec) + return unmarshalFull(dec, out, &xd.Struct) } // UnmarshalRead deserializes a Go value from an [io.Reader] according to the @@ -366,15 +378,16 @@ func Unmarshal(in []byte, out any, opts ...Options) (err error) { // without reporting an error for EOF. 
The output must be a non-nil pointer. // See [Unmarshal] for details about the conversion of JSON into a Go value. func UnmarshalRead(in io.Reader, out any, opts ...Options) (err error) { - dec := getStreamingDecoder(in, opts...) - defer putStreamingDecoder(dec) - return unmarshalFull(dec, out, &dec.options) + dec := export.GetStreamingDecoder(in, opts...) + defer export.PutStreamingDecoder(dec) + xd := export.Decoder(dec) + return unmarshalFull(dec, out, &xd.Struct) } func unmarshalFull(in *Decoder, out any, uo *jsonopts.Struct) error { switch err := unmarshalDecode(in, out, uo); err { case nil: - return in.checkEOF() + return export.Decoder(in).CheckEOF() case io.EOF: return io.ErrUnexpectedEOF default: @@ -394,7 +407,8 @@ func UnmarshalDecode(in *Decoder, out any, opts ...Options) (err error) { uo := getStructOptions() defer putStructOptions(uo) uo.Join(opts...) - uo.CopyCoderOptions(&in.options) + xd := export.Decoder(in) + uo.CopyCoderOptions(&xd.Struct) return unmarshalDecode(in, out, uo) } @@ -420,8 +434,9 @@ func unmarshalDecode(in *Decoder, out any, uo *jsonopts.Struct) (err error) { unmarshal, _ = uo.Unmarshalers.(*Unmarshalers).lookup(unmarshal, t) } if err := unmarshal(in, va, uo); err != nil { - if !in.options.Flags.Get(jsonflags.AllowDuplicateNames) { - in.tokens.invalidateDisabledNamespaces() + xd := export.Decoder(in) + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) { + xd.Tokens.InvalidateDisabledNamespaces() } return err } @@ -472,3 +487,29 @@ func lookupArshaler(t reflect.Type) *arshaler { v, _ := lookupArshalerCache.LoadOrStore(t, fncs) return v.(*arshaler) } + +var stringsPools = &sync.Pool{New: func() any { return new(stringSlice) }} + +type stringSlice []string + +// getStrings returns a non-nil pointer to a slice with length n. 
+func getStrings(n int) *stringSlice { + s := stringsPools.Get().(*stringSlice) + if cap(*s) < n { + *s = make([]string, n) + } + *s = (*s)[:n] + return s +} + +func putStrings(s *stringSlice) { + if cap(*s) > 1<<10 { + *s = nil // avoid pinning arbitrarily large amounts of memory + } + stringsPools.Put(s) +} + +// Sort sorts the string slice according to RFC 8785, section 3.2.3. +func (ss *stringSlice) Sort() { + slices.SortFunc(*ss, func(x, y string) int { return jsonwire.CompareUTF16(x, y) }) +} diff --git a/arshal_any.go b/arshal_any.go index 0057583..e7192e7 100644 --- a/arshal_any.go +++ b/arshal_any.go @@ -9,6 +9,7 @@ import ( "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) // This file contains an optimized marshal and unmarshal implementation @@ -48,8 +49,9 @@ func unmarshalValueAny(dec *Decoder, uo *jsonopts.Struct) (any, error) { case '[': return unmarshalArrayAny(dec, uo) default: - var flags valueFlags - val, err := dec.readValue(&flags) + xd := export.Decoder(dec) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return nil, err } @@ -61,13 +63,13 @@ func unmarshalValueAny(dec *Decoder, uo *jsonopts.Struct) (any, error) { case 't': return true, nil case '"': - val = unescapeStringMayCopy(val, flags.isVerbatim()) - if dec.stringCache == nil { - dec.stringCache = new(stringCache) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if xd.StringCache == nil { + xd.StringCache = new(stringCache) } - return dec.stringCache.make(val), nil + return makeString(xd.StringCache, val), nil case '0': - fv, _ := parseFloat(val, 64) // ignore error since readValue guarantees val is valid + fv, _ := jsonwire.ParseFloat(val, 64) // ignore error since readValue guarantees val is valid return fv, nil default: panic("BUG: invalid kind: " + k.String()) @@ -77,12 +79,13 @@ func unmarshalValueAny(dec *Decoder, uo 
*jsonopts.Struct) (any, error) { func marshalObjectAny(enc *Encoder, obj map[string]any, mo *jsonopts.Struct) error { // Check for cycles. - if enc.tokens.depth() > startDetectingCyclesAfter { + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { v := reflect.ValueOf(obj) - if err := enc.seenPointers.visit(v); err != nil { + if err := visitPointer(&xe.SeenPointers, v); err != nil { return err } - defer enc.seenPointers.leave(v) + defer leavePointer(&xe.SeenPointers, v) } // Handle empty maps. @@ -91,12 +94,12 @@ func marshalObjectAny(enc *Encoder, obj map[string]any, mo *jsonopts.Struct) err return enc.WriteToken(Null) } // Optimize for marshaling an empty map without any preceding whitespace. - if !enc.options.Flags.Get(jsonflags.Expand) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '{') - enc.buf = append(enc.buf, "{}"...) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '{') + xe.Buf = append(xe.Buf, "{}"...) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } @@ -107,8 +110,8 @@ func marshalObjectAny(enc *Encoder, obj map[string]any, mo *jsonopts.Struct) err } // A Go map guarantees that each entry has a unique key // The only possibility of duplicates is due to invalid UTF-8. 
- if !enc.options.Flags.Get(jsonflags.AllowInvalidUTF8) { - enc.tokens.last.disableNamespace() + if !xe.Flags.Get(jsonflags.AllowInvalidUTF8) { + xe.Tokens.Last.DisableNamespace() } if !mo.Flags.Get(jsonflags.Deterministic) || len(obj) <= 1 { for name, val := range obj { @@ -153,11 +156,12 @@ func unmarshalObjectAny(dec *Decoder, uo *jsonopts.Struct) (map[string]any, erro case 'n': return nil, nil case '{': + xd := export.Decoder(dec) obj := make(map[string]any) // A Go map guarantees that each entry has a unique key // The only possibility of duplicates is due to invalid UTF-8. - if !dec.options.Flags.Get(jsonflags.AllowInvalidUTF8) { - dec.tokens.last.disableNamespace() + if !xd.Flags.Get(jsonflags.AllowInvalidUTF8) { + xd.Tokens.Last.DisableNamespace() } for dec.PeekKind() != '}' { tok, err := dec.ReadToken() @@ -168,9 +172,9 @@ func unmarshalObjectAny(dec *Decoder, uo *jsonopts.Struct) (map[string]any, erro // Manually check for duplicate names. if _, ok := obj[name]; ok { - name := dec.previousBuffer() - err := newDuplicateNameError(name) - return obj, err.withOffset(dec.InputOffset() - len64(name)) + name := xd.PreviousBuffer() + err := export.NewDuplicateNameError(name, dec.InputOffset()-len64(name)) + return obj, err } val, err := unmarshalValueAny(dec, uo) @@ -189,12 +193,13 @@ func unmarshalObjectAny(dec *Decoder, uo *jsonopts.Struct) (map[string]any, erro func marshalArrayAny(enc *Encoder, arr []any, mo *jsonopts.Struct) error { // Check for cycles. - if enc.tokens.depth() > startDetectingCyclesAfter { + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { v := reflect.ValueOf(arr) - if err := enc.seenPointers.visit(v); err != nil { + if err := visitPointer(&xe.SeenPointers, v); err != nil { return err } - defer enc.seenPointers.leave(v) + defer leavePointer(&xe.SeenPointers, v) } // Handle empty slices. 
@@ -203,12 +208,12 @@ func marshalArrayAny(enc *Encoder, arr []any, mo *jsonopts.Struct) error { return enc.WriteToken(Null) } // Optimize for marshaling an empty slice without any preceding whitespace. - if !enc.options.Flags.Get(jsonflags.Expand) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '[') - enc.buf = append(enc.buf, "[]"...) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '[') + xe.Buf = append(xe.Buf, "[]"...) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } diff --git a/arshal_default.go b/arshal_default.go index 722db53..f86e795 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -19,6 +19,7 @@ import ( "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) // optimizeCommon specifies whether to use optimizations targeted for certain @@ -41,7 +42,7 @@ var ( const startDetectingCyclesAfter = 1000 -type seenPointers map[typedPointer]struct{} +type seenPointers = map[any]struct{} type typedPointer struct { typ reflect.Type @@ -49,20 +50,20 @@ type typedPointer struct { len int // remember slice length to avoid false positives } -// visit visits pointer p of type t, reporting an error if seen before. +// visitPointer visits pointer p of type t, reporting an error if seen before. // If successfully visited, then the caller must eventually call leave. 
-func (m *seenPointers) visit(v reflect.Value) error { +func visitPointer(m *seenPointers, v reflect.Value) error { p := typedPointer{v.Type(), v.UnsafePointer(), sliceLen(v)} if _, ok := (*m)[p]; ok { return &SemanticError{action: "marshal", GoType: p.typ, Err: errors.New("encountered a cycle")} } if *m == nil { - *m = make(map[typedPointer]struct{}) + *m = make(seenPointers) } (*m)[p] = struct{}{} return nil } -func (m *seenPointers) leave(v reflect.Value) { +func leavePointer(m *seenPointers, v reflect.Value) { p := typedPointer{v.Type(), v.UnsafePointer(), sliceLen(v)} delete(*m, p) } @@ -118,21 +119,22 @@ func makeDefaultArshaler(t reflect.Type) *arshaler { func makeBoolArshaler(t reflect.Type) *arshaler { var fncs arshaler fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } // Optimize for marshaling without preceding whitespace. - if optimizeCommon && !enc.options.Flags.Get(jsonflags.Expand) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, 't') + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, 't') if va.Bool() { - enc.buf = append(enc.buf, "true"...) + xe.Buf = append(xe.Buf, "true"...) } else { - enc.buf = append(enc.buf, "false"...) + xe.Buf = append(xe.Buf, "false"...) 
} - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } @@ -140,7 +142,8 @@ func makeBoolArshaler(t reflect.Type) *arshaler { return enc.WriteToken(Bool(va.Bool())) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } tok, err := dec.ReadToken() @@ -164,17 +167,19 @@ func makeBoolArshaler(t reflect.Type) *arshaler { func makeStringArshaler(t reflect.Type) *arshaler { var fncs arshaler fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } return enc.WriteToken(String(va.String())) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } - var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -184,11 +189,11 @@ func makeStringArshaler(t reflect.Type) *arshaler { va.SetString("") return nil case '"': - val = unescapeStringMayCopy(val, flags.isVerbatim()) - if dec.stringCache == nil { - dec.stringCache = new(stringCache) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) + if xd.StringCache == nil { + xd.StringCache = new(stringCache) } - str := dec.stringCache.make(val) + str := makeString(xd.StringCache, val) va.SetString(str) return nil } @@ 
-224,8 +229,9 @@ func makeBytesArshaler(t reflect.Type, fncs *arshaler) *arshaler { // NOTE: This handles both []byte and [N]byte. marshalDefault := fncs.marshal fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) encode, encodedLen := encodeBase64, encodedLenBase64 - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { switch mo.Format { case "base64": encode, encodedLen = encodeBase64, encodedLenBase64 @@ -263,8 +269,9 @@ func makeBytesArshaler(t reflect.Type, fncs *arshaler) *arshaler { } unmarshalDefault := fncs.unmarshal fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) decode, decodedLen, encodedLen := decodeBase64, decodedLenBase64, encodedLenBase64 - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { switch uo.Format { case "base64": decode, decodedLen, encodedLen = decodeBase64, decodedLenBase64, encodedLenBase64 @@ -283,8 +290,8 @@ func makeBytesArshaler(t reflect.Type, fncs *arshaler) *arshaler { return newInvalidFormatError("unmarshal", t, uo.Format) } } - var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -294,7 +301,7 @@ func makeBytesArshaler(t reflect.Type, fncs *arshaler) *arshaler { va.SetZero() return nil case '"': - val = unescapeStringMayCopy(val, flags.isVerbatim()) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) // For base64 and base32, decodedLen computes the maximum output size // when given the original input size. 
To compute the exact size, @@ -343,30 +350,32 @@ func makeIntArshaler(t reflect.Type) *arshaler { var fncs arshaler bits := t.Bits() fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } // Optimize for marshaling without preceding whitespace or string escaping. - if optimizeCommon && !enc.options.Flags.Get(jsonflags.Expand) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '0') - enc.buf = strconv.AppendInt(enc.buf, va.Int(), 10) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '0') + xe.Buf = strconv.AppendInt(xe.Buf, va.Int(), 10) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } x := math.Float64frombits(uint64(va.Int())) - return enc.writeNumber(x, rawIntNumber, mo.Flags.Get(jsonflags.StringifyNumbers)) + return xe.WriteNumber(x, 'i', mo.Flags.Get(jsonflags.StringifyNumbers)) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } - var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -379,7 +388,7 @@ func makeIntArshaler(t reflect.Type) *arshaler { if !uo.Flags.Get(jsonflags.StringifyNumbers) { break } - val = unescapeStringMayCopy(val, flags.isVerbatim()) + val = 
jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) fallthrough case '0': var negOffset int @@ -387,7 +396,7 @@ func makeIntArshaler(t reflect.Type) *arshaler { if neg { negOffset = 1 } - n, ok := parseDecUint(val[negOffset:]) + n, ok := jsonwire.ParseUint(val[negOffset:]) maxInt := uint64(1) << (bits - 1) overflow := (neg && n > maxInt) || (!neg && n > maxInt-1) if !ok { @@ -417,30 +426,32 @@ func makeUintArshaler(t reflect.Type) *arshaler { var fncs arshaler bits := t.Bits() fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } // Optimize for marshaling without preceding whitespace or string escaping. - if optimizeCommon && !enc.options.Flags.Get(jsonflags.Expand) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '0') - enc.buf = strconv.AppendUint(enc.buf, va.Uint(), 10) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '0') + xe.Buf = strconv.AppendUint(xe.Buf, va.Uint(), 10) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } x := math.Float64frombits(va.Uint()) - return enc.writeNumber(x, rawUintNumber, mo.Flags.Get(jsonflags.StringifyNumbers)) + return xe.WriteNumber(x, 'u', mo.Flags.Get(jsonflags.StringifyNumbers)) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } - 
var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -453,10 +464,10 @@ func makeUintArshaler(t reflect.Type) *arshaler { if !uo.Flags.Get(jsonflags.StringifyNumbers) { break } - val = unescapeStringMayCopy(val, flags.isVerbatim()) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) fallthrough case '0': - n, ok := parseDecUint(val) + n, ok := jsonwire.ParseUint(val) maxUint := uint64(1) << bits overflow := n > maxUint-1 if !ok { @@ -482,8 +493,9 @@ func makeFloatArshaler(t reflect.Type) *arshaler { var fncs arshaler bits := t.Bits() fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) var allowNonFinite bool - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { if mo.Format == "nonfinite" { allowNonFinite = true } else { @@ -501,29 +513,30 @@ func makeFloatArshaler(t reflect.Type) *arshaler { } // Optimize for marshaling without preceding whitespace or string escaping. 
- if optimizeCommon && !enc.options.Flags.Get(jsonflags.Expand) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '0') - enc.buf = appendNumber(enc.buf, fv, bits) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !mo.Flags.Get(jsonflags.StringifyNumbers) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '0') + xe.Buf = jsonwire.AppendFloat(xe.Buf, fv, bits) + xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } - return enc.writeNumber(fv, bits, mo.Flags.Get(jsonflags.StringifyNumbers)) + return xe.WriteNumber(fv, bits, mo.Flags.Get(jsonflags.StringifyNumbers)) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) var allowNonFinite bool - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { if uo.Format == "nonfinite" { allowNonFinite = true } else { return newInvalidFormatError("unmarshal", t, uo.Format) } } - var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -533,7 +546,7 @@ func makeFloatArshaler(t reflect.Type) *arshaler { va.SetFloat(0) return nil case '"': - val = unescapeStringMayCopy(val, flags.isVerbatim()) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) if allowNonFinite { switch string(val) { case "NaN": @@ -550,7 +563,7 @@ func makeFloatArshaler(t reflect.Type) *arshaler { if !uo.Flags.Get(jsonflags.StringifyNumbers) { break } - if n, err := consumeNumber(val); n != len(val) || err != nil { + if n, err := jsonwire.ConsumeNumber(val); n != len(val) || err != nil { err := fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax) return &SemanticError{action: "unmarshal", 
JSONKind: k, GoType: t, Err: err} } @@ -560,7 +573,7 @@ func makeFloatArshaler(t reflect.Type) *arshaler { // We never report an overflow condition since we can always // round the input to the closest representable finite value. // For extremely large numbers, the closest value is ±MaxFloat. - fv, _ := parseFloat(val, bits) + fv, _ := jsonwire.ParseFloat(val, bits) va.SetFloat(fv) return nil } @@ -589,15 +602,16 @@ func makeMapArshaler(t reflect.Type) *arshaler { } fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { // Check for cycles. - if enc.tokens.depth() > startDetectingCyclesAfter { - if err := enc.seenPointers.visit(va.Value); err != nil { + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + if err := visitPointer(&xe.SeenPointers, va.Value); err != nil { return err } - defer enc.seenPointers.leave(va.Value) + defer leavePointer(&xe.SeenPointers, va.Value) } emitNull := mo.Flags.Get(jsonflags.FormatNilMapAsNull) - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { switch mo.Format { case "emitnull": emitNull = true @@ -617,12 +631,12 @@ func makeMapArshaler(t reflect.Type) *arshaler { return enc.WriteToken(Null) } // Optimize for marshaling an empty map without any preceding whitespace. - if optimizeCommon && !enc.options.Flags.Get(jsonflags.Expand) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '{') - enc.buf = append(enc.buf, "{}"...) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '{') + xe.Buf = append(xe.Buf, "{}"...) 
+ xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } @@ -648,8 +662,8 @@ func makeMapArshaler(t reflect.Type) *arshaler { // A Go map guarantees that each entry has a unique key. // As such, disable the expensive duplicate name check if we know // that every Go key will serialize as a unique JSON string. - if !nonDefaultKey && mapKeyWithUniqueRepresentation(k.Kind(), enc.options.Flags.Get(jsonflags.AllowInvalidUTF8)) { - enc.tokens.last.disableNamespace() + if !nonDefaultKey && mapKeyWithUniqueRepresentation(k.Kind(), xe.Flags.Get(jsonflags.AllowInvalidUTF8)) { + xe.Tokens.Last.DisableNamespace() } switch { @@ -715,14 +729,14 @@ func makeMapArshaler(t reflect.Type) *arshaler { // as a JSON string. return err } - name := enc.unwriteOnlyObjectMemberName() + name := xe.UnwriteOnlyObjectMemberName() members[i] = member{name, k, v} } // TODO: If AllowDuplicateNames is enabled, then sort according // to reflect.Value as well if the names are equal. // See internal/fmtsort. slices.SortFunc(members, func(x, y member) int { - return compareUTF16(x.name, y.name) + return jsonwire.CompareUTF16(x.name, y.name) }) for _, member := range members { if err := enc.WriteToken(String(member.name)); err != nil { @@ -740,7 +754,8 @@ func makeMapArshaler(t reflect.Type) *arshaler { return nil } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { switch uo.Format { case "emitnull", "emitempty": uo.Format = "" // only relevant for marshaling @@ -781,8 +796,8 @@ func makeMapArshaler(t reflect.Type) *arshaler { // will be rejected as duplicates since they semantically refer // to the same Go value. This is an unusual interaction // between syntax and semantics, but is more correct. 
- if !nonDefaultKey && mapKeyWithUniqueRepresentation(k.Kind(), dec.options.Flags.Get(jsonflags.AllowInvalidUTF8)) { - dec.tokens.last.disableNamespace() + if !nonDefaultKey && mapKeyWithUniqueRepresentation(k.Kind(), xd.Flags.Get(jsonflags.AllowInvalidUTF8)) { + xd.Tokens.Last.DisableNamespace() } // In the rare case where the map is not already empty, @@ -790,7 +805,7 @@ func makeMapArshaler(t reflect.Type) *arshaler { // since existing presence alone is insufficient to indicate // whether the input had a duplicate name. var seen reflect.Value - if !dec.options.Flags.Get(jsonflags.AllowDuplicateNames) && va.Len() > 0 { + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && va.Len() > 0 { seen = reflect.MakeMap(reflect.MapOf(k.Type(), emptyStructType)) } @@ -809,11 +824,11 @@ func makeMapArshaler(t reflect.Type) *arshaler { } if v2 := va.MapIndex(k.Value); v2.IsValid() { - if !dec.options.Flags.Get(jsonflags.AllowDuplicateNames) && (!seen.IsValid() || seen.MapIndex(k.Value).IsValid()) { + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && (!seen.IsValid() || seen.MapIndex(k.Value).IsValid()) { // TODO: Unread the object name. 
- name := dec.previousBuffer() - err := newDuplicateNameError(name) - return err.withOffset(dec.InputOffset() - len64(name)) + name := xd.PreviousBuffer() + err := export.NewDuplicateNameError(name, dec.InputOffset()-len64(name)) + return err } v.Set(v2) } else { @@ -874,7 +889,8 @@ func makeStructArshaler(t reflect.Type) *arshaler { fields, errInit = makeStructFields(t) } fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } once.Do(init) @@ -888,7 +904,7 @@ func makeStructArshaler(t reflect.Type) *arshaler { } var seenIdxs uintSet prevIdx := -1 - enc.tokens.last.disableNamespace() // we manually ensure unique names below + xe.Tokens.Last.DisableNamespace() // we manually ensure unique names below for i := range fields.flattened { f := &fields.flattened[i] v := addressableValue{va.Field(f.index[0])} // addressable if struct value is addressable @@ -932,24 +948,24 @@ func makeStructArshaler(t reflect.Type) *arshaler { // 5. There is no possibility of an error occurring. if optimizeCommon { // Append any delimiters or optional whitespace. - if enc.tokens.last.length() > 0 { - enc.buf = append(enc.buf, ',') + if xe.Tokens.Last.Length() > 0 { + xe.Buf = append(xe.Buf, ',') } - if enc.options.Flags.Get(jsonflags.Expand) { - enc.buf = enc.appendIndent(enc.buf, enc.tokens.needIndent('"')) + if xe.Flags.Get(jsonflags.Expand) { + xe.Buf = xe.AppendIndent(xe.Buf, xe.Tokens.NeedIndent('"')) } // Append the token to the output and to the state machine. - n0 := len(enc.buf) // offset before calling appendString - if enc.escapeRunes.canonical { - enc.buf = append(enc.buf, f.quotedName...) + n0 := len(xe.Buf) // offset before calling AppendQuote + if xe.EscapeRunes.IsCanonical() { + xe.Buf = append(xe.Buf, f.quotedName...) 
} else { - enc.buf, _ = appendString(enc.buf, f.name, false, enc.escapeRunes) + xe.Buf, _ = jsonwire.AppendQuote(xe.Buf, f.name, false, xe.EscapeRunes) } - if !enc.options.Flags.Get(jsonflags.AllowDuplicateNames) { - enc.names.replaceLastQuotedOffset(n0) + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { + xe.Names.ReplaceLastQuotedOffset(n0) } - enc.tokens.last.increment() + xe.Tokens.Last.Increment() } else { if err := enc.WriteToken(String(f.name)); err != nil { return err @@ -962,7 +978,7 @@ func makeStructArshaler(t reflect.Type) *arshaler { mo.Flags.Set(jsonflags.StringifyNumbers | 1) } if f.format != "" { - mo.FormatDepth = enc.tokens.depth() + mo.FormatDepth = xe.Tokens.Depth() mo.Format = f.format } err := marshal(enc, v, mo) @@ -978,7 +994,7 @@ func makeStructArshaler(t reflect.Type) *arshaler { if prevIdx >= 0 { prevName = &fields.flattened[prevIdx].name } - if enc.unwriteEmptyObjectMember(prevName) { + if xe.UnwriteEmptyObjectMember(prevName) { continue } } @@ -986,14 +1002,14 @@ func makeStructArshaler(t reflect.Type) *arshaler { // Remember the previous written object member. // The set of seen fields only needs to be updated to detect // duplicate names with those from the inlined fallback. - if !enc.options.Flags.Get(jsonflags.AllowDuplicateNames) && fields.inlinedFallback != nil { + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) && fields.inlinedFallback != nil { seenIdxs.insert(uint(f.id)) } prevIdx = f.id } if fields.inlinedFallback != nil && !(mo.Flags.Get(jsonflags.DiscardUnknownMembers) && fields.inlinedFallback.unknown) { var insertUnquotedName func([]byte) bool - if !enc.options.Flags.Get(jsonflags.AllowDuplicateNames) { + if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { insertUnquotedName = func(name []byte) bool { // Check that the name from inlined fallback does not match // one of the previously marshaled names from known fields. 
@@ -1010,7 +1026,7 @@ func makeStructArshaler(t reflect.Type) *arshaler { // Check that the name does not match any other name // previously marshaled from the inlined fallback. - return enc.namespaces.last().insertUnquoted(name) + return xe.Namespaces.Last().InsertUnquoted(name) } } if err := marshalInlinedFallbackAll(enc, va, mo, fields.inlinedFallback, insertUnquotedName); err != nil { @@ -1023,7 +1039,8 @@ func makeStructArshaler(t reflect.Type) *arshaler { return nil } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } tok, err := dec.ReadToken() @@ -1043,15 +1060,15 @@ func makeStructArshaler(t reflect.Type) *arshaler { return &err } var seenIdxs uintSet - dec.tokens.last.disableNamespace() + xd.Tokens.Last.DisableNamespace() for dec.PeekKind() != '}' { // Process the object member name. - var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err } - name := unescapeStringMayCopy(val, flags.isVerbatim()) + name := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) f := fields.byActualName[string(name)] if f == nil { for _, f2 := range fields.byFoldedName[string(foldName(name))] { @@ -1064,10 +1081,10 @@ func makeStructArshaler(t reflect.Type) *arshaler { if uo.Flags.Get(jsonflags.RejectUnknownMembers) && (fields.inlinedFallback == nil || fields.inlinedFallback.unknown) { return &SemanticError{action: "unmarshal", GoType: t, Err: fmt.Errorf("unknown name %s", val)} } - if !dec.options.Flags.Get(jsonflags.AllowDuplicateNames) && !dec.namespaces.last().insertUnquoted(name) { + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && !xd.Namespaces.Last().InsertUnquoted(name) { // TODO: Unread the object name. 
- err := newDuplicateNameError(val) - return err.withOffset(dec.InputOffset() - len64(val)) + err := export.NewDuplicateNameError(val, dec.InputOffset()-len64(val)) + return err } if fields.inlinedFallback == nil { @@ -1084,10 +1101,10 @@ func makeStructArshaler(t reflect.Type) *arshaler { continue } } - if !dec.options.Flags.Get(jsonflags.AllowDuplicateNames) && !seenIdxs.insert(uint(f.id)) { + if !xd.Flags.Get(jsonflags.AllowDuplicateNames) && !seenIdxs.insert(uint(f.id)) { // TODO: Unread the object name. - err := newDuplicateNameError(val) - return err.withOffset(dec.InputOffset() - len64(val)) + err := export.NewDuplicateNameError(val, dec.InputOffset()-len64(val)) + return err } // Process the object member value. @@ -1100,7 +1117,7 @@ func makeStructArshaler(t reflect.Type) *arshaler { uo.Flags.Set(jsonflags.StringifyNumbers | 1) } if f.format != "" { - uo.FormatDepth = dec.tokens.depth() + uo.FormatDepth = xd.Tokens.Depth() uo.Format = f.format } v := addressableValue{va.Field(f.index[0])} // addressable if struct value is addressable @@ -1159,15 +1176,16 @@ func makeSliceArshaler(t reflect.Type) *arshaler { } fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { // Check for cycles. 
- if enc.tokens.depth() > startDetectingCyclesAfter { - if err := enc.seenPointers.visit(va.Value); err != nil { + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + if err := visitPointer(&xe.SeenPointers, va.Value); err != nil { return err } - defer enc.seenPointers.leave(va.Value) + defer leavePointer(&xe.SeenPointers, va.Value) } emitNull := mo.Flags.Get(jsonflags.FormatNilSliceAsNull) - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { switch mo.Format { case "emitnull": emitNull = true @@ -1187,12 +1205,12 @@ func makeSliceArshaler(t reflect.Type) *arshaler { return enc.WriteToken(Null) } // Optimize for marshaling an empty slice without any preceding whitespace. - if optimizeCommon && !enc.options.Flags.Get(jsonflags.Expand) && !enc.tokens.last.needObjectName() { - enc.buf = enc.tokens.mayAppendDelim(enc.buf, '[') - enc.buf = append(enc.buf, "[]"...) - enc.tokens.last.increment() - if enc.needFlush() { - return enc.flush() + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() { + xe.Buf = xe.Tokens.MayAppendDelim(xe.Buf, '[') + xe.Buf = append(xe.Buf, "[]"...) 
+ xe.Tokens.Last.Increment() + if xe.NeedFlush() { + return xe.Flush() } return nil } @@ -1219,7 +1237,8 @@ func makeSliceArshaler(t reflect.Type) *arshaler { } emptySlice := reflect.MakeSlice(t, 0, 0) fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { switch uo.Format { case "emitnull", "emitempty": uo.Format = "" // only relevant for marshaling @@ -1292,7 +1311,8 @@ func makeArrayArshaler(t reflect.Type) *arshaler { } n := t.Len() fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } once.Do(init) @@ -1315,7 +1335,8 @@ func makeArrayArshaler(t reflect.Type) *arshaler { return nil } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } tok, err := dec.ReadToken() @@ -1371,11 +1392,12 @@ func makePointerArshaler(t reflect.Type) *arshaler { } fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { // Check for cycles. - if enc.tokens.depth() > startDetectingCyclesAfter { - if err := enc.seenPointers.visit(va.Value); err != nil { + xe := export.Encoder(enc) + if xe.Tokens.Depth() > startDetectingCyclesAfter { + if err := visitPointer(&xe.SeenPointers, va.Value); err != nil { return err } - defer enc.seenPointers.leave(va.Value) + defer leavePointer(&xe.SeenPointers, va.Value) } // NOTE: Struct.Format is forwarded to underlying marshal. 
@@ -1420,7 +1442,8 @@ func makeInterfaceArshaler(t reflect.Type) *arshaler { var fncs arshaler fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { return newInvalidFormatError("marshal", t, mo.Format) } if va.IsNil() { @@ -1441,7 +1464,8 @@ func makeInterfaceArshaler(t reflect.Type) *arshaler { return marshal(enc, v, mo) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { return newInvalidFormatError("unmarshal", t, uo.Format) } if dec.PeekKind() == 'n' { @@ -1459,7 +1483,7 @@ func makeInterfaceArshaler(t reflect.Type) *arshaler { // Duplicate name check must be enforced since unmarshalValueAny // does not implement merge semantics. if optimizeCommon && - t == anyType && !dec.options.Flags.Get(jsonflags.AllowDuplicateNames) && uo.Format == "" && + t == anyType && !xd.Flags.Get(jsonflags.AllowDuplicateNames) && uo.Format == "" && (uo.Unmarshalers == nil || !uo.Unmarshalers.(*Unmarshalers).fromAny) { v, err := unmarshalValueAny(dec, uo) // We must check for nil interface values up front. @@ -1536,3 +1560,46 @@ func newInvalidFormatError(action string, t reflect.Type, format string) error { err := fmt.Errorf("invalid format flag: %q", format) return &SemanticError{action: action, GoType: t, Err: err} } + +type uintSet64 uint64 + +func (s uintSet64) has(i uint) bool { return s&(1<<i) > 0 } +func (s *uintSet64) set(i uint) { *s |= 1 << i } + +// uintSet is a set of unsigned integers. +// It is optimized for most integers being close to zero. +type uintSet struct { + lo uintSet64 + hi []uintSet64 +} + +// has reports whether i is in the set. 
+func (s *uintSet) has(i uint) bool { + if i < 64 { + return s.lo.has(i) + } else { + i -= 64 + iHi, iLo := int(i/64), i%64 + return iHi < len(s.hi) && s.hi[iHi].has(iLo) + } +} + +// insert inserts i into the set and reports whether it was the first insertion. +func (s *uintSet) insert(i uint) bool { + // TODO: Make this inlinable at least for the lower 64-bit case. + if i < 64 { + has := s.lo.has(i) + s.lo.set(i) + return !has + } else { + i -= 64 + iHi, iLo := int(i/64), i%64 + if iHi >= len(s.hi) { + s.hi = append(s.hi, make([]uintSet64, iHi+1-len(s.hi))...) + s.hi = s.hi[:cap(s.hi)] + } + has := s.hi[iHi].has(iLo) + s.hi[iHi].set(iLo) + return !has + } +} diff --git a/arshal_funcs.go b/arshal_funcs.go index d994f24..e4d34ae 100644 --- a/arshal_funcs.go +++ b/arshal_funcs.go @@ -20,7 +20,7 @@ import ( // on the provided [Encoder] or [Decoder]. For example, it is permissible to call // [Decoder.PeekKind], but not permissible to call [Decoder.ReadToken] or // [Encoder.WriteToken] since such methods mutate the state. -const SkipFunc = jsonError("skip function") +var SkipFunc = errors.New("json: skip function") // Marshalers is a list of functions that may override the marshal behavior // of specific types. 
Populate [WithMarshalers] to use it with @@ -203,11 +203,12 @@ func MarshalFuncV2[T any](fn func(*Encoder, T, Options) error) *Marshalers { typFnc := typedMarshaler{ typ: t, fnc: func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - prevDepth, prevLength := enc.tokens.depthLength() - enc.options.Flags.Set(jsonflags.WithinArshalCall | 1) + xe := export.Encoder(enc) + prevDepth, prevLength := xe.Tokens.DepthLength() + xe.Flags.Set(jsonflags.WithinArshalCall | 1) err := fn(enc, va.castTo(t).Interface().(T), mo) - enc.options.Flags.Set(jsonflags.WithinArshalCall | 0) - currDepth, currLength := enc.tokens.depthLength() + xe.Flags.Set(jsonflags.WithinArshalCall | 0) + currDepth, currLength := xe.Tokens.DepthLength() if err == nil && (prevDepth != currDepth || prevLength+1 != currLength) { err = errors.New("must write exactly one JSON value") } @@ -276,11 +277,12 @@ func UnmarshalFuncV2[T any](fn func(*Decoder, T, Options) error) *Unmarshalers { typFnc := typedUnmarshaler{ typ: t, fnc: func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - prevDepth, prevLength := dec.tokens.depthLength() - dec.options.Flags.Set(jsonflags.WithinArshalCall | 1) + xd := export.Decoder(dec) + prevDepth, prevLength := xd.Tokens.DepthLength() + xd.Flags.Set(jsonflags.WithinArshalCall | 1) err := fn(dec, va.castTo(t).Interface().(T), uo) - dec.options.Flags.Set(jsonflags.WithinArshalCall | 0) - currDepth, currLength := dec.tokens.depthLength() + xd.Flags.Set(jsonflags.WithinArshalCall | 0) + currDepth, currLength := xd.Tokens.DepthLength() if err == nil && (prevDepth != currDepth || prevLength+1 != currLength) { err = errors.New("must read exactly one JSON value") } diff --git a/arshal_inlined.go b/arshal_inlined.go index bf79a3e..67b7d5b 100644 --- a/arshal_inlined.go +++ b/arshal_inlined.go @@ -12,6 +12,7 @@ import ( "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + 
"github.com/go-json-experiment/json/internal/jsonwire" ) // This package supports "inlining" a Go struct field, where the contents @@ -50,9 +51,10 @@ func marshalInlinedFallbackAll(enc *Encoder, va addressableValue, mo *jsonopts.S return nil } - dec := getBufferedDecoder(b) - defer putBufferedDecoder(dec) - dec.options.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) + dec := export.GetBufferedDecoder(b) + defer export.PutBufferedDecoder(dec) + xd := export.Decoder(dec) + xd.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) tok, err := dec.ReadToken() if err != nil { @@ -67,15 +69,15 @@ func marshalInlinedFallbackAll(enc *Encoder, va addressableValue, mo *jsonopts.S } for dec.PeekKind() != '}' { // Parse the JSON object name. - var flags valueFlags - val, err := dec.readValue(&flags) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return &SemanticError{action: "marshal", GoType: rawValueType, Err: err} } if insertUnquotedName != nil { - name := unescapeStringMayCopy(val, flags.isVerbatim()) + name := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) if !insertUnquotedName(name) { - return newDuplicateNameError(val) + return export.NewDuplicateNameError(val, 0) } } if err := enc.WriteValue(val); err != nil { @@ -83,7 +85,7 @@ func marshalInlinedFallbackAll(enc *Encoder, va addressableValue, mo *jsonopts.S } // Parse the JSON object value. 
- val, err = dec.readValue(&flags) + val, err = xd.ReadValue(&flags) if err != nil { return &SemanticError{action: "marshal", GoType: rawValueType, Err: err} } @@ -94,7 +96,7 @@ func marshalInlinedFallbackAll(enc *Encoder, va addressableValue, mo *jsonopts.S if _, err := dec.ReadToken(); err != nil { return &SemanticError{action: "marshal", GoType: rawValueType, Err: err} } - if err := dec.checkEOF(); err != nil { + if err := xd.CheckEOF(); err != nil { return &SemanticError{action: "marshal", GoType: rawValueType, Err: err} } return nil @@ -107,15 +109,16 @@ func marshalInlinedFallbackAll(enc *Encoder, va addressableValue, mo *jsonopts.S mk := newAddressableValue(stringType) mv := newAddressableValue(m.Type().Elem()) marshalKey := func(mk addressableValue) error { - b, err := appendString(enc.UnusedBuffer(), mk.String(), !enc.options.Flags.Get(jsonflags.AllowInvalidUTF8), nil) + xe := export.Encoder(enc) + b, err := jsonwire.AppendQuote(enc.UnusedBuffer(), mk.String(), !xe.Flags.Get(jsonflags.AllowInvalidUTF8), nil) if err != nil { return err } if insertUnquotedName != nil { isVerbatim := bytes.IndexByte(b, '\\') < 0 - name := unescapeStringMayCopy(b, isVerbatim) + name := jsonwire.UnquoteMayCopy(b, isVerbatim) if !insertUnquotedName(name) { - return newDuplicateNameError(b) + return export.NewDuplicateNameError(b, 0) } } return enc.WriteValue(b) @@ -172,13 +175,13 @@ func unmarshalInlinedFallbackNext(dec *Decoder, va addressableValue, uo *jsonopt if len(*b) == 0 { // TODO: Should this be nil? What if it were all whitespace? *b = append(*b, '{') } else { - *b = trimSuffixWhitespace(*b) - if hasSuffixByte(*b, '}') { + *b = jsonwire.TrimSuffixWhitespace(*b) + if jsonwire.HasSuffixByte(*b, '}') { // TODO: When merging into an object for the first time, // should we verify that it is valid? 
- *b = trimSuffixByte(*b, '}') - *b = trimSuffixWhitespace(*b) - if !hasSuffixByte(*b, ',') && !hasSuffixByte(*b, '{') { + *b = jsonwire.TrimSuffixByte(*b, '}') + *b = jsonwire.TrimSuffixWhitespace(*b) + if !jsonwire.HasSuffixByte(*b, ',') && !jsonwire.HasSuffixByte(*b, '{') { *b = append(*b, ',') } } else { diff --git a/arshal_methods.go b/arshal_methods.go index 20da09e..e8d1aa2 100644 --- a/arshal_methods.go +++ b/arshal_methods.go @@ -11,6 +11,7 @@ import ( "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) // Interfaces for custom serialization. @@ -96,11 +97,12 @@ func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler { case jsonMarshalerV2Type: fncs.nonDefault = true fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - prevDepth, prevLength := enc.tokens.depthLength() - enc.options.Flags.Set(jsonflags.WithinArshalCall | 1) + xe := export.Encoder(enc) + prevDepth, prevLength := xe.Tokens.DepthLength() + xe.Flags.Set(jsonflags.WithinArshalCall | 1) err := va.addrWhen(needAddr).Interface().(MarshalerV2).MarshalJSONV2(enc, mo) - enc.options.Flags.Set(jsonflags.WithinArshalCall | 0) - currDepth, currLength := enc.tokens.depthLength() + xe.Flags.Set(jsonflags.WithinArshalCall | 0) + currDepth, currLength := xe.Tokens.DepthLength() if (prevDepth != currDepth || prevLength+1 != currLength) && err == nil { err = errors.New("must write exactly one JSON value") } @@ -138,7 +140,7 @@ func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler { return &SemanticError{action: "marshal", JSONKind: '"', GoType: t, Err: err} } val := enc.UnusedBuffer() - val, err = appendString(val, string(s), true, nil) + val, err = jsonwire.AppendQuote(val, s, true, nil) if err != nil { return &SemanticError{action: "marshal", JSONKind: '"', GoType: t, Err: err} } @@ -155,11 +157,12 @@ func makeMethodArshaler(fncs 
*arshaler, t reflect.Type) *arshaler { case jsonUnmarshalerV2Type: fncs.nonDefault = true fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - prevDepth, prevLength := dec.tokens.depthLength() - dec.options.Flags.Set(jsonflags.WithinArshalCall | 1) + xd := export.Decoder(dec) + prevDepth, prevLength := xd.Tokens.DepthLength() + xd.Flags.Set(jsonflags.WithinArshalCall | 1) err := va.addrWhen(needAddr).Interface().(UnmarshalerV2).UnmarshalJSONV2(dec, uo) - dec.options.Flags.Set(jsonflags.WithinArshalCall | 0) - currDepth, currLength := dec.tokens.depthLength() + xd.Flags.Set(jsonflags.WithinArshalCall | 0) + currDepth, currLength := xd.Tokens.DepthLength() if (prevDepth != currDepth || prevLength+1 != currLength) && err == nil { err = errors.New("must read exactly one JSON value") } @@ -188,8 +191,9 @@ func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler { case textUnmarshalerType: fncs.nonDefault = true fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { - var flags valueFlags - val, err := dec.readValue(&flags) + xd := export.Decoder(dec) + var flags jsonwire.ValueFlags + val, err := xd.ReadValue(&flags) if err != nil { return err // must be a syntactic or I/O error } @@ -197,7 +201,7 @@ func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler { err = errors.New("JSON value must be string type") return &SemanticError{action: "unmarshal", JSONKind: val.Kind(), GoType: t, Err: err} } - s := unescapeStringMayCopy(val, flags.isVerbatim()) + s := jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) unmarshaler := va.addrWhen(needAddr).Interface().(encoding.TextUnmarshaler) if err := unmarshaler.UnmarshalText(s); err != nil { err = wrapSkipFunc(err, "unmarshal method") diff --git a/arshal_test.go b/arshal_test.go index 3829aaf..c4a9942 100644 --- a/arshal_test.go +++ b/arshal_test.go @@ -23,6 +23,7 @@ import ( "github.com/go-json-experiment/json/internal/jsonopts" 
"github.com/go-json-experiment/json/internal/jsontest" + "github.com/go-json-experiment/json/jsontext" ) type ( @@ -496,13 +497,13 @@ func (s *structMethodJSONv2) UnmarshalJSONV2(dec *Decoder, opts Options) error { } func (s structMethodJSONv1) MarshalJSON() ([]byte, error) { - return appendString(nil, s.value, false, nil) + return jsontext.AppendQuote(nil, s.value) } func (s *structMethodJSONv1) UnmarshalJSON(b []byte) error { if k := RawValue(b).Kind(); k != '"' { return &SemanticError{action: "unmarshal", JSONKind: k, GoType: structMethodJSONv1Type} } - b, _ = unescapeString(nil, b) + b, _ = jsontext.AppendUnquote(nil, b) s.value = string(b) return nil } @@ -785,17 +786,17 @@ func TestMarshal(t *testing.T) { name: jsontest.Name("Maps/InvalidKey/Bool"), in: map[bool]string{false: "value"}, want: `{`, - wantErr: errMissingName.withOffset(len64(`{`)), + wantErr: export.NewMissingNameError(len64(`{`)), }, { name: jsontest.Name("Maps/InvalidKey/NamedBool"), in: map[namedBool]string{false: "value"}, want: `{`, - wantErr: errMissingName.withOffset(len64(`{`)), + wantErr: export.NewMissingNameError(len64(`{`)), }, { name: jsontest.Name("Maps/InvalidKey/Array"), in: map[[1]string]string{{"key"}: "value"}, want: `{`, - wantErr: errMissingName.withOffset(len64(`{`)), + wantErr: export.NewMissingNameError(len64(`{`)), }, { name: jsontest.Name("Maps/InvalidKey/Channel"), in: map[chan string]string{make(chan string): "value"}, @@ -816,7 +817,7 @@ func TestMarshal(t *testing.T) { in: map[*int64]string{addr(int64(0)): "0", addr(int64(0)): "0"}, canonicalize: true, want: `{"0":"0"`, - wantErr: newDuplicateNameError(`"0"`).withOffset(len64(`{"0":"0",`)), + wantErr: export.NewDuplicateNameError([]byte(`"0"`), len64(`{"0":"0",`)), }, { name: jsontest.Name("Maps/ValidKey/NamedInt"), in: map[namedInt64]string{math.MinInt64: "MinInt64", 0: "Zero", math.MaxInt64: "MaxInt64"}, @@ -861,7 +862,7 @@ func TestMarshal(t *testing.T) { opts: []Options{AllowInvalidUTF8(true)}, in: 
map[string]string{"\x80": "", "\x81": ""}, want: `{"�":""`, - wantErr: newDuplicateNameError(`"�"`).withOffset(len64(`{"�":"",`)), + wantErr: export.NewDuplicateNameError([]byte(`"�"`), len64(`{"�":"",`)), }, { name: jsontest.Name("Maps/DuplicateName/NoCaseString/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -871,7 +872,7 @@ func TestMarshal(t *testing.T) { name: jsontest.Name("Maps/DuplicateName/NoCaseString"), in: map[nocaseString]string{"hello": "", "HELLO": ""}, want: `{"hello":""`, - wantErr: &SemanticError{action: "marshal", JSONKind: '"', GoType: reflect.TypeOf(nocaseString("")), Err: newDuplicateNameError(`"hello"`).withOffset(len64(`{"hello":"",`))}, + wantErr: &SemanticError{action: "marshal", JSONKind: '"', GoType: reflect.TypeOf(nocaseString("")), Err: export.NewDuplicateNameError([]byte(`"hello"`), len64(`{"hello":"",`))}, }, { name: jsontest.Name("Maps/DuplicateName/NaNs/Deterministic+AllowDuplicateNames"), opts: []Options{ @@ -904,7 +905,7 @@ func TestMarshal(t *testing.T) { }, in: map[string]int{"\xff": 0, "\xfe": 1}, want: `{"�":1`, - wantErr: newDuplicateNameError(`"�"`).withOffset(len64(`{"�":1,`)), + wantErr: export.NewDuplicateNameError([]byte(`"�"`), len64(`{"�":1,`)), }, { name: jsontest.Name("Maps/String/Deterministic+AllowInvalidUTF8+AllowDuplicateNames"), opts: []Options{ @@ -953,7 +954,7 @@ func TestMarshal(t *testing.T) { }, in: map[namedString]map[string]int{"X": {"a": 1, "b": 1}}, want: `{"X":{"x":1`, - wantErr: newDuplicateNameError(`"x"`).withOffset(len64(`{"X":{"x":1,`)), + wantErr: export.NewDuplicateNameError([]byte(`"x"`), len64(`{"X":{"x":1,`)), }, { name: jsontest.Name("Maps/String/Deterministic+MarshalFuncs+AllowDuplicateNames"), opts: []Options{ @@ -2081,7 +2082,7 @@ func TestMarshal(t *testing.T) { opts: []Options{AllowDuplicateNames(false)}, in: structInlineRawValue{X: RawValue(` { "fizz" : "buzz" , "fizz" : "buzz" } `)}, want: `{"fizz":"buzz"`, - wantErr: newDuplicateNameError(`"fizz"`), + wantErr: 
export.NewDuplicateNameError([]byte(`"fizz"`), 0), }, { name: jsontest.Name("Structs/InlinedFallback/RawValue/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -2092,7 +2093,7 @@ func TestMarshal(t *testing.T) { opts: []Options{AllowInvalidUTF8(false)}, in: structInlineRawValue{X: RawValue(`{"` + "\xde\xad\xbe\xef" + `":"value"}`)}, want: `{`, - wantErr: errInvalidUTF8.withOffset(len64(`{"` + "\xde\xad")), + wantErr: export.NewInvalidUTF8Error(len64(`{"` + "\xde\xad")), }, { name: jsontest.Name("Structs/InlinedFallback/RawValue/AllowInvalidUTF8"), opts: []Options{AllowInvalidUTF8(true)}, @@ -2112,17 +2113,17 @@ func TestMarshal(t *testing.T) { name: jsontest.Name("Structs/InlinedFallback/RawValue/InvalidObjectName"), in: structInlineRawValue{X: RawValue(` { true : false } `)}, want: `{`, - wantErr: &SemanticError{action: "marshal", GoType: rawValueType, Err: errMissingName.withOffset(len64(" { "))}, + wantErr: &SemanticError{action: "marshal", GoType: rawValueType, Err: export.NewMissingNameError(len64(" { "))}, }, { name: jsontest.Name("Structs/InlinedFallback/RawValue/InvalidObjectEnd"), in: structInlineRawValue{X: RawValue(` { "name" : false , } `)}, want: `{"name":false`, - wantErr: &SemanticError{action: "marshal", GoType: rawValueType, Err: newInvalidCharacterError(",", "before next token").withOffset(len64(` { "name" : false `))}, + wantErr: &SemanticError{action: "marshal", GoType: rawValueType, Err: export.NewInvalidCharacterError(",", "before next token", len64(` { "name" : false `))}, }, { name: jsontest.Name("Structs/InlinedFallback/RawValue/InvalidDualObject"), in: structInlineRawValue{X: RawValue(`{}{}`)}, want: `{`, - wantErr: &SemanticError{action: "marshal", GoType: rawValueType, Err: newInvalidCharacterError("{", "after top-level value").withOffset(len64(`{}`))}, + wantErr: &SemanticError{action: "marshal", GoType: rawValueType, Err: export.NewInvalidCharacterError("{", "after top-level value", len64(`{}`))}, }, { name: 
jsontest.Name("Structs/InlinedFallback/RawValue/Nested/Nil"), in: structInlinePointerInlineRawValue{}, @@ -2170,7 +2171,7 @@ func TestMarshal(t *testing.T) { opts: []Options{AllowInvalidUTF8(false)}, in: structInlineMapStringAny{X: jsonObject{"\xde\xad\xbe\xef": nil}}, want: `{`, - wantErr: errInvalidUTF8, + wantErr: export.NewInvalidUTF8Error(0), }, { name: jsontest.Name("Structs/InlinedFallback/MapStringAny/AllowInvalidUTF8"), opts: []Options{AllowInvalidUTF8(true)}, @@ -2228,7 +2229,7 @@ func TestMarshal(t *testing.T) { X: map[string]int{"\xff": 0, "\xfe": 1}, }, want: `{"�":1`, - wantErr: newDuplicateNameError(`"�"`), + wantErr: export.NewDuplicateNameError([]byte(`"�"`), 0), }, { name: jsontest.Name("Structs/InlinedFallback/MapStringInt/Deterministic+AllowInvalidUTF8+AllowDuplicateNames"), opts: []Options{Deterministic(true), AllowInvalidUTF8(true), AllowDuplicateNames(true)}, @@ -2295,7 +2296,7 @@ func TestMarshal(t *testing.T) { X: RawValue(`{"dupe":"","dupe":""}`), }, want: `{"dupe":""`, - wantErr: newDuplicateNameError(`"dupe"`), + wantErr: export.NewDuplicateNameError([]byte(`"dupe"`), 0), }, { name: jsontest.Name("Structs/DuplicateName/NoCaseInlineRawValue/Other/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -2315,7 +2316,7 @@ func TestMarshal(t *testing.T) { X: RawValue(`{"Aaa": "", "Aaa": ""}`), }, want: `{"Aaa":""`, - wantErr: newDuplicateNameError(`"Aaa"`), + wantErr: export.NewDuplicateNameError([]byte(`"Aaa"`), 0), }, { name: jsontest.Name("Structs/DuplicateName/NoCaseInlineRawValue/ExactConflict/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -2329,7 +2330,7 @@ func TestMarshal(t *testing.T) { X: RawValue(`{"Aaa": "", "AaA": "", "aaa": ""}`), }, want: `{"Aaa":"","AaA":""`, - wantErr: newDuplicateNameError(`"aaa"`), + wantErr: export.NewDuplicateNameError([]byte(`"aaa"`), 0), }, { name: jsontest.Name("Structs/DuplicateName/NoCaseInlineRawValue/NoCaseConflict/AllowDuplicateNames"), opts: 
[]Options{AllowDuplicateNames(true)}, @@ -2353,7 +2354,7 @@ func TestMarshal(t *testing.T) { X: RawValue(`{"AAA": ""}`), }, want: `{"AAA":"x","AaA":"x"`, - wantErr: newDuplicateNameError(`"AAA"`), + wantErr: export.NewDuplicateNameError([]byte(`"AAA"`), 0), }, { name: jsontest.Name("Structs/DuplicateName/NoCaseInlineRawValue/NoCaseConflictWithField"), in: structNoCaseInlineRawValue{ @@ -2362,7 +2363,7 @@ func TestMarshal(t *testing.T) { X: RawValue(`{"aaa": ""}`), }, want: `{"AAA":"x","AaA":"x"`, - wantErr: newDuplicateNameError(`"aaa"`), + wantErr: export.NewDuplicateNameError([]byte(`"aaa"`), 0), }, { name: jsontest.Name("Structs/DuplicateName/NoCaseInlineMapStringAny/ExactDifferent"), in: structNoCaseInlineMapStringAny{ @@ -2386,7 +2387,7 @@ func TestMarshal(t *testing.T) { X: jsonObject{"AAA": ""}, }, want: `{"AAA":"x","AaA":"x"`, - wantErr: newDuplicateNameError(`"AAA"`), + wantErr: export.NewDuplicateNameError([]byte(`"AAA"`), 0), }, { name: jsontest.Name("Structs/DuplicateName/NoCaseInlineMapStringAny/NoCaseConflictWithField"), in: structNoCaseInlineMapStringAny{ @@ -2395,7 +2396,7 @@ func TestMarshal(t *testing.T) { X: jsonObject{"aaa": ""}, }, want: `{"AAA":"x","AaA":"x"`, - wantErr: newDuplicateNameError(`"aaa"`), + wantErr: export.NewDuplicateNameError([]byte(`"aaa"`), 0), }, { name: jsontest.Name("Structs/Invalid/Conflicting"), in: structConflicting{}, @@ -2680,7 +2681,7 @@ func TestMarshal(t *testing.T) { opts: []Options{Deterministic(true), AllowInvalidUTF8(true), AllowDuplicateNames(false)}, in: struct{ X any }{map[string]any{"\xff": "", "\xfe": ""}}, want: `{"X":{"�":""`, - wantErr: newDuplicateNameError(`"�"`).withOffset(len64(`{"X":{"�":"",`)), + wantErr: export.NewDuplicateNameError([]byte(`"�"`), len64(`{"X":{"�":"",`)), }, { name: jsontest.Name("Interfaces/Any/Maps/Deterministic+AllowInvalidUTF8+AllowDuplicateNames"), opts: []Options{Deterministic(true), AllowInvalidUTF8(true), AllowDuplicateNames(true)}, @@ -2690,13 +2691,13 @@ func 
TestMarshal(t *testing.T) { name: jsontest.Name("Interfaces/Any/Maps/RejectInvalidUTF8"), in: struct{ X any }{map[string]any{"\xff": "", "\xfe": ""}}, want: `{"X":{`, - wantErr: errInvalidUTF8.withOffset(len64(`{"X":{`)), + wantErr: export.NewInvalidUTF8Error(len64(`{"X":{`)), }, { name: jsontest.Name("Interfaces/Any/Maps/AllowInvalidUTF8+RejectDuplicateNames"), opts: []Options{AllowInvalidUTF8(true)}, in: struct{ X any }{map[string]any{"\xff": "", "\xfe": ""}}, want: `{"X":{"�":""`, - wantErr: newDuplicateNameError(`"�"`).withOffset(len64(`{"X":{"�":"",`)), + wantErr: export.NewDuplicateNameError([]byte(`"�"`), len64(`{"X":{"�":"",`)), }, { name: jsontest.Name("Interfaces/Any/Maps/AllowInvalidUTF8+AllowDuplicateNames"), opts: []Options{AllowInvalidUTF8(true), AllowDuplicateNames(true)}, @@ -2856,7 +2857,7 @@ func TestMarshal(t *testing.T) { in: marshalJSONv1Func(func() ([]byte, error) { return []byte("invalid"), nil }), - wantErr: &SemanticError{action: "marshal", JSONKind: 'i', GoType: marshalJSONv1FuncType, Err: newInvalidCharacterError("i", "at start of value")}, + wantErr: &SemanticError{action: "marshal", JSONKind: 'i', GoType: marshalJSONv1FuncType, Err: export.NewInvalidCharacterError("i", "at start of value", 0)}, }, { name: jsontest.Name("Methods/Invalid/JSONv1/SkipFunc"), in: marshalJSONv1Func(func() ([]byte, error) { @@ -2874,7 +2875,7 @@ func TestMarshal(t *testing.T) { in: marshalTextFunc(func() ([]byte, error) { return []byte("\xde\xad\xbe\xef"), nil }), - wantErr: &SemanticError{action: "marshal", JSONKind: '"', GoType: marshalTextFuncType, Err: errInvalidUTF8}, + wantErr: &SemanticError{action: "marshal", JSONKind: '"', GoType: marshalTextFuncType, Err: export.NewInvalidUTF8Error(0)}, }, { name: jsontest.Name("Methods/Invalid/Text/SkipFunc"), in: marshalTextFunc(func() ([]byte, error) { @@ -2889,7 +2890,7 @@ func TestMarshal(t *testing.T) { })): "invalid", }, want: `{`, - wantErr: &SemanticError{action: "marshal", GoType: marshalJSONv2FuncType, 
Err: errMissingName.withOffset(len64(`{`))}, + wantErr: &SemanticError{action: "marshal", GoType: marshalJSONv2FuncType, Err: export.NewMissingNameError(len64(`{`))}, }, { name: jsontest.Name("Methods/Invalid/MapKey/JSONv1/Syntax"), in: map[any]string{ @@ -2898,7 +2899,7 @@ func TestMarshal(t *testing.T) { })): "invalid", }, want: `{`, - wantErr: &SemanticError{action: "marshal", JSONKind: 'n', GoType: marshalJSONv1FuncType, Err: errMissingName.withOffset(len64(`{`))}, + wantErr: &SemanticError{action: "marshal", JSONKind: 'n', GoType: marshalJSONv1FuncType, Err: export.NewMissingNameError(len64(`{`))}, }, { name: jsontest.Name("Functions/Bool/V1"), opts: []Options{ @@ -3018,7 +3019,7 @@ func TestMarshal(t *testing.T) { })), }, in: true, - wantErr: &SemanticError{action: "marshal", JSONKind: 'i', GoType: boolType, Err: newInvalidCharacterError("i", "at start of value")}, + wantErr: &SemanticError{action: "marshal", JSONKind: 'i', GoType: boolType, Err: export.NewInvalidCharacterError("i", "at start of value", 0)}, }, { name: jsontest.Name("Functions/Bool/V2/DirectError"), opts: []Options{ @@ -3116,7 +3117,7 @@ func TestMarshal(t *testing.T) { }, in: map[nocaseString]string{"hello": "world"}, want: `{`, - wantErr: &SemanticError{action: "marshal", JSONKind: 'n', GoType: nocaseStringType, Err: errMissingName.withOffset(len64(`{`))}, + wantErr: &SemanticError{action: "marshal", JSONKind: 'n', GoType: nocaseStringType, Err: export.NewMissingNameError(len64(`{`))}, }, { name: jsontest.Name("Functions/Map/Key/NoCaseString/V2/InvalidKind"), opts: []Options{ @@ -3126,7 +3127,7 @@ func TestMarshal(t *testing.T) { }, in: map[nocaseString]string{"hello": "world"}, want: `{`, - wantErr: &SemanticError{action: "marshal", JSONKind: 'n', GoType: nocaseStringType, Err: errMissingName.withOffset(len64(`{`))}, + wantErr: &SemanticError{action: "marshal", JSONKind: 'n', GoType: nocaseStringType, Err: export.NewMissingNameError(len64(`{`))}, }, { name: 
jsontest.Name("Functions/Map/Key/String/V1/DuplicateName"), opts: []Options{ @@ -3136,7 +3137,7 @@ func TestMarshal(t *testing.T) { }, in: map[string]string{"name1": "value", "name2": "value"}, want: `{"name":"name"`, - wantErr: &SemanticError{action: "marshal", JSONKind: '"', GoType: stringType, Err: newDuplicateNameError(`"name"`).withOffset(len64(`{"name":"name",`))}, + wantErr: &SemanticError{action: "marshal", JSONKind: '"', GoType: stringType, Err: export.NewDuplicateNameError([]byte(`"name"`), len64(`{"name":"name",`))}, }, { name: jsontest.Name("Functions/Map/Key/NoCaseString/V2"), opts: []Options{ @@ -3175,7 +3176,7 @@ func TestMarshal(t *testing.T) { }, in: map[nocaseString]string{"hello": "world"}, want: `{`, - wantErr: &SemanticError{action: "marshal", GoType: nocaseStringType, Err: errMissingName.withOffset(len64(`{`))}, + wantErr: &SemanticError{action: "marshal", GoType: nocaseStringType, Err: export.NewMissingNameError(len64(`{`))}, }, { name: jsontest.Name("Functions/Map/Key/NoCaseString/V2/InvalidValue"), opts: []Options{ @@ -3185,7 +3186,7 @@ func TestMarshal(t *testing.T) { }, in: map[nocaseString]string{"hello": "world"}, want: `{`, - wantErr: &SemanticError{action: "marshal", GoType: nocaseStringType, Err: errMissingName.withOffset(len64(`{`))}, + wantErr: &SemanticError{action: "marshal", GoType: nocaseStringType, Err: export.NewMissingNameError(len64(`{`))}, }, { name: jsontest.Name("Functions/Map/Value/NoCaseString/V1"), opts: []Options{ @@ -3440,7 +3441,8 @@ func TestMarshal(t *testing.T) { } makeValueChecker := func(name string, want []PV) func(e *Encoder, v any) error { checkNext := func(e *Encoder, v any) error { - p := P{len(e.tokens.stack), e.tokens.last.length()} + xe := export.Encoder(e) + p := P{len(xe.Tokens.Stack), xe.Tokens.Last.Length()} rv := reflect.ValueOf(v) pv := PV{p, v} switch { @@ -3465,7 +3467,8 @@ func TestMarshal(t *testing.T) { } makePositionChecker := func(name string, want []P) func(e *Encoder, v any) error { 
checkNext := func(e *Encoder, v any) error { - p := P{len(e.tokens.stack), e.tokens.last.length()} + xe := export.Encoder(e) + p := P{len(xe.Tokens.Stack), xe.Tokens.Last.Length()} switch { case len(want) == 0: return fmt.Errorf("%s: %v: got more values than wanted", name, p) @@ -3922,7 +3925,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `falsetrue`, inVal: addr(true), want: addr(false), - wantErr: newInvalidCharacterError("t", "after top-level value").withOffset(len64(`false`)), + wantErr: export.NewInvalidCharacterError("t", "after top-level value", len64(`false`)), }, { name: jsontest.Name("Bools/Null"), inBuf: `null`, @@ -4838,7 +4841,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"0":1,"-0":-1}`, inVal: new(map[int]int), want: addr(map[int]int{0: 1}), - wantErr: newDuplicateNameError(`"-0"`).withOffset(len64(`{"0":1,`)), + wantErr: export.NewDuplicateNameError([]byte(`"-0"`), len64(`{"0":1,`)), }, { name: jsontest.Name("Maps/DuplicateName/Int/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -4855,7 +4858,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"1.0":"1.0","1":"1","1e0":"1e0"}`, inVal: new(map[float64]string), want: addr(map[float64]string{1: "1.0"}), - wantErr: newDuplicateNameError(`"1"`).withOffset(len64(`{"1.0":"1.0",`)), + wantErr: export.NewDuplicateNameError([]byte(`"1"`), len64(`{"1.0":"1.0",`)), }, { name: jsontest.Name("Maps/DuplicateName/Float/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -4872,7 +4875,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"hello":"hello","HELLO":"HELLO"}`, inVal: new(map[nocaseString]string), want: addr(map[nocaseString]string{"hello": "hello"}), - wantErr: newDuplicateNameError(`"HELLO"`).withOffset(len64(`{"hello":"hello",`)), + wantErr: export.NewDuplicateNameError([]byte(`"HELLO"`), len64(`{"hello":"hello",`)), }, { name: jsontest.Name("Maps/DuplicateName/NoCaseString/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -5722,7 +5725,7 @@ func 
TestUnmarshal(t *testing.T) { inBuf: `{"A":1,"fizz":nil,"B":2}`, inVal: new(structInlineRawValue), want: addr(structInlineRawValue{A: 1, X: RawValue(`{"fizz":`)}), - wantErr: newInvalidCharacterError("i", "within literal null (expecting 'u')").withOffset(len64(`{"A":1,"fizz":n`)), + wantErr: export.NewInvalidCharacterError("i", "within literal null (expecting 'u')", len64(`{"A":1,"fizz":n`)), }, { name: jsontest.Name("Structs/InlinedFallback/RawValue/CaseSensitive"), inBuf: `{"A":1,"fizz":"buzz","B":2,"a":3}`, @@ -5734,7 +5737,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"A":1,"fizz":"buzz","B":2,"fizz":"buzz"}`, inVal: new(structInlineRawValue), want: addr(structInlineRawValue{A: 1, X: RawValue(`{"fizz":"buzz"}`), B: 2}), - wantErr: newDuplicateNameError(`"fizz"`).withOffset(len64(`{"A":1,"fizz":"buzz","B":2,`)), + wantErr: export.NewDuplicateNameError([]byte(`"fizz"`), len64(`{"A":1,"fizz":"buzz","B":2,`)), }, { name: jsontest.Name("Structs/InlinedFallback/RawValue/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -5832,13 +5835,13 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"A":1,"fizz":nil,"B":2}`, inVal: new(structInlineMapStringAny), want: addr(structInlineMapStringAny{A: 1, X: jsonObject{"fizz": nil}}), - wantErr: newInvalidCharacterError("i", "within literal null (expecting 'u')").withOffset(len64(`{"A":1,"fizz":n`)), + wantErr: export.NewInvalidCharacterError("i", "within literal null (expecting 'u')", len64(`{"A":1,"fizz":n`)), }, { name: jsontest.Name("Structs/InlinedFallback/MapStringAny/MergeInvalidValue/Existing"), inBuf: `{"A":1,"fizz":nil,"B":2}`, inVal: addr(structInlineMapStringAny{A: 1, X: jsonObject{"fizz": true}}), want: addr(structInlineMapStringAny{A: 1, X: jsonObject{"fizz": true}}), - wantErr: newInvalidCharacterError("i", "within literal null (expecting 'u')").withOffset(len64(`{"A":1,"fizz":n`)), + wantErr: export.NewInvalidCharacterError("i", "within literal null (expecting 'u')", len64(`{"A":1,"fizz":n`)), }, 
{ name: jsontest.Name("Structs/InlinedFallback/MapStringAny/CaseSensitive"), inBuf: `{"A":1,"fizz":"buzz","B":2,"a":3}`, @@ -5850,7 +5853,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"A":1,"fizz":"buzz","B":2,"fizz":"buzz"}`, inVal: new(structInlineMapStringAny), want: addr(structInlineMapStringAny{A: 1, X: jsonObject{"fizz": "buzz"}, B: 2}), - wantErr: newDuplicateNameError(`"fizz"`).withOffset(len64(`{"A":1,"fizz":"buzz","B":2,`)), + wantErr: export.NewDuplicateNameError([]byte(`"fizz"`), len64(`{"A":1,"fizz":"buzz","B":2,`)), }, { name: jsontest.Name("Structs/InlinedFallback/MapStringAny/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -6042,7 +6045,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"AaA":"AaA","aaa":"aaa"}`, inVal: new(structNoCase), want: addr(structNoCase{AaA: "AaA"}), - wantErr: newDuplicateNameError(`"aaa"`).withOffset(len64(`{"AaA":"AaA",`)), + wantErr: export.NewDuplicateNameError([]byte(`"aaa"`), len64(`{"AaA":"AaA",`)), }, { name: jsontest.Name("Structs/CaseSensitive"), inBuf: `{"BOOL": true, "STRING": "hello", "BYTES": "AQID", "INT": -64, "UINT": 64, "FLOAT": 3.14159}`, @@ -6058,7 +6061,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"AAA":"AAA","AAA":"AAA"}`, inVal: addr(structNoCaseInlineRawValue{}), want: addr(structNoCaseInlineRawValue{AAA: "AAA"}), - wantErr: newDuplicateNameError(`"AAA"`).withOffset(len64(`{"AAA":"AAA",`)), + wantErr: export.NewDuplicateNameError([]byte(`"AAA"`), len64(`{"AAA":"AAA",`)), }, { name: jsontest.Name("Structs/DuplicateName/NoCase/OverwriteExact"), inBuf: `{"AAA":"after"}`, @@ -6069,13 +6072,13 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"aaa":"aaa","aaA":"aaA"}`, inVal: addr(structNoCaseInlineRawValue{}), want: addr(structNoCaseInlineRawValue{AaA: "aaa"}), - wantErr: newDuplicateNameError(`"aaA"`).withOffset(len64(`{"aaa":"aaa",`)), + wantErr: export.NewDuplicateNameError([]byte(`"aaA"`), len64(`{"aaa":"aaa",`)), }, { name: 
jsontest.Name("Structs/DuplicateName/NoCase/OverwriteNoCase"), inBuf: `{"aaa":"aaa","aaA":"aaA"}`, inVal: addr(structNoCaseInlineRawValue{}), want: addr(structNoCaseInlineRawValue{AaA: "aaa"}), - wantErr: newDuplicateNameError(`"aaA"`).withOffset(len64(`{"aaa":"aaa",`)), + wantErr: export.NewDuplicateNameError([]byte(`"aaA"`), len64(`{"aaa":"aaa",`)), }, { name: jsontest.Name("Structs/DuplicateName/Inline/Unknown"), inBuf: `{"unknown":""}`, @@ -6096,7 +6099,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"b":"","b":""}`, inVal: addr(structNoCaseInlineRawValue{}), want: addr(structNoCaseInlineRawValue{X: RawValue(`{"b":""}`)}), - wantErr: newDuplicateNameError(`"b"`).withOffset(len64(`{"b":"",`)), + wantErr: export.NewDuplicateNameError([]byte(`"b"`), len64(`{"b":"",`)), }, { name: jsontest.Name("Structs/Invalid/ErrUnexpectedEOF"), inBuf: ``, @@ -6467,7 +6470,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `]`, inVal: new(any), want: new(any), - wantErr: newInvalidCharacterError("]", "at start of value"), + wantErr: export.NewInvalidCharacterError("]", "at start of value", 0), }, { // NOTE: The semantics differs from v1, // where existing map entries were not merged into. 
@@ -6606,7 +6609,7 @@ func TestUnmarshal(t *testing.T) { inBuf: `{"X":{"fizz":"buzz","fizz":true}}`, inVal: new(struct{ X any }), want: addr(struct{ X any }{map[string]any{"fizz": "buzz"}}), - wantErr: newDuplicateNameError(`"fizz"`).withOffset(len64(`{"X":{"fizz":"buzz",`)), + wantErr: export.NewDuplicateNameError([]byte(`"fizz"`), len64(`{"X":{"fizz":"buzz",`)), }, { name: jsontest.Name("Interfaces/Any/Maps/AllowDuplicateNames"), opts: []Options{AllowDuplicateNames(true)}, @@ -7055,14 +7058,15 @@ func TestUnmarshal(t *testing.T) { if _, err := dec.ReadValue(); err != nil { return err } - *v = fmt.Sprintf("%d-%d", len(dec.tokens.stack), dec.tokens.last.length()) + xd := export.Decoder(dec) + *v = fmt.Sprintf("%d-%d", len(xd.Tokens.Stack), xd.Tokens.Last.Length()) return nil })), }, inBuf: `{"name":"value","name":"value"}`, inVal: addr(map[string]string{}), want: addr(map[string]string{"1-1": "1-2"}), - wantErr: &SemanticError{action: "unmarshal", GoType: reflect.PointerTo(stringType), Err: newDuplicateNameError(`"name"`).withOffset(len64(`{"name":"value",`))}, + wantErr: &SemanticError{action: "unmarshal", GoType: reflect.PointerTo(stringType), Err: export.NewDuplicateNameError([]byte(`"name"`), len64(`{"name":"value",`))}, }, { name: jsontest.Name("Functions/Map/Value/NoCaseString/V1"), opts: []Options{ @@ -7334,7 +7338,8 @@ func TestUnmarshal(t *testing.T) { } makeValueChecker := func(name string, want []PV) func(d *Decoder, v any) error { checkNext := func(d *Decoder, v any) error { - p := P{len(d.tokens.stack), d.tokens.last.length()} + xd := export.Decoder(d) + p := P{len(xd.Tokens.Stack), xd.Tokens.Last.Length()} rv := reflect.ValueOf(v) pv := PV{p, v} switch { @@ -7359,7 +7364,8 @@ func TestUnmarshal(t *testing.T) { } makePositionChecker := func(name string, want []P) func(d *Decoder, v any) error { checkNext := func(d *Decoder, v any) error { - p := P{len(d.tokens.stack), d.tokens.last.length()} + xd := export.Decoder(d) + p := P{len(xd.Tokens.Stack), 
xd.Tokens.Last.Length()} switch { case len(want) == 0: return fmt.Errorf("%s: %v: got more values than wanted", name, p) @@ -7738,7 +7744,7 @@ func TestUnmarshal(t *testing.T) { want: addr(struct { D time.Duration }{1}), - wantErr: newInvalidCharacterError("x", "at start of value").withOffset(len64(`{"D":`)), + wantErr: export.NewInvalidCharacterError("x", "at start of value", len64(`{"D":`)), }, { name: jsontest.Name("Duration/Format/Invalid"), inBuf: `{"D":"0s"}`, @@ -7904,7 +7910,7 @@ func TestUnmarshal(t *testing.T) { inVal: new(struct { T time.Time }), - wantErr: newInvalidCharacterError("x", "at start of value").withOffset(len64(`{"D":`)), + wantErr: export.NewInvalidCharacterError("x", "at start of value", len64(`{"D":`)), }, { name: jsontest.Name("Time/IgnoreInvalidFormat"), opts: []Options{invalidFormatOption}, @@ -8025,3 +8031,115 @@ func TestUnmarshalReuse(t *testing.T) { } }) } + +type ReaderFunc func([]byte) (int, error) + +func (f ReaderFunc) Read(b []byte) (int, error) { return f(b) } + +type WriterFunc func([]byte) (int, error) + +func (f WriterFunc) Write(b []byte) (int, error) { return f(b) } + +func TestCoderBufferGrowth(t *testing.T) { + // The growth rate of the internal buffer should be exponential, + // but should not grow unbounded. 
+ checkGrowth := func(ns []int) { + t.Helper() + var sumBytes, sumRates, numGrows float64 + prev := ns[0] + for i := 1; i < len(ns)-1; i++ { + n := ns[i] + if n != prev { + sumRates += float64(n) / float64(prev) + numGrows++ + prev = n + } + if n > 1<<20 { + t.Fatalf("single Read/Write too large: %d", n) + } + sumBytes += float64(n) + } + if mean := sumBytes / float64(len(ns)); mean < 1<<10 { + t.Fatalf("average Read/Write too small: %0.1f", mean) + } + switch mean := sumRates / numGrows; { + case mean < 1.25: + t.Fatalf("average growth rate too slow: %0.3f", mean) + case mean > 2.00: + t.Fatalf("average growth rate too fast: %0.3f", mean) + } + } + + bb := &bytesBuffer{new(bytes.Buffer)} + + var writeSizes []int + if err := MarshalWrite(WriterFunc(func(b []byte) (int, error) { + n, err := bb.Write(b) + writeSizes = append(writeSizes, n) + return n, err + }), make([]struct{}, 1e6)); err != nil { + t.Fatalf("MarshalWrite error: %v", err) + } + checkGrowth(writeSizes) + + var readSizes []int + if err := UnmarshalRead(ReaderFunc(func(b []byte) (int, error) { + n, err := bb.Read(b) + readSizes = append(readSizes, n) + return n, err + }), new([]struct{})); err != nil { + t.Fatalf("UnmarshalRead error: %v", err) + } + checkGrowth(readSizes) +} + +func TestUintSet(t *testing.T) { + type operation any // has | insert + type has struct { + in uint + want bool + } + type insert struct { + in uint + want bool + } + + // Sequence of operations to perform (order matters). 
+ ops := []operation{ + has{0, false}, + has{63, false}, + has{64, false}, + has{1234, false}, + insert{3, true}, + has{2, false}, + has{3, true}, + has{4, false}, + has{63, false}, + insert{3, false}, + insert{63, true}, + has{63, true}, + insert{64, true}, + insert{64, false}, + has{64, true}, + insert{3264, true}, + has{3264, true}, + insert{3, false}, + has{3, true}, + } + + var us uintSet + for i, op := range ops { + switch op := op.(type) { + case has: + if got := us.has(op.in); got != op.want { + t.Fatalf("%d: uintSet.has(%v) = %v, want %v", i, op.in, got, op.want) + } + case insert: + if got := us.insert(op.in); got != op.want { + t.Fatalf("%d: uintSet.insert(%v) = %v, want %v", i, op.in, got, op.want) + } + default: + panic(fmt.Sprintf("unknown operation: %T", op)) + } + } +} diff --git a/arshal_time.go b/arshal_time.go index b0338e2..58887d7 100644 --- a/arshal_time.go +++ b/arshal_time.go @@ -12,6 +12,7 @@ import ( "time" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) var ( @@ -33,7 +34,8 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { fncs.nonDefault = true marshalNanos := fncs.marshal fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + xe := export.Encoder(enc) + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { if mo.Format == "nanos" { mo.Format = "" return marshalNanos(enc, va, mo) @@ -53,7 +55,8 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { // TODO: Should there be a flag that specifies that we can unmarshal // from either form since there would be no ambiguity? 
- if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + xd := export.Decoder(dec) + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { if uo.Format == "nanos" { uo.Format = "" return unmarshalNanos(dec, va, uo) @@ -62,9 +65,9 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { } } - var flags valueFlags + var flags jsonwire.ValueFlags td := va.Addr().Interface().(*time.Duration) - val, err := dec.readValue(&flags) + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -73,7 +76,7 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { *td = time.Duration(0) return nil case '"': - val = unescapeStringMayCopy(val, flags.isVerbatim()) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) td2, err := time.ParseDuration(string(val)) if err != nil { return &SemanticError{action: "unmarshal", JSONKind: k, GoType: t, Err: err} @@ -87,9 +90,10 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { case timeTimeType: fncs.nonDefault = true fncs.marshal = func(enc *Encoder, va addressableValue, mo *jsonopts.Struct) error { + xe := export.Encoder(enc) format := time.RFC3339Nano isRFC3339 := true - if mo.Format != "" && mo.FormatDepth == enc.tokens.depth() { + if mo.Format != "" && mo.FormatDepth == xe.Tokens.Depth() { var err error format, isRFC3339, err = checkTimeFormat(mo.Format) if err != nil { @@ -125,15 +129,16 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { // The format may contain special characters that need escaping. // Verify that the result is a valid JSON string (common case), // otherwise escape the string correctly (slower case). 
- if consumeSimpleString(b) != len(b) { - b, _ = appendString(nil, string(b[len(`"`):len(b)-len(`"`)]), true, nil) + if jsonwire.ConsumeSimpleString(b) != len(b) { + b, _ = jsonwire.AppendQuote(nil, b[len(`"`):len(b)-len(`"`)], true, nil) } return enc.WriteValue(b) } fncs.unmarshal = func(dec *Decoder, va addressableValue, uo *jsonopts.Struct) error { + xd := export.Decoder(dec) format := time.RFC3339 isRFC3339 := true - if uo.Format != "" && uo.FormatDepth == dec.tokens.depth() { + if uo.Format != "" && uo.FormatDepth == xd.Tokens.Depth() { var err error format, isRFC3339, err = checkTimeFormat(uo.Format) if err != nil { @@ -141,9 +146,9 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { } } - var flags valueFlags + var flags jsonwire.ValueFlags tt := va.Addr().Interface().(*time.Time) - val, err := dec.readValue(&flags) + val, err := xd.ReadValue(&flags) if err != nil { return err } @@ -153,7 +158,7 @@ func makeTimeArshaler(fncs *arshaler, t reflect.Type) *arshaler { *tt = time.Time{} return nil case '"': - val = unescapeStringMayCopy(val, flags.isVerbatim()) + val = jsonwire.UnquoteMayCopy(val, flags.IsVerbatim()) tt2, err := time.Parse(format, string(val)) if isRFC3339 && err == nil { // TODO(https://go.dev/issue/54580): RFC 3339 specifies diff --git a/doc.go b/doc.go index 1187312..462b4c8 100644 --- a/doc.go +++ b/doc.go @@ -2,61 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package json implements serialization of JSON -// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785. +// Package json implements semantic processing of JSON as specified in RFC 8259. // JSON is a simple data interchange format that can represent // primitive data types such as booleans, strings, and numbers, // in addition to structured data types such as objects and arrays. 
// -// # Terminology -// -// This package uses the terms "encode" and "decode" for syntactic functionality -// that is concerned with processing JSON based on its grammar, and -// uses the terms "marshal" and "unmarshal" for semantic functionality -// that determines the meaning of JSON values as Go values and vice-versa. -// It aims to provide a clear distinction between functionality that -// is purely concerned with encoding versus that of marshaling. -// For example, one can directly encode a stream of JSON tokens without -// needing to marshal a concrete Go value representing them. -// Similarly, one can decode a stream of JSON tokens without -// needing to unmarshal them into a concrete Go value. -// -// This package uses JSON terminology when discussing JSON, which may differ -// from related concepts in Go or elsewhere in computing literature. -// -// - A JSON "object" refers to an unordered collection of name/value members. -// - A JSON "array" refers to an ordered sequence of elements. -// - A JSON "value" refers to either a literal (i.e., null, false, or true), -// string, number, object, or array. -// -// See RFC 8259 for more information. -// -// # Specifications -// -// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259, -// and RFC 8785. Each RFC is generally a stricter subset of another RFC. -// In increasing order of strictness: -// -// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8 -// and also do not require (but recommend) that object names be unique. -// - RFC 8259 requires the use of UTF-8, -// but does not require (but recommends) that object names be unique. -// - RFC 7493 requires the use of UTF-8 -// and also requires that object names be unique. -// - RFC 8785 defines a canonical representation. It requires the use of UTF-8 -// and also requires that object names be unique and in a specific ordering. -// It specifies exactly how strings and numbers must be formatted. 
-// -// The primary difference between RFC 4627 and RFC 7159 is that the former -// restricted top-level values to only JSON objects and arrays, while -// RFC 7159 and subsequent RFCs permit top-level values to additionally be -// JSON nulls, booleans, strings, or numbers. -// -// By default, this package operates on RFC 7493, but can be configured -// to operate according to the other RFC specifications. -// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it. -// In particular, it makes specific choices about behavior that RFC 8259 -// leaves as undefined in order to ensure greater interoperability. +// The [Marshal] and [Unmarshal] functions are used to encode/decode Go values +// to/from JSON text. // // # JSON Representation of Go structs // diff --git a/encode_test.go b/encode_test.go deleted file mode 100644 index eacac34..0000000 --- a/encode_test.go +++ /dev/null @@ -1,767 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "bufio" - "bytes" - "compress/gzip" - "crypto/sha256" - "encoding/binary" - "encoding/hex" - "errors" - "flag" - "io" - "math" - "net/http" - "path" - "reflect" - "strconv" - "strings" - "testing" - "time" - "unicode" - - "github.com/go-json-experiment/json/internal/jsonflags" - "github.com/go-json-experiment/json/internal/jsontest" -) - -// TestEncoder tests whether we can produce JSON with either tokens or raw values. 
-func TestEncoder(t *testing.T) { - for _, td := range coderTestdata { - for _, formatName := range []string{"Compact", "Escaped", "Indented"} { - for _, typeName := range []string{"Token", "Value", "TokenDelims"} { - t.Run(path.Join(td.name.Name, typeName, formatName), func(t *testing.T) { - testEncoder(t, td.name.Where, formatName, typeName, td) - }) - } - } - } -} -func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName string, td coderTestdataEntry) { - var want string - var opts []Options - dst := new(bytes.Buffer) - opts = append(opts, jsonflags.OmitTopLevelNewline|1) - want = td.outCompacted - switch formatName { - case "Escaped": - opts = append(opts, WithEscapeFunc(func(rune) bool { return true })) - if td.outEscaped != "" { - want = td.outEscaped - } - case "Indented": - opts = append(opts, Expand(true)) - opts = append(opts, WithIndentPrefix("\t")) - opts = append(opts, WithIndent(" ")) - if td.outIndented != "" { - want = td.outIndented - } - } - enc := NewEncoder(dst, opts...) - - switch typeName { - case "Token": - var pointers []string - for _, tok := range td.tokens { - if err := enc.WriteToken(tok); err != nil { - t.Fatalf("%s: Encoder.WriteToken error: %v", where, err) - } - if td.pointers != nil { - pointers = append(pointers, enc.StackPointer()) - } - } - if !reflect.DeepEqual(pointers, td.pointers) { - t.Fatalf("%s: pointers mismatch:\ngot %q\nwant %q", where, pointers, td.pointers) - } - case "Value": - if err := enc.WriteValue(RawValue(td.in)); err != nil { - t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) - } - case "TokenDelims": - // Use WriteToken for object/array delimiters, WriteValue otherwise. 
- for _, tok := range td.tokens { - switch tok.Kind() { - case '{', '}', '[', ']': - if err := enc.WriteToken(tok); err != nil { - t.Fatalf("%s: Encoder.WriteToken error: %v", where, err) - } - default: - val := RawValue(tok.String()) - if tok.Kind() == '"' { - val, _ = appendString(nil, tok.String(), false, nil) - } - if err := enc.WriteValue(val); err != nil { - t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) - } - } - } - } - - got := dst.String() - if got != want { - t.Errorf("%s: output mismatch:\ngot %q\nwant %q", where, got, want) - } -} - -// TestFaultyEncoder tests that temporary I/O errors are not fatal. -func TestFaultyEncoder(t *testing.T) { - for _, td := range coderTestdata { - for _, typeName := range []string{"Token", "Value"} { - t.Run(path.Join(td.name.Name, typeName), func(t *testing.T) { - testFaultyEncoder(t, td.name.Where, typeName, td) - }) - } - } -} -func testFaultyEncoder(t *testing.T, where jsontest.CasePos, typeName string, td coderTestdataEntry) { - b := &FaultyBuffer{ - MaxBytes: 1, - MayError: io.ErrShortWrite, - } - - // Write all the tokens. - // Even if the underlying io.Writer may be faulty, - // writing a valid token or value is guaranteed to at least - // be appended to the internal buffer. - // In other words, syntactic errors occur before I/O errors. 
- enc := NewEncoder(b) - switch typeName { - case "Token": - for i, tok := range td.tokens { - err := enc.WriteToken(tok) - if err != nil && !errors.Is(err, io.ErrShortWrite) { - t.Fatalf("%s: %d: Encoder.WriteToken error: %v", where, i, err) - } - } - case "Value": - err := enc.WriteValue(RawValue(td.in)) - if err != nil && !errors.Is(err, io.ErrShortWrite) { - t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) - } - } - gotOutput := string(append(b.B, enc.unflushedBuffer()...)) - wantOutput := td.outCompacted + "\n" - if gotOutput != wantOutput { - t.Fatalf("%s: output mismatch:\ngot %s\nwant %s", where, gotOutput, wantOutput) - } -} - -type encoderMethodCall struct { - in tokOrVal - wantErr error - wantPointer string -} - -var encoderErrorTestdata = []struct { - name jsontest.CaseName - opts []Options - calls []encoderMethodCall - wantOut string -}{{ - name: jsontest.Name("InvalidToken"), - calls: []encoderMethodCall{ - {zeroToken, &SyntacticError{str: "invalid json.Token"}, ""}, - }, -}, { - name: jsontest.Name("InvalidValue"), - calls: []encoderMethodCall{ - {RawValue(`#`), newInvalidCharacterError("#", "at start of value"), ""}, - }, -}, { - name: jsontest.Name("InvalidValue/DoubleZero"), - calls: []encoderMethodCall{ - {RawValue(`00`), newInvalidCharacterError("0", "after top-level value").withOffset(len64(`0`)), ""}, - }, -}, { - name: jsontest.Name("TruncatedValue"), - calls: []encoderMethodCall{ - {zeroValue, io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedNull"), - calls: []encoderMethodCall{ - {RawValue(`nul`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("InvalidNull"), - calls: []encoderMethodCall{ - {RawValue(`nulL`), newInvalidCharacterError("L", "within literal null (expecting 'l')").withOffset(len64(`nul`)), ""}, - }, -}, { - name: jsontest.Name("TruncatedFalse"), - calls: []encoderMethodCall{ - {RawValue(`fals`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("InvalidFalse"), - calls: 
[]encoderMethodCall{ - {RawValue(`falsE`), newInvalidCharacterError("E", "within literal false (expecting 'e')").withOffset(len64(`fals`)), ""}, - }, -}, { - name: jsontest.Name("TruncatedTrue"), - calls: []encoderMethodCall{ - {RawValue(`tru`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("InvalidTrue"), - calls: []encoderMethodCall{ - {RawValue(`truE`), newInvalidCharacterError("E", "within literal true (expecting 'e')").withOffset(len64(`tru`)), ""}, - }, -}, { - name: jsontest.Name("TruncatedString"), - calls: []encoderMethodCall{ - {RawValue(`"star`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("InvalidString"), - calls: []encoderMethodCall{ - {RawValue(`"ok` + "\x00"), newInvalidCharacterError("\x00", `within string (expecting non-control character)`).withOffset(len64(`"ok`)), ""}, - }, -}, { - name: jsontest.Name("ValidString/AllowInvalidUTF8/Token"), - opts: []Options{AllowInvalidUTF8(true)}, - calls: []encoderMethodCall{ - {String("living\xde\xad\xbe\xef"), nil, ""}, - }, - wantOut: "\"living\xde\xad\ufffd\ufffd\"\n", -}, { - name: jsontest.Name("ValidString/AllowInvalidUTF8/Value"), - opts: []Options{AllowInvalidUTF8(true)}, - calls: []encoderMethodCall{ - {RawValue("\"living\xde\xad\xbe\xef\""), nil, ""}, - }, - wantOut: "\"living\xde\xad\ufffd\ufffd\"\n", -}, { - name: jsontest.Name("InvalidString/RejectInvalidUTF8"), - opts: []Options{AllowInvalidUTF8(false)}, - calls: []encoderMethodCall{ - {String("living\xde\xad\xbe\xef"), errInvalidUTF8, ""}, - {RawValue("\"living\xde\xad\xbe\xef\""), errInvalidUTF8.withOffset(len64("\"living\xde\xad")), ""}, - }, -}, { - name: jsontest.Name("TruncatedNumber"), - calls: []encoderMethodCall{ - {RawValue(`0.`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("InvalidNumber"), - calls: []encoderMethodCall{ - {RawValue(`0.e`), newInvalidCharacterError("e", "within number (expecting digit)").withOffset(len64(`0.`)), ""}, - }, -}, { - name: 
jsontest.Name("TruncatedObject/AfterStart"), - calls: []encoderMethodCall{ - {RawValue(`{`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedObject/AfterName"), - calls: []encoderMethodCall{ - {RawValue(`{"0"`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedObject/AfterColon"), - calls: []encoderMethodCall{ - {RawValue(`{"0":`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedObject/AfterValue"), - calls: []encoderMethodCall{ - {RawValue(`{"0":0`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedObject/AfterComma"), - calls: []encoderMethodCall{ - {RawValue(`{"0":0,`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("InvalidObject/MissingColon"), - calls: []encoderMethodCall{ - {RawValue(` { "fizz" "buzz" } `), newInvalidCharacterError("\"", "after object name (expecting ':')").withOffset(len64(` { "fizz" `)), ""}, - {RawValue(` { "fizz" , "buzz" } `), newInvalidCharacterError(",", "after object name (expecting ':')").withOffset(len64(` { "fizz" `)), ""}, - }, -}, { - name: jsontest.Name("InvalidObject/MissingComma"), - calls: []encoderMethodCall{ - {RawValue(` { "fizz" : "buzz" "gazz" } `), newInvalidCharacterError("\"", "after object value (expecting ',' or '}')").withOffset(len64(` { "fizz" : "buzz" `)), ""}, - {RawValue(` { "fizz" : "buzz" : "gazz" } `), newInvalidCharacterError(":", "after object value (expecting ',' or '}')").withOffset(len64(` { "fizz" : "buzz" `)), ""}, - }, -}, { - name: jsontest.Name("InvalidObject/ExtraComma"), - calls: []encoderMethodCall{ - {RawValue(` { , } `), newInvalidCharacterError(",", `at start of string (expecting '"')`).withOffset(len64(` { `)), ""}, - {RawValue(` { "fizz" : "buzz" , } `), newInvalidCharacterError("}", `at start of string (expecting '"')`).withOffset(len64(` { "fizz" : "buzz" , `)), ""}, - }, -}, { - name: jsontest.Name("InvalidObject/InvalidName"), - calls: []encoderMethodCall{ - {RawValue(`{ null }`), 
newInvalidCharacterError("n", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, - {RawValue(`{ false }`), newInvalidCharacterError("f", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, - {RawValue(`{ true }`), newInvalidCharacterError("t", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, - {RawValue(`{ 0 }`), newInvalidCharacterError("0", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, - {RawValue(`{ {} }`), newInvalidCharacterError("{", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, - {RawValue(`{ [] }`), newInvalidCharacterError("[", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, - {ObjectStart, nil, ""}, - {Null, errMissingName.withOffset(len64(`{`)), ""}, - {RawValue(`null`), errMissingName.withOffset(len64(`{`)), ""}, - {False, errMissingName.withOffset(len64(`{`)), ""}, - {RawValue(`false`), errMissingName.withOffset(len64(`{`)), ""}, - {True, errMissingName.withOffset(len64(`{`)), ""}, - {RawValue(`true`), errMissingName.withOffset(len64(`{`)), ""}, - {Uint(0), errMissingName.withOffset(len64(`{`)), ""}, - {RawValue(`0`), errMissingName.withOffset(len64(`{`)), ""}, - {ObjectStart, errMissingName.withOffset(len64(`{`)), ""}, - {RawValue(`{}`), errMissingName.withOffset(len64(`{`)), ""}, - {ArrayStart, errMissingName.withOffset(len64(`{`)), ""}, - {RawValue(`[]`), errMissingName.withOffset(len64(`{`)), ""}, - {ObjectEnd, nil, ""}, - }, - wantOut: "{}\n", -}, { - name: jsontest.Name("InvalidObject/InvalidValue"), - calls: []encoderMethodCall{ - {RawValue(`{ "0": x }`), newInvalidCharacterError("x", `at start of value`).withOffset(len64(`{ "0": `)), ""}, - }, -}, { - name: jsontest.Name("InvalidObject/MismatchingDelim"), - calls: []encoderMethodCall{ - {RawValue(` { ] `), newInvalidCharacterError("]", `at start of string (expecting '"')`).withOffset(len64(` { `)), ""}, - {RawValue(` { "0":0 ] `), newInvalidCharacterError("]", `after object value 
(expecting ',' or '}')`).withOffset(len64(` { "0":0 `)), ""}, - {ObjectStart, nil, ""}, - {ArrayEnd, errMismatchDelim.withOffset(len64(`{`)), ""}, - {RawValue(`]`), newInvalidCharacterError("]", "at start of value").withOffset(len64(`{`)), ""}, - {ObjectEnd, nil, ""}, - }, - wantOut: "{}\n", -}, { - name: jsontest.Name("ValidObject/UniqueNames"), - calls: []encoderMethodCall{ - {ObjectStart, nil, ""}, - {String("0"), nil, ""}, - {Uint(0), nil, ""}, - {String("1"), nil, ""}, - {Uint(1), nil, ""}, - {ObjectEnd, nil, ""}, - {RawValue(` { "0" : 0 , "1" : 1 } `), nil, ""}, - }, - wantOut: `{"0":0,"1":1}` + "\n" + `{"0":0,"1":1}` + "\n", -}, { - name: jsontest.Name("ValidObject/DuplicateNames"), - opts: []Options{AllowDuplicateNames(true)}, - calls: []encoderMethodCall{ - {ObjectStart, nil, ""}, - {String("0"), nil, ""}, - {Uint(0), nil, ""}, - {String("0"), nil, ""}, - {Uint(0), nil, ""}, - {ObjectEnd, nil, ""}, - {RawValue(` { "0" : 0 , "0" : 0 } `), nil, ""}, - }, - wantOut: `{"0":0,"0":0}` + "\n" + `{"0":0,"0":0}` + "\n", -}, { - name: jsontest.Name("InvalidObject/DuplicateNames"), - calls: []encoderMethodCall{ - {ObjectStart, nil, ""}, - {String("0"), nil, ""}, - {ObjectStart, nil, ""}, - {ObjectEnd, nil, ""}, - {String("0"), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},`)), "/0"}, - {RawValue(`"0"`), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},`)), "/0"}, - {String("1"), nil, ""}, - {ObjectStart, nil, ""}, - {ObjectEnd, nil, ""}, - {String("0"), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, - {RawValue(`"0"`), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, - {String("1"), newDuplicateNameError(`"1"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, - {RawValue(`"1"`), newDuplicateNameError(`"1"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, - {ObjectEnd, nil, ""}, - {RawValue(` { "0" : 0 , "1" : 1 , "0" : 0 } `), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},"1":{}}` + "\n" + ` { "0" : 
0 , "1" : 1 , `)), ""}, - }, - wantOut: `{"0":{},"1":{}}` + "\n", -}, { - name: jsontest.Name("TruncatedArray/AfterStart"), - calls: []encoderMethodCall{ - {RawValue(`[`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedArray/AfterValue"), - calls: []encoderMethodCall{ - {RawValue(`[0`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedArray/AfterComma"), - calls: []encoderMethodCall{ - {RawValue(`[0,`), io.ErrUnexpectedEOF, ""}, - }, -}, { - name: jsontest.Name("TruncatedArray/MissingComma"), - calls: []encoderMethodCall{ - {RawValue(` [ "fizz" "buzz" ] `), newInvalidCharacterError("\"", "after array value (expecting ',' or ']')").withOffset(len64(` [ "fizz" `)), ""}, - }, -}, { - name: jsontest.Name("InvalidArray/MismatchingDelim"), - calls: []encoderMethodCall{ - {RawValue(` [ } `), newInvalidCharacterError("}", `at start of value`).withOffset(len64(` [ `)), ""}, - {ArrayStart, nil, ""}, - {ObjectEnd, errMismatchDelim.withOffset(len64(`[`)), ""}, - {RawValue(`}`), newInvalidCharacterError("}", "at start of value").withOffset(len64(`[`)), ""}, - {ArrayEnd, nil, ""}, - }, - wantOut: "[]\n", -}} - -// TestEncoderErrors test that Encoder errors occur when we expect and -// leaves the Encoder in a consistent state. -func TestEncoderErrors(t *testing.T) { - for _, td := range encoderErrorTestdata { - t.Run(path.Join(td.name.Name), func(t *testing.T) { - testEncoderErrors(t, td.name.Where, td.opts, td.calls, td.wantOut) - }) - } -} -func testEncoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, calls []encoderMethodCall, wantOut string) { - dst := new(bytes.Buffer) - enc := NewEncoder(dst, opts...) 
- for i, call := range calls { - var gotErr error - switch tokVal := call.in.(type) { - case Token: - gotErr = enc.WriteToken(tokVal) - case RawValue: - gotErr = enc.WriteValue(tokVal) - } - if !reflect.DeepEqual(gotErr, call.wantErr) { - t.Fatalf("%s: %d: error mismatch:\ngot %v\nwant %v", where, i, gotErr, call.wantErr) - } - if call.wantPointer != "" { - gotPointer := enc.StackPointer() - if gotPointer != call.wantPointer { - t.Fatalf("%s: %d: Encoder.StackPointer = %s, want %s", where, i, gotPointer, call.wantPointer) - } - } - } - gotOut := dst.String() + string(enc.unflushedBuffer()) - if gotOut != wantOut { - t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, gotOut, wantOut) - } - gotOffset := int(enc.OutputOffset()) - wantOffset := len(wantOut) - if gotOffset != wantOffset { - t.Fatalf("%s: Encoder.OutputOffset = %v, want %v", where, gotOffset, wantOffset) - } -} - -func TestAppendString(t *testing.T) { - var ( - escapeNothing = makeEscapeRunes(false, false, nil) - escapeHTML = makeEscapeRunes(true, true, nil) - escapeNonASCII = makeEscapeRunes(false, false, func(r rune) bool { return r > unicode.MaxASCII }) - escapeEverything = makeEscapeRunes(false, false, func(r rune) bool { return true }) - ) - - tests := []struct { - in string - escapeRune *escapeRunes - want string - wantErr error - wantErrUTF8 error - }{ - {"", nil, `""`, nil, nil}, - {"hello", nil, `"hello"`, nil, nil}, - {"\x00", nil, `"\u0000"`, nil, nil}, - {"\x1f", nil, `"\u001f"`, nil, nil}, - {"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, `"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, nil, nil}, - {" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", nil, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil}, - {"x\x80\ufffd", nil, "\"x\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xff\ufffd", nil, "\"x\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\x80\ufffd", escapeNonASCII, "\"x\\ufffd\\ufffd\"", nil, errInvalidUTF8}, - {"x\xff\ufffd", escapeNonASCII, 
"\"x\\ufffd\\ufffd\"", nil, errInvalidUTF8}, - {"x\xc0", nil, "\"x\ufffd\"", nil, errInvalidUTF8}, - {"x\xc0\x80", nil, "\"x\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xe0", nil, "\"x\ufffd\"", nil, errInvalidUTF8}, - {"x\xe0\x80", nil, "\"x\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xe0\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xf0", nil, "\"x\ufffd\"", nil, errInvalidUTF8}, - {"x\xf0\x80", nil, "\"x\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xf0\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xf0\x80\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"x\xed\xba\xad", nil, "\"x\ufffd\ufffd\ufffd\"", nil, errInvalidUTF8}, - {"\"\\/\b\f\n\r\t", nil, `"\"\\/\b\f\n\r\t"`, nil, nil}, - {"\"\\/\b\f\n\r\t", escapeEverything, `"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, nil, nil}, - {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", nil, `"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃)."`, nil, nil}, - {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", escapeNonASCII, `"\u0669(-\u032e\u032e\u0303-\u0303)\u06f6 \u0669(\u25cf\u032e\u032e\u0303\u2022\u0303)\u06f6 \u0669(\u0361\u0e4f\u032f\u0361\u0e4f)\u06f6 \u0669(-\u032e\u032e\u0303\u2022\u0303)."`, nil, nil}, - {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", escapeEverything, `"\u0669\u0028\u002d\u032e\u032e\u0303\u002d\u0303\u0029\u06f6\u0020\u0669\u0028\u25cf\u032e\u032e\u0303\u2022\u0303\u0029\u06f6\u0020\u0669\u0028\u0361\u0e4f\u032f\u0361\u0e4f\u0029\u06f6\u0020\u0669\u0028\u002d\u032e\u032e\u0303\u2022\u0303\u0029\u002e"`, nil, nil}, - {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, "\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", nil, nil}, - {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", escapeEverything, `"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", nil, 
"\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeNothing, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeHTML, "\"\\u0000\\u001f\u0020\\\"\\u0026\\u003c\\u003e\\\\\u007f\u0080\\u2028\\u2029\ufffd\U0001f602\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeNonASCII, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\\u0080\\u2028\\u2029\\ufffd\\ud83d\\ude02\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeEverything, "\"\\u0000\\u001f\\u0020\\u0022\\u0026\\u003c\\u003e\\u005c\\u007f\\u0080\\u2028\\u2029\\ufffd\\ud83d\\ude02\"", nil, nil}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - got, gotErr := appendString(nil, tt.in, false, tt.escapeRune) - if string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { - t.Errorf("appendString(nil, %q, false, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) - } - switch got, gotErr := appendString(nil, tt.in, true, tt.escapeRune); { - case tt.wantErrUTF8 == nil && (string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr)): - t.Errorf("appendString(nil, %q, true, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) - case tt.wantErrUTF8 != nil && (!strings.HasPrefix(tt.want, string(got)) || !reflect.DeepEqual(gotErr, tt.wantErrUTF8)): - t.Errorf("appendString(nil, %q, true, ...) 
= (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErrUTF8) - } - }) - } -} - -func TestAppendNumber(t *testing.T) { - tests := []struct { - in float64 - want32 string - want64 string - }{ - {math.E, "2.7182817", "2.718281828459045"}, - {math.Pi, "3.1415927", "3.141592653589793"}, - {math.SmallestNonzeroFloat32, "1e-45", "1.401298464324817e-45"}, - {math.SmallestNonzeroFloat64, "0", "5e-324"}, - {math.MaxFloat32, "3.4028235e+38", "3.4028234663852886e+38"}, - {math.MaxFloat64, "", "1.7976931348623157e+308"}, - {0.1111111111111111, "0.11111111", "0.1111111111111111"}, - {0.2222222222222222, "0.22222222", "0.2222222222222222"}, - {0.3333333333333333, "0.33333334", "0.3333333333333333"}, - {0.4444444444444444, "0.44444445", "0.4444444444444444"}, - {0.5555555555555555, "0.5555556", "0.5555555555555555"}, - {0.6666666666666666, "0.6666667", "0.6666666666666666"}, - {0.7777777777777777, "0.7777778", "0.7777777777777777"}, - {0.8888888888888888, "0.8888889", "0.8888888888888888"}, - {0.9999999999999999, "1", "0.9999999999999999"}, - - // The following entries are from RFC 8785, appendix B - // which are designed to ensure repeatable formatting of 64-bit floats. 
- {math.Float64frombits(0x0000000000000000), "0", "0"}, - {math.Float64frombits(0x8000000000000000), "-0", "-0"}, // differs from RFC 8785 - {math.Float64frombits(0x0000000000000001), "0", "5e-324"}, - {math.Float64frombits(0x8000000000000001), "-0", "-5e-324"}, - {math.Float64frombits(0x7fefffffffffffff), "", "1.7976931348623157e+308"}, - {math.Float64frombits(0xffefffffffffffff), "", "-1.7976931348623157e+308"}, - {math.Float64frombits(0x4340000000000000), "9007199000000000", "9007199254740992"}, - {math.Float64frombits(0xc340000000000000), "-9007199000000000", "-9007199254740992"}, - {math.Float64frombits(0x4430000000000000), "295147900000000000000", "295147905179352830000"}, - {math.Float64frombits(0x44b52d02c7e14af5), "1e+23", "9.999999999999997e+22"}, - {math.Float64frombits(0x44b52d02c7e14af6), "1e+23", "1e+23"}, - {math.Float64frombits(0x44b52d02c7e14af7), "1e+23", "1.0000000000000001e+23"}, - {math.Float64frombits(0x444b1ae4d6e2ef4e), "1e+21", "999999999999999700000"}, - {math.Float64frombits(0x444b1ae4d6e2ef4f), "1e+21", "999999999999999900000"}, - {math.Float64frombits(0x444b1ae4d6e2ef50), "1e+21", "1e+21"}, - {math.Float64frombits(0x3eb0c6f7a0b5ed8c), "0.000001", "9.999999999999997e-7"}, - {math.Float64frombits(0x3eb0c6f7a0b5ed8d), "0.000001", "0.000001"}, - {math.Float64frombits(0x41b3de4355555553), "333333340", "333333333.3333332"}, - {math.Float64frombits(0x41b3de4355555554), "333333340", "333333333.33333325"}, - {math.Float64frombits(0x41b3de4355555555), "333333340", "333333333.3333333"}, - {math.Float64frombits(0x41b3de4355555556), "333333340", "333333333.3333334"}, - {math.Float64frombits(0x41b3de4355555557), "333333340", "333333333.33333343"}, - {math.Float64frombits(0xbecbf647612f3696), "-0.0000033333333", "-0.0000033333333333333333"}, - {math.Float64frombits(0x43143ff3c1cb0959), "1424953900000000", "1424953923781206.2"}, - - // The following are select entries from RFC 8785, appendix B, - // but modified for equivalent 32-bit behavior. 
- {float64(math.Float32frombits(0x65a96815)), "9.999999e+22", "9.999998877476383e+22"}, - {float64(math.Float32frombits(0x65a96816)), "1e+23", "9.999999778196308e+22"}, - {float64(math.Float32frombits(0x65a96817)), "1.0000001e+23", "1.0000000678916234e+23"}, - {float64(math.Float32frombits(0x6258d725)), "999999900000000000000", "999999879303389000000"}, - {float64(math.Float32frombits(0x6258d726)), "999999950000000000000", "999999949672133200000"}, - {float64(math.Float32frombits(0x6258d727)), "1e+21", "1.0000000200408773e+21"}, - {float64(math.Float32frombits(0x6258d728)), "1.0000001e+21", "1.0000000904096215e+21"}, - {float64(math.Float32frombits(0x358637bc)), "9.999999e-7", "9.99999883788405e-7"}, - {float64(math.Float32frombits(0x358637bd)), "0.000001", "9.999999974752427e-7"}, - {float64(math.Float32frombits(0x358637be)), "0.0000010000001", "0.0000010000001111620804"}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - if got32 := string(appendNumber(nil, tt.in, 32)); got32 != tt.want32 && tt.want32 != "" { - t.Errorf("appendNumber(nil, %v, 32) = %v, want %v", tt.in, got32, tt.want32) - } - if got64 := string(appendNumber(nil, tt.in, 64)); got64 != tt.want64 && tt.want64 != "" { - t.Errorf("appendNumber(nil, %v, 64) = %v, want %v", tt.in, got64, tt.want64) - } - }) - } -} - -// The default of 1e4 lines was chosen since it is sufficiently large to include -// test numbers from all three categories (i.e., static, series, and random). -// Yet, it is sufficiently low to execute quickly relative to other tests. -// -// Processing 1e8 lines takes a minute and processes about 4GiB worth of text. -var testCanonicalNumberLines = flag.Float64("canonical-number-lines", 1e4, "specify the number of lines to check from the canonical numbers testdata") - -// TestCanonicalNumber verifies that appendNumber complies with RFC 8785 -// according to the testdata provided by the reference implementation. 
-// See https://github.com/cyberphone/json-canonicalization/tree/master/testdata#es6-numbers. -func TestCanonicalNumber(t *testing.T) { - const testfileURL = "https://github.com/cyberphone/json-canonicalization/releases/download/es6testfile/es6testfile100m.txt.gz" - hashes := map[float64]string{ - 1e3: "be18b62b6f69cdab33a7e0dae0d9cfa869fda80ddc712221570f9f40a5878687", - 1e4: "b9f7a8e75ef22a835685a52ccba7f7d6bdc99e34b010992cbc5864cd12be6892", - 1e5: "22776e6d4b49fa294a0d0f349268e5c28808fe7e0cb2bcbe28f63894e494d4c7", - 1e6: "49415fee2c56c77864931bd3624faad425c3c577d6d74e89a83bc725506dad16", - 1e7: "b9f8a44a91d46813b21b9602e72f112613c91408db0b8341fb94603d9db135e0", - 1e8: "0f7dda6b0837dde083c5d6b896f7d62340c8a2415b0c7121d83145e08a755272", - } - wantHash := hashes[*testCanonicalNumberLines] - if wantHash == "" { - t.Fatalf("canonical-number-lines must be one of the following values: 1e3, 1e4, 1e5, 1e6, 1e7, 1e8") - } - numLines := int(*testCanonicalNumberLines) - - // generator returns a function that generates the next float64 to format. - // This implements the algorithm specified in the reference implementation. 
- generator := func() func() float64 { - static := [...]uint64{ - 0x0000000000000000, 0x8000000000000000, 0x0000000000000001, 0x8000000000000001, - 0xc46696695dbd1cc3, 0xc43211ede4974a35, 0xc3fce97ca0f21056, 0xc3c7213080c1a6ac, - 0xc39280f39a348556, 0xc35d9b1f5d20d557, 0xc327af4c4a80aaac, 0xc2f2f2a36ecd5556, - 0xc2be51057e155558, 0xc28840d131aaaaac, 0xc253670dc1555557, 0xc21f0b4935555557, - 0xc1e8d5d42aaaaaac, 0xc1b3de4355555556, 0xc17fca0555555556, 0xc1496e6aaaaaaaab, - 0xc114585555555555, 0xc0e046aaaaaaaaab, 0xc0aa0aaaaaaaaaaa, 0xc074d55555555555, - 0xc040aaaaaaaaaaab, 0xc00aaaaaaaaaaaab, 0xbfd5555555555555, 0xbfa1111111111111, - 0xbf6b4e81b4e81b4f, 0xbf35d867c3ece2a5, 0xbf0179ec9cbd821e, 0xbecbf647612f3696, - 0xbe965e9f80f29212, 0xbe61e54c672874db, 0xbe2ca213d840baf8, 0xbdf6e80fe033c8c6, - 0xbdc2533fe68fd3d2, 0xbd8d51ffd74c861c, 0xbd5774ccac3d3817, 0xbd22c3d6f030f9ac, - 0xbcee0624b3818f79, 0xbcb804ea293472c7, 0xbc833721ba905bd3, 0xbc4ebe9c5db3c61e, - 0xbc18987d17c304e5, 0xbbe3ad30dfcf371d, 0xbbaf7b816618582f, 0xbb792f9ab81379bf, - 0xbb442615600f9499, 0xbb101e77800c76e1, 0xbad9ca58cce0be35, 0xbaa4a1e0a3e6fe90, - 0xba708180831f320d, 0xba3a68cd9e985016, 0x446696695dbd1cc3, 0x443211ede4974a35, - 0x43fce97ca0f21056, 0x43c7213080c1a6ac, 0x439280f39a348556, 0x435d9b1f5d20d557, - 0x4327af4c4a80aaac, 0x42f2f2a36ecd5556, 0x42be51057e155558, 0x428840d131aaaaac, - 0x4253670dc1555557, 0x421f0b4935555557, 0x41e8d5d42aaaaaac, 0x41b3de4355555556, - 0x417fca0555555556, 0x41496e6aaaaaaaab, 0x4114585555555555, 0x40e046aaaaaaaaab, - 0x40aa0aaaaaaaaaaa, 0x4074d55555555555, 0x4040aaaaaaaaaaab, 0x400aaaaaaaaaaaab, - 0x3fd5555555555555, 0x3fa1111111111111, 0x3f6b4e81b4e81b4f, 0x3f35d867c3ece2a5, - 0x3f0179ec9cbd821e, 0x3ecbf647612f3696, 0x3e965e9f80f29212, 0x3e61e54c672874db, - 0x3e2ca213d840baf8, 0x3df6e80fe033c8c6, 0x3dc2533fe68fd3d2, 0x3d8d51ffd74c861c, - 0x3d5774ccac3d3817, 0x3d22c3d6f030f9ac, 0x3cee0624b3818f79, 0x3cb804ea293472c7, - 0x3c833721ba905bd3, 0x3c4ebe9c5db3c61e, 
0x3c18987d17c304e5, 0x3be3ad30dfcf371d, - 0x3baf7b816618582f, 0x3b792f9ab81379bf, 0x3b442615600f9499, 0x3b101e77800c76e1, - 0x3ad9ca58cce0be35, 0x3aa4a1e0a3e6fe90, 0x3a708180831f320d, 0x3a3a68cd9e985016, - 0x4024000000000000, 0x4014000000000000, 0x3fe0000000000000, 0x3fa999999999999a, - 0x3f747ae147ae147b, 0x3f40624dd2f1a9fc, 0x3f0a36e2eb1c432d, 0x3ed4f8b588e368f1, - 0x3ea0c6f7a0b5ed8d, 0x3e6ad7f29abcaf48, 0x3e35798ee2308c3a, 0x3ed539223589fa95, - 0x3ed4ff26cd5a7781, 0x3ed4f95a762283ff, 0x3ed4f8c60703520c, 0x3ed4f8b72f19cd0d, - 0x3ed4f8b5b31c0c8d, 0x3ed4f8b58d1c461a, 0x3ed4f8b5894f7f0e, 0x3ed4f8b588ee37f3, - 0x3ed4f8b588e47da4, 0x3ed4f8b588e3849c, 0x3ed4f8b588e36bb5, 0x3ed4f8b588e36937, - 0x3ed4f8b588e368f8, 0x3ed4f8b588e368f1, 0x3ff0000000000000, 0xbff0000000000000, - 0xbfeffffffffffffa, 0xbfeffffffffffffb, 0x3feffffffffffffa, 0x3feffffffffffffb, - 0x3feffffffffffffc, 0x3feffffffffffffe, 0xbfefffffffffffff, 0xbfefffffffffffff, - 0x3fefffffffffffff, 0x3fefffffffffffff, 0x3fd3333333333332, 0x3fd3333333333333, - 0x3fd3333333333334, 0x0010000000000000, 0x000ffffffffffffd, 0x000fffffffffffff, - 0x7fefffffffffffff, 0xffefffffffffffff, 0x4340000000000000, 0xc340000000000000, - 0x4430000000000000, 0x44b52d02c7e14af5, 0x44b52d02c7e14af6, 0x44b52d02c7e14af7, - 0x444b1ae4d6e2ef4e, 0x444b1ae4d6e2ef4f, 0x444b1ae4d6e2ef50, 0x3eb0c6f7a0b5ed8c, - 0x3eb0c6f7a0b5ed8d, 0x41b3de4355555553, 0x41b3de4355555554, 0x41b3de4355555555, - 0x41b3de4355555556, 0x41b3de4355555557, 0xbecbf647612f3696, 0x43143ff3c1cb0959, - } - var state struct { - idx int - data []byte - block [sha256.Size]byte - } - return func() float64 { - const numSerial = 2000 - var f float64 - switch { - case state.idx < len(static): - f = math.Float64frombits(static[state.idx]) - case state.idx < len(static)+numSerial: - f = math.Float64frombits(0x0010000000000000 + uint64(state.idx-len(static))) - default: - for f == 0 || math.IsNaN(f) || math.IsInf(f, 0) { - if len(state.data) == 0 { - state.block = 
sha256.Sum256(state.block[:]) - state.data = state.block[:] - } - f = math.Float64frombits(binary.LittleEndian.Uint64(state.data)) - state.data = state.data[8:] - } - } - state.idx++ - return f - } - } - - // Pass through the test twice. In the first pass we only hash the output, - // while in the second pass we check every line against the golden testdata. - // If the hashes match in the first pass, then we skip the second pass. - for _, checkGolden := range []bool{false, true} { - var br *bufio.Reader // for line-by-line reading of es6testfile100m.txt - if checkGolden { - resp, err := http.Get(testfileURL) - if err != nil { - t.Fatalf("http.Get error: %v", err) - } - defer resp.Body.Close() - - zr, err := gzip.NewReader(resp.Body) - if err != nil { - t.Fatalf("gzip.NewReader error: %v", err) - } - - br = bufio.NewReader(zr) - } - - // appendNumberJCS differs from appendNumber only for -0. - appendNumberJCS := func(b []byte, f float64) []byte { - if math.Signbit(f) && f == 0 { - return append(b, '0') - } - return appendNumber(b, f, 64) - } - - var gotLine []byte - next := generator() - hash := sha256.New() - start := time.Now() - lastPrint := start - for n := 1; n <= numLines; n++ { - // Generate the formatted line for this number. - f := next() - gotLine = gotLine[:0] // reset from previous usage - gotLine = strconv.AppendUint(gotLine, math.Float64bits(f), 16) - gotLine = append(gotLine, ',') - gotLine = appendNumberJCS(gotLine, f) - gotLine = append(gotLine, '\n') - hash.Write(gotLine) - - // Check that the formatted line matches. - if checkGolden { - wantLine, err := br.ReadBytes('\n') - if err != nil { - t.Fatalf("bufio.Reader.ReadBytes error: %v", err) - } - if !bytes.Equal(gotLine, wantLine) { - t.Errorf("mismatch on line %d:\n\tgot %v\n\twant %v", - n, strings.TrimSpace(string(gotLine)), strings.TrimSpace(string(wantLine))) - } - } - - // Print progress. 
- if now := time.Now(); now.Sub(lastPrint) > time.Second || n == numLines { - remaining := float64(now.Sub(start)) * float64(numLines-n) / float64(n) - t.Logf("%0.3f%% (%v remaining)", - 100.0*float64(n)/float64(numLines), - time.Duration(remaining).Round(time.Second)) - lastPrint = now - } - } - - gotHash := hex.EncodeToString(hash.Sum(nil)) - if gotHash == wantHash { - return // hashes match, no need to check golden testdata - } - } -} diff --git a/errors.go b/errors.go index dfe589e..d78be84 100644 --- a/errors.go +++ b/errors.go @@ -5,43 +5,13 @@ package json import ( - "errors" "reflect" "strconv" "strings" - "unicode" - "unicode/utf8" ) const errorPrefix = "json: " -// Error matches errors returned by this package according to errors.Is. -const Error = jsonError("json error") - -type jsonError string - -func (e jsonError) Error() string { - return string(e) -} -func (e jsonError) Is(target error) bool { - return e == target || target == Error -} - -type ioError struct { - action string // either "read" or "write" - err error -} - -func (e *ioError) Error() string { - return errorPrefix + e.action + " error: " + e.err.Error() -} -func (e *ioError) Unwrap() error { - return e.err -} -func (e *ioError) Is(target error) bool { - return e == target || target == Error || errors.Is(e.err, target) -} - // SemanticError describes an error determining the meaning // of JSON data as Go data or vice-versa. // @@ -140,70 +110,10 @@ func (e *SemanticError) Error() string { return sb.String() } -func (e *SemanticError) Is(target error) bool { - return e == target || target == Error || errors.Is(e.Err, target) -} func (e *SemanticError) Unwrap() error { return e.Err } -// SyntacticError is a description of a syntactic error that occurred when -// encoding or decoding JSON according to the grammar. -// -// The contents of this error as produced by this package may change over time. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.SyntacticError] instead. 
-type SyntacticError struct { - requireKeyedLiterals - nonComparable - - // ByteOffset indicates that an error occurred after this byte offset. - ByteOffset int64 - str string -} - -func (e *SyntacticError) Error() string { - return errorPrefix + e.str -} -func (e *SyntacticError) Is(target error) bool { - return e == target || target == Error -} -func (e *SyntacticError) withOffset(pos int64) error { - return &SyntacticError{ByteOffset: pos, str: e.str} -} - -func newDuplicateNameError[Bytes ~[]byte | ~string](quoted Bytes) *SyntacticError { - return &SyntacticError{str: "duplicate name " + string(quoted) + " in object"} -} - -func newInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) *SyntacticError { - what := quoteRune(prefix) - return &SyntacticError{str: "invalid character " + what + " " + where} -} - -func newInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) *SyntacticError { - label := "escape sequence" - if len(what) > 6 { - label = "surrogate pair" - } - needEscape := strings.IndexFunc(string(what), func(r rune) bool { - return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r) - }) >= 0 - if needEscape { - return &SyntacticError{str: "invalid " + label + " " + strconv.Quote(string(what)) + " within string"} - } else { - return &SyntacticError{str: "invalid " + label + " `" + string(what) + "` within string"} - } -} - -func quoteRune[Bytes ~[]byte | ~string](b Bytes) string { - r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) - if r == utf8.RuneError && n == 1 { - return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'` - } - return strconv.QuoteRune(r) -} - func firstError(errs ...error) error { for _, err := range errs { if err != nil { diff --git a/errors_test.go b/errors_test.go index a642c72..3e607c1 100644 --- a/errors_test.go +++ b/errors_test.go @@ -8,10 +8,8 @@ import ( "archive/tar" "bytes" "errors" - "fmt" "io" "reflect" - "strconv" "strings" "testing" ) @@ -84,106 
+82,3 @@ func TestSemanticError(t *testing.T) { } } } - -func TestErrorsIs(t *testing.T) { - const ( - someGlobalError = jsonError("some global error") - otherGlobalError = jsonError("other global error") - ) - - var ( - someIOError = &ioError{action: "write", err: io.ErrShortWrite} - otherIOError = &ioError{action: "read", err: io.ErrUnexpectedEOF} - someSyntacticError = &SyntacticError{str: "some syntactic error"} - otherSyntacticError = &SyntacticError{str: "other syntactic error"} - someSemanticError = &SemanticError{action: "unmarshal", JSONKind: '0', GoType: reflect.TypeOf(int(0)), Err: strconv.ErrRange} - otherSemanticError = &SemanticError{action: "marshal", GoType: reflect.TypeOf(complex128(0))} - ) - - tests := []struct { - err error - target error - want bool - }{ - // Top-level Error should match itself (identity). - {Error, Error, true}, - - // All sub-error values should match the top-level Error value. - {someGlobalError, Error, true}, - {someIOError, Error, true}, - {someSyntacticError, Error, true}, - {someSemanticError, Error, true}, - - // Top-level Error should not match any other sub-error value. - {Error, someGlobalError, false}, - {Error, someIOError, false}, - {Error, someSyntacticError, false}, - {Error, someSemanticError, false}, - - // Sub-error values should match itself (identity). - {someGlobalError, someGlobalError, true}, - {someIOError, someIOError, true}, - {someSyntacticError, someSyntacticError, true}, - {someSemanticError, someSemanticError, true}, - - // Sub-error values should not match each other. - {someGlobalError, someIOError, false}, - {someIOError, someSyntacticError, false}, - {someSyntacticError, someSemanticError, false}, - {someSemanticError, someGlobalError, false}, - - // Sub-error values should not match other error values of same type. 
- {someGlobalError, otherGlobalError, false}, - {someIOError, otherIOError, false}, - {someSyntacticError, otherSyntacticError, false}, - {someSemanticError, otherSemanticError, false}, - - // Error should not match any other random error. - {Error, nil, false}, - {nil, Error, false}, - {io.ErrShortWrite, Error, false}, - {Error, io.ErrShortWrite, false}, - - // Wrapped errors should be matched. - {&ioError{err: fmt.Errorf("%w", io.ErrShortWrite)}, io.ErrShortWrite, true}, // doubly wrapped - {&ioError{err: io.ErrShortWrite}, io.ErrShortWrite, true}, // singly wrapped - {&ioError{err: io.ErrShortWrite}, io.EOF, false}, - {&SemanticError{Err: fmt.Errorf("%w", strconv.ErrRange)}, strconv.ErrRange, true}, // doubly wrapped - {&SemanticError{Err: strconv.ErrRange}, strconv.ErrRange, true}, // singly wrapped - {&SemanticError{Err: strconv.ErrRange}, io.EOF, false}, - } - - for _, tt := range tests { - got := errors.Is(tt.err, tt.target) - if got != tt.want { - t.Errorf("errors.Is(%#v, %#v) = %v, want %v", tt.err, tt.target, got, tt.want) - } - // If the type supports the Is method, - // it should behave the same way if called directly. 
- if iserr, ok := tt.err.(interface{ Is(error) bool }); ok { - got := iserr.Is(tt.target) - if got != tt.want { - t.Errorf("%#v.Is(%#v) = %v, want %v", tt.err, tt.target, got, tt.want) - } - } - } -} - -func TestQuoteRune(t *testing.T) { - tests := []struct{ in, want string }{ - {"x", `'x'`}, - {"\n", `'\n'`}, - {"'", `'\''`}, - {"\xff", `'\xff'`}, - {"💩", `'💩'`}, - {"💩"[:1], `'\xf0'`}, - {"\uffff", `'\uffff'`}, - {"\U00101234", `'\U00101234'`}, - } - for _, tt := range tests { - got := quoteRune([]byte(tt.in)) - if got != tt.want { - t.Errorf("quoteRune(%q) = %s, want %s", tt.in, got, tt.want) - } - } -} diff --git a/fields.go b/fields.go index b2e2233..dc9a6f1 100644 --- a/fields.go +++ b/fields.go @@ -15,6 +15,8 @@ import ( "strings" "unicode" "unicode/utf8" + + "github.com/go-json-experiment/json/internal/jsonwire" ) var errIgnoredField = errors.New("ignored field") @@ -361,7 +363,7 @@ func parseFieldOptions(sf reflect.StructField) (out fieldOptions, ignored bool, out.name = opt tag = tag[n:] } - b, _ := appendString(nil, out.name, false, nil) + b, _ := jsonwire.AppendQuote(nil, out.name, false, nil) out.quotedName = string(b) // Handle any additional tag options (if any). diff --git a/fuzz_test.go b/fuzz_test.go index 5f342f0..2b583ed 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -6,223 +6,9 @@ package json import ( "bytes" - "errors" - "io" - "math/rand" - "reflect" "testing" - - "github.com/go-json-experiment/json/internal/jsontest" ) -func FuzzCoder(f *testing.F) { - // Add a number of inputs to the corpus including valid and invalid data. 
- for _, td := range coderTestdata { - f.Add(int64(0), []byte(td.in)) - } - for _, td := range decoderErrorTestdata { - f.Add(int64(0), []byte(td.in)) - } - for _, td := range encoderErrorTestdata { - f.Add(int64(0), []byte(td.wantOut)) - } - for _, td := range jsontest.Data { - f.Add(int64(0), td.Data()) - } - - f.Fuzz(func(t *testing.T, seed int64, b []byte) { - var tokVals []tokOrVal - rn := rand.NewSource(seed) - - // Read a sequence of tokens or values. Skip the test for any errors - // since we expect this with randomly generated fuzz inputs. - src := bytes.NewReader(b) - dec := NewDecoder(src) - for { - if rn.Int63()%8 > 0 { - tok, err := dec.ReadToken() - if err != nil { - if err == io.EOF { - break - } - t.Skipf("Decoder.ReadToken error: %v", err) - } - tokVals = append(tokVals, tok.Clone()) - } else { - val, err := dec.ReadValue() - if err != nil { - expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']' - if expectError && errors.As(err, new(*SyntacticError)) { - continue - } - if err == io.EOF { - break - } - t.Skipf("Decoder.ReadValue error: %v", err) - } - tokVals = append(tokVals, append(zeroValue, val...)) - } - } - - // Write a sequence of tokens or values. Fail the test for any errors - // since the previous stage guarantees that the input is valid. - dst := new(bytes.Buffer) - enc := NewEncoder(dst) - for _, tokVal := range tokVals { - switch tokVal := tokVal.(type) { - case Token: - if err := enc.WriteToken(tokVal); err != nil { - t.Fatalf("Encoder.WriteToken error: %v", err) - } - case RawValue: - if err := enc.WriteValue(tokVal); err != nil { - t.Fatalf("Encoder.WriteValue error: %v", err) - } - } - } - - // Encoded output and original input must decode to the same thing. 
- var got, want []Token - for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; { - tok, err := dec.ReadToken() - if err != nil { - t.Fatalf("Decoder.ReadToken error: %v", err) - } - got = append(got, tok.Clone()) - } - for dec := NewDecoder(dst); dec.PeekKind() > 0; { - tok, err := dec.ReadToken() - if err != nil { - t.Fatalf("Decoder.ReadToken error: %v", err) - } - want = append(want, tok.Clone()) - } - if !equalTokens(got, want) { - t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want) - } - }) -} - -func FuzzResumableDecoder(f *testing.F) { - for _, td := range resumableDecoderTestdata { - f.Add(int64(0), []byte(td)) - } - - f.Fuzz(func(t *testing.T, seed int64, b []byte) { - rn := rand.NewSource(seed) - - // Regardless of how many bytes the underlying io.Reader produces, - // the provided tokens, values, and errors should always be identical. - t.Run("ReadToken", func(t *testing.T) { - decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn}) - decWant := NewDecoder(bytes.NewReader(b)) - gotTok, gotErr := decGot.ReadToken() - wantTok, wantErr := decWant.ReadToken() - if gotTok.String() != wantTok.String() || !reflect.DeepEqual(gotErr, wantErr) { - t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr) - } - }) - t.Run("ReadValue", func(t *testing.T) { - decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn}) - decWant := NewDecoder(bytes.NewReader(b)) - gotVal, gotErr := decGot.ReadValue() - wantVal, wantErr := decWant.ReadValue() - if !reflect.DeepEqual(gotVal, wantVal) || !reflect.DeepEqual(gotErr, wantErr) { - t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr) - } - }) - }) -} - -func FuzzRawValueReformat(f *testing.F) { - for _, td := range rawValueTestdata { - f.Add([]byte(td.in)) - } - - // isValid reports whether b is valid according to the specified options. 
- isValid := func(b []byte, opts ...Options) bool { - d := NewDecoder(bytes.NewReader(b), opts...) - _, errVal := d.ReadValue() - _, errEOF := d.ReadToken() - return errVal == nil && errEOF == io.EOF - } - - // stripWhitespace removes all JSON whitespace characters from the input. - stripWhitespace := func(in []byte) (out []byte) { - out = make([]byte, 0, len(in)) - for _, c := range in { - switch c { - case ' ', '\n', '\r', '\t': - default: - out = append(out, c) - } - } - return out - } - - // unmarshal unmarshals the input into an any. - unmarshal := func(in []byte) (out any) { - if err := Unmarshal(in, &out); err != nil { - return nil // ignore invalid input - } - return out - } - - f.Fuzz(func(t *testing.T, b []byte) { - validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true)) - validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true)) - validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false)) - switch { - case !validRFC7159 && validRFC8259: - t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259") - case !validRFC8259 && validRFC7493: - t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493") - } - - gotValid := RawValue(b).IsValid() - wantValid := validRFC7493 - if gotValid != wantValid { - t.Errorf("RawValue.IsValid = %v, want %v", gotValid, wantValid) - } - - gotCompacted := RawValue(string(b)) - gotCompactOk := gotCompacted.Compact() == nil - wantCompactOk := validRFC7159 - if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) { - t.Errorf("stripWhitespace(RawValue.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b)) - } - if !reflect.DeepEqual(unmarshal(gotCompacted), unmarshal(b)) { - t.Errorf("unmarshal(RawValue.Compact) = %s, want %s", unmarshal(gotCompacted), unmarshal(b)) - } - if gotCompactOk != wantCompactOk { - t.Errorf("RawValue.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk) - } - - gotIndented := 
RawValue(string(b)) - gotIndentOk := gotIndented.Indent("", " ") == nil - wantIndentOk := validRFC7159 - if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) { - t.Errorf("stripWhitespace(RawValue.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b)) - } - if !reflect.DeepEqual(unmarshal(gotIndented), unmarshal(b)) { - t.Errorf("unmarshal(RawValue.Indent) = %s, want %s", unmarshal(gotIndented), unmarshal(b)) - } - if gotIndentOk != wantIndentOk { - t.Errorf("RawValue.Indent success mismatch: got %v, want %v", gotIndentOk, wantIndentOk) - } - - gotCanonicalized := RawValue(string(b)) - gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil - wantCanonicalizeOk := validRFC7493 - if !reflect.DeepEqual(unmarshal(gotCanonicalized), unmarshal(b)) { - t.Errorf("unmarshal(RawValue.Canonicalize) = %s, want %s", unmarshal(gotCanonicalized), unmarshal(b)) - } - if gotCanonicalizeOk != wantCanonicalizeOk { - t.Errorf("RawValue.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk) - } - }) -} - func FuzzEqualFold(f *testing.F) { for _, tt := range equalFoldTestdata { f.Add([]byte(tt.in1), []byte(tt.in2)) diff --git a/inline_test.go b/inline_test.go index a5beea5..28c37df 100644 --- a/inline_test.go +++ b/inline_test.go @@ -23,80 +23,85 @@ func TestInline(t *testing.T) { t.SkipNow() } - fncs := func() map[string]bool { - m := make(map[string]bool) - for _, s := range []string{ - "Encoder.needFlush", - "Decoder.ReadValue", // thin wrapper over Decoder.readValue - "decodeBuffer.needMore", - "consumeWhitespace", - "consumeNull", - "consumeFalse", - "consumeTrue", - "consumeSimpleString", - "consumeString", // thin wrapper over consumeStringResumable - "consumeSimpleNumber", - "consumeNumber", // thin wrapper over consumeNumberResumable - "unescapeStringMayCopy", // thin wrapper over unescapeString - "hasSuffixByte", - "trimSuffixByte", - "trimSuffixString", - "trimSuffixWhitespace", - 
"stateMachine.appendLiteral", - "stateMachine.appendNumber", - "stateMachine.appendString", - "stateMachine.depth", - "stateMachine.reset", - "stateMachine.mayAppendDelim", - "stateMachine.needDelim", - "stateMachine.popArray", - "stateMachine.popObject", - "stateMachine.pushArray", - "stateMachine.pushObject", - "stateEntry.increment", - "stateEntry.decrement", - "stateEntry.isArray", - "stateEntry.isObject", - "stateEntry.length", - "stateEntry.needImplicitColon", - "stateEntry.needImplicitComma", - "stateEntry.needObjectName", - "stateEntry.needObjectValue", - "objectNameStack.reset", - "objectNameStack.length", - "objectNameStack.getUnquoted", - "objectNameStack.push", - "objectNameStack.replaceLastQuotedOffset", - "objectNameStack.replaceLastUnquotedName", - "objectNameStack.pop", - "objectNameStack.ensureCopiedBuffer", - "objectNamespace.insertQuoted", // thin wrapper over objectNamespace.insert - "objectNamespace.insertUnquoted", // thin wrapper over objectNamespace.insert - "Token.String", // thin wrapper over Token.string - "foldName", // thin wrapper over appendFoldedName - "hash64", - } { - m[s] = true - } - return m - }() - - cmd := exec.Command("go", "build", "-gcflags=-m") - b, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("exec.Command error: %v\n\n%s", err, b) + pkgs := map[string]map[string]bool{ + ".": { + "hash64": true, + "foldName": true, // thin wrapper over appendFoldedName + }, + "./internal/jsonwire": { + "ConsumeWhitespace": true, + "ConsumeNull": true, + "ConsumeFalse": true, + "ConsumeTrue": true, + "ConsumeSimpleString": true, + "ConsumeString": true, // thin wrapper over consumeStringResumable + "ConsumeSimpleNumber": true, + "ConsumeNumber": true, // thin wrapper over consumeNumberResumable + "UnquoteMayCopy": true, // thin wrapper over unescapeString + "HasSuffixByte": true, + "TrimSuffixByte": true, + "TrimSuffixString": true, + "TrimSuffixWhitespace": true, + }, + "./jsontext": { + "encoderState.NeedFlush": true, + 
"Decoder.ReadToken": true, // thin wrapper over decoderState.ReadToken + "Decoder.ReadValue": true, // thin wrapper over decoderState.ReadValue + "Encoder.WriteToken": true, // thin wrapper over encoderState.WriteToken + "Encoder.WriteValue": true, // thin wrapper over encoderState.WriteValue + "decodeBuffer.needMore": true, + "stateMachine.appendLiteral": true, + "stateMachine.appendNumber": true, + "stateMachine.appendString": true, + "stateMachine.Depth": true, + "stateMachine.reset": true, + "stateMachine.MayAppendDelim": true, + "stateMachine.needDelim": true, + "stateMachine.popArray": true, + "stateMachine.popObject": true, + "stateMachine.pushArray": true, + "stateMachine.pushObject": true, + "stateEntry.Increment": true, + "stateEntry.decrement": true, + "stateEntry.isArray": true, + "stateEntry.isObject": true, + "stateEntry.Length": true, + "stateEntry.needImplicitColon": true, + "stateEntry.needImplicitComma": true, + "stateEntry.NeedObjectName": true, + "stateEntry.needObjectValue": true, + "objectNameStack.reset": true, + "objectNameStack.length": true, + "objectNameStack.getUnquoted": true, + "objectNameStack.push": true, + "objectNameStack.ReplaceLastQuotedOffset": true, + "objectNameStack.replaceLastUnquotedName": true, + "objectNameStack.pop": true, + "objectNameStack.ensureCopiedBuffer": true, + "objectNamespace.insertQuoted": true, // thin wrapper over objectNamespace.insert + "objectNamespace.InsertUnquoted": true, // thin wrapper over objectNamespace.insert + "Token.String": true, // thin wrapper over Token.string + }, } - for _, line := range strings.Split(string(b), "\n") { - const phrase = ": can inline " - if i := strings.Index(line, phrase); i >= 0 { - fnc := line[i+len(phrase):] - fnc = strings.ReplaceAll(fnc, "(", "") - fnc = strings.ReplaceAll(fnc, "*", "") - fnc = strings.ReplaceAll(fnc, ")", "") - delete(fncs, fnc) + + for pkg, fncs := range pkgs { + cmd := exec.Command("go", "build", "-gcflags=-m", pkg) + b, err := 
cmd.CombinedOutput() + if err != nil { + t.Fatalf("exec.Command error: %v\n\n%s", err, b) + } + for _, line := range strings.Split(string(b), "\n") { + const phrase = ": can inline " + if i := strings.Index(line, phrase); i >= 0 { + fnc := line[i+len(phrase):] + fnc = strings.ReplaceAll(fnc, "(", "") + fnc = strings.ReplaceAll(fnc, "*", "") + fnc = strings.ReplaceAll(fnc, ")", "") + delete(fncs, fnc) + } + } + for fnc := range fncs { + t.Errorf("%v is not inlinable, expected it to be", fnc) } - } - for fnc := range fncs { - t.Errorf("%v is not inlinable, expected it to be", fnc) } } diff --git a/intern.go b/intern.go index 700a56d..42fc3d4 100644 --- a/intern.go +++ b/intern.go @@ -10,12 +10,12 @@ import ( ) // stringCache is a cache for strings converted from a []byte. -type stringCache [256]string // 256*unsafe.Sizeof(string("")) => 4KiB +type stringCache = [256]string // 256*unsafe.Sizeof(string("")) => 4KiB -// make returns the string form of b. +// makeString returns the string form of b. // It returns a pre-allocated string from c if present, otherwise // it allocates a new string, inserts it into the cache, and returns it. -func (c *stringCache) make(b []byte) string { +func makeString(c *stringCache, b []byte) string { const ( minCachedLen = 2 // single byte strings are already interned by the runtime maxCachedLen = 256 // large enough for UUIDs, IPv6 addresses, SHA-256 checksums, etc. 
diff --git a/intern_test.go b/intern_test.go index ab49ce9..35b0b52 100644 --- a/intern_test.go +++ b/intern_test.go @@ -18,13 +18,13 @@ func TestIntern(t *testing.T) { const alphabet = "abcdefghijklmnopqrstuvwxyz" for i := 0; i <= len(alphabet); i++ { want := alphabet[i:] - if got := sc.make([]byte(want)); got != want { + if got := makeString(&sc, []byte(want)); got != want { t.Fatalf("make = %v, want %v", got, want) } } for i := 0; i < 1000; i++ { want := fmt.Sprintf("test%b", i) - if got := sc.make([]byte(want)); got != want { + if got := makeString(&sc, []byte(want)); got != want { t.Fatalf("make = %v, want %v", got, want) } } @@ -118,7 +118,7 @@ func BenchmarkIntern(b *testing.B) { for i := 0; i < b.N; i++ { var sc stringCache for _, b := range tt.data { - sink = sc.make(b) + sink = makeString(&sc, b) } } }) diff --git a/internal/internal.go b/internal/internal.go index bddc4d4..cf020cd 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -8,3 +8,7 @@ package internal // It does not perfectly prevent usage of that API, but helps to restrict usage. // Anything with this marker is not covered by the Go compatibility agreement. type NotForPublicUse struct{} + +// AllowInternalUse is passed from "json" to "jsontext" to authenticate +// that the caller can have access to internal functionality. +var AllowInternalUse NotForPublicUse diff --git a/internal/jsonwire/decode.go b/internal/jsonwire/decode.go new file mode 100644 index 0000000..cb18a83 --- /dev/null +++ b/internal/jsonwire/decode.go @@ -0,0 +1,640 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package jsonwire + +import ( + "io" + "math" + "slices" + "strconv" + "unicode/utf16" + "unicode/utf8" +) + +type ValueFlags uint + +const ( + _ ValueFlags = (1 << iota) / 2 // powers of two starting with zero + + stringNonVerbatim // string cannot be naively treated as valid UTF-8 + stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2. + // TODO: Track whether a number is a non-integer? +) + +func (f *ValueFlags) Join(f2 ValueFlags) { *f |= f2 } +func (f ValueFlags) IsVerbatim() bool { return f&stringNonVerbatim == 0 } +func (f ValueFlags) IsCanonical() bool { return f&stringNonCanonical == 0 } + +// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2. +func ConsumeWhitespace(b []byte) (n int) { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') { + n++ + } + return n +} + +// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3. +// It returns 0 if it is invalid, in which case [ConsumeLiteral] should be used. +func ConsumeNull(b []byte) int { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + const literal = "null" + if len(b) >= len(literal) && string(b[:len(literal)]) == literal { + return len(literal) + } + return 0 +} + +// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3. +// It returns 0 if it is invalid, in which case [ConsumeLiteral] should be used. +func ConsumeFalse(b []byte) int { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + const literal = "false" + if len(b) >= len(literal) && string(b[:len(literal)]) == literal { + return len(literal) + } + return 0 +} + +// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3. +// It returns 0 if it is invalid, in which case [ConsumeLiteral] should be used.
+func ConsumeTrue(b []byte) int { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + const literal = "true" + if len(b) >= len(literal) && string(b[:len(literal)]) == literal { + return len(literal) + } + return 0 +} + +// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3. +// If the input appears truncated, it returns io.ErrUnexpectedEOF. +func ConsumeLiteral(b []byte, lit string) (n int, err error) { + for i := 0; i < len(b) && i < len(lit); i++ { + if b[i] != lit[i] { + return i, NewInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")") + } + } + if len(b) < len(lit) { + return len(b), io.ErrUnexpectedEOF + } + return len(lit), nil +} + +// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7 +// but is limited to the grammar for an ASCII string without escape sequences. +// It returns 0 if it is invalid or more complicated than a simple string, +// in which case [ConsumeString] should be called. +// +// It rejects '<', '>', and '&' for compatibility reasons since these were +// always escaped in the v1 implementation. Thus, if this function reports +// non-zero then we know that the string would be encoded the same way +// under both v1 and v2 escape semantics. +func ConsumeSimpleString(b []byte) (n int) { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + if len(b) > 0 && b[0] == '"' { + n++ + for len(b) > n && b[n] < utf8.RuneSelf && !escapeHTML.needEscapeASCII(b[n]) { + n++ + } + if uint(len(b)) > uint(n) && b[n] == '"' { + n++ + return n + } + } + return 0 +} + +// ConsumeString consumes the next JSON string per RFC 7159, section 7. +// If validateUTF8 is false, then this allows the presence of invalid UTF-8 +// characters within the string itself. +// It reports the number of bytes consumed and whether an error was encountered. +// If the input appears truncated, it returns io.ErrUnexpectedEOF.
+func ConsumeString(flags *ValueFlags, b []byte, validateUTF8 bool) (n int, err error) { + return ConsumeStringResumable(flags, b, 0, validateUTF8) +} + +// ConsumeStringResumable is identical to [ConsumeString] but supports resuming +// from a previous call that returned io.ErrUnexpectedEOF. +func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) { + // Consume the leading double quote. + switch { + case resumeOffset > 0: + n = resumeOffset // already handled the leading quote + case uint(len(b)) == 0: + return n, io.ErrUnexpectedEOF + case b[0] == '"': + n++ + default: + return n, NewInvalidCharacterError(b[n:], `at start of string (expecting '"')`) + } + + // Consume every character in the string. + for uint(len(b)) > uint(n) { + // Optimize for long sequences of unescaped characters. + noEscape := func(c byte) bool { + return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"' + } + for uint(len(b)) > uint(n) && noEscape(b[n]) { + n++ + } + if uint(len(b)) <= uint(n) { + return n, io.ErrUnexpectedEOF + } + + // Check for terminating double quote. + if b[n] == '"' { + n++ + return n, nil + } + + switch r, rn := utf8.DecodeRune(b[n:]); { + // Handle UTF-8 encoded byte sequence. + // Due to specialized handling of ASCII above, we know that + // all normal sequences at this point must be 2 bytes or larger. + case rn > 1: + n += rn + // Handle escape sequence. + case r == '\\': + flags.Join(stringNonVerbatim) + resumeOffset = n + if uint(len(b)) < uint(n+2) { + return resumeOffset, io.ErrUnexpectedEOF + } + switch r := b[n+1]; r { + case '/': + // Forward slash is the only character with 3 representations. + // Per RFC 8785, section 3.2.2.2., this must not be escaped.
+ flags.Join(stringNonCanonical) + n += 2 + case '"', '\\', 'b', 'f', 'n', 'r', 't': + n += 2 + case 'u': + if uint(len(b)) < uint(n+6) { + if hasEscapedUTF16Prefix(b[n:], false) { + return resumeOffset, io.ErrUnexpectedEOF + } + flags.Join(stringNonCanonical) + return n, NewInvalidEscapeSequenceError(b[n:]) + } + v1, ok := parseHexUint16(b[n+2 : n+6]) + if !ok { + flags.Join(stringNonCanonical) + return n, NewInvalidEscapeSequenceError(b[n : n+6]) + } + // Only certain control characters can use the \uFFFF notation + // for canonical formatting (per RFC 8785, section 3.2.2.2.). + switch v1 { + // \uFFFF notation not permitted for these characters. + case '\b', '\f', '\n', '\r', '\t': + flags.Join(stringNonCanonical) + default: + // \uFFFF notation only permitted for control characters. + if v1 >= ' ' { + flags.Join(stringNonCanonical) + } else { + // \uFFFF notation must be lower case. + for _, c := range b[n+2 : n+6] { + if 'A' <= c && c <= 'F' { + flags.Join(stringNonCanonical) + } + } + } + } + n += 6 + + r := rune(v1) + if validateUTF8 && utf16.IsSurrogate(r) { + if uint(len(b)) < uint(n+6) { + if hasEscapedUTF16Prefix(b[n:], true) { + return resumeOffset, io.ErrUnexpectedEOF + } + flags.Join(stringNonCanonical) + return n - 6, NewInvalidEscapeSequenceError(b[n-6:]) + } else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok { + flags.Join(stringNonCanonical) + return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6]) + } else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError { + flags.Join(stringNonCanonical) + return n - 6, NewInvalidEscapeSequenceError(b[n-6 : n+6]) + } else { + n += 6 + } + } + default: + flags.Join(stringNonCanonical) + return n, NewInvalidEscapeSequenceError(b[n : n+2]) + } + // Handle invalid UTF-8. 
+ case r == utf8.RuneError: + if !utf8.FullRune(b[n:]) { + return n, io.ErrUnexpectedEOF + } + flags.Join(stringNonVerbatim | stringNonCanonical) + if validateUTF8 { + return n, ErrInvalidUTF8 + } + n++ + // Handle invalid control characters. + case r < ' ': + flags.Join(stringNonVerbatim | stringNonCanonical) + return n, NewInvalidCharacterError(b[n:], "within string (expecting non-control character)") + default: + panic("BUG: unhandled character " + QuoteRune(b[n:])) + } + } + return n, io.ErrUnexpectedEOF +} + +// AppendUnquote appends the unescaped form of a JSON string in src to dst. +// Any invalid UTF-8 within the string will be replaced with utf8.RuneError, +// but the error will be specified as having encountered such an error. +// The input must be an entire JSON string with no surrounding whitespace. +func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) { + dst = slices.Grow(dst, len(src)) + + // Consume the leading double quote. + var i, n int + switch { + case uint(len(src)) == 0: + return dst, io.ErrUnexpectedEOF + case src[0] == '"': + i, n = 1, 1 + default: + return dst, NewInvalidCharacterError(src, `at start of string (expecting '"')`) + } + + // Consume every character in the string. + for uint(len(src)) > uint(n) { + // Optimize for long sequences of unescaped characters. + noEscape := func(c byte) bool { + return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"' + } + for uint(len(src)) > uint(n) && noEscape(src[n]) { + n++ + } + if uint(len(src)) <= uint(n) { + dst = append(dst, src[i:n]...) + return dst, io.ErrUnexpectedEOF + } + + // Check for terminating double quote. + if src[n] == '"' { + dst = append(dst, src[i:n]...) + n++ + if n < len(src) { + err = NewInvalidCharacterError(src[n:], "after string value") + } + return dst, err + } + + switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { + // Handle UTF-8 encoded byte sequence. 
+ // Due to specialized handling of ASCII above, we know that + // all normal sequences at this point must be 2 bytes or larger. + case rn > 1: + n += rn + // Handle escape sequence. + case r == '\\': + dst = append(dst, src[i:n]...) + + // Handle escape sequence. + if uint(len(src)) < uint(n+2) { + return dst, io.ErrUnexpectedEOF + } + switch r := src[n+1]; r { + case '"', '\\', '/': + dst = append(dst, r) + n += 2 + case 'b': + dst = append(dst, '\b') + n += 2 + case 'f': + dst = append(dst, '\f') + n += 2 + case 'n': + dst = append(dst, '\n') + n += 2 + case 'r': + dst = append(dst, '\r') + n += 2 + case 't': + dst = append(dst, '\t') + n += 2 + case 'u': + if uint(len(src)) < uint(n+6) { + if hasEscapedUTF16Prefix(src[n:], false) { + return dst, io.ErrUnexpectedEOF + } + return dst, NewInvalidEscapeSequenceError(src[n:]) + } + v1, ok := parseHexUint16(src[n+2 : n+6]) + if !ok { + return dst, NewInvalidEscapeSequenceError(src[n : n+6]) + } + n += 6 + + // Check whether this is a surrogate half. + r := rune(v1) + if utf16.IsSurrogate(r) { + r = utf8.RuneError // assume failure unless the following succeeds + if uint(len(src)) < uint(n+6) { + if hasEscapedUTF16Prefix(src[n:], true) { + return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF + } + err = NewInvalidEscapeSequenceError(src[n-6:]) + } else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok { + err = NewInvalidEscapeSequenceError(src[n-6 : n+6]) + } else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError { + err = NewInvalidEscapeSequenceError(src[n-6 : n+6]) + } else { + n += 6 + } + } + + dst = utf8.AppendRune(dst, r) + default: + return dst, NewInvalidEscapeSequenceError(src[n : n+2]) + } + i = n + // Handle invalid UTF-8. + case r == utf8.RuneError: + dst = append(dst, src[i:n]...) 
+ if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) { + return dst, io.ErrUnexpectedEOF + } + // NOTE: An unescaped string may be longer than the escaped string + // because invalid UTF-8 bytes are being replaced. + dst = append(dst, "\uFFFD"...) + n += rn + i = n + err = ErrInvalidUTF8 + // Handle invalid control characters. + case r < ' ': + dst = append(dst, src[i:n]...) + return dst, NewInvalidCharacterError(src[n:], "within string (expecting non-control character)") + default: + panic("BUG: unhandled character " + QuoteRune(src[n:])) + } + } + dst = append(dst, src[i:n]...) + return dst, io.ErrUnexpectedEOF +} + +// hasEscapedUTF16Prefix reports whether b is possibly +// the truncated prefix of a \uFFFF escape sequence. +func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool { + for i := 0; i < len(b); i++ { + switch c := b[i]; { + case i == 0 && c != '\\': + return false + case i == 1 && c != 'u': + return false + case i == 2 && lowerSurrogateHalf && c != 'd' && c != 'D': + return false // not within ['\uDC00':'\uDFFF'] + case i == 3 && lowerSurrogateHalf && !('c' <= c && c <= 'f') && !('C' <= c && c <= 'F'): + return false // not within ['\uDC00':'\uDFFF'] + case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'): + return false + } + } + return true +} + +// UnquoteMayCopy returns the unescaped form of b. +// If there are no escaped characters, the output is simply a subslice of +// the input with the surrounding quotes removed. +// Otherwise, a new buffer is allocated for the output. +// It assumes the input is valid. +func UnquoteMayCopy(b []byte, isVerbatim bool) []byte { + // NOTE: The arguments and logic are kept simple to keep this inlinable. 
+ if isVerbatim { + return b[len(`"`) : len(b)-len(`"`)] + } + b, _ = AppendUnquote(nil, b) + return b +} + +// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6 +// but is limited to the grammar for a non-negative integer. +// It returns 0 if it is invalid or more complicated than a simple integer, +// in which case [ConsumeNumber] should be called. +func ConsumeSimpleNumber(b []byte) (n int) { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + if len(b) > 0 { + if b[0] == '0' { + n++ + } else if '1' <= b[0] && b[0] <= '9' { + n++ + for len(b) > n && ('0' <= b[n] && b[n] <= '9') { + n++ + } + } else { + return 0 + } + if uint(len(b)) <= uint(n) || (b[n] != '.' && b[n] != 'e' && b[n] != 'E') { + return n + } + } + return 0 +} + +type ConsumeNumberState uint + +const ( + consumeNumberInit ConsumeNumberState = iota + beforeIntegerDigits + withinIntegerDigits + beforeFractionalDigits + withinFractionalDigits + beforeExponentDigits + withinExponentDigits +) + +// ConsumeNumber consumes the next JSON number per RFC 7159, section 6. +// It reports the number of bytes consumed and whether an error was encountered. +// If the input appears truncated, it returns io.ErrUnexpectedEOF. +// +// Note that JSON numbers are not self-terminating. +// If the entire input is consumed, then the caller needs to consider whether +// there may be subsequent unread data that may still be part of this number. +func ConsumeNumber(b []byte) (n int, err error) { + n, _, err = ConsumeNumberResumable(b, 0, consumeNumberInit) + return n, err +} + +// ConsumeNumberResumable is identical to [ConsumeNumber] but supports resuming +// from a previous call that returned io.ErrUnexpectedEOF. +func ConsumeNumberResumable(b []byte, resumeOffset int, state ConsumeNumberState) (n int, _ ConsumeNumberState, err error) { + // Jump to the right state when resuming from a partial consumption.
+ n = resumeOffset + if state > consumeNumberInit { + switch state { + case withinIntegerDigits, withinFractionalDigits, withinExponentDigits: + // Consume leading digits. + for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { + n++ + } + if uint(len(b)) <= uint(n) { + return n, state, nil // still within the same state + } + state++ // switches "withinX" to "beforeY" where Y is the state after X + } + switch state { + case beforeIntegerDigits: + goto beforeInteger + case beforeFractionalDigits: + goto beforeFractional + case beforeExponentDigits: + goto beforeExponent + default: + return n, state, nil + } + } + + // Consume required integer component (with optional minus sign). +beforeInteger: + resumeOffset = n + if uint(len(b)) > 0 && b[0] == '-' { + n++ + } + switch { + case uint(len(b)) <= uint(n): + return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF + case b[n] == '0': + n++ + state = beforeFractionalDigits + case '1' <= b[n] && b[n] <= '9': + n++ + for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { + n++ + } + state = withinIntegerDigits + default: + return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)") + } + + // Consume optional fractional component. +beforeFractional: + if uint(len(b)) > uint(n) && b[n] == '.' { + resumeOffset = n + n++ + switch { + case uint(len(b)) <= uint(n): + return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF + case '0' <= b[n] && b[n] <= '9': + n++ + default: + return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)") + } + for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { + n++ + } + state = withinFractionalDigits + } + + // Consume optional exponent component. 
+beforeExponent: + if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') { + resumeOffset = n + n++ + if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') { + n++ + } + switch { + case uint(len(b)) <= uint(n): + return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF + case '0' <= b[n] && b[n] <= '9': + n++ + default: + return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)") + } + for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { + n++ + } + state = withinExponentDigits + } + + return n, state, nil +} + +// parseHexUint16 is similar to strconv.ParseUint, +// but operates directly on []byte and is optimized for base-16. +// See https://go.dev/issue/42429. +func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) { + if len(b) != 4 { + return 0, false + } + for i := 0; i < 4; i++ { + c := b[i] + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = 10 + c - 'a' + case 'A' <= c && c <= 'F': + c = 10 + c - 'A' + default: + return 0, false + } + v = v*16 + uint16(c) + } + return v, true +} + +// ParseUint parses b as a decimal unsigned integer according to +// a strict subset of the JSON number grammar, returning the value if valid. +// It returns (0, false) if there is a syntax error and +// returns (math.MaxUint64, false) if there is an overflow. +func ParseUint(b []byte) (v uint64, ok bool) { + const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64))) + var n int + for ; len(b) > n && ('0' <= b[n] && b[n] <= '9'); n++ { + v = 10*v + uint64(b[n]-'0') + } + switch { + case n == 0 || len(b) != n || (b[0] == '0' && string(b) != "0"): + return 0, false + case n >= unsafeWidth && (b[0] != '1' || v < 1e19 || n > unsafeWidth): + return math.MaxUint64, false + } + return v, true +} + +// ParseFloat parses a floating point number according to the Go float grammar. +// Note that the JSON number grammar is a strict subset. 
+// +// If the number overflows the finite representation of a float, +// then we return MaxFloat since any finite value will always be infinitely +// more accurate at representing another finite value than an infinite value. +func ParseFloat(b []byte, bits int) (v float64, ok bool) { + // Fast path for exact integer numbers which fit in the + // 24-bit or 53-bit significand of a float32 or float64. + var negLen int // either 0 or 1 + if len(b) > 0 && b[0] == '-' { + negLen = 1 + } + u, ok := ParseUint(b[negLen:]) + if ok && ((bits == 32 && u <= 1<<24) || (bits == 64 && u <= 1<<53)) { + return math.Copysign(float64(u), float64(-1*negLen)), true + } + + // Note that the []byte->string conversion unfortunately allocates. + // See https://go.dev/issue/42429 for more information. + fv, err := strconv.ParseFloat(string(b), bits) + if math.IsInf(fv, 0) { + switch { + case bits == 32 && math.IsInf(fv, +1): + return +math.MaxFloat32, true + case bits == 64 && math.IsInf(fv, +1): + return +math.MaxFloat64, true + case bits == 32 && math.IsInf(fv, -1): + return -math.MaxFloat32, true + case bits == 64 && math.IsInf(fv, -1): + return -math.MaxFloat64, true + } + } + return fv, err == nil +} diff --git a/internal/jsonwire/decode_test.go b/internal/jsonwire/decode_test.go new file mode 100644 index 0000000..69fe220 --- /dev/null +++ b/internal/jsonwire/decode_test.go @@ -0,0 +1,441 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package jsonwire + +import ( + "errors" + "io" + "math" + "reflect" + "strings" + "testing" +) + +func TestConsumeWhitespace(t *testing.T) { + tests := []struct { + in string + want int + }{ + {"", 0}, + {"a", 0}, + {" a", 1}, + {" a ", 1}, + {" \n\r\ta", 4}, + {" \n\r\t \n\r\t \n\r\t \n\r\t", 16}, + {"\u00a0", 0}, // non-breaking space is not JSON whitespace + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + if got := ConsumeWhitespace([]byte(tt.in)); got != tt.want { + t.Errorf("ConsumeWhitespace(%q) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestConsumeLiteral(t *testing.T) { + tests := []struct { + literal string + in string + want int + wantErr error + }{ + {"null", "", 0, io.ErrUnexpectedEOF}, + {"null", "n", 1, io.ErrUnexpectedEOF}, + {"null", "nu", 2, io.ErrUnexpectedEOF}, + {"null", "nul", 3, io.ErrUnexpectedEOF}, + {"null", "null", 4, nil}, + {"null", "nullx", 4, nil}, + {"null", "x", 0, NewInvalidCharacterError("x", "within literal null (expecting 'n')")}, + {"null", "nuxx", 2, NewInvalidCharacterError("x", "within literal null (expecting 'l')")}, + + {"false", "", 0, io.ErrUnexpectedEOF}, + {"false", "f", 1, io.ErrUnexpectedEOF}, + {"false", "fa", 2, io.ErrUnexpectedEOF}, + {"false", "fal", 3, io.ErrUnexpectedEOF}, + {"false", "fals", 4, io.ErrUnexpectedEOF}, + {"false", "false", 5, nil}, + {"false", "falsex", 5, nil}, + {"false", "x", 0, NewInvalidCharacterError("x", "within literal false (expecting 'f')")}, + {"false", "falsx", 4, NewInvalidCharacterError("x", "within literal false (expecting 'e')")}, + + {"true", "", 0, io.ErrUnexpectedEOF}, + {"true", "t", 1, io.ErrUnexpectedEOF}, + {"true", "tr", 2, io.ErrUnexpectedEOF}, + {"true", "tru", 3, io.ErrUnexpectedEOF}, + {"true", "true", 4, nil}, + {"true", "truex", 4, nil}, + {"true", "x", 0, NewInvalidCharacterError("x", "within literal true (expecting 't')")}, + {"true", "trux", 3, NewInvalidCharacterError("x", "within literal true (expecting 'e')")}, + } + + 
for _, tt := range tests { + t.Run("", func(t *testing.T) { + var got int + switch tt.literal { + case "null": + got = ConsumeNull([]byte(tt.in)) + case "false": + got = ConsumeFalse([]byte(tt.in)) + case "true": + got = ConsumeTrue([]byte(tt.in)) + default: + t.Errorf("invalid literal: %v", tt.literal) + } + switch { + case tt.wantErr == nil && got != tt.want: + t.Errorf("Consume%v(%q) = %v, want %v", strings.Title(tt.literal), tt.in, got, tt.want) + case tt.wantErr != nil && got != 0: + t.Errorf("Consume%v(%q) = %v, want %v", strings.Title(tt.literal), tt.in, got, 0) + } + + got, gotErr := ConsumeLiteral([]byte(tt.in), tt.literal) + if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { + t.Errorf("ConsumeLiteral(%q, %q) = (%v, %v), want (%v, %v)", tt.in, tt.literal, got, gotErr, tt.want, tt.wantErr) + } + }) + } +} + +func TestConsumeString(t *testing.T) { + var errPrev = errors.New("same as previous error") + tests := []struct { + in string + simple bool + want int + wantUTF8 int // consumed bytes if validateUTF8 is specified + wantFlags ValueFlags + wantUnquote string + wantErr error + wantErrUTF8 error // error if validateUTF8 is specified + wantErrUnquote error + }{ + {``, false, 0, 0, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"`, false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`""`, true, 2, 2, 0, "", nil, nil, nil}, + {`""x`, true, 2, 2, 0, "", nil, nil, NewInvalidCharacterError("x", "after string value")}, + {` ""x`, false, 0, 0, 0, "", NewInvalidCharacterError(" ", "at start of string (expecting '\"')"), errPrev, errPrev}, + {`"hello`, false, 6, 6, 0, "hello", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"hello"`, true, 7, 7, 0, "hello", nil, nil, nil}, + {"\"\x00\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidCharacterError("\x00", "within string (expecting non-control character)"), errPrev, errPrev}, + {`"\u0000"`, false, 8, 8, stringNonVerbatim, "\x00", nil, nil, nil}, + {"\"\x1f\"", false, 1, 1, 
stringNonVerbatim | stringNonCanonical, "", NewInvalidCharacterError("\x1f", "within string (expecting non-control character)"), errPrev, errPrev}, + {`"\u001f"`, false, 8, 8, stringNonVerbatim, "\x1f", nil, nil, nil}, + {`"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, true, 54, 54, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, nil, nil}, + {"\" !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f\"", true, 41, 41, 0, " !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f", nil, nil, nil}, + {`"&"`, false, 3, 3, 0, "&", nil, nil, nil}, + {`"<"`, false, 3, 3, 0, "<", nil, nil, nil}, + {`">"`, false, 3, 3, 0, ">", nil, nil, nil}, + {"\"x\x80\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, ErrInvalidUTF8, errPrev}, + {"\"x\xff\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, ErrInvalidUTF8, errPrev}, + {"\"x\xc0", false, 3, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF}, + {"\"x\xc0\x80\"", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev}, + {"\"x\xe0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev}, + {"\"x\xe0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF}, + {"\"x\xe0\x80\x80\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev}, + {"\"x\xf0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev}, + {"\"x\xf0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF}, + {"\"x\xf0\x80\x80", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", io.ErrUnexpectedEOF, ErrInvalidUTF8, io.ErrUnexpectedEOF}, + {"\"x\xf0\x80\x80\x80\"", false, 7, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev}, + 
{"\"x\xed\xba\xad\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, ErrInvalidUTF8, errPrev}, + {"\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", false, 25, 25, 0, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil}, + {`"¢"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"¢"`[:3], false, 3, 3, 0, "¢", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote + {`"¢"`[:4], false, 4, 4, 0, "¢", nil, nil, nil}, + {`"€"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"€"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"€"`[:4], false, 4, 4, 0, "€", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote + {`"€"`[:5], false, 5, 5, 0, "€", nil, nil, nil}, + {`"𐍈"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"𐍈"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"𐍈"`[:4], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"𐍈"`[:5], false, 5, 5, 0, "𐍈", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote + {`"𐍈"`[:6], false, 6, 6, 0, "𐍈", nil, nil, nil}, + {`"x\`, false, 2, 2, stringNonVerbatim, "x", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"x\"`, false, 4, 4, stringNonVerbatim, "x\"", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"x\x"`, false, 2, 2, stringNonVerbatim | stringNonCanonical, "x", NewInvalidEscapeSequenceError(`\x`), errPrev, errPrev}, + {`"\"\\\b\f\n\r\t"`, false, 16, 16, stringNonVerbatim, "\"\\\b\f\n\r\t", nil, nil, nil}, + {`"/"`, true, 3, 3, 0, "/", nil, nil, nil}, + {`"\/"`, false, 4, 4, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil}, + {`"\u002f"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil}, + {`"\u`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\uf`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\uff`, false, 1, 
1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\ufff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\ufffd`, false, 7, 7, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\ufffd"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, nil, nil}, + {`"\uABCD"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\uabcd", nil, nil, nil}, + {`"\uefX0"`, false, 1, 1, stringNonVerbatim | stringNonCanonical, "", NewInvalidEscapeSequenceError(`\uefX0`), errPrev, errPrev}, + {`"\uDEAD`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\uDEAD"`, false, 8, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, NewInvalidEscapeSequenceError(`\uDEAD"`), errPrev}, + {`"\uDEAD______"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd______", nil, NewInvalidEscapeSequenceError(`\uDEAD______`), errPrev}, + {`"\uDEAD\uXXXX"`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", NewInvalidEscapeSequenceError(`\uXXXX`), NewInvalidEscapeSequenceError(`\uDEAD\uXXXX`), NewInvalidEscapeSequenceError(`\uXXXX`)}, + {`"\uDEAD\uBEEF"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd\ubeef", nil, NewInvalidEscapeSequenceError(`\uDEAD\uBEEF`), errPrev}, + {`"\uD800\udea`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev}, + {`"\uD800\udb`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, NewInvalidEscapeSequenceError(`\uD800\udb`), io.ErrUnexpectedEOF}, + {`"\uD800\udead"`, false, 14, 14, stringNonVerbatim | stringNonCanonical, "\U000102ad", nil, nil, nil}, + {`"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, false, 50, 50, stringNonVerbatim | stringNonCanonical, "\"\\/\b\f\n\r\t", nil, nil, nil}, + {`"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, false, 56, 56, 
stringNonVerbatim | stringNonCanonical, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + if tt.wantErrUTF8 == errPrev { + tt.wantErrUTF8 = tt.wantErr + } + if tt.wantErrUnquote == errPrev { + tt.wantErrUnquote = tt.wantErrUTF8 + } + + switch got := ConsumeSimpleString([]byte(tt.in)); { + case tt.simple && got != tt.want: + t.Errorf("consumeSimpleString(%q) = %v, want %v", tt.in, got, tt.want) + case !tt.simple && got != 0: + t.Errorf("consumeSimpleString(%q) = %v, want %v", tt.in, got, 0) + } + + var gotFlags ValueFlags + got, gotErr := ConsumeString(&gotFlags, []byte(tt.in), false) + if gotFlags != tt.wantFlags { + t.Errorf("consumeString(%q, false) flags = %v, want %v", tt.in, gotFlags, tt.wantFlags) + } + if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { + t.Errorf("consumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) + } + + got, gotErr = ConsumeString(&gotFlags, []byte(tt.in), true) + if got != tt.wantUTF8 || !reflect.DeepEqual(gotErr, tt.wantErrUTF8) { + t.Errorf("consumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.wantUTF8, tt.wantErrUTF8) + } + + gotUnquote, gotErr := AppendUnquote(nil, tt.in) + if string(gotUnquote) != tt.wantUnquote || !reflect.DeepEqual(gotErr, tt.wantErrUnquote) { + t.Errorf("AppendUnquote(nil, %q) = (%q, %v), want (%q, %v)", tt.in[:got], gotUnquote, gotErr, tt.wantUnquote, tt.wantErrUnquote) + } + }) + } +} + +func TestConsumeNumber(t *testing.T) { + tests := []struct { + in string + simple bool + want int + wantErr error + }{ + {"", false, 0, io.ErrUnexpectedEOF}, + {`"NaN"`, false, 0, NewInvalidCharacterError("\"", "within number (expecting digit)")}, + {`"Infinity"`, false, 0, NewInvalidCharacterError("\"", "within number (expecting digit)")}, + {`"-Infinity"`, false, 0, NewInvalidCharacterError("\"", "within number (expecting digit)")}, + {".0", false, 0, 
NewInvalidCharacterError(".", "within number (expecting digit)")}, + {"0", true, 1, nil}, + {"-0", false, 2, nil}, + {"+0", false, 0, NewInvalidCharacterError("+", "within number (expecting digit)")}, + {"1", true, 1, nil}, + {"-1", false, 2, nil}, + {"00", true, 1, nil}, + {"-00", false, 2, nil}, + {"01", true, 1, nil}, + {"-01", false, 2, nil}, + {"0i", true, 1, nil}, + {"-0i", false, 2, nil}, + {"0f", true, 1, nil}, + {"-0f", false, 2, nil}, + {"9876543210", true, 10, nil}, + {"-9876543210", false, 11, nil}, + {"9876543210x", true, 10, nil}, + {"-9876543210x", false, 11, nil}, + {" 9876543210", true, 0, NewInvalidCharacterError(" ", "within number (expecting digit)")}, + {"- 9876543210", false, 1, NewInvalidCharacterError(" ", "within number (expecting digit)")}, + {strings.Repeat("9876543210", 1000), true, 10000, nil}, + {"-" + strings.Repeat("9876543210", 1000), false, 1 + 10000, nil}, + {"0.", false, 1, io.ErrUnexpectedEOF}, + {"-0.", false, 2, io.ErrUnexpectedEOF}, + {"0e", false, 1, io.ErrUnexpectedEOF}, + {"-0e", false, 2, io.ErrUnexpectedEOF}, + {"0E", false, 1, io.ErrUnexpectedEOF}, + {"-0E", false, 2, io.ErrUnexpectedEOF}, + {"0.0", false, 3, nil}, + {"-0.0", false, 4, nil}, + {"0e0", false, 3, nil}, + {"-0e0", false, 4, nil}, + {"0E0", false, 3, nil}, + {"-0E0", false, 4, nil}, + {"0.0123456789", false, 12, nil}, + {"-0.0123456789", false, 13, nil}, + {"1.f", false, 2, NewInvalidCharacterError("f", "within number (expecting digit)")}, + {"-1.f", false, 3, NewInvalidCharacterError("f", "within number (expecting digit)")}, + {"1.e", false, 2, NewInvalidCharacterError("e", "within number (expecting digit)")}, + {"-1.e", false, 3, NewInvalidCharacterError("e", "within number (expecting digit)")}, + {"1e0", false, 3, nil}, + {"-1e0", false, 4, nil}, + {"1E0", false, 3, nil}, + {"-1E0", false, 4, nil}, + {"1Ex", false, 2, NewInvalidCharacterError("x", "within number (expecting digit)")}, + {"-1Ex", false, 3, NewInvalidCharacterError("x", "within number 
(expecting digit)")}, + {"1e-0", false, 4, nil}, + {"-1e-0", false, 5, nil}, + {"1e+0", false, 4, nil}, + {"-1e+0", false, 5, nil}, + {"1E-0", false, 4, nil}, + {"-1E-0", false, 5, nil}, + {"1E+0", false, 4, nil}, + {"-1E+0", false, 5, nil}, + {"1E+00500", false, 8, nil}, + {"-1E+00500", false, 9, nil}, + {"1E+00500x", false, 8, nil}, + {"-1E+00500x", false, 9, nil}, + {"9876543210.0123456789e+01234589x", false, 31, nil}, + {"-9876543210.0123456789e+01234589x", false, 32, nil}, + {"1_000_000", true, 1, nil}, + {"0x12ef", true, 1, nil}, + {"0x1p-2", true, 1, nil}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + switch got := ConsumeSimpleNumber([]byte(tt.in)); { + case tt.simple && got != tt.want: + t.Errorf("ConsumeSimpleNumber(%q) = %v, want %v", tt.in, got, tt.want) + case !tt.simple && got != 0: + t.Errorf("ConsumeSimpleNumber(%q) = %v, want %v", tt.in, got, 0) + } + + got, gotErr := ConsumeNumber([]byte(tt.in)) + if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { + t.Errorf("ConsumeNumber(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) + } + }) + } +} + +func TestParseHexUint16(t *testing.T) { + tests := []struct { + in string + want uint16 + wantOk bool + }{ + {"", 0, false}, + {"a", 0, false}, + {"ab", 0, false}, + {"abc", 0, false}, + {"abcd", 0xabcd, true}, + {"abcde", 0, false}, + {"9eA1", 0x9ea1, true}, + {"gggg", 0, false}, + {"0000", 0x0000, true}, + {"1234", 0x1234, true}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + got, gotOk := parseHexUint16([]byte(tt.in)) + if got != tt.want || gotOk != tt.wantOk { + t.Errorf("parseHexUint16(%q) = (0x%04x, %v), want (0x%04x, %v)", tt.in, got, gotOk, tt.want, tt.wantOk) + } + }) + } +} + +func TestParseUint(t *testing.T) { + tests := []struct { + in string + want uint64 + wantOk bool + }{ + {"", 0, false}, + {"0", 0, true}, + {"1", 1, true}, + {"-1", 0, false}, + {"1f", 0, false}, + {"00", 0, false}, + {"01", 0, false}, + {"10", 10, 
true}, + {"10.9", 0, false}, + {" 10", 0, false}, + {"10 ", 0, false}, + {"123456789", 123456789, true}, + {"123456789d", 0, false}, + {"18446744073709551614", math.MaxUint64 - 1, true}, + {"18446744073709551615", math.MaxUint64, true}, + {"18446744073709551616", math.MaxUint64, false}, + {"18446744073709551620", math.MaxUint64, false}, + {"18446744073709551700", math.MaxUint64, false}, + {"18446744073709552000", math.MaxUint64, false}, + {"18446744073709560000", math.MaxUint64, false}, + {"18446744073709600000", math.MaxUint64, false}, + {"18446744073710000000", math.MaxUint64, false}, + {"18446744073800000000", math.MaxUint64, false}, + {"18446744074000000000", math.MaxUint64, false}, + {"18446744080000000000", math.MaxUint64, false}, + {"18446744100000000000", math.MaxUint64, false}, + {"18446745000000000000", math.MaxUint64, false}, + {"18446750000000000000", math.MaxUint64, false}, + {"18446800000000000000", math.MaxUint64, false}, + {"18447000000000000000", math.MaxUint64, false}, + {"18450000000000000000", math.MaxUint64, false}, + {"18500000000000000000", math.MaxUint64, false}, + {"19000000000000000000", math.MaxUint64, false}, + {"19999999999999999999", math.MaxUint64, false}, + {"20000000000000000000", math.MaxUint64, false}, + {"100000000000000000000", math.MaxUint64, false}, + {"99999999999999999999999999999999", math.MaxUint64, false}, + {"99999999999999999999999999999999f", 0, false}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + got, gotOk := ParseUint([]byte(tt.in)) + if got != tt.want || gotOk != tt.wantOk { + t.Errorf("ParseUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotOk, tt.want, tt.wantOk) + } + }) + } +} + +func TestParseFloat(t *testing.T) { + tests := []struct { + in string + want32 float64 + want64 float64 + wantOk bool + }{ + {"0", 0, 0, true}, + {"-1", -1, -1, true}, + {"1", 1, 1, true}, + + {"-16777215", -16777215, -16777215, true}, // -(1<<24 - 1) + {"16777215", 16777215, 16777215, true}, // +(1<<24 - 1) + 
{"-16777216", -16777216, -16777216, true}, // -(1<<24) + {"16777216", 16777216, 16777216, true}, // +(1<<24) + {"-16777217", -16777216, -16777217, true}, // -(1<<24 + 1) + {"16777217", 16777216, 16777217, true}, // +(1<<24 + 1) + + {"-9007199254740991", -9007199254740992, -9007199254740991, true}, // -(1<<53 - 1) + {"9007199254740991", 9007199254740992, 9007199254740991, true}, // +(1<<53 - 1) + {"-9007199254740992", -9007199254740992, -9007199254740992, true}, // -(1<<53) + {"9007199254740992", 9007199254740992, 9007199254740992, true}, // +(1<<53) + {"-9007199254740993", -9007199254740992, -9007199254740992, true}, // -(1<<53 + 1) + {"9007199254740993", 9007199254740992, 9007199254740992, true}, // +(1<<53 + 1) + + {"-1e1000", -math.MaxFloat32, -math.MaxFloat64, true}, + {"1e1000", +math.MaxFloat32, +math.MaxFloat64, true}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + got32, gotOk32 := ParseFloat([]byte(tt.in), 32) + if got32 != tt.want32 || gotOk32 != tt.wantOk { + t.Errorf("ParseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, got32, gotOk32, tt.want32, tt.wantOk) + } + + got64, gotOk64 := ParseFloat([]byte(tt.in), 64) + if got64 != tt.want64 || gotOk64 != tt.wantOk { + t.Errorf("ParseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotOk64, tt.want64, tt.wantOk) + } + }) + } +} diff --git a/internal/jsonwire/encode.go b/internal/jsonwire/encode.go new file mode 100644 index 0000000..18f19eb --- /dev/null +++ b/internal/jsonwire/encode.go @@ -0,0 +1,228 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsonwire + +import ( + "math" + "slices" + "strconv" + "unicode/utf16" + "unicode/utf8" +) + +// AppendQuote appends src to dst as a JSON string per RFC 7159, section 7.
+// +// If validateUTF8 is specified, this rejects input that contains invalid UTF-8 +// otherwise invalid bytes are replaced with the Unicode replacement character. +// If escape is provided, it specifies which runes to escape using +// hexadecimal sequences. If nil, the shortest representable form is used, +// which is also the canonical form for strings (RFC 8785, section 3.2.2.2). +// +// Note that this API allows full control over the formatting of strings +// except for whether a forward solidus '/' may be formatted as '\/' and +// the casing of hexadecimal Unicode escape sequences. +func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, validateUTF8 bool, escape *EscapeRunes) ([]byte, error) { + // i is the start of the source bytes not yet copied into dst; + // n is the current scan offset into src. + var i, n int + var hasInvalidUTF8 bool + dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`)) + dst = append(dst, '"') + if escape == nil || escape.IsCanonical() { + // Optimize for canonical formatting. + for uint(len(src)) > uint(n) { + // Handle single-byte ASCII. + if c := src[n]; c < utf8.RuneSelf { + n++ + if escapeCanonical.needEscapeASCII(c) { + dst = append(dst, src[i:n-1]...) + dst = appendEscapedASCII(dst, c) + i = n + } + continue + } + + // Handle multi-byte Unicode. + _, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))) + n += rn + if rn == 1 { // must be utf8.RuneError since we already checked for single-byte ASCII + hasInvalidUTF8 = true + dst = append(dst, src[i:n-rn]...) + dst = append(dst, "\ufffd"...) + i = n + } + } + } else { + // Handle arbitrary escaping. + for uint(len(src)) > uint(n) { + // Handle single-byte ASCII. + if c := src[n]; c < utf8.RuneSelf { + n++ + if escape.needEscapeASCII(c) { + dst = append(dst, src[i:n-1]...) + if escape.needEscapeASCIIAsUTF16(c) { + dst = appendEscapedUTF16(dst, uint16(c)) + } else { + dst = appendEscapedASCII(dst, c) + } + i = n + } + continue + } + + // Handle multi-byte Unicode.
+ switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { + case r == utf8.RuneError && rn == 1: + hasInvalidUTF8 = true + dst = append(dst, src[i:n]...) + if escape.needEscapeRune(r) { + dst = append(dst, `\ufffd`...) + } else { + dst = append(dst, "\ufffd"...) + } + n += rn + i = n + case escape.needEscapeRune(r): + dst = append(dst, src[i:n]...) + dst = appendEscapedUnicode(dst, r) + n += rn + i = n + default: + n += rn + } + } + } + dst = append(dst, src[i:n]...) + dst = append(dst, '"') + if validateUTF8 && hasInvalidUTF8 { + return dst, ErrInvalidUTF8 + } + return dst, nil +} + +func appendEscapedASCII(dst []byte, c byte) []byte { + switch c { + case '"', '\\': + dst = append(dst, '\\', c) + case '\b': + dst = append(dst, "\\b"...) + case '\f': + dst = append(dst, "\\f"...) + case '\n': + dst = append(dst, "\\n"...) + case '\r': + dst = append(dst, "\\r"...) + case '\t': + dst = append(dst, "\\t"...) + default: + dst = appendEscapedUTF16(dst, uint16(c)) + } + return dst +} + +func appendEscapedUnicode(dst []byte, r rune) []byte { + if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' { + dst = appendEscapedUTF16(dst, uint16(r1)) + dst = appendEscapedUTF16(dst, uint16(r2)) + } else { + dst = appendEscapedUTF16(dst, uint16(r)) + } + return dst +} + +func appendEscapedUTF16(dst []byte, x uint16) []byte { + const hex = "0123456789abcdef" + return append(dst, '\\', 'u', hex[(x>>12)&0xf], hex[(x>>8)&0xf], hex[(x>>4)&0xf], hex[(x>>0)&0xf]) +} + +// ReformatString consumes a JSON string from src and appends it to dst, +// reformatting it if necessary for the given escapeRune parameter. +// It returns the appended output and the number of consumed input bytes. +func ReformatString(dst, src []byte, validateUTF8, preserveRaw bool, escape *EscapeRunes) ([]byte, int, error) { + // TODO: Should this update ValueFlags as input? 
+ var flags ValueFlags + n, err := ConsumeString(&flags, src, validateUTF8) + if err != nil { + return dst, n, err + } + if preserveRaw || (escape.IsCanonical() && flags.IsCanonical()) { + dst = append(dst, src[:n]...) // copy the string verbatim + return dst, n, nil + } + + // TODO: Implement a direct, raw-to-raw reformat for strings. + // If the escapeRune option would have resulted in no changes to the output, + // it would be faster to simply append src to dst without going through + // an intermediary representation in a separate buffer. + b, _ := AppendUnquote(nil, src[:n]) + dst, _ = AppendQuote(dst, string(b), validateUTF8, escape) + return dst, n, nil +} + +// AppendFloat appends src to dst as a JSON number per RFC 7159, section 6. +// It formats numbers similar to the ES6 number-to-string conversion. +// See https://go.dev/issue/14135. +// +// The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with +// RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0, +// which is formatted as -0 instead of just 0. +// +// For 32-bit floating-point numbers, +// the output is a 32-bit equivalent of the algorithm. +// Note that ECMA-262 specifies no algorithm for 32-bit numbers. +func AppendFloat(dst []byte, src float64, bits int) []byte { + if bits == 32 { + src = float64(float32(src)) + } + + abs := math.Abs(src) + fmt := byte('f') + if abs != 0 { + if bits == 64 && (float64(abs) < 1e-6 || float64(abs) >= 1e21) || + bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { + fmt = 'e' + } + } + dst = strconv.AppendFloat(dst, src, fmt, -1, bits) + if fmt == 'e' { + // Clean up e-09 to e-9. + n := len(dst) + if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' { + dst[n-2] = dst[n-1] + dst = dst[:n-1] + } + } + return dst +} + +// ReformatNumber consumes a JSON string from src and appends it to dst, +// canonicalizing it if specified. +// It returns the appended output and the number of consumed input bytes. 
+func ReformatNumber(dst, src []byte, canonicalize bool) ([]byte, int, error) { + n, err := ConsumeNumber(src) + if err != nil { + return dst, n, err + } + if !canonicalize { + dst = append(dst, src[:n]...) // copy the number verbatim + return dst, n, nil + } + + // Canonicalize the number per RFC 8785, section 3.2.2.3. + // As an optimization, we can copy integer numbers below 2⁵³ verbatim. + const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10)) + if n < maxExactIntegerDigits && ConsumeSimpleNumber(src[:n]) == n { + dst = append(dst, src[:n]...) // copy the number verbatim + return dst, n, nil + } + fv, _ := strconv.ParseFloat(string(src[:n]), 64) + switch { + case fv == 0: + fv = 0 // normalize negative zero as just zero + case math.IsInf(fv, +1): + fv = +math.MaxFloat64 + case math.IsInf(fv, -1): + fv = -math.MaxFloat64 + } + return AppendFloat(dst, fv, 64), n, nil +} diff --git a/internal/jsonwire/encode_test.go b/internal/jsonwire/encode_test.go new file mode 100644 index 0000000..09d497b --- /dev/null +++ b/internal/jsonwire/encode_test.go @@ -0,0 +1,336 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package jsonwire + +import ( + "bufio" + "bytes" + "compress/gzip" + "crypto/sha256" + "encoding/binary" + "encoding/hex" + "flag" + "math" + "net/http" + "reflect" + "strconv" + "strings" + "testing" + "time" + "unicode" +) + +func TestAppendQuote(t *testing.T) { + var ( + escapeNothing = MakeEscapeRunes(false, false, nil) + escapeHTML = MakeEscapeRunes(true, true, nil) + escapeNonASCII = MakeEscapeRunes(false, false, func(r rune) bool { return r > unicode.MaxASCII }) + escapeEverything = MakeEscapeRunes(false, false, func(r rune) bool { return true }) + ) + + tests := []struct { + in string + escapeRune *EscapeRunes + want string + wantErr error + wantErrUTF8 error + }{ + {"", nil, `""`, nil, nil}, + {"hello", nil, `"hello"`, nil, nil}, + {"\x00", nil, `"\u0000"`, nil, nil}, + {"\x1f", nil, `"\u001f"`, nil, nil}, + {"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, `"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, nil, nil}, + {" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", nil, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil}, + {"x\x80\ufffd", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xff\ufffd", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\x80\ufffd", escapeNonASCII, "\"x\\ufffd\\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xff\ufffd", escapeNonASCII, "\"x\\ufffd\\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xc0", nil, "\"x\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xc0\x80", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xe0", nil, "\"x\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xe0\x80", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xe0\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0", nil, "\"x\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0\x80", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0\x80\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xed\xba\xad", nil, 
"\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"\"\\/\b\f\n\r\t", nil, `"\"\\/\b\f\n\r\t"`, nil, nil}, + {"\"\\/\b\f\n\r\t", escapeEverything, `"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, nil, nil}, + {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", nil, `"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃)."`, nil, nil}, + {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", escapeNonASCII, `"\u0669(-\u032e\u032e\u0303-\u0303)\u06f6 \u0669(\u25cf\u032e\u032e\u0303\u2022\u0303)\u06f6 \u0669(\u0361\u0e4f\u032f\u0361\u0e4f)\u06f6 \u0669(-\u032e\u032e\u0303\u2022\u0303)."`, nil, nil}, + {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", escapeEverything, `"\u0669\u0028\u002d\u032e\u032e\u0303\u002d\u0303\u0029\u06f6\u0020\u0669\u0028\u25cf\u032e\u032e\u0303\u2022\u0303\u0029\u06f6\u0020\u0669\u0028\u0361\u0e4f\u032f\u0361\u0e4f\u0029\u06f6\u0020\u0669\u0028\u002d\u032e\u032e\u0303\u2022\u0303\u0029\u002e"`, nil, nil}, + {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, "\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", nil, nil}, + {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", escapeEverything, `"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, nil, nil}, + {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", nil, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, + {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeNothing, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, + {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeHTML, "\"\\u0000\\u001f\u0020\\\"\\u0026\\u003c\\u003e\\\\\u007f\u0080\\u2028\\u2029\ufffd\U0001f602\"", nil, nil}, + {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeNonASCII, 
"\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\\u0080\\u2028\\u2029\\ufffd\\ud83d\\ude02\"", nil, nil}, + {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeEverything, "\"\\u0000\\u001f\\u0020\\u0022\\u0026\\u003c\\u003e\\u005c\\u007f\\u0080\\u2028\\u2029\\ufffd\\ud83d\\ude02\"", nil, nil}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + got, gotErr := AppendQuote(nil, tt.in, false, tt.escapeRune) + if string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { + t.Errorf("AppendQuote(nil, %q, false, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) + } + switch got, gotErr := AppendQuote(nil, tt.in, true, tt.escapeRune); { + case tt.wantErrUTF8 == nil && (string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr)): + t.Errorf("AppendQuote(nil, %q, true, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) + case tt.wantErrUTF8 != nil && (!strings.HasPrefix(tt.want, string(got)) || !reflect.DeepEqual(gotErr, tt.wantErrUTF8)): + t.Errorf("AppendQuote(nil, %q, true, ...) 
= (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErrUTF8) + } + }) + } +} + +func TestAppendNumber(t *testing.T) { + tests := []struct { + in float64 + want32 string + want64 string + }{ + {math.E, "2.7182817", "2.718281828459045"}, + {math.Pi, "3.1415927", "3.141592653589793"}, + {math.SmallestNonzeroFloat32, "1e-45", "1.401298464324817e-45"}, + {math.SmallestNonzeroFloat64, "0", "5e-324"}, + {math.MaxFloat32, "3.4028235e+38", "3.4028234663852886e+38"}, + {math.MaxFloat64, "", "1.7976931348623157e+308"}, + {0.1111111111111111, "0.11111111", "0.1111111111111111"}, + {0.2222222222222222, "0.22222222", "0.2222222222222222"}, + {0.3333333333333333, "0.33333334", "0.3333333333333333"}, + {0.4444444444444444, "0.44444445", "0.4444444444444444"}, + {0.5555555555555555, "0.5555556", "0.5555555555555555"}, + {0.6666666666666666, "0.6666667", "0.6666666666666666"}, + {0.7777777777777777, "0.7777778", "0.7777777777777777"}, + {0.8888888888888888, "0.8888889", "0.8888888888888888"}, + {0.9999999999999999, "1", "0.9999999999999999"}, + + // The following entries are from RFC 8785, appendix B + // which are designed to ensure repeatable formatting of 64-bit floats. 
+ {math.Float64frombits(0x0000000000000000), "0", "0"}, + {math.Float64frombits(0x8000000000000000), "-0", "-0"}, // differs from RFC 8785 + {math.Float64frombits(0x0000000000000001), "0", "5e-324"}, + {math.Float64frombits(0x8000000000000001), "-0", "-5e-324"}, + {math.Float64frombits(0x7fefffffffffffff), "", "1.7976931348623157e+308"}, + {math.Float64frombits(0xffefffffffffffff), "", "-1.7976931348623157e+308"}, + {math.Float64frombits(0x4340000000000000), "9007199000000000", "9007199254740992"}, + {math.Float64frombits(0xc340000000000000), "-9007199000000000", "-9007199254740992"}, + {math.Float64frombits(0x4430000000000000), "295147900000000000000", "295147905179352830000"}, + {math.Float64frombits(0x44b52d02c7e14af5), "1e+23", "9.999999999999997e+22"}, + {math.Float64frombits(0x44b52d02c7e14af6), "1e+23", "1e+23"}, + {math.Float64frombits(0x44b52d02c7e14af7), "1e+23", "1.0000000000000001e+23"}, + {math.Float64frombits(0x444b1ae4d6e2ef4e), "1e+21", "999999999999999700000"}, + {math.Float64frombits(0x444b1ae4d6e2ef4f), "1e+21", "999999999999999900000"}, + {math.Float64frombits(0x444b1ae4d6e2ef50), "1e+21", "1e+21"}, + {math.Float64frombits(0x3eb0c6f7a0b5ed8c), "0.000001", "9.999999999999997e-7"}, + {math.Float64frombits(0x3eb0c6f7a0b5ed8d), "0.000001", "0.000001"}, + {math.Float64frombits(0x41b3de4355555553), "333333340", "333333333.3333332"}, + {math.Float64frombits(0x41b3de4355555554), "333333340", "333333333.33333325"}, + {math.Float64frombits(0x41b3de4355555555), "333333340", "333333333.3333333"}, + {math.Float64frombits(0x41b3de4355555556), "333333340", "333333333.3333334"}, + {math.Float64frombits(0x41b3de4355555557), "333333340", "333333333.33333343"}, + {math.Float64frombits(0xbecbf647612f3696), "-0.0000033333333", "-0.0000033333333333333333"}, + {math.Float64frombits(0x43143ff3c1cb0959), "1424953900000000", "1424953923781206.2"}, + + // The following are select entries from RFC 8785, appendix B, + // but modified for equivalent 32-bit behavior. 
+ {float64(math.Float32frombits(0x65a96815)), "9.999999e+22", "9.999998877476383e+22"}, + {float64(math.Float32frombits(0x65a96816)), "1e+23", "9.999999778196308e+22"}, + {float64(math.Float32frombits(0x65a96817)), "1.0000001e+23", "1.0000000678916234e+23"}, + {float64(math.Float32frombits(0x6258d725)), "999999900000000000000", "999999879303389000000"}, + {float64(math.Float32frombits(0x6258d726)), "999999950000000000000", "999999949672133200000"}, + {float64(math.Float32frombits(0x6258d727)), "1e+21", "1.0000000200408773e+21"}, + {float64(math.Float32frombits(0x6258d728)), "1.0000001e+21", "1.0000000904096215e+21"}, + {float64(math.Float32frombits(0x358637bc)), "9.999999e-7", "9.99999883788405e-7"}, + {float64(math.Float32frombits(0x358637bd)), "0.000001", "9.999999974752427e-7"}, + {float64(math.Float32frombits(0x358637be)), "0.0000010000001", "0.0000010000001111620804"}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + if got32 := string(AppendFloat(nil, tt.in, 32)); got32 != tt.want32 && tt.want32 != "" { + t.Errorf("AppendFloat(nil, %v, 32) = %v, want %v", tt.in, got32, tt.want32) + } + if got64 := string(AppendFloat(nil, tt.in, 64)); got64 != tt.want64 && tt.want64 != "" { + t.Errorf("AppendFloat(nil, %v, 64) = %v, want %v", tt.in, got64, tt.want64) + } + }) + } +} + +// The default of 1e4 lines was chosen since it is sufficiently large to include +// test numbers from all three categories (i.e., static, series, and random). +// Yet, it is sufficiently low to execute quickly relative to other tests. +// +// Processing 1e8 lines takes a minute and processes about 4GiB worth of text. +var testCanonicalNumberLines = flag.Float64("canonical-number-lines", 1e4, "specify the number of lines to check from the canonical numbers testdata") + +// TestCanonicalNumber verifies that appendNumber complies with RFC 8785 +// according to the testdata provided by the reference implementation. 
+// See https://github.com/cyberphone/json-canonicalization/tree/master/testdata#es6-numbers. +func TestCanonicalNumber(t *testing.T) { + const testfileURL = "https://github.com/cyberphone/json-canonicalization/releases/download/es6testfile/es6testfile100m.txt.gz" + hashes := map[float64]string{ + 1e3: "be18b62b6f69cdab33a7e0dae0d9cfa869fda80ddc712221570f9f40a5878687", + 1e4: "b9f7a8e75ef22a835685a52ccba7f7d6bdc99e34b010992cbc5864cd12be6892", + 1e5: "22776e6d4b49fa294a0d0f349268e5c28808fe7e0cb2bcbe28f63894e494d4c7", + 1e6: "49415fee2c56c77864931bd3624faad425c3c577d6d74e89a83bc725506dad16", + 1e7: "b9f8a44a91d46813b21b9602e72f112613c91408db0b8341fb94603d9db135e0", + 1e8: "0f7dda6b0837dde083c5d6b896f7d62340c8a2415b0c7121d83145e08a755272", + } + wantHash := hashes[*testCanonicalNumberLines] + if wantHash == "" { + t.Fatalf("canonical-number-lines must be one of the following values: 1e3, 1e4, 1e5, 1e6, 1e7, 1e8") + } + numLines := int(*testCanonicalNumberLines) + + // generator returns a function that generates the next float64 to format. + // This implements the algorithm specified in the reference implementation. 
+ generator := func() func() float64 { + static := [...]uint64{ + 0x0000000000000000, 0x8000000000000000, 0x0000000000000001, 0x8000000000000001, + 0xc46696695dbd1cc3, 0xc43211ede4974a35, 0xc3fce97ca0f21056, 0xc3c7213080c1a6ac, + 0xc39280f39a348556, 0xc35d9b1f5d20d557, 0xc327af4c4a80aaac, 0xc2f2f2a36ecd5556, + 0xc2be51057e155558, 0xc28840d131aaaaac, 0xc253670dc1555557, 0xc21f0b4935555557, + 0xc1e8d5d42aaaaaac, 0xc1b3de4355555556, 0xc17fca0555555556, 0xc1496e6aaaaaaaab, + 0xc114585555555555, 0xc0e046aaaaaaaaab, 0xc0aa0aaaaaaaaaaa, 0xc074d55555555555, + 0xc040aaaaaaaaaaab, 0xc00aaaaaaaaaaaab, 0xbfd5555555555555, 0xbfa1111111111111, + 0xbf6b4e81b4e81b4f, 0xbf35d867c3ece2a5, 0xbf0179ec9cbd821e, 0xbecbf647612f3696, + 0xbe965e9f80f29212, 0xbe61e54c672874db, 0xbe2ca213d840baf8, 0xbdf6e80fe033c8c6, + 0xbdc2533fe68fd3d2, 0xbd8d51ffd74c861c, 0xbd5774ccac3d3817, 0xbd22c3d6f030f9ac, + 0xbcee0624b3818f79, 0xbcb804ea293472c7, 0xbc833721ba905bd3, 0xbc4ebe9c5db3c61e, + 0xbc18987d17c304e5, 0xbbe3ad30dfcf371d, 0xbbaf7b816618582f, 0xbb792f9ab81379bf, + 0xbb442615600f9499, 0xbb101e77800c76e1, 0xbad9ca58cce0be35, 0xbaa4a1e0a3e6fe90, + 0xba708180831f320d, 0xba3a68cd9e985016, 0x446696695dbd1cc3, 0x443211ede4974a35, + 0x43fce97ca0f21056, 0x43c7213080c1a6ac, 0x439280f39a348556, 0x435d9b1f5d20d557, + 0x4327af4c4a80aaac, 0x42f2f2a36ecd5556, 0x42be51057e155558, 0x428840d131aaaaac, + 0x4253670dc1555557, 0x421f0b4935555557, 0x41e8d5d42aaaaaac, 0x41b3de4355555556, + 0x417fca0555555556, 0x41496e6aaaaaaaab, 0x4114585555555555, 0x40e046aaaaaaaaab, + 0x40aa0aaaaaaaaaaa, 0x4074d55555555555, 0x4040aaaaaaaaaaab, 0x400aaaaaaaaaaaab, + 0x3fd5555555555555, 0x3fa1111111111111, 0x3f6b4e81b4e81b4f, 0x3f35d867c3ece2a5, + 0x3f0179ec9cbd821e, 0x3ecbf647612f3696, 0x3e965e9f80f29212, 0x3e61e54c672874db, + 0x3e2ca213d840baf8, 0x3df6e80fe033c8c6, 0x3dc2533fe68fd3d2, 0x3d8d51ffd74c861c, + 0x3d5774ccac3d3817, 0x3d22c3d6f030f9ac, 0x3cee0624b3818f79, 0x3cb804ea293472c7, + 0x3c833721ba905bd3, 0x3c4ebe9c5db3c61e, 
0x3c18987d17c304e5, 0x3be3ad30dfcf371d, + 0x3baf7b816618582f, 0x3b792f9ab81379bf, 0x3b442615600f9499, 0x3b101e77800c76e1, + 0x3ad9ca58cce0be35, 0x3aa4a1e0a3e6fe90, 0x3a708180831f320d, 0x3a3a68cd9e985016, + 0x4024000000000000, 0x4014000000000000, 0x3fe0000000000000, 0x3fa999999999999a, + 0x3f747ae147ae147b, 0x3f40624dd2f1a9fc, 0x3f0a36e2eb1c432d, 0x3ed4f8b588e368f1, + 0x3ea0c6f7a0b5ed8d, 0x3e6ad7f29abcaf48, 0x3e35798ee2308c3a, 0x3ed539223589fa95, + 0x3ed4ff26cd5a7781, 0x3ed4f95a762283ff, 0x3ed4f8c60703520c, 0x3ed4f8b72f19cd0d, + 0x3ed4f8b5b31c0c8d, 0x3ed4f8b58d1c461a, 0x3ed4f8b5894f7f0e, 0x3ed4f8b588ee37f3, + 0x3ed4f8b588e47da4, 0x3ed4f8b588e3849c, 0x3ed4f8b588e36bb5, 0x3ed4f8b588e36937, + 0x3ed4f8b588e368f8, 0x3ed4f8b588e368f1, 0x3ff0000000000000, 0xbff0000000000000, + 0xbfeffffffffffffa, 0xbfeffffffffffffb, 0x3feffffffffffffa, 0x3feffffffffffffb, + 0x3feffffffffffffc, 0x3feffffffffffffe, 0xbfefffffffffffff, 0xbfefffffffffffff, + 0x3fefffffffffffff, 0x3fefffffffffffff, 0x3fd3333333333332, 0x3fd3333333333333, + 0x3fd3333333333334, 0x0010000000000000, 0x000ffffffffffffd, 0x000fffffffffffff, + 0x7fefffffffffffff, 0xffefffffffffffff, 0x4340000000000000, 0xc340000000000000, + 0x4430000000000000, 0x44b52d02c7e14af5, 0x44b52d02c7e14af6, 0x44b52d02c7e14af7, + 0x444b1ae4d6e2ef4e, 0x444b1ae4d6e2ef4f, 0x444b1ae4d6e2ef50, 0x3eb0c6f7a0b5ed8c, + 0x3eb0c6f7a0b5ed8d, 0x41b3de4355555553, 0x41b3de4355555554, 0x41b3de4355555555, + 0x41b3de4355555556, 0x41b3de4355555557, 0xbecbf647612f3696, 0x43143ff3c1cb0959, + } + var state struct { + idx int + data []byte + block [sha256.Size]byte + } + return func() float64 { + const numSerial = 2000 + var f float64 + switch { + case state.idx < len(static): + f = math.Float64frombits(static[state.idx]) + case state.idx < len(static)+numSerial: + f = math.Float64frombits(0x0010000000000000 + uint64(state.idx-len(static))) + default: + for f == 0 || math.IsNaN(f) || math.IsInf(f, 0) { + if len(state.data) == 0 { + state.block = 
sha256.Sum256(state.block[:]) + state.data = state.block[:] + } + f = math.Float64frombits(binary.LittleEndian.Uint64(state.data)) + state.data = state.data[8:] + } + } + state.idx++ + return f + } + } + + // Pass through the test twice. In the first pass we only hash the output, + // while in the second pass we check every line against the golden testdata. + // If the hashes match in the first pass, then we skip the second pass. + for _, checkGolden := range []bool{false, true} { + var br *bufio.Reader // for line-by-line reading of es6testfile100m.txt + if checkGolden { + resp, err := http.Get(testfileURL) + if err != nil { + t.Fatalf("http.Get error: %v", err) + } + defer resp.Body.Close() + + zr, err := gzip.NewReader(resp.Body) + if err != nil { + t.Fatalf("gzip.NewReader error: %v", err) + } + + br = bufio.NewReader(zr) + } + + // appendNumberJCS differs from appendNumber only for -0. + appendNumberJCS := func(b []byte, f float64) []byte { + if math.Signbit(f) && f == 0 { + return append(b, '0') + } + return AppendFloat(b, f, 64) + } + + var gotLine []byte + next := generator() + hash := sha256.New() + start := time.Now() + lastPrint := start + for n := 1; n <= numLines; n++ { + // Generate the formatted line for this number. + f := next() + gotLine = gotLine[:0] // reset from previous usage + gotLine = strconv.AppendUint(gotLine, math.Float64bits(f), 16) + gotLine = append(gotLine, ',') + gotLine = appendNumberJCS(gotLine, f) + gotLine = append(gotLine, '\n') + hash.Write(gotLine) + + // Check that the formatted line matches. + if checkGolden { + wantLine, err := br.ReadBytes('\n') + if err != nil { + t.Fatalf("bufio.Reader.ReadBytes error: %v", err) + } + if !bytes.Equal(gotLine, wantLine) { + t.Errorf("mismatch on line %d:\n\tgot %v\n\twant %v", + n, strings.TrimSpace(string(gotLine)), strings.TrimSpace(string(wantLine))) + } + } + + // Print progress. 
+ if now := time.Now(); now.Sub(lastPrint) > time.Second || n == numLines { + remaining := float64(now.Sub(start)) * float64(numLines-n) / float64(n) + t.Logf("%0.3f%% (%v remaining)", + 100.0*float64(n)/float64(numLines), + time.Duration(remaining).Round(time.Second)) + lastPrint = now + } + } + + gotHash := hex.EncodeToString(hash.Sum(nil)) + if gotHash == wantHash { + return // hashes match, no need to check golden testdata + } + } +} diff --git a/escape.go b/internal/jsonwire/escape.go similarity index 67% rename from escape.go rename to internal/jsonwire/escape.go index 2bb59da..cc024d7 100644 --- a/escape.go +++ b/internal/jsonwire/escape.go @@ -2,13 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsonwire import "unicode/utf8" // Validity of these checked in TestEscapeRunesTables. var ( - escapeCanonical = escapeRunes{ + escapeCanonical = EscapeRunes{ asciiCache: [...]int8{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -21,7 +21,7 @@ var ( }, canonical: true, } - escapeHTMLJS = escapeRunes{ + escapeHTMLJS = EscapeRunes{ asciiCache: [...]int8{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -35,24 +35,26 @@ var ( escapeHTML: true, escapeJS: true, } - escapeHTML = escapeRunes{asciiCache: escapeHTMLJS.asciiCache, escapeHTML: true} - escapeJS = escapeRunes{asciiCache: escapeCanonical.asciiCache, escapeJS: true} + escapeHTML = EscapeRunes{asciiCache: escapeHTMLJS.asciiCache, escapeHTML: true} + escapeJS = EscapeRunes{asciiCache: escapeCanonical.asciiCache, escapeJS: true} ) -// escapeRunes reports whether a rune must be escaped. -type escapeRunes struct { +// EscapeRunes reports whether a rune must be escaped. 
+type EscapeRunes struct { // asciiCache is a cache of whether an ASCII character must be escaped, // where 0 means not escaped, -1 escapes with the short sequence (e.g., \n), // and +1 escapes with the \uXXXX sequence. asciiCache [utf8.RuneSelf]int8 - canonical bool // whether there are no custom escapes - escapeHTML bool // should escape '<', '>', and '&' - escapeJS bool // should escape '\u2028' and '\u2029' + canonical bool // whether there are no custom escapes + escapeHTML bool // should escape '<', '>', and '&' + escapeJS bool // should escape '\u2028' and '\u2029' + escapeFunc func(rune) bool // arbitrary runes that need escaping; may be nil } -func makeEscapeRunes(html, js bool, fn func(rune) bool) *escapeRunes { +// MakeEscapeRunes constructs an escape table for the escape parameters. +func MakeEscapeRunes(html, js bool, fn func(rune) bool) *EscapeRunes { if fn == nil { switch [2]bool{html, js} { case [2]bool{false, false}: @@ -68,8 +70,8 @@ func makeEscapeRunes(html, js bool, fn func(rune) bool) *escapeRunes { return makeEscapeRunesSlow(html, js, fn) } -func makeEscapeRunesSlow(html, js bool, fn func(rune) bool) *escapeRunes { - e := escapeRunes{escapeHTML: html, escapeJS: js, escapeFunc: fn} +func makeEscapeRunesSlow(html, js bool, fn func(rune) bool) *EscapeRunes { + e := EscapeRunes{escapeHTML: html, escapeJS: js, escapeFunc: fn} e.canonical = !e.escapeHTML && !e.escapeJS && e.escapeFunc == nil // Escape characters that are required by JSON. @@ -98,19 +100,26 @@ func makeEscapeRunesSlow(html, js bool, fn func(rune) bool) *escapeRunes { return &e } -// escapeASCII reports whether c must be escaped. +// IsCanonical reports whether this uses canonical escaping, +// which is the minimal amount of escaping to produce a valid JSON string. +func (e *EscapeRunes) IsCanonical() bool { return e.canonical } + +// HasEscapeFunc reports whether EscapeFunc is in use. 
+func (e *EscapeRunes) HasEscapeFunc() bool { return e.escapeFunc != nil } + +// needEscapeASCII reports whether c must be escaped. // It assumes c < utf8.RuneSelf. -func (e *escapeRunes) escapeASCII(c byte) bool { +func (e *EscapeRunes) needEscapeASCII(c byte) bool { return e.asciiCache[c] != 0 } -// escapeASCIIAsUTF16 reports whether c must be escaped using a \uXXXX sequence. -func (e *escapeRunes) escapeASCIIAsUTF16(c byte) bool { +// needEscapeASCIIAsUTF16 reports whether c must be escaped using a \uXXXX sequence. +func (e *EscapeRunes) needEscapeASCIIAsUTF16(c byte) bool { return e.asciiCache[c] > 0 } -// escapeRune reports whether r must be escaped. +// needEscapeRune reports whether r must be escaped. // It assumes r >= utf8.RuneSelf. -func (e *escapeRunes) escapeRune(r rune) bool { +func (e *EscapeRunes) needEscapeRune(r rune) bool { return (e.escapeJS && (r == '\u2028' || r == '\u2029')) || (e.escapeFunc != nil && e.escapeFunc(r)) } diff --git a/escape_test.go b/internal/jsonwire/escape_test.go similarity index 91% rename from escape_test.go rename to internal/jsonwire/escape_test.go index 77e1671..522b37b 100644 --- a/escape_test.go +++ b/internal/jsonwire/escape_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsonwire import ( "reflect" @@ -11,8 +11,8 @@ import ( func TestEscapeRunesTables(t *testing.T) { tests := []struct { - got *escapeRunes - want *escapeRunes + got *EscapeRunes + want *EscapeRunes }{ {&escapeCanonical, makeEscapeRunesSlow(false, false, nil)}, {&escapeHTMLJS, makeEscapeRunesSlow(true, true, nil)}, diff --git a/internal/jsonwire/wire.go b/internal/jsonwire/wire.go new file mode 100644 index 0000000..6adfa3e --- /dev/null +++ b/internal/jsonwire/wire.go @@ -0,0 +1,169 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Package jsonwire implements stateless functionality for handling JSON text. +package jsonwire + +import ( + "cmp" + "errors" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" +) + +// TrimSuffixWhitespace trims JSON from the end of b. +func TrimSuffixWhitespace(b []byte) []byte { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + n := len(b) - 1 + for n >= 0 && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') { + n-- + } + return b[:n+1] +} + +// TrimSuffixString trims a valid JSON string at the end of b. +// The behavior is undefined if there is not a valid JSON string present. +func TrimSuffixString(b []byte) []byte { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + if len(b) > 0 && b[len(b)-1] == '"' { + b = b[:len(b)-1] + } + for len(b) >= 2 && !(b[len(b)-1] == '"' && b[len(b)-2] != '\\') { + b = b[:len(b)-1] // trim all characters except an unescaped quote + } + if len(b) > 0 && b[len(b)-1] == '"' { + b = b[:len(b)-1] + } + return b +} + +// HasSuffixByte reports whether b ends with c. +func HasSuffixByte(b []byte, c byte) bool { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + return len(b) > 0 && b[len(b)-1] == c +} + +// TrimSuffixByte removes c from the end of b if it is present. +func TrimSuffixByte(b []byte, c byte) []byte { + // NOTE: The arguments and logic are kept simple to keep this inlinable. + if len(b) > 0 && b[len(b)-1] == c { + return b[:len(b)-1] + } + return b +} + +// QuoteRune quotes the first rune in the input. +func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string { + r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) + if r == utf8.RuneError && n == 1 { + return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'` + } + return strconv.QuoteRune(r) +} + +// CompareUTF16 lexicographically compares x to y according +// to the UTF-16 codepoints of the UTF-8 encoded input strings. 
+// This implements the ordering specified in RFC 8785, section 3.2.3. +func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int { + // NOTE: This is an optimized, mostly allocation-free implementation + // of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the + // two implementations agree on the result of comparing any two strings. + isUTF16Self := func(r rune) bool { + return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF') + } + + var invalidUTF8 bool + x0, y0 := x, y + for { + if len(x) == 0 || len(y) == 0 { + if len(x) == len(y) && invalidUTF8 { + return strings.Compare(string(x0), string(y0)) + } + return cmp.Compare(len(x), len(y)) + } + + // ASCII fast-path. + if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf { + if x[0] != y[0] { + return cmp.Compare(x[0], y[0]) + } + x, y = x[1:], y[1:] + continue + } + + // Decode next pair of runes as UTF-8. + rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x))) + ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y))) + + selfx := isUTF16Self(rx) + selfy := isUTF16Self(ry) + switch { + // The x rune is a single UTF-16 codepoint, while + // the y rune is a surrogate pair of UTF-16 codepoints. + case selfx && !selfy: + ry, _ = utf16.EncodeRune(ry) + // The y rune is a single UTF-16 codepoint, while + // the x rune is a surrogate pair of UTF-16 codepoints. + case selfy && !selfx: + rx, _ = utf16.EncodeRune(rx) + } + if rx != ry { + return cmp.Compare(rx, ry) + } + invalidUTF8 = invalidUTF8 || (rx == utf8.RuneError && nx == 1) || (ry == utf8.RuneError && ny == 1) + x, y = x[nx:], y[ny:] + } +} + +// truncateMaxUTF8 truncates b such it contains at least one rune. +// +// The utf8 package currently lacks generic variants, which complicates +// generic functions that operates on either []byte or string. +// As a hack, we always call the utf8 function operating on strings, +// but always truncate the input such that the result is identical. 
+// +// Example usage: +// +// utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) +// +// Converting a []byte to a string is stack allocated since +// truncateMaxUTF8 guarantees that the []byte is short. +func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes { + // TODO(https://go.dev/issue/56948): Remove this function and + // instead directly call generic utf8 functions wherever used. + if len(b) > utf8.UTFMax { + return b[:utf8.UTFMax] + } + return b +} + +// NewError and ErrInvalidUTF8 are injected by the "jsontext" package, +// so that these error types use the jsontext.SyntacticError type. +var ( + NewError = errors.New + ErrInvalidUTF8 = errors.New("invalid UTF-8 within string") +) + +func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error { + what := QuoteRune(prefix) + return NewError("invalid character " + what + " " + where) +} + +func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error { + label := "escape sequence" + if len(what) > 6 { + label = "surrogate pair" + } + needEscape := strings.IndexFunc(string(what), func(r rune) bool { + return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r) + }) >= 0 + if needEscape { + return NewError("invalid " + label + " " + strconv.Quote(string(what)) + " within string") + } else { + return NewError("invalid " + label + " `" + string(what) + "` within string") + } +} diff --git a/internal/jsonwire/wire_test.go b/internal/jsonwire/wire_test.go new file mode 100644 index 0000000..369aa00 --- /dev/null +++ b/internal/jsonwire/wire_test.go @@ -0,0 +1,75 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package jsonwire + +import ( + "bytes" + "cmp" + "slices" + "testing" + "unicode/utf16" +) + +func TestQuoteRune(t *testing.T) { + tests := []struct{ in, want string }{ + {"x", `'x'`}, + {"\n", `'\n'`}, + {"'", `'\''`}, + {"\xff", `'\xff'`}, + {"💩", `'💩'`}, + {"💩"[:1], `'\xf0'`}, + {"\uffff", `'\uffff'`}, + {"\U00101234", `'\U00101234'`}, + } + for _, tt := range tests { + got := QuoteRune([]byte(tt.in)) + if got != tt.want { + t.Errorf("quoteRune(%q) = %s, want %s", tt.in, got, tt.want) + } + } +} + +var compareUTF16Testdata = []string{"", "\r", "1", "\u0080", "\u00f6", "\u20ac", "\U0001f600", "\ufb33"} + +func TestCompareUTF16(t *testing.T) { + for i, si := range compareUTF16Testdata { + for j, sj := range compareUTF16Testdata { + got := CompareUTF16([]byte(si), []byte(sj)) + want := cmp.Compare(i, j) + if got != want { + t.Errorf("CompareUTF16(%q, %q) = %v, want %v", si, sj, got, want) + } + } + } +} + +func FuzzCompareUTF16(f *testing.F) { + for _, td1 := range compareUTF16Testdata { + for _, td2 := range compareUTF16Testdata { + f.Add([]byte(td1), []byte(td2)) + } + } + + // CompareUTF16Simple is identical to CompareUTF16, + // but relies on naively converting a string to a []uint16 codepoints. + // It is easy to verify as correct, but is slow. + CompareUTF16Simple := func(x, y []byte) int { + ux := utf16.Encode([]rune(string(x))) + uy := utf16.Encode([]rune(string(y))) + if n := slices.Compare(ux, uy); n != 0 { + return n + } + return bytes.Compare(x, y) // only occurs for strings with invalid UTF-8 + } + + f.Fuzz(func(t *testing.T, s1, s2 []byte) { + // Compare the optimized and simplified implementations. 
+ got := CompareUTF16(s1, s2) + want := CompareUTF16Simple(s1, s2) + if got != want { + t.Errorf("CompareUTF16(%q, %q) = %v, want %v", s1, s2, got, want) + } + }) +} diff --git a/coder_test.go b/jsontext/coder_test.go similarity index 92% rename from coder_test.go rename to jsontext/coder_test.go index f7ec0f3..9200d09 100644 --- a/coder_test.go +++ b/jsontext/coder_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "bytes" @@ -18,6 +18,10 @@ import ( "github.com/go-json-experiment/json/internal/jsontest" ) +func len64[Bytes ~[]byte | ~string](in Bytes) int64 { + return int64(len(in)) +} + var ( zeroToken Token zeroValue RawValue @@ -581,59 +585,6 @@ func TestCoderStackPointer(t *testing.T) { } } -func TestCoderBufferGrowth(t *testing.T) { - // The growth rate of the internal buffer should be exponential, - // but should not grow unbounded. - checkGrowth := func(ns []int) { - t.Helper() - var sumBytes, sumRates, numGrows float64 - prev := ns[0] - for i := 1; i < len(ns)-1; i++ { - n := ns[i] - if n != prev { - sumRates += float64(n) / float64(prev) - numGrows++ - prev = n - } - if n > 1<<20 { - t.Fatalf("single Read/Write too large: %d", n) - } - sumBytes += float64(n) - } - if mean := sumBytes / float64(len(ns)); mean < 1<<10 { - t.Fatalf("average Read/Write too small: %0.1f", mean) - } - switch mean := sumRates / numGrows; { - case mean < 1.25: - t.Fatalf("average growth rate too slow: %0.3f", mean) - case mean > 2.00: - t.Fatalf("average growth rate too fast: %0.3f", mean) - } - } - - bb := &bytesBuffer{new(bytes.Buffer)} - - var writeSizes []int - if err := MarshalWrite(WriterFunc(func(b []byte) (int, error) { - n, err := bb.Write(b) - writeSizes = append(writeSizes, n) - return n, err - }), make([]struct{}, 1e6)); err != nil { - t.Fatalf("MarshalWrite error: %v", err) - } - checkGrowth(writeSizes) - - var readSizes []int - if err := 
UnmarshalRead(ReaderFunc(func(b []byte) (int, error) { - n, err := bb.Read(b) - readSizes = append(readSizes, n) - return n, err - }), new([]struct{})); err != nil { - t.Fatalf("UnmarshalRead error: %v", err) - } - checkGrowth(readSizes) -} - func TestCoderMaxDepth(t *testing.T) { trimArray := func(b []byte) []byte { return b[len(`[`) : len(b)-len(`]`)] } maxArrays := []byte(strings.Repeat(`[`, maxNestingDepth+1) + strings.Repeat(`]`, maxNestingDepth+1)) @@ -656,17 +607,17 @@ func TestCoderMaxDepth(t *testing.T) { } t.Run("ArraysValid/SingleValue", func(t *testing.T) { - dec.reset(trimArray(maxArrays), nil) + dec.s.reset(trimArray(maxArrays), nil) checkReadValue(t, maxNestingDepth*len(`[]`), nil) }) t.Run("ArraysValid/TokenThenValue", func(t *testing.T) { - dec.reset(trimArray(maxArrays), nil) + dec.s.reset(trimArray(maxArrays), nil) checkReadToken(t, '[', nil) checkReadValue(t, (maxNestingDepth-1)*len(`[]`), nil) checkReadToken(t, ']', nil) }) t.Run("ArraysValid/AllTokens", func(t *testing.T) { - dec.reset(trimArray(maxArrays), nil) + dec.s.reset(trimArray(maxArrays), nil) for i := 0; i < maxNestingDepth; i++ { checkReadToken(t, '[', nil) } @@ -676,16 +627,16 @@ func TestCoderMaxDepth(t *testing.T) { }) t.Run("ArraysInvalid/SingleValue", func(t *testing.T) { - dec.reset(maxArrays, nil) + dec.s.reset(maxArrays, nil) checkReadValue(t, 0, errMaxDepth.withOffset(maxNestingDepth)) }) t.Run("ArraysInvalid/TokenThenValue", func(t *testing.T) { - dec.reset(maxArrays, nil) + dec.s.reset(maxArrays, nil) checkReadToken(t, '[', nil) checkReadValue(t, 0, errMaxDepth.withOffset(maxNestingDepth)) }) t.Run("ArraysInvalid/AllTokens", func(t *testing.T) { - dec.reset(maxArrays, nil) + dec.s.reset(maxArrays, nil) for i := 0; i < maxNestingDepth; i++ { checkReadToken(t, '[', nil) } @@ -693,18 +644,18 @@ func TestCoderMaxDepth(t *testing.T) { }) t.Run("ObjectsValid/SingleValue", func(t *testing.T) { - dec.reset(trimObject(maxObjects), nil) + dec.s.reset(trimObject(maxObjects), nil) 
checkReadValue(t, maxNestingDepth*len(`{"":}`)+len(`""`), nil) }) t.Run("ObjectsValid/TokenThenValue", func(t *testing.T) { - dec.reset(trimObject(maxObjects), nil) + dec.s.reset(trimObject(maxObjects), nil) checkReadToken(t, '{', nil) checkReadToken(t, '"', nil) checkReadValue(t, (maxNestingDepth-1)*len(`{"":}`)+len(`""`), nil) checkReadToken(t, '}', nil) }) t.Run("ObjectsValid/AllTokens", func(t *testing.T) { - dec.reset(trimObject(maxObjects), nil) + dec.s.reset(trimObject(maxObjects), nil) for i := 0; i < maxNestingDepth; i++ { checkReadToken(t, '{', nil) checkReadToken(t, '"', nil) @@ -716,17 +667,17 @@ func TestCoderMaxDepth(t *testing.T) { }) t.Run("ObjectsInvalid/SingleValue", func(t *testing.T) { - dec.reset(maxObjects, nil) + dec.s.reset(maxObjects, nil) checkReadValue(t, 0, errMaxDepth.withOffset(maxNestingDepth*len64(`{"":`))) }) t.Run("ObjectsInvalid/TokenThenValue", func(t *testing.T) { - dec.reset(maxObjects, nil) + dec.s.reset(maxObjects, nil) checkReadToken(t, '{', nil) checkReadToken(t, '"', nil) checkReadValue(t, 0, errMaxDepth.withOffset(maxNestingDepth*len64(`{"":`))) }) t.Run("ObjectsInvalid/AllTokens", func(t *testing.T) { - dec.reset(maxObjects, nil) + dec.s.reset(maxObjects, nil) for i := 0; i < maxNestingDepth; i++ { checkReadToken(t, '{', nil) checkReadToken(t, '"', nil) @@ -751,19 +702,19 @@ func TestCoderMaxDepth(t *testing.T) { } t.Run("Arrays/SingleValue", func(t *testing.T) { - enc.reset(enc.buf[:0], nil) + enc.s.reset(enc.s.Buf[:0], nil) checkWriteValue(t, maxArrays, errMaxDepth.withOffset(maxNestingDepth)) checkWriteValue(t, trimArray(maxArrays), nil) }) t.Run("Arrays/TokenThenValue", func(t *testing.T) { - enc.reset(enc.buf[:0], nil) + enc.s.reset(enc.s.Buf[:0], nil) checkWriteToken(t, ArrayStart, nil) checkWriteValue(t, trimArray(maxArrays), errMaxDepth.withOffset(maxNestingDepth)) checkWriteValue(t, trimArray(trimArray(maxArrays)), nil) checkWriteToken(t, ArrayEnd, nil) }) t.Run("Arrays/AllTokens", func(t *testing.T) { - 
enc.reset(enc.buf[:0], nil) + enc.s.reset(enc.s.Buf[:0], nil) for i := 0; i < maxNestingDepth; i++ { checkWriteToken(t, ArrayStart, nil) } @@ -774,12 +725,12 @@ func TestCoderMaxDepth(t *testing.T) { }) t.Run("Objects/SingleValue", func(t *testing.T) { - enc.reset(enc.buf[:0], nil) + enc.s.reset(enc.s.Buf[:0], nil) checkWriteValue(t, maxObjects, errMaxDepth.withOffset(maxNestingDepth*len64(`{"":`))) checkWriteValue(t, trimObject(maxObjects), nil) }) t.Run("Objects/TokenThenValue", func(t *testing.T) { - enc.reset(enc.buf[:0], nil) + enc.s.reset(enc.s.Buf[:0], nil) checkWriteToken(t, ObjectStart, nil) checkWriteToken(t, String(""), nil) checkWriteValue(t, trimObject(maxObjects), errMaxDepth.withOffset(maxNestingDepth*len64(`{"":`))) @@ -787,7 +738,7 @@ func TestCoderMaxDepth(t *testing.T) { checkWriteToken(t, ObjectEnd, nil) }) t.Run("Objects/AllTokens", func(t *testing.T) { - enc.reset(enc.buf[:0], nil) + enc.s.reset(enc.s.Buf[:0], nil) for i := 0; i < maxNestingDepth-1; i++ { checkWriteToken(t, ObjectStart, nil) checkWriteToken(t, String(""), nil) @@ -803,14 +754,6 @@ func TestCoderMaxDepth(t *testing.T) { }) } -type ReaderFunc func([]byte) (int, error) - -func (f ReaderFunc) Read(b []byte) (int, error) { return f(b) } - -type WriterFunc func([]byte) (int, error) - -func (f WriterFunc) Write(b []byte) (int, error) { return f(b) } - // FaultyBuffer implements io.Reader and io.Writer. // It may process fewer bytes than the provided buffer // and may randomly return an error. diff --git a/decode.go b/jsontext/decode.go similarity index 51% rename from decode.go rename to jsontext/decode.go index cda36e7..4c75d74 100644 --- a/decode.go +++ b/jsontext/decode.go @@ -2,20 +2,16 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-package json +package jsontext import ( "bytes" "errors" "io" - "math" - "slices" - "strconv" - "unicode/utf16" - "unicode/utf8" "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) // NOTE: The logic for decoding is complicated by the fact that reading from @@ -77,14 +73,18 @@ import ( // may not represent the most sensible method to call for any given token/value. // For example, it is probably more common to call ReadToken to obtain a // string token for object names. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Decoder] instead. type Decoder struct { + s decoderState +} + +// decoderState is the low-level state of Decoder. +// It has exported fields and method for use by the "json" package. +type decoderState struct { state decodeBuffer - options jsonopts.Struct + jsonopts.Struct - stringCache *stringCache // only used when unmarshaling + StringCache *[256]string // only used when unmarshaling; identical to json.stringCache } // decodeBuffer is a buffer split into 4 segments: @@ -117,8 +117,6 @@ type decodeBuffer struct { // If r is a bytes.Buffer, then the decoder parses directly from the buffer // without first copying the contents to an intermediate buffer. // Additional writes to the buffer must not occur while the decoder is in use. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.NewDecoder] instead. func NewDecoder(r io.Reader, opts ...Options) *Decoder { d := new(Decoder) d.Reset(r, opts...) @@ -134,30 +132,30 @@ func (d *Decoder) Reset(r io.Reader, opts ...Options) { panic("jsontext: invalid nil Decoder") case r == nil: panic("jsontext: invalid nil io.Writer") - case d.options.Flags.Get(jsonflags.WithinArshalCall): + case d.s.Flags.Get(jsonflags.WithinArshalCall): panic("jsontext: cannot reset Decoder passed to json.UnmarshalerV2") } - d.reset(nil, r, opts...) + d.s.reset(nil, r, opts...) 
} -func (d *Decoder) reset(b []byte, r io.Reader, opts ...Options) { +func (d *decoderState) reset(b []byte, r io.Reader, opts ...Options) { d.state.reset() d.decodeBuffer = decodeBuffer{buf: b, rd: r} - d.options = jsonopts.Struct{} - d.options.Join(opts...) + d.Struct = jsonopts.Struct{} + d.Struct.Join(opts...) } var errBufferWriteAfterNext = errors.New("invalid bytes.Buffer.Write call after calling bytes.Buffer.Next") // fetch reads at least 1 byte from the underlying io.Reader. // It returns io.ErrUnexpectedEOF if zero bytes were read and io.EOF was seen. -func (d *Decoder) fetch() error { +func (d *decoderState) fetch() error { if d.rd == nil { return io.ErrUnexpectedEOF } // Inform objectNameStack that we are about to fetch new buffer content. - d.names.copyQuotedBuffer(d.buf) + d.Names.copyQuotedBuffer(d.buf) // Specialize bytes.Buffer for better performance. if bb, ok := d.rd.(*bytes.Buffer); ok { @@ -268,12 +266,15 @@ func (d *decodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) erro func (d *decodeBuffer) previousOffsetStart() int64 { return d.baseOffset + int64(d.prevStart) } func (d *decodeBuffer) previousOffsetEnd() int64 { return d.baseOffset + int64(d.prevEnd) } -func (d *decodeBuffer) previousBuffer() []byte { return d.buf[d.prevStart:d.prevEnd] } +func (d *decodeBuffer) PreviousBuffer() []byte { return d.buf[d.prevStart:d.prevEnd] } func (d *decodeBuffer) unreadBuffer() []byte { return d.buf[d.prevEnd:len(d.buf)] } // PeekKind retrieves the next token kind, but does not advance the read offset. // It returns 0 if there are no more tokens. func (d *Decoder) PeekKind() Kind { + return d.s.PeekKind() +} +func (d *decoderState) PeekKind() Kind { // Check whether we have a cached peek result. if d.peekPos > 0 { return Kind(d.buf[d.peekPos]).normalize() @@ -284,10 +285,10 @@ func (d *Decoder) PeekKind() Kind { pos := d.prevEnd // Consume leading whitespace. 
- pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { - if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 { + if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { err = io.EOF // EOF possibly if no Tokens present after top-level value } d.peekPos, d.peekErr = -1, err @@ -300,7 +301,7 @@ func (d *Decoder) PeekKind() Kind { if c := d.buf[pos]; c == ':' || c == ',' { delim = c pos += 1 - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { d.peekPos, d.peekErr = -1, d.checkDelimBeforeIOError(delim, err) @@ -309,7 +310,7 @@ func (d *Decoder) PeekKind() Kind { } } next := Kind(d.buf[pos]).normalize() - if d.tokens.needDelim(next) != delim { + if d.Tokens.needDelim(next) != delim { d.peekPos, d.peekErr = -1, d.checkDelim(delim, next) return invalidKind } @@ -324,47 +325,51 @@ func (d *Decoder) PeekKind() Kind { // checkDelimBeforeIOError checks whether the delim is even valid // before returning an IO error, which occurs after the delim. -func (d *Decoder) checkDelimBeforeIOError(delim byte, err error) error { +func (d *decoderState) checkDelimBeforeIOError(delim byte, err error) error { // Since an IO error occurred, we do not know what the next kind is. // However, knowing the next kind is necessary to validate // whether the current delim is at least potentially valid. // Since a JSON string is always valid as the next token, // conservatively assume that is the next kind for validation. const next = Kind('"') - if d.tokens.needDelim(next) != delim { + if d.Tokens.needDelim(next) != delim { err = d.checkDelim(delim, next) } return err } // checkDelim checks whether delim is valid for the given next kind. 
-func (d *Decoder) checkDelim(delim byte, next Kind) error { +func (d *decoderState) checkDelim(delim byte, next Kind) error { pos := d.prevEnd // restore position to right after leading whitespace - pos += consumeWhitespace(d.buf[pos:]) - err := d.tokens.checkDelim(delim, next) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) + err := d.Tokens.checkDelim(delim, next) return d.injectSyntacticErrorWithPosition(err, pos) } // SkipValue is semantically equivalent to calling ReadValue and discarding // the result except that memory is not wasted trying to hold the entire result. func (d *Decoder) SkipValue() error { + return d.s.SkipValue() +} +func (d *decoderState) SkipValue() error { switch d.PeekKind() { case '{', '[': // For JSON objects and arrays, keep skipping all tokens // until the depth matches the starting depth. - depth := d.tokens.depth() + depth := d.Tokens.Depth() for { if _, err := d.ReadToken(); err != nil { return err } - if depth >= d.tokens.depth() { + if depth >= d.Tokens.Depth() { return nil } } default: // Trying to skip a value when the next token is a '}' or ']' // will result in an error being returned here. - if _, err := d.ReadValue(); err != nil { + var flags jsonwire.ValueFlags + if _, err := d.ReadValue(&flags); err != nil { return err } return nil @@ -375,6 +380,9 @@ func (d *Decoder) SkipValue() error { // The returned token is only valid until the next Peek, Read, or Skip call. // It returns io.EOF if there are no more tokens. func (d *Decoder) ReadToken() (Token, error) { + return d.s.ReadToken() +} +func (d *decoderState) ReadToken() (Token, error) { // Determine the next kind. var err error var next Kind @@ -393,10 +401,10 @@ func (d *Decoder) ReadToken() (Token, error) { pos = d.prevEnd // Consume leading whitespace. 
- pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { - if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 { + if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { err = io.EOF // EOF possibly if no Tokens present after top-level value } return Token{}, err @@ -408,7 +416,7 @@ func (d *Decoder) ReadToken() (Token, error) { if c := d.buf[pos]; c == ':' || c == ',' { delim = c pos += 1 - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return Token{}, d.checkDelimBeforeIOError(delim, err) @@ -416,7 +424,7 @@ func (d *Decoder) ReadToken() (Token, error) { } } next = Kind(d.buf[pos]).normalize() - if d.tokens.needDelim(next) != delim { + if d.Tokens.needDelim(next) != delim { return Token{}, d.checkDelim(delim, next) } } @@ -425,7 +433,7 @@ func (d *Decoder) ReadToken() (Token, error) { var n int switch next { case 'n': - if consumeNull(d.buf[pos:]) == 0 { + if jsonwire.ConsumeNull(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "null") if err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) @@ -433,14 +441,14 @@ func (d *Decoder) ReadToken() (Token, error) { } else { pos += len("null") } - if err = d.tokens.appendLiteral(); err != nil { + if err = d.Tokens.appendLiteral(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("null")) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos return Null, nil case 'f': - if consumeFalse(d.buf[pos:]) == 0 { + if jsonwire.ConsumeFalse(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "false") if err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) @@ -448,14 +456,14 @@ func (d *Decoder) ReadToken() (Token, error) { } else { pos += len("false") } - if err = d.tokens.appendLiteral(); err != nil { + if err = 
d.Tokens.appendLiteral(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("false")) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos return False, nil case 't': - if consumeTrue(d.buf[pos:]) == 0 { + if jsonwire.ConsumeTrue(d.buf[pos:]) == 0 { pos, err = d.consumeLiteral(pos, "true") if err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) @@ -463,15 +471,15 @@ func (d *Decoder) ReadToken() (Token, error) { } else { pos += len("true") } - if err = d.tokens.appendLiteral(); err != nil { + if err = d.Tokens.appendLiteral(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("true")) // report position at start of literal } d.prevStart, d.prevEnd = pos, pos return True, nil case '"': - var flags valueFlags // TODO: Preserve this in Token? - if n = consumeSimpleString(d.buf[pos:]); n == 0 { + var flags jsonwire.ValueFlags // TODO: Preserve this in Token? + if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 { oldAbsPos := d.baseOffset + int64(pos) pos, err = d.consumeString(&flags, pos) newAbsPos := d.baseOffset + int64(pos) @@ -482,17 +490,17 @@ func (d *Decoder) ReadToken() (Token, error) { } else { pos += n } - if !d.options.Flags.Get(jsonflags.AllowDuplicateNames) && d.tokens.last.needObjectName() { - if !d.tokens.last.isValidNamespace() { + if !d.Flags.Get(jsonflags.AllowDuplicateNames) && d.Tokens.Last.NeedObjectName() { + if !d.Tokens.Last.isValidNamespace() { return Token{}, errInvalidNamespace } - if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) { + if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) { err = newDuplicateNameError(d.buf[pos-n : pos]) return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string } - d.names.replaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds + 
d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds } - if err = d.tokens.appendString(); err != nil { + if err = d.Tokens.appendString(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string } d.prevStart, d.prevEnd = pos-n, pos @@ -501,7 +509,7 @@ func (d *Decoder) ReadToken() (Token, error) { case '0': // NOTE: Since JSON numbers are not self-terminating, // we need to make sure that the next byte is not part of a number. - if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { + if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { oldAbsPos := d.baseOffset + int64(pos) pos, err = d.consumeNumber(pos) newAbsPos := d.baseOffset + int64(pos) @@ -512,38 +520,38 @@ func (d *Decoder) ReadToken() (Token, error) { } else { pos += n } - if err = d.tokens.appendNumber(); err != nil { + if err = d.Tokens.appendNumber(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of number } d.prevStart, d.prevEnd = pos-n, pos return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil case '{': - if err = d.tokens.pushObject(); err != nil { + if err = d.Tokens.pushObject(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) } - if !d.options.Flags.Get(jsonflags.AllowDuplicateNames) { - d.names.push() - d.namespaces.push() + if !d.Flags.Get(jsonflags.AllowDuplicateNames) { + d.Names.push() + d.Namespaces.push() } pos += 1 d.prevStart, d.prevEnd = pos, pos return ObjectStart, nil case '}': - if err = d.tokens.popObject(); err != nil { + if err = d.Tokens.popObject(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) } - if !d.options.Flags.Get(jsonflags.AllowDuplicateNames) { - d.names.pop() - d.namespaces.pop() + if !d.Flags.Get(jsonflags.AllowDuplicateNames) { + d.Names.pop() + d.Namespaces.pop() } pos += 1 d.prevStart, d.prevEnd = 
pos, pos return ObjectEnd, nil case '[': - if err = d.tokens.pushArray(); err != nil { + if err = d.Tokens.pushArray(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) } pos += 1 @@ -551,7 +559,7 @@ func (d *Decoder) ReadToken() (Token, error) { return ArrayStart, nil case ']': - if err = d.tokens.popArray(); err != nil { + if err = d.Tokens.popArray(); err != nil { return Token{}, d.injectSyntacticErrorWithPosition(err, pos) } pos += 1 @@ -564,20 +572,6 @@ func (d *Decoder) ReadToken() (Token, error) { } } -type valueFlags uint - -const ( - _ valueFlags = (1 << iota) / 2 // powers of two starting with zero - - stringNonVerbatim // string cannot be naively treated as valid UTF-8 - stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2. - // TODO: Track whether a number is a non-integer? -) - -func (f *valueFlags) set(f2 valueFlags) { *f |= f2 } -func (f valueFlags) isVerbatim() bool { return f&stringNonVerbatim == 0 } -func (f valueFlags) isCanonical() bool { return f&stringNonCanonical == 0 } - // ReadValue returns the next raw JSON value, advancing the read offset. // The value is stripped of any leading or trailing whitespace. // The returned value is only valid until the next Peek, Read, or Skip call and @@ -586,10 +580,10 @@ func (f valueFlags) isCanonical() bool { return f&stringNonCanonical == 0 } // then it reports a SyntacticError and the internal state remains unchanged. // It returns io.EOF if there are no more values. func (d *Decoder) ReadValue() (RawValue, error) { - var flags valueFlags - return d.readValue(&flags) + var flags jsonwire.ValueFlags + return d.s.ReadValue(&flags) } -func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) { +func (d *decoderState) ReadValue(flags *jsonwire.ValueFlags) (RawValue, error) { // Determine the next kind. 
var err error var next Kind @@ -608,10 +602,10 @@ func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) { pos = d.prevEnd // Consume leading whitespace. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { - if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 { + if err == io.ErrUnexpectedEOF && d.Tokens.Depth() == 1 { err = io.EOF // EOF possibly if no Tokens present after top-level value } return nil, err @@ -623,7 +617,7 @@ func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) { if c := d.buf[pos]; c == ':' || c == ',' { delim = c pos += 1 - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return nil, d.checkDelimBeforeIOError(delim, err) @@ -631,14 +625,14 @@ func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) { } } next = Kind(d.buf[pos]).normalize() - if d.tokens.needDelim(next) != delim { + if d.Tokens.needDelim(next) != delim { return nil, d.checkDelim(delim, next) } } // Handle the next value. 
oldAbsPos := d.baseOffset + int64(pos) - pos, err = d.consumeValue(flags, pos, d.tokens.depth()) + pos, err = d.consumeValue(flags, pos, d.Tokens.Depth()) newAbsPos := d.baseOffset + int64(pos) n := int(newAbsPos - oldAbsPos) if err != nil { @@ -646,34 +640,34 @@ func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) { } switch next { case 'n', 't', 'f': - err = d.tokens.appendLiteral() + err = d.Tokens.appendLiteral() case '"': - if !d.options.Flags.Get(jsonflags.AllowDuplicateNames) && d.tokens.last.needObjectName() { - if !d.tokens.last.isValidNamespace() { + if !d.Flags.Get(jsonflags.AllowDuplicateNames) && d.Tokens.Last.NeedObjectName() { + if !d.Tokens.Last.isValidNamespace() { err = errInvalidNamespace break } - if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) { + if d.Tokens.Last.isActiveNamespace() && !d.Namespaces.Last().insertQuoted(d.buf[pos-n:pos], flags.IsVerbatim()) { err = newDuplicateNameError(d.buf[pos-n : pos]) break } - d.names.replaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds + d.Names.ReplaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds } - err = d.tokens.appendString() + err = d.Tokens.appendString() case '0': - err = d.tokens.appendNumber() + err = d.Tokens.appendNumber() case '{': - if err = d.tokens.pushObject(); err != nil { + if err = d.Tokens.pushObject(); err != nil { break } - if err = d.tokens.popObject(); err != nil { + if err = d.Tokens.popObject(); err != nil { panic("BUG: popObject should never fail immediately after pushObject: " + err.Error()) } case '[': - if err = d.tokens.pushArray(); err != nil { + if err = d.Tokens.pushArray(); err != nil { break } - if err = d.tokens.popArray(); err != nil { + if err = d.Tokens.popArray(); err != nil { panic("BUG: popArray should never fail immediately after pushArray: " + err.Error()) } } @@ -685,8 +679,8 @@ func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) 
{ return d.buf[pos-n : pos : pos], nil } -// checkEOF verifies that the input has no more data. -func (d *Decoder) checkEOF() error { +// CheckEOF verifies that the input has no more data. +func (d *decoderState) CheckEOF() error { switch pos, err := d.consumeWhitespace(d.prevEnd); err { case nil: err := newInvalidCharacterError(d.buf[pos:], "after top-level value") @@ -704,7 +698,7 @@ func (d *Decoder) checkEOF() error { // // The following pattern is common in this implementation: // -// pos += consumeWhitespace(d.buf[pos:]) +// pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) // if d.needMore(pos) { // if pos, err = d.consumeWhitespace(pos); err != nil { // return ... @@ -715,9 +709,9 @@ func (d *Decoder) checkEOF() error { // consumeWhitespace must be inlined. The body of the if statement is // executed only in rare situations where we need to fetch more data. // Since fetching may return an error, we also need to check the error. -func (d *Decoder) consumeWhitespace(pos int) (newPos int, err error) { +func (d *decoderState) consumeWhitespace(pos int) (newPos int, err error) { for { - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { absPos := d.baseOffset + int64(pos) err = d.fetch() // will mutate d.buf and invalidate pos @@ -733,31 +727,31 @@ func (d *Decoder) consumeWhitespace(pos int) (newPos int, err error) { // consumeValue consumes a single JSON value starting at d.buf[pos:]. // It returns the new position in d.buf immediately after the value. 
-func (d *Decoder) consumeValue(flags *valueFlags, pos, depth int) (newPos int, err error) { +func (d *decoderState) consumeValue(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) { for { var n int var err error switch next := Kind(d.buf[pos]).normalize(); next { case 'n': - if n = consumeNull(d.buf[pos:]); n == 0 { - n, err = consumeLiteral(d.buf[pos:], "null") + if n = jsonwire.ConsumeNull(d.buf[pos:]); n == 0 { + n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "null") } case 'f': - if n = consumeFalse(d.buf[pos:]); n == 0 { - n, err = consumeLiteral(d.buf[pos:], "false") + if n = jsonwire.ConsumeFalse(d.buf[pos:]); n == 0 { + n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "false") } case 't': - if n = consumeTrue(d.buf[pos:]); n == 0 { - n, err = consumeLiteral(d.buf[pos:], "true") + if n = jsonwire.ConsumeTrue(d.buf[pos:]); n == 0 { + n, err = jsonwire.ConsumeLiteral(d.buf[pos:], "true") } case '"': - if n = consumeSimpleString(d.buf[pos:]); n == 0 { + if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 { return d.consumeString(flags, pos) } case '0': // NOTE: Since JSON numbers are not self-terminating, // we need to make sure that the next byte is not part of a number. - if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { + if n = jsonwire.ConsumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { return d.consumeNumber(pos) } case '{': @@ -782,9 +776,9 @@ func (d *Decoder) consumeValue(flags *valueFlags, pos, depth int) (newPos int, e // consumeLiteral consumes a single JSON literal starting at d.buf[pos:]. // It returns the new position in d.buf immediately after the literal. 
-func (d *Decoder) consumeLiteral(pos int, lit string) (newPos int, err error) { +func (d *decoderState) consumeLiteral(pos int, lit string) (newPos int, err error) { for { - n, err := consumeLiteral(d.buf[pos:], lit) + n, err := jsonwire.ConsumeLiteral(d.buf[pos:], lit) if err == io.ErrUnexpectedEOF { absPos := d.baseOffset + int64(pos) err = d.fetch() // will mutate d.buf and invalidate pos @@ -800,10 +794,10 @@ func (d *Decoder) consumeLiteral(pos int, lit string) (newPos int, err error) { // consumeString consumes a single JSON string starting at d.buf[pos:]. // It returns the new position in d.buf immediately after the string. -func (d *Decoder) consumeString(flags *valueFlags, pos int) (newPos int, err error) { +func (d *decoderState) consumeString(flags *jsonwire.ValueFlags, pos int) (newPos int, err error) { var n int for { - n, err = consumeStringResumable(flags, d.buf[pos:], n, !d.options.Flags.Get(jsonflags.AllowInvalidUTF8)) + n, err = jsonwire.ConsumeStringResumable(flags, d.buf[pos:], n, !d.Flags.Get(jsonflags.AllowInvalidUTF8)) if err == io.ErrUnexpectedEOF { absPos := d.baseOffset + int64(pos) err = d.fetch() // will mutate d.buf and invalidate pos @@ -819,11 +813,11 @@ func (d *Decoder) consumeString(flags *valueFlags, pos int) (newPos int, err err // consumeNumber consumes a single JSON number starting at d.buf[pos:]. // It returns the new position in d.buf immediately after the number. -func (d *Decoder) consumeNumber(pos int) (newPos int, err error) { +func (d *decoderState) consumeNumber(pos int) (newPos int, err error) { var n int - var state consumeNumberState + var state jsonwire.ConsumeNumberState for { - n, state, err = consumeNumberResumable(d.buf[pos:], n, state) + n, state, err = jsonwire.ConsumeNumberResumable(d.buf[pos:], n, state) // NOTE: Since JSON numbers are not self-terminating, // we need to make sure that the next byte is not part of a number. 
if err == io.ErrUnexpectedEOF || d.needMore(pos+n) { @@ -845,13 +839,13 @@ func (d *Decoder) consumeNumber(pos int) (newPos int, err error) { // consumeObject consumes a single JSON object starting at d.buf[pos:]. // It returns the new position in d.buf immediately after the object. -func (d *Decoder) consumeObject(flags *valueFlags, pos, depth int) (newPos int, err error) { +func (d *decoderState) consumeObject(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) { var n int var names *objectNamespace - if !d.options.Flags.Get(jsonflags.AllowDuplicateNames) { - d.namespaces.push() - defer d.namespaces.pop() - names = d.namespaces.last() + if !d.Flags.Get(jsonflags.AllowDuplicateNames) { + d.Namespaces.push() + defer d.Namespaces.pop() + names = d.Namespaces.Last() } // Handle before start. @@ -863,7 +857,7 @@ func (d *Decoder) consumeObject(flags *valueFlags, pos, depth int) (newPos int, pos++ // Handle after start. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -877,31 +871,31 @@ func (d *Decoder) consumeObject(flags *valueFlags, pos, depth int) (newPos int, depth++ for { // Handle before name. 
- pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err } } - var flags2 valueFlags - if n = consumeSimpleString(d.buf[pos:]); n == 0 { + var flags2 jsonwire.ValueFlags + if n = jsonwire.ConsumeSimpleString(d.buf[pos:]); n == 0 { oldAbsPos := d.baseOffset + int64(pos) pos, err = d.consumeString(&flags2, pos) newAbsPos := d.baseOffset + int64(pos) n = int(newAbsPos - oldAbsPos) - flags.set(flags2) + flags.Join(flags2) if err != nil { return pos, err } } else { pos += n } - if !d.options.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(d.buf[pos-n:pos], flags2.isVerbatim()) { + if !d.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(d.buf[pos-n:pos], flags2.IsVerbatim()) { return pos - n, newDuplicateNameError(d.buf[pos-n : pos]) } // Handle after name. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -913,7 +907,7 @@ func (d *Decoder) consumeObject(flags *valueFlags, pos, depth int) (newPos int, pos++ // Handle before value. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -925,7 +919,7 @@ func (d *Decoder) consumeObject(flags *valueFlags, pos, depth int) (newPos int, } // Handle after value. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -946,7 +940,7 @@ func (d *Decoder) consumeObject(flags *valueFlags, pos, depth int) (newPos int, // consumeArray consumes a single JSON array starting at d.buf[pos:]. // It returns the new position in d.buf immediately after the array. 
-func (d *Decoder) consumeArray(flags *valueFlags, pos, depth int) (newPos int, err error) { +func (d *decoderState) consumeArray(flags *jsonwire.ValueFlags, pos, depth int) (newPos int, err error) { // Handle before start. if uint(pos) >= uint(len(d.buf)) || d.buf[pos] != '[' { panic("BUG: consumeArray must be called with a buffer that starts with '['") @@ -956,7 +950,7 @@ func (d *Decoder) consumeArray(flags *valueFlags, pos, depth int) (newPos int, e pos++ // Handle after start. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -970,7 +964,7 @@ func (d *Decoder) consumeArray(flags *valueFlags, pos, depth int) (newPos int, e depth++ for { // Handle before value. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -982,7 +976,7 @@ func (d *Decoder) consumeArray(flags *valueFlags, pos, depth int) (newPos int, e } // Handle after value. - pos += consumeWhitespace(d.buf[pos:]) + pos += jsonwire.ConsumeWhitespace(d.buf[pos:]) if d.needMore(pos) { if pos, err = d.consumeWhitespace(pos); err != nil { return pos, err @@ -1006,7 +1000,7 @@ func (d *Decoder) consumeArray(flags *valueFlags, pos, depth int) (newPos int, e // The number of bytes actually read from the underlying io.Reader may be more // than this offset due to internal buffering effects. func (d *Decoder) InputOffset() int64 { - return d.previousOffsetEnd() + return d.s.previousOffsetEnd() } // UnreadBuffer returns the data remaining in the unread buffer, @@ -1014,7 +1008,7 @@ func (d *Decoder) InputOffset() int64 { // The returned buffer must not be mutated while Decoder continues to be used. // The buffer contents are valid until the next Peek, Read, or Skip call. 
func (d *Decoder) UnreadBuffer() []byte { - return d.unreadBuffer() + return d.s.unreadBuffer() } // StackDepth returns the depth of the state machine for read JSON data. @@ -1024,7 +1018,7 @@ func (d *Decoder) UnreadBuffer() []byte { // The depth is zero-indexed, where zero represents the top-level JSON value. func (d *Decoder) StackDepth() int { // NOTE: Keep in sync with Encoder.StackDepth. - return d.tokens.depth() - 1 + return d.s.Tokens.Depth() - 1 } // StackIndex returns information about the specified stack level. @@ -1041,13 +1035,13 @@ func (d *Decoder) StackDepth() int { // A complete JSON object must have an even length. func (d *Decoder) StackIndex(i int) (Kind, int) { // NOTE: Keep in sync with Encoder.StackIndex. - switch s := d.tokens.index(i); { + switch s := d.s.Tokens.index(i); { case i > 0 && s.isObject(): - return '{', s.length() + return '{', s.Length() case i > 0 && s.isArray(): - return '[', s.length() + return '[', s.Length() default: - return 0, s.length() + return 0, s.Length() } } @@ -1055,639 +1049,6 @@ func (d *Decoder) StackIndex(i int) (Kind, int) { // Object names are only present if AllowDuplicateNames is false, otherwise // object members are represented using their index within the object. func (d *Decoder) StackPointer() string { - d.names.copyQuotedBuffer(d.buf) - return string(d.appendStackPointer(nil)) -} - -// consumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2. -func consumeWhitespace(b []byte) (n int) { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') { - n++ - } - return n -} - -// consumeNull consumes the next JSON null literal per RFC 7159, section 3. -// It returns 0 if it is invalid, in which case consumeLiteral should be used. -func consumeNull(b []byte) int { - // NOTE: The arguments and logic are kept simple to keep this inlinable. 
- const literal = "null" - if len(b) >= len(literal) && string(b[:len(literal)]) == literal { - return len(literal) - } - return 0 -} - -// consumeFalse consumes the next JSON false literal per RFC 7159, section 3. -// It returns 0 if it is invalid, in which case consumeLiteral should be used. -func consumeFalse(b []byte) int { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - const literal = "false" - if len(b) >= len(literal) && string(b[:len(literal)]) == literal { - return len(literal) - } - return 0 -} - -// consumeTrue consumes the next JSON true literal per RFC 7159, section 3. -// It returns 0 if it is invalid, in which case consumeLiteral should be used. -func consumeTrue(b []byte) int { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - const literal = "true" - if len(b) >= len(literal) && string(b[:len(literal)]) == literal { - return len(literal) - } - return 0 -} - -// consumeLiteral consumes the next JSON literal per RFC 7159, section 3. -// If the input appears truncated, it returns io.ErrUnexpectedEOF. -func consumeLiteral(b []byte, lit string) (n int, err error) { - for i := 0; i < len(b) && i < len(lit); i++ { - if b[i] != lit[i] { - return i, newInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")") - } - } - if len(b) < len(lit) { - return len(b), io.ErrUnexpectedEOF - } - return len(lit), nil -} - -// consumeSimpleString consumes the next JSON string per RFC 7159, section 7 -// but is limited to the grammar for an ASCII string without escape sequences. -// It returns 0 if it is invalid or more complicated than a simple string, -// in which case consumeString should be called. -// -// It rejects '<', '>', and '&' for compatibility reasons since these were -// always escaped in the v1 implementation. Thus, if this function reports ok -// then we know that the string would be encoded the same way under both -// v1 or v2 escape semantics. 
-func consumeSimpleString(b []byte) (n int) { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - if len(b) > 0 && b[0] == '"' { - n++ - for len(b) > n && b[n] < utf8.RuneSelf && !escapeHTML.escapeASCII(b[n]) { - n++ - } - if uint(len(b)) > uint(n) && b[n] == '"' { - n++ - return n - } - } - return 0 -} - -// consumeString consumes the next JSON string per RFC 7159, section 7. -// If validateUTF8 is false, then this allows the presence of invalid UTF-8 -// characters within the string itself. -// It reports the number of bytes consumed and whether an error was encountered. -// If the input appears truncated, it returns io.ErrUnexpectedEOF. -func consumeString(flags *valueFlags, b []byte, validateUTF8 bool) (n int, err error) { - return consumeStringResumable(flags, b, 0, validateUTF8) -} - -// consumeStringResumable is identical to consumeString but supports resuming -// from a previous call that returned io.ErrUnexpectedEOF. -func consumeStringResumable(flags *valueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) { - // Consume the leading double quote. - switch { - case resumeOffset > 0: - n = resumeOffset // already handled the leading quote - case uint(len(b)) == 0: - return n, io.ErrUnexpectedEOF - case b[0] == '"': - n++ - default: - return n, newInvalidCharacterError(b[n:], `at start of string (expecting '"')`) - } - - // Consume every character in the string. - for uint(len(b)) > uint(n) { - // Optimize for long sequences of unescaped characters. - noEscape := func(c byte) bool { - return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"' - } - for uint(len(b)) > uint(n) && noEscape(b[n]) { - n++ - } - if uint(len(b)) <= uint(n) { - return n, io.ErrUnexpectedEOF - } - - // Check for terminating double quote. - if b[n] == '"' { - n++ - return n, nil - } - - switch r, rn := utf8.DecodeRune(b[n:]); { - // Handle UTF-8 encoded byte sequence. 
- // Due to specialized handling of ASCII above, we know that - // all normal sequences at this point must be 2 bytes or larger. - case rn > 1: - n += rn - // Handle escape sequence. - case r == '\\': - flags.set(stringNonVerbatim) - resumeOffset = n - if uint(len(b)) < uint(n+2) { - return resumeOffset, io.ErrUnexpectedEOF - } - switch r := b[n+1]; r { - case '/': - // Forward slash is the only character with 3 representations. - // Per RFC 8785, section 3.2.2.2., this must not be escaped. - flags.set(stringNonCanonical) - n += 2 - case '"', '\\', 'b', 'f', 'n', 'r', 't': - n += 2 - case 'u': - if uint(len(b)) < uint(n+6) { - if hasEscapedUTF16Prefix(b[n:], false) { - return resumeOffset, io.ErrUnexpectedEOF - } - flags.set(stringNonCanonical) - return n, newInvalidEscapeSequenceError(b[n:]) - } - v1, ok := parseHexUint16(b[n+2 : n+6]) - if !ok { - flags.set(stringNonCanonical) - return n, newInvalidEscapeSequenceError(b[n : n+6]) - } - // Only certain control characters can use the \uFFFF notation - // for canonical formatting (per RFC 8785, section 3.2.2.2.). - switch v1 { - // \uFFFF notation not permitted for these characters. - case '\b', '\f', '\n', '\r', '\t': - flags.set(stringNonCanonical) - default: - // \uFFFF notation only permitted for control characters. - if v1 >= ' ' { - flags.set(stringNonCanonical) - } else { - // \uFFFF notation must be lower case. 
- for _, c := range b[n+2 : n+6] { - if 'A' <= c && c <= 'F' { - flags.set(stringNonCanonical) - } - } - } - } - n += 6 - - r := rune(v1) - if validateUTF8 && utf16.IsSurrogate(r) { - if uint(len(b)) < uint(n+6) { - if hasEscapedUTF16Prefix(b[n:], true) { - return resumeOffset, io.ErrUnexpectedEOF - } - flags.set(stringNonCanonical) - return n - 6, newInvalidEscapeSequenceError(b[n-6:]) - } else if v2, ok := parseHexUint16(b[n+2 : n+6]); b[n] != '\\' || b[n+1] != 'u' || !ok { - flags.set(stringNonCanonical) - return n - 6, newInvalidEscapeSequenceError(b[n-6 : n+6]) - } else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError { - flags.set(stringNonCanonical) - return n - 6, newInvalidEscapeSequenceError(b[n-6 : n+6]) - } else { - n += 6 - } - } - default: - flags.set(stringNonCanonical) - return n, newInvalidEscapeSequenceError(b[n : n+2]) - } - // Handle invalid UTF-8. - case r == utf8.RuneError: - if !utf8.FullRune(b[n:]) { - return n, io.ErrUnexpectedEOF - } - flags.set(stringNonVerbatim | stringNonCanonical) - if validateUTF8 { - return n, errInvalidUTF8 - } - n++ - // Handle invalid control characters. - case r < ' ': - flags.set(stringNonVerbatim | stringNonCanonical) - return n, newInvalidCharacterError(b[n:], "within string (expecting non-control character)") - default: - panic("BUG: unhandled character " + quoteRune(b[n:])) - } - } - return n, io.ErrUnexpectedEOF -} - -// unescapeString appends the unescaped form of a JSON string in src to dst. -// Any invalid UTF-8 within the string will be replaced with utf8.RuneError, -// but the error will be specified as having encountered such an error. -// The input must be an entire JSON string with no surrounding whitespace. -func unescapeString[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, err error) { - dst = slices.Grow(dst, len(src)) - - // Consume the leading double quote. 
- var i, n int - switch { - case uint(len(src)) == 0: - return dst, io.ErrUnexpectedEOF - case src[0] == '"': - i, n = 1, 1 - default: - return dst, newInvalidCharacterError(src, `at start of string (expecting '"')`) - } - - // Consume every character in the string. - for uint(len(src)) > uint(n) { - // Optimize for long sequences of unescaped characters. - noEscape := func(c byte) bool { - return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"' - } - for uint(len(src)) > uint(n) && noEscape(src[n]) { - n++ - } - if uint(len(src)) <= uint(n) { - dst = append(dst, src[i:n]...) - return dst, io.ErrUnexpectedEOF - } - - // Check for terminating double quote. - if src[n] == '"' { - dst = append(dst, src[i:n]...) - n++ - if n < len(src) { - err = newInvalidCharacterError(src[n:], "after string value") - } - return dst, err - } - - switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { - // Handle UTF-8 encoded byte sequence. - // Due to specialized handling of ASCII above, we know that - // all normal sequences at this point must be 2 bytes or larger. - case rn > 1: - n += rn - // Handle escape sequence. - case r == '\\': - dst = append(dst, src[i:n]...) - - // Handle escape sequence. - if uint(len(src)) < uint(n+2) { - return dst, io.ErrUnexpectedEOF - } - switch r := src[n+1]; r { - case '"', '\\', '/': - dst = append(dst, r) - n += 2 - case 'b': - dst = append(dst, '\b') - n += 2 - case 'f': - dst = append(dst, '\f') - n += 2 - case 'n': - dst = append(dst, '\n') - n += 2 - case 'r': - dst = append(dst, '\r') - n += 2 - case 't': - dst = append(dst, '\t') - n += 2 - case 'u': - if uint(len(src)) < uint(n+6) { - if hasEscapedUTF16Prefix(src[n:], false) { - return dst, io.ErrUnexpectedEOF - } - return dst, newInvalidEscapeSequenceError(src[n:]) - } - v1, ok := parseHexUint16(src[n+2 : n+6]) - if !ok { - return dst, newInvalidEscapeSequenceError(src[n : n+6]) - } - n += 6 - - // Check whether this is a surrogate half. 
- r := rune(v1) - if utf16.IsSurrogate(r) { - r = utf8.RuneError // assume failure unless the following succeeds - if uint(len(src)) < uint(n+6) { - if hasEscapedUTF16Prefix(src[n:], true) { - return utf8.AppendRune(dst, r), io.ErrUnexpectedEOF - } - err = newInvalidEscapeSequenceError(src[n-6:]) - } else if v2, ok := parseHexUint16(src[n+2 : n+6]); src[n] != '\\' || src[n+1] != 'u' || !ok { - err = newInvalidEscapeSequenceError(src[n-6 : n+6]) - } else if r = utf16.DecodeRune(rune(v1), rune(v2)); r == utf8.RuneError { - err = newInvalidEscapeSequenceError(src[n-6 : n+6]) - } else { - n += 6 - } - } - - dst = utf8.AppendRune(dst, r) - default: - return dst, newInvalidEscapeSequenceError(src[n : n+2]) - } - i = n - // Handle invalid UTF-8. - case r == utf8.RuneError: - dst = append(dst, src[i:n]...) - if !utf8.FullRuneInString(string(truncateMaxUTF8(src[n:]))) { - return dst, io.ErrUnexpectedEOF - } - // NOTE: An unescaped string may be longer than the escaped string - // because invalid UTF-8 bytes are being replaced. - dst = append(dst, "\uFFFD"...) - n += rn - i = n - err = errInvalidUTF8 - // Handle invalid control characters. - case r < ' ': - dst = append(dst, src[i:n]...) - return dst, newInvalidCharacterError(src[n:], "within string (expecting non-control character)") - default: - panic("BUG: unhandled character " + quoteRune(src[n:])) - } - } - dst = append(dst, src[i:n]...) - return dst, io.ErrUnexpectedEOF -} - -// hasEscapedUTF16Prefix reports whether b is possibly -// the truncated prefix of a \uFFFF escape sequence. 
-func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool { - for i := 0; i < len(b); i++ { - switch c := b[i]; { - case i == 0 && c != '\\': - return false - case i == 1 && c != 'u': - return false - case i == 2 && lowerSurrogateHalf && c != 'd' && c != 'D': - return false // not within ['\uDC00':'\uDFFF'] - case i == 3 && lowerSurrogateHalf && !('c' <= c && c <= 'f') && !('C' <= c && c <= 'F'): - return false // not within ['\uDC00':'\uDFFF'] - case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'): - return false - } - } - return true -} - -// unescapeStringMayCopy returns the unescaped form of b. -// If there are no escaped characters, the output is simply a subslice of -// the input with the surrounding quotes removed. -// Otherwise, a new buffer is allocated for the output. -func unescapeStringMayCopy(b []byte, isVerbatim bool) []byte { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - if isVerbatim { - return b[len(`"`) : len(b)-len(`"`)] - } - b, _ = unescapeString(nil, b) - return b -} - -// consumeSimpleNumber consumes the next JSON number per RFC 7159, section 6 -// but is limited to the grammar for a positive integer. -// It returns 0 if it is invalid or more complicated than a simple integer, -// in which case consumeNumber should be called. -func consumeSimpleNumber(b []byte) (n int) { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - if len(b) > 0 { - if b[0] == '0' { - n++ - } else if '1' <= b[0] && b[0] <= '9' { - n++ - for len(b) > n && ('0' <= b[n] && b[n] <= '9') { - n++ - } - } else { - return 0 - } - if uint(len(b)) <= uint(n) || (b[n] != '.' 
&& b[n] != 'e' && b[n] != 'E') { - return n - } - } - return 0 -} - -type consumeNumberState uint - -const ( - consumeNumberInit consumeNumberState = iota - beforeIntegerDigits - withinIntegerDigits - beforeFractionalDigits - withinFractionalDigits - beforeExponentDigits - withinExponentDigits -) - -// consumeNumber consumes the next JSON number per RFC 7159, section 6. -// It reports the number of bytes consumed and whether an error was encountered. -// If the input appears truncated, it returns io.ErrUnexpectedEOF. -// -// Note that JSON numbers are not self-terminating. -// If the entire input is consumed, then the caller needs to consider whether -// there may be subsequent unread data that may still be part of this number. -func consumeNumber(b []byte) (n int, err error) { - n, _, err = consumeNumberResumable(b, 0, consumeNumberInit) - return n, err -} - -// consumeNumberResumable is identical to consumeNumber but supports resuming -// from a previous call that returned io.ErrUnexpectedEOF. -func consumeNumberResumable(b []byte, resumeOffset int, state consumeNumberState) (n int, _ consumeNumberState, err error) { - // Jump to the right state when resuming from a partial consumption. - n = resumeOffset - if state > consumeNumberInit { - switch state { - case withinIntegerDigits, withinFractionalDigits, withinExponentDigits: - // Consume leading digits. - for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { - n++ - } - if uint(len(b)) <= uint(n) { - return n, state, nil // still within the same state - } - state++ // switches "withinX" to "beforeY" where Y is the state after X - } - switch state { - case beforeIntegerDigits: - goto beforeInteger - case beforeFractionalDigits: - goto beforeFractional - case beforeExponentDigits: - goto beforeExponent - default: - return n, state, nil - } - } - - // Consume required integer component (with optional minus sign). 
-beforeInteger: - resumeOffset = n - if uint(len(b)) > 0 && b[0] == '-' { - n++ - } - switch { - case uint(len(b)) <= uint(n): - return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF - case b[n] == '0': - n++ - state = beforeFractionalDigits - case '1' <= b[n] && b[n] <= '9': - n++ - for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { - n++ - } - state = withinIntegerDigits - default: - return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)") - } - - // Consume optional fractional component. -beforeFractional: - if uint(len(b)) > uint(n) && b[n] == '.' { - resumeOffset = n - n++ - switch { - case uint(len(b)) <= uint(n): - return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF - case '0' <= b[n] && b[n] <= '9': - n++ - default: - return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)") - } - for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { - n++ - } - state = withinFractionalDigits - } - - // Consume optional exponent component. -beforeExponent: - if uint(len(b)) > uint(n) && (b[n] == 'e' || b[n] == 'E') { - resumeOffset = n - n++ - if uint(len(b)) > uint(n) && (b[n] == '-' || b[n] == '+') { - n++ - } - switch { - case uint(len(b)) <= uint(n): - return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF - case '0' <= b[n] && b[n] <= '9': - n++ - default: - return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)") - } - for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') { - n++ - } - state = withinExponentDigits - } - - return n, state, nil -} - -// parseHexUint16 is similar to strconv.ParseUint, -// but operates directly on []byte and is optimized for base-16. -// See https://go.dev/issue/42429. 
-func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) { - if len(b) != 4 { - return 0, false - } - for i := 0; i < 4; i++ { - c := b[i] - switch { - case '0' <= c && c <= '9': - c = c - '0' - case 'a' <= c && c <= 'f': - c = 10 + c - 'a' - case 'A' <= c && c <= 'F': - c = 10 + c - 'A' - default: - return 0, false - } - v = v*16 + uint16(c) - } - return v, true -} - -// parseDecUint parses b as a decimal unsigned integer according to -// a strict subset of the JSON number grammar, returning the value if valid. -// It returns (0, false) if there is a syntax error and -// returns (math.MaxUint64, false) if there is an overflow. -func parseDecUint(b []byte) (v uint64, ok bool) { - const unsafeWidth = 20 // len(fmt.Sprint(uint64(math.MaxUint64))) - var n int - for ; len(b) > n && ('0' <= b[n] && b[n] <= '9'); n++ { - v = 10*v + uint64(b[n]-'0') - } - switch { - case n == 0 || len(b) != n || (b[0] == '0' && string(b) != "0"): - return 0, false - case n >= unsafeWidth && (b[0] != '1' || v < 1e19 || n > unsafeWidth): - return math.MaxUint64, false - } - return v, true -} - -// parseFloat parses a floating point number according to the Go float grammar. -// Note that the JSON number grammar is a strict subset. -// -// If the number overflows the finite representation of a float, -// then we return MaxFloat since any finite value will always be infinitely -// more accurate at representing another finite value than an infinite value. -func parseFloat(b []byte, bits int) (v float64, ok bool) { - // Fast path for exact integer numbers which fit in the - // 24-bit or 53-bit significand of a float32 or float64. - var negLen int // either 0 or 1 - if len(b) > 0 && b[0] == '-' { - negLen = 1 - } - u, ok := parseDecUint(b[negLen:]) - if ok && ((bits == 32 && u <= 1<<24) || (bits == 64 && u <= 1<<53)) { - return math.Copysign(float64(u), float64(-1*negLen)), true - } - - // Note that the []byte->string conversion unfortunately allocates. 
- // See https://go.dev/issue/42429 for more information. - fv, err := strconv.ParseFloat(string(b), bits) - if math.IsInf(fv, 0) { - switch { - case bits == 32 && math.IsInf(fv, +1): - return +math.MaxFloat32, true - case bits == 64 && math.IsInf(fv, +1): - return +math.MaxFloat64, true - case bits == 32 && math.IsInf(fv, -1): - return -math.MaxFloat32, true - case bits == 64 && math.IsInf(fv, -1): - return -math.MaxFloat64, true - } - } - return fv, err == nil -} - -// truncateMaxUTF8 truncates b such it contains at least one rune. -// -// The utf8 package currently lacks generic variants, which complicates -// generic functions that operates on either []byte or string. -// As a hack, we always call the utf8 function operating on strings, -// but always truncate the input such that the result is identical. -// -// Example usage: -// -// utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) -// -// Converting a []byte to a string is stack allocated since -// truncateMaxUTF8 guarantees that the []byte is short. -func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes { - // TODO(https://go.dev/issue/56948): Remove this function and - // instead directly call generic utf8 functions wherever used. - if len(b) > utf8.UTFMax { - return b[:utf8.UTFMax] - } - return b + d.s.Names.copyQuotedBuffer(d.s.buf) + return string(d.s.appendStackPointer(nil)) } diff --git a/decode_test.go b/jsontext/decode_test.go similarity index 62% rename from decode_test.go rename to jsontext/decode_test.go index eb7686a..fde2e7f 100644 --- a/decode_test.go +++ b/jsontext/decode_test.go @@ -2,14 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-package json +package jsontext import ( "bytes" "errors" "fmt" "io" - "math" "net" "path" "reflect" @@ -787,7 +786,7 @@ func testDecoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, in if gotOffset != wantOffset { t.Fatalf("%s: Decoder.InputOffset = %v, want %v", where, gotOffset, wantOffset) } - gotUnread := string(dec.unreadBuffer()) // should be a prefix of wantUnread + gotUnread := string(dec.s.unreadBuffer()) // should be a prefix of wantUnread wantUnread := in[wantOffset:] if !strings.HasPrefix(wantUnread, gotUnread) { t.Fatalf("%s: Decoder.UnreadBuffer = %v, want %v", where, gotUnread, wantUnread) @@ -1017,430 +1016,3 @@ func TestPeekableDecoder(t *testing.T) { } } } - -func TestConsumeWhitespace(t *testing.T) { - tests := []struct { - in string - want int - }{ - {"", 0}, - {"a", 0}, - {" a", 1}, - {" a ", 1}, - {" \n\r\ta", 4}, - {" \n\r\t \n\r\t \n\r\t \n\r\t", 16}, - {"\u00a0", 0}, // non-breaking space is not JSON whitespace - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - if got := consumeWhitespace([]byte(tt.in)); got != tt.want { - t.Errorf("consumeWhitespace(%q) = %v, want %v", tt.in, got, tt.want) - } - }) - } -} - -func TestConsumeLiteral(t *testing.T) { - tests := []struct { - literal string - in string - want int - wantErr error - }{ - {"null", "", 0, io.ErrUnexpectedEOF}, - {"null", "n", 1, io.ErrUnexpectedEOF}, - {"null", "nu", 2, io.ErrUnexpectedEOF}, - {"null", "nul", 3, io.ErrUnexpectedEOF}, - {"null", "null", 4, nil}, - {"null", "nullx", 4, nil}, - {"null", "x", 0, newInvalidCharacterError("x", "within literal null (expecting 'n')")}, - {"null", "nuxx", 2, newInvalidCharacterError("x", "within literal null (expecting 'l')")}, - - {"false", "", 0, io.ErrUnexpectedEOF}, - {"false", "f", 1, io.ErrUnexpectedEOF}, - {"false", "fa", 2, io.ErrUnexpectedEOF}, - {"false", "fal", 3, io.ErrUnexpectedEOF}, - {"false", "fals", 4, io.ErrUnexpectedEOF}, - {"false", "false", 5, nil}, - {"false", "falsex", 5, nil}, - 
{"false", "x", 0, newInvalidCharacterError("x", "within literal false (expecting 'f')")}, - {"false", "falsx", 4, newInvalidCharacterError("x", "within literal false (expecting 'e')")}, - - {"true", "", 0, io.ErrUnexpectedEOF}, - {"true", "t", 1, io.ErrUnexpectedEOF}, - {"true", "tr", 2, io.ErrUnexpectedEOF}, - {"true", "tru", 3, io.ErrUnexpectedEOF}, - {"true", "true", 4, nil}, - {"true", "truex", 4, nil}, - {"true", "x", 0, newInvalidCharacterError("x", "within literal true (expecting 't')")}, - {"true", "trux", 3, newInvalidCharacterError("x", "within literal true (expecting 'e')")}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - var got int - switch tt.literal { - case "null": - got = consumeNull([]byte(tt.in)) - case "false": - got = consumeFalse([]byte(tt.in)) - case "true": - got = consumeTrue([]byte(tt.in)) - default: - t.Errorf("invalid literal: %v", tt.literal) - } - switch { - case tt.wantErr == nil && got != tt.want: - t.Errorf("consume%v(%q) = %v, want %v", strings.Title(tt.literal), tt.in, got, tt.want) - case tt.wantErr != nil && got != 0: - t.Errorf("consume%v(%q) = %v, want %v", strings.Title(tt.literal), tt.in, got, 0) - } - - got, gotErr := consumeLiteral([]byte(tt.in), tt.literal) - if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { - t.Errorf("consumeLiteral(%q, %q) = (%v, %v), want (%v, %v)", tt.in, tt.literal, got, gotErr, tt.want, tt.wantErr) - } - }) - } -} - -func TestConsumeString(t *testing.T) { - var errPrev = errors.New("same as previous error") - tests := []struct { - in string - simple bool - want int - wantUTF8 int // consumed bytes if validateUTF8 is specified - wantFlags valueFlags - wantUnquote string - wantErr error - wantErrUTF8 error // error if validateUTF8 is specified - wantErrUnquote error - }{ - {``, false, 0, 0, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"`, false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`""`, true, 2, 2, 0, "", nil, nil, nil}, - {`""x`, true, 2, 
2, 0, "", nil, nil, newInvalidCharacterError("x", "after string value")}, - {` ""x`, false, 0, 0, 0, "", newInvalidCharacterError(" ", "at start of string (expecting '\"')"), errPrev, errPrev}, - {`"hello`, false, 6, 6, 0, "hello", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"hello"`, true, 7, 7, 0, "hello", nil, nil, nil}, - {"\"\x00\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", newInvalidCharacterError("\x00", "within string (expecting non-control character)"), errPrev, errPrev}, - {`"\u0000"`, false, 8, 8, stringNonVerbatim, "\x00", nil, nil, nil}, - {"\"\x1f\"", false, 1, 1, stringNonVerbatim | stringNonCanonical, "", newInvalidCharacterError("\x1f", "within string (expecting non-control character)"), errPrev, errPrev}, - {`"\u001f"`, false, 8, 8, stringNonVerbatim, "\x1f", nil, nil, nil}, - {`"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, true, 54, 54, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, nil, nil}, - {"\" !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f\"", true, 41, 41, 0, " !#$%'()*+,-./0123456789:;=?@[]^_`{|}~\x7f", nil, nil, nil}, - {`"&"`, false, 3, 3, 0, "&", nil, nil, nil}, - {`"<"`, false, 3, 3, 0, "<", nil, nil, nil}, - {`">"`, false, 3, 3, 0, ">", nil, nil, nil}, - {"\"x\x80\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, errInvalidUTF8, errPrev}, - {"\"x\xff\"", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", nil, errInvalidUTF8, errPrev}, - {"\"x\xc0", false, 3, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd", io.ErrUnexpectedEOF, errInvalidUTF8, io.ErrUnexpectedEOF}, - {"\"x\xc0\x80\"", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", nil, errInvalidUTF8, errPrev}, - {"\"x\xe0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev}, - {"\"x\xe0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, errInvalidUTF8, io.ErrUnexpectedEOF}, - {"\"x\xe0\x80\x80\"", false, 6, 2, stringNonVerbatim 
| stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, errInvalidUTF8, errPrev}, - {"\"x\xf0", false, 2, 2, 0, "x", io.ErrUnexpectedEOF, errPrev, errPrev}, - {"\"x\xf0\x80", false, 4, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd", io.ErrUnexpectedEOF, errInvalidUTF8, io.ErrUnexpectedEOF}, - {"\"x\xf0\x80\x80", false, 5, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", io.ErrUnexpectedEOF, errInvalidUTF8, io.ErrUnexpectedEOF}, - {"\"x\xf0\x80\x80\x80\"", false, 7, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd\ufffd", nil, errInvalidUTF8, errPrev}, - {"\"x\xed\xba\xad\"", false, 6, 2, stringNonVerbatim | stringNonCanonical, "x\ufffd\ufffd\ufffd", nil, errInvalidUTF8, errPrev}, - {"\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", false, 25, 25, 0, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil}, - {`"¢"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"¢"`[:3], false, 3, 3, 0, "¢", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote - {`"¢"`[:4], false, 4, 4, 0, "¢", nil, nil, nil}, - {`"€"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"€"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"€"`[:4], false, 4, 4, 0, "€", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote - {`"€"`[:5], false, 5, 5, 0, "€", nil, nil, nil}, - {`"𐍈"`[:2], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"𐍈"`[:3], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"𐍈"`[:4], false, 1, 1, 0, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"𐍈"`[:5], false, 5, 5, 0, "𐍈", io.ErrUnexpectedEOF, errPrev, errPrev}, // missing terminating quote - {`"𐍈"`[:6], false, 6, 6, 0, "𐍈", nil, nil, nil}, - {`"x\`, false, 2, 2, stringNonVerbatim, "x", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"x\"`, false, 4, 4, stringNonVerbatim, "x\"", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"x\x"`, false, 2, 2, 
stringNonVerbatim | stringNonCanonical, "x", newInvalidEscapeSequenceError(`\x`), errPrev, errPrev}, - {`"\"\\\b\f\n\r\t"`, false, 16, 16, stringNonVerbatim, "\"\\\b\f\n\r\t", nil, nil, nil}, - {`"/"`, true, 3, 3, 0, "/", nil, nil, nil}, - {`"\/"`, false, 4, 4, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil}, - {`"\u002f"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "/", nil, nil, nil}, - {`"\u`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\uf`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\uff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\ufff`, false, 1, 1, stringNonVerbatim, "", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\ufffd`, false, 7, 7, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\ufffd"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, nil, nil}, - {`"\uABCD"`, false, 8, 8, stringNonVerbatim | stringNonCanonical, "\uabcd", nil, nil, nil}, - {`"\uefX0"`, false, 1, 1, stringNonVerbatim | stringNonCanonical, "", newInvalidEscapeSequenceError(`\uefX0`), errPrev, errPrev}, - {`"\uDEAD`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\uDEAD"`, false, 8, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", nil, newInvalidEscapeSequenceError(`\uDEAD"`), errPrev}, - {`"\uDEAD______"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd______", nil, newInvalidEscapeSequenceError(`\uDEAD______`), errPrev}, - {`"\uDEAD\uXXXX"`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", newInvalidEscapeSequenceError(`\uXXXX`), newInvalidEscapeSequenceError(`\uDEAD\uXXXX`), newInvalidEscapeSequenceError(`\uXXXX`)}, - {`"\uDEAD\uBEEF"`, false, 14, 1, stringNonVerbatim | stringNonCanonical, "\ufffd\ubeef", nil, newInvalidEscapeSequenceError(`\uDEAD\uBEEF`), errPrev}, - {`"\uD800\udea`, false, 7, 
1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, errPrev, errPrev}, - {`"\uD800\udb`, false, 7, 1, stringNonVerbatim | stringNonCanonical, "\ufffd", io.ErrUnexpectedEOF, newInvalidEscapeSequenceError(`\uD800\udb`), io.ErrUnexpectedEOF}, - {`"\uD800\udead"`, false, 14, 14, stringNonVerbatim | stringNonCanonical, "\U000102ad", nil, nil, nil}, - {`"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, false, 50, 50, stringNonVerbatim | stringNonCanonical, "\"\\/\b\f\n\r\t", nil, nil, nil}, - {`"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, false, 56, 56, stringNonVerbatim | stringNonCanonical, "\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, nil, nil}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - if tt.wantErrUTF8 == errPrev { - tt.wantErrUTF8 = tt.wantErr - } - if tt.wantErrUnquote == errPrev { - tt.wantErrUnquote = tt.wantErrUTF8 - } - - switch got := consumeSimpleString([]byte(tt.in)); { - case tt.simple && got != tt.want: - t.Errorf("consumeSimpleString(%q) = %v, want %v", tt.in, got, tt.want) - case !tt.simple && got != 0: - t.Errorf("consumeSimpleString(%q) = %v, want %v", tt.in, got, 0) - } - - var gotFlags valueFlags - got, gotErr := consumeString(&gotFlags, []byte(tt.in), false) - if gotFlags != tt.wantFlags { - t.Errorf("consumeString(%q, false) flags = %v, want %v", tt.in, gotFlags, tt.wantFlags) - } - if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { - t.Errorf("consumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) - } - - got, gotErr = consumeString(&gotFlags, []byte(tt.in), true) - if got != tt.wantUTF8 || !reflect.DeepEqual(gotErr, tt.wantErrUTF8) { - t.Errorf("consumeString(%q, false) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.wantUTF8, tt.wantErrUTF8) - } - - gotUnquote, gotErr := unescapeString(nil, tt.in) - if string(gotUnquote) != tt.wantUnquote || !reflect.DeepEqual(gotErr, tt.wantErrUnquote) { - 
t.Errorf("unescapeString(nil, %q) = (%q, %v), want (%q, %v)", tt.in[:got], gotUnquote, gotErr, tt.wantUnquote, tt.wantErrUnquote) - } - }) - } -} - -func TestConsumeNumber(t *testing.T) { - tests := []struct { - in string - simple bool - want int - wantErr error - }{ - {"", false, 0, io.ErrUnexpectedEOF}, - {`"NaN"`, false, 0, newInvalidCharacterError("\"", "within number (expecting digit)")}, - {`"Infinity"`, false, 0, newInvalidCharacterError("\"", "within number (expecting digit)")}, - {`"-Infinity"`, false, 0, newInvalidCharacterError("\"", "within number (expecting digit)")}, - {".0", false, 0, newInvalidCharacterError(".", "within number (expecting digit)")}, - {"0", true, 1, nil}, - {"-0", false, 2, nil}, - {"+0", false, 0, newInvalidCharacterError("+", "within number (expecting digit)")}, - {"1", true, 1, nil}, - {"-1", false, 2, nil}, - {"00", true, 1, nil}, - {"-00", false, 2, nil}, - {"01", true, 1, nil}, - {"-01", false, 2, nil}, - {"0i", true, 1, nil}, - {"-0i", false, 2, nil}, - {"0f", true, 1, nil}, - {"-0f", false, 2, nil}, - {"9876543210", true, 10, nil}, - {"-9876543210", false, 11, nil}, - {"9876543210x", true, 10, nil}, - {"-9876543210x", false, 11, nil}, - {" 9876543210", true, 0, newInvalidCharacterError(" ", "within number (expecting digit)")}, - {"- 9876543210", false, 1, newInvalidCharacterError(" ", "within number (expecting digit)")}, - {strings.Repeat("9876543210", 1000), true, 10000, nil}, - {"-" + strings.Repeat("9876543210", 1000), false, 1 + 10000, nil}, - {"0.", false, 1, io.ErrUnexpectedEOF}, - {"-0.", false, 2, io.ErrUnexpectedEOF}, - {"0e", false, 1, io.ErrUnexpectedEOF}, - {"-0e", false, 2, io.ErrUnexpectedEOF}, - {"0E", false, 1, io.ErrUnexpectedEOF}, - {"-0E", false, 2, io.ErrUnexpectedEOF}, - {"0.0", false, 3, nil}, - {"-0.0", false, 4, nil}, - {"0e0", false, 3, nil}, - {"-0e0", false, 4, nil}, - {"0E0", false, 3, nil}, - {"-0E0", false, 4, nil}, - {"0.0123456789", false, 12, nil}, - {"-0.0123456789", false, 13, nil}, - 
{"1.f", false, 2, newInvalidCharacterError("f", "within number (expecting digit)")}, - {"-1.f", false, 3, newInvalidCharacterError("f", "within number (expecting digit)")}, - {"1.e", false, 2, newInvalidCharacterError("e", "within number (expecting digit)")}, - {"-1.e", false, 3, newInvalidCharacterError("e", "within number (expecting digit)")}, - {"1e0", false, 3, nil}, - {"-1e0", false, 4, nil}, - {"1E0", false, 3, nil}, - {"-1E0", false, 4, nil}, - {"1Ex", false, 2, newInvalidCharacterError("x", "within number (expecting digit)")}, - {"-1Ex", false, 3, newInvalidCharacterError("x", "within number (expecting digit)")}, - {"1e-0", false, 4, nil}, - {"-1e-0", false, 5, nil}, - {"1e+0", false, 4, nil}, - {"-1e+0", false, 5, nil}, - {"1E-0", false, 4, nil}, - {"-1E-0", false, 5, nil}, - {"1E+0", false, 4, nil}, - {"-1E+0", false, 5, nil}, - {"1E+00500", false, 8, nil}, - {"-1E+00500", false, 9, nil}, - {"1E+00500x", false, 8, nil}, - {"-1E+00500x", false, 9, nil}, - {"9876543210.0123456789e+01234589x", false, 31, nil}, - {"-9876543210.0123456789e+01234589x", false, 32, nil}, - {"1_000_000", true, 1, nil}, - {"0x12ef", true, 1, nil}, - {"0x1p-2", true, 1, nil}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - switch got := consumeSimpleNumber([]byte(tt.in)); { - case tt.simple && got != tt.want: - t.Errorf("consumeSimpleNumber(%q) = %v, want %v", tt.in, got, tt.want) - case !tt.simple && got != 0: - t.Errorf("consumeSimpleNumber(%q) = %v, want %v", tt.in, got, 0) - } - - got, gotErr := consumeNumber([]byte(tt.in)) - if got != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { - t.Errorf("consumeNumber(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) - } - }) - } -} - -func TestParseHexUint16(t *testing.T) { - tests := []struct { - in string - want uint16 - wantOk bool - }{ - {"", 0, false}, - {"a", 0, false}, - {"ab", 0, false}, - {"abc", 0, false}, - {"abcd", 0xabcd, true}, - {"abcde", 0, false}, - {"9eA1", 0x9ea1, true}, 
- {"gggg", 0, false}, - {"0000", 0x0000, true}, - {"1234", 0x1234, true}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - got, gotOk := parseHexUint16([]byte(tt.in)) - if got != tt.want || gotOk != tt.wantOk { - t.Errorf("parseHexUint16(%q) = (0x%04x, %v), want (0x%04x, %v)", tt.in, got, gotOk, tt.want, tt.wantOk) - } - }) - } -} - -func TestParseDecUint(t *testing.T) { - tests := []struct { - in string - want uint64 - wantOk bool - }{ - {"", 0, false}, - {"0", 0, true}, - {"1", 1, true}, - {"-1", 0, false}, - {"1f", 0, false}, - {"00", 0, false}, - {"01", 0, false}, - {"10", 10, true}, - {"10.9", 0, false}, - {" 10", 0, false}, - {"10 ", 0, false}, - {"123456789", 123456789, true}, - {"123456789d", 0, false}, - {"18446744073709551614", math.MaxUint64 - 1, true}, - {"18446744073709551615", math.MaxUint64, true}, - {"18446744073709551616", math.MaxUint64, false}, - {"18446744073709551620", math.MaxUint64, false}, - {"18446744073709551700", math.MaxUint64, false}, - {"18446744073709552000", math.MaxUint64, false}, - {"18446744073709560000", math.MaxUint64, false}, - {"18446744073709600000", math.MaxUint64, false}, - {"18446744073710000000", math.MaxUint64, false}, - {"18446744073800000000", math.MaxUint64, false}, - {"18446744074000000000", math.MaxUint64, false}, - {"18446744080000000000", math.MaxUint64, false}, - {"18446744100000000000", math.MaxUint64, false}, - {"18446745000000000000", math.MaxUint64, false}, - {"18446750000000000000", math.MaxUint64, false}, - {"18446800000000000000", math.MaxUint64, false}, - {"18447000000000000000", math.MaxUint64, false}, - {"18450000000000000000", math.MaxUint64, false}, - {"18500000000000000000", math.MaxUint64, false}, - {"19000000000000000000", math.MaxUint64, false}, - {"19999999999999999999", math.MaxUint64, false}, - {"20000000000000000000", math.MaxUint64, false}, - {"100000000000000000000", math.MaxUint64, false}, - {"99999999999999999999999999999999", math.MaxUint64, false}, - 
{"99999999999999999999999999999999f", 0, false}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - got, gotOk := parseDecUint([]byte(tt.in)) - if got != tt.want || gotOk != tt.wantOk { - t.Errorf("parseDecUint(%q) = (%v, %v), want (%v, %v)", tt.in, got, gotOk, tt.want, tt.wantOk) - } - }) - } -} - -func TestParseFloat(t *testing.T) { - tests := []struct { - in string - want32 float64 - want64 float64 - wantOk bool - }{ - {"0", 0, 0, true}, - {"-1", -1, -1, true}, - {"1", 1, 1, true}, - - {"-16777215", -16777215, -16777215, true}, // -(1<<24 - 1) - {"16777215", 16777215, 16777215, true}, // +(1<<24 - 1) - {"-16777216", -16777216, -16777216, true}, // -(1<<24) - {"16777216", 16777216, 16777216, true}, // +(1<<24) - {"-16777217", -16777216, -16777217, true}, // -(1<<24 + 1) - {"16777217", 16777216, 16777217, true}, // +(1<<24 + 1) - - {"-9007199254740991", -9007199254740992, -9007199254740991, true}, // -(1<<53 - 1) - {"9007199254740991", 9007199254740992, 9007199254740991, true}, // +(1<<53 - 1) - {"-9007199254740992", -9007199254740992, -9007199254740992, true}, // -(1<<53) - {"9007199254740992", 9007199254740992, 9007199254740992, true}, // +(1<<53) - {"-9007199254740993", -9007199254740992, -9007199254740992, true}, // -(1<<53 + 1) - {"9007199254740993", 9007199254740992, 9007199254740992, true}, // +(1<<53 + 1) - - {"-1e1000", -math.MaxFloat32, -math.MaxFloat64, true}, - {"1e1000", +math.MaxFloat32, +math.MaxFloat64, true}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - got32, gotOk32 := parseFloat([]byte(tt.in), 32) - if got32 != tt.want32 || gotOk32 != tt.wantOk { - t.Errorf("parseFloat(%q, 32) = (%v, %v), want (%v, %v)", tt.in, got32, gotOk32, tt.want32, tt.wantOk) - } - - got64, gotOk64 := parseFloat([]byte(tt.in), 64) - if got64 != tt.want64 || gotOk64 != tt.wantOk { - t.Errorf("parseFloat(%q, 64) = (%v, %v), want (%v, %v)", tt.in, got64, gotOk64, tt.want64, tt.wantOk) - } - }) - } -} diff --git a/jsontext/doc.go 
b/jsontext/doc.go new file mode 100644 index 0000000..5440bbe --- /dev/null +++ b/jsontext/doc.go @@ -0,0 +1,69 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package jsontext implements syntactic processing of JSON +// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785. +// JSON is a simple data interchange format that can represent +// primitive data types such as booleans, strings, and numbers, +// in addition to structured data types such as objects and arrays. +// +// The [Encoder] and [Decoder] types are used to encode or decode +// a stream of JSON values or tokens. +// +// # Terminology +// +// This package uses the terms "encode" and "decode" for syntactic functionality +// that is concerned with processing JSON based on its grammar, and +// uses the terms "marshal" and "unmarshal" for semantic functionality +// that determines the meaning of JSON values as Go values and vice-versa. +// It aims to provide a clear distinction between functionality that +// is purely concerned with encoding versus that of marshaling. +// For example, one can directly encode a stream of JSON tokens without +// needing to marshal a concrete Go value representing them. +// Similarly, one can decode a stream of JSON tokens without +// needing to unmarshal them into a concrete Go value. +// +// This package uses JSON terminology when discussing JSON, which may differ +// from related concepts in Go or elsewhere in computing literature. +// +// - A JSON "object" refers to an unordered collection of name/value members. +// - A JSON "array" refers to an ordered sequence of elements. +// - A JSON "value" refers to either a literal (i.e., null, false, or true), +// string, number, object, or array. +// +// See RFC 8259 for more information. 
+// +// # Specifications +// +// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259, +// and RFC 8785. Each RFC is generally a stricter subset of another RFC. +// In increasing order of strictness: +// +// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8 +// and also do not require (but recommend) that object names be unique. +// - RFC 8259 requires the use of UTF-8, +// but does not require (but recommends) that object names be unique. +// - RFC 7493 requires the use of UTF-8 +// and also requires that object names be unique. +// - RFC 8785 defines a canonical representation. It requires the use of UTF-8 +// and also requires that object names be unique and in a specific ordering. +// It specifies exactly how strings and numbers must be formatted. +// +// The primary difference between RFC 4627 and RFC 7159 is that the former +// restricted top-level values to only JSON objects and arrays, while +// RFC 7159 and subsequent RFCs permit top-level values to additionally be +// JSON nulls, booleans, strings, or numbers. +// +// By default, this package operates on RFC 7493, but can be configured +// to operate according to the other RFC specifications. +// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it. +// In particular, it makes specific choices about behavior that RFC 8259 +// leaves as undefined in order to ensure greater interoperability. +package jsontext + +// requireKeyedLiterals can be embedded in a struct to require keyed literals. +type requireKeyedLiterals struct{} + +// nonComparable can be embedded in a struct to prevent comparability. +type nonComparable [0]func() diff --git a/encode.go b/jsontext/encode.go similarity index 54% rename from encode.go rename to jsontext/encode.go index e732ba7..ab4acd0 100644 --- a/encode.go +++ b/jsontext/encode.go @@ -2,20 +2,18 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-package json +package jsontext import ( "bytes" "io" "math" "math/bits" - "slices" "strconv" - "unicode/utf16" - "unicode/utf8" "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) // Encoder is a streaming encoder from raw JSON tokens and values. @@ -47,15 +45,19 @@ import ( // may not represent the most sensible method to call for any given token/value. // For example, it is probably more common to call WriteToken with a string // for object names. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Encoder] instead. type Encoder struct { + s encoderState +} + +// encoderState is the low-level state of Encoder. +// It has exported fields and methods for use by the "json" package. +type encoderState struct { state encodeBuffer - options jsonopts.Struct + jsonopts.Struct - escapeRunes *escapeRunes - seenPointers seenPointers // only used when marshaling + EscapeRunes *jsonwire.EscapeRunes + SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers } // encodeBuffer is a buffer split into 2 segments: @@ -63,7 +65,7 @@ type Encoder struct { // - buf[0:len(buf)] // written (but unflushed) portion of the buffer // - buf[len(buf):cap(buf)] // unused portion of the buffer type encodeBuffer struct { - buf []byte // may alias wr if it is a bytes.Buffer + Buf []byte // may alias wr if it is a bytes.Buffer // baseOffset is added to len(buf) to obtain the absolute offset // relative to the start of io.Writer stream. @@ -87,8 +89,6 @@ type encodeBuffer struct { // // If w is a bytes.Buffer, then the encoder appends directly into the buffer // without copying the contents from an intermediate buffer. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.NewEncoder] instead. func NewEncoder(w io.Writer, opts ...Options) *Encoder { e := new(Encoder) e.Reset(w, opts...) 
@@ -104,55 +104,55 @@ func (e *Encoder) Reset(w io.Writer, opts ...Options) { panic("jsontext: invalid nil Encoder") case w == nil: panic("jsontext: invalid nil io.Writer") - case e.options.Flags.Get(jsonflags.WithinArshalCall): + case e.s.Flags.Get(jsonflags.WithinArshalCall): panic("jsontext: cannot reset Encoder passed to json.MarshalerV2") } - e.reset(nil, w, opts...) + e.s.reset(nil, w, opts...) } -func (e *Encoder) reset(b []byte, w io.Writer, opts ...Options) { +func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) { e.state.reset() - e.encodeBuffer = encodeBuffer{buf: b, wr: w, bufStats: e.bufStats} + e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats} if bb, ok := w.(*bytes.Buffer); ok && bb != nil { - e.buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb - } - e.options = jsonopts.Struct{} - e.options.Join(opts...) - e.escapeRunes = makeEscapeRunes( - e.options.Flags.Get(jsonflags.EscapeForHTML), - e.options.Flags.Get(jsonflags.EscapeForJS), - e.options.EscapeFunc, + e.Buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb + } + e.Struct = jsonopts.Struct{} + e.Struct.Join(opts...) + e.EscapeRunes = jsonwire.MakeEscapeRunes( + e.Flags.Get(jsonflags.EscapeForHTML), + e.Flags.Get(jsonflags.EscapeForJS), + e.EscapeFunc, ) - if e.options.Flags.Get(jsonflags.Expand) && !e.options.Flags.Has(jsonflags.Indent) { - e.options.Indent = "\t" + if e.Flags.Get(jsonflags.Expand) && !e.Flags.Has(jsonflags.Indent) { + e.Indent = "\t" } } -// needFlush determines whether to flush at this point. -func (e *Encoder) needFlush() bool { +// NeedFlush determines whether to flush at this point. +func (e *encoderState) NeedFlush() bool { // NOTE: This function is carefully written to be inlinable. // Avoid flushing if e.wr is nil since there is no underlying writer. // Flush if less than 25% of the capacity remains. 
// Flushing at some constant fraction ensures that the buffer stops growing // so long as the largest Token or Value fits within that unused capacity. - return e.wr != nil && (e.tokens.depth() == 1 || len(e.buf) > 3*cap(e.buf)/4) + return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4) } -// flush flushes the buffer to the underlying io.Writer. +// Flush flushes the buffer to the underlying io.Writer. // It may append a trailing newline after the top-level value. -func (e *Encoder) flush() error { +func (e *encoderState) Flush() error { if e.wr == nil || e.avoidFlush() { return nil } // In streaming mode, always emit a newline after the top-level value. - if e.tokens.depth() == 1 && !e.options.Flags.Get(jsonflags.OmitTopLevelNewline) { - e.buf = append(e.buf, '\n') + if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) { + e.Buf = append(e.Buf, '\n') } // Inform objectNameStack that we are about to flush the buffer content. - e.names.copyQuotedBuffer(e.buf) + e.Names.copyQuotedBuffer(e.Buf) // Specialize bytes.Buffer for better performance. if bb, ok := e.wr.(*bytes.Buffer); ok { @@ -160,7 +160,7 @@ func (e *Encoder) flush() error { // then the Write call simply increments the internal offset, // otherwise Write operates as expected. // See https://go.dev/issue/42986. - n, _ := bb.Write(e.buf) // never fails unless bb is nil + n, _ := bb.Write(e.Buf) // never fails unless bb is nil e.baseOffset += int64(n) // If the internal buffer of bytes.Buffer is too small, @@ -172,23 +172,23 @@ func (e *Encoder) flush() error { bb.Grow(avail + 1) } - e.buf = bb.AvailableBuffer() + e.Buf = bb.AvailableBuffer() return nil } // Flush the internal buffer to the underlying io.Writer. - n, err := e.wr.Write(e.buf) + n, err := e.wr.Write(e.Buf) e.baseOffset += int64(n) if err != nil { // In the event of an error, preserve the unflushed portion. 
// Thus, write errors aren't fatal so long as the io.Writer // maintains consistent state after errors. if n > 0 { - e.buf = e.buf[:copy(e.buf, e.buf[n:])] + e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])] } return &ioError{action: "write", err: err} } - e.buf = e.buf[:0] + e.Buf = e.Buf[:0] // Check whether to grow the buffer. // Note that cap(e.buf) may already exceed maxBufferSize since @@ -197,12 +197,12 @@ func (e *Encoder) flush() error { const growthSizeFactor = 2 // higher value is faster const growthRateFactor = 2 // higher value is slower // By default, grow if below the maximum buffer size. - grow := cap(e.buf) <= maxBufferSize/growthSizeFactor + grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor // Growing can be expensive, so only grow // if a sufficient number of bytes have been processed. - grow = grow && int64(cap(e.buf)) < e.previousOffsetEnd()/growthRateFactor + grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor if grow { - e.buf = make([]byte, 0, cap(e.buf)*growthSizeFactor) + e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor) } return nil @@ -218,24 +218,24 @@ func (e *encodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) erro return err } -func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + len64(e.buf) } -func (e *encodeBuffer) unflushedBuffer() []byte { return e.buf } +func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) } +func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf } // avoidFlush indicates whether to avoid flushing to ensure there is always // enough in the buffer to unwrite the last object member if it were empty. -func (e *Encoder) avoidFlush() bool { +func (e *encoderState) avoidFlush() bool { switch { - case e.tokens.last.length() == 0: + case e.Tokens.Last.Length() == 0: // Never flush after ObjectStart or ArrayStart since we don't know yet // if the object or array will end up being empty. 
return true - case e.tokens.last.needObjectValue(): + case e.Tokens.Last.needObjectValue(): // Never flush before the object value since we don't know yet // if the object value will end up being empty. return true - case e.tokens.last.needObjectName() && len(e.buf) >= 2: + case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2: // Never flush after the object value if it does turn out to be empty. - switch string(e.buf[len(e.buf)-2:]) { + switch string(e.Buf[len(e.Buf)-2:]) { case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value return true } @@ -243,10 +243,10 @@ func (e *Encoder) avoidFlush() bool { return false } -// unwriteEmptyObjectMember unwrites the last object member if it is empty +// UnwriteEmptyObjectMember unwrites the last object member if it is empty // and reports whether it performed an unwrite operation. -func (e *Encoder) unwriteEmptyObjectMember(prevName *string) bool { - if last := e.tokens.last; !last.isObject() || !last.needObjectName() || last.length() == 0 { +func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool { + if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 { panic("BUG: must be called on an object after writing a value") } @@ -279,89 +279,53 @@ func (e *Encoder) unwriteEmptyObjectMember(prevName *string) bool { // Unwrite the value, whitespace, colon, name, whitespace, and comma. b = b[:len(b)-n] - b = trimSuffixWhitespace(b) - b = trimSuffixByte(b, ':') - b = trimSuffixString(b) - b = trimSuffixWhitespace(b) - b = trimSuffixByte(b, ',') - e.buf = b // store back truncated unflushed buffer + b = jsonwire.TrimSuffixWhitespace(b) + b = jsonwire.TrimSuffixByte(b, ':') + b = jsonwire.TrimSuffixString(b) + b = jsonwire.TrimSuffixWhitespace(b) + b = jsonwire.TrimSuffixByte(b, ',') + e.Buf = b // store back truncated unflushed buffer // Undo state changes. 
- e.tokens.last.decrement() // for object member value - e.tokens.last.decrement() // for object member name - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) { - if e.tokens.last.isActiveNamespace() { - e.namespaces.last().removeLast() + e.Tokens.Last.decrement() // for object member value + e.Tokens.Last.decrement() // for object member name + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if e.Tokens.Last.isActiveNamespace() { + e.Namespaces.Last().removeLast() } - e.names.clearLast() + e.Names.clearLast() if prevName != nil { - e.names.copyQuotedBuffer(e.buf) // required by objectNameStack.replaceLastUnquotedName - e.names.replaceLastUnquotedName(*prevName) + e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName + e.Names.replaceLastUnquotedName(*prevName) } } return true } -// unwriteOnlyObjectMemberName unwrites the only object member name +// UnwriteOnlyObjectMemberName unwrites the only object member name // and returns the unquoted name. -func (e *Encoder) unwriteOnlyObjectMemberName() string { - if last := e.tokens.last; !last.isObject() || last.length() != 1 { +func (e *encoderState) UnwriteOnlyObjectMemberName() string { + if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 { panic("BUG: must be called on an object after writing first name") } // Unwrite the name and whitespace. - b := trimSuffixString(e.buf) - isVerbatim := bytes.IndexByte(e.buf[len(b):], '\\') < 0 - name := string(unescapeStringMayCopy(e.buf[len(b):], isVerbatim)) - e.buf = trimSuffixWhitespace(b) + b := jsonwire.TrimSuffixString(e.Buf) + isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0 + name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim)) + e.Buf = jsonwire.TrimSuffixWhitespace(b) // Undo state changes. 
- e.tokens.last.decrement() - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) { - if e.tokens.last.isActiveNamespace() { - e.namespaces.last().removeLast() + e.Tokens.Last.decrement() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + if e.Tokens.Last.isActiveNamespace() { + e.Namespaces.Last().removeLast() } - e.names.clearLast() + e.Names.clearLast() } return name } -func trimSuffixWhitespace(b []byte) []byte { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - n := len(b) - 1 - for n >= 0 && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') { - n-- - } - return b[:n+1] -} - -func trimSuffixString(b []byte) []byte { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - if len(b) > 0 && b[len(b)-1] == '"' { - b = b[:len(b)-1] - } - for len(b) >= 2 && !(b[len(b)-1] == '"' && b[len(b)-2] != '\\') { - b = b[:len(b)-1] // trim all characters except an unescaped quote - } - if len(b) > 0 && b[len(b)-1] == '"' { - b = b[:len(b)-1] - } - return b -} - -func hasSuffixByte(b []byte, c byte) bool { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - return len(b) > 0 && b[len(b)-1] == c -} - -func trimSuffixByte(b []byte, c byte) []byte { - // NOTE: The arguments and logic are kept simple to keep this inlinable. - if len(b) > 0 && b[len(b)-1] == c { - return b[:len(b)-1] - } - return b -} - // WriteToken writes the next token and advances the internal write offset. // // The provided token kind must be consistent with the JSON grammar. @@ -372,12 +336,15 @@ func trimSuffixByte(b []byte, c byte) []byte { // the internal state remains unchanged. // The offset reported in SyntacticError will be relative to the OutputOffset. 
func (e *Encoder) WriteToken(t Token) error { + return e.s.WriteToken(t) +} +func (e *encoderState) WriteToken(t Token) error { k := t.Kind() - b := e.buf // use local variable to avoid mutating e in case of error + b := e.Buf // use local variable to avoid mutating e in case of error // Append any delimiters or optional whitespace. - b = e.tokens.mayAppendDelim(b, k) - if e.options.Flags.Get(jsonflags.Expand) { + b = e.Tokens.MayAppendDelim(b, k) + if e.Flags.Get(jsonflags.Expand) { b = e.appendWhitespace(b, k) } pos := len(b) // offset before the token @@ -387,59 +354,59 @@ func (e *Encoder) WriteToken(t Token) error { switch k { case 'n': b = append(b, "null"...) - err = e.tokens.appendLiteral() + err = e.Tokens.appendLiteral() case 'f': b = append(b, "false"...) - err = e.tokens.appendLiteral() + err = e.Tokens.appendLiteral() case 't': b = append(b, "true"...) - err = e.tokens.appendLiteral() + err = e.Tokens.appendLiteral() case '"': n0 := len(b) // offset before calling t.appendString - if b, err = t.appendString(b, !e.options.Flags.Get(jsonflags.AllowInvalidUTF8), e.options.Flags.Get(jsonflags.PreserveRawStrings), e.escapeRunes); err != nil { + if b, err = t.appendString(b, !e.Flags.Get(jsonflags.AllowInvalidUTF8), e.Flags.Get(jsonflags.PreserveRawStrings), e.EscapeRunes); err != nil { break } - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) && e.tokens.last.needObjectName() { - if !e.tokens.last.isValidNamespace() { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() { + if !e.Tokens.Last.isValidNamespace() { err = errInvalidNamespace break } - if e.tokens.last.isActiveNamespace() && !e.namespaces.last().insertQuoted(b[n0:], false) { + if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[n0:], false) { err = newDuplicateNameError(b[n0:]) break } - e.names.replaceLastQuotedOffset(n0) // only replace if insertQuoted succeeds + e.Names.ReplaceLastQuotedOffset(n0) // only replace if insertQuoted 
succeeds } - err = e.tokens.appendString() + err = e.Tokens.appendString() case '0': - if b, err = t.appendNumber(b, e.options.Flags.Get(jsonflags.CanonicalizeNumbers)); err != nil { + if b, err = t.appendNumber(b, e.Flags.Get(jsonflags.CanonicalizeNumbers)); err != nil { break } - err = e.tokens.appendNumber() + err = e.Tokens.appendNumber() case '{': b = append(b, '{') - if err = e.tokens.pushObject(); err != nil { + if err = e.Tokens.pushObject(); err != nil { break } - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) { - e.names.push() - e.namespaces.push() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + e.Names.push() + e.Namespaces.push() } case '}': b = append(b, '}') - if err = e.tokens.popObject(); err != nil { + if err = e.Tokens.popObject(); err != nil { break } - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) { - e.names.pop() - e.namespaces.pop() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + e.Names.pop() + e.Namespaces.pop() } case '[': b = append(b, '[') - err = e.tokens.pushArray() + err = e.Tokens.pushArray() case ']': b = append(b, ']') - err = e.tokens.popArray() + err = e.Tokens.popArray() default: err = &SyntacticError{str: "invalid json.Token"} } @@ -448,28 +415,28 @@ func (e *Encoder) WriteToken(t Token) error { } // Finish off the buffer and store it back into e. - e.buf = b - if e.needFlush() { - return e.flush() + e.Buf = b + if e.NeedFlush() { + return e.Flush() } return nil } const ( - rawIntNumber = -1 - rawUintNumber = -2 + rawIntNumber = 'i' + rawUintNumber = 'u' ) -// writeNumber is specialized version of WriteToken, but optimized for numbers. +// WriteNumber is a specialized version of WriteToken, but optimized for numbers. // As a special-case, if bits is -1 or -2, it will treat v as // the raw-encoded bits of an int64 or uint64, respectively. // It is only called from arshal_default.go. 
-func (e *Encoder) writeNumber(v float64, bits int, quote bool) error { - b := e.buf // use local variable to avoid mutating e in case of error +func (e *encoderState) WriteNumber(v float64, bits int, quote bool) error { + b := e.Buf // use local variable to avoid mutating e in case of error // Append any delimiters or optional whitespace. - b = e.tokens.mayAppendDelim(b, '0') - if e.options.Flags.Get(jsonflags.Expand) { + b = e.Tokens.MayAppendDelim(b, '0') + if e.Flags.Get(jsonflags.Expand) { b = e.appendWhitespace(b, '0') } pos := len(b) // offset before the token @@ -484,29 +451,29 @@ func (e *Encoder) writeNumber(v float64, bits int, quote bool) error { case rawUintNumber: b = strconv.AppendUint(b, uint64(math.Float64bits(v)), 10) default: - b = appendNumber(b, v, bits) + b = jsonwire.AppendFloat(b, v, bits) } b = append(b, '"') // Escape the string if necessary. - if e.escapeRunes.escapeFunc != nil { + if e.EscapeRunes.HasEscapeFunc() { b2 := append(e.unusedCache, b[n0+len(`"`):len(b)-len(`"`)]...) - b, _ = appendString(b[:n0], string(b2), false, e.escapeRunes) + b, _ = jsonwire.AppendQuote(b[:n0], string(b2), false, e.EscapeRunes) e.unusedCache = b2[:0] } // Update the state machine. 
- if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) && e.tokens.last.needObjectName() { - if !e.tokens.last.isValidNamespace() { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() { + if !e.Tokens.Last.isValidNamespace() { return errInvalidNamespace } - if e.tokens.last.isActiveNamespace() && !e.namespaces.last().insertQuoted(b[n0:], false) { + if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[n0:], false) { err := newDuplicateNameError(b[n0:]) return e.injectSyntacticErrorWithPosition(err, pos) } - e.names.replaceLastQuotedOffset(n0) // only replace if insertQuoted succeeds + e.Names.ReplaceLastQuotedOffset(n0) // only replace if insertQuoted succeeds } - if err := e.tokens.appendString(); err != nil { + if err := e.Tokens.appendString(); err != nil { return e.injectSyntacticErrorWithPosition(err, pos) } } else { @@ -516,17 +483,17 @@ func (e *Encoder) writeNumber(v float64, bits int, quote bool) error { case rawUintNumber: b = strconv.AppendUint(b, uint64(math.Float64bits(v)), 10) default: - b = appendNumber(b, v, bits) + b = jsonwire.AppendFloat(b, v, bits) } - if err := e.tokens.appendNumber(); err != nil { + if err := e.Tokens.appendNumber(); err != nil { return e.injectSyntacticErrorWithPosition(err, pos) } } // Finish off the buffer and store it back into e. - e.buf = b - if e.needFlush() { - return e.flush() + e.Buf = b + if e.NeedFlush() { + return e.Flush() } return nil } @@ -542,27 +509,30 @@ func (e *Encoder) writeNumber(v float64, bits int, quote bool) error { // The offset reported in SyntacticError will be relative to the OutputOffset // plus the offset into v of any encountered syntax error. 
func (e *Encoder) WriteValue(v RawValue) error { + return e.s.WriteValue(v) +} +func (e *encoderState) WriteValue(v RawValue) error { e.maxValue |= len(v) // bitwise OR is a fast approximation of max k := v.Kind() - b := e.buf // use local variable to avoid mutating e in case of error + b := e.Buf // use local variable to avoid mutating e in case of error // Append any delimiters or optional whitespace. - b = e.tokens.mayAppendDelim(b, k) - if e.options.Flags.Get(jsonflags.Expand) { + b = e.Tokens.MayAppendDelim(b, k) + if e.Flags.Get(jsonflags.Expand) { b = e.appendWhitespace(b, k) } pos := len(b) // offset before the value // Append the value the output. var n int - n += consumeWhitespace(v[n:]) - b, m, err := e.reformatValue(b, v[n:], e.tokens.depth()) + n += jsonwire.ConsumeWhitespace(v[n:]) + b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth()) if err != nil { return e.injectSyntacticErrorWithPosition(err, pos+n+m) } n += m - n += consumeWhitespace(v[n:]) + n += jsonwire.ConsumeWhitespace(v[n:]) if len(v) > n { err = newInvalidCharacterError(v[n:], "after top-level value") return e.injectSyntacticErrorWithPosition(err, pos+n) @@ -571,34 +541,34 @@ func (e *Encoder) WriteValue(v RawValue) error { // Append the kind to the state machine. 
switch k { case 'n', 'f', 't': - err = e.tokens.appendLiteral() + err = e.Tokens.appendLiteral() case '"': - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) && e.tokens.last.needObjectName() { - if !e.tokens.last.isValidNamespace() { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() { + if !e.Tokens.Last.isValidNamespace() { err = errInvalidNamespace break } - if e.tokens.last.isActiveNamespace() && !e.namespaces.last().insertQuoted(b[pos:], false) { + if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) { err = newDuplicateNameError(b[pos:]) break } - e.names.replaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds + e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds } - err = e.tokens.appendString() + err = e.Tokens.appendString() case '0': - err = e.tokens.appendNumber() + err = e.Tokens.appendNumber() case '{': - if err = e.tokens.pushObject(); err != nil { + if err = e.Tokens.pushObject(); err != nil { break } - if err = e.tokens.popObject(); err != nil { + if err = e.Tokens.popObject(); err != nil { panic("BUG: popObject should never fail immediately after pushObject: " + err.Error()) } case '[': - if err = e.tokens.pushArray(); err != nil { + if err = e.Tokens.pushArray(); err != nil { break } - if err = e.tokens.popArray(); err != nil { + if err = e.Tokens.popArray(); err != nil { panic("BUG: popArray should never fail immediately after pushArray: " + err.Error()) } } @@ -607,32 +577,32 @@ func (e *Encoder) WriteValue(v RawValue) error { } // Finish off the buffer and store it back into e. - e.buf = b - if e.needFlush() { - return e.flush() + e.Buf = b + if e.NeedFlush() { + return e.Flush() } return nil } // appendWhitespace appends whitespace that immediately precedes the next token. 
-func (e *Encoder) appendWhitespace(b []byte, next Kind) []byte { - if e.tokens.needDelim(next) == ':' { +func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte { + if e.Tokens.needDelim(next) == ':' { return append(b, ' ') } else { - return e.appendIndent(b, e.tokens.needIndent(next)) + return e.AppendIndent(b, e.Tokens.NeedIndent(next)) } } -// appendIndent appends the appropriate number of indentation characters +// AppendIndent appends the appropriate number of indentation characters // for the current nested level, n. -func (e *Encoder) appendIndent(b []byte, n int) []byte { +func (e *encoderState) AppendIndent(b []byte, n int) []byte { if n == 0 { return b } b = append(b, '\n') - b = append(b, e.options.IndentPrefix...) + b = append(b, e.IndentPrefix...) for ; n > 1; n-- { - b = append(b, e.options.Indent...) + b = append(b, e.Indent...) } return b } @@ -640,42 +610,42 @@ func (e *Encoder) appendIndent(b []byte, n int) []byte { // reformatValue parses a JSON value from the start of src and // appends it to the end of dst, reformatting whitespace and strings as needed. // It returns the extended dst buffer and the number of consumed input bytes. -func (e *Encoder) reformatValue(dst []byte, src RawValue, depth int) ([]byte, int, error) { - // TODO: Should this update valueFlags as input? +func (e *encoderState) reformatValue(dst []byte, src RawValue, depth int) ([]byte, int, error) { + // TODO: Should this update ValueFlags as input? 
if len(src) == 0 { return dst, 0, io.ErrUnexpectedEOF } switch k := Kind(src[0]).normalize(); k { case 'n': - if consumeNull(src) == 0 { - n, err := consumeLiteral(src, "null") + if jsonwire.ConsumeNull(src) == 0 { + n, err := jsonwire.ConsumeLiteral(src, "null") return dst, n, err } return append(dst, "null"...), len("null"), nil case 'f': - if consumeFalse(src) == 0 { - n, err := consumeLiteral(src, "false") + if jsonwire.ConsumeFalse(src) == 0 { + n, err := jsonwire.ConsumeLiteral(src, "false") return dst, n, err } return append(dst, "false"...), len("false"), nil case 't': - if consumeTrue(src) == 0 { - n, err := consumeLiteral(src, "true") + if jsonwire.ConsumeTrue(src) == 0 { + n, err := jsonwire.ConsumeLiteral(src, "true") return dst, n, err } return append(dst, "true"...), len("true"), nil case '"': - if n := consumeSimpleString(src); n > 0 && e.escapeRunes.escapeFunc == nil { + if n := jsonwire.ConsumeSimpleString(src); n > 0 && !e.EscapeRunes.HasEscapeFunc() { dst, src = append(dst, src[:n]...), src[n:] // copy simple strings verbatim return dst, n, nil } - return reformatString(dst, src, !e.options.Flags.Get(jsonflags.AllowInvalidUTF8), e.options.Flags.Get(jsonflags.PreserveRawStrings), e.escapeRunes) + return jsonwire.ReformatString(dst, src, !e.Flags.Get(jsonflags.AllowInvalidUTF8), e.Flags.Get(jsonflags.PreserveRawStrings), e.EscapeRunes) case '0': - if n := consumeSimpleNumber(src); n > 0 && !e.options.Flags.Get(jsonflags.CanonicalizeNumbers) { + if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) { dst, src = append(dst, src[:n]...), src[n:] // copy simple numbers verbatim return dst, n, nil } - return reformatNumber(dst, src, e.options.Flags.Get(jsonflags.CanonicalizeNumbers)) + return jsonwire.ReformatNumber(dst, src, e.Flags.Get(jsonflags.CanonicalizeNumbers)) case '{': return e.reformatObject(dst, src, depth) case '[': @@ -688,7 +658,7 @@ func (e *Encoder) reformatValue(dst []byte, src RawValue, depth 
int) ([]byte, in // reformatObject parses a JSON object from the start of src and // appends it to the end of src, reformatting whitespace and strings as needed. // It returns the extended dst buffer and the number of consumed input bytes. -func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, int, error) { +func (e *encoderState) reformatObject(dst []byte, src RawValue, depth int) ([]byte, int, error) { // Append object start. if len(src) == 0 || src[0] != '{' { panic("BUG: reformatObject must be called with a buffer that starts with '{'") @@ -699,7 +669,7 @@ func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, i n := len("{") // Append (possible) object end. - n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -711,40 +681,40 @@ func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, i var err error var names *objectNamespace - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) { - e.namespaces.push() - defer e.namespaces.pop() - names = e.namespaces.last() + if !e.Flags.Get(jsonflags.AllowDuplicateNames) { + e.Namespaces.push() + defer e.Namespaces.pop() + names = e.Namespaces.Last() } depth++ for { // Append optional newline and indentation. - if e.options.Flags.Get(jsonflags.Expand) { - dst = e.appendIndent(dst, depth) + if e.Flags.Get(jsonflags.Expand) { + dst = e.AppendIndent(dst, depth) } // Append object name. - n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } - m := consumeSimpleString(src[n:]) - if m > 0 && e.escapeRunes.escapeFunc == nil { + m := jsonwire.ConsumeSimpleString(src[n:]) + if m > 0 && !e.EscapeRunes.HasEscapeFunc() { dst = append(dst, src[n:n+m]...) 
} else { - dst, m, err = reformatString(dst, src[n:], !e.options.Flags.Get(jsonflags.AllowInvalidUTF8), e.options.Flags.Get(jsonflags.PreserveRawStrings), e.escapeRunes) + dst, m, err = jsonwire.ReformatString(dst, src[n:], !e.Flags.Get(jsonflags.AllowInvalidUTF8), e.Flags.Get(jsonflags.PreserveRawStrings), e.EscapeRunes) if err != nil { return dst, n + m, err } } // TODO: Specify whether the name is verbatim or not. - if !e.options.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(src[n:n+m], false) { + if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(src[n:n+m], false) { return dst, n, newDuplicateNameError(src[n : n+m]) } n += m // Append colon. - n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -753,12 +723,12 @@ func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, i } dst = append(dst, ':') n += len(":") - if e.options.Flags.Get(jsonflags.Expand) { + if e.Flags.Get(jsonflags.Expand) { dst = append(dst, ' ') } // Append object value. - n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -769,7 +739,7 @@ func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, i n += m // Append comma or object end. 
- n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -779,8 +749,8 @@ func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, i n += len(",") continue case '}': - if e.options.Flags.Get(jsonflags.Expand) { - dst = e.appendIndent(dst, depth-1) + if e.Flags.Get(jsonflags.Expand) { + dst = e.AppendIndent(dst, depth-1) } dst = append(dst, '}') n += len("}") @@ -794,7 +764,7 @@ func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, i // reformatArray parses a JSON array from the start of src and // appends it to the end of dst, reformatting whitespace and strings as needed. // It returns the extended dst buffer and the number of consumed input bytes. -func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, int, error) { +func (e *encoderState) reformatArray(dst []byte, src RawValue, depth int) ([]byte, int, error) { // Append array start. if len(src) == 0 || src[0] != '[' { panic("BUG: reformatArray must be called with a buffer that starts with '['") @@ -805,7 +775,7 @@ func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, in n := len("[") // Append (possible) array end. - n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -819,12 +789,12 @@ func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, in depth++ for { // Append optional newline and indentation. - if e.options.Flags.Get(jsonflags.Expand) { - dst = e.appendIndent(dst, depth) + if e.Flags.Get(jsonflags.Expand) { + dst = e.AppendIndent(dst, depth) } // Append array value. 
- n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -836,7 +806,7 @@ func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, in n += m // Append comma or array end. - n += consumeWhitespace(src[n:]) + n += jsonwire.ConsumeWhitespace(src[n:]) if uint(len(src)) <= uint(n) { return dst, n, io.ErrUnexpectedEOF } @@ -846,8 +816,8 @@ func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, in n += len(",") continue case ']': - if e.options.Flags.Get(jsonflags.Expand) { - dst = e.appendIndent(dst, depth-1) + if e.Flags.Get(jsonflags.Expand) { + dst = e.AppendIndent(dst, depth-1) } dst = append(dst, ']') n += len("]") @@ -863,7 +833,7 @@ func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, in // The number of bytes actually written to the underlying io.Writer may be less // than this offset due to internal buffering effects. func (e *Encoder) OutputOffset() int64 { - return e.previousOffsetEnd() + return e.s.previousOffsetEnd() } // UnusedBuffer returns a zero-length buffer with a possible non-zero capacity. @@ -886,11 +856,11 @@ func (e *Encoder) UnusedBuffer() []byte { // without using unsafe.Pointer. Thus, we just return a different buffer. // Should this ever alias e.buf, we need to consider how it operates with // the specialized performance optimization for bytes.Buffer. - n := 1 << bits.Len(uint(e.maxValue|63)) // fast approximation for max length - if cap(e.unusedCache) < n { - e.unusedCache = make([]byte, 0, n) + n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length + if cap(e.s.unusedCache) < n { + e.s.unusedCache = make([]byte, 0, n) } - return e.unusedCache + return e.s.unusedCache } // StackDepth returns the depth of the state machine for written JSON data. 
@@ -900,7 +870,7 @@ func (e *Encoder) UnusedBuffer() []byte { // The depth is zero-indexed, where zero represents the top-level JSON value. func (e *Encoder) StackDepth() int { // NOTE: Keep in sync with Decoder.StackDepth. - return e.tokens.depth() - 1 + return e.s.Tokens.Depth() - 1 } // StackIndex returns information about the specified stack level. @@ -917,13 +887,13 @@ func (e *Encoder) StackDepth() int { // A complete JSON object must have an even length. func (e *Encoder) StackIndex(i int) (Kind, int) { // NOTE: Keep in sync with Decoder.StackIndex. - switch s := e.tokens.index(i); { + switch s := e.s.Tokens.index(i); { case i > 0 && s.isObject(): - return '{', s.length() + return '{', s.Length() case i > 0 && s.isArray(): - return '[', s.length() + return '[', s.Length() default: - return 0, s.length() + return 0, s.Length() } } @@ -931,221 +901,6 @@ func (e *Encoder) StackIndex(i int) (Kind, int) { // Object names are only present if AllowDuplicateNames is false, otherwise // object members are represented using their index within the object. func (e *Encoder) StackPointer() string { - e.names.copyQuotedBuffer(e.buf) - return string(e.appendStackPointer(nil)) -} - -// appendString appends src to dst as a JSON string per RFC 7159, section 7. -// -// If validateUTF8 is specified, this rejects input that contains invalid UTF-8 -// otherwise invalid bytes are replaced with the Unicode replacement character. -// If escapeRune is provided, it specifies which runes to escape using -// hexadecimal sequences. If nil, the shortest representable form is used, -// which is also the canonical form for strings (RFC 8785, section 3.2.2.2). -// -// Note that this API allows full control over the formatting of strings -// except for whether a forward solidus '/' may be formatted as '\/' and -// the casing of hexadecimal Unicode escape sequences. 
-func appendString[Bytes ~[]byte | ~string](dst []byte, src Bytes, validateUTF8 bool, escape *escapeRunes) ([]byte, error) { - var i, n int - var hasInvalidUTF8 bool - dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`)) - dst = append(dst, '"') - if escape == nil || escape.canonical { - // Optimize for canonical formatting. - for uint(len(src)) > uint(n) { - // Handle single-byte ASCII. - if c := src[n]; c < utf8.RuneSelf { - n++ - if escapeCanonical.escapeASCII(c) { - dst = append(dst, src[i:n-1]...) - dst = appendEscapedASCII(dst, c) - i = n - } - continue - } - - // Handle multi-byte Unicode. - _, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))) - n += rn - if rn == 1 { // must be utf8.RuneError since we already checked for single-byte ASCII - hasInvalidUTF8 = true - dst = append(dst, src[i:n-rn]...) - dst = append(dst, "\ufffd"...) - i = n - } - } - } else { - // Handle arbitrary escaping. - for uint(len(src)) > uint(n) { - // Handle single-byte ASCII. - if c := src[n]; c < utf8.RuneSelf { - n++ - if escape.escapeASCII(c) { - dst = append(dst, src[i:n-1]...) - if escape.escapeASCIIAsUTF16(c) { - dst = appendEscapedUTF16(dst, uint16(c)) - } else { - dst = appendEscapedASCII(dst, c) - } - i = n - } - continue - } - - // Handle multi-byte Unicode. - switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { - case r == utf8.RuneError && rn == 1: - hasInvalidUTF8 = true - dst = append(dst, src[i:n]...) - if escape.escapeRune(r) { - dst = append(dst, `\ufffd`...) - } else { - dst = append(dst, "\ufffd"...) - } - n += rn - i = n - case escape.escapeRune(r): - dst = append(dst, src[i:n]...) - dst = appendEscapedUnicode(dst, r) - n += rn - i = n - default: - n += rn - } - } - } - dst = append(dst, src[i:n]...) 
- dst = append(dst, '"') - if validateUTF8 && hasInvalidUTF8 { - return dst, errInvalidUTF8 - } - return dst, nil -} - -func appendEscapedASCII(dst []byte, c byte) []byte { - switch c { - case '"', '\\': - dst = append(dst, '\\', c) - case '\b': - dst = append(dst, "\\b"...) - case '\f': - dst = append(dst, "\\f"...) - case '\n': - dst = append(dst, "\\n"...) - case '\r': - dst = append(dst, "\\r"...) - case '\t': - dst = append(dst, "\\t"...) - default: - dst = appendEscapedUTF16(dst, uint16(c)) - } - return dst -} - -func appendEscapedUnicode(dst []byte, r rune) []byte { - if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' { - dst = appendEscapedUTF16(dst, uint16(r1)) - dst = appendEscapedUTF16(dst, uint16(r2)) - } else { - dst = appendEscapedUTF16(dst, uint16(r)) - } - return dst -} - -func appendEscapedUTF16(dst []byte, x uint16) []byte { - const hex = "0123456789abcdef" - return append(dst, '\\', 'u', hex[(x>>12)&0xf], hex[(x>>8)&0xf], hex[(x>>4)&0xf], hex[(x>>0)&0xf]) -} - -// reformatString consumes a JSON string from src and appends it to dst, -// reformatting it if necessary for the given escapeRune parameter. -// It returns the appended output and the number of consumed input bytes. -func reformatString(dst, src []byte, validateUTF8, preserveRaw bool, escape *escapeRunes) ([]byte, int, error) { - // TODO: Should this update valueFlags as input? - var flags valueFlags - n, err := consumeString(&flags, src, validateUTF8) - if err != nil { - return dst, n, err - } - if preserveRaw || (escape.canonical && flags.isCanonical()) { - dst = append(dst, src[:n]...) // copy the string verbatim - return dst, n, nil - } - - // TODO: Implement a direct, raw-to-raw reformat for strings. - // If the escapeRune option would have resulted in no changes to the output, - // it would be faster to simply append src to dst without going through - // an intermediary representation in a separate buffer. 
- b, _ := unescapeString(nil, src[:n]) - dst, _ = appendString(dst, string(b), validateUTF8, escape) - return dst, n, nil -} - -// appendNumber appends src to dst as a JSON number per RFC 7159, section 6. -// It formats numbers similar to the ES6 number-to-string conversion. -// See https://go.dev/issue/14135. -// -// The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with -// RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0, -// which is formatted as -0 instead of just 0. -// -// For 32-bit floating-point numbers, -// the output is a 32-bit equivalent of the algorithm. -// Note that ECMA-262 specifies no algorithm for 32-bit numbers. -func appendNumber(dst []byte, src float64, bits int) []byte { - if bits == 32 { - src = float64(float32(src)) - } - - abs := math.Abs(src) - fmt := byte('f') - if abs != 0 { - if bits == 64 && (float64(abs) < 1e-6 || float64(abs) >= 1e21) || - bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { - fmt = 'e' - } - } - dst = strconv.AppendFloat(dst, src, fmt, -1, bits) - if fmt == 'e' { - // Clean up e-09 to e-9. - n := len(dst) - if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' { - dst[n-2] = dst[n-1] - dst = dst[:n-1] - } - } - return dst -} - -// reformatNumber consumes a JSON string from src and appends it to dst, -// canonicalizing it if specified. -// It returns the appended output and the number of consumed input bytes. -func reformatNumber(dst, src []byte, canonicalize bool) ([]byte, int, error) { - n, err := consumeNumber(src) - if err != nil { - return dst, n, err - } - if !canonicalize { - dst = append(dst, src[:n]...) // copy the number verbatim - return dst, n, nil - } - - // Canonicalize the number per RFC 8785, section 3.2.2.3. - // As an optimization, we can copy integer numbers below 2⁵³ verbatim. 
- const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10)) - if n < maxExactIntegerDigits && consumeSimpleNumber(src[:n]) == n { - dst = append(dst, src[:n]...) // copy the number verbatim - return dst, n, nil - } - fv, _ := strconv.ParseFloat(string(src[:n]), 64) - switch { - case fv == 0: - fv = 0 // normalize negative zero as just zero - case math.IsInf(fv, +1): - fv = +math.MaxFloat64 - case math.IsInf(fv, -1): - fv = -math.MaxFloat64 - } - return appendNumber(dst, fv, 64), n, nil + e.s.Names.copyQuotedBuffer(e.s.Buf) + return string(e.s.appendStackPointer(nil)) } diff --git a/jsontext/encode_test.go b/jsontext/encode_test.go new file mode 100644 index 0000000..d707dad --- /dev/null +++ b/jsontext/encode_test.go @@ -0,0 +1,443 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsontext + +import ( + "bytes" + "errors" + "io" + "path" + "reflect" + "testing" + + "github.com/go-json-experiment/json/internal/jsonflags" + "github.com/go-json-experiment/json/internal/jsontest" + "github.com/go-json-experiment/json/internal/jsonwire" +) + +// TestEncoder tests whether we can produce JSON with either tokens or raw values. 
+func TestEncoder(t *testing.T) { + for _, td := range coderTestdata { + for _, formatName := range []string{"Compact", "Escaped", "Indented"} { + for _, typeName := range []string{"Token", "Value", "TokenDelims"} { + t.Run(path.Join(td.name.Name, typeName, formatName), func(t *testing.T) { + testEncoder(t, td.name.Where, formatName, typeName, td) + }) + } + } + } +} +func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName string, td coderTestdataEntry) { + var want string + var opts []Options + dst := new(bytes.Buffer) + opts = append(opts, jsonflags.OmitTopLevelNewline|1) + want = td.outCompacted + switch formatName { + case "Escaped": + opts = append(opts, WithEscapeFunc(func(rune) bool { return true })) + if td.outEscaped != "" { + want = td.outEscaped + } + case "Indented": + opts = append(opts, Expand(true)) + opts = append(opts, WithIndentPrefix("\t")) + opts = append(opts, WithIndent(" ")) + if td.outIndented != "" { + want = td.outIndented + } + } + enc := NewEncoder(dst, opts...) + + switch typeName { + case "Token": + var pointers []string + for _, tok := range td.tokens { + if err := enc.WriteToken(tok); err != nil { + t.Fatalf("%s: Encoder.WriteToken error: %v", where, err) + } + if td.pointers != nil { + pointers = append(pointers, enc.StackPointer()) + } + } + if !reflect.DeepEqual(pointers, td.pointers) { + t.Fatalf("%s: pointers mismatch:\ngot %q\nwant %q", where, pointers, td.pointers) + } + case "Value": + if err := enc.WriteValue(RawValue(td.in)); err != nil { + t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) + } + case "TokenDelims": + // Use WriteToken for object/array delimiters, WriteValue otherwise. 
+ for _, tok := range td.tokens { + switch tok.Kind() { + case '{', '}', '[', ']': + if err := enc.WriteToken(tok); err != nil { + t.Fatalf("%s: Encoder.WriteToken error: %v", where, err) + } + default: + val := RawValue(tok.String()) + if tok.Kind() == '"' { + val, _ = jsonwire.AppendQuote(nil, tok.String(), false, nil) + } + if err := enc.WriteValue(val); err != nil { + t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) + } + } + } + } + + got := dst.String() + if got != want { + t.Errorf("%s: output mismatch:\ngot %q\nwant %q", where, got, want) + } +} + +// TestFaultyEncoder tests that temporary I/O errors are not fatal. +func TestFaultyEncoder(t *testing.T) { + for _, td := range coderTestdata { + for _, typeName := range []string{"Token", "Value"} { + t.Run(path.Join(td.name.Name, typeName), func(t *testing.T) { + testFaultyEncoder(t, td.name.Where, typeName, td) + }) + } + } +} +func testFaultyEncoder(t *testing.T, where jsontest.CasePos, typeName string, td coderTestdataEntry) { + b := &FaultyBuffer{ + MaxBytes: 1, + MayError: io.ErrShortWrite, + } + + // Write all the tokens. + // Even if the underlying io.Writer may be faulty, + // writing a valid token or value is guaranteed to at least + // be appended to the internal buffer. + // In other words, syntactic errors occur before I/O errors. 
+ enc := NewEncoder(b) + switch typeName { + case "Token": + for i, tok := range td.tokens { + err := enc.WriteToken(tok) + if err != nil && !errors.Is(err, io.ErrShortWrite) { + t.Fatalf("%s: %d: Encoder.WriteToken error: %v", where, i, err) + } + } + case "Value": + err := enc.WriteValue(RawValue(td.in)) + if err != nil && !errors.Is(err, io.ErrShortWrite) { + t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) + } + } + gotOutput := string(append(b.B, enc.s.unflushedBuffer()...)) + wantOutput := td.outCompacted + "\n" + if gotOutput != wantOutput { + t.Fatalf("%s: output mismatch:\ngot %s\nwant %s", where, gotOutput, wantOutput) + } +} + +type encoderMethodCall struct { + in tokOrVal + wantErr error + wantPointer string +} + +var encoderErrorTestdata = []struct { + name jsontest.CaseName + opts []Options + calls []encoderMethodCall + wantOut string +}{{ + name: jsontest.Name("InvalidToken"), + calls: []encoderMethodCall{ + {zeroToken, &SyntacticError{str: "invalid json.Token"}, ""}, + }, +}, { + name: jsontest.Name("InvalidValue"), + calls: []encoderMethodCall{ + {RawValue(`#`), newInvalidCharacterError("#", "at start of value"), ""}, + }, +}, { + name: jsontest.Name("InvalidValue/DoubleZero"), + calls: []encoderMethodCall{ + {RawValue(`00`), newInvalidCharacterError("0", "after top-level value").withOffset(len64(`0`)), ""}, + }, +}, { + name: jsontest.Name("TruncatedValue"), + calls: []encoderMethodCall{ + {zeroValue, io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedNull"), + calls: []encoderMethodCall{ + {RawValue(`nul`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("InvalidNull"), + calls: []encoderMethodCall{ + {RawValue(`nulL`), newInvalidCharacterError("L", "within literal null (expecting 'l')").withOffset(len64(`nul`)), ""}, + }, +}, { + name: jsontest.Name("TruncatedFalse"), + calls: []encoderMethodCall{ + {RawValue(`fals`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("InvalidFalse"), + calls: 
[]encoderMethodCall{ + {RawValue(`falsE`), newInvalidCharacterError("E", "within literal false (expecting 'e')").withOffset(len64(`fals`)), ""}, + }, +}, { + name: jsontest.Name("TruncatedTrue"), + calls: []encoderMethodCall{ + {RawValue(`tru`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("InvalidTrue"), + calls: []encoderMethodCall{ + {RawValue(`truE`), newInvalidCharacterError("E", "within literal true (expecting 'e')").withOffset(len64(`tru`)), ""}, + }, +}, { + name: jsontest.Name("TruncatedString"), + calls: []encoderMethodCall{ + {RawValue(`"star`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("InvalidString"), + calls: []encoderMethodCall{ + {RawValue(`"ok` + "\x00"), newInvalidCharacterError("\x00", `within string (expecting non-control character)`).withOffset(len64(`"ok`)), ""}, + }, +}, { + name: jsontest.Name("ValidString/AllowInvalidUTF8/Token"), + opts: []Options{AllowInvalidUTF8(true)}, + calls: []encoderMethodCall{ + {String("living\xde\xad\xbe\xef"), nil, ""}, + }, + wantOut: "\"living\xde\xad\ufffd\ufffd\"\n", +}, { + name: jsontest.Name("ValidString/AllowInvalidUTF8/Value"), + opts: []Options{AllowInvalidUTF8(true)}, + calls: []encoderMethodCall{ + {RawValue("\"living\xde\xad\xbe\xef\""), nil, ""}, + }, + wantOut: "\"living\xde\xad\ufffd\ufffd\"\n", +}, { + name: jsontest.Name("InvalidString/RejectInvalidUTF8"), + opts: []Options{AllowInvalidUTF8(false)}, + calls: []encoderMethodCall{ + {String("living\xde\xad\xbe\xef"), errInvalidUTF8, ""}, + {RawValue("\"living\xde\xad\xbe\xef\""), errInvalidUTF8.withOffset(len64("\"living\xde\xad")), ""}, + }, +}, { + name: jsontest.Name("TruncatedNumber"), + calls: []encoderMethodCall{ + {RawValue(`0.`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("InvalidNumber"), + calls: []encoderMethodCall{ + {RawValue(`0.e`), newInvalidCharacterError("e", "within number (expecting digit)").withOffset(len64(`0.`)), ""}, + }, +}, { + name: 
jsontest.Name("TruncatedObject/AfterStart"), + calls: []encoderMethodCall{ + {RawValue(`{`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedObject/AfterName"), + calls: []encoderMethodCall{ + {RawValue(`{"0"`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedObject/AfterColon"), + calls: []encoderMethodCall{ + {RawValue(`{"0":`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedObject/AfterValue"), + calls: []encoderMethodCall{ + {RawValue(`{"0":0`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedObject/AfterComma"), + calls: []encoderMethodCall{ + {RawValue(`{"0":0,`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("InvalidObject/MissingColon"), + calls: []encoderMethodCall{ + {RawValue(` { "fizz" "buzz" } `), newInvalidCharacterError("\"", "after object name (expecting ':')").withOffset(len64(` { "fizz" `)), ""}, + {RawValue(` { "fizz" , "buzz" } `), newInvalidCharacterError(",", "after object name (expecting ':')").withOffset(len64(` { "fizz" `)), ""}, + }, +}, { + name: jsontest.Name("InvalidObject/MissingComma"), + calls: []encoderMethodCall{ + {RawValue(` { "fizz" : "buzz" "gazz" } `), newInvalidCharacterError("\"", "after object value (expecting ',' or '}')").withOffset(len64(` { "fizz" : "buzz" `)), ""}, + {RawValue(` { "fizz" : "buzz" : "gazz" } `), newInvalidCharacterError(":", "after object value (expecting ',' or '}')").withOffset(len64(` { "fizz" : "buzz" `)), ""}, + }, +}, { + name: jsontest.Name("InvalidObject/ExtraComma"), + calls: []encoderMethodCall{ + {RawValue(` { , } `), newInvalidCharacterError(",", `at start of string (expecting '"')`).withOffset(len64(` { `)), ""}, + {RawValue(` { "fizz" : "buzz" , } `), newInvalidCharacterError("}", `at start of string (expecting '"')`).withOffset(len64(` { "fizz" : "buzz" , `)), ""}, + }, +}, { + name: jsontest.Name("InvalidObject/InvalidName"), + calls: []encoderMethodCall{ + {RawValue(`{ null }`), 
newInvalidCharacterError("n", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, + {RawValue(`{ false }`), newInvalidCharacterError("f", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, + {RawValue(`{ true }`), newInvalidCharacterError("t", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, + {RawValue(`{ 0 }`), newInvalidCharacterError("0", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, + {RawValue(`{ {} }`), newInvalidCharacterError("{", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, + {RawValue(`{ [] }`), newInvalidCharacterError("[", `at start of string (expecting '"')`).withOffset(len64(`{ `)), ""}, + {ObjectStart, nil, ""}, + {Null, errMissingName.withOffset(len64(`{`)), ""}, + {RawValue(`null`), errMissingName.withOffset(len64(`{`)), ""}, + {False, errMissingName.withOffset(len64(`{`)), ""}, + {RawValue(`false`), errMissingName.withOffset(len64(`{`)), ""}, + {True, errMissingName.withOffset(len64(`{`)), ""}, + {RawValue(`true`), errMissingName.withOffset(len64(`{`)), ""}, + {Uint(0), errMissingName.withOffset(len64(`{`)), ""}, + {RawValue(`0`), errMissingName.withOffset(len64(`{`)), ""}, + {ObjectStart, errMissingName.withOffset(len64(`{`)), ""}, + {RawValue(`{}`), errMissingName.withOffset(len64(`{`)), ""}, + {ArrayStart, errMissingName.withOffset(len64(`{`)), ""}, + {RawValue(`[]`), errMissingName.withOffset(len64(`{`)), ""}, + {ObjectEnd, nil, ""}, + }, + wantOut: "{}\n", +}, { + name: jsontest.Name("InvalidObject/InvalidValue"), + calls: []encoderMethodCall{ + {RawValue(`{ "0": x }`), newInvalidCharacterError("x", `at start of value`).withOffset(len64(`{ "0": `)), ""}, + }, +}, { + name: jsontest.Name("InvalidObject/MismatchingDelim"), + calls: []encoderMethodCall{ + {RawValue(` { ] `), newInvalidCharacterError("]", `at start of string (expecting '"')`).withOffset(len64(` { `)), ""}, + {RawValue(` { "0":0 ] `), newInvalidCharacterError("]", `after object value 
(expecting ',' or '}')`).withOffset(len64(` { "0":0 `)), ""}, + {ObjectStart, nil, ""}, + {ArrayEnd, errMismatchDelim.withOffset(len64(`{`)), ""}, + {RawValue(`]`), newInvalidCharacterError("]", "at start of value").withOffset(len64(`{`)), ""}, + {ObjectEnd, nil, ""}, + }, + wantOut: "{}\n", +}, { + name: jsontest.Name("ValidObject/UniqueNames"), + calls: []encoderMethodCall{ + {ObjectStart, nil, ""}, + {String("0"), nil, ""}, + {Uint(0), nil, ""}, + {String("1"), nil, ""}, + {Uint(1), nil, ""}, + {ObjectEnd, nil, ""}, + {RawValue(` { "0" : 0 , "1" : 1 } `), nil, ""}, + }, + wantOut: `{"0":0,"1":1}` + "\n" + `{"0":0,"1":1}` + "\n", +}, { + name: jsontest.Name("ValidObject/DuplicateNames"), + opts: []Options{AllowDuplicateNames(true)}, + calls: []encoderMethodCall{ + {ObjectStart, nil, ""}, + {String("0"), nil, ""}, + {Uint(0), nil, ""}, + {String("0"), nil, ""}, + {Uint(0), nil, ""}, + {ObjectEnd, nil, ""}, + {RawValue(` { "0" : 0 , "0" : 0 } `), nil, ""}, + }, + wantOut: `{"0":0,"0":0}` + "\n" + `{"0":0,"0":0}` + "\n", +}, { + name: jsontest.Name("InvalidObject/DuplicateNames"), + calls: []encoderMethodCall{ + {ObjectStart, nil, ""}, + {String("0"), nil, ""}, + {ObjectStart, nil, ""}, + {ObjectEnd, nil, ""}, + {String("0"), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},`)), "/0"}, + {RawValue(`"0"`), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},`)), "/0"}, + {String("1"), nil, ""}, + {ObjectStart, nil, ""}, + {ObjectEnd, nil, ""}, + {String("0"), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, + {RawValue(`"0"`), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, + {String("1"), newDuplicateNameError(`"1"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, + {RawValue(`"1"`), newDuplicateNameError(`"1"`).withOffset(len64(`{"0":{},"1":{},`)), "/1"}, + {ObjectEnd, nil, ""}, + {RawValue(` { "0" : 0 , "1" : 1 , "0" : 0 } `), newDuplicateNameError(`"0"`).withOffset(len64(`{"0":{},"1":{}}` + "\n" + ` { "0" : 
0 , "1" : 1 , `)), ""}, + }, + wantOut: `{"0":{},"1":{}}` + "\n", +}, { + name: jsontest.Name("TruncatedArray/AfterStart"), + calls: []encoderMethodCall{ + {RawValue(`[`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedArray/AfterValue"), + calls: []encoderMethodCall{ + {RawValue(`[0`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedArray/AfterComma"), + calls: []encoderMethodCall{ + {RawValue(`[0,`), io.ErrUnexpectedEOF, ""}, + }, +}, { + name: jsontest.Name("TruncatedArray/MissingComma"), + calls: []encoderMethodCall{ + {RawValue(` [ "fizz" "buzz" ] `), newInvalidCharacterError("\"", "after array value (expecting ',' or ']')").withOffset(len64(` [ "fizz" `)), ""}, + }, +}, { + name: jsontest.Name("InvalidArray/MismatchingDelim"), + calls: []encoderMethodCall{ + {RawValue(` [ } `), newInvalidCharacterError("}", `at start of value`).withOffset(len64(` [ `)), ""}, + {ArrayStart, nil, ""}, + {ObjectEnd, errMismatchDelim.withOffset(len64(`[`)), ""}, + {RawValue(`}`), newInvalidCharacterError("}", "at start of value").withOffset(len64(`[`)), ""}, + {ArrayEnd, nil, ""}, + }, + wantOut: "[]\n", +}} + +// TestEncoderErrors tests that Encoder errors occur when we expect and +// leave the Encoder in a consistent state. +func TestEncoderErrors(t *testing.T) { + for _, td := range encoderErrorTestdata { + t.Run(path.Join(td.name.Name), func(t *testing.T) { + testEncoderErrors(t, td.name.Where, td.opts, td.calls, td.wantOut) + }) + } +} +func testEncoderErrors(t *testing.T, where jsontest.CasePos, opts []Options, calls []encoderMethodCall, wantOut string) { + dst := new(bytes.Buffer) + enc := NewEncoder(dst, opts...) 
+ for i, call := range calls { + var gotErr error + switch tokVal := call.in.(type) { + case Token: + gotErr = enc.WriteToken(tokVal) + case RawValue: + gotErr = enc.WriteValue(tokVal) + } + if !reflect.DeepEqual(gotErr, call.wantErr) { + t.Fatalf("%s: %d: error mismatch:\ngot %v\nwant %v", where, i, gotErr, call.wantErr) + } + if call.wantPointer != "" { + gotPointer := enc.StackPointer() + if gotPointer != call.wantPointer { + t.Fatalf("%s: %d: Encoder.StackPointer = %s, want %s", where, i, gotPointer, call.wantPointer) + } + } + } + gotOut := dst.String() + string(enc.s.unflushedBuffer()) + if gotOut != wantOut { + t.Fatalf("%s: output mismatch:\ngot %q\nwant %q", where, gotOut, wantOut) + } + gotOffset := int(enc.OutputOffset()) + wantOffset := len(wantOut) + if gotOffset != wantOffset { + t.Fatalf("%s: Encoder.OutputOffset = %v, want %v", where, gotOffset, wantOffset) + } +} diff --git a/jsontext/errors.go b/jsontext/errors.go new file mode 100644 index 0000000..2a5d078 --- /dev/null +++ b/jsontext/errors.go @@ -0,0 +1,60 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsontext + +import ( + "github.com/go-json-experiment/json/internal/jsonwire" +) + +const errorPrefix = "jsontext: " + +type ioError struct { + action string // either "read" or "write" + err error +} + +func (e *ioError) Error() string { + return errorPrefix + e.action + " error: " + e.err.Error() +} +func (e *ioError) Unwrap() error { + return e.err +} + +// SyntacticError is a description of a syntactic error that occurred when +// encoding or decoding JSON according to the grammar. +// +// The contents of this error as produced by this package may change over time. +type SyntacticError struct { + requireKeyedLiterals + nonComparable + + // ByteOffset indicates that an error occurred after this byte offset. 
+ ByteOffset int64 + str string +} + +func (e *SyntacticError) Error() string { + return errorPrefix + e.str +} +func (e *SyntacticError) withOffset(pos int64) error { + return &SyntacticError{ByteOffset: pos, str: e.str} +} + +func newDuplicateNameError[Bytes ~[]byte | ~string](quoted Bytes) *SyntacticError { + return &SyntacticError{str: "duplicate name " + string(quoted) + " in object"} +} + +func newInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) *SyntacticError { + what := jsonwire.QuoteRune(prefix) + return &SyntacticError{str: "invalid character " + what + " " + where} +} + +// TODO: Error types between "json", "jsontext", and "jsonwire" is a mess. +// Clean this up. +func init() { + // Inject behavior in "jsonwire" so that it can produce SyntacticError types. + jsonwire.NewError = func(s string) error { return &SyntacticError{str: s} } + jsonwire.ErrInvalidUTF8 = &SyntacticError{str: jsonwire.ErrInvalidUTF8.Error()} +} diff --git a/jsontext/export.go b/jsontext/export.go new file mode 100644 index 0000000..201ec9d --- /dev/null +++ b/jsontext/export.go @@ -0,0 +1,83 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsontext + +import ( + "io" + + "github.com/go-json-experiment/json/internal" +) + +// Internal is for internal use only. +// This is exempt from the Go compatibility agreement. +var Internal exporter + +type exporter struct{} + +// Export exposes internal functionality from "jsontext" to "json". +// This cannot be statically called by other packages since +// they cannot obtain a reference to the internal.AllowInternalUse value. 
+func (exporter) Export(p *internal.NotForPublicUse) export { + if p != &internal.AllowInternalUse { + panic("unauthorized call to Export") + } + return export{} +} + +// The export type exposes functionality to packages with visibility to +// the internal.AllowInternalUse variable. The "json" package uses this +// to modify low-level state in the Encoder and Decoder types. +// It mutates the state directly instead of calling ReadToken or WriteToken +// since this is more performant. The public APIs need to track state to ensure +// that users are constructing a valid JSON value, but the "json" implementation +// guarantees that it emits valid JSON by the structure of the code itself. +type export struct{} + +// Encoder returns a pointer to the underlying encoderState. +func (export) Encoder(e *Encoder) *encoderState { return &e.s } + +// Decoder returns a pointer to the underlying decoderState. +func (export) Decoder(d *Decoder) *decoderState { return &d.s } + +func (export) GetBufferedEncoder(o ...Options) *Encoder { + return getBufferedEncoder(o...) +} +func (export) PutBufferedEncoder(e *Encoder) { + putBufferedEncoder(e) +} + +func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder { + return getStreamingEncoder(w, o...) +} +func (export) PutStreamingEncoder(e *Encoder) { + putStreamingEncoder(e) +} + +func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder { + return getBufferedDecoder(b, o...) +} +func (export) PutBufferedDecoder(d *Decoder) { + putBufferedDecoder(d) +} + +func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder { + return getStreamingDecoder(r, o...) 
+} +func (export) PutStreamingDecoder(d *Decoder) { + putStreamingDecoder(d) +} + +func (export) NewDuplicateNameError(quoted []byte, pos int64) error { + return newDuplicateNameError(quoted).withOffset(pos) +} +func (export) NewInvalidCharacterError(prefix, where string, pos int64) error { + return newInvalidCharacterError(prefix, where).withOffset(pos) +} +func (export) NewMissingNameError(pos int64) error { + return errMissingName.withOffset(pos) +} +func (export) NewInvalidUTF8Error(pos int64) error { + return errInvalidUTF8.withOffset(pos) +} diff --git a/jsontext/fuzz_test.go b/jsontext/fuzz_test.go new file mode 100644 index 0000000..59d26ad --- /dev/null +++ b/jsontext/fuzz_test.go @@ -0,0 +1,207 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsontext + +import ( + "bytes" + "errors" + "io" + "math/rand" + "reflect" + "testing" + + "github.com/go-json-experiment/json/internal/jsontest" +) + +func FuzzCoder(f *testing.F) { + // Add a number of inputs to the corpus including valid and invalid data. + for _, td := range coderTestdata { + f.Add(int64(0), []byte(td.in)) + } + for _, td := range decoderErrorTestdata { + f.Add(int64(0), []byte(td.in)) + } + for _, td := range encoderErrorTestdata { + f.Add(int64(0), []byte(td.wantOut)) + } + for _, td := range jsontest.Data { + f.Add(int64(0), td.Data()) + } + + f.Fuzz(func(t *testing.T, seed int64, b []byte) { + var tokVals []tokOrVal + rn := rand.NewSource(seed) + + // Read a sequence of tokens or values. Skip the test for any errors + // since we expect this with randomly generated fuzz inputs. 
+ src := bytes.NewReader(b) + dec := NewDecoder(src) + for { + if rn.Int63()%8 > 0 { + tok, err := dec.ReadToken() + if err != nil { + if err == io.EOF { + break + } + t.Skipf("Decoder.ReadToken error: %v", err) + } + tokVals = append(tokVals, tok.Clone()) + } else { + val, err := dec.ReadValue() + if err != nil { + expectError := dec.PeekKind() == '}' || dec.PeekKind() == ']' + if expectError && errors.As(err, new(*SyntacticError)) { + continue + } + if err == io.EOF { + break + } + t.Skipf("Decoder.ReadValue error: %v", err) + } + tokVals = append(tokVals, append(zeroValue, val...)) + } + } + + // Write a sequence of tokens or values. Fail the test for any errors + // since the previous stage guarantees that the input is valid. + dst := new(bytes.Buffer) + enc := NewEncoder(dst) + for _, tokVal := range tokVals { + switch tokVal := tokVal.(type) { + case Token: + if err := enc.WriteToken(tokVal); err != nil { + t.Fatalf("Encoder.WriteToken error: %v", err) + } + case RawValue: + if err := enc.WriteValue(tokVal); err != nil { + t.Fatalf("Encoder.WriteValue error: %v", err) + } + } + } + + // Encoded output and original input must decode to the same thing. 
+ var got, want []Token + for dec := NewDecoder(bytes.NewReader(b)); dec.PeekKind() > 0; { + tok, err := dec.ReadToken() + if err != nil { + t.Fatalf("Decoder.ReadToken error: %v", err) + } + got = append(got, tok.Clone()) + } + for dec := NewDecoder(dst); dec.PeekKind() > 0; { + tok, err := dec.ReadToken() + if err != nil { + t.Fatalf("Decoder.ReadToken error: %v", err) + } + want = append(want, tok.Clone()) + } + if !equalTokens(got, want) { + t.Fatalf("mismatching output:\ngot %v\nwant %v", got, want) + } + }) +} + +func FuzzResumableDecoder(f *testing.F) { + for _, td := range resumableDecoderTestdata { + f.Add(int64(0), []byte(td)) + } + + f.Fuzz(func(t *testing.T, seed int64, b []byte) { + rn := rand.NewSource(seed) + + // Regardless of how many bytes the underlying io.Reader produces, + // the provided tokens, values, and errors should always be identical. + t.Run("ReadToken", func(t *testing.T) { + decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn}) + decWant := NewDecoder(bytes.NewReader(b)) + gotTok, gotErr := decGot.ReadToken() + wantTok, wantErr := decWant.ReadToken() + if gotTok.String() != wantTok.String() || !reflect.DeepEqual(gotErr, wantErr) { + t.Errorf("Decoder.ReadToken = (%v, %v), want (%v, %v)", gotTok, gotErr, wantTok, wantErr) + } + }) + t.Run("ReadValue", func(t *testing.T) { + decGot := NewDecoder(&FaultyBuffer{B: b, MaxBytes: 8, Rand: rn}) + decWant := NewDecoder(bytes.NewReader(b)) + gotVal, gotErr := decGot.ReadValue() + wantVal, wantErr := decWant.ReadValue() + if !reflect.DeepEqual(gotVal, wantVal) || !reflect.DeepEqual(gotErr, wantErr) { + t.Errorf("Decoder.ReadValue = (%s, %v), want (%s, %v)", gotVal, gotErr, wantVal, wantErr) + } + }) + }) +} + +func FuzzRawValueReformat(f *testing.F) { + for _, td := range rawValueTestdata { + f.Add([]byte(td.in)) + } + + // isValid reports whether b is valid according to the specified options. 
+ isValid := func(b []byte, opts ...Options) bool { + d := NewDecoder(bytes.NewReader(b), opts...) + _, errVal := d.ReadValue() + _, errEOF := d.ReadToken() + return errVal == nil && errEOF == io.EOF + } + + // stripWhitespace removes all JSON whitespace characters from the input. + stripWhitespace := func(in []byte) (out []byte) { + out = make([]byte, 0, len(in)) + for _, c := range in { + switch c { + case ' ', '\n', '\r', '\t': + default: + out = append(out, c) + } + } + return out + } + + f.Fuzz(func(t *testing.T, b []byte) { + validRFC7159 := isValid(b, AllowInvalidUTF8(true), AllowDuplicateNames(true)) + validRFC8259 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(true)) + validRFC7493 := isValid(b, AllowInvalidUTF8(false), AllowDuplicateNames(false)) + switch { + case !validRFC7159 && validRFC8259: + t.Errorf("invalid input per RFC 7159 implies invalid per RFC 8259") + case !validRFC8259 && validRFC7493: + t.Errorf("invalid input per RFC 8259 implies invalid per RFC 7493") + } + + gotValid := RawValue(b).IsValid() + wantValid := validRFC7493 + if gotValid != wantValid { + t.Errorf("RawValue.IsValid = %v, want %v", gotValid, wantValid) + } + + gotCompacted := RawValue(string(b)) + gotCompactOk := gotCompacted.Compact() == nil + wantCompactOk := validRFC7159 + if !bytes.Equal(stripWhitespace(gotCompacted), stripWhitespace(b)) { + t.Errorf("stripWhitespace(RawValue.Compact) = %s, want %s", stripWhitespace(gotCompacted), stripWhitespace(b)) + } + if gotCompactOk != wantCompactOk { + t.Errorf("RawValue.Compact success mismatch: got %v, want %v", gotCompactOk, wantCompactOk) + } + + gotIndented := RawValue(string(b)) + gotIndentOk := gotIndented.Indent("", " ") == nil + wantIndentOk := validRFC7159 + if !bytes.Equal(stripWhitespace(gotIndented), stripWhitespace(b)) { + t.Errorf("stripWhitespace(RawValue.Indent) = %s, want %s", stripWhitespace(gotIndented), stripWhitespace(b)) + } + if gotIndentOk != wantIndentOk { + t.Errorf("RawValue.Indent success 
mismatch: got %v, want %v", gotIndentOk, wantIndentOk) + } + + gotCanonicalized := RawValue(string(b)) + gotCanonicalizeOk := gotCanonicalized.Canonicalize() == nil + wantCanonicalizeOk := validRFC7493 + if gotCanonicalizeOk != wantCanonicalizeOk { + t.Errorf("RawValue.Canonicalize success mismatch: got %v, want %v", gotCanonicalizeOk, wantCanonicalizeOk) + } + }) +} diff --git a/coder_options.go b/jsontext/options.go similarity index 88% rename from coder_options.go rename to jsontext/options.go index 3a42508..1869a17 100644 --- a/coder_options.go +++ b/jsontext/options.go @@ -2,15 +2,24 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "strings" "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonopts" + "github.com/go-json-experiment/json/internal/jsonwire" ) +// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder], +// and [Decoder.Reset] with specific features. +// The Options type is identical to [encoding/json.Options] and +// [encoding/json/v2.Options]. Options from the other packages may +// be passed to functionality in this package, but are ignored. +// Options from this package may be used with the other packages. +type Options = jsonopts.Options + // AllowDuplicateNames specifies that JSON objects may contain // duplicate member names. Disabling the duplicate name check may provide // performance benefits, but breaks compliance with RFC 7493, section 2.3. @@ -18,8 +27,6 @@ import ( // which leaves the handling of duplicate names as unspecified behavior. // // This affects either encoding or decoding. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AllowDuplicateNames] instead. 
func AllowDuplicateNames(v bool) Options { if v { return jsonflags.AllowDuplicateNames | 1 @@ -34,8 +41,6 @@ func AllowDuplicateNames(v bool) Options { // RFC 7493, section 2.1, and RFC 8259, section 8.1. // // This affects either encoding or decoding. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AllowInvalidUTF8] instead. func AllowInvalidUTF8(v bool) Options { if v { return jsonflags.AllowInvalidUTF8 | 1 @@ -49,8 +54,6 @@ func AllowInvalidUTF8(v bool) Options { // the output is safe to embed within HTML. // // This only affects encoding and is ignored when decoding. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.EscapeForHTML] instead. func EscapeForHTML(v bool) Options { if v { return jsonflags.EscapeForHTML | 1 @@ -64,8 +67,6 @@ func EscapeForHTML(v bool) Options { // the output is valid to embed within JavaScript. See RFC 8259, section 12. // // This only affects encoding and is ignored when decoding. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.EscapeForJS] instead. func EscapeForJS(v bool) Options { if v { return jsonflags.EscapeForJS | 1 @@ -86,8 +87,6 @@ func EscapeForJS(v bool) Options { // which is also the formatting specified by RFC 8785, section 3.2.2.2. // // This only affects encoding and is ignored when decoding. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.WithEscapeFunc] instead. func WithEscapeFunc(fn func(rune) bool) Options { return jsonopts.EscapeFunc(fn) } @@ -101,8 +100,6 @@ func WithEscapeFunc(fn func(rune) bool) Options { // where no whitespace is emitted between JSON values. // // This only affects encoding and is ignored when decoding. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Expand] instead. func Expand(v bool) Options { if v { return jsonflags.Expand | 1 @@ -122,8 +119,6 @@ func Expand(v bool) Options { // // This only affects encoding and is ignored when decoding. 
// Use of this option implies [Expand] being set to true. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.WithIndent] instead. func WithIndent(indent string) Options { // Fast-path: Return a constant for common indents, which avoids allocating. // These are derived from analyzing the Go module proxy on 2023-07-01. @@ -144,7 +139,7 @@ func WithIndent(indent string) Options { // Otherwise, allocate for this unique value. if s := strings.Trim(indent, " \t"); len(s) > 0 { - panic("json: invalid character " + quoteRune([]byte(s)) + " in indent") + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent") } return jsonopts.Indent(indent) } @@ -157,11 +152,9 @@ func WithIndent(indent string) Options { // // This only affects encoding and is ignored when decoding. // Use of this option implies [Expand] being set to true. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.WithIndentPrefix] instead. func WithIndentPrefix(prefix string) Options { if s := strings.Trim(prefix, " \t"); len(s) > 0 { - panic("json: invalid character " + quoteRune([]byte(s)) + " in indent prefix") + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix") } return jsonopts.IndentPrefix(prefix) } diff --git a/pools.go b/jsontext/pools.go similarity index 70% rename from pools.go rename to jsontext/pools.go index ef12c6c..63941ba 100644 --- a/pools.go +++ b/jsontext/pools.go @@ -2,13 +2,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "bytes" "io" "math/bits" - "slices" "sync" ) @@ -42,14 +41,14 @@ type bufferStatistics struct { func getBufferedEncoder(opts ...Options) *Encoder { e := bufferedEncoderPool.Get().(*Encoder) - if e.buf == nil { + if e.s.Buf == nil { // Round up to nearest 2ⁿ to make best use of malloc size classes. // See runtime/sizeclasses.go on Go1.15. 
// Logical OR with 63 to ensure 64 as the minimum buffer size. - n := 1 << bits.Len(uint(e.bufStats.prevLen|63)) - e.buf = make([]byte, 0, n) + n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63)) + e.s.Buf = make([]byte, 0, n) } - e.reset(e.buf[:0], nil, opts...) + e.s.reset(e.s.Buf[:0], nil, opts...) return e } func putBufferedEncoder(e *Encoder) { @@ -68,16 +67,16 @@ func putBufferedEncoder(e *Encoder) { // // See https://go.dev/issue/27735. switch { - case cap(e.buf) <= 4<<10: // always recycle buffers smaller than 4KiB - e.bufStats.strikes = 0 - case cap(e.buf)/4 <= len(e.buf): // at least 25% utilization - e.bufStats.strikes = 0 - case e.bufStats.strikes < 4: // at most 4 strikes - e.bufStats.strikes++ + case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB + e.s.bufStats.strikes = 0 + case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization + e.s.bufStats.strikes = 0 + case e.s.bufStats.strikes < 4: // at most 4 strikes + e.s.bufStats.strikes++ default: // discard the buffer; too large and too often under-utilized - e.bufStats.strikes = 0 - e.bufStats.prevLen = len(e.buf) // heuristic for size to allocate next time - e.buf = nil + e.s.bufStats.strikes = 0 + e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time + e.s.Buf = nil } bufferedEncoderPool.Put(e) } @@ -85,20 +84,20 @@ func putBufferedEncoder(e *Encoder) { func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder { if _, ok := w.(*bytes.Buffer); ok { e := bytesBufferEncoderPool.Get().(*Encoder) - e.reset(nil, w, opts...) // buffer taken from bytes.Buffer + e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer return e } else { e := streamingEncoderPool.Get().(*Encoder) - e.reset(e.buf[:0], w, opts...) // preserve existing buffer + e.s.reset(e.s.Buf[:0], w, opts...) 
// preserve existing buffer return e } } func putStreamingEncoder(e *Encoder) { - if _, ok := e.wr.(*bytes.Buffer); ok { + if _, ok := e.s.wr.(*bytes.Buffer); ok { bytesBufferEncoderPool.Put(e) } else { - if cap(e.buf) > 64<<10 { - e.buf = nil // avoid pinning arbitrarily large amounts of memory + if cap(e.s.Buf) > 64<<10 { + e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory } streamingEncoderPool.Put(e) } @@ -121,7 +120,7 @@ var ( func getBufferedDecoder(b []byte, opts ...Options) *Decoder { d := bufferedDecoderPool.Get().(*Decoder) - d.reset(b, nil, opts...) + d.s.reset(b, nil, opts...) return d } func putBufferedDecoder(d *Decoder) { @@ -131,47 +130,21 @@ func putBufferedDecoder(d *Decoder) { func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder { if _, ok := r.(*bytes.Buffer); ok { d := bytesBufferDecoderPool.Get().(*Decoder) - d.reset(nil, r, opts...) // buffer taken from bytes.Buffer + d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer return d } else { d := streamingDecoderPool.Get().(*Decoder) - d.reset(d.buf[:0], r, opts...) // preserve existing buffer + d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer return d } } func putStreamingDecoder(d *Decoder) { - if _, ok := d.rd.(*bytes.Buffer); ok { + if _, ok := d.s.rd.(*bytes.Buffer); ok { bytesBufferDecoderPool.Put(d) } else { - if cap(d.buf) > 64<<10 { - d.buf = nil // avoid pinning arbitrarily large amounts of memory + if cap(d.s.buf) > 64<<10 { + d.s.buf = nil // avoid pinning arbitrarily large amounts of memory } streamingDecoderPool.Put(d) } } - -var stringsPools = &sync.Pool{New: func() any { return new(stringSlice) }} - -type stringSlice []string - -// getStrings returns a non-nil pointer to a slice with length n. 
-func getStrings(n int) *stringSlice { - s := stringsPools.Get().(*stringSlice) - if cap(*s) < n { - *s = make([]string, n) - } - *s = (*s)[:n] - return s -} - -func putStrings(s *stringSlice) { - if cap(*s) > 1<<10 { - *s = nil // avoid pinning arbitrarily large amounts of memory - } - stringsPools.Put(s) -} - -// Sort sorts the string slice according to RFC 8785, section 3.2.3. -func (ss *stringSlice) Sort() { - slices.SortFunc(*ss, func(x, y string) int { return compareUTF16(x, y) }) -} diff --git a/quote.go b/jsontext/quote.go similarity index 81% rename from quote.go rename to jsontext/quote.go index 67d0cbc..9fe7aa2 100644 --- a/quote.go +++ b/jsontext/quote.go @@ -2,7 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext + +import "github.com/go-json-experiment/json/internal/jsonwire" var errInvalidUTF8 = &SyntacticError{str: "invalid UTF-8 within string"} @@ -11,10 +13,8 @@ var errInvalidUTF8 = &SyntacticError{str: "invalid UTF-8 within string"} // It uses the minimal string representation per RFC 8785, section 3.2.2.2. // Invalid UTF-8 bytes are replaced with the Unicode replacement character // and an error is returned at the end indicating the presence of invalid UTF-8. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AppendQuote] instead. func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { - return appendString(dst, src, true, nil) + return jsonwire.AppendQuote(dst, src, true, nil) } // AppendUnquote appends the decoded interpretation of src as a @@ -23,8 +23,6 @@ func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) // Invalid UTF-8 bytes are replaced with the Unicode replacement character // and an error is returned at the end indicating the presence of invalid UTF-8. // Any trailing bytes after the JSON string literal results in an error. 
-// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AppendUnquote] instead. func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { - return unescapeString(dst, src) + return jsonwire.AppendUnquote(dst, src) } diff --git a/state.go b/jsontext/state.go similarity index 80% rename from state.go rename to jsontext/state.go index 631fae8..1a8174c 100644 --- a/state.go +++ b/jsontext/state.go @@ -2,11 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "math" "strconv" + + "github.com/go-json-experiment/json/internal/jsonwire" ) var ( @@ -16,34 +18,34 @@ var ( errMissingComma = &SyntacticError{str: "missing character ',' after object or array value"} errMismatchDelim = &SyntacticError{str: "mismatching structural token for object or array"} errMaxDepth = &SyntacticError{str: "exceeded max depth"} -) -const errInvalidNamespace = jsonError("object namespace is in an invalid state") + errInvalidNamespace = &SyntacticError{str: "object namespace is in an invalid state"} +) // Per RFC 8259, section 9, implementations may enforce a maximum depth. // Such a limit is necessary to prevent stack overflows. const maxNestingDepth = 10000 type state struct { - // tokens validates whether the next token kind is valid. - tokens stateMachine + // Tokens validates whether the next token kind is valid. + Tokens stateMachine - // names is a stack of object names. + // Names is a stack of object names. // Not used if AllowDuplicateNames is true. - names objectNameStack + Names objectNameStack - // namespaces is a stack of object namespaces. + // Namespaces is a stack of object namespaces. // For performance reasons, Encoder or Decoder may not update this // if Marshal or Unmarshal is able to track names in a more efficient way. // See makeMapArshaler and makeStructArshaler. // Not used if AllowDuplicateNames is true. 
- namespaces objectNamespaceStack + Namespaces objectNamespaceStack } func (s *state) reset() { - s.tokens.reset() - s.names.reset() - s.namespaces.reset() + s.Tokens.reset() + s.Names.reset() + s.Namespaces.reset() } // appendStackPointer appends a JSON Pointer (RFC 6901) to the current value. @@ -53,16 +55,16 @@ func (s *state) reset() { // Invariant: Must call s.names.copyQuotedBuffer beforehand. func (s state) appendStackPointer(b []byte) []byte { var objectDepth int - for i := 1; i < s.tokens.depth(); i++ { - e := s.tokens.index(i) - if e.length() == 0 { + for i := 1; i < s.Tokens.Depth(); i++ { + e := s.Tokens.index(i) + if e.Length() == 0 { break // empty object or array } b = append(b, '/') switch { case e.isObject(): - if objectDepth < s.names.length() { - for _, c := range s.names.getUnquoted(objectDepth) { + if objectDepth < s.Names.length() { + for _, c := range s.Names.getUnquoted(objectDepth) { // Per RFC 6901, section 3, escape '~' and '/' characters. switch c { case '~': @@ -77,11 +79,11 @@ func (s state) appendStackPointer(b []byte) []byte { // Since the names stack is unpopulated, the name is unknown. // As a best-effort replacement, use the numeric member index. // While inaccurate, it produces a syntactically valid pointer. - b = strconv.AppendUint(b, uint64((e.length()-1)/2), 10) + b = strconv.AppendUint(b, uint64((e.Length()-1)/2), 10) } objectDepth++ case e.isArray(): - b = strconv.AppendUint(b, uint64(e.length()-1), 10) + b = strconv.AppendUint(b, uint64(e.Length()-1), 10) } } return b @@ -100,51 +102,51 @@ func (s state) appendStackPointer(b []byte) []byte { // For performance, most methods are carefully written to be inlinable. // The zero value is a valid state machine ready for use. type stateMachine struct { - stack []stateEntry - last stateEntry + Stack []stateEntry + Last stateEntry } // reset resets the state machine. // The machine always starts with a minimum depth of 1. 
func (m *stateMachine) reset() { - m.stack = m.stack[:0] - if cap(m.stack) > 1<<10 { - m.stack = nil + m.Stack = m.Stack[:0] + if cap(m.Stack) > 1<<10 { + m.Stack = nil } - m.last = stateTypeArray + m.Last = stateTypeArray } -// depth is the current nested depth of JSON objects and arrays. +// Depth is the current nested depth of JSON objects and arrays. // It is one-indexed (i.e., top-level values have a depth of 1). -func (m stateMachine) depth() int { - return len(m.stack) + 1 +func (m stateMachine) Depth() int { + return len(m.Stack) + 1 } // index returns a reference to the ith entry. // It is only valid until the next push method call. func (m *stateMachine) index(i int) *stateEntry { - if i == len(m.stack) { - return &m.last + if i == len(m.Stack) { + return &m.Last } - return &m.stack[i] + return &m.Stack[i] } -// depthLength reports the current nested depth and +// DepthLength reports the current nested depth and // the length of the last JSON object or array. -func (m stateMachine) depthLength() (int, int) { - return m.depth(), m.last.length() +func (m stateMachine) DepthLength() (int, int) { + return m.Depth(), m.Last.Length() } // appendLiteral appends a JSON literal as the next token in the sequence. // If an error is returned, the state is not mutated. func (m *stateMachine) appendLiteral() error { switch { - case m.last.needObjectName(): + case m.Last.NeedObjectName(): return errMissingName - case !m.last.isValidNamespace(): + case !m.Last.isValidNamespace(): return errInvalidNamespace default: - m.last.increment() + m.Last.Increment() return nil } } @@ -153,10 +155,10 @@ func (m *stateMachine) appendLiteral() error { // If an error is returned, the state is not mutated. 
func (m *stateMachine) appendString() error { switch { - case !m.last.isValidNamespace(): + case !m.Last.isValidNamespace(): return errInvalidNamespace default: - m.last.increment() + m.Last.Increment() return nil } } @@ -171,16 +173,16 @@ func (m *stateMachine) appendNumber() error { // If an error is returned, the state is not mutated. func (m *stateMachine) pushObject() error { switch { - case m.last.needObjectName(): + case m.Last.NeedObjectName(): return errMissingName - case !m.last.isValidNamespace(): + case !m.Last.isValidNamespace(): return errInvalidNamespace - case len(m.stack) == maxNestingDepth: + case len(m.Stack) == maxNestingDepth: return errMaxDepth default: - m.last.increment() - m.stack = append(m.stack, m.last) - m.last = stateTypeObject + m.Last.Increment() + m.Stack = append(m.Stack, m.Last) + m.Last = stateTypeObject return nil } } @@ -189,15 +191,15 @@ func (m *stateMachine) pushObject() error { // If an error is returned, the state is not mutated. func (m *stateMachine) popObject() error { switch { - case !m.last.isObject(): + case !m.Last.isObject(): return errMismatchDelim - case m.last.needObjectValue(): + case m.Last.needObjectValue(): return errMissingValue - case !m.last.isValidNamespace(): + case !m.Last.isValidNamespace(): return errInvalidNamespace default: - m.last = m.stack[len(m.stack)-1] - m.stack = m.stack[:len(m.stack)-1] + m.Last = m.Stack[len(m.Stack)-1] + m.Stack = m.Stack[:len(m.Stack)-1] return nil } } @@ -206,16 +208,16 @@ func (m *stateMachine) popObject() error { // If an error is returned, the state is not mutated. 
func (m *stateMachine) pushArray() error { switch { - case m.last.needObjectName(): + case m.Last.NeedObjectName(): return errMissingName - case !m.last.isValidNamespace(): + case !m.Last.isValidNamespace(): return errInvalidNamespace - case len(m.stack) == maxNestingDepth: + case len(m.Stack) == maxNestingDepth: return errMaxDepth default: - m.last.increment() - m.stack = append(m.stack, m.last) - m.last = stateTypeArray + m.Last.Increment() + m.Stack = append(m.Stack, m.Last) + m.Last = stateTypeArray return nil } } @@ -224,43 +226,43 @@ func (m *stateMachine) pushArray() error { // If an error is returned, the state is not mutated. func (m *stateMachine) popArray() error { switch { - case !m.last.isArray() || len(m.stack) == 0: // forbid popping top-level virtual JSON array + case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array return errMismatchDelim - case !m.last.isValidNamespace(): + case !m.Last.isValidNamespace(): return errInvalidNamespace default: - m.last = m.stack[len(m.stack)-1] - m.stack = m.stack[:len(m.stack)-1] + m.Last = m.Stack[len(m.Stack)-1] + m.Stack = m.Stack[:len(m.Stack)-1] return nil } } -// needIndent reports whether indent whitespace should be injected. +// NeedIndent reports whether indent whitespace should be injected. // A zero value means that no whitespace should be injected. // A positive value means '\n', indentPrefix, and (n-1) copies of indentBody // should be appended to the output immediately before the next token. 
-func (m stateMachine) needIndent(next Kind) (n int) { +func (m stateMachine) NeedIndent(next Kind) (n int) { willEnd := next == '}' || next == ']' switch { - case m.depth() == 1: + case m.Depth() == 1: return 0 // top-level values are never indented - case m.last.length() == 0 && willEnd: + case m.Last.Length() == 0 && willEnd: return 0 // an empty object or array is never indented - case m.last.length() == 0 || m.last.needImplicitComma(next): - return m.depth() + case m.Last.Length() == 0 || m.Last.needImplicitComma(next): + return m.Depth() case willEnd: - return m.depth() - 1 + return m.Depth() - 1 default: return 0 } } -// mayAppendDelim appends a colon or comma that may precede the next token. -func (m stateMachine) mayAppendDelim(b []byte, next Kind) []byte { +// MayAppendDelim appends a colon or comma that may precede the next token. +func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte { switch { - case m.last.needImplicitColon(): + case m.Last.needImplicitColon(): return append(b, ':') - case m.last.needImplicitComma(next) && len(m.stack) != 0: // comma not needed for top-level values + case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values return append(b, ',') default: return b @@ -272,9 +274,9 @@ func (m stateMachine) mayAppendDelim(b []byte, next Kind) []byte { // A zero value means no delimiter should be emitted. func (m stateMachine) needDelim(next Kind) (delim byte) { switch { - case m.last.needImplicitColon(): + case m.Last.needImplicitColon(): return ':' - case m.last.needImplicitComma(next) && len(m.stack) != 0: // comma not needed for top-level values + case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values return ',' default: return 0 @@ -296,15 +298,15 @@ func (m stateMachine) checkDelim(delim byte, next Kind) error { } } -// invalidateDisabledNamespaces marks all disabled namespaces as invalid. 
+// InvalidateDisabledNamespaces marks all disabled namespaces as invalid. // // For efficiency, Marshal and Unmarshal may disable namespaces since there are // more efficient ways to track duplicate names. However, if an error occurs, // the namespaces in Encoder or Decoder will be left in an inconsistent state. // Mark the namespaces as invalid so that future method calls on // Encoder or Decoder will return an error. -func (m *stateMachine) invalidateDisabledNamespaces() { - for i := 0; i < m.depth(); i++ { +func (m *stateMachine) InvalidateDisabledNamespaces() { + for i := 0; i < m.Depth(); i++ { e := m.index(i) if !e.isActiveNamespace() { e.invalidateNamespace() @@ -338,9 +340,9 @@ const ( stateCountEven stateEntry = 0x0000_0000_0000_0000 ) -// length reports the number of elements in the JSON object or array. +// Length reports the number of elements in the JSON object or array. // Each name and value in an object entry is treated as a separate element. -func (e stateEntry) length() int { +func (e stateEntry) Length() int { return int(e & stateCountMask) } @@ -354,9 +356,9 @@ func (e stateEntry) isArray() bool { return e&stateTypeMask == stateTypeArray } -// needObjectName reports whether the next token must be a JSON string, +// NeedObjectName reports whether the next token must be a JSON string, // which is necessary for JSON object names. -func (e stateEntry) needObjectName() bool { +func (e stateEntry) NeedObjectName() bool { return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven } @@ -376,13 +378,13 @@ func (e stateEntry) needObjectValue() bool { // which always occurs after a value in a JSON object or array // before the next value (or name). func (e stateEntry) needImplicitComma(next Kind) bool { - return !e.needObjectValue() && e.length() > 0 && next != '}' && next != ']' + return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']' } -// increment increments the number of elements for the current object or array. 
+// Increment increments the number of elements for the current object or array. // This assumes that overflow won't practically be an issue since // 1< 0 { startOffset = ns.offsets[i-1] } - if n := consumeSimpleString(quotedName); n > 0 { + if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 { ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...) } else { - ns.unquotedNames, _ = unescapeString(ns.unquotedNames[:startOffset], quotedName) + ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName) } ns.offsets[i] = len(ns.unquotedNames) } @@ -574,14 +576,14 @@ func (nss *objectNamespaceStack) reset() { func (nss *objectNamespaceStack) push() { if cap(*nss) > len(*nss) { *nss = (*nss)[:len(*nss)+1] - nss.last().reset() + nss.Last().reset() } else { *nss = append(*nss, objectNamespace{}) } } -// last returns a pointer to the last JSON object namespace. -func (nss objectNamespaceStack) last() *objectNamespace { +// Last returns a pointer to the last JSON object namespace. +func (nss objectNamespaceStack) Last() *objectNamespace { return &nss[len(nss)-1] } @@ -650,13 +652,13 @@ func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool { } return ns.insert(name, !isVerbatim) } -func (ns *objectNamespace) insertUnquoted(name []byte) bool { +func (ns *objectNamespace) InsertUnquoted(name []byte) bool { return ns.insert(name, false) } func (ns *objectNamespace) insert(name []byte, quoted bool) bool { var allNames []byte if quoted { - allNames, _ = unescapeString(ns.allUnquotedNames, name) + allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name) } else { allNames = append(ns.allUnquotedNames, name...) 
} @@ -711,46 +713,3 @@ func (ns *objectNamespace) removeLast() { ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]] } } - -type uintSet64 uint64 - -func (s uintSet64) has(i uint) bool { return s&(1<<i) > 0 } -func (s *uintSet64) set(i uint) { *s |= 1 << i } - -// uintSet is a set of unsigned integers. -// It is optimized for most integers being close to zero. -type uintSet struct { - lo uintSet64 - hi []uintSet64 -} - -// has reports whether i is in the set. -func (s *uintSet) has(i uint) bool { - if i < 64 { - return s.lo.has(i) - } else { - i -= 64 - iHi, iLo := int(i/64), i%64 - return iHi < len(s.hi) && s.hi[iHi].has(iLo) - } -} - -// insert inserts i into the set and reports whether it was the first insertion. -func (s *uintSet) insert(i uint) bool { - // TODO: Make this inlinable at least for the lower 64-bit case. - if i < 64 { - has := s.lo.has(i) - s.lo.set(i) - return !has - } else { - i -= 64 - iHi, iLo := int(i/64), i%64 - if iHi >= len(s.hi) { - s.hi = append(s.hi, make([]uintSet64, iHi+1-len(s.hi))...) - s.hi = s.hi[:cap(s.hi)] - } - has := s.hi[iHi].has(iLo) - s.hi[iHi].set(iLo) - return !has - } -} diff --git a/state_test.go b/jsontext/state_test.go similarity index 87% rename from state_test.go rename to jsontext/state_test.go index 7868796..6946c8b 100644 --- a/state_test.go +++ b/jsontext/state_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
-package json +package jsontext import ( "fmt" @@ -157,9 +157,9 @@ func TestStateMachine(t *testing.T) { switch op := op.(type) { case stackLengths: var got []int - for i := 0; i < state.depth(); i++ { + for i := 0; i < state.Depth(); i++ { e := state.index(i) - got = append(got, e.length()) + got = append(got, e.Length()) } want := []int(op) if !reflect.DeepEqual(got, want) { @@ -299,7 +299,7 @@ func TestObjectNamespace(t *testing.T) { t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted) } if gotInserted { - b, _ := unescapeString(nil, []byte(op.name)) + b, _ := AppendUnquote(nil, []byte(op.name)) wantNames = append(wantNames, string(b)) } case removeLast: @@ -326,7 +326,7 @@ func TestObjectNamespace(t *testing.T) { // Insert a large number of names. for i := 0; i < 64; i++ { - ns.insertUnquoted([]byte(fmt.Sprintf(`name%d`, i))) + ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i))) } // Verify that we did switch to using a Go map. @@ -335,54 +335,3 @@ func TestObjectNamespace(t *testing.T) { } } } - -func TestUintSet(t *testing.T) { - type operation any // has | insert - type has struct { - in uint - want bool - } - type insert struct { - in uint - want bool - } - - // Sequence of operations to perform (order matters). 
- ops := []operation{ - has{0, false}, - has{63, false}, - has{64, false}, - has{1234, false}, - insert{3, true}, - has{2, false}, - has{3, true}, - has{4, false}, - has{63, false}, - insert{3, false}, - insert{63, true}, - has{63, true}, - insert{64, true}, - insert{64, false}, - has{64, true}, - insert{3264, true}, - has{3264, true}, - insert{3, false}, - has{3, true}, - } - - var us uintSet - for i, op := range ops { - switch op := op.(type) { - case has: - if got := us.has(op.in); got != op.want { - t.Fatalf("%d: uintSet.has(%v) = %v, want %v", i, op.in, got, op.want) - } - case insert: - if got := us.insert(op.in); got != op.want { - t.Fatalf("%d: uintSet.insert(%v) = %v, want %v", i, op.in, got, op.want) - } - default: - panic(fmt.Sprintf("unknown operation: %T", op)) - } - } -} diff --git a/jsontext/text.go b/jsontext/text.go deleted file mode 100644 index 22d9ba9..0000000 --- a/jsontext/text.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package jsontext implements syntactic processing of JSON. -// -// At present, the declarations in this package are aliases -// to the equivalent declarations in the v2 "json" package -// as those declarations will be directly moved here in the future. 
-package jsontext - -import ( - "io" - - "github.com/go-json-experiment/json" - "github.com/go-json-experiment/json/internal/jsonopts" -) - -type ( - Options = jsonopts.Options - - Encoder = json.Encoder - Decoder = json.Decoder - - Kind = json.Kind - Token = json.Token - Value = json.RawValue - - SyntacticError = json.SyntacticError -) - -func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { - return json.AppendQuote(dst, src) -} - -func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { - return json.AppendUnquote(dst, src) -} - -func NewEncoder(w io.Writer, opts ...Options) *Encoder { return json.NewEncoder(w, opts...) } -func NewDecoder(r io.Reader, opts ...Options) *Decoder { return json.NewDecoder(r, opts...) } - -func AllowDuplicateNames(v bool) Options { return json.AllowDuplicateNames(v) } -func AllowInvalidUTF8(v bool) Options { return json.AllowInvalidUTF8(v) } -func EscapeForHTML(v bool) Options { return json.EscapeForHTML(v) } -func EscapeForJS(v bool) Options { return json.EscapeForJS(v) } -func WithEscapeFunc(fn func(rune) bool) Options { return json.WithEscapeFunc(fn) } -func Expand(v bool) Options { return json.Expand(v) } -func WithIndent(indent string) Options { return json.WithIndent(indent) } -func WithIndentPrefix(prefix string) Options { return json.WithIndentPrefix(prefix) } - -var ( - Null Token = json.Null - False Token = json.False - True Token = json.True - - ObjectStart Token = json.ObjectStart - ObjectEnd Token = json.ObjectEnd - ArrayStart Token = json.ArrayStart - ArrayEnd Token = json.ArrayEnd -) - -func Bool(b bool) Token { return json.Bool(b) } -func Float(n float64) Token { return json.Float(n) } -func Int(n int64) Token { return json.Int(n) } -func String(s string) Token { return json.String(s) } -func Uint(n uint64) Token { return json.Uint(n) } diff --git a/token.go b/jsontext/token.go similarity index 85% rename from token.go rename to jsontext/token.go index 
84d549b..2bd01d7 100644 --- a/token.go +++ b/jsontext/token.go @@ -2,11 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "math" "strconv" + + "github.com/go-json-experiment/json/internal/jsonwire" ) // NOTE: Token is analogous to v1 json.Token. @@ -30,8 +32,6 @@ const ( // A Token cannot represent entire array or object values, while a RawValue can. // There is no Token to represent commas and colons since // these structural tokens can be inferred from the surrounding context. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Token] instead. type Token struct { nonComparable @@ -85,21 +85,14 @@ type Token struct { // TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues? var ( - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.Null] instead. - Null Token = rawToken("null") - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.False] instead. + Null Token = rawToken("null") False Token = rawToken("false") - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.True] instead. - True Token = rawToken("true") + True Token = rawToken("true") - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.ObjectStart] instead. ObjectStart Token = rawToken("{") - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.ObjectEnd] instead. - ObjectEnd Token = rawToken("}") - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.ArrayStart] instead. - ArrayStart Token = rawToken("[") - // Deprecated: Use [github.com/go-json-experiment/json/jsontext.ArrayEnd] instead. 
- ArrayEnd Token = rawToken("]") + ObjectEnd Token = rawToken("}") + ArrayStart Token = rawToken("[") + ArrayEnd Token = rawToken("]") zeroString Token = rawToken(`""`) zeroNumber Token = rawToken(`0`) @@ -114,8 +107,6 @@ func rawToken(s string) Token { } // Bool constructs a Token representing a JSON boolean. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Bool] instead. func Bool(b bool) Token { if b { return True @@ -126,8 +117,6 @@ func Bool(b bool) Token { // String constructs a Token representing a JSON string. // The provided string should contain valid UTF-8, otherwise invalid characters // may be mangled as the Unicode replacement character. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.String] instead. func String(s string) Token { if len(s) == 0 { return zeroString @@ -138,8 +127,6 @@ func String(s string) Token { // Float constructs a Token representing a JSON number. // The values NaN, +Inf, and -Inf will be represented // as a JSON string with the values "NaN", "Infinity", and "-Infinity". -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Float] instead. func Float(n float64) Token { switch { case math.Float64bits(n) == 0: @@ -155,8 +142,6 @@ func Float(n float64) Token { } // Int constructs a Token representing a JSON number from an int64. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Int] instead. func Int(n int64) Token { if n == 0 { return zeroNumber @@ -165,8 +150,6 @@ func Int(n int64) Token { } // Uint constructs a Token representing a JSON number from a uint64. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Uint] instead. func Uint(n uint64) Token { if n == 0 { return zeroNumber @@ -203,7 +186,7 @@ func (t Token) Clone() Token { panic(invalidTokenPanic) } // TODO(https://go.dev/issue/45038): Use bytes.Clone. - buf := append([]byte(nil), raw.previousBuffer()...) + buf := append([]byte(nil), raw.PreviousBuffer()...) 
return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} } return t @@ -224,20 +207,20 @@ func (t Token) Bool() bool { // appendString appends a JSON string to dst and returns it. // It panics if t is not a JSON string. -func (t Token) appendString(dst []byte, validateUTF8, preserveRaw bool, escape *escapeRunes) ([]byte, error) { +func (t Token) appendString(dst []byte, validateUTF8, preserveRaw bool, escape *jsonwire.EscapeRunes) ([]byte, error) { if raw := t.raw; raw != nil { // Handle raw string value. - buf := raw.previousBuffer() + buf := raw.PreviousBuffer() if Kind(buf[0]) == '"' { - if escape.escapeFunc == nil && consumeSimpleString(buf) == len(buf) { + if !escape.HasEscapeFunc() && jsonwire.ConsumeSimpleString(buf) == len(buf) { return append(dst, buf...), nil } - dst, _, err := reformatString(dst, buf, validateUTF8, preserveRaw, escape) + dst, _, err := jsonwire.ReformatString(dst, buf, validateUTF8, preserveRaw, escape) return dst, err } } else if len(t.str) != 0 && t.num == 0 { // Handle exact string value. - return appendString(dst, t.str, validateUTF8, escape) + return jsonwire.AppendQuote(dst, t.str, validateUTF8, escape) } panic("invalid JSON token kind: " + t.Kind().String()) @@ -261,11 +244,11 @@ func (t Token) string() (string, []byte) { if uint64(raw.previousOffsetStart()) != t.num { panic(invalidTokenPanic) } - buf := raw.previousBuffer() + buf := raw.PreviousBuffer() if buf[0] == '"' { - // TODO: Preserve valueFlags in Token? - isVerbatim := consumeSimpleString(buf) == len(buf) - return "", unescapeStringMayCopy(buf, isVerbatim) + // TODO: Preserve ValueFlags in Token? + isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) + return "", jsonwire.UnquoteMayCopy(buf, isVerbatim) } // Handle tokens that are not JSON strings for fmt.Stringer. 
return "", buf @@ -277,7 +260,7 @@ func (t Token) string() (string, []byte) { if t.num > 0 { switch t.str[0] { case 'f': - return string(appendNumber(nil, math.Float64frombits(t.num), 64)), nil + return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil case 'i': return strconv.FormatInt(int64(t.num), 10), nil case 'u': @@ -292,19 +275,19 @@ func (t Token) string() (string, []byte) { func (t Token) appendNumber(dst []byte, canonicalize bool) ([]byte, error) { if raw := t.raw; raw != nil { // Handle raw number value. - buf := raw.previousBuffer() + buf := raw.PreviousBuffer() if Kind(buf[0]).normalize() == '0' { if !canonicalize { return append(dst, buf...), nil } - dst, _, err := reformatNumber(dst, buf, canonicalize) + dst, _, err := jsonwire.ReformatNumber(dst, buf, canonicalize) return dst, err } } else if t.num != 0 { // Handle exact number value. switch t.str[0] { case 'f': - return appendNumber(dst, math.Float64frombits(t.num), 64), nil + return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil case 'i': return strconv.AppendInt(dst, int64(t.num), 10), nil case 'u': @@ -325,9 +308,9 @@ func (t Token) Float() float64 { if uint64(raw.previousOffsetStart()) != t.num { panic(invalidTokenPanic) } - buf := raw.previousBuffer() + buf := raw.PreviousBuffer() if Kind(buf[0]).normalize() == '0' { - fv, _ := parseFloat(buf, 64) + fv, _ := jsonwire.ParseFloat(buf, 64) return fv } } else if t.num != 0 { @@ -369,11 +352,11 @@ func (t Token) Int() int64 { panic(invalidTokenPanic) } neg := false - buf := raw.previousBuffer() + buf := raw.PreviousBuffer() if len(buf) > 0 && buf[0] == '-' { neg, buf = true, buf[1:] } - if numAbs, ok := parseDecUint(buf); ok { + if numAbs, ok := jsonwire.ParseUint(buf); ok { if neg { if numAbs > -minInt64 { return minInt64 @@ -430,11 +413,11 @@ func (t Token) Uint() uint64 { panic(invalidTokenPanic) } neg := false - buf := raw.previousBuffer() + buf := raw.PreviousBuffer() if len(buf) > 0 && buf[0] == '-' { 
neg, buf = true, buf[1:] } - if num, ok := parseDecUint(buf); ok { + if num, ok := jsonwire.ParseUint(buf); ok { if neg { return minUint64 } @@ -502,8 +485,6 @@ func (t Token) Kind() Kind { // // An invalid kind is usually represented using 0, // but may be non-zero due to invalid JSON data. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Kind] instead. type Kind byte const invalidKind Kind = 0 @@ -530,7 +511,7 @@ func (k Kind) String() string { case ']': return "]" default: - return "" + return "" } } diff --git a/token_test.go b/jsontext/token_test.go similarity index 99% rename from token_test.go rename to jsontext/token_test.go index 513f2f9..457aefb 100644 --- a/token_test.go +++ b/jsontext/token_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "math" diff --git a/value.go b/jsontext/value.go similarity index 75% rename from value.go rename to jsontext/value.go index 39de272..4620bf4 100644 --- a/value.go +++ b/jsontext/value.go @@ -2,36 +2,35 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( "bytes" - "cmp" "errors" "io" "slices" "strings" "sync" - "unicode/utf16" - "unicode/utf8" "github.com/go-json-experiment/json/internal/jsonflags" + "github.com/go-json-experiment/json/internal/jsonwire" ) -// NOTE: RawValue is analogous to v1 json.RawMessage. +// NOTE: Value is analogous to v1 json.RawMessage. 
-// RawValue represents a single raw JSON value, which may be one of the following: +// Value represents a single raw JSON value, which may be one of the following: // - a JSON literal (i.e., null, true, or false) // - a JSON string (e.g., "hello, world!") // - a JSON number (e.g., 123.456) // - an entire JSON object (e.g., {"fizz":"buzz"} ) // - an entire JSON array (e.g., [1,2,3] ) // -// RawValue can represent entire array or object values, while Token cannot. -// RawValue may contain leading and/or trailing whitespace. -// -// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Value] instead. -type RawValue []byte +// Value can represent entire array or object values, while Token cannot. +// Value may contain leading and/or trailing whitespace. +type Value []byte + +// Deprecated: Use [Value] instead. +type RawValue = Value // Clone returns a copy of v. func (v RawValue) Clone() RawValue { @@ -136,7 +135,7 @@ func (v *RawValue) UnmarshalJSON(b []byte) error { // Kind returns the starting token kind. // For a valid value, this will never include '}' or ']'. func (v RawValue) Kind() Kind { - if v := v[consumeWhitespace(v):]; len(v) > 0 { + if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 { return Kind(v[0]).normalize() } return invalidKind @@ -146,7 +145,7 @@ func (v *RawValue) reformat(canonical, multiline bool, prefix, indent string) er // Write the entire value to reformat all tokens and whitespace. 
e := getBufferedEncoder() defer putBufferedEncoder(e) - eo := &e.options + eo := &e.s.Struct if canonical { eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 0) // per RFC 8785, section 3.2.4 eo.Flags.Set(jsonflags.AllowDuplicateNames | 0) // per RFC 8785, section 3.1 @@ -158,10 +157,10 @@ func (v *RawValue) reformat(canonical, multiline bool, prefix, indent string) er eo.Flags.Set(jsonflags.Expand | 0) // per RFC 8785, section 3.2.1 } else { if s := strings.TrimLeft(prefix, " \t"); len(s) > 0 { - panic("json: invalid character " + quoteRune([]byte(s)) + " in indent prefix") + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix") } if s := strings.TrimLeft(indent, " \t"); len(s) > 0 { - panic("json: invalid character " + quoteRune([]byte(s)) + " in indent") + panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent") } eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 1) eo.Flags.Set(jsonflags.AllowDuplicateNames | 1) @@ -177,7 +176,7 @@ func (v *RawValue) reformat(canonical, multiline bool, prefix, indent string) er } } eo.Flags.Set(jsonflags.OmitTopLevelNewline | 1) - if err := e.WriteValue(*v); err != nil { + if err := e.s.WriteValue(*v); err != nil { return err } @@ -189,15 +188,15 @@ func (v *RawValue) reformat(canonical, multiline bool, prefix, indent string) er defer putBufferedEncoder(e2) // Disable redundant checks performed earlier during encoding. - d := getBufferedDecoder(e.buf) + d := getBufferedDecoder(e.s.Buf) defer putBufferedDecoder(d) - d.options.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) - reorderObjects(d, &e2.buf) // per RFC 8785, section 3.2.3 + d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) + reorderObjects(d, &e2.s.Buf) // per RFC 8785, section 3.2.3 } // Store the result back into the value if different. - if !bytes.Equal(*v, e.buf) { - *v = append((*v)[:0], e.buf...) + if !bytes.Equal(*v, e.s.Buf) { + *v = append((*v)[:0], e.s.Buf...) 
} return nil } @@ -251,14 +250,14 @@ func reorderObjects(d *Decoder, scratch *[]byte) { beforeBody := d.InputOffset() // offset after '{' for d.PeekKind() != '}' { beforeName := d.InputOffset() - var flags valueFlags - name, _ := d.readValue(&flags) - name = unescapeStringMayCopy(name, flags.isVerbatim()) + var flags jsonwire.ValueFlags + name, _ := d.s.ReadValue(&flags) + name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim()) reorderObjects(d, scratch) afterValue := d.InputOffset() if isSorted && len(*members) > 0 { - isSorted = compareUTF16(prevName, []byte(name)) < 0 + isSorted = jsonwire.CompareUTF16(prevName, []byte(name)) < 0 } *members = append(*members, memberName{name, beforeName, afterValue}) prevName = name @@ -271,7 +270,7 @@ func reorderObjects(d *Decoder, scratch *[]byte) { return } slices.SortFunc(*members, func(x, y memberName) int { - return compareUTF16(x.name, y.name) + return jsonwire.CompareUTF16(x.name, y.name) }) // Append the reordered members to a new buffer, @@ -284,10 +283,10 @@ func reorderObjects(d *Decoder, scratch *[]byte) { // sum([m.after-m.before for m in members]) == afterBody-beforeBody sorted := (*scratch)[:0] for i, member := range *members { - if d.buf[member.before] == ',' { + if d.s.buf[member.before] == ',' { member.before++ // trim leading comma } - sorted = append(sorted, d.buf[member.before:member.after]...) + sorted = append(sorted, d.s.buf[member.before:member.after]...) if i < len(*members)-1 { sorted = append(sorted, ',') // append trailing comma } @@ -295,7 +294,7 @@ func reorderObjects(d *Decoder, scratch *[]byte) { if int(afterBody-beforeBody) != len(sorted) { panic("BUG: length invariant violated") } - copy(d.buf[beforeBody:afterBody], sorted) + copy(d.s.buf[beforeBody:afterBody], sorted) // Update scratch buffer to the largest amount ever used. 
if len(sorted) > len(*scratch) { @@ -308,59 +307,3 @@ func reorderObjects(d *Decoder, scratch *[]byte) { d.ReadToken() } } - -// compareUTF16 lexicographically compares x to y according -// to the UTF-16 codepoints of the UTF-8 encoded input strings. -// This implements the ordering specified in RFC 8785, section 3.2.3. -// The inputs must be valid UTF-8, otherwise this may panic. -func compareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int { - // NOTE: This is an optimized, allocation-free implementation - // of lessUTF16Simple in fuzz_test.go. FuzzLessUTF16 verifies that the - // two implementations agree on the result of comparing any two strings. - - isUTF16Self := func(r rune) bool { - return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF') - } - - var invalidUTF8 bool - x0, y0 := x, y - for { - if len(x) == 0 || len(y) == 0 { - if len(x) == len(y) && invalidUTF8 { - return strings.Compare(string(x0), string(y0)) - } - return cmp.Compare(len(x), len(y)) - } - - // ASCII fast-path. - if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf { - if x[0] != y[0] { - return cmp.Compare(x[0], y[0]) - } - x, y = x[1:], y[1:] - continue - } - - // Decode next pair of runes as UTF-8. - rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x))) - ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y))) - - selfx := isUTF16Self(rx) - selfy := isUTF16Self(ry) - switch { - // The x rune is a single UTF-16 codepoint, while - // the y rune is a surrogate pair of UTF-16 codepoints. - case selfx && !selfy: - ry, _ = utf16.EncodeRune(ry) - // The y rune is a single UTF-16 codepoint, while - // the x rune is a surrogate pair of UTF-16 codepoints. 
- case selfy && !selfx: - rx, _ = utf16.EncodeRune(rx) - } - if rx != ry { - return cmp.Compare(rx, ry) - } - invalidUTF8 = invalidUTF8 || (rx == utf8.RuneError && nx == 1) || (ry == utf8.RuneError && ny == 1) - x, y = x[nx:], y[ny:] - } -} diff --git a/value_test.go b/jsontext/value_test.go similarity index 83% rename from value_test.go rename to jsontext/value_test.go index aef5de9..41e9029 100644 --- a/value_test.go +++ b/jsontext/value_test.go @@ -2,17 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package json +package jsontext import ( - "bytes" - "cmp" "io" "reflect" - "slices" "strings" "testing" - "unicode/utf16" "github.com/go-json-experiment/json/internal/jsontest" ) @@ -117,7 +113,7 @@ var rawValueTestdata = append(func() (out []rawValueTestdataEntry) { in: `"\ud800"`, wantValid: false, // uses RFC 7493 as the definition; which validates UTF-8 wantCompacted: `"\ud800"`, - wantCanonicalizeErr: newInvalidEscapeSequenceError(`\ud800"`).withOffset(len64(`"`)), + wantCanonicalizeErr: &SyntacticError{str: "invalid surrogate pair `\\ud800\"` within string", ByteOffset: len64(`"`)}, }, { name: jsontest.Name("UppercaseEscaped"), in: `"\u000B"`, @@ -199,46 +195,3 @@ func TestRawValueMethods(t *testing.T) { }) } } - -var compareUTF16Testdata = []string{"", "\r", "1", "\u0080", "\u00f6", "\u20ac", "\U0001f600", "\ufb33"} - -func TestCompareUTF16(t *testing.T) { - for i, si := range compareUTF16Testdata { - for j, sj := range compareUTF16Testdata { - got := compareUTF16([]byte(si), []byte(sj)) - want := cmp.Compare(i, j) - if got != want { - t.Errorf("compareUTF16(%q, %q) = %v, want %v", si, sj, got, want) - } - } - } -} - -func FuzzCompareUTF16(f *testing.F) { - for _, td1 := range compareUTF16Testdata { - for _, td2 := range compareUTF16Testdata { - f.Add([]byte(td1), []byte(td2)) - } - } - - // compareUTF16Simple is identical to compareUTF16, - // but relies on naively converting a string to a 
[]uint16 codepoints. - // It is easy to verify as correct, but is slow. - compareUTF16Simple := func(x, y []byte) int { - ux := utf16.Encode([]rune(string(x))) - uy := utf16.Encode([]rune(string(y))) - if n := slices.Compare(ux, uy); n != 0 { - return n - } - return bytes.Compare(x, y) // only occurs for strings with invalid UTF-8 - } - - f.Fuzz(func(t *testing.T, s1, s2 []byte) { - // Compare the optimized and simplified implementations. - got := compareUTF16(s1, s2) - want := compareUTF16Simple(s1, s2) - if got != want { - t.Errorf("compareUTF16(%q, %q) = %v, want %v", s1, s2, got, want) - } - }) -} diff --git a/text.go b/text.go new file mode 100644 index 0000000..ff62fee --- /dev/null +++ b/text.go @@ -0,0 +1,105 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "io" + + "github.com/go-json-experiment/json/jsontext" +) + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Encoder] instead. +type Encoder = jsontext.Encoder + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Decoder] instead. +type Decoder = jsontext.Decoder + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Kind] instead. +type Kind = jsontext.Kind + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Token] instead. +type Token = jsontext.Token + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Value] instead. +type RawValue = jsontext.Value + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.SyntacticError] instead. +type SyntacticError = jsontext.SyntacticError + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AppendQuote] instead. +func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { + return jsontext.AppendQuote(dst, src) +} + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AppendUnquote] instead. 
+func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { + return jsontext.AppendUnquote(dst, src) +} + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.NewEncoder] instead. +func NewEncoder(w io.Writer, opts ...Options) *Encoder { return jsontext.NewEncoder(w, opts...) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.NewDecoder] instead. +func NewDecoder(r io.Reader, opts ...Options) *Decoder { return jsontext.NewDecoder(r, opts...) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AllowDuplicateNames] instead. +func AllowDuplicateNames(v bool) Options { return jsontext.AllowDuplicateNames(v) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.AllowInvalidUTF8] instead. +func AllowInvalidUTF8(v bool) Options { return jsontext.AllowInvalidUTF8(v) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.EscapeForHTML] instead. +func EscapeForHTML(v bool) Options { return jsontext.EscapeForHTML(v) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.EscapeForJS] instead. +func EscapeForJS(v bool) Options { return jsontext.EscapeForJS(v) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.WithEscapeFunc] instead. +func WithEscapeFunc(fn func(rune) bool) Options { return jsontext.WithEscapeFunc(fn) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Expand] instead. +func Expand(v bool) Options { return jsontext.Expand(v) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.WithIndent] instead. +func WithIndent(indent string) Options { return jsontext.WithIndent(indent) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.WithIndentPrefix] instead. +func WithIndentPrefix(prefix string) Options { return jsontext.WithIndentPrefix(prefix) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Null] instead. 
+var Null Token = jsontext.Null + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.False] instead. +var False Token = jsontext.False + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.True] instead. +var True Token = jsontext.True + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.ObjectStart] instead. +var ObjectStart Token = jsontext.ObjectStart + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.ObjectEnd] instead. +var ObjectEnd Token = jsontext.ObjectEnd + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.ArrayStart] instead. +var ArrayStart Token = jsontext.ArrayStart + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.ArrayEnd] instead. +var ArrayEnd Token = jsontext.ArrayEnd + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Bool] instead. +func Bool(b bool) Token { return jsontext.Bool(b) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Float] instead. +func Float(n float64) Token { return jsontext.Float(n) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Int] instead. +func Int(n int64) Token { return jsontext.Int(n) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.String] instead. +func String(s string) Token { return jsontext.String(s) } + +// Deprecated: Use [github.com/go-json-experiment/json/jsontext.Uint] instead. +func Uint(n uint64) Token { return jsontext.Uint(n) }