diff --git a/v2/bench_test.go b/v2/bench_test.go index e413180b..2f7dcc63 100644 --- a/v2/bench_test.go +++ b/v2/bench_test.go @@ -108,7 +108,11 @@ func BenchmarkExtractV1UsingReader(b *testing.B) { if err != nil { b.Fatal(err) } - _, err = io.Copy(dst, reader.DataReader()) + dr, err := reader.DataReader() + if err != nil { + b.Fatal(err) + } + _, err = io.Copy(dst, dr) if err != nil { b.Fatal(err) } diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 307486d7..32c49046 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -120,15 +120,28 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.Option) (*R } if idx == nil { if v2r.Header.HasIndex() { - idx, err = index.ReadFrom(v2r.IndexReader()) + ir, err := v2r.IndexReader() if err != nil { return nil, err } - } else if idx, err = generateIndex(v2r.DataReader(), opts...); err != nil { - return nil, err + idx, err = index.ReadFrom(ir) + if err != nil { + return nil, err + } + } else { + dr, err := v2r.DataReader() + if err != nil { + return nil, err + } + if idx, err = generateIndex(dr, opts...); err != nil { + return nil, err + } } } - b.backing = v2r.DataReader() + b.backing, err = v2r.DataReader() + if err != nil { + return nil, err + } b.idx = idx return b, nil default: @@ -142,7 +155,11 @@ func readVersion(at io.ReaderAt, opts ...carv2.Option) (uint64, error) { case io.Reader: rr = r default: - rr = internalio.NewOffsetReadSeeker(r, 0) + var err error + rr, err = internalio.NewOffsetReadSeeker(r, 0) + if err != nil { + return 0, err + } } return carv2.ReadVersion(rr, opts...) } @@ -157,7 +174,11 @@ func generateIndex(at io.ReaderAt, opts ...carv2.Option) (index.Index, error) { return nil, err } default: - rs = internalio.NewOffsetReadSeeker(r, 0) + var err error + rs, err = internalio.NewOffsetReadSeeker(r, 0) + if err != nil { + return nil, err + } } // Note, we do not set any write options so that all write options fall back onto defaults. @@ -183,7 +204,7 @@ func OpenReadOnly(path string, opts ...carv2.Option) (*ReadOnly, error) { } func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) { - r, err := internalio.NewOffsetReadSeekerWithError(b.backing, idx) + r, err := internalio.NewOffsetReadSeeker(b.backing, idx) if err != nil { return cid.Cid{}, nil, err } @@ -216,8 +237,11 @@ func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) { var fnFound bool var fnErr error err := b.idx.GetAll(key, func(offset uint64) bool { - uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) - var err error + uar, err := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + if err != nil { + fnErr = err + return false + } _, err = varint.ReadUvarint(uar) if err != nil { fnErr = err @@ -317,7 +341,11 @@ func (b *ReadOnly) GetSize(ctx context.Context, key cid.Cid) (int, error) { fnSize := -1 var fnErr error err := b.idx.GetAll(key, func(offset uint64) bool { - rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + rdr, err := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + if err != nil { + fnErr = err + return false + } sectionLen, err := varint.ReadUvarint(rdr) if err != nil { fnErr = err @@ -401,7 +429,10 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { } // TODO we may use this walk for populating the index, and we need to be able to iterate keys in this way somewhere for index generation. In general though, when it's asked for all keys from a blockstore with an index, we should iterate through the index when possible rather than linear reads through the full car. - rdr := internalio.NewOffsetReadSeeker(b.backing, 0) + rdr, err := internalio.NewOffsetReadSeeker(b.backing, 0) + if err != nil { + return nil, err + } header, err := carv1.ReadHeader(rdr, b.opts.MaxAllowedHeaderSize) if err != nil { b.mu.RUnlock() // don't hold the mutex forever @@ -492,7 +523,11 @@ func (b *ReadOnly) HashOnRead(bool) { // Roots returns the root CIDs of the backing CAR. func (b *ReadOnly) Roots() ([]cid.Cid, error) { - header, err := carv1.ReadHeader(internalio.NewOffsetReadSeeker(b.backing, 0), b.opts.MaxAllowedHeaderSize) + ors, err := internalio.NewOffsetReadSeeker(b.backing, 0) + if err != nil { + return nil, err + } + header, err := carv1.ReadHeader(ors, b.opts.MaxAllowedHeaderSize) if err != nil { return nil, fmt.Errorf("error reading car header: %w", err) } diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index f55a1e50..5a947a75 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -226,7 +226,9 @@ func newV1ReaderFromV2File(t *testing.T, carv2Path string, zeroLenSectionAsEOF b t.Cleanup(func() { f.Close() }) v2r, err := carv2.NewReader(f) require.NoError(t, err) - v1r, err := newV1Reader(v2r.DataReader(), zeroLenSectionAsEOF) + dr, err := v2r.DataReader() + require.NoError(t, err) + v1r, err := newV1Reader(dr, zeroLenSectionAsEOF) require.NoError(t, err) return v1r } diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 0fad9893..774f37ff 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -139,7 +139,7 @@ func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.Option) (*ReadWri offset = 0 } rwbs.dataWriter = internalio.NewOffsetWriter(rwbs.f, offset) - v1r, err := internalio.NewOffsetReadSeekerWithError(rwbs.f, offset) + v1r, err := internalio.NewOffsetReadSeeker(rwbs.f, offset) if err != nil { return nil, err } @@ -169,11 +169,11 @@ func (b *ReadWrite) initWithRoots(v2 bool, roots []cid.Cid) error { return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter) } -func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Option) error { +func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid) error { // On resumption it is expected that the CARv2 Pragma, and the CARv1 header is successfully written. // Otherwise we cannot resume from the file. // Read pragma to assert if b.f is indeed a CARv2. - version, err := carv2.ReadVersion(b.f, opts...) + version, err := carv2.ReadVersion(b.f) if err != nil { // The file is not a valid CAR file and cannot resume from it. // Or the write must have failed before pragma was written. @@ -193,7 +193,7 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Opti // Check if file was finalized by trying to read the CARv2 header. // We check because if finalized the CARv1 reader behaviour needs to be adjusted since // EOF will not signify end of CARv1 payload. i.e. index is most likely present. - r, err := internalio.NewOffsetReadSeekerWithError(b.f, carv2.PragmaSize) + r, err := internalio.NewOffsetReadSeeker(b.f, carv2.PragmaSize) if err != nil { return err } @@ -223,7 +223,10 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Opti } // Use the given CARv1 padding to instantiate the CARv1 reader on file. - v1r := internalio.NewOffsetReadSeeker(b.ronly.backing, 0) + v1r, err := internalio.NewOffsetReadSeeker(b.ronly.backing, 0) + if err != nil { + return err + } header, err := carv1.ReadHeader(v1r, b.opts.MaxAllowedHeaderSize) if err != nil { // Cannot read the CARv1 header; the file is most likely corrupt. diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 6f64ac72..11cc99a2 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -21,7 +21,6 @@ import ( carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/blockstore" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" @@ -488,7 +487,9 @@ func TestBlockstoreResumption(t *testing.T) { wantPayloadReader, err := carv1.NewCarReader(v1f) require.NoError(t, err) - gotPayloadReader, err := carv1.NewCarReader(v2r.DataReader()) + dr, err := v2r.DataReader() + require.NoError(t, err) + gotPayloadReader, err := carv1.NewCarReader(dr) require.NoError(t, err) require.Equal(t, wantPayloadReader.Header, gotPayloadReader.Header) @@ -516,11 +517,15 @@ func TestBlockstoreResumption(t *testing.T) { // Assert index in resumed from file is identical to index generated from the data payload portion of the generated CARv2 file. _, err = v1f.Seek(0, io.SeekStart) require.NoError(t, err) - gotIdx, err := index.ReadFrom(v2r.IndexReader()) + ir, err := v2r.IndexReader() + require.NoError(t, err) + gotIdx, err := index.ReadFrom(ir) require.NoError(t, err) - wantIdx, err := carv2.GenerateIndex(v2r.DataReader()) + dr, err = v2r.DataReader() require.NoError(t, err) - testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) + wantIdx, err := carv2.GenerateIndex(dr) + require.NoError(t, err) + require.Equal(t, wantIdx, gotIdx) } func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) { @@ -829,29 +834,37 @@ func TestOpenReadWrite_WritesIdentityCIDsWhenOptionIsEnabled(t *testing.T) { t.Cleanup(func() { require.NoError(t, r.Close()) }) require.True(t, r.Header.HasIndex()) - ir := r.IndexReader() + ir, err := r.IndexReader() + require.NoError(t, err) require.NotNil(t, ir) gotIdx, err := index.ReadFrom(ir) require.NoError(t, err) // Determine expected offset as the length of header plus one - header, err := carv1.ReadHeader(r.DataReader(), carv1.DefaultMaxAllowedHeaderSize) + dr, err := r.DataReader() + require.NoError(t, err) + header, err := carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize) require.NoError(t, err) object, err := cbor.DumpObject(header) require.NoError(t, err) expectedOffset := len(object) + 1 // Assert index is iterable and has exactly one record with expected multihash and offset. - var count int - err = gotIdx.ForEach(func(mh multihash.Multihash, offset uint64) error { - count++ - require.Equal(t, idmh, mh) - require.Equal(t, uint64(expectedOffset), offset) - return nil - }) - require.NoError(t, err) - require.Equal(t, 1, count) + switch idx := gotIdx.(type) { + case index.IterableIndex: + var i int + err := idx.ForEach(func(mh multihash.Multihash, offset uint64) error { + i++ + require.Equal(t, idmh, mh) + require.Equal(t, uint64(expectedOffset), offset) + return nil + }) + require.NoError(t, err) + require.Equal(t, 1, i) + default: + require.Failf(t, "unexpected index type", "wanted %v but got %v", multicodec.CarMultihashIndexSorted, idx.Codec()) + } } func TestOpenReadWrite_ErrorsWhenWritingTooLargeOfACid(t *testing.T) { @@ -914,7 +927,9 @@ func TestReadWrite_ReWritingCARv1WithIdentityCidIsIdenticalToOriginalWithOptions // Note, we hash instead of comparing bytes to avoid excessive memory usage when sample CARv1 is large. hasher := sha512.New() - gotWritten, err := io.Copy(hasher, v2r.DataReader()) + dr, err := v2r.DataReader() + require.NoError(t, err) + gotWritten, err := io.Copy(hasher, dr) require.NoError(t, err) gotSum := hasher.Sum(nil) diff --git a/v2/example_test.go b/v2/example_test.go index 53dfa349..57378aea 100644 --- a/v2/example_test.go +++ b/v2/example_test.go @@ -51,7 +51,11 @@ func ExampleWrapV1File() { if err != nil { panic(err) } - inner, err := ioutil.ReadAll(cr.DataReader()) + dr, err := cr.DataReader() + if err != nil { + panic(err) + } + inner, err := ioutil.ReadAll(dr) if err != nil { panic(err) } diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go index c7473750..c45d83cb 100644 --- a/v2/fuzz_test.go +++ b/v2/fuzz_test.go @@ -12,7 +12,6 @@ import ( "testing" car "github.com/ipld/go-car/v2" - "github.com/ipld/go-car/v2/index" "github.com/ipld/go-car/v2/internal/carv1" ) @@ -73,48 +72,15 @@ func FuzzReader(f *testing.F) { } subject.Roots() - ir := subject.IndexReader() - if ir != nil { - index.ReadFrom(ir) + _, err = subject.IndexReader() + if err != nil { + return } - car.GenerateIndex(subject.DataReader()) - }) -} - -func FuzzIndex(f *testing.F) { - files, err := filepath.Glob("testdata/*.car") - if err != nil { - f.Fatal(err) - } - for _, fname := range files { - func() { - file, err := os.Open(fname) - if err != nil { - f.Fatal(err) - } - defer file.Close() - subject, err := car.NewReader(file) - if err != nil { - return - } - indexRdr := subject.IndexReader() - if indexRdr == nil { - return - } - _, n, err := index.ReadFromWithSize(indexRdr) - if err != nil { - return - } - data, err := io.ReadAll(io.NewSectionReader(indexRdr, 0, n)) - if err != nil { - f.Fatal(err) - } - f.Add(data) - }() - } - - f.Fuzz(func(t *testing.T, data []byte) { - index.ReadFrom(bytes.NewReader(data)) + dr, err := subject.DataReader() + if err != nil { + return + } + car.GenerateIndex(dr) }) } @@ -137,15 +103,18 @@ func FuzzInspect(f *testing.F) { if err != nil { t.Fatal("second NewReader on same data failed", err.Error()) } - - if i := reader.IndexReader(); i != nil { - _, err = index.ReadFrom(i) - if err != nil { - return - } + i, err := reader.IndexReader() + if err != nil { + return + } + // FIXME: Once indexes are safe to parse, do not skip .car with index in the differential fuzzing. + if i == nil { + return + } + dr, err := reader.DataReader() + if err != nil { + return } - - dr := reader.DataReader() _, err = carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize) if err != nil { diff --git a/v2/index/example_test.go b/v2/index/example_test.go index c6f83ea2..d2a9da54 100644 --- a/v2/index/example_test.go +++ b/v2/index/example_test.go @@ -5,10 +5,10 @@ import ( "io" "io/ioutil" "os" + "reflect" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" - "github.com/multiformats/go-multihash" ) // ExampleReadFrom unmarshalls an index from an indexed CARv2 file, and for each root CID prints the @@ -28,7 +28,11 @@ func ExampleReadFrom() { } // Read and unmarshall index within CARv2 file. - idx, err := index.ReadFrom(cr.IndexReader()) + ir, err := cr.IndexReader() + if err != nil { + panic(err) + } + idx, err := index.ReadFrom(ir) if err != nil { panic(err) } @@ -62,7 +66,11 @@ func ExampleWriteTo() { }() // Read and unmarshall index within CARv2 file. - idx, err := index.ReadFrom(cr.IndexReader()) + ir, err := cr.IndexReader() + if err != nil { + panic(err) + } + idx, err := index.ReadFrom(ir) if err != nil { panic(err) } @@ -94,27 +102,13 @@ func ExampleWriteTo() { panic(err) } - // Expect indices to be equal - collect all of the multihashes and their - // offsets from the first and compare to the second - mha := make(map[string]uint64, 0) - _ = idx.ForEach(func(mh multihash.Multihash, off uint64) error { - mha[mh.HexString()] = off - return nil - }) - var count int - _ = reReadIdx.ForEach(func(mh multihash.Multihash, off uint64) error { - count++ - if expectedOffset, ok := mha[mh.HexString()]; !ok || expectedOffset != off { - panic("expected to get the same index as the CARv2 file") - } - return nil - }) - if count != len(mha) { + // Expect indices to be equal. + if reflect.DeepEqual(idx, reReadIdx) { + fmt.Printf("Saved index file matches the index embedded in CARv2 at %v.\n", src) + } else { panic("expected to get the same index as the CARv2 file") } - fmt.Printf("Saved index file matches the index embedded in CARv2 at %v.\n", src) - // Output: // Saved index file matches the index embedded in CARv2 at ../testdata/sample-wrapped-v2.car. } diff --git a/v2/index/index.go b/v2/index/index.go index 3a2b3f1d..911eaa7a 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -44,17 +44,12 @@ type ( Marshal(w io.Writer) (uint64, error) // Unmarshal decodes the index from its serial form. - // Deprecated: This function is slurpy and will copy everything into memory. + // Note, this function will copy the entire index into memory. + // + // Do not unmarshal index from untrusted CARv2 files. Instead the index should be + // regenerated from the CARv2 data payload. Unmarshal(r io.Reader) error - // UnmarshalLazyRead lazily decodes the index from its serial form. It is a - // safer alternative to to Unmarshal, particularly when reading index data - // from untrusted sources (which is not recommended) but also in more - // constrained memory environments. - // Instead of slurping UnmarshalLazyRead will keep a reference to the the - // io.ReaderAt passed in and ask for data as needed. - UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) - // Load inserts a number of records into the index. // Note that Index will load all given records. Any filtering of the records such as // exclusion of CIDs with multihash.IDENTITY code must occur prior to calling this function. @@ -74,17 +69,17 @@ type ( // meaning that no callbacks happen, // ErrNotFound is returned. GetAll(cid.Cid, func(uint64) bool) error + } + + // IterableIndex is an index which support iterating over it's elements + IterableIndex interface { + Index // ForEach takes a callback function that will be called // on each entry in the index. The arguments to the callback are // the multihash of the element, and the offset in the car file // where the element appears. // - // Note that index with codec multicodec.CarIndexSorted does not support ForEach enumeration. - // Because this index type only contains the multihash digest and not the code. - // Calling ForEach on this index type will result in error. - // Use multicodec.CarMultihashIndexSorted index type instead. - // // If the callback returns a non-nil error, the iteration is aborted, // and the ForEach function returns the error to the user. // @@ -94,12 +89,6 @@ type ( // The order of calls to the given function is deterministic, but entirely index-specific. ForEach(func(multihash.Multihash, uint64) error) error } - - // IterableIndex is an index which support iterating over it's elements - // Deprecated: IterableIndex has been moved into Index. Just use Index now. - IterableIndex interface { - Index - } ) // GetFirst is a wrapper over Index.GetAll, returning the offset for the first @@ -143,32 +132,29 @@ func WriteTo(idx Index, w io.Writer) (uint64, error) { // ReadFrom reads index from r. // The reader decodes the index by reading the first byte to interpret the encoding. // Returns error if the encoding is not known. +// // Attempting to read index data from untrusted sources is not recommended. -func ReadFrom(r io.ReaderAt) (Index, error) { - idx, _, err := ReadFromWithSize(r) - return idx, err -} - -// ReadFromWithSize is just like ReadFrom but return the size of the Index. -// The size is only valid when err != nil. -// Attempting to read index data from untrusted sources is not recommended. -func ReadFromWithSize(r io.ReaderAt) (Index, int64, error) { - code, err := varint.ReadUvarint(internalio.NewOffsetReadSeeker(r, 0)) +// Instead the index should be regenerated from the CARv2 data payload. +func ReadFrom(r io.Reader) (Index, error) { + codec, err := ReadCodec(r) if err != nil { - return nil, 0, err + return nil, err } - codec := multicodec.Code(code) idx, err := New(codec) if err != nil { - return nil, 0, err + return nil, err } - rdr, err := internalio.NewOffsetReadSeekerWithError(r, int64(varint.UvarintSize(code))) - if err != nil { - return nil, 0, err + if err := idx.Unmarshal(r); err != nil { + return nil, err } - n, err := idx.UnmarshalLazyRead(rdr) + return idx, nil +} + +// ReadCodec reads the codec of the index by decoding the first varint read from r. +func ReadCodec(r io.Reader) (multicodec.Code, error) { + code, err := varint.ReadUvarint(internalio.ToByteReader(r)) if err != nil { - return nil, 0, err + return 0, err } - return idx, n, nil + return multicodec.Code(code), nil } diff --git a/v2/index/index_test.go b/v2/index/index_test.go index 0d380caf..972b4c66 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -8,7 +8,6 @@ import ( "testing" blocks "github.com/ipfs/go-block-format" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/ipld/go-car/v2/internal/carv1/util" "github.com/multiformats/go-multicodec" @@ -55,6 +54,9 @@ func TestReadFrom(t *testing.T) { subject, err := ReadFrom(idxf) require.NoError(t, err) + _, err = idxf.Seek(0, io.SeekStart) + require.NoError(t, err) + idxf2, err := os.Open("../testdata/sample-multihash-index-sorted.carindex") require.NoError(t, err) t.Cleanup(func() { require.NoError(t, idxf2.Close()) }) @@ -126,7 +128,7 @@ func TestWriteTo(t *testing.T) { require.NoError(t, err) // Assert they are equal - testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) + require.Equal(t, wantIdx, gotIdx) } func TestMarshalledIndexStartsWithCodec(t *testing.T) { diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 60d0e87d..aeed4c11 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -5,22 +5,16 @@ import ( "encoding/binary" "errors" "fmt" + internalio "github.com/ipld/go-car/v2/internal/io" "io" "sort" - "github.com/ipld/go-car/v2/internal/errsort" - internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" ) -type sizedReaderAt interface { - io.ReaderAt - Size() int64 -} - var _ Index = (*multiWidthIndex)(nil) type ( @@ -32,7 +26,7 @@ type ( singleWidthIndex struct { width uint32 len uint64 // in struct, len is #items. when marshaled, it's saved as #bytes. - index sizedReaderAt + index []byte } multiWidthIndex map[uint32]singleWidthIndex ) @@ -60,24 +54,27 @@ func (s *singleWidthIndex) Marshal(w io.Writer) (uint64, error) { return 0, err } l += 4 - sz := s.index.Size() - if err := binary.Write(w, binary.LittleEndian, sz); err != nil { + if err := binary.Write(w, binary.LittleEndian, int64(len(s.index))); err != nil { return l, err } l += 8 - n, err := io.Copy(w, io.NewSectionReader(s.index, 0, sz)) + n, err := w.Write(s.index) return l + uint64(n), err } -// Unmarshal decodes the index from its serial form. -// Deprecated: This function is slurpy and will copy the index in memory. func (s *singleWidthIndex) Unmarshal(r io.Reader) error { var width uint32 if err := binary.Read(r, binary.LittleEndian, &width); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } return err } var dataLen uint64 if err := binary.Read(r, binary.LittleEndian, &dataLen); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } return err } @@ -89,26 +86,10 @@ func (s *singleWidthIndex) Unmarshal(r io.Reader) error { if _, err := io.ReadFull(r, buf); err != nil { return err } - s.index = bytes.NewReader(buf) + s.index = buf return nil } -func (s *singleWidthIndex) UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) { - var b [12]byte - _, err = internalio.FullReadAt(r, b[:], 0) - if err != nil { - return 0, err - } - - width := binary.LittleEndian.Uint32(b[:4]) - dataLen := binary.LittleEndian.Uint64(b[4:12]) - if err := s.checkUnmarshalLengths(width, dataLen, uint64(len(b))); err != nil { - return 0, err - } - s.index = io.NewSectionReader(r, int64(len(b)), int64(dataLen)) - return int64(dataLen) + int64(len(b)), nil -} - func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra uint64) error { if width <= 8 { return errors.New("malformed index; width must be bigger than 8") @@ -129,6 +110,10 @@ func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra ui return nil } +func (s *singleWidthIndex) Less(i int, digest []byte) bool { + return bytes.Compare(digest[:], s.index[i*int(s.width):((i+1)*int(s.width)-8)]) <= 0 +} + func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { d, err := multihash.Decode(c.Hash()) if err != nil { @@ -138,35 +123,18 @@ func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { } func (s *singleWidthIndex) getAll(d []byte, fn func(uint64) bool) error { - digestLen := int64(s.width) - 8 - b := make([]byte, digestLen) - idxI, err := errsort.Search(int(s.len), func(i int) (bool, error) { - digestStart := int64(i) * int64(s.width) - _, err := internalio.FullReadAt(s.index, b, digestStart) - if err != nil { - return false, err - } - return bytes.Compare(d, b) <= 0, nil + idx := sort.Search(int(s.len), func(i int) bool { + return s.Less(i, d) }) - if err != nil { - return err - } - idx := int64(idxI) var any bool for ; uint64(idx) < s.len; idx++ { - digestStart := idx * int64(s.width) - offsetEnd := digestStart + int64(s.width) + digestStart := idx * int(s.width) + offsetEnd := (idx + 1) * int(s.width) digestEnd := offsetEnd - 8 - digestLen := digestEnd - digestStart - b := make([]byte, offsetEnd-digestStart) - _, err := internalio.FullReadAt(s.index, b, digestStart) - if err != nil { - return err - } - if bytes.Equal(d, b[:digestLen]) { + if bytes.Equal(d[:], s.index[digestStart:digestEnd]) { any = true - offset := binary.LittleEndian.Uint64(b[digestLen:]) + offset := binary.LittleEndian.Uint64(s.index[digestEnd:offsetEnd]) if !fn(offset) { // User signalled to stop searching; therefore, break. break @@ -200,19 +168,13 @@ func (s *singleWidthIndex) Load(items []Record) error { } func (s *singleWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { - segmentCount := s.index.Size() / int64(s.width) - for i := int64(0); i < segmentCount; i++ { - digestStart := i * int64(s.width) - offsetEnd := digestStart + int64(s.width) + segmentCount := len(s.index) / int(s.width) + for i := 0; i < segmentCount; i++ { + digestStart := i * int(s.width) + offsetEnd := (i + 1) * int(s.width) digestEnd := offsetEnd - 8 - digestLen := digestEnd - digestStart - b := make([]byte, offsetEnd-digestStart) - _, err := internalio.FullReadAt(s.index, b, digestStart) - if err != nil { - return err - } - digest := b[:digestLen] - offset := binary.LittleEndian.Uint64(b[digestLen:]) + digest := s.index[digestStart:digestEnd] + offset := binary.LittleEndian.Uint64(s.index[digestEnd:offsetEnd]) if err := f(digest, offset); err != nil { return err } @@ -265,49 +227,38 @@ func (m *multiWidthIndex) Marshal(w io.Writer) (uint64, error) { } func (m *multiWidthIndex) Unmarshal(r io.Reader) error { + reader := internalio.ToByteReadSeeker(r) var l int32 - if err := binary.Read(r, binary.LittleEndian, &l); err != nil { - return err - } - for i := 0; i < int(l); i++ { - s := singleWidthIndex{} - if err := s.Unmarshal(r); err != nil { - return err + if err := binary.Read(reader, binary.LittleEndian, &l); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF } - (*m)[s.width] = s + return err } - return nil -} - -func (m *multiWidthIndex) UnmarshalLazyRead(r io.ReaderAt) (sum int64, err error) { - var b [4]byte - _, err = internalio.FullReadAt(r, b[:], 0) + sum, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } - count := binary.LittleEndian.Uint32(b[:4]) - if int32(count) < 0 { - return 0, errors.New("index too big; multiWidthIndex count is overflowing int32") + if int32(l) < 0 { + return errors.New("index too big; multiWidthIndex count is overflowing int32") } - sum += int64(len(b)) - for ; count > 0; count-- { + for i := 0; i < int(l); i++ { s := singleWidthIndex{} - or, err := internalio.NewOffsetReadSeekerWithError(r, sum) - if err != nil { - return 0, err + if err := s.Unmarshal(r); err != nil { + return err } - n, err := s.UnmarshalLazyRead(or) + n, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } oldSum := sum sum += n if sum < oldSum { - return 0, errors.New("index too big; multiWidthIndex len is overflowing int64") + return errors.New("index too big; multiWidthIndex len is overflowing int64") } (*m)[s.width] = s } - return sum, nil + return nil } func (m *multiWidthIndex) Load(items []Record) error { @@ -339,17 +290,13 @@ func (m *multiWidthIndex) Load(items []Record) error { s := singleWidthIndex{ width: uint32(rcrdWdth), len: uint64(len(lst)), - index: bytes.NewReader(compact), + index: compact, } (*m)[uint32(width)+8] = s } return nil } -func (m *multiWidthIndex) ForEach(func(multihash.Multihash, uint64) error) error { - return fmt.Errorf("%s does not support ForEach enumeration; use %s instead", multicodec.CarIndexSorted, multicodec.CarMultihashIndexSorted) -} - func (m *multiWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { sizes := make([]uint32, 0, len(*m)) for k := range *m { diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index 8e1a4527..5c1ee449 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -1,27 +1,14 @@ package index import ( - "bytes" "encoding/binary" "testing" "github.com/ipfs/go-merkledag" "github.com/multiformats/go-multicodec" - "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" ) -func TestSortedIndex_ErrorsOnForEach(t *testing.T) { - subject, err := New(multicodec.CarIndexSorted) - require.NoError(t, err) - err = subject.ForEach(func(multihash.Multihash, uint64) error { return nil }) - require.Error(t, err) - require.Equal(t, - "car-index-sorted does not support ForEach enumeration; use car-multihash-index-sorted instead", - err.Error(), - ) -} - func TestSortedIndexCodec(t *testing.T) { require.Equal(t, multicodec.CarIndexSorted, newSorted().Codec()) } @@ -64,7 +51,7 @@ func TestSingleWidthIndex_GetAll(t *testing.T) { subject := &singleWidthIndex{ width: 9, len: uint64(l), - index: bytes.NewReader(buf), + index: buf, } var foundCount int diff --git a/v2/index/mhindexsorted.go b/v2/index/mhindexsorted.go index 0200f700..e0ef675d 100644 --- a/v2/index/mhindexsorted.go +++ b/v2/index/mhindexsorted.go @@ -3,17 +3,18 @@ package index import ( "encoding/binary" "errors" + internalio "github.com/ipld/go-car/v2/internal/io" "io" "sort" "github.com/ipfs/go-cid" - internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" ) var ( - _ Index = (*MultihashIndexSorted)(nil) + _ Index = (*MultihashIndexSorted)(nil) + _ IterableIndex = (*MultihashIndexSorted)(nil) ) type ( @@ -42,34 +43,14 @@ func (m *multiWidthCodedIndex) Marshal(w io.Writer) (uint64, error) { func (m *multiWidthCodedIndex) Unmarshal(r io.Reader) error { if err := binary.Read(r, binary.LittleEndian, &m.code); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } return err } return m.multiWidthIndex.Unmarshal(r) } -func (m *multiWidthCodedIndex) UnmarshalLazyRead(r io.ReaderAt) (int64, error) { - var b [8]byte - _, err := internalio.FullReadAt(r, b[:], 0) - if err != nil { - return 0, err - } - m.code = binary.LittleEndian.Uint64(b[:8]) - rdr, err := internalio.NewOffsetReadSeekerWithError(r, int64(len(b))) - if err != nil { - return 0, err - } - sum, err := m.multiWidthIndex.UnmarshalLazyRead(rdr) - if err != nil { - return 0, err - } - oldSum := sum - sum += int64(len(b)) - if sum < oldSum { - return 0, errors.New("index too big; multiWidthCodedIndex len is overflowing") - } - return sum, nil -} - func (m *multiWidthCodedIndex) forEach(f func(mh multihash.Multihash, offset uint64) error) error { return m.multiWidthIndex.forEachDigest(func(digest []byte, offset uint64) error { mh, err := multihash.Encode(digest, m.code) @@ -117,49 +98,38 @@ func (m *MultihashIndexSorted) sortedMultihashCodes() []uint64 { } func (m *MultihashIndexSorted) Unmarshal(r io.Reader) error { + reader := internalio.ToByteReadSeeker(r) var l int32 - if err := binary.Read(r, binary.LittleEndian, &l); err != nil { - return err - } - for i := 0; i < int(l); i++ { - mwci := newMultiWidthCodedIndex() - if err := mwci.Unmarshal(r); err != nil { - return err + if err := binary.Read(reader, binary.LittleEndian, &l); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF } - m.put(mwci) + return err } - return nil -} - -func (m *MultihashIndexSorted) UnmarshalLazyRead(r io.ReaderAt) (sum int64, err error) { - var b [4]byte - _, err = internalio.FullReadAt(r, b[:], 0) + sum, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } - sum += int64(len(b)) - count := binary.LittleEndian.Uint32(b[:4]) - if int32(count) < 0 { - return 0, errors.New("index too big; MultihashIndexSorted count is overflowing int32") + if int32(l) < 0 { + return errors.New("index too big; MultihashIndexSorted count is overflowing int32") } - for ; count > 0; count-- { + for i := 0; i < int(l); i++ { mwci := newMultiWidthCodedIndex() - or, err := internalio.NewOffsetReadSeekerWithError(r, sum) - if err != nil { - return 0, err + if err := mwci.Unmarshal(r); err != nil { + return err } - n, err := mwci.UnmarshalLazyRead(or) + n, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } oldSum := sum sum += n if sum < oldSum { - return 0, errors.New("index too big; MultihashIndexSorted sum is overflowing int64") + return errors.New("index too big; MultihashIndexSorted len is overflowing int64") } m.put(mwci) } - return sum, nil + return nil } func (m *MultihashIndexSorted) put(mwci *multiWidthCodedIndex) { diff --git a/v2/index/mhindexsorted_test.go b/v2/index/mhindexsorted_test.go index 7704d3a2..520157e4 100644 --- a/v2/index/mhindexsorted_test.go +++ b/v2/index/mhindexsorted_test.go @@ -53,11 +53,14 @@ func TestMultiWidthCodedIndex_StableIterate(t *testing.T) { records = append(records, generateIndexRecords(t, multihash.IDENTITY, rng)...) // Create a new mh sorted index and load randomly generated records into it. - subject, err := index.New(multicodec.CarMultihashIndexSorted) + idx, err := index.New(multicodec.CarMultihashIndexSorted) require.NoError(t, err) - err = subject.Load(records) + err = idx.Load(records) require.NoError(t, err) + subject, ok := idx.(index.IterableIndex) + require.True(t, ok) + mh := make([]multihash.Multihash, 0, len(records)) require.NoError(t, subject.ForEach(func(m multihash.Multihash, _ uint64) error { mh = append(mh, m) diff --git a/v2/index/testutil/equal_index.go b/v2/index/testutil/equal_index.go deleted file mode 100644 index 43d0b3e9..00000000 --- a/v2/index/testutil/equal_index.go +++ /dev/null @@ -1,71 +0,0 @@ -package testutil - -import ( - "sync" - "testing" - - "github.com/multiformats/go-multihash" - "github.com/stretchr/testify/require" -) - -type Index interface { - ForEach(func(multihash.Multihash, uint64) error) error -} - -// insertUint64 perform one round of insertion sort on the last element -func insertUint64(s []uint64) { - switch len(s) { - case 0, 1: - return - default: - cur := s[len(s)-1] - for j := len(s) - 1; j > 0; { - j-- - if cur >= s[j] { - s[j+1] = cur - break - } - s[j+1] = s[j] - } - } -} - -func AssertIdenticalIndexes(t *testing.T, a, b Index) { - var wg sync.WaitGroup - // key is multihash.Multihash.HexString - var aCount uint - var aErr error - aMap := make(map[string][]uint64) - wg.Add(1) - - go func() { - defer wg.Done() - aErr = a.ForEach(func(mh multihash.Multihash, off uint64) error { - aCount++ - str := mh.HexString() - slice := aMap[str] - slice = append(slice, off) - insertUint64(slice) - aMap[str] = slice - return nil - }) - }() - - var bCount uint - bMap := make(map[string][]uint64) - bErr := b.ForEach(func(mh multihash.Multihash, off uint64) error { - bCount++ - str := mh.HexString() - slice := bMap[str] - slice = append(slice, off) - insertUint64(slice) - bMap[str] = slice - return nil - }) - wg.Wait() - require.NoError(t, aErr) - require.NoError(t, bErr) - - require.Equal(t, aCount, bCount) - require.Equal(t, aMap, bMap) -} diff --git a/v2/index_gen.go b/v2/index_gen.go index 33ba7800..b0b87453 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -209,10 +209,18 @@ func ReadOrGenerateIndex(rs io.ReadSeeker, opts ...Option) (index.Index, error) } // If index is present, then no need to generate; decode and return it. if v2r.Header.HasIndex() { - return index.ReadFrom(v2r.IndexReader()) + ir, err := v2r.IndexReader() + if err != nil { + return nil, err + } + return index.ReadFrom(ir) } // Otherwise, generate index from CARv1 payload wrapped within CARv2 format. - return GenerateIndex(v2r.DataReader(), opts...) + dr, err := v2r.DataReader() + if err != nil { + return nil, err + } + return GenerateIndex(dr, opts...) default: return nil, fmt.Errorf("unknown version %v", version) } diff --git a/v2/index_gen_test.go b/v2/index_gen_test.go index 43a9c2ac..11011e81 100644 --- a/v2/index_gen_test.go +++ b/v2/index_gen_test.go @@ -9,7 +9,6 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" @@ -48,7 +47,9 @@ func TestGenerateIndex(t *testing.T) { t.Cleanup(func() { assert.NoError(t, v2.Close()) }) reader, err := carv2.NewReader(v2) require.NoError(t, err) - want, err := index.ReadFrom(reader.IndexReader()) + ir, err := reader.IndexReader() + require.NoError(t, err) + want, err := index.ReadFrom(ir) require.NoError(t, err) return want }, @@ -104,7 +105,7 @@ func TestGenerateIndex(t *testing.T) { if want == nil { require.Nil(t, got) } else { - testutil.AssertIdenticalIndexes(t, want, got) + require.Equal(t, want, got) } } } diff --git a/v2/internal/io/fullReaderAt.go b/v2/internal/io/fullReaderAt.go deleted file mode 100644 index 57f26685..00000000 --- a/v2/internal/io/fullReaderAt.go +++ /dev/null @@ -1,20 +0,0 @@ -package io - -import "io" - -func FullReadAt(r io.ReaderAt, b []byte, off int64) (sum int64, err error) { - for int64(len(b)) > sum { - n, err := r.ReadAt(b[sum:], off+sum) - sum += int64(n) - if err != nil { - if err == io.EOF { - if sum < int64(len(b)) { - return sum, io.ErrUnexpectedEOF - } - return sum, nil - } - return sum, err - } - } - return sum, nil -} diff --git a/v2/internal/io/offset_read_seeker.go b/v2/internal/io/offset_read_seeker.go index bbdcf4c6..b3899ab7 100644 --- a/v2/internal/io/offset_read_seeker.go +++ b/v2/internal/io/offset_read_seeker.go @@ -35,15 +35,7 @@ type ReadSeekerAt interface { // NewOffsetReadSeeker returns an ReadSeekerAt that reads from r // starting offset offset off and stops with io.EOF when r reaches its end. // The Seek function will panic if whence io.SeekEnd is passed. -func NewOffsetReadSeeker(r io.ReaderAt, off int64) ReadSeekerAt { - nr, err := NewOffsetReadSeekerWithError(r, off) - if err != nil { - return erroringReader{err} - } - return nr -} - -func NewOffsetReadSeekerWithError(r io.ReaderAt, off int64) (ReadSeekerAt, error) { +func NewOffsetReadSeeker(r io.ReaderAt, off int64) (ReadSeekerAt, error) { if or, ok := r.(*offsetReadSeeker); ok { oldBase := or.base newBase := or.base + off @@ -128,23 +120,3 @@ func (o *offsetReadSeeker) Seek(offset int64, whence int) (int64, error) { func (o *offsetReadSeeker) Position() int64 { return o.off - o.base } - -type erroringReader struct { - err error -} - -func (e erroringReader) Read(_ []byte) (int, error) { - return 0, e.err -} - -func (e erroringReader) ReadAt(_ []byte, n int64) (int, error) { - return 0, e.err -} - -func (e erroringReader) ReadByte() (byte, error) { - return 0, e.err -} - -func (e erroringReader) Seek(_ int64, _ int) (int64, error) { - return 0, e.err -} diff --git a/v2/reader.go b/v2/reader.go index c34a0672..4628fd89 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -56,8 +56,10 @@ func NewReader(r io.ReaderAt, opts ...Option) (*Reader, error) { } cr.opts = ApplyOptions(opts...) - or := internalio.NewOffsetReadSeeker(r, 0) - var err error + or, err := internalio.NewOffsetReadSeeker(r, 0) + if err != nil { + return nil, err + } cr.Version, err = ReadVersion(or, opts...) if err != nil { return nil, err @@ -82,7 +84,11 @@ func (r *Reader) Roots() ([]cid.Cid, error) { if r.roots != nil { return r.roots, nil } - header, err := carv1.ReadHeader(r.DataReader(), r.opts.MaxAllowedHeaderSize) + dr, err := r.DataReader() + if err != nil { + return nil, err + } + header, err := carv1.ReadHeader(dr, r.opts.MaxAllowedHeaderSize) if err != nil { return nil, err } @@ -106,9 +112,9 @@ type SectionReader interface { } // DataReader provides a reader containing the data payload in CARv1 format. -func (r *Reader) DataReader() SectionReader { +func (r *Reader) DataReader() (SectionReader, error) { if r.Version == 2 { - return io.NewSectionReader(r.r, int64(r.Header.DataOffset), int64(r.Header.DataSize)) + return io.NewSectionReader(r.r, int64(r.Header.DataOffset), int64(r.Header.DataSize)), nil } return internalio.NewOffsetReadSeeker(r.r, 0) } @@ -116,9 +122,9 @@ func (r *Reader) DataReader() SectionReader { // IndexReader provides an io.Reader containing the index for the data payload if the index is // present. Otherwise, returns nil. // Note, this function will always return nil if the backing payload represents a CARv1. -func (r *Reader) IndexReader() io.ReaderAt { +func (r *Reader) IndexReader() (io.Reader, error) { if r.Version == 1 || !r.Header.HasIndex() { - return nil + return nil, nil } return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) } @@ -139,7 +145,6 @@ type Stats struct { MaxBlockLength uint64 MinBlockLength uint64 IndexCodec multicodec.Code - IndexSize uint64 } // Inspect does a quick scan of a CAR, performing basic validation of the format @@ -201,7 +206,10 @@ func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { var minCidLength uint64 = math.MaxUint64 var minBlockLength uint64 = math.MaxUint64 - dr := r.DataReader() + dr, err := r.DataReader() + if err != nil { + return Stats{}, err + } bdr := internalio.ToByteReader(dr) // read roots, not using Roots(), because we need the offset setup in the data trader @@ -327,14 +335,15 @@ func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { } if stats.Version != 1 && stats.Header.HasIndex() { - // performs an UnmarshalLazyRead which should have its own validation and - // is intended to be a fast initial scan - ind, size, err := index.ReadFromWithSize(r.IndexReader()) + idxr, err := r.IndexReader() + if err != nil { + return Stats{}, err + } + idx, err := index.ReadFrom(idxr) if err != nil { return Stats{}, err } - stats.IndexCodec = ind.Codec() - stats.IndexSize = uint64(size) + stats.IndexCodec = idx.Codec() } return stats, nil diff --git a/v2/reader_test.go b/v2/reader_test.go index ed2b78e2..cdeb74d7 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -11,7 +11,6 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" @@ -141,7 +140,9 @@ func TestReader_WithCarV1Consistency(t *testing.T) { gotRoots, err := subject.Roots() require.NoError(t, err) require.Equal(t, wantReader.Header.Roots, gotRoots) - require.Nil(t, subject.IndexReader()) + ir, err := subject.IndexReader() + require.Nil(t, ir) + require.NoError(t, err) }) } } @@ -173,13 +174,16 @@ func TestReader_WithCarV2Consistency(t *testing.T) { require.NoError(t, err) require.Equal(t, wantReader.Header.Roots, gotRoots) - gotIndexReader := subject.IndexReader() + gotIndexReader, err := subject.IndexReader() + require.NoError(t, err) require.NotNil(t, gotIndexReader) gotIndex, err := index.ReadFrom(gotIndexReader) require.NoError(t, err) - wantIndex, err := carv2.GenerateIndex(subject.DataReader()) + dr, err := subject.DataReader() require.NoError(t, err) - testutil.AssertIdenticalIndexes(t, wantIndex, gotIndex) + wantIndex, err := carv2.GenerateIndex(dr) + require.NoError(t, err) + require.Equal(t, wantIndex, gotIndex) }) } } @@ -187,13 +191,15 @@ func TestReader_WithCarV2Consistency(t *testing.T) { func TestOpenReader_DoesNotPanicForReadersCreatedBeforeClosure(t *testing.T) { subject, err := carv2.OpenReader("testdata/sample-wrapped-v2.car") require.NoError(t, err) - dReaderBeforeClosure := subject.DataReader() - iReaderBeforeClosure := subject.IndexReader() + dReaderBeforeClosure, err := subject.DataReader() + require.NoError(t, err) + iReaderBeforeClosure, err := subject.IndexReader() + require.NoError(t, err) require.NoError(t, subject.Close()) buf := make([]byte, 1) - panicTest := func(r io.ReaderAt) { - _, err := r.ReadAt(buf, 0) + panicTest := func(r io.Reader) { + _, err := r.Read(buf) require.EqualError(t, err, "mmap: closed") } @@ -205,12 +211,14 @@ func TestOpenReader_DoesNotPanicForReadersCreatedAfterClosure(t *testing.T) { subject, err := carv2.OpenReader("testdata/sample-wrapped-v2.car") require.NoError(t, err) require.NoError(t, subject.Close()) - dReaderAfterClosure := subject.DataReader() - iReaderAfterClosure := subject.IndexReader() + dReaderAfterClosure, err := subject.DataReader() + require.NoError(t, err) + iReaderAfterClosure, err := subject.IndexReader() + require.NoError(t, err) buf := make([]byte, 1) - panicTest := func(r io.ReaderAt) { - _, err := r.ReadAt(buf, 0) + panicTest := func(r io.Reader) { + _, err := r.Read(buf) require.EqualError(t, err, "mmap: closed") } @@ -237,7 +245,9 @@ func TestReader_ReturnsNilWhenThereIsNoIndex(t *testing.T) { subject, err := carv2.OpenReader(tt.path) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, subject.Close()) }) - require.Nil(t, subject.IndexReader()) + ir, err := subject.IndexReader() + require.NoError(t, err) + require.Nil(t, ir) }) } } @@ -365,7 +375,6 @@ func TestInspect(t *testing.T) { MaxBlockLength: 9, MinBlockLength: 4, IndexCodec: multicodec.CarMultihashIndexSorted, - IndexSize: 148, }, }, // same as CarV1 but with a zero-byte EOF to test options diff --git a/v2/writer_test.go b/v2/writer_test.go index 1bf3ca3d..12dcd6a9 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -9,7 +9,6 @@ import ( "testing" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/stretchr/testify/require" @@ -48,16 +47,20 @@ func TestWrapV1(t *testing.T) { require.NoError(t, err) wantPayload, err := ioutil.ReadAll(sf) require.NoError(t, err) - gotPayload, err := ioutil.ReadAll(subject.DataReader()) + dr, err := subject.DataReader() + require.NoError(t, err) + gotPayload, err := ioutil.ReadAll(dr) require.NoError(t, err) require.Equal(t, wantPayload, gotPayload) // Assert embedded index in CARv2 is same as index generated from the original CARv1. wantIdx, err := GenerateIndexFromFile(src) require.NoError(t, err) - gotIdx, err := index.ReadFrom(subject.IndexReader()) + ir, err := subject.IndexReader() + require.NoError(t, err) + gotIdx, err := index.ReadFrom(ir) require.NoError(t, err) - testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) + require.Equal(t, wantIdx, gotIdx) } func TestExtractV1(t *testing.T) {