Skip to content

Commit

Permalink
Revert changes to index.Index while keeping most of security fixes
Browse files Browse the repository at this point in the history
Revert the changes to the `index.Index` interface so that it matches the
current go-car main branch head.

Reverting these changes, however, means that unmarshalling indices is
indeed dangerous and should not be done on untrusted files.

Note, the `carv2.Reader` APIs are changed to return errors as well as
readers when getting `DataReader` and `IndexReader`. This is to
accommodate issues detected by fuzz testing while removing boilerplate
code in internal IO reader conversion. This is a breaking change to the
current API but should be straightforward to roll out.

Remove index fuzz tests and change inspection to only read the index
codec instead of reading the entire index.
  • Loading branch information
masih committed Jul 6, 2022
1 parent 6f66dc0 commit 4bc6774
Show file tree
Hide file tree
Showing 22 changed files with 304 additions and 472 deletions.
6 changes: 5 additions & 1 deletion v2/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ func BenchmarkExtractV1UsingReader(b *testing.B) {
if err != nil {
b.Fatal(err)
}
_, err = io.Copy(dst, reader.DataReader())
dr, err := reader.DataReader()
if err != nil {
b.Fatal(err)
}
_, err = io.Copy(dst, dr)
if err != nil {
b.Fatal(err)
}
Expand Down
59 changes: 47 additions & 12 deletions v2/blockstore/readonly.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,28 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.Option) (*R
}
if idx == nil {
if v2r.Header.HasIndex() {
idx, err = index.ReadFrom(v2r.IndexReader())
ir, err := v2r.IndexReader()
if err != nil {
return nil, err
}
} else if idx, err = generateIndex(v2r.DataReader(), opts...); err != nil {
return nil, err
idx, err = index.ReadFrom(ir)
if err != nil {
return nil, err
}
} else {
dr, err := v2r.DataReader()
if err != nil {
return nil, err
}
if idx, err = generateIndex(dr, opts...); err != nil {
return nil, err
}
}
}
b.backing = v2r.DataReader()
b.backing, err = v2r.DataReader()
if err != nil {
return nil, err
}
b.idx = idx
return b, nil
default:
Expand All @@ -142,7 +155,11 @@ func readVersion(at io.ReaderAt, opts ...carv2.Option) (uint64, error) {
case io.Reader:
rr = r
default:
rr = internalio.NewOffsetReadSeeker(r, 0)
var err error
rr, err = internalio.NewOffsetReadSeeker(r, 0)
if err != nil {
return 0, err
}
}
return carv2.ReadVersion(rr, opts...)
}
Expand All @@ -157,7 +174,11 @@ func generateIndex(at io.ReaderAt, opts ...carv2.Option) (index.Index, error) {
return nil, err
}
default:
rs = internalio.NewOffsetReadSeeker(r, 0)
var err error
rs, err = internalio.NewOffsetReadSeeker(r, 0)
if err != nil {
return nil, err
}
}

// Note, we do not set any write options so that all write options fall back onto defaults.
Expand All @@ -183,7 +204,7 @@ func OpenReadOnly(path string, opts ...carv2.Option) (*ReadOnly, error) {
}

func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) {
r, err := internalio.NewOffsetReadSeekerWithError(b.backing, idx)
r, err := internalio.NewOffsetReadSeeker(b.backing, idx)
if err != nil {
return cid.Cid{}, nil, err
}
Expand Down Expand Up @@ -216,8 +237,11 @@ func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) {
var fnFound bool
var fnErr error
err := b.idx.GetAll(key, func(offset uint64) bool {
uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
var err error
uar, err := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
if err != nil {
fnErr = err
return false
}
_, err = varint.ReadUvarint(uar)
if err != nil {
fnErr = err
Expand Down Expand Up @@ -317,7 +341,11 @@ func (b *ReadOnly) GetSize(ctx context.Context, key cid.Cid) (int, error) {
fnSize := -1
var fnErr error
err := b.idx.GetAll(key, func(offset uint64) bool {
rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
rdr, err := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
if err != nil {
fnErr = err
return false
}
sectionLen, err := varint.ReadUvarint(rdr)
if err != nil {
fnErr = err
Expand Down Expand Up @@ -401,7 +429,10 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
}

// TODO we may use this walk for populating the index, and we need to be able to iterate keys in this way somewhere for index generation. In general though, when it's asked for all keys from a blockstore with an index, we should iterate through the index when possible rather than linear reads through the full car.
rdr := internalio.NewOffsetReadSeeker(b.backing, 0)
rdr, err := internalio.NewOffsetReadSeeker(b.backing, 0)
if err != nil {
return nil, err
}
header, err := carv1.ReadHeader(rdr, b.opts.MaxAllowedHeaderSize)
if err != nil {
b.mu.RUnlock() // don't hold the mutex forever
Expand Down Expand Up @@ -492,7 +523,11 @@ func (b *ReadOnly) HashOnRead(bool) {

// Roots returns the root CIDs of the backing CAR.
func (b *ReadOnly) Roots() ([]cid.Cid, error) {
header, err := carv1.ReadHeader(internalio.NewOffsetReadSeeker(b.backing, 0), b.opts.MaxAllowedHeaderSize)
ors, err := internalio.NewOffsetReadSeeker(b.backing, 0)
if err != nil {
return nil, err
}
header, err := carv1.ReadHeader(ors, b.opts.MaxAllowedHeaderSize)
if err != nil {
return nil, fmt.Errorf("error reading car header: %w", err)
}
Expand Down
4 changes: 3 additions & 1 deletion v2/blockstore/readonly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,9 @@ func newV1ReaderFromV2File(t *testing.T, carv2Path string, zeroLenSectionAsEOF b
t.Cleanup(func() { f.Close() })
v2r, err := carv2.NewReader(f)
require.NoError(t, err)
v1r, err := newV1Reader(v2r.DataReader(), zeroLenSectionAsEOF)
dr, err := v2r.DataReader()
require.NoError(t, err)
v1r, err := newV1Reader(dr, zeroLenSectionAsEOF)
require.NoError(t, err)
return v1r
}
Expand Down
13 changes: 8 additions & 5 deletions v2/blockstore/readwrite.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.Option) (*ReadWri
offset = 0
}
rwbs.dataWriter = internalio.NewOffsetWriter(rwbs.f, offset)
v1r, err := internalio.NewOffsetReadSeekerWithError(rwbs.f, offset)
v1r, err := internalio.NewOffsetReadSeeker(rwbs.f, offset)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -169,11 +169,11 @@ func (b *ReadWrite) initWithRoots(v2 bool, roots []cid.Cid) error {
return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter)
}

func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Option) error {
func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid) error {
// On resumption it is expected that the CARv2 Pragma, and the CARv1 header is successfully written.
// Otherwise we cannot resume from the file.
// Read pragma to assert if b.f is indeed a CARv2.
version, err := carv2.ReadVersion(b.f, opts...)
version, err := carv2.ReadVersion(b.f)
if err != nil {
// The file is not a valid CAR file and cannot resume from it.
// Or the write must have failed before pragma was written.
Expand All @@ -193,7 +193,7 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Opti
// Check if file was finalized by trying to read the CARv2 header.
// We check because if finalized the CARv1 reader behaviour needs to be adjusted since
// EOF will not signify end of CARv1 payload. i.e. index is most likely present.
r, err := internalio.NewOffsetReadSeekerWithError(b.f, carv2.PragmaSize)
r, err := internalio.NewOffsetReadSeeker(b.f, carv2.PragmaSize)
if err != nil {
return err
}
Expand Down Expand Up @@ -223,7 +223,10 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Opti
}

// Use the given CARv1 padding to instantiate the CARv1 reader on file.
v1r := internalio.NewOffsetReadSeeker(b.ronly.backing, 0)
v1r, err := internalio.NewOffsetReadSeeker(b.ronly.backing, 0)
if err != nil {
return err
}
header, err := carv1.ReadHeader(v1r, b.opts.MaxAllowedHeaderSize)
if err != nil {
// Cannot read the CARv1 header; the file is most likely corrupt.
Expand Down
49 changes: 32 additions & 17 deletions v2/blockstore/readwrite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/blockstore"
"github.com/ipld/go-car/v2/index"
"github.com/ipld/go-car/v2/index/testutil"
"github.com/ipld/go-car/v2/internal/carv1"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-multihash"
Expand Down Expand Up @@ -488,7 +487,9 @@ func TestBlockstoreResumption(t *testing.T) {
wantPayloadReader, err := carv1.NewCarReader(v1f)
require.NoError(t, err)

gotPayloadReader, err := carv1.NewCarReader(v2r.DataReader())
dr, err := v2r.DataReader()
require.NoError(t, err)
gotPayloadReader, err := carv1.NewCarReader(dr)
require.NoError(t, err)

require.Equal(t, wantPayloadReader.Header, gotPayloadReader.Header)
Expand Down Expand Up @@ -516,11 +517,15 @@ func TestBlockstoreResumption(t *testing.T) {
// Assert index in resumed from file is identical to index generated from the data payload portion of the generated CARv2 file.
_, err = v1f.Seek(0, io.SeekStart)
require.NoError(t, err)
gotIdx, err := index.ReadFrom(v2r.IndexReader())
ir, err := v2r.IndexReader()
require.NoError(t, err)
gotIdx, err := index.ReadFrom(ir)
require.NoError(t, err)
wantIdx, err := carv2.GenerateIndex(v2r.DataReader())
dr, err = v2r.DataReader()
require.NoError(t, err)
testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx)
wantIdx, err := carv2.GenerateIndex(dr)
require.NoError(t, err)
require.Equal(t, wantIdx, gotIdx)
}

func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) {
Expand Down Expand Up @@ -829,29 +834,37 @@ func TestOpenReadWrite_WritesIdentityCIDsWhenOptionIsEnabled(t *testing.T) {
t.Cleanup(func() { require.NoError(t, r.Close()) })
require.True(t, r.Header.HasIndex())

ir := r.IndexReader()
ir, err := r.IndexReader()
require.NoError(t, err)
require.NotNil(t, ir)

gotIdx, err := index.ReadFrom(ir)
require.NoError(t, err)

// Determine expected offset as the length of header plus one
header, err := carv1.ReadHeader(r.DataReader(), carv1.DefaultMaxAllowedHeaderSize)
dr, err := r.DataReader()
require.NoError(t, err)
header, err := carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize)
require.NoError(t, err)
object, err := cbor.DumpObject(header)
require.NoError(t, err)
expectedOffset := len(object) + 1

// Assert index is iterable and has exactly one record with expected multihash and offset.
var count int
err = gotIdx.ForEach(func(mh multihash.Multihash, offset uint64) error {
count++
require.Equal(t, idmh, mh)
require.Equal(t, uint64(expectedOffset), offset)
return nil
})
require.NoError(t, err)
require.Equal(t, 1, count)
switch idx := gotIdx.(type) {
case index.IterableIndex:
var i int
err := idx.ForEach(func(mh multihash.Multihash, offset uint64) error {
i++
require.Equal(t, idmh, mh)
require.Equal(t, uint64(expectedOffset), offset)
return nil
})
require.NoError(t, err)
require.Equal(t, 1, i)
default:
require.Failf(t, "unexpected index type", "wanted %v but got %v", multicodec.CarMultihashIndexSorted, idx.Codec())
}
}

func TestOpenReadWrite_ErrorsWhenWritingTooLargeOfACid(t *testing.T) {
Expand Down Expand Up @@ -914,7 +927,9 @@ func TestReadWrite_ReWritingCARv1WithIdentityCidIsIdenticalToOriginalWithOptions
// Note, we hash instead of comparing bytes to avoid excessive memory usage when sample CARv1 is large.

hasher := sha512.New()
gotWritten, err := io.Copy(hasher, v2r.DataReader())
dr, err := v2r.DataReader()
require.NoError(t, err)
gotWritten, err := io.Copy(hasher, dr)
require.NoError(t, err)
gotSum := hasher.Sum(nil)

Expand Down
6 changes: 5 additions & 1 deletion v2/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ func ExampleWrapV1File() {
if err != nil {
panic(err)
}
inner, err := ioutil.ReadAll(cr.DataReader())
dr, err := cr.DataReader()
if err != nil {
panic(err)
}
inner, err := ioutil.ReadAll(dr)
if err != nil {
panic(err)
}
Expand Down
69 changes: 19 additions & 50 deletions v2/fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"testing"

car "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/index"
"github.com/ipld/go-car/v2/internal/carv1"
)

Expand Down Expand Up @@ -73,48 +72,15 @@ func FuzzReader(f *testing.F) {
}

subject.Roots()
ir := subject.IndexReader()
if ir != nil {
index.ReadFrom(ir)
_, err = subject.IndexReader()
if err != nil {
return
}
car.GenerateIndex(subject.DataReader())
})
}

func FuzzIndex(f *testing.F) {
files, err := filepath.Glob("testdata/*.car")
if err != nil {
f.Fatal(err)
}
for _, fname := range files {
func() {
file, err := os.Open(fname)
if err != nil {
f.Fatal(err)
}
defer file.Close()
subject, err := car.NewReader(file)
if err != nil {
return
}
indexRdr := subject.IndexReader()
if indexRdr == nil {
return
}
_, n, err := index.ReadFromWithSize(indexRdr)
if err != nil {
return
}
data, err := io.ReadAll(io.NewSectionReader(indexRdr, 0, n))
if err != nil {
f.Fatal(err)
}
f.Add(data)
}()
}

f.Fuzz(func(t *testing.T, data []byte) {
index.ReadFrom(bytes.NewReader(data))
dr, err := subject.DataReader()
if err != nil {
return
}
car.GenerateIndex(dr)
})
}

Expand All @@ -137,15 +103,18 @@ func FuzzInspect(f *testing.F) {
if err != nil {
t.Fatal("second NewReader on same data failed", err.Error())
}

if i := reader.IndexReader(); i != nil {
_, err = index.ReadFrom(i)
if err != nil {
return
}
i, err := reader.IndexReader()
if err != nil {
return
}
// FIXME: Once indexes are safe to parse, do not skip .car with index in the differential fuzzing.
if i == nil {
return
}
dr, err := reader.DataReader()
if err != nil {
return
}

dr := reader.DataReader()

_, err = carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize)
if err != nil {
Expand Down
Loading

0 comments on commit 4bc6774

Please sign in to comment.