Skip to content

Commit

Permalink
add WrapV1File
Browse files Browse the repository at this point in the history
See the added doc and example.

This also required making index marshaling deterministic, as per the spec.


This commit was moved from ipld/go-car@2611339
  • Loading branch information
mvdan committed Jul 16, 2021
1 parent 81641ca commit 7b4ca3a
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 2 deletions.
50 changes: 50 additions & 0 deletions ipld/car/v2/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package car_test

import (
"bytes"
"fmt"
"io/ioutil"

carv2 "github.com/ipld/go-car/v2"
)

func ExampleWrapV1File() {
	// Start from a sample CARv1 file and wrap it, untouched, inside a CARv2
	// that also carries an index.
	// The output lands in testdata so other tests can reuse it, and so that
	// any non-determinism in the result becomes visible.
	const (
		v1Path = "testdata/sample-v1.car"
		v2Path = "testdata/sample-wrapped-v2.car"
	)

	// An example has no error return, so any failure simply panics.
	check := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	check(carv2.WrapV1File(v1Path, v2Path))

	// Read the freshly written CARv2 back and print a few facts about it.
	reader, err := carv2.NewReaderMmap(v2Path)
	check(err)
	defer reader.Close()

	rootCids, err := reader.Roots()
	check(err)
	fmt.Println("Roots:", rootCids)
	fmt.Println("Has index:", reader.Header.HasIndex())

	// The wrapped payload must be byte-for-byte equal to the source CARv1.
	want, err := ioutil.ReadFile(v1Path)
	check(err)
	got, err := ioutil.ReadAll(reader.CarV1Reader())
	check(err)
	fmt.Println("Inner CARv1 is exactly the same:", bytes.Equal(want, got))

	// Output:
	// Roots: [bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy]
	// Has index: true
	// Inner CARv1 is exactly the same: true
}
4 changes: 4 additions & 0 deletions ipld/car/v2/index/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ func Generate(car io.ReaderAt) (Index, error) {
if err != nil {
return nil, fmt.Errorf("error reading car header: %w", err)
}

// TODO: Generate should likely just take an io.ReadSeeker.
// TODO: ensure the input's header version is 1.

offset, err := carv1.HeaderSize(header)
if err != nil {
return nil, err
Expand Down
18 changes: 16 additions & 2 deletions ipld/car/v2/index/indexsorted.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func (s *singleWidthIndex) Marshal(w io.Writer) error {
if err := binary.Write(w, binary.LittleEndian, int64(len(s.index))); err != nil {
return err
}
// TODO: we could just w.Write(s.index) here and avoid overhead
_, err := io.Copy(w, bytes.NewBuffer(s.index))
return err
}
Expand Down Expand Up @@ -129,8 +130,21 @@ func (m *multiWidthIndex) Codec() Codec {

func (m *multiWidthIndex) Marshal(w io.Writer) error {
binary.Write(w, binary.LittleEndian, int32(len(*m)))
for _, s := range *m {
if err := s.Marshal(w); err != nil {

// The widths are unique, but ranging over a map isn't deterministic.
// As per the CARv2 spec, we must order buckets by digest length.

widths := make([]uint32, 0, len(*m))
for width := range *m {
widths = append(widths, width)
}
sort.Slice(widths, func(i, j int) bool {
return widths[i] < widths[j]
})

for _, width := range widths {
bucket := (*m)[width]
if err := bucket.Marshal(w); err != nil {
return err
}
}
Expand Down
Binary file added ipld/car/v2/testdata/sample-v1.car
Binary file not shown.
Binary file added ipld/car/v2/testdata/sample-wrapped-v2.car
Binary file not shown.
66 changes: 66 additions & 0 deletions ipld/car/v2/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"context"
"io"
"os"

"github.com/ipfs/go-cid"
format "github.com/ipfs/go-ipld-format"
Expand Down Expand Up @@ -130,3 +131,68 @@ func (w *Writer) writeIndex(writer io.Writer, carV1 []byte) (int64, error) {
// FIXME refactor index to expose the number of bytes written.
return 0, index.WriteTo(idx, writer)
}

// WrapV1File takes a source path to a CARv1 file and wraps it as a CARv2 file
// with an index, writing the result to the destination path.
// The resulting CARv2 file's inner CARv1 payload is left unmodified,
// and does not use any padding before the inner CARv1 or index.
//
// The destination file is only created once the index has been generated and
// the CARv1 size is known. If a later step fails, the partially written
// destination file is removed on a best-effort basis, so that a truncated
// CARv2 is not left behind at dstPath.
func WrapV1File(srcPath, dstPath string) (err error) {
	// TODO: verify src is indeed a CARv1 to prevent misuse.
	// index.Generate should probably be in charge of that.

	// TODO: also expose WrapV1(io.ReadSeeker, io.Writer),
	// once index.Generate takes a ReadSeeker.

	// We don't use mmap.Open, so we can later use io.Copy.
	f1, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer f1.Close()

	idx, err := index.Generate(f1)
	if err != nil {
		return err
	}

	// Use Seek to learn the size of the CARv1 before reading it,
	// then rewind so that io.Copy below starts from the first byte.
	v1Size, err := f1.Seek(0, io.SeekEnd)
	if err != nil {
		return err
	}
	if _, err := f1.Seek(0, io.SeekStart); err != nil {
		return err
	}

	// Only create the destination CARv2 when we've gathered all the
	// information we need, such as the index and the CARv1 size.
	f2, err := os.Create(dstPath)
	if err != nil {
		return err
	}
	defer func() {
		// Always release the descriptor; on the success path the file was
		// already closed below, so this second Close error is irrelevant.
		f2.Close()
		if err != nil {
			// Best-effort cleanup of the incomplete output. The original
			// error is what the caller needs, so a Remove failure is
			// deliberately not reported.
			os.Remove(dstPath)
		}
	}()

	// Similar to the Writer API, write all components of a CARv2 to the
	// destination file: Pragma, Header, CARv1, Index.
	v2Header := NewHeader(uint64(v1Size))
	if _, err := f2.Write(Pragma); err != nil {
		return err
	}
	if _, err := v2Header.WriteTo(f2); err != nil {
		return err
	}
	if _, err := io.Copy(f2, f1); err != nil {
		return err
	}
	if err := index.WriteTo(idx, f2); err != nil {
		return err
	}

	// Check the close error, since we're writing to f2.
	// The deferred function above sees this result too, and removes the
	// destination if the close failed.
	return f2.Close()
}

0 comments on commit 7b4ca3a

Please sign in to comment.