Skip to content

Commit

Permalink
Merge pull request #2035 from mtrmac/wip
Browse files Browse the repository at this point in the history
Ensure chunked TOC and tar-split metadata are consistent
  • Loading branch information
openshift-merge-bot[bot] committed Jul 19, 2024
2 parents 981169d + 2c4c5b8 commit 8d26ede
Show file tree
Hide file tree
Showing 12 changed files with 546 additions and 46 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ require (
github.com/tchap/go-patricia/v2 v2.3.1
github.com/ulikunitz/xz v0.5.12
github.com/vbatts/tar-split v0.11.5
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/sys v0.22.0
gotest.tools v2.2.0+incompatible
)
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
Expand Down
134 changes: 134 additions & 0 deletions pkg/chunked/compression_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ import (
"errors"
"fmt"
"io"
"maps"
"strconv"
"time"

"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
expMaps "golang.org/x/exp/maps"
)

var typesToTar = map[string]byte{
Expand Down Expand Up @@ -221,6 +224,12 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
if err != nil {
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
}
// We use the TOC for creating on-disk files, but the tar-split for creating metadata
// when exporting the layer contents. Ensure the two match, otherwise local inspection of a container
// might be misleading about the exported contents.
if err := ensureTOCMatchesTarSplit(toc, decodedTarSplit); err != nil {
return nil, nil, nil, 0, fmt.Errorf("tar-split and TOC data is inconsistent: %w", err)
}
} else if tarSplitChunk.Offset > 0 {
// We must ignore the tar-split when the digest is not present in the TOC, because we can’t authenticate it.
//
Expand All @@ -234,6 +243,131 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
}

// ensureTOCMatchesTarSplit validates that toc and tarSplit contain _exactly_ the same entries.
func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
pendingFiles := map[string]*internal.FileMetadata{} // Name -> an entry in toc.Entries
for i := range toc.Entries {
e := &toc.Entries[i]
if e.Type != internal.TypeChunk {
if _, ok := pendingFiles[e.Name]; ok {
return fmt.Errorf("TOC contains duplicate entries for path %q", e.Name)
}
pendingFiles[e.Name] = e
}
}

if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
e, ok := pendingFiles[hdr.Name]
if !ok {
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
}
delete(pendingFiles, hdr.Name)
expected, err := internal.NewFileMetadata(hdr)
if err != nil {
return fmt.Errorf("determining expected metadata for %q: %w", hdr.Name, err)
}
if err := ensureFileMetadataAttributesMatch(e, &expected); err != nil {
return fmt.Errorf("TOC and tar-split metadata doesn’t match: %w", err)
}

return nil
}); err != nil {
return err
}
if len(pendingFiles) != 0 {
remaining := expMaps.Keys(pendingFiles)
if len(remaining) > 5 {
remaining = remaining[:5] // Just to limit the size of the output.
}
return fmt.Errorf("TOC contains entries not present in tar-split, incl. %q", remaining)
}
return nil
}

// ensureTimePointersMatch ensures that a and b are equal
func ensureTimePointersMatch(a, b *time.Time) error {
// We didn’t always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
// The archive/tar code turns time.IsZero() timestamps into an Unix timestamp of 0 when writing, but turns an Unix timestamp of 0
// when writing into a (local-timezone) Jan 1 1970, which is not IsZero(). So, treat that the same as IsZero as well.
unixZero := time.Unix(0, 0)
if a != nil && (a.IsZero() || a.Equal(unixZero)) {
a = nil
}
if b != nil && (b.IsZero() || b.Equal(unixZero)) {
b = nil
}
switch {
case a == nil && b == nil:
return nil
case a == nil:
return fmt.Errorf("nil != %v", *b)
case b == nil:
return fmt.Errorf("%v != nil", *a)
default:
if a.Equal(*b) {
return nil
}
return fmt.Errorf("%v != %v", *a, *b)
}
}

// ensureFileMetadataAttributesMatch ensures that a and b match in file attributes (it ignores entries relevant to locating data
// in the tar stream or matching contents)
func ensureFileMetadataAttributesMatch(a, b *internal.FileMetadata) error {
// Keep this in sync with internal.FileMetadata!

if a.Type != b.Type {
return fmt.Errorf("mismatch of Type: %q != %q", a.Type, b.Type)
}
if a.Name != b.Name {
return fmt.Errorf("mismatch of Name: %q != %q", a.Name, b.Name)
}
if a.Linkname != b.Linkname {
return fmt.Errorf("mismatch of Linkname: %q != %q", a.Linkname, b.Linkname)
}
if a.Mode != b.Mode {
return fmt.Errorf("mismatch of Mode: %q != %q", a.Mode, b.Mode)
}
if a.Size != b.Size {
return fmt.Errorf("mismatch of Size: %q != %q", a.Size, b.Size)
}
if a.UID != b.UID {
return fmt.Errorf("mismatch of UID: %q != %q", a.UID, b.UID)
}
if a.GID != b.GID {
return fmt.Errorf("mismatch of GID: %q != %q", a.GID, b.GID)
}

if err := ensureTimePointersMatch(a.ModTime, b.ModTime); err != nil {
return fmt.Errorf("mismatch of ModTime: %w", err)
}
if err := ensureTimePointersMatch(a.AccessTime, b.AccessTime); err != nil {
return fmt.Errorf("mismatch of AccessTime: %w", err)
}
if err := ensureTimePointersMatch(a.ChangeTime, b.ChangeTime); err != nil {
return fmt.Errorf("mismatch of ChangeTime: %w", err)
}
if a.Devmajor != b.Devmajor {
return fmt.Errorf("mismatch of Devmajor: %q != %q", a.Devmajor, b.Devmajor)
}
if a.Devminor != b.Devminor {
return fmt.Errorf("mismatch of Devminor: %q != %q", a.Devminor, b.Devminor)
}
if !maps.Equal(a.Xattrs, b.Xattrs) {
return fmt.Errorf("mismatch of Xattrs: %q != %q", a.Xattrs, b.Xattrs)
}

// Digest is not compared
// Offset is not compared
// EndOffset is not compared

// ChunkSize is not compared
// ChunkOffset is not compared
// ChunkDigest is not compared
// ChunkType is not compared
return nil
}

func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedCompressedChecksum)
if err != nil {
Expand Down
46 changes: 5 additions & 41 deletions pkg/chunked/compressor/compressor.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@ package compressor
import (
"bufio"
"bytes"
"encoding/base64"
"io"
"strings"
"time"

"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
"github.com/klauspost/compress/zstd"
Expand Down Expand Up @@ -234,14 +230,6 @@ func newTarSplitData(level int) (*tarSplitData, error) {
}, nil
}

// timeIfNotZero returns a pointer to the time.Time if it is not zero, otherwise it returns nil.
func timeIfNotZero(t *time.Time) *time.Time {
if t == nil || t.IsZero() {
return nil
}
return t
}

func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
// total written so far. Used to retrieve partial offsets in the file
dest := ioutils.NewWriteCounter(destFile)
Expand Down Expand Up @@ -380,38 +368,14 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
}
}

typ, err := internal.GetType(hdr.Typeflag)
mainEntry, err := internal.NewFileMetadata(hdr)
if err != nil {
return err
}
xattrs := make(map[string]string)
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
entries := []internal.FileMetadata{
{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: timeIfNotZero(&hdr.ModTime),
AccessTime: timeIfNotZero(&hdr.AccessTime),
ChangeTime: timeIfNotZero(&hdr.ChangeTime),
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
Digest: checksum,
Offset: startOffset,
EndOffset: lastOffset,
},
}
mainEntry.Digest = checksum
mainEntry.Offset = startOffset
mainEntry.EndOffset = lastOffset
entries := []internal.FileMetadata{mainEntry}
for i := 1; i < len(chunks); i++ {
entries = append(entries, internal.FileMetadata{
Type: internal.TypeChunk,
Expand Down
57 changes: 56 additions & 1 deletion pkg/chunked/internal/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ package internal
// larger software like the graph drivers.

import (
"archive/tar"
"bytes"
"encoding/base64"
"encoding/binary"
"fmt"
"io"
"strings"
"time"

"github.com/containers/storage/pkg/archive"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
)

// TOC is short for Table of Contents and is used by the zstd:chunked
Expand All @@ -36,10 +39,22 @@ type TOC struct {
// that duplicates what can found in the tar header (and should match), but
// also special/custom content (see below).
//
// Regular files may optionally be represented as a sequence of “chunks”,
// which may be ChunkTypeData or ChunkTypeZeros (and ChunkTypeData boundaries
// are heuristically determined to increase chance of chunk matching / reuse
// similar to rsync). In that case, the regular file is represented
// as an initial TypeReg entry (with all metadata for the file as a whole)
// immediately followed by zero or more TypeChunk entries (containing only Type,
// Name and Chunk* fields); if there is at least one TypeChunk entry, the Chunk*
// fields are relevant in all of these entries, including the initial
// TypeReg one.
//
// Note that the metadata here, when fetched by a zstd:chunked aware client,
// is used instead of that in the tar stream. The contents of the tar stream
// are not used in this scenario.
type FileMetadata struct {
// If you add any fields, update ensureFileMetadataMatches as well!

// The metadata below largely duplicates that in the tar headers.
Type string `json:"type"`
Name string `json:"name"`
Expand Down Expand Up @@ -267,3 +282,43 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte {

return manifestDataLE
}

// timeIfNotZero returns a pointer to the time.Time if it is not zero, otherwise it returns nil.
func timeIfNotZero(t *time.Time) *time.Time {
if t == nil || t.IsZero() {
return nil
}
return t
}

// NewFileMetadata creates a basic FileMetadata entry for hdr.
// The caller must set DigestOffset/EndOffset, and the Chunk* values, separately.
func NewFileMetadata(hdr *tar.Header) (FileMetadata, error) {
typ, err := GetType(hdr.Typeflag)
if err != nil {
return FileMetadata{}, err
}
xattrs := make(map[string]string)
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
return FileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: timeIfNotZero(&hdr.ModTime),
AccessTime: timeIfNotZero(&hdr.AccessTime),
ChangeTime: timeIfNotZero(&hdr.ChangeTime),
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
}, nil
}
Loading

0 comments on commit 8d26ede

Please sign in to comment.