Skip to content

Commit

Permalink
refactor(githubfs): Use ZipFS
Browse files Browse the repository at this point in the history
The tarball retrieved from GitHub may be corrupted: for example, I can see that a package.json file exists in it, but I cannot read it. Therefore, I switched to zipfs, which might be more stable.

This commit also adds a new error message, "repo is too large", since we now read the entire stream into RAM. Note that this may use up to $2n$ memory, where $n$ is the size of the downloaded archive.
  • Loading branch information
pan93412 committed Sep 12, 2024
1 parent 5e6f66e commit baee6ae
Showing 1 changed file with 26 additions and 17 deletions.
43 changes: 26 additions & 17 deletions internal/source/githubfs.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package source

import (
"archive/tar"
"compress/gzip"
"archive/zip"
"bytes"
"context"
"fmt"
"io"
"strings"

"github.com/google/go-github/v63/github"
"github.com/spf13/afero"
"github.com/spf13/afero/tarfs"
"github.com/spf13/afero/zipfs"
)

type githubFsOptions struct {
Expand Down Expand Up @@ -40,51 +40,60 @@ func NewGitHubFs(repoOwner, repoName string, token *string, options ...GitHubFsO
opt(fsOptions)
}

fs, err := getRefTarballFs(fsOptions.owner, fsOptions.name, fsOptions.ref, token)
fs, err := getRefZipFs(fsOptions.owner, fsOptions.name, fsOptions.ref, token)
if err != nil {
return nil, fmt.Errorf("get ref tarball fs: %w", err)
}

return fs, nil
}

const tarFileSizeLimit = 1024 * 1024 * 1024 /* 1 GiB */
// zipFileSizeLimit is the maximum accepted size, in bytes, of a downloaded
// zipball (1 GiB).
const zipFileSizeLimit = 1 << 30

func getRefTarballFs(owner, name, ref string, token *string) (afero.Fs, error) {
func getRefZipFs(owner, name, ref string, token *string) (afero.Fs, error) {
githubClient := github.NewClient(nil)
if token != nil {
githubClient = githubClient.WithAuthToken(*token)
}

repo, _, err := githubClient.Repositories.GetArchiveLink(context.Background(), owner, name, github.Tarball, &github.RepositoryContentGetOptions{
repo, _, err := githubClient.Repositories.GetArchiveLink(context.Background(), owner, name, github.Zipball, &github.RepositoryContentGetOptions{
Ref: ref,
}, 1)
if err != nil {
return nil, fmt.Errorf("get archive link: %w", err)
}

tarContent, err := githubClient.Client().Get(repo.String())
content, err := githubClient.Client().Get(repo.String())
if err != nil {
return nil, fmt.Errorf("get tarball: %w", err)
}
defer func() {
_ = tarContent.Body.Close()
_ = content.Body.Close()
}()

// FIXME: Hint users when the repo is too large.
lr := io.LimitReader(tarContent.Body, tarFileSizeLimit)
zipballStreamReader := io.LimitReader(content.Body, zipFileSizeLimit+1)

gzipReader, err := gzip.NewReader(lr)
buf := new(bytes.Buffer)
n, err := buf.ReadFrom(zipballStreamReader)
if err != nil {
return nil, fmt.Errorf("new gzip reader: %w", err)
return nil, fmt.Errorf("read tarball: %w", err)
}
// The stream was capped at zipFileSizeLimit+1 bytes, so reading more than
// zipFileSizeLimit bytes means the archive exceeds the limit.
if n > zipFileSizeLimit {
	// Bytes -> GiB needs three divisions by 1024; two would yield MiB and
	// report a 1 GiB limit as "1024 GiB".
	return nil, fmt.Errorf("repo is too large; limit is %g GiB", float64(zipFileSizeLimit)/1024/1024/1024)
}

zipballReadAtReader := bytes.NewReader(buf.Bytes())

zipReader, err := zip.NewReader(zipballReadAtReader, n)
if err != nil {
return nil, fmt.Errorf("new zip reader: %w", err)
}

tarReader := tar.NewReader(gzipReader)
fs := tarfs.New(tarReader)
fs := zipfs.New(zipReader)

filename := tarContent.Header.Get("Content-Disposition")
filename := content.Header.Get("Content-Disposition")
if attachmentName, ok := strings.CutPrefix(filename, "attachment; filename="); ok {
return afero.NewBasePathFs(fs, strings.TrimSuffix(attachmentName, ".tar.gz")), nil
return afero.NewBasePathFs(fs, strings.TrimSuffix(attachmentName, ".zip")), nil
}

return fs, nil
Expand Down

0 comments on commit baee6ae

Please sign in to comment.