From b0f291a37b9d9ac3537eba004e59ef5d786395a9 Mon Sep 17 00:00:00 2001 From: gammazero Date: Thu, 1 Apr 2021 20:15:48 -0700 Subject: [PATCH 1/2] New SumStream function reads from io.Reader This provides a way to read from a stream instead of having to load a potentially large file into memory first. --- opts/opts.go | 8 +------- sum.go | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/opts/opts.go b/opts/opts.go index 809693d..c3c33ae 100644 --- a/opts/opts.go +++ b/opts/opts.go @@ -8,7 +8,6 @@ import ( "flag" "fmt" "io" - "io/ioutil" "sort" "strings" @@ -149,10 +148,5 @@ func (o *Options) Check(r io.Reader, h1 mh.Multihash) error { // Multihash reads all the data in r and calculates its multihash. func (o *Options) Multihash(r io.Reader) (mh.Multihash, error) { - b, err := ioutil.ReadAll(r) - if err != nil { - return nil, err - } - - return mh.Sum(b, o.AlgorithmCode, o.Length) + return mh.SumStream(r, o.AlgorithmCode, o.Length) } diff --git a/sum.go b/sum.go index 6d01fe6..76600c6 100644 --- a/sum.go +++ b/sum.go @@ -1,8 +1,10 @@ package multihash import ( + "bytes" "errors" "fmt" + "io" mhreg "github.com/multiformats/go-multihash/core" ) @@ -13,9 +15,16 @@ var ErrSumNotSupported = mhreg.ErrSumNotSupported var ErrLenTooLarge = errors.New("requested length was too large for digest") // Sum obtains the cryptographic sum of a given buffer. The length parameter -// indicates the length of the resulting digest and passing a negative value -// use default length values for the selected hash function. +// indicates the length of the resulting digest. Passing a negative value uses +// default length values for the selected hash function. func Sum(data []byte, code uint64, length int) (Multihash, error) { + return SumStream(bytes.NewReader(data), code, length) +} + +// Sum obtains the cryptographic sum of a given stream. The length parameter +// indicates the length of the resulting digest. Passing a negative value uses +// default length values for the selected hash function. +func SumStream(r io.Reader, code uint64, length int) (Multihash, error) { // Get the algorithm. hasher, err := GetHasher(code) if err != nil { @@ -23,7 +32,9 @@ func Sum(data []byte, code uint64, length int) (Multihash, error) { } // Feed data in. - hasher.Write(data) + if _, err = io.Copy(hasher, r); err != nil { + return nil, err + } // Compute final hash. // A new slice is allocated. FUTURE: see other comment below about allocation, and review together with this line to try to improve. From 707d9c23b418e3219a7343030a4c295dbe74dede Mon Sep 17 00:00:00 2001 From: gammazero Date: Tue, 6 Apr 2021 10:28:06 -0700 Subject: [PATCH 2/2] Review change - avoid using bytes.Reader if not necessary. --- sum.go | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/sum.go b/sum.go index 76600c6..d7d75c5 100644 --- a/sum.go +++ b/sum.go @@ -1,9 +1,9 @@ package multihash import ( - "bytes" "errors" "fmt" + "hash" "io" mhreg "github.com/multiformats/go-multihash/core" @@ -18,12 +18,21 @@ var ErrLenTooLarge = errors.New("requested length was too large for digest") // indicates the length of the resulting digest. Passing a negative value uses // default length values for the selected hash function. func Sum(data []byte, code uint64, length int) (Multihash, error) { - return SumStream(bytes.NewReader(data), code, length) + // Get the algorithm. + hasher, err := GetHasher(code) + if err != nil { + return nil, err + } + + // Feed data in. + hasher.Write(data) + + return encodeHash(hasher, code, length) } -// Sum obtains the cryptographic sum of a given stream. The length parameter -// indicates the length of the resulting digest. Passing a negative value uses -// default length values for the selected hash function. +// SumStream obtains the cryptographic sum of a given stream. The length +// parameter indicates the length of the resulting digest. Passing a negative +// value uses default length values for the selected hash function. func SumStream(r io.Reader, code uint64, length int) (Multihash, error) { // Get the algorithm. hasher, err := GetHasher(code) @@ -36,6 +45,10 @@ func SumStream(r io.Reader, code uint64, length int) (Multihash, error) { return nil, err } + return encodeHash(hasher, code, length) +} + +func encodeHash(hasher hash.Hash, code uint64, length int) (Multihash, error) { // Compute final hash. // A new slice is allocated. FUTURE: see other comment below about allocation, and review together with this line to try to improve. sum := hasher.Sum(nil)