From e822a4894f5061349dcdc05b0db7c317c3aad5c8 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Thu, 23 May 2024 10:33:39 +0300 Subject: [PATCH 01/10] Fix: send full public-key file to htsget --- storage/htsget.go | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/storage/htsget.go b/storage/htsget.go index fa1706c3..aee94f54 100644 --- a/storage/htsget.go +++ b/storage/htsget.go @@ -1,9 +1,9 @@ package storage import ( - "bufio" "bytes" "context" + "encoding/base64" "encoding/json" "fmt" "io" @@ -128,36 +128,24 @@ func htsgetUrl(url, useProtocol string) (updatedUrl string, token string) { return } -func htsgetHeader() string { +func htsgetHeaderJson() string { ensureKeyFiles() - file, err := os.Open(publicKeyFile) + b, err := os.ReadFile(publicKeyFile) if err != nil { - fmt.Println("Could not read", publicKeyFile, "file, which should exist:", err) + fmt.Println("[ERROR] Could not read", publicKeyFile, + "file, which should exist:", err) panic(1) } - publicKey := "" - scanner := bufio.NewScanner(file) - if scanner.Scan() { // Skip one header line. - if scanner.Scan() { - publicKey = scanner.Text() // The key is on the second line. 
- } - } - file.Close() - // HTTP headers to be encoded as JSON: headers := make(map[string]string) - - if publicKey == "" { - fmt.Println("[WARN] Could not read public key (second line) from", publicKeyFile, "file.") - } else { - headers["client-public-key"] = publicKey - } + headers["client-public-key"] = base64.StdEncoding.EncodeToString(b) headersJson, err := json.Marshal(&headers) if err != nil { - fmt.Println("Failed to format JSON-header for passing client-public-key:", err) + fmt.Println("[ERROR] Failed to format JSON-header for passing", + "client-public-key:", err) panic(1) } @@ -173,7 +161,7 @@ func htsgetArgs(url, useProtocol string, decrypt bool) []string { } if decrypt { - cmdArgs = append(cmdArgs, "--headers", htsgetHeader()) + cmdArgs = append(cmdArgs, "--headers", htsgetHeaderJson()) } cmdArgs = append(cmdArgs, httpsUrl) @@ -261,11 +249,6 @@ func cmdPipe(cmd1, cmd2 *exec.Cmd, destFilePath string) { stderr2 := new(bytes.Buffer) r, w := io.Pipe() - if err != nil { - fmt.Printf("[ERROR] failed to create OS pipe: %v", err) - return - } - cmd1.Stdout = w cmd1.Stderr = stderr1 From a3c83a47542805d61157d0adfae1391495fd1a3b Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Thu, 1 Aug 2024 14:53:42 +0300 Subject: [PATCH 02/10] HTSGET client + Crypt4gh decryption --- config/config.go | 7 +- config/default-config.yaml | 2 +- go.mod | 2 +- storage/crypt4gh/decrypt.go | 408 ++++++++++++++ storage/crypt4gh/keys.go | 516 ++++++++++++++++++ storage/crypt4gh/keys_test.go | 80 +++ .../crypt4gh/testdata/keys/key.encrypted.sec | 3 + storage/crypt4gh/testdata/keys/key.plain.sec | 3 + storage/crypt4gh/testdata/keys/key.pub | 3 + storage/htsget.go | 225 ++------ storage/htsget/client.go | 241 ++++++++ storage/htsget_test.go | 50 ++ 12 files changed, 1341 insertions(+), 199 deletions(-) create mode 100644 storage/crypt4gh/decrypt.go create mode 100644 storage/crypt4gh/keys.go create mode 100644 storage/crypt4gh/keys_test.go create mode 100644 
storage/crypt4gh/testdata/keys/key.encrypted.sec create mode 100644 storage/crypt4gh/testdata/keys/key.plain.sec create mode 100644 storage/crypt4gh/testdata/keys/key.pub create mode 100644 storage/htsget/client.go create mode 100644 storage/htsget_test.go diff --git a/config/config.go b/config/config.go index 75662b2d..bf18e670 100644 --- a/config/config.go +++ b/config/config.go @@ -390,13 +390,14 @@ func (h FTPStorage) Valid() bool { return !h.Disabled } -// HTSGETStorage configures the http storage backend. +// HTSGETStorage configures the storage backend for the HTSGET protocol. type HTSGETStorage struct { + // Controls whether HTSGET storage is disabled (defaults to false) Disabled bool // Actual protocol for fetching the resource (defaults to 'https') Protocol string - // Whether Funnel should generate and send its crypt4gh public key (default: false) - SendPublicKey bool + // Timeout for each HTSGET request (defaults to 30 seconds) + Timeout Duration } // Valid validates the HTSGETStorage configuration. 
diff --git a/config/default-config.yaml b/config/default-config.yaml index 7ac3cc13..6fd0e2b3 100644 --- a/config/default-config.yaml +++ b/config/default-config.yaml @@ -396,4 +396,4 @@ FTPStorage: HTSGETStorage: Disabled: false Protocol: https - SendPublicKey: false + Timeout: 30s diff --git a/go.mod b/go.mod index b4717ce7..c1ff8cea 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( github.com/spf13/cobra v1.8.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.9.0 + golang.org/x/crypto v0.19.0 golang.org/x/net v0.21.0 golang.org/x/oauth2 v0.17.0 golang.org/x/term v0.17.0 @@ -156,7 +157,6 @@ require ( go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/sdk v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect - golang.org/x/crypto v0.19.0 // indirect golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.17.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/storage/crypt4gh/decrypt.go b/storage/crypt4gh/decrypt.go new file mode 100644 index 00000000..a6f7dfc8 --- /dev/null +++ b/storage/crypt4gh/decrypt.go @@ -0,0 +1,408 @@ +package crypt4gh + +import ( + "crypto/cipher" + "errors" + "fmt" + "io" + "os" + + "golang.org/x/crypto/blake2b" + "golang.org/x/crypto/chacha20poly1305" + "golang.org/x/crypto/curve25519" +) + +type Crypt4gh struct { + keyPair *Crypt4ghKeyPair + stream io.Reader + headerPacketCount uint32 + headerPacketProcessed uint32 + dataKeys []cipher.AEAD + dataBlock []byte + dataBlockPos int + dataBlockCount int + editListLengths []uint64 + editListSkip bool +} + +// Reads the magic-number at the beginning of the file to check if the file +// might be considered a Crypt4gh file. 
+func IsCrypt4ghFile(path string) bool { + file, err := os.Open(path) + if err != nil { + return false + } + + defer file.Close() + + magicBuffer := make([]byte, 8) + _, err = file.Read(magicBuffer) + return err == nil && string(magicBuffer) == "crypt4gh" +} + +func (c *Crypt4gh) Read(buffer []byte) (n int, err error) { + amount := len(buffer) + if amount < 1 { + return 0, errors.New("Provided byte buffer has no space") + } + + var start, end int + + for amount > 0 { + c.applyEditListSkip() + + for c.dataBlockPos >= len(c.dataBlock) { + err = c.decryptDataBlock() + if err != nil { + return 0, err + } + } + + start, end, amount = c.updateDataBlockRange(amount) + copy(buffer, c.dataBlock[start:end]) + n += end - start + } + + return n, err +} + +func (c *Crypt4gh) updateDataBlockRange(amount int) (int, int, int) { + // We can assume here that c.dataBlockPos < len(c.dataBlock) + start := c.dataBlockPos + end := start + amount + + if end > len(c.dataBlock) { + end = len(c.dataBlock) + } + + amount, end = c.applyEditListKeep(amount, start, end) + + // Already updates the position though reading has not been performed yet: + c.dataBlockPos = end + + return start, end, amount +} + +func (c *Crypt4gh) readHeader() error { + err := c.checkMagicNumber() + + if err == nil { + err = c.checkVersion() + } + + if err == nil { + err = c.storeHeaderPacketCount() + } + + if err == nil { + for i := uint32(0); i < c.headerPacketCount; i++ { + err = c.readHeaderPacket() + if err != nil { + break + } + } + } + + if err == nil { + err = c.checkAtLeastOneDataKey() + } + + return err +} + +func (c *Crypt4gh) checkMagicNumber() error { + magicNumber, errRead := c.readBytes(8) + + if string(magicNumber) == "crypt4gh" { + return nil + } + + errMagic := errors.New("Not a Crypt4gh file (missing/wrong magic number)") + return errors.Join(errMagic, errRead) +} + +func (c *Crypt4gh) checkVersion() error { + version, errRead := c.readInt32() + + if version == 1 { + return nil + } + + errVersion := 
fmt.Errorf("Crypt4gh file version (%d) not supported", version) + return errors.Join(errVersion, errRead) +} + +func (c *Crypt4gh) checkAtLeastOneDataKey() error { + if len(c.dataKeys) == 0 { + return fmt.Errorf("The Crypt4gh file is not shared to the provided "+ + "key-pair (scanned %d header packets)", c.headerPacketProcessed) + } + return nil +} + +func (c *Crypt4gh) storeHeaderPacketCount() error { + var errRead error + c.headerPacketCount, errRead = c.readInt32() + return errRead +} + +func (c *Crypt4gh) readHeaderPacket() error { + c.headerPacketProcessed += 1 + + packetLength, errRead := c.readInt32() + if errRead != nil { + return errRead + } + + headerEncryptionMethod, errRead := c.readInt32() + if errRead != nil { + return errRead + } + + if headerEncryptionMethod != 0 { + fmt.Printf("Unrecognized header packet encryption method value (%d). "+ + "Only 0 (X25519_chacha20_ietf_poly1305) is supported. "+ + "Skipping the header packet.", headerEncryptionMethod) + return nil + } + + writerPublicKey, errRead := c.readBytes(32) + if errRead != nil { + return errRead + } + + nonce, errRead := c.readBytes(12) + if errRead != nil { + return errRead + } + + // Subtracting the length of previously read items to get the length: + remainingLength := uint(packetLength) - 52 + encryptedPayloadWithMac, errRead := c.readBytes(remainingLength) + + payload := c.decryptPacketPayload(encryptedPayloadWithMac, writerPublicKey, nonce) + if len(payload) > 0 { + c.parseHeaderPayload(payload) + } + + return errRead +} + +func (c *Crypt4gh) parseHeaderPayload(payload []byte) { + packetType := readInt32(payload[0:4]) + dataEncryptionParameters := packetType == 0 + dataEditList := packetType == 1 + + if dataEncryptionParameters { + if len(payload) != 40 { + c.warnPacket("payload with data encryption parameters has a "+ + "non-expected length [%d] (expected: 40).", len(payload)) + } + + dataEncryptionMethod := readInt32(payload[4:8]) + + if dataEncryptionMethod != 0 { + 
c.warnPacket("specifies an unsupported data encryption method "+ + "[%d] while the only supported method is "+ + "[chacha20_ietf_poly1305 = 0].", dataEncryptionMethod) + return + } + + if dataKey, errKey := chacha20poly1305.New(payload[8:40]); errKey != nil { + c.warnPacket("ChaCha20-IETF-Poly1305 data-key error: %v", errKey) + } else { + c.dataKeys = append(c.dataKeys, dataKey) + } + + } else if dataEditList { + numberLengths := readInt32(payload[4:8]) + expectedLength := 8 + 8*int(numberLengths) + + if len(payload) != expectedLength { + c.warnPacket("payload with data edit list has a non expected "+ + "length [%d] (expected: [%d]).", len(payload), expectedLength) + } + + if len(c.editListLengths) > 0 { + c.warnPacket("has more than one edit-list (only one permitted)") + return + } + + // Read and store the lengths of the edit list + for startPos := 8; numberLengths > 0; numberLengths-- { + c.editListLengths = append(c.editListLengths, readInt64(payload[startPos:startPos+8])) + startPos += 8 + } + + // The first length is about skipping a number of bytes + c.editListSkip = true + + } else { + c.warnPacket("specifies an unsupported packet type [%d] while only "+ + "[data_encryption_parameters = 0] and [data_edit_list = 1] are "+ + "supported.", packetType) + } +} + +func (c *Crypt4gh) decryptPacketPayload(encryptedPayloadWithMac, writerPublicKey, nonce []byte) []byte { + curve, errCurve := curve25519.X25519(c.keyPair.secretKey, writerPublicKey) + if errCurve != nil { + c.warnPacket("curve25519 error: %v", errCurve) + return nil + } + + length1 := len(curve) + length2 := length1 + len(c.keyPair.publicKey) + length3 := length2 + len(writerPublicKey) + + keys := make([]byte, length3) + + copy(keys, curve) + copy(keys[length1:], c.keyPair.publicKey) + copy(keys[length2:], writerPublicKey) + + sharedKey := blake2b.Sum512(keys) + + aead, errKey := chacha20poly1305.New(sharedKey[:32]) + if errKey != nil { + c.warnPacket("ChaCha20-IETF-Poly1305 shared-key error: %v", 
errKey) + return nil + } + + plaintext, errOpen := aead.Open(nil, nonce, encryptedPayloadWithMac, nil) + + if errOpen != nil { + // c.warnPacket("ChaCha20-IETF-Poly1305 deciphering error : %v", errOpen) + return nil // This error and the packet payload must be ignored + } + + return plaintext +} + +func (c *Crypt4gh) decryptDataBlock() error { + c.dataBlockCount++ + + block, err := c.readBytesMax(65564) + if err != nil { + return err + } + + for i := range c.dataKeys { + key := c.dataKeys[i] + + nonce := block[0:key.NonceSize()] + encryptedDataWithMac := block[key.NonceSize():] + + plaintext, errOpen := key.Open(nil, nonce, encryptedDataWithMac, nil) + + if errOpen == nil { + c.dataBlock = plaintext + c.dataBlockPos = 0 + return nil + } + } + + fmt.Println("[WARN] Didn't find a suitable key for deciphering a data block.") + return nil +} + +func (c *Crypt4gh) applyEditListSkip() { + if !c.editListSkip || len(c.editListLengths) == 0 { + return + } + + remainingAmount := uint64(len(c.dataBlock) - c.dataBlockPos) + skipAmount := &c.editListLengths[0] + + if *skipAmount < remainingAmount { + c.dataBlockPos += int(*skipAmount) + *skipAmount = 0 + } else { + *skipAmount -= remainingAmount + c.dataBlockPos = len(c.dataBlock) + } + + if *skipAmount == 0 { + c.editListLengths = c.editListLengths[1:] + c.editListSkip = false + } +} + +func (c *Crypt4gh) applyEditListKeep(amount, start, end int) (remainingAmount, updatedEnd int) { + rangeLength := end - start + remainingAmount = amount - rangeLength + updatedEnd = end + + if c.editListSkip || len(c.editListLengths) == 0 { + return + } + + keepAmount := &c.editListLengths[0] + + if *keepAmount < uint64(rangeLength) { + rangeLength = int(*keepAmount) + updatedEnd = start + rangeLength + remainingAmount = amount - rangeLength + } + + *keepAmount -= uint64(rangeLength) + + if *keepAmount == 0 { + c.editListLengths = c.editListLengths[1:] + c.editListSkip = true + } + + return +} + +func (c *Crypt4gh) warnPacket(msg string, args 
...any) { + fmt.Printf("[WARN] Header packet [%d/%d] %s\n", + c.headerPacketProcessed, + c.headerPacketCount, + fmt.Sprintf(msg, args...)) +} + +func (c *Crypt4gh) readBytes(count uint) ([]byte, error) { + b, err := c.readBytesMax(count) + + if err == nil && count != uint(len(b)) { + err = fmt.Errorf("Could not read the entire value (got %d out of %d bytes)", len(b), count) + } + + return b, err +} + +func (c *Crypt4gh) readBytesMax(count uint) ([]byte, error) { + b := make([]byte, count) + actualCount, err := c.stream.Read(b) + return b[:actualCount], err +} + +func (c *Crypt4gh) readInt32() (uint32, error) { + bytes, err := c.readBytes(4) + var num uint32 + + if err == nil { + num = readInt32(bytes) + } + + return num, err +} + +// Reads 4 bytes in the Little-Endian order to compute an unsigned integer. +func readInt32(bytes []byte) uint32 { + var num uint32 + for i := range bytes[0:4] { + num = uint32(bytes[i])<<(8*i) | num + } + return num +} + +// Reads 8 bytes in the Little-Endian order to compute an unsigned integer. 
+func readInt64(bytes []byte) uint64 { + var num uint64 + for i := range bytes[0:8] { + num = uint64(bytes[i])<<(8*i) | num + } + return num +} diff --git a/storage/crypt4gh/keys.go b/storage/crypt4gh/keys.go new file mode 100644 index 00000000..8bc3fa60 --- /dev/null +++ b/storage/crypt4gh/keys.go @@ -0,0 +1,516 @@ +package crypt4gh + +import ( + "bufio" + "bytes" + "crypto" + "crypto/ecdh" + "crypto/rand" + "encoding/base64" + "errors" + "fmt" + "io" + "os" + "path" + "strings" + + "golang.org/x/crypto/bcrypt" + "golang.org/x/crypto/chacha20poly1305" + "golang.org/x/crypto/pbkdf2" + "golang.org/x/crypto/scrypt" +) + +const privateKeyMagic = "c4gh-v1" +const presumedDirName = ".c4gh" + +var base64Decoder *base64.Encoding = base64.StdEncoding.WithPadding(base64.StdPadding) + +type Crypt4ghKeyPair struct { + publicKey []byte + secretKey []byte +} + +func (k *Crypt4ghKeyPair) EncodePublicKeyBase64() string { + header, footer := getKeyFileHeaderFooter("PUBLIC") + + content := bytes.NewBufferString(header) + content.WriteString(base64Decoder.EncodeToString(k.publicKey)) + content.WriteRune('\n') + content.WriteString(footer) + + return base64.StdEncoding.EncodeToString(content.Bytes()) +} + +func (k *Crypt4ghKeyPair) Save(publicKeyPath, privateKeyPath string, passphrase []byte) error { + err := saveKeyFile(publicKeyPath, "PUBLIC", k.publicKey) + if err != nil { + return err + } + + encodedKey, err := encodePrivateKey(k.secretKey, passphrase) + if err != nil { + return err + } + + return saveKeyFile(privateKeyPath, "PRIVATE", encodedKey) +} + +// Decrypts given Crypt4gh file stream (expecting the header part followed by encrypted body). +func (k *Crypt4ghKeyPair) Decrypt(r io.Reader) (io.Reader, error) { + c := Crypt4gh{keyPair: k, stream: r} + err := c.readHeader() + return &c, err +} + +// Decrypts given Crypt4gh file stream (body) using an explicitly provided header information. 
+func (k *Crypt4ghKeyPair) DecryptWithHeader(header []byte, body io.Reader) (io.Reader, error) { + c := Crypt4gh{keyPair: k, stream: bytes.NewReader(header)} + err := c.readHeader() + c.stream = body // After parsing header, switch to the body reader + return &c, err +} + +// Initiates a completely new key-pair, which is stored only in memory. +func NewKeyPair() (*Crypt4ghKeyPair, error) { + edCurve, err := ecdh.X25519().GenerateKey(rand.Reader) + if err != nil { + return nil, err + } + + return &Crypt4ghKeyPair{ + publicKey: edCurve.PublicKey().Bytes(), + secretKey: edCurve.Bytes(), + }, nil +} + +// Initiates a key-pair from the provided file-paths, where the encrypted secret +// will be accessed using the provided passphrase. Failure to parse the files, +// or decrypt the secret key will result in errors returned by this method. +// Note that the public key file is optional: when it exists, its content will +// be verified to make sure that it pairs with the secret key. Mismatch of keys +// will also result in an error. +// Also note that when the secret key is not encrypted, the passphrase may be +// nil or, if present, its value will be ignored. +func KeyPairFromFiles(publicKeyPath, secretKeyPath string, passphrase []byte) (*Crypt4ghKeyPair, error) { + sec, err := parseSecretKeyFile(secretKeyPath, passphrase) + if err != nil { + return nil, err + } + + secKey, err := ecdh.X25519().NewPrivateKey(sec) + if err != nil { + return nil, err + } + + pub := secKey.PublicKey().Bytes() + + if isFile(publicKeyPath) { + pubFromFile, err := parsePublicKeyFile(publicKeyPath) + + if err != nil { + return nil, err + } else if !bytes.Equal(pub, pubFromFile) { + return nil, errors.New("The crypt4gh public key from the file " + + "does not match the private key") + } + } + + return &Crypt4ghKeyPair{ + publicKey: pub, + secretKey: sec, + }, nil +} + +// The most general-purpose way to load Crypt4gh keys from files, or generate +// and save them when the files cannot be resolved. 
+// +// First, the public and private key file-paths are resolved from environment +// variables: C4GH_PUBLIC_KEY (optional), C4GH_SECRET_KEY, C4GH_PASSPHRASE +// (optional). If C4GH_SECRET_KEY refers to an unencrypted secret key, +// C4GH_PASSPHRASE may be omitted. If C4GH_PUBLIC_KEY is provided and the file +// exists, it must match the secret key. Also note that when the file of +// C4GH_SECRET_KEY does not exist yet, a new key-pair will be +// generated and stored in the specified files (secret key will be encrypted +// with C4GH_PASSPHRASE, if present). +// +// When the variables are not declared, the local and home directory files will be +// tried instead: .c4gh/key[.pub] and ~/.c4gh/key[.pub]. If these files +// (especially the secret key) do not exist, a new key-pair will be generated +// and stored in the home-directory file-paths, and, on failure, in the local +// directory file-paths. This method returns an error when existing key files +// cannot be parsed, or when new keys cannot be saved to the resolved paths. 
+func ResolveKeyPair() (*Crypt4ghKeyPair, error) { + publicKeyPath := os.Getenv("C4GH_PUBLIC_KEY") + secretKeyPath := os.Getenv("C4GH_SECRET_KEY") + passphrase := []byte(os.Getenv("C4GH_PASSPHRASE")) + + defaultKeysDir := resolveKeysDir(secretKeyPath) + + // When existing keys cannot be resolved, default to the in-memory + // generated key-pair: + if defaultKeysDir == "" { + return NewKeyPair() + } + + // When file-paths are missing, set default values and attempt to use them: + if secretKeyPath == "" { + secretKeyPath = path.Join(defaultKeysDir, "key") + } + if publicKeyPath == "" { + publicKeyPath = path.Join(defaultKeysDir, "key.pub") + } + + // Load existing key: + if isFile(secretKeyPath) { + return KeyPairFromFiles(publicKeyPath, secretKeyPath, []byte(passphrase)) + } + + // Generate new key-pair and save it to the files: + keyPair, err := NewKeyPair() + if err == nil { + err = keyPair.Save(publicKeyPath, secretKeyPath, passphrase) + } + return keyPair, err +} + +// Attempts to resolve the directory of the keys. +// On failure, it returns an empty string. +// Look up order is following: +// +// 1. When the provided file-path is not empty, use the directory of the key. +// 2. Fall back to .c4gh/ directory in the current work directory +// 3. When user's home-directory can be resolved, fall back to the ~/.c4gh/ +// directory. +// 4. When the directory does not exist and cannot be created, fail by +// returning "". +// +// To summarise the edge-cases: +// 1. If no keys are found, they will be created at ~/.c4gh/key[.pub]. +// 2. When the current directory contains the .c4gh directory then that will +// override the home-directory. +// 3. 
explicitly provided paths will be always trusted (without explicitly +// checking whether they exist) +func resolveKeysDir(secretKeyPath string) string { + var keysDir string + + if secretKeyPath != "" { + keysDir = path.Dir(secretKeyPath) + } else if isDir(presumedDirName) { + keysDir = presumedDirName + } else { + var errDir error + keysDir, errDir = os.UserHomeDir() + + if errDir == nil { + // Place the keys into a private sub-directory: + keysDir = path.Join(keysDir, presumedDirName) + } else { + keysDir = presumedDirName + } + } + + // Check the directory exists or if it can be created: + directoryExists := isDir(keysDir) + + if !directoryExists { + err := os.MkdirAll(keysDir, 0700) + directoryExists = err == nil + } + + if !directoryExists { + keysDir = "" + } + + return keysDir +} + +// Reports whether the path exists and refers to a directory. +func isDir(path string) bool { + fileInfo, err := os.Stat(path) + return err == nil && fileInfo != nil && fileInfo.IsDir() +} + +// Reports whether the path exists and refers to a regular file. 
+func isFile(path string) bool { + fileInfo, err := os.Stat(path) + return err == nil && fileInfo != nil && fileInfo.Mode().IsRegular() +} + +func parsePublicKeyFile(path string) ([]byte, error) { + b, err := readKeyFile(path, "PUBLIC") + + if err == nil && len(b) != 32 { + err = fmt.Errorf("The decoded public key has non-expected length: %d (expected 32 bytes)", len(b)) + } + + if err != nil { + err = errors.Join(fmt.Errorf("Failed to parse the public key from file [%s]", path), err) + } + + return b, err +} + +func parseSecretKeyFile(path string, passphrase []byte) ([]byte, error) { + b, err := readKeyFile(path, "PRIVATE") + + if err != nil { + return nil, errors.Join(fmt.Errorf("Failed to parse the secret key from file [%s]", path), err) + } + + return parsePrivateKey(b, passphrase) +} + +func readKeyFile(path, keyType string) ([]byte, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + + defer file.Close() + + expectHeader, expectFooter := getKeyFileHeaderFooter(keyType) + + r := bufio.NewReader(file) + + err = checkLine(r, expectHeader) + if err != nil { + return nil, err + } + + line, _, err := r.ReadLine() + if err != nil { + return nil, err + } + + b, err := base64Decoder.DecodeString(string(line)) + if err != nil { + return nil, errors.Join(errors.New("Failed to decode the Base64 string in the Crypt4gh key file"), err) + } + + fmt.Println("Reading Crypt4gh", keyType, "key from file", path) + + return b, checkLine(r, expectFooter) +} + +func saveKeyFile(path, keyType string, data []byte) error { + fmt.Println("Saving Crypt4gh", keyType, "key to file", path) + header, footer := getKeyFileHeaderFooter(keyType) + + content := bytes.NewBufferString(header) + content.WriteString(base64Decoder.EncodeToString(data)) + content.WriteRune('\n') + content.WriteString(footer) + + return os.WriteFile(path, content.Bytes(), 0400) +} + +func getKeyFileHeaderFooter(keyType string) (string, string) { + // keyType should be "PUBLIC" or "PRIVATE" + 
return "-----BEGIN CRYPT4GH " + keyType + " KEY-----\n", + "-----END CRYPT4GH " + keyType + " KEY-----\n" +} + +// Checks that the next len(line) bytes () match the given "line" string. +// On success, the method returns nil. +func checkLine(r io.Reader, line string) error { + b := make([]byte, len(line)) + + n, err := r.Read(b) + if err != nil { + return err + } + + value := string(b[:n]) + + if value != line { + return fmt.Errorf("Mismatch of expected line: expected [%s] but got [%s]", + strings.TrimRight(line, "\n"), strings.TrimRight(value, "\n")) + } + + return nil +} + +func readBytes(bytes []byte, startPos int) ([]byte, int) { + length := int(bytes[startPos])<<8 | int(bytes[startPos+1]) + start := startPos + 2 + end := start + length + return bytes[start:end], end +} + +func readString(bytes []byte, startPos int) (string, int) { + b, end := readBytes(bytes, startPos) + return string(b), end +} + +func getLengthBytes(l int) []byte { + b := [2]byte{byte(l >> 8), byte(l)} + return b[:] +} + +func parsePrivateKey(payload, passphrase []byte) ([]byte, error) { + pos := len(privateKeyMagic) + magic := string(payload[0:pos]) + + if magic != privateKeyMagic { + return nil, fmt.Errorf("Unexpected magic [%s] (expected: [%s])", + magic, privateKeyMagic) + } + + kdfname, pos := readString(payload, pos) + + if kdfname != "none" && + kdfname != "bcrypt" && + kdfname != "scrypt" && + kdfname != "pbkdf2_hmac_sha256" { + return nil, fmt.Errorf("Unsupported Key Derivation Function [%s] "+ + "(expected [none], [bcrypt], [scrypt], or [pbkdf2_hmac_sha256])", kdfname) + } + + var roundsSalt []byte + if kdfname != "none" { + roundsSalt, pos = readBytes(payload, pos) + } + + ciphername, pos := readString(payload, pos) + + if ciphername != "none" && ciphername != "chacha20_poly1305" { + return nil, fmt.Errorf("Unsupported cipher alorithm for the key "+ + "protection [%s] (expected [none] or [chacha20_poly1305])", + ciphername) + } + + encryptedKey, _ := readBytes(payload, pos) + + 
return decryptPrivateKey(encryptedKey, passphrase, kdfname, roundsSalt, ciphername) +} + +func decryptPrivateKey( + encryptedKey, passphrase []byte, + kdfname string, + roundsSalt []byte, + ciphername string, +) ([]byte, error) { + + // With these parameters, the key is actually not encrypted: + if kdfname == "none" && ciphername == "none" { + return encryptedKey, nil + } + + if kdfname == "none" || ciphername == "none" { + return nil, fmt.Errorf("Unexpected key encryption information: "+ + "kdfname=%s, ciphername=%s", kdfname, ciphername) + } + + if len(passphrase) == 0 { + return nil, errors.New("The secret key is encrypted but no passphrase was provided") + } + + rounds := int(roundsSalt[0])<<24 | int(roundsSalt[1])<<16 | int(roundsSalt[2])<<8 | int(roundsSalt[3]) + salt := roundsSalt[4:] + keySize := chacha20poly1305.KeySize + + var err error + + // Deriving a key from passphrase. + // Parameters inspired by https://github.com/EGA-archive/crypt4gh/blob/master/crypt4gh/keys/kdf.py + switch kdfname { + case "bcrypt": + passphrase, err = bcrypt.GenerateFromPassword(passphrase, rounds) + case "scrypt": + passphrase, err = scrypt.Key(passphrase, salt, 1<<14, 8, 1, keySize) + case "pbkdf2_hmac_sha256": + passphrase = pbkdf2.Key(passphrase, salt, rounds, keySize, crypto.SHA256.New) + default: + err = errors.New("Given KDF is not supported") + } + + if err != nil { + return nil, errors.Join(fmt.Errorf("Failed to derive a key using the "+ + "provided passphrase and function [%s]", kdfname), err) + } + + // Starting to decrypt the key. 
+ dataKey, err := chacha20poly1305.New(passphrase) + if err != nil { + return nil, err + } + + nonce := encryptedKey[0:chacha20poly1305.NonceSize] + ciphertext := encryptedKey[chacha20poly1305.NonceSize:] + + decryptedKey, err := dataKey.Open(nil, nonce, ciphertext, nil) + if err != nil { + err = errors.Join(errors.New("Failed to decrypt the secret key (wrong passphrase)"), err) + } + + return decryptedKey, err +} + +func encodePrivateKey(key, passphrase []byte) ([]byte, error) { + key, kdfname, roundsSalt, ciphername, err := encryptPrivateKey(key, passphrase) + if err != nil { + return nil, err + } + + content := bytes.NewBufferString(privateKeyMagic) + + content.Write(getLengthBytes(len(kdfname))) + content.Write([]byte(kdfname)) + + if kdfname != "none" { + content.Write(getLengthBytes(len(roundsSalt))) + content.Write(roundsSalt) + } + + content.Write(getLengthBytes(len(ciphername))) + content.Write([]byte(ciphername)) + + content.Write(getLengthBytes(len(key))) + content.Write(key) + + return content.Bytes(), nil +} + +func encryptPrivateKey(key, passphrase []byte) ( + encryptedKey []byte, + kdfname string, + roundsSalt []byte, + ciphername string, + err error, +) { + if len(passphrase) == 0 { + return key, "none", nil, "none", nil + } + + salt := make([]byte, 16) + if _, err = rand.Reader.Read(salt); err != nil { + return nil, "", nil, "", err + } + + // Derive a key from the passphrase: + derivedKey, err := scrypt.Key(passphrase, salt, 1<<14, 8, 1, chacha20poly1305.KeySize) + if err != nil { + return nil, "", nil, "", err + } + + // Initialise chacha20poly1305 from derived key for symmetric encryption: + aead, err := chacha20poly1305.New(derivedKey) + if err != nil { + return nil, "", nil, "", err + } + + // Initialise encrypted key with a random nonce, and leave capacity for the ciphertext. 
+ encryptedKey = make([]byte, aead.NonceSize(), aead.NonceSize()+len(key)+aead.Overhead()) + if _, err := rand.Read(encryptedKey); err != nil { + return nil, "", nil, "", err + } + + // Encrypt the key: + encryptedKey = aead.Seal(encryptedKey, encryptedKey, key, nil) + + // Prepare rounds+salt byte-array for the private key file: + roundsSalt = make([]byte, 4+len(salt)) + copy(roundsSalt[4:], salt) // First 4 bytes ("rounds") remain all zeros. + + return encryptedKey, "scrypt", roundsSalt, "chacha20_poly1305", nil +} diff --git a/storage/crypt4gh/keys_test.go b/storage/crypt4gh/keys_test.go new file mode 100644 index 00000000..06529e20 --- /dev/null +++ b/storage/crypt4gh/keys_test.go @@ -0,0 +1,80 @@ +package crypt4gh + +import ( + "os" + "testing" +) + +const encodedPublicKey = "LS0tLS1CRUdJTiBDUllQVDRHSCBQVUJMSUMgS0VZLS0tLS0KQ3N4Mk5KNVhicTJuM3Q4dWdSeTJabGRLYXRoWDRLa0haZ1dzcGhuTTlSbz0KLS0tLS1FTkQgQ1JZUFQ0R0ggUFVCTElDIEtFWS0tLS0tCg==" + +func TestLoadingEncryptedKeys(t *testing.T) { + os.Setenv("C4GH_PUBLIC_KEY", "testdata/keys/key.pub") + os.Setenv("C4GH_SECRET_KEY", "testdata/keys/key.encrypted.sec") + os.Setenv("C4GH_PASSPHRASE", "abcDEFghi") + + c4gh, err := ResolveKeyPair() + + if err != nil { + t.Error("Could not load encrypted Crypt4gh key-pair", err) + } else if len(c4gh.publicKey) == 0 { + t.Error("Loaded Crypt4gh public key is empty", err) + } else if len(c4gh.secretKey) == 0 { + t.Error("Loaded Crypt4gh secret key is empty", err) + } else if c4gh.EncodePublicKeyBase64() != encodedPublicKey { + t.Error("Unexpected BASE64-encoded public key:", c4gh.EncodePublicKeyBase64()) + } +} + +func TestLoadingNonEncryptedKeys(t *testing.T) { + os.Setenv("C4GH_PUBLIC_KEY", "testdata/keys/key.pub") + os.Setenv("C4GH_SECRET_KEY", "testdata/keys/key.plain.sec") + os.Setenv("C4GH_PASSPHRASE", "to-be-ignored") + + c4gh, err := ResolveKeyPair() + + if err != nil { + t.Error("Could not load plain-text Crypt4gh key-pair", err) + } else if len(c4gh.publicKey) == 0 { + 
t.Error("Loaded Crypt4gh public key is empty", err) + } else if len(c4gh.secretKey) == 0 { + t.Error("Loaded Crypt4gh secret key is empty", err) + } else if c4gh.EncodePublicKeyBase64() != encodedPublicKey { + t.Error("Unexpected BASE64-encoded public key:", c4gh.EncodePublicKeyBase64()) + } +} + +func TestGeneratingAndSavingNewKeys(t *testing.T) { + c4gh, err := NewKeyPair() + + if err != nil { + t.Error("Could not generate a Crypt4gh key-pair", err) + } else if len(c4gh.publicKey) == 0 { + t.Error("Generated Crypt4gh public key is empty", err) + } else if len(c4gh.secretKey) == 0 { + t.Error("Generated Crypt4gh secret key is empty", err) + } + + pubPath := "tmp_key.pub" + secPath := "tmp_key.sec" + + defer os.Remove(pubPath) + defer os.Remove(secPath) + + err = c4gh.Save(pubPath, secPath, nil) + if err != nil { + t.Error("Could not generate a Crypt4gh key-pair", err) + } + + os.Remove(pubPath) + os.Remove(secPath) + + err = c4gh.Save(pubPath, secPath, []byte("abcDEFghi")) + if err != nil { + t.Error("Could not generate a Crypt4gh key-pair", err) + } + + _, err = KeyPairFromFiles(pubPath, secPath, []byte("abcDEFghi")) + if err != nil { + t.Error("Could not reload saved Crypt4gh key-pair", err) + } +} diff --git a/storage/crypt4gh/testdata/keys/key.encrypted.sec b/storage/crypt4gh/testdata/keys/key.encrypted.sec new file mode 100644 index 00000000..10c7e155 --- /dev/null +++ b/storage/crypt4gh/testdata/keys/key.encrypted.sec @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PRIVATE KEY----- +YzRnaC12MQAGc2NyeXB0ABQAAAAAU4X3nZrf7h2o2xHZMhzX6gARY2hhY2hhMjBfcG9seTEzMDUAPLd/CeqXqD1uduHW0Vgb4GPfR3NUNDn1462ViQg/O5OrAI2ailLIy5l4RC3C6z8ExUDBn2khE+V2czP+jQ== +-----END CRYPT4GH PRIVATE KEY----- diff --git a/storage/crypt4gh/testdata/keys/key.plain.sec b/storage/crypt4gh/testdata/keys/key.plain.sec new file mode 100644 index 00000000..f601ac57 --- /dev/null +++ b/storage/crypt4gh/testdata/keys/key.plain.sec @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PRIVATE KEY----- 
+YzRnaC12MQAEbm9uZQAEbm9uZQAgEJtp37bKxdvJKiEa1vwda1fJ82UP1bCmjsA/zQJRcUY= +-----END CRYPT4GH PRIVATE KEY----- diff --git a/storage/crypt4gh/testdata/keys/key.pub b/storage/crypt4gh/testdata/keys/key.pub new file mode 100644 index 00000000..3f93c217 --- /dev/null +++ b/storage/crypt4gh/testdata/keys/key.pub @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PUBLIC KEY----- +Csx2NJ5Xbq2n3t8ugRy2ZldKathX4KkHZgWsphnM9Ro= +-----END CRYPT4GH PUBLIC KEY----- diff --git a/storage/htsget.go b/storage/htsget.go index aee94f54..1d975f79 100644 --- a/storage/htsget.go +++ b/storage/htsget.go @@ -1,74 +1,68 @@ package storage import ( - "bytes" "context" - "encoding/base64" - "encoding/json" "fmt" - "io" "os" - "os/exec" "strings" + "time" "github.com/ohsu-comp-bio/funnel/config" + "github.com/ohsu-comp-bio/funnel/storage/htsget" ) const ( - protocol = "htsget://" - protocolBearer = protocol + "bearer:" - privateKeyFile = ".private.key" - publicKeyFile = ".public.key" + protocolPrefix = "htsget://" + protocolBearer = protocolPrefix + "bearer:" ) -// HTSGET provides read access to public URLs. -// -// Note that it relies on following programs to be installed and available in -// the system PATH: -// -// - "htsget" (client implementation of the protocol) -// - "crypt4gh" (to support "*.c4gh" encrypted resources) -// - "crypt4gh-keygen" (to generate private and public keys) -// -// For more info about the programs: -// - https://htsget.readthedocs.io/en/latest/ -// - https://crypt4gh.readthedocs.io/en/latest/ +// HTSGET provides read-access to public URLs. +// It is a client implementation based on the specification +// http://samtools.github.io/hts-specs/htsget.html +// HTSGET URLs need to provided in Funnel tasks as +// `htsget://[bearer:token@]host/path/to/api/{reads|variants}/resource-id` +// Where a Bearer token can be optionally specified to forward JWT credentials. type HTSGET struct { conf config.HTSGETStorage } -// NewHTSGET creates a new HTSGET instance. 
+// NewHTSGET creates a new HTSGET instance based on the provided configuration. func NewHTSGET(conf config.HTSGETStorage) (*HTSGET, error) { return &HTSGET{conf: conf}, nil } -// Join a directory URL with a subpath. +// Join a directory URL with a subpath. Not supported with HTSGET. func (b *HTSGET) Join(url, path string) (string, error) { - return "", nil + return "", fmt.Errorf("htsgetStorage: Join operation is not supported") } -// Stat returns information about the object at the given storage URL. +// Stat returns information about the object at the given storage URL. Not supported with HTSGET. func (b *HTSGET) Stat(ctx context.Context, url string) (*Object, error) { - return nil, nil + return nil, fmt.Errorf("htsgetStorage: Stat operation is not supported") } -// List a directory. Calling List on a File is an error. +// List a directory. Calling List on a File is an error. Not supported with HTSGET. func (b *HTSGET) List(ctx context.Context, url string) ([]*Object, error) { - return nil, nil + return nil, fmt.Errorf("htsgetStorage: List operation is not supported") } +// Not supported with HTSGET. func (b *HTSGET) Put(ctx context.Context, url, path string) (*Object, error) { - return nil, nil + return nil, fmt.Errorf("htsgetStorage: Put operation is not supported") } -// Get copies a file from a given URL to the host path. +// Get operation copies a file from a given URL to the host path. // // If configuration specifies sending a public key, the received content will // be also decrypted locally before writing to the file. 
func (b *HTSGET) Get(ctx context.Context, url, path string) (*Object, error) { - htsgetArgs := htsgetArgs(url, b.conf.Protocol, b.conf.SendPublicKey) - cmd1, cmd2 := htsgetCmds(htsgetArgs, b.conf.SendPublicKey) - cmdPipe(cmd1, cmd2, path) + httpsUrl, token := htsgetUrl(url, b.conf.Protocol) + + client := htsget.NewHtsgetClient(httpsUrl, token, time.Duration(b.conf.Timeout)) + err := client.DownloadTo(path) + if err != nil { + return nil, err + } // Check that the destination file exists: info, err := os.Stat(path) @@ -78,9 +72,9 @@ func (b *HTSGET) Get(ctx context.Context, url, path string) (*Object, error) { return &Object{ URL: url, + Name: path, Size: info.Size(), LastModified: info.ModTime(), - Name: path, }, nil } @@ -101,7 +95,7 @@ func (b *HTSGET) UnsupportedOperations(url string) UnsupportedOperations { } func (b *HTSGET) supportsPrefix(url string) error { - if !strings.HasPrefix(url, protocol) { + if !strings.HasPrefix(url, protocolPrefix) { return &ErrUnsupportedProtocol{"htsgetStorage"} } return nil @@ -112,7 +106,7 @@ func htsgetUrl(url, useProtocol string) (updatedUrl string, token string) { useProtocol = "https" } useProtocol += "://" - updatedUrl = strings.Replace(url, protocol, useProtocol, 1) + updatedUrl = strings.Replace(url, protocolPrefix, useProtocol, 1) // Optional info: parse the "token" from "htsget://bearer:token@host..." 
if strings.HasPrefix(url, protocolBearer) { @@ -125,162 +119,5 @@ func htsgetUrl(url, useProtocol string) (updatedUrl string, token string) { } } - return -} - -func htsgetHeaderJson() string { - ensureKeyFiles() - - b, err := os.ReadFile(publicKeyFile) - if err != nil { - fmt.Println("[ERROR] Could not read", publicKeyFile, - "file, which should exist:", err) - panic(1) - } - - // HTTP headers to be encoded as JSON: - headers := make(map[string]string) - headers["client-public-key"] = base64.StdEncoding.EncodeToString(b) - - headersJson, err := json.Marshal(&headers) - if err != nil { - fmt.Println("[ERROR] Failed to format JSON-header for passing", - "client-public-key:", err) - panic(1) - } - - return string(headersJson) -} - -func htsgetArgs(url, useProtocol string, decrypt bool) []string { - httpsUrl, token := htsgetUrl(url, useProtocol) - cmdArgs := make([]string, 0) - - if len(token) > 0 { - cmdArgs = append(cmdArgs, "--bearer-token", token) - } - - if decrypt { - cmdArgs = append(cmdArgs, "--headers", htsgetHeaderJson()) - } - - cmdArgs = append(cmdArgs, httpsUrl) - return cmdArgs -} - -func htsgetCmds(htsgetArgs []string, decrypt bool) (cmd1, cmd2 *exec.Cmd) { - cmd1 = exec.Command("htsget", htsgetArgs...) 
- - if decrypt { - cmd2 = exec.Command("crypt4gh", "decrypt", "--sk", privateKeyFile) - } else { - cmd2 = exec.Command("cat") - } - - return -} - -func ensureKeyFiles() { - files := []string{publicKeyFile, privateKeyFile} - filesExist := true - - for i := range files { - if file, err := os.Open(files[i]); err == nil { - file.Close() - } else { - filesExist = false - break - } - } - - if !filesExist { - err := runCmd("crypt4gh-keygen", "-f", "--nocrypt", - "--sk", privateKeyFile, "--pk", publicKeyFile) - if err != nil { - fmt.Println("Could not generate crypt4gh key-files:", err) - panic(1) - } else { - fmt.Println("[INFO] Generated crypt4gh key-pair.") - } - } -} - -func runCmd(commandName string, commandArgs ...string) error { - cmd := exec.Command(commandName, commandArgs...) - - var stdout bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - if err != nil { - err = fmt.Errorf("Error running command %s: %v\nSTDOUT: %s\nSTDERR: %s", - commandName, err, stdout.String(), stderr.String()) - } - return err -} - -func cmdFailed(cmd *exec.Cmd, stderr *bytes.Buffer) bool { - fmt.Println("Waiting for ", cmd.Path) - if err := cmd.Wait(); err != nil { - fmt.Printf("[ERROR] `%s` command failed: %v\n", cmd.Path, err) - if stderr.Len() > 0 { - fmt.Println("Output from STDERR:") - fmt.Print(stderr.String()) - } - return true - } else { - fmt.Println("Waiting done ") - return false - } -} - -func cmdPipe(cmd1, cmd2 *exec.Cmd, destFilePath string) { - fw, err := os.Create(destFilePath) - if err != nil { - fmt.Println("[ERROR] Failed to create file for saving content:", destFilePath, err) - return - } - defer fw.Close() - - // Output from cmd1 goes to cmd2, and output from cmd2 goes to the file. 
- stderr1 := new(bytes.Buffer) - stderr2 := new(bytes.Buffer) - r, w := io.Pipe() - - cmd1.Stdout = w - cmd1.Stderr = stderr1 - - cmd2.Stdin = r - cmd2.Stdout = fw - cmd2.Stderr = stderr2 - - if err := cmd1.Start(); err != nil { - fmt.Printf("[ERROR] failed to run `%s` command: %v", cmd1.Path, err) - return - } - - if err := cmd2.Start(); err != nil { - fmt.Printf("[ERROR] failed to run `%s` command: %v", cmd2.Path, err) - return - } - - fmt.Println("cmd1:", cmd1.String()) - fmt.Println("cmd2:", cmd2.String()) - fmt.Println("dest:", destFilePath) - - if cmdFailed(cmd1, stderr1) { - fw.Close() - os.Remove(destFilePath) - } - - w.Close() - - if cmdFailed(cmd2, stderr2) { - fw.Close() - os.Remove(destFilePath) - } - - r.Close() + return updatedUrl, token } diff --git a/storage/htsget/client.go b/storage/htsget/client.go new file mode 100644 index 00000000..f346113f --- /dev/null +++ b/storage/htsget/client.go @@ -0,0 +1,241 @@ +package htsget + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "strings" + "time" + + "github.com/ohsu-comp-bio/funnel/storage/crypt4gh" +) + +// The main struct for holding the data of an HTSGET client instance +type HtsgetClient struct { + Timeout time.Duration + Url string + authorization string + keyPair *crypt4gh.Crypt4ghKeyPair +} + +// JSON struct (HTSGET response) for holding an item to be fetched +type HtsgetUrl struct { + Url string `json:"url"` + Headers map[string]string `json:"headers"` + DataClass string `json:"class"` +} + +// JSON struct (HTSGET response) for holding items to be fetched +type HtsgetFileInfo struct { + Format string `json:"format"` + Urls []HtsgetUrl `json:"urls"` +} + +// Main JSON struct (HTSGET response) +type HtsgetResponse struct { + FileInfo HtsgetFileInfo `json:"htsget"` +} + +// Returns a new HTSGET client for fetching an HTSGET resource. +// Optionally, a value can be provided for the Authorization header (in the +// HTTP request). 
A timeout limit (per request) is also expected. +func NewHtsgetClient(url, authorization string, timeout time.Duration) *HtsgetClient { + keys, err := crypt4gh.ResolveKeyPair() + if err != nil { + fmt.Println("[WARN] Minor issue while resolving Crypt4gh key-pair:", err) + } + return &HtsgetClient{timeout, url, authorization, keys} +} + +// Downloads the HTSGET resource (specified when the client was created) to the +// specified local file path. This method ensures that the data gets copied to +// the specified file, or it returns an error to indicate a failure. +func (hc *HtsgetClient) DownloadTo(destFile string) error { + fileInfo, err := hc.fetchHtsgetFileInfo() + if err != nil { + return err + } + + tempFile, err := fileInfo.downloadPartsToTempFile(hc.Timeout) + if err != nil { + return err + } + + if crypt4gh.IsCrypt4ghFile(tempFile) { + return hc.decryptFile(tempFile, destFile) + } + + err = os.Rename(tempFile, destFile) + if err != nil { + return errors.Join(errors.New("Cannot move the downloaded file to target file-path"), err) + } + return nil +} + +// Performs the initial HTSGET request and returns the extracted JSON. 
+func (hc *HtsgetClient) fetchHtsgetFileInfo() (*HtsgetFileInfo, error) { + ctx, cancel := context.WithTimeout(context.Background(), hc.Timeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, "GET", hc.Url, nil) + if err != nil { + return nil, err + } + + if hc.authorization != "" { + req.Header.Add("Authorization", hc.authorization) + } + + req.Header.Add("client-public-key", hc.keyPair.EncodePublicKeyBase64()) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + + contentType := resp.Header.Get("Content-Type") + if resp.StatusCode != 200 && contentType != "application/json" { + return nil, fmt.Errorf("Bad response from HTSGET service: "+ + "HTTP [%d] content-type [%s]", resp.StatusCode, contentType) + } + + var parsedJson HtsgetResponse + err = json.NewDecoder(resp.Body).Decode(&parsedJson) + if err != nil { + return nil, err + } + + if len(parsedJson.FileInfo.Urls) == 0 { + return nil, errors.New("Bad JSON from the HTSGET service: expected a " + + "JSON object with the 'htsget.urls' array with at least one URL") + } + + return &parsedJson.FileInfo, nil +} + +// Decrypts (Crypt4gh) the temporaray file to the final file path. +// Does not remove the temporary file. +func (hc *HtsgetClient) decryptFile(tempFile, destFile string) error { + defer os.Remove(tempFile) + + tempStream, err := os.Open(tempFile) + if err != nil { + return errors.Join(fmt.Errorf("Failed to read the downloaded file: %s", tempFile), err) + } + + defer tempStream.Close() + + destStream, err := os.OpenFile(destFile, os.O_WRONLY, 0400) + if err != nil { + return errors.Join(fmt.Errorf("Failed to write to the target file: %s", destFile), err) + } + + defer destStream.Close() + + decryptedStrem, err := hc.keyPair.Decrypt(tempStream) + if err != nil { + return err + } + + _, err = io.Copy(destStream, decryptedStrem) + return err +} + +// Downloads parts of the HTSGET resource to a temporary file. 
+func (fi *HtsgetFileInfo) downloadPartsToTempFile(timeout time.Duration) (string, error) { + file, err := os.CreateTemp("", "htsget.partial") + if err != nil { + return "", err + } + defer file.Close() + + for i := range fi.Urls { + err := fi.Urls[i].copyTo(file, timeout) + + if err != nil { + return "", errors.Join( + fmt.Errorf("Failed to retrieve HTSGET file part %d/%d", i+1, + len(fi.Urls)), err) + } + } + + return file.Name(), nil +} + +// Downloads or copies the current part of data to the specified file-writer. +func (hu *HtsgetUrl) copyTo(dst io.Writer, timeout time.Duration) error { + if strings.HasPrefix(hu.Url, "data:") { + return hu.copyFromData(dst) + } + + if strings.HasPrefix(hu.Url, "https:") || strings.HasPrefix(hu.Url, "http:") { + return hu.copyFromHttp(dst, timeout) + } + + return fmt.Errorf("Unsupported HTSGET URL: [%s]", hu.Url) +} + +// Decodes the current (BASE64) part of data to the specified file-writer. +func (hu *HtsgetUrl) copyFromData(dst io.Writer) error { + url := hu.Url + + contentSepPos := strings.Index(url, ",") + if contentSepPos < 0 { + return fmt.Errorf("Received invalid data-URL: [%s...] (comma-separator not found)", url[:20]) + } + + content := url[contentSepPos:] + if len(content) == 0 { + return nil + } + + // Write the content as-is when there is no ";base64": + base64Pos := strings.Index(url, ";base64") + if base64Pos < 0 || base64Pos > contentSepPos { + _, err := dst.Write([]byte(content)) + return err + } + + // The content needs to be decoded from base64: + b, err := base64.StdEncoding.DecodeString(content) + if err != nil { + return err + } + + _, err = dst.Write(b) + return err +} + +// Downloads the current part of data (over HTTP) to the specified file-writer. 
+func (hu *HtsgetUrl) copyFromHttp(dst io.Writer, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, "GET", hu.Url, nil) + if err != nil { + return err + } + + for key, value := range hu.Headers { + req.Header.Add(key, value) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + + contentType := resp.Header.Get("Content-Type") + if resp.StatusCode != 206 { + return fmt.Errorf("Bad response from HTSGET service while fetching data: "+ + "HTTP [%d] content-type [%s]", resp.StatusCode, contentType) + } + + _, err = io.Copy(dst, resp.Request.Body) + return err +} diff --git a/storage/htsget_test.go b/storage/htsget_test.go new file mode 100644 index 00000000..b3561362 --- /dev/null +++ b/storage/htsget_test.go @@ -0,0 +1,50 @@ +package storage + +import ( + "context" + "testing" + + "github.com/ohsu-comp-bio/funnel/config" +) + +func TestHTSGET(t *testing.T) { + invalidUrl := "https://google.com" + validUrl := "htsget://google.com" + + store, err := NewHTSGET(config.HTSGETStorage{}) + if err != nil { + t.Fatal("Unexpected error while creating an HTSGET backend:", err) + } + + // Wrong protocol results in unsupported operation for each action + ops := store.UnsupportedOperations(invalidUrl) + if ops.Stat == nil || ops.Get == nil || ops.Join == nil || ops.List == nil || ops.Put == nil { + t.Error("Not all operations were denied when an HTTPS URL was specified to HTSGET") + } + + // Correct protocol results in some unsupported operations (except GET) + ops = store.UnsupportedOperations(validUrl) + if ops.Stat == nil || ops.Join == nil || ops.List == nil || ops.Put == nil { + t.Error("Some non-supported operations were permitted for an HTSGET URL") + } else if ops.Get != nil { + t.Error("GET operation was not permitted for an HTSGET URL", err) + } + + // Verifying unsupported operations + + if _, err = 
store.Stat(context.Background(), validUrl); err == nil { + t.Error("Stat call should have failed for HTSGET") + } + + if _, err = store.Join(validUrl, ""); err == nil { + t.Error("Join call should have failed for HTSGET") + } + + if _, err = store.List(context.Background(), validUrl); err == nil { + t.Error("List call should have failed for HTSGET") + } + + if _, err = store.Put(context.Background(), validUrl, "path"); err == nil { + t.Error("Put call should have failed for HTSGET") + } +} From 0c45c6e0322ccc8b7c3718059f8da89b33bc5184 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Thu, 1 Aug 2024 16:37:07 +0300 Subject: [PATCH 03/10] Remove htsget from Docker build --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 09f31f86..2b764550 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,8 +11,7 @@ COPY . . RUN --mount=type=cache,target=/root/.cache/go-build make build # final stage -FROM debian -RUN apt-get update && apt-get install -y --no-install-recommends curl python3-pip && pip3 install htsget --break-system-packages && rm -rf ~/.cache +FROM alpine WORKDIR /opt/funnel VOLUME /opt/funnel/funnel-work-dir EXPOSE 8000 9090 From 0005ab840ce266c0a5d4707a84f9896cf4f381b6 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Thu, 1 Aug 2024 16:37:32 +0300 Subject: [PATCH 04/10] Update htsget storage documentation --- website/content/docs/storage/htsget.md | 45 ++++++++++++++------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/website/content/docs/storage/htsget.md b/website/content/docs/storage/htsget.md index e64913aa..59fa2d62 100644 --- a/website/content/docs/storage/htsget.md +++ b/website/content/docs/storage/htsget.md @@ -7,43 +7,46 @@ menu: # Htsget Storage -Funnel supports content-retrieval using [Htsget][spec]-compatible API, if the -host environment has [htsget](htsget-client) and [crypt4gh](crypt4gh) -(including `crypt4gh-keygen`) software installed. 
-(These programs are not part of Funnel itself.) +Funnel supports content-retrieval from an [Htsget][htsget]-compatible API. +When the received content is encrypted using [Crypt4gh][crypt4gh], Funnel +automatically decrypts the received content (using internally generated +key-pair) so that the executor wouldn't have to. Htsget is a protocol that enables downloading only specific parts of genomic data (reads/variants). The first HTTP query receives a JSON that instructs next HTTP requests for fetching the parts. Finally the parts need to be concatenated (in the order they were specified) into a single valid file (e.g. VCF or BAM). -Note that the htsget storage supports only retrieval and not storing the data! +Note that the Htsget storage supports only retrieval, and not storing the data! -The task input file URL needs to specify `htsget` as the protocol. Funnel -replaces it with the protocol specified in the configuration (default is -`https`). +The task input file URL needs to specify `htsget` as the resource protocol. +Funnel replaces it with the protocol specified in the configuration. The +default protocol is `https`, which is also presumed in the Htsget +specification. For testing purposes, it can be changed to `http`. If the service expects a `Bearer` token, it can be specified in the URL. For example: `htsget://bearer:your-token-here@fakedomain.com/...`. -Here the `bearer:` part is the required syntax to active the `your-token-here` -value to be sent to the htsget-service as a header value: +Here the `bearer:` part is the required syntax to activate the +`your-token-here` value to be sent to the htsget-service as a header value: `Authorization: Bearer your-token-here`. -If the htsget-service expects the client (Funnel) to send its public key -(crypt4gh), the `SendPublicKey` option must be set to `true` in the -configuration. In this scenario, Funnel will generate a local key-pair and -send its public key in the `client-public-key` header value. 
Htsget-service is -expected to send the content encrypted with the public key, and Funnel will -decrypt the data locally using `crypt4gh`. +Funnel always sends its public key in the header of the request to the Htsget +service. When the Htsget service supports [the content encryption using +Crypt4gh][htsget-crypt4gh], it can generate a custom Crypt4gh file header where +the Funnel instance can decrypt and find the symmetric key used for content +encryption. + +Default Htsget Storage configuration should be sufficient for most cases: ```yaml HTSGETStorage: Disabled: false Protocol: https - SendPublicKey: false + Timeout: 30s ``` ### Example task -``` + +```json { "name": "Hello world", "inputs": [{ @@ -66,6 +69,6 @@ HTSGETStorage: } ``` -[spec]: https://samtools.github.io/hts-specs/htsget.html -[htsget-client]: https://htsget.readthedocs.io/en/latest/ -[crypt4gh]: https://crypt4gh.readthedocs.io/en/latest/ +[htsget]: https://samtools.github.io/hts-specs/htsget.html +[crypt4gh]: http://samtools.github.io/hts-specs/crypt4gh.pdf +[htsget-crypt4gh]: https://github.com/umccr/htsget-rs/blob/crypt4gh/docs/crypt4gh/ARCHITECTURE.md \ No newline at end of file From d279b5d2985bd1ddad52447fb6362a38773c0ce5 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Mon, 5 Aug 2024 09:46:27 +0300 Subject: [PATCH 05/10] Fix: Docker dind image tags --- Dockerfile.dind | 2 +- Dockerfile.dind-rootless | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.dind b/Dockerfile.dind index 586bc060..0f3349ea 100644 --- a/Dockerfile.dind +++ b/Dockerfile.dind @@ -11,7 +11,7 @@ COPY . . RUN --mount=type=cache,target=/root/.cache/go-build make build # final stage -FROM docker:stable-dind +FROM docker:dind WORKDIR /opt/funnel VOLUME /opt/funnel/funnel-work-dir EXPOSE 8000 9090 diff --git a/Dockerfile.dind-rootless b/Dockerfile.dind-rootless index 9d4562bd..ea7aee5b 100644 --- a/Dockerfile.dind-rootless +++ b/Dockerfile.dind-rootless @@ -11,7 +11,7 @@ COPY . . 
RUN --mount=type=cache,target=/root/.cache/go-build make build # final stage -FROM docker:stable-dind-rootless +FROM docker:dind-rootless WORKDIR /opt/funnel VOLUME /opt/funnel/funnel-work-dir EXPOSE 8000 9090 From c8641ea0176b29aea25503d32a7af44e06192722 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Mon, 5 Aug 2024 10:47:58 +0300 Subject: [PATCH 06/10] Logging and minor fixes --- storage/crypt4gh/decrypt.go | 55 +++++++++++++++++++------- storage/crypt4gh/keys.go | 6 ++- storage/htsget/client.go | 12 ++++-- website/content/docs/storage/htsget.md | 8 ++-- 4 files changed, 57 insertions(+), 24 deletions(-) diff --git a/storage/crypt4gh/decrypt.go b/storage/crypt4gh/decrypt.go index a6f7dfc8..df26182b 100644 --- a/storage/crypt4gh/decrypt.go +++ b/storage/crypt4gh/decrypt.go @@ -49,13 +49,15 @@ func (c *Crypt4gh) Read(buffer []byte) (n int, err error) { var start, end int for amount > 0 { - c.applyEditListSkip() - for c.dataBlockPos >= len(c.dataBlock) { err = c.decryptDataBlock() if err != nil { - return 0, err + if n > 0 { + return n, nil + } + return n, err } + c.applyEditListSkip() } start, end, amount = c.updateDataBlockRange(amount) @@ -160,9 +162,9 @@ func (c *Crypt4gh) readHeaderPacket() error { } if headerEncryptionMethod != 0 { - fmt.Printf("Unrecognized header packet encryption method value (%d). "+ - "Only 0 (X25519_chacha20_ietf_poly1305) is supported. "+ - "Skipping the header packet.", headerEncryptionMethod) + log.Warn(fmt.Sprintf("Unrecognized header packet encryption method "+ + "value (%d). Only 0 (X25519_chacha20_ietf_poly1305) is supported. 
"+ + "Skipping the header packet.", headerEncryptionMethod)) return nil } @@ -212,6 +214,8 @@ func (c *Crypt4gh) parseHeaderPayload(payload []byte) { c.warnPacket("ChaCha20-IETF-Poly1305 data-key error: %v", errKey) } else { c.dataKeys = append(c.dataKeys, dataKey) + log.Debug("Successfully received data encryption keys from the " + + "file header") } } else if dataEditList { @@ -224,16 +228,19 @@ func (c *Crypt4gh) parseHeaderPayload(payload []byte) { } if len(c.editListLengths) > 0 { - c.warnPacket("has more than one edit-list (only one permitted)") + c.warnPacket("supplies another edit-list (only one permitted)") return } // Read and store the lengths of the edit list for startPos := 8; numberLengths > 0; numberLengths-- { - c.editListLengths = append(c.editListLengths, readInt64(payload[startPos:startPos+8])) + c.editListLengths = append(c.editListLengths, + readInt64(payload[startPos:startPos+8])) startPos += 8 } + log.Debug(fmt.Sprintf("Header defines an edit-list: %v", c.editListLengths)) + // The first length is about skipping a number of bytes c.editListSkip = true @@ -272,7 +279,7 @@ func (c *Crypt4gh) decryptPacketPayload(encryptedPayloadWithMac, writerPublicKey plaintext, errOpen := aead.Open(nil, nonce, encryptedPayloadWithMac, nil) if errOpen != nil { - // c.warnPacket("ChaCha20-IETF-Poly1305 deciphering error : %v", errOpen) + c.warnPacket("ChaCha20-IETF-Poly1305 deciphering error : %v", errOpen) return nil // This error and the packet payload must be ignored } @@ -298,11 +305,24 @@ func (c *Crypt4gh) decryptDataBlock() error { if errOpen == nil { c.dataBlock = plaintext c.dataBlockPos = 0 + log.Debug("Successfully decrypted a data block", + "data_block_number", c.dataBlockCount, + ) return nil + } else { + log.Warn("Failed to decrypt a data block with a key", + "data_block_number", c.dataBlockCount, + "tried_key_number", i+1, + "keys_count", len(c.dataKeys), + ) } } - fmt.Println("[WARN] Didn't find a suitable key for deciphering a data block.") + 
log.Warn("Failed to decrypt a data block (tried all keys)", + "data_block_number", c.dataBlockCount, + "keys_count", len(c.dataKeys), + ) + return nil } @@ -314,12 +334,16 @@ func (c *Crypt4gh) applyEditListSkip() { remainingAmount := uint64(len(c.dataBlock) - c.dataBlockPos) skipAmount := &c.editListLengths[0] - if *skipAmount < remainingAmount { + if remainingAmount == 0 { + return + } + + if *skipAmount <= remainingAmount { c.dataBlockPos += int(*skipAmount) *skipAmount = 0 } else { + c.dataBlockPos += int(remainingAmount) *skipAmount -= remainingAmount - c.dataBlockPos = len(c.dataBlock) } if *skipAmount == 0 { @@ -339,7 +363,7 @@ func (c *Crypt4gh) applyEditListKeep(amount, start, end int) (remainingAmount, u keepAmount := &c.editListLengths[0] - if *keepAmount < uint64(rangeLength) { + if *keepAmount <= uint64(rangeLength) { rangeLength = int(*keepAmount) updatedEnd = start + rangeLength remainingAmount = amount - rangeLength @@ -350,16 +374,17 @@ func (c *Crypt4gh) applyEditListKeep(amount, start, end int) (remainingAmount, u if *keepAmount == 0 { c.editListLengths = c.editListLengths[1:] c.editListSkip = true + c.applyEditListSkip() } return } func (c *Crypt4gh) warnPacket(msg string, args ...any) { - fmt.Printf("[WARN] Header packet [%d/%d] %s\n", + log.Warn(fmt.Sprintf("Header packet [%d/%d] %s\n", c.headerPacketProcessed, c.headerPacketCount, - fmt.Sprintf(msg, args...)) + fmt.Sprintf(msg, args...))) } func (c *Crypt4gh) readBytes(count uint) ([]byte, error) { diff --git a/storage/crypt4gh/keys.go b/storage/crypt4gh/keys.go index 8bc3fa60..2bbab369 100644 --- a/storage/crypt4gh/keys.go +++ b/storage/crypt4gh/keys.go @@ -14,6 +14,7 @@ import ( "path" "strings" + "github.com/ohsu-comp-bio/funnel/logger" "golang.org/x/crypto/bcrypt" "golang.org/x/crypto/chacha20poly1305" "golang.org/x/crypto/pbkdf2" @@ -24,6 +25,7 @@ const privateKeyMagic = "c4gh-v1" const presumedDirName = ".c4gh" var base64Decoder *base64.Encoding = 
base64.StdEncoding.WithPadding(base64.StdPadding) +var log = logger.NewLogger("crypt4gh", logger.DefaultConfig()) type Crypt4ghKeyPair struct { publicKey []byte @@ -287,13 +289,13 @@ func readKeyFile(path, keyType string) ([]byte, error) { return nil, errors.Join(errors.New("Failed to decode the Base64 string in the Crypt4gh key file"), err) } - fmt.Println("Reading Crypt4gh", keyType, "key from file", path) + log.Debug(fmt.Sprint("Reading Crypt4gh", keyType, "key from file", path)) return b, checkLine(r, expectFooter) } func saveKeyFile(path, keyType string, data []byte) error { - fmt.Println("Saving Crypt4gh", keyType, "key to file", path) + log.Debug(fmt.Sprint("Saving Crypt4gh", keyType, "key to file", path)) header, footer := getKeyFileHeaderFooter(keyType) content := bytes.NewBufferString(header) diff --git a/storage/htsget/client.go b/storage/htsget/client.go index f346113f..6722ea60 100644 --- a/storage/htsget/client.go +++ b/storage/htsget/client.go @@ -9,6 +9,7 @@ import ( "io" "net/http" "os" + "path" "strings" "time" @@ -66,6 +67,11 @@ func (hc *HtsgetClient) DownloadTo(destFile string) error { return err } + err = os.MkdirAll(path.Dir(destFile), 0700) + if err != nil { + return err + } + if crypt4gh.IsCrypt4ghFile(tempFile) { return hc.decryptFile(tempFile, destFile) } @@ -130,7 +136,7 @@ func (hc *HtsgetClient) decryptFile(tempFile, destFile string) error { defer tempStream.Close() - destStream, err := os.OpenFile(destFile, os.O_WRONLY, 0400) + destStream, err := os.Create(destFile) if err != nil { return errors.Join(fmt.Errorf("Failed to write to the target file: %s", destFile), err) } @@ -189,7 +195,7 @@ func (hu *HtsgetUrl) copyFromData(dst io.Writer) error { return fmt.Errorf("Received invalid data-URL: [%s...] 
(comma-separator not found)", url[:20]) } - content := url[contentSepPos:] + content := url[contentSepPos+1:] if len(content) == 0 { return nil } @@ -236,6 +242,6 @@ func (hu *HtsgetUrl) copyFromHttp(dst io.Writer, timeout time.Duration) error { "HTTP [%d] content-type [%s]", resp.StatusCode, contentType) } - _, err = io.Copy(dst, resp.Request.Body) + _, err = io.Copy(dst, resp.Body) return err } diff --git a/website/content/docs/storage/htsget.md b/website/content/docs/storage/htsget.md index 59fa2d62..6160fcb7 100644 --- a/website/content/docs/storage/htsget.md +++ b/website/content/docs/storage/htsget.md @@ -50,12 +50,12 @@ HTSGETStorage: { "name": "Hello world", "inputs": [{ - "url": "htsget://fakedomain.com/variants/genome2341?referenceName=1&start=10000&end=20000", + "url": "htsget://htsget-server/variants/genome2341?referenceName=1&start=10000&end=20000", "path": "/inputs/genome.vcf.gz" }], "outputs": [{ - "url": "file:///path/to/funnel-data/output.txt", - "path": "/outputs/out.txt" + "url": "file:///results/line_count.txt", + "path": "/outputs/line_count.txt" }], "executors": [{ "image": "alpine", @@ -64,7 +64,7 @@ HTSGETStorage: "-c", "zcat /inputs/genome.vcf.gz | wc -l" ], - "stdout": "/outputs/out.txt", + "stdout": "/outputs/line_count.txt" }] } ``` From d740d999cde4e5836e6e203225b7a23329628b11 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Mon, 5 Aug 2024 14:50:04 +0300 Subject: [PATCH 07/10] Fix command completed log message --- worker/docker.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/worker/docker.go b/worker/docker.go index 3a5d375c..bfc2bed1 100644 --- a/worker/docker.go +++ b/worker/docker.go @@ -14,7 +14,7 @@ import ( type DockerCommand struct { ContainerName string RemoveContainer bool - Command + Command } // Run runs the Docker command and blocks until done. 
@@ -31,7 +31,7 @@ func (dcmd DockerCommand) Run(ctx context.Context) error { dcmd.Event.Error("failed to pull docker image", err) } - args := []string{"run", "-i", "--read-only"} + args := []string{"docker", "run", "-i", "--read-only"} if dcmd.RemoveContainer { args = append(args, "--rm") @@ -60,8 +60,11 @@ func (dcmd DockerCommand) Run(ctx context.Context) error { args = append(args, dcmd.ShellCommand...) // Roughly: `docker run --rm -i --read-only -w [workdir] -v [bindings] [imageName] [cmd]` - dcmd.Event.Info("Running command", "cmd", "docker "+strings.Join(args, " ")) - cmd := exec.Command("docker", args...) + + cmdStr := strings.Join(args, " ") + + dcmd.Event.Info("Running command", "cmd", cmdStr) + cmd := exec.Command(args[0], args[1:]...) if dcmd.Stdin != nil { cmd.Stdin = dcmd.Stdin @@ -74,7 +77,12 @@ func (dcmd DockerCommand) Run(ctx context.Context) error { } go dcmd.inspectContainer(ctx) out := cmd.Run() - dcmd.Event.Info("Command %s Complete exit=%s", strings.Join(args, " "), out) + + dcmd.Event.Info("Command completed", + "cmd", cmdStr, + "err", out, + "exit", cmd.ProcessState.ExitCode()) + return out } From 5097d81c74bd1c4c575ba20f2f2248f45de53395 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Tue, 6 Aug 2024 16:15:08 +0300 Subject: [PATCH 08/10] Add Crypt4gh content decryption tests --- storage/crypt4gh/decrypt.go | 109 ++++++++-------- storage/crypt4gh/decrypt_test.go | 123 ++++++++++++++++++ storage/crypt4gh/keys_test.go | 8 +- .../testdata/{keys => }/key.encrypted.sec | 0 .../testdata/{keys => }/key.plain.sec | 0 storage/crypt4gh/testdata/{keys => }/key.pub | 0 6 files changed, 183 insertions(+), 57 deletions(-) create mode 100644 storage/crypt4gh/decrypt_test.go rename storage/crypt4gh/testdata/{keys => }/key.encrypted.sec (100%) rename storage/crypt4gh/testdata/{keys => }/key.plain.sec (100%) rename storage/crypt4gh/testdata/{keys => }/key.pub (100%) diff --git a/storage/crypt4gh/decrypt.go b/storage/crypt4gh/decrypt.go index 
df26182b..c4dd3af3 100644 --- a/storage/crypt4gh/decrypt.go +++ b/storage/crypt4gh/decrypt.go @@ -25,8 +25,8 @@ type Crypt4gh struct { editListSkip bool } -// Reads the magic-number at the beginning of the file to check if the file -// might be considered a Crypt4gh file. +// Reads the magic-number and the version number at the beginning of the file +// to check if the file might be considered to be a supported Crypt4gh file. func IsCrypt4ghFile(path string) bool { file, err := os.Open(path) if err != nil { @@ -35,54 +35,56 @@ func IsCrypt4ghFile(path string) bool { defer file.Close() - magicBuffer := make([]byte, 8) - _, err = file.Read(magicBuffer) - return err == nil && string(magicBuffer) == "crypt4gh" + buffer := make([]byte, 12) + _, err = file.Read(buffer) + + return err == nil && + string(buffer[0:8]) == "crypt4gh" && + readInt32(buffer[8:12]) == 1 } func (c *Crypt4gh) Read(buffer []byte) (n int, err error) { - amount := len(buffer) - if amount < 1 { - return 0, errors.New("Provided byte buffer has no space") - } - - var start, end int + addedCount := 0 - for amount > 0 { - for c.dataBlockPos >= len(c.dataBlock) { - err = c.decryptDataBlock() - if err != nil { - if n > 0 { - return n, nil - } - return n, err - } - c.applyEditListSkip() - } + for n < len(buffer) && err == nil { + addedCount, err = c.copyTo(buffer[n:]) + n += addedCount + } - start, end, amount = c.updateDataBlockRange(amount) - copy(buffer, c.dataBlock[start:end]) - n += end - start + if n > 0 { + return n, nil } return n, err } -func (c *Crypt4gh) updateDataBlockRange(amount int) (int, int, int) { - // We can assume here that c.dataBlockPos < len(c.dataBlock) - start := c.dataBlockPos - end := start + amount - - if end > len(c.dataBlock) { - end = len(c.dataBlock) +func (c *Crypt4gh) copyTo(buffer []byte) (int, error) { + err := c.loadDataBlock() + if err != nil { + return 0, err } - amount, end = c.applyEditListKeep(amount, start, end) + start, end, amount := 
c.getAvailableRange(len(buffer)) + + copy(buffer, c.dataBlock[start:end]) - // Already updates the position though reading has not been performed yet: c.dataBlockPos = end - return start, end, amount + return amount, nil +} + +func (c *Crypt4gh) loadDataBlock() error { + c.applyEditListSkip() + + // Load next unprocessed block, and skip bytes defined in the edit-list: + for c.dataBlockPos >= len(c.dataBlock) { + err := c.decryptDataBlock() + if err != nil { + return err + } + c.applyEditListSkip() + } + return nil } func (c *Crypt4gh) readHeader() error { @@ -352,32 +354,33 @@ func (c *Crypt4gh) applyEditListSkip() { } } -func (c *Crypt4gh) applyEditListKeep(amount, start, end int) (remainingAmount, updatedEnd int) { - rangeLength := end - start - remainingAmount = amount - rangeLength - updatedEnd = end +func (c *Crypt4gh) getAvailableRange(amount int) (start, end, providedAmount int) { + providedAmount = amount - if c.editListSkip || len(c.editListLengths) == 0 { - return + if c.dataBlockPos+providedAmount > len(c.dataBlock) { + providedAmount = len(c.dataBlock) - c.dataBlockPos } - keepAmount := &c.editListLengths[0] + // apply Edit-List: + if len(c.editListLengths) > 0 && !c.editListSkip { + keepAmount := &c.editListLengths[0] - if *keepAmount <= uint64(rangeLength) { - rangeLength = int(*keepAmount) - updatedEnd = start + rangeLength - remainingAmount = amount - rangeLength - } + // Reduce the available amount of bytes to read, if necessary + if *keepAmount < uint64(providedAmount) { + providedAmount = int(*keepAmount) + } - *keepAmount -= uint64(rangeLength) + // Reduce the amount of bytes to keep (in the edit list): + *keepAmount -= uint64(providedAmount) - if *keepAmount == 0 { - c.editListLengths = c.editListLengths[1:] - c.editListSkip = true - c.applyEditListSkip() + // Switch to edit-list skip-mode, once keep-mode is exhausted: + if *keepAmount == 0 { + c.editListLengths = c.editListLengths[1:] + c.editListSkip = true + } } - return + return 
c.dataBlockPos, c.dataBlockPos + providedAmount, providedAmount } func (c *Crypt4gh) warnPacket(msg string, args ...any) { diff --git a/storage/crypt4gh/decrypt_test.go b/storage/crypt4gh/decrypt_test.go new file mode 100644 index 00000000..d721460e --- /dev/null +++ b/storage/crypt4gh/decrypt_test.go @@ -0,0 +1,123 @@ +package crypt4gh + +import ( + "bytes" + "crypto/cipher" + "crypto/rand" + "encoding/binary" + "io" + "testing" + + "golang.org/x/crypto/blake2b" + "golang.org/x/crypto/chacha20poly1305" + "golang.org/x/crypto/curve25519" +) + +// Test plain-text content for the test-cases +const content = `First line +Second line +Third line +` + +// Holds the key-pair for encrypting/decrypting the content +var c4gh, _ = KeyPairFromFiles("testdata/key.pub", "testdata/key.plain.sec", nil) + +// Encrypts test-data in memory (as it would be in a Crypt4gh file) and then decrypts the content. +// No edit-list used. +func TestDecryptFullText(t *testing.T) { + decrypted, err := encryptAndDecryptContent(0, -1) + + if err != nil { + t.Error("Failed to parse the encrypted content", err) + } else if decrypted != content { + t.Errorf("Decrypted content does not match the original one (got: %v)", decrypted) + } +} + +// Encrypts test-data in memory together with edit-list (as it would be in a +// Crypt4gh file) for requesting only the second line to be rendered, and then +// decrypts the content (expecting only the second line). 
+func TestDecryptWithDataEditList(t *testing.T) { + decrypted, err := encryptAndDecryptContent(11, 11) + + if err != nil { + t.Error("Failed to parse the encrypted content", err) + } else if decrypted != "Second line" { + t.Errorf("Decrypted content does not match the second line (got: %v)", decrypted) + } +} + +func encryptAndDecryptContent(rangeStart, rangeLength int) (string, error) { + reader, err := c4gh.Decrypt(encryptContent(rangeStart, rangeLength)) + if err != nil { + return "", err + } + + buffer := new(bytes.Buffer) + _, _ = io.Copy(buffer, reader) + return buffer.String(), nil +} + +func encryptContent(rangeStart, rangeLength int) *bytes.Buffer { + sharedKey := generateSharedKey() + aead, _ := chacha20poly1305.New(sharedKey) + + buffer := new(bytes.Buffer) // Stores Crypt4gh v1 formatted encrypted content + buffer.WriteString("crypt4gh") // "magic" text + buffer.Write([]byte{1, 0, 0, 0}) // Version "1" + + if rangeStart == 0 && rangeLength < 1 { + buffer.Write([]byte{1, 0, 0, 0}) // Packet count == 1 + } else { + buffer.Write([]byte{2, 0, 0, 0}) // Packet count == 2 + } + + // Writes a header packet for storing the key + encryptionPacket := [40]byte{} + copy(encryptionPacket[8:40], sharedKey) // key + writeHeaderPacket(&aead, buffer, encryptionPacket[:]) + + // Writes a header packet for storing a data-edit list + if rangeStart >= 0 && rangeStart < len(content) && rangeLength > 0 { + afterRange := len(content) - rangeStart - rangeLength + dataEditListPacket := make([]byte, 32) + dataEditListPacket[0] = 1 // specifies that packetType = data-edit list + dataEditListPacket[4] = 3 // specifies that number of following lengths + binary.LittleEndian.PutUint64(dataEditListPacket[8:16], uint64(rangeStart)) // Skip this number of bytes + binary.LittleEndian.PutUint64(dataEditListPacket[16:24], uint64(rangeLength)) // Keep this number of bytes + binary.LittleEndian.PutUint64(dataEditListPacket[24:32], uint64(afterRange)) // Skip this number of bytes + + 
writeHeaderPacket(&aead, buffer, dataEditListPacket) + } + + // Writes encrypted content + nonce := nonce() + buffer.Write(nonce) // nonce (12 bytes) of the encrypted payload + buffer.Write(aead.Seal(nil, nonce, []byte(content), nil)) // encrypted payload + return buffer +} + +func writeHeaderPacket(aead *cipher.AEAD, buffer *bytes.Buffer, payload []byte) { + nonce := nonce() + encrypted := (*aead).Seal(nil, nonce, payload, nil) + + buffer.Write(binary.LittleEndian.AppendUint32(nil, uint32(52+len(encrypted)))) // packet length + buffer.Write([]byte{0, 0, 0, 0}) // packet encryption method (0) + buffer.Write(c4gh.publicKey[:32]) // writer's public key (32 bytes) + buffer.Write(nonce) // nonce (12 bytes) of the encrypted payload + buffer.Write(encrypted) // encrypted payload +} + +func generateSharedKey() []byte { + diffieHellmanKey, _ := curve25519.X25519(c4gh.secretKey, c4gh.publicKey) + diffieHellmanKey = append(diffieHellmanKey, c4gh.publicKey...) // reader's + diffieHellmanKey = append(diffieHellmanKey, c4gh.publicKey...) 
// writer's + hash := blake2b.Sum512(diffieHellmanKey) + return hash[:chacha20poly1305.KeySize] +} + +func nonce() []byte { + nonce := make([]byte, 12) + _, _ = rand.Read(nonce[:]) + return nonce +} diff --git a/storage/crypt4gh/keys_test.go b/storage/crypt4gh/keys_test.go index 06529e20..cb8dd3a6 100644 --- a/storage/crypt4gh/keys_test.go +++ b/storage/crypt4gh/keys_test.go @@ -8,8 +8,8 @@ import ( const encodedPublicKey = "LS0tLS1CRUdJTiBDUllQVDRHSCBQVUJMSUMgS0VZLS0tLS0KQ3N4Mk5KNVhicTJuM3Q4dWdSeTJabGRLYXRoWDRLa0haZ1dzcGhuTTlSbz0KLS0tLS1FTkQgQ1JZUFQ0R0ggUFVCTElDIEtFWS0tLS0tCg==" func TestLoadingEncryptedKeys(t *testing.T) { - os.Setenv("C4GH_PUBLIC_KEY", "testdata/keys/key.pub") - os.Setenv("C4GH_SECRET_KEY", "testdata/keys/key.encrypted.sec") + os.Setenv("C4GH_PUBLIC_KEY", "testdata/key.pub") + os.Setenv("C4GH_SECRET_KEY", "testdata/key.encrypted.sec") os.Setenv("C4GH_PASSPHRASE", "abcDEFghi") c4gh, err := ResolveKeyPair() @@ -26,8 +26,8 @@ func TestLoadingEncryptedKeys(t *testing.T) { } func TestLoadingNonEncryptedKeys(t *testing.T) { - os.Setenv("C4GH_PUBLIC_KEY", "testdata/keys/key.pub") - os.Setenv("C4GH_SECRET_KEY", "testdata/keys/key.plain.sec") + os.Setenv("C4GH_PUBLIC_KEY", "testdata/key.pub") + os.Setenv("C4GH_SECRET_KEY", "testdata/key.plain.sec") os.Setenv("C4GH_PASSPHRASE", "to-be-ignored") c4gh, err := ResolveKeyPair() diff --git a/storage/crypt4gh/testdata/keys/key.encrypted.sec b/storage/crypt4gh/testdata/key.encrypted.sec similarity index 100% rename from storage/crypt4gh/testdata/keys/key.encrypted.sec rename to storage/crypt4gh/testdata/key.encrypted.sec diff --git a/storage/crypt4gh/testdata/keys/key.plain.sec b/storage/crypt4gh/testdata/key.plain.sec similarity index 100% rename from storage/crypt4gh/testdata/keys/key.plain.sec rename to storage/crypt4gh/testdata/key.plain.sec diff --git a/storage/crypt4gh/testdata/keys/key.pub b/storage/crypt4gh/testdata/key.pub similarity index 100% rename from storage/crypt4gh/testdata/keys/key.pub rename 
to storage/crypt4gh/testdata/key.pub From 6d3dc371732577e59bc34dcc5924b74ad8cf3946 Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Tue, 6 Aug 2024 16:23:33 +0300 Subject: [PATCH 09/10] Add tests for the IsCrypt4ghFile() function --- storage/crypt4gh/decrypt_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/storage/crypt4gh/decrypt_test.go b/storage/crypt4gh/decrypt_test.go index d721460e..54000465 100644 --- a/storage/crypt4gh/decrypt_test.go +++ b/storage/crypt4gh/decrypt_test.go @@ -6,6 +6,8 @@ import ( "crypto/rand" "encoding/binary" "io" + "os" + "path" "testing" "golang.org/x/crypto/blake2b" @@ -22,6 +24,24 @@ Third line // Holds the key-pair for encrypting/decrypting the content var c4gh, _ = KeyPairFromFiles("testdata/key.pub", "testdata/key.plain.sec", nil) +// Verifies the IsCrypt4ghFile() function. +func TestIsCrypt4ghFile(t *testing.T) { + tempDir := t.TempDir() + correctFile := path.Join(tempDir, "correct.c4gh") + + if err := os.WriteFile(correctFile, encryptContent(0, -1).Bytes(), 0600); err != nil { + t.Error("Failed to create a test-file") + } + + if !IsCrypt4ghFile(correctFile) { + t.Error("Failed to recognise the correct Crypt4gh file as valid") + } + + if IsCrypt4ghFile(path.Join(tempDir, "unknown.c4gh")) { + t.Error("Failed to recognise a non-existent Crypt4gh file as invalid") + } +} + // Encrypts test-data in memory (as it would be in a Crypt4gh file) and then decrypts the content. // No edit-list used. 
func TestDecryptFullText(t *testing.T) { From b923c2253b74d37a4064e974492254fbfffa645f Mon Sep 17 00:00:00 2001 From: Martti Tamm Date: Wed, 7 Aug 2024 12:48:47 +0300 Subject: [PATCH 10/10] Editorial changes: comments, docs --- storage/crypt4gh/decrypt.go | 1 + storage/crypt4gh/decrypt_test.go | 7 +++- storage/crypt4gh/keys.go | 51 ++++++++++++++++---------- storage/crypt4gh/keys_test.go | 6 +-- storage/htsget_test.go | 4 +- website/content/docs/storage/htsget.md | 17 ++++++--- 6 files changed, 54 insertions(+), 32 deletions(-) diff --git a/storage/crypt4gh/decrypt.go b/storage/crypt4gh/decrypt.go index c4dd3af3..140fd208 100644 --- a/storage/crypt4gh/decrypt.go +++ b/storage/crypt4gh/decrypt.go @@ -12,6 +12,7 @@ import ( "golang.org/x/crypto/curve25519" ) +// Handles Crypt4gh decryption context per source stream. type Crypt4gh struct { keyPair *Crypt4ghKeyPair stream io.Reader diff --git a/storage/crypt4gh/decrypt_test.go b/storage/crypt4gh/decrypt_test.go index 54000465..3a376455 100644 --- a/storage/crypt4gh/decrypt_test.go +++ b/storage/crypt4gh/decrypt_test.go @@ -78,6 +78,11 @@ func encryptAndDecryptContent(rangeStart, rangeLength int) (string, error) { return buffer.String(), nil } +// Very simplified approach for encrypting some content. Good enough for testing. +// When range is provided, an edit-list packet is added to the header so that +// receiver would look for the part of content defined by the start position +// and length. Specify start=0 and length=-1 to avoid the edit-list. +// Returns the Cryp4gh formatted encrypted data with header in the buffer. 
func encryptContent(rangeStart, rangeLength int) *bytes.Buffer { sharedKey := generateSharedKey() aead, _ := chacha20poly1305.New(sharedKey) @@ -138,6 +143,6 @@ func generateSharedKey() []byte { func nonce() []byte { nonce := make([]byte, 12) - _, _ = rand.Read(nonce[:]) + _, _ = rand.Read(nonce) return nonce } diff --git a/storage/crypt4gh/keys.go b/storage/crypt4gh/keys.go index 2bbab369..0f66592d 100644 --- a/storage/crypt4gh/keys.go +++ b/storage/crypt4gh/keys.go @@ -32,6 +32,8 @@ type Crypt4ghKeyPair struct { secretKey []byte } +// Produces BASE64-encoded public-key where the key is represented just as in +// the public-key file. func (k *Crypt4ghKeyPair) EncodePublicKeyBase64() string { header, footer := getKeyFileHeaderFooter("PUBLIC") @@ -43,6 +45,8 @@ func (k *Crypt4ghKeyPair) EncodePublicKeyBase64() string { return base64.StdEncoding.EncodeToString(content.Bytes()) } +// Saves the current key-pair to the specified files. If passphrase is not +// empty, the private key will be encrypted using the passphrase func (k *Crypt4ghKeyPair) Save(publicKeyPath, privateKeyPath string, passphrase []byte) error { err := saveKeyFile(publicKeyPath, "PUBLIC", k.publicKey) if err != nil { @@ -57,14 +61,16 @@ func (k *Crypt4ghKeyPair) Save(publicKeyPath, privateKeyPath string, passphrase return saveKeyFile(privateKeyPath, "PRIVATE", encodedKey) } -// Decrypts given Crypt4gh file stream (expecting the header part followed by encrypted body). +// Wraps given reader in order to decrypt the Crypt4gh file stream (expecting +// the header part followed by encrypted body). func (k *Crypt4ghKeyPair) Decrypt(r io.Reader) (io.Reader, error) { c := Crypt4gh{keyPair: k, stream: r} err := c.readHeader() return &c, err } -// Decrypts given Crypt4gh file stream (body) using an explicitly provided header information. +// Returns a reader providing decrypted data for given Crypt4gh file stream +// (body) and explicit Crypt4gh header information. 
func (k *Crypt4ghKeyPair) DecryptWithHeader(header []byte, body io.Reader) (io.Reader, error) { c := Crypt4gh{keyPair: k, stream: bytes.NewReader(header)} err := c.readHeader() @@ -179,27 +185,30 @@ func ResolveKeyPair() (*Crypt4ghKeyPair, error) { // On failure, it returns an empty string. // Look up order is following: // -// 1. When the provided file-path is not empty, use the directory of the key. -// 2. Fall back to .c4gh/ directory in the current work directory +// 1. When the provided file-path is not empty, use its directory (even if it +// does not exist yet: it will be created). +// 2. Fall back to .c4gh/ directory in the current directory, if it exists. // 3. When user's home-directory can be resolved, fall back to the ~/.c4gh/ -// directory. +// directory (creating it, if missing). When the home-directory cannot be +// resolved, fall back to .c4gh/ directory in the current directory. // 4. When the directory does not exist and cannot be created, fail by // returning "". // // To summarise the edge-cases: -// 1. If no keys are found, they will be created at ~/.c4gh/key[.pub]. -// 2. When the current directory contains the .c4gh directory then that will +// 1. Explicitly provided paths will be always trusted (if the directories +// don't exist yet, they will be created) +// 2. If no explicit path is provided, keys will be created at +// ~/.c4gh/key[.pub] +// 3. When the current directory contains the .c4gh directory then that will // override the home-directory. -// 3. 
explicitly provided paths will be always trusted (without explicitly -// checking whether they exist) func resolveKeysDir(secretKeyPath string) string { var keysDir string - if secretKeyPath != "" { + if secretKeyPath != "" { // explicit path keysDir = path.Dir(secretKeyPath) - } else if isDir(presumedDirName) { + } else if isDir(presumedDirName) { // ./.c4gh/ keysDir = presumedDirName - } else { + } else { // attempting ~/.c4gh/ var errDir error keysDir, errDir = os.UserHomeDir() @@ -207,7 +216,7 @@ func resolveKeysDir(secretKeyPath string) string { // Place the keys into a private sub-directory: keysDir = path.Join(keysDir, presumedDirName) } else { - keysDir = presumedDirName + keysDir = presumedDirName // Fall-back: ./.c4gh/ } } @@ -215,12 +224,9 @@ func resolveKeysDir(secretKeyPath string) string { directoryExists := isDir(keysDir) if !directoryExists { - err := os.MkdirAll(keysDir, 0700) - directoryExists = err == nil - } - - if !directoryExists { - keysDir = "" + if err := os.MkdirAll(keysDir, 0700); err != nil { + return "" + } } return keysDir @@ -332,6 +338,10 @@ func checkLine(r io.Reader, line string) error { return nil } +// Extract a number of bytes from given list. The length of the bytes to be +// returned is specified by the first two bytes (big-endian) at the starting +// position. The second returned int indicates the position after the extracted +// bytes. func readBytes(bytes []byte, startPos int) ([]byte, int) { length := int(bytes[startPos])<<8 | int(bytes[startPos+1]) start := startPos + 2 @@ -344,6 +354,7 @@ func readString(bytes []byte, startPos int) (string, int) { return string(b), end } +// Returns a two-byte list holding the provided int in big-endian encoding. 
func getLengthBytes(l int) []byte { b := [2]byte{byte(l >> 8), byte(l)} return b[:] @@ -399,7 +410,7 @@ func decryptPrivateKey( } if kdfname == "none" || ciphername == "none" { - return nil, fmt.Errorf("Unexpected key encryption information: "+ + return nil, fmt.Errorf("Invalid key encryption information: "+ "kdfname=%s, ciphername=%s", kdfname, ciphername) } diff --git a/storage/crypt4gh/keys_test.go b/storage/crypt4gh/keys_test.go index cb8dd3a6..f5b428a5 100644 --- a/storage/crypt4gh/keys_test.go +++ b/storage/crypt4gh/keys_test.go @@ -62,7 +62,7 @@ func TestGeneratingAndSavingNewKeys(t *testing.T) { err = c4gh.Save(pubPath, secPath, nil) if err != nil { - t.Error("Could not generate a Crypt4gh key-pair", err) + t.Error("Could not save a Crypt4gh key-pair to file", err) } os.Remove(pubPath) @@ -70,11 +70,11 @@ func TestGeneratingAndSavingNewKeys(t *testing.T) { err = c4gh.Save(pubPath, secPath, []byte("abcDEFghi")) if err != nil { - t.Error("Could not generate a Crypt4gh key-pair", err) + t.Error("Could not save a Crypt4gh key-pair to file", err) } _, err = KeyPairFromFiles(pubPath, secPath, []byte("abcDEFghi")) if err != nil { - t.Error("Could not reload saved Crypt4gh key-pair", err) + t.Error("Could not reload the saved Crypt4gh key-pair", err) } } diff --git a/storage/htsget_test.go b/storage/htsget_test.go index b3561362..5a2042f1 100644 --- a/storage/htsget_test.go +++ b/storage/htsget_test.go @@ -8,8 +8,8 @@ import ( ) func TestHTSGET(t *testing.T) { - invalidUrl := "https://google.com" - validUrl := "htsget://google.com" + invalidUrl := "https://example.org" + validUrl := "htsget://bearer:token@example.org" store, err := NewHTSGET(config.HTSGETStorage{}) if err != nil { diff --git a/website/content/docs/storage/htsget.md b/website/content/docs/storage/htsget.md index 6160fcb7..e1aa80d6 100644 --- a/website/content/docs/storage/htsget.md +++ b/website/content/docs/storage/htsget.md @@ -24,16 +24,21 @@ default protocol is `https`, which is also presumed in 
the Htsget specification. For testing purposes, it can be changed to `http`. If the service expects a `Bearer` token, it can be specified in the URL. -For example: `htsget://bearer:your-token-here@fakedomain.com/...`. +For example: `htsget://bearer:your-token-here@example.org/...`. Here the `bearer:` part is the required syntax to activate the `your-token-here` value to be sent to the htsget-service as a header value: `Authorization: Bearer your-token-here`. -Funnel always sends its public key in the header of the request to the Htsget -service. When the Htsget service supports [the content encryption using -Crypt4gh][htsget-crypt4gh], it can generate a custom Crypt4gh file header where -the Funnel instance can decrypt and find the symmetric key used for content -encryption. +Funnel always sends its public key in the header (`client-public-key`) of the +request to the Htsget service. When the Htsget service supports [the content +encryption using Crypt4gh][htsget-crypt4gh], the service can generate a custom +Crypt4gh file header containing the symmetric key for decrypting the referred +content (Crypt4gh formatted data-blocks). Funnel checks the beginning of the +received content to know whether Crypt4gh decryption can be applied. Therefore, +tasks always receive the data decrypted. For sensitive data, the deployment +environment (server) should pay attention to restricting access to the Funnel's +data directories, possibly having separate Funnel instances for different +data-projects. Default Htsget Storage configuration should be sufficient for most cases: