diff --git a/api/internal/git/repospec.go b/api/internal/git/repospec.go index cae95b4450..8f9c247776 100644 --- a/api/internal/git/repospec.go +++ b/api/internal/git/repospec.go @@ -5,6 +5,7 @@ package git import ( "fmt" + "log" "net/url" "path/filepath" "strconv" @@ -128,7 +129,7 @@ func parseGitURL(n string) *RepoSpec { repoSpec.KustRootPath = parsePath(n[index+len(gitDelimiter)+len(repoSpec.RepoPath):]) return repoSpec } - repoSpec.Host, n = parseHostSpec(n) + repoSpec.Host, n = extractHost(n) isLocal := strings.HasPrefix(repoSpec.Host, "file://") if !isLocal { repoSpec.GitSuffix = gitSuffix @@ -227,56 +228,146 @@ func parsePath(n string) string { return parsed.Path } -func parseHostSpec(n string) (string, string) { - var host string - // Start accumulating the host part. - for _, p := range []string{ - // Order matters here. - "git::", "gh:", "ssh://", "https://", "http://", "file://", - "git@", "github.com:", "github.com/"} { - if len(p) < len(n) && strings.ToLower(n[:len(p)]) == p { - n = n[len(p):] - host += p +func extractHost(n string) (string, string) { + n = ignoreForcedGitProtocol(n) + scheme, n := extractScheme(n) + username, n := extractUsername(n) + stdGithub := isStandardGithubHost(n) + acceptSCP := acceptSCPStyle(scheme, username, stdGithub) + + // Validate the username and scheme before attempting host/path parsing, because if the parsing + // so far has not succeeded, we will not be able to extract the host and path correctly. + if err := validateScheme(scheme, acceptSCP); err != nil { + // TODO: return this error instead. + return "", n + } + + // Now that we have extracted a valid scheme+username, we can parse host itself. + + // The file protocol specifies an absolute path to a local git repo. + // Everything after the scheme (including any 'username' we found) is actually part of that path. + if scheme == "file://" { + return scheme, username + n + } + sepIndex := findPathSeparator(n, acceptSCP) + host, rest := n[:sepIndex+1], n[sepIndex+1:] + + // Github URLs are strictly normalized in a way that may discard scheme and username components. + if stdGithub { + scheme, username, host = normalizeGithubHostParts(scheme, username) + } + + // Host is required, so do not concat the scheme and username if we didn't find one. + if host == "" { + // TODO: This should return an error. + return "", n + } + return scheme + username + host, rest +} + +// ignoreForcedGitProtocol strips the "git::" prefix from URLs. +// We used to use go-getter to handle our urls: https://github.com/hashicorp/go-getter. +// The git:: prefix signaled go-getter to use the git protocol to fetch the url's contents. +// We silently strip this prefix to allow these go-getter-style urls to continue to work, +// although the git protocol (which is insecure and unsupported on many platforms, including Github) +// will not actually be used as intended. +func ignoreForcedGitProtocol(n string) string { + n, found := trimPrefixIgnoreCase(n, "git::") + if found { + log.Println("Warning: Forcing the git protocol using the 'git::' URL prefix is not supported. " + + "Kustomize currently strips this invalid prefix, but will stop doing so in a future release. " + + "Please remove the 'git::' prefix from your configuration.") + } + return n +} + +// acceptSCPStyle returns true if the scheme and username indicate potential use of an SCP-style URL. +// With this style, the scheme is not explicit and the path is delimited by a colon. +// Strictly speaking the username is optional in SCP-like syntax, but Kustomize has always +// required it for non-Github URLs. +// Example: user@host.xz:path/to/repo.git/ +func acceptSCPStyle(scheme, username string, isGithubURL bool) bool { + return scheme == "" && (username != "" || isGithubURL) +} + +func validateScheme(scheme string, acceptSCPStyle bool) error { + // see https://git-scm.com/docs/git-fetch#_git_urls for info relevant to these validations + switch scheme { + case "": + // Empty scheme is only ok if it's a Github URL or if it looks like SCP-style syntax + if !acceptSCPStyle { + return fmt.Errorf("failed to parse scheme") } + case "ssh://", "file://", "https://", "http://": + // These are all supported schemes + default: + // At time of writing, we should never end up here because we do not parse out + // unsupported schemes to begin with. + return fmt.Errorf("unsupported scheme %q", scheme) } - if host == "git@" { - i := strings.Index(n, "/") - if i > -1 { - host += n[:i+1] - n = n[i+1:] - } else { - i = strings.Index(n, ":") - if i > -1 { - host += n[:i+1] - n = n[i+1:] - } + return nil +} + +func extractScheme(s string) (string, string) { + for _, prefix := range []string{"ssh://", "https://", "http://", "file://"} { + if rest, found := trimPrefixIgnoreCase(s, prefix); found { + return prefix, rest } - return host, n } + return "", s +} + +func extractUsername(s string) (string, string) { + if trimmed, found := trimPrefixIgnoreCase(s, gitUsername); found { + return gitUsername, trimmed + } + return "", s +} - // If host is a http(s) or ssh URL, grab the domain part. - for _, p := range []string{ - "ssh://", "https://", "http://"} { - if strings.HasSuffix(host, p) { - i := strings.Index(n, "/") - if i > -1 { - host += n[0 : i+1] - n = n[i+1:] - } - break +func isStandardGithubHost(s string) bool { + lowerCased := strings.ToLower(s) + return strings.HasPrefix(lowerCased, "github.com/") || + strings.HasPrefix(lowerCased, "github.com:") +} + +// trimPrefixIgnoreCase returns the rest of s and true if prefix, ignoring case, prefixes s. +// Otherwise, trimPrefixIgnoreCase returns s and false. +func trimPrefixIgnoreCase(s, prefix string) (string, bool) { + if len(prefix) <= len(s) && strings.ToLower(s[:len(prefix)]) == prefix { + return s[len(prefix):], true + } + return s, false +} + +func findPathSeparator(hostPath string, acceptSCP bool) int { + sepIndex := strings.Index(hostPath, "/") + if acceptSCP { + // The colon acts as a delimiter in scp-style ssh URLs only if not prefixed by '/'. + if colonIndex := strings.Index(hostPath, ":"); colonIndex > 0 && colonIndex < sepIndex { + sepIndex = colonIndex } } + return sepIndex +} + +const normalizedHTTPGithub = "https://github.com/" +const gitUsername = "git@" +const normalizedSCPGithub = gitUsername + "github.com:" - return normalizeGitHostSpec(host), n +func normalizeGithubHostParts(scheme, username string) (string, string, string) { + if strings.HasPrefix(scheme, "ssh://") || username != "" { + return "", username, "github.com:" + } + return "https://", "", "github.com/" } func normalizeGitHostSpec(host string) string { s := strings.ToLower(host) if strings.Contains(s, "github.com") { - if strings.Contains(s, "git@") || strings.Contains(s, "ssh:") { - host = "git@github.com:" + if strings.Contains(s, gitUsername) || strings.Contains(s, "ssh:") { + host = normalizedSCPGithub } else { - host = "https://github.com/" + host = normalizedHTTPGithub } } if strings.HasPrefix(s, "git::") { diff --git a/api/internal/git/repospec_test.go b/api/internal/git/repospec_test.go index 1d371b45a0..84a878164a 100644 --- a/api/internal/git/repospec_test.go +++ b/api/internal/git/repospec_test.go @@ -19,8 +19,6 @@ func TestNewRepoSpecFromUrl_Permute(t *testing.T) { // we probably stil don't want to break backwards compatibility for things // that are unintentionally supported. var schemeAuthority = []struct{ raw, normalized string }{ - {"gh:", "gh:"}, - {"GH:", "gh:"}, {"gitHub.com/", "https://github.com/"}, {"github.com:", "https://github.com/"}, {"http://github.com/", "https://github.com/"}, @@ -34,6 +32,7 @@ func TestNewRepoSpecFromUrl_Permute(t *testing.T) { {"git::https://git.example.com/", "https://git.example.com/"}, {"git@github.com:", "git@github.com:"}, {"git@github.com/", "git@github.com:"}, + {"git::git@github.com:", "git@github.com:"}, } var repoPaths = []string{"someOrg/someRepo", "kubernetes/website"} var pathNames = []string{"README.md", "foo/krusty.txt", ""} @@ -81,6 +80,10 @@ func TestNewRepoSpecFromUrlErrors(t *testing.T) { "/tmp", "uri looks like abs path", }, + "relative path": { + "../../tmp", + "url lacks host", + }, "no_slashes": { "iauhsdiuashduas", "url lacks repoPath", @@ -105,6 +108,14 @@ func TestNewRepoSpecFromUrlErrors(t *testing.T) { "https://github.com/org/repo.git//path/../../exits/repo", "url path exits repo", }, + "bad github separator": { + "github.com!org/repo.git//path", + "url lacks host", + }, + "mysterious gh: prefix previously supported is no longer handled": { + "gh:org/repo", + "url lacks repoPath", + }, } for name, testCase := range badData { @@ -191,24 +202,50 @@ func TestNewRepoSpecFromUrl_Smoke(t *testing.T) { }, { name: "t6", - input: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git?ref=v0.1.0", - cloneSpec: "git@gitlab2.sqtools.ru:10022/infra/kubernetes/thanos-base.git", + input: "git@gitlab2.sqtools.ru:infra/kubernetes/thanos-base.git?ref=v0.1.0", + cloneSpec: "git@gitlab2.sqtools.ru:infra/kubernetes/thanos-base.git", absPath: notCloned.String(), repoSpec: RepoSpec{ - Host: "git@gitlab2.sqtools.ru:10022/", + Host: "git@gitlab2.sqtools.ru:", RepoPath: "infra/kubernetes/thanos-base", Ref: "v0.1.0", GitSuffix: ".git", }, }, { - name: "t7", + name: "non-github_scp", input: "git@bitbucket.org:company/project.git//path?ref=branch", cloneSpec: "git@bitbucket.org:company/project.git", absPath: notCloned.Join("path"), repoSpec: RepoSpec{ - Host: "git@bitbucket.org:company/", - RepoPath: "project", + Host: "git@bitbucket.org:", + RepoPath: "company/project", + KustRootPath: "/path", + Ref: "branch", + GitSuffix: ".git", + }, + }, + { + name: "non-github_scp incorrectly using slash (invalid but currently passed through to git)", + input: "git@bitbucket.org/company/project.git//path?ref=branch", + cloneSpec: "git@bitbucket.org/company/project.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "git@bitbucket.org/", + RepoPath: "company/project", + KustRootPath: "/path", + Ref: "branch", + GitSuffix: ".git", + }, + }, + { + name: "non-github_git-user_ssh", + input: "ssh://git@bitbucket.org/company/project.git//path?ref=branch", + cloneSpec: "ssh://git@bitbucket.org/company/project.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "ssh://git@bitbucket.org/", + RepoPath: "company/project", KustRootPath: "/path", Ref: "branch", GitSuffix: ".git", @@ -459,6 +496,106 @@ func TestNewRepoSpecFromUrl_Smoke(t *testing.T) { GitSuffix: ".git", }, }, + { + name: "non-git username with non-github host", + input: "ssh://myusername@bitbucket.org/ourteamname/ourrepositoryname.git//path?ref=branch", + cloneSpec: "ssh://myusername@bitbucket.org/ourteamname/ourrepositoryname.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "ssh://myusername@bitbucket.org/", + RepoPath: "ourteamname/ourrepositoryname", + KustRootPath: "/path", + Ref: "branch", + GitSuffix: ".git", + }, + }, + { + name: "username-like filepath with file protocol", + input: "file://git@home/path/to/repository.git//path?ref=branch", + cloneSpec: "file://git@home/path/to/repository.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "file://", + RepoPath: "git@home/path/to/repository", + KustRootPath: "/path", + Ref: "branch", + GitSuffix: ".git", + }, + }, + { + name: "username with http protocol (invalid but currently passed through to git)", + input: "http://git@home.com/path/to/repository.git//path?ref=branch", + cloneSpec: "http://git@home.com/path/to/repository.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "http://git@home.com/", + RepoPath: "path/to/repository", + KustRootPath: "/path", + Ref: "branch", + GitSuffix: ".git", + }, + }, + { + name: "username with https protocol (invalid but currently passed through to git)", + input: "https://git@home.com/path/to/repository.git//path?ref=branch", + cloneSpec: "https://git@home.com/path/to/repository.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "https://git@home.com/", + RepoPath: "path/to/repository", + KustRootPath: "/path", + Ref: "branch", + GitSuffix: ".git", + }, + }, + { + name: "unsupported protocol after username (invalid but currently passed through to git)", + input: "git@scp://github.com/org/repo.git//path", + cloneSpec: "git@scp://github.com/org/repo.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "git@scp:", + RepoPath: "//github.com/org/repo", + KustRootPath: "/path", + GitSuffix: ".git", + }, + }, + { + name: "supported protocol after username (invalid but currently passed through to git)", + input: "git@ssh://github.com/org/repo.git//path", + cloneSpec: "git@ssh://github.com/org/repo.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "git@ssh:", + RepoPath: "//github.com/org/repo", + KustRootPath: "/path", + GitSuffix: ".git", + }, + }, + { + name: "complex github ssh url from docs", + input: "ssh://git@ssh.github.com:443/YOUR-USERNAME/YOUR-REPOSITORY.git", + cloneSpec: "ssh://git@ssh.github.com:443/YOUR-USERNAME/YOUR-REPOSITORY.git", + absPath: notCloned.String(), + repoSpec: RepoSpec{ + Host: "ssh://git@ssh.github.com:443/", + RepoPath: "YOUR-USERNAME/YOUR-REPOSITORY", + KustRootPath: "", + GitSuffix: ".git", + }, + }, + { + name: "colon behind slash not scp delimiter", + input: "git@gitlab.com/user:name/YOUR-REPOSITORY.git/path", + cloneSpec: "git@gitlab.com/user:name/YOUR-REPOSITORY.git", + absPath: notCloned.Join("path"), + repoSpec: RepoSpec{ + Host: "git@gitlab.com/", + RepoPath: "user:name/YOUR-REPOSITORY", + KustRootPath: "path", + GitSuffix: ".git", + }, + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { diff --git a/api/internal/localizer/util_test.go b/api/internal/localizer/util_test.go index ce70b4c01d..91fca64ce9 100644 --- a/api/internal/localizer/util_test.go +++ b/api/internal/localizer/util_test.go @@ -226,10 +226,6 @@ func TestLocRootPath_URLComponents(t *testing.T) { urlf: "file:///var/run/repo//%s?ref=value", path: simpleJoin(t, FileSchemeDir, "var", "run", "repo", "value"), }, - "gh_shorthand": { - urlf: "gh:org/repo//%s?ref=value", - path: simpleJoin(t, "gh", "org", "repo", "value"), - }, "IPv6": { urlf: "https://[2001:4860:4860::8888]/org/repo//%s?ref=value", path: simpleJoin(t, "2001:4860:4860::8888", "org", "repo", "value"),