-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
repospec.go
387 lines (329 loc) · 13.8 KB
/
repospec.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
// Copyright 2019 The Kubernetes Authors.
// SPDX-License-Identifier: Apache-2.0
package git
import (
"fmt"
"log"
"net/url"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"sigs.k8s.io/kustomize/kyaml/errors"
"sigs.k8s.io/kustomize/kyaml/filesys"
)
// Used as a temporary non-empty occupant of the cloneDir
// field, as something distinguishable from the empty string
// in various outputs (especially tests). Not using an
// actual directory name here, as that's a temporary directory
// with a unique name that isn't created until clone time.
const notCloned = filesys.ConfirmedDir("/notCloned")
// RepoSpec specifies a git repository and a branch and path therein.
type RepoSpec struct {
// Raw, original spec, used to look for cycles.
// TODO(monopole): Drop raw, use processed fields instead.
raw string
// Host, e.g. https://github.com/
Host string
// RepoPath name (Path to repository),
// e.g. kubernetes-sigs/kustomize
RepoPath string
// Dir is where the repository is cloned to.
Dir filesys.ConfirmedDir
// Relative path in the repository, and in the cloneDir,
// to a Kustomization.
KustRootPath string
// Branch or tag reference.
Ref string
// Submodules indicates whether or not to clone git submodules.
Submodules bool
// Timeout is the maximum duration allowed for execing git commands.
Timeout time.Duration
}
// CloneSpec returns a string suitable for "git clone {spec}".
func (x *RepoSpec) CloneSpec() string {
return x.Host + x.RepoPath
}
func (x *RepoSpec) CloneDir() filesys.ConfirmedDir {
return x.Dir
}
func (x *RepoSpec) Raw() string {
return x.raw
}
func (x *RepoSpec) AbsPath() string {
return x.Dir.Join(x.KustRootPath)
}
func (x *RepoSpec) Cleaner(fSys filesys.FileSystem) func() error {
return func() error { return fSys.RemoveAll(x.Dir.String()) }
}
const (
refQuery = "?ref="
gitSuffix = ".git"
gitRootDelimiter = "_git/"
pathSeparator = "/" // do not use filepath.Separator, as this is a URL
)
// NewRepoSpecFromURL parses git-like urls.
// From strings like [email protected]:someOrg/someRepo.git or
// https://github.com/someOrg/someRepo?ref=someHash, extract
// the different parts of URL, set into a RepoSpec object and return RepoSpec object.
// It MUST return an error if the input is not a git-like URL, as this is used by some code paths
// to distinguish between local and remote paths.
//
// In particular, NewRepoSpecFromURL separates the URL used to clone the repo from the
// elements Kustomize uses for other purposes (e.g. query params that turn into args, and
// the path to the kustomization root within the repo).
func NewRepoSpecFromURL(n string) (*RepoSpec, error) {
repoSpec := &RepoSpec{raw: n, Dir: notCloned, Timeout: defaultTimeout, Submodules: defaultSubmodules}
if filepath.IsAbs(n) {
return nil, fmt.Errorf("uri looks like abs path: %s", n)
}
// Parse the query first. This is safe because according to rfc3986 "?" is only allowed in the
// query and is not recognized %-encoded.
// Note that parseQuery returns default values for empty parameters.
n, query, _ := strings.Cut(n, "?")
repoSpec.Ref, repoSpec.Timeout, repoSpec.Submodules = parseQuery(query)
var err error
// Parse the host (e.g. scheme, username, domain) segment.
repoSpec.Host, n, err = extractHost(n)
if err != nil {
return nil, err
}
// In some cases, we're given a path to a git repo + a path to the kustomization root within
// that repo. We need to split them so that we can ultimately give the repo only to the cloner.
repoSpec.RepoPath, repoSpec.KustRootPath, err = parsePathParts(n, defaultRepoPathLength(repoSpec.Host))
if err != nil {
return nil, err
}
return repoSpec, nil
}
const allSegments = -999999
const orgRepoSegments = 2
func defaultRepoPathLength(host string) int {
if strings.HasPrefix(host, fileScheme) {
return allSegments
}
return orgRepoSegments
}
// parsePathParts splits the repo path that will ultimately be passed to git to clone the
// repo from the kustomization root path, which Kustomize will execute the build in after the repo
// is cloned.
//
// We first try to do this based on explicit markers in the URL (e.g. _git, .git or //).
// If none are present, we try to apply a historical default repo path length that is derived from
// Github URLs. If there aren't enough segments, we have historically considered the URL invalid.
func parsePathParts(n string, defaultSegmentLength int) (string, string, error) {
repoPath, kustRootPath, success := tryExplicitMarkerSplit(n)
if !success {
repoPath, kustRootPath, success = tryDefaultLengthSplit(n, defaultSegmentLength)
}
// Validate the result
if !success || len(repoPath) == 0 {
return "", "", fmt.Errorf("failed to parse repo path segment")
}
if kustRootPathExitsRepo(kustRootPath) {
return "", "", fmt.Errorf("url path exits repo: %s", n)
}
return repoPath, strings.TrimPrefix(kustRootPath, pathSeparator), nil
}
func tryExplicitMarkerSplit(n string) (string, string, bool) {
// Look for the _git delimiter, which by convention is expected to be ONE directory above the repo root.
// If found, split on the NEXT path element, which is the repo root.
// Example: https://[email protected]/org/project/_git/repo/path/to/kustomization/root
if gitRootIdx := strings.Index(n, gitRootDelimiter); gitRootIdx >= 0 {
gitRootPath := n[:gitRootIdx+len(gitRootDelimiter)]
subpathSegments := strings.Split(n[gitRootIdx+len(gitRootDelimiter):], pathSeparator)
return gitRootPath + subpathSegments[0], strings.Join(subpathSegments[1:], pathSeparator), true
// Look for a double-slash in the path, which if present separates the repo root from the kust path.
// It is a convention, not a real path element, so do not preserve it in the returned value.
// Example: https://github.com/org/repo//path/to/kustomozation/root
} else if repoRootIdx := strings.Index(n, "//"); repoRootIdx >= 0 {
return n[:repoRootIdx], n[repoRootIdx+2:], true
// Look for .git in the path, which if present is part of the directory name of the git repo.
// This means we want to grab everything up to and including that suffix
// Example: https://github.com/org/repo.git/path/to/kustomozation/root
} else if gitSuffixIdx := strings.Index(n, gitSuffix); gitSuffixIdx >= 0 {
upToGitSuffix := n[:gitSuffixIdx+len(gitSuffix)]
afterGitSuffix := n[gitSuffixIdx+len(gitSuffix):]
return upToGitSuffix, afterGitSuffix, true
}
return "", "", false
}
func tryDefaultLengthSplit(n string, defaultSegmentLength int) (string, string, bool) {
// If the default is to take all segments, do so.
if defaultSegmentLength == allSegments {
return n, "", true
// If the default is N segments, make sure we have at least that many and take them if so.
// If we have less than N, we have historically considered the URL invalid.
} else if segments := strings.Split(n, pathSeparator); len(segments) >= defaultSegmentLength {
firstNSegments := strings.Join(segments[:defaultSegmentLength], pathSeparator)
rest := strings.Join(segments[defaultSegmentLength:], pathSeparator)
return firstNSegments, rest, true
}
return "", "", false
}
func kustRootPathExitsRepo(kustRootPath string) bool {
cleanedPath := filepath.Clean(strings.TrimPrefix(kustRootPath, string(filepath.Separator)))
pathElements := strings.Split(cleanedPath, string(filepath.Separator))
return len(pathElements) > 0 &&
pathElements[0] == filesys.ParentDir
}
// Clone git submodules by default.
const defaultSubmodules = true
// Arbitrary, but non-infinite, timeout for running commands.
const defaultTimeout = 27 * time.Second
func parseQuery(query string) (string, time.Duration, bool) {
values, err := url.ParseQuery(query)
// in event of parse failure, return defaults
if err != nil {
return "", defaultTimeout, defaultSubmodules
}
// ref is the desired git ref to target. Can be specified by in a git URL
// with ?ref=<string> or ?version=<string>, although ref takes precedence.
ref := values.Get("version")
if queryValue := values.Get("ref"); queryValue != "" {
ref = queryValue
}
// depth is the desired git exec timeout. Can be specified by in a git URL
// with ?timeout=<duration>.
duration := defaultTimeout
if queryValue := values.Get("timeout"); queryValue != "" {
// Attempt to first parse as a number of integer seconds (like "61"),
// and then attempt to parse as a suffixed duration (like "61s").
if intValue, err := strconv.Atoi(queryValue); err == nil && intValue > 0 {
duration = time.Duration(intValue) * time.Second
} else if durationValue, err := time.ParseDuration(queryValue); err == nil && durationValue > 0 {
duration = durationValue
}
}
// submodules indicates if git submodule cloning is desired. Can be
// specified by in a git URL with ?submodules=<bool>.
submodules := defaultSubmodules
if queryValue := values.Get("submodules"); queryValue != "" {
if boolValue, err := strconv.ParseBool(queryValue); err == nil {
submodules = boolValue
}
}
return ref, duration, submodules
}
func extractHost(n string) (string, string, error) {
n = ignoreForcedGitProtocol(n)
scheme, n := extractScheme(n)
username, n := extractUsername(n)
stdGithub := isStandardGithubHost(n)
acceptSCP := acceptSCPStyle(scheme, username, stdGithub)
// Validate the username and scheme before attempting host/path parsing, because if the parsing
// so far has not succeeded, we will not be able to extract the host and path correctly.
if err := validateScheme(scheme, acceptSCP); err != nil {
return "", "", err
}
// Now that we have extracted a valid scheme+username, we can parse host itself.
// The file protocol specifies an absolute path to a local git repo.
// Everything after the scheme (including any 'username' we found) is actually part of that path.
if scheme == fileScheme {
return scheme, username + n, nil
}
var host, rest = n, ""
if sepIndex := findPathSeparator(n, acceptSCP); sepIndex >= 0 {
host, rest = n[:sepIndex+1], n[sepIndex+1:]
}
// Github URLs are strictly normalized in a way that may discard scheme and username components.
if stdGithub {
scheme, username, host = normalizeGithubHostParts(scheme, username)
}
// Host is required, so do not concat the scheme and username if we didn't find one.
if host == "" {
return "", "", errors.Errorf("failed to parse host segment")
}
return scheme + username + host, rest, nil
}
// ignoreForcedGitProtocol strips the "git::" prefix from URLs.
// We used to use go-getter to handle our urls: https://github.com/hashicorp/go-getter.
// The git:: prefix signaled go-getter to use the git protocol to fetch the url's contents.
// We silently strip this prefix to allow these go-getter-style urls to continue to work,
// although the git protocol (which is insecure and unsupported on many platforms, including Github)
// will not actually be used as intended.
func ignoreForcedGitProtocol(n string) string {
n, found := trimPrefixIgnoreCase(n, "git::")
if found {
log.Println("Warning: Forcing the git protocol using the 'git::' URL prefix is not supported. " +
"Kustomize currently strips this invalid prefix, but will stop doing so in a future release. " +
"Please remove the 'git::' prefix from your configuration.")
}
return n
}
// acceptSCPStyle returns true if the scheme and username indicate potential use of an SCP-style URL.
// With this style, the scheme is not explicit and the path is delimited by a colon.
// Strictly speaking the username is optional in SCP-like syntax, but Kustomize has always
// required it for non-Github URLs.
// Example: [email protected]:path/to/repo.git/
func acceptSCPStyle(scheme, username string, isGithubURL bool) bool {
return scheme == "" && (username != "" || isGithubURL)
}
func validateScheme(scheme string, acceptSCPStyle bool) error {
// see https://git-scm.com/docs/git-fetch#_git_urls for info relevant to these validations
switch scheme {
case "":
// Empty scheme is only ok if it's a Github URL or if it looks like SCP-style syntax
if !acceptSCPStyle {
return fmt.Errorf("failed to parse scheme")
}
case sshScheme, fileScheme, httpsScheme, httpScheme:
// These are all supported schemes
default:
// At time of writing, we should never end up here because we do not parse out
// unsupported schemes to begin with.
return fmt.Errorf("unsupported scheme %q", scheme)
}
return nil
}
const fileScheme = "file://"
const httpScheme = "http://"
const httpsScheme = "https://"
const sshScheme = "ssh://"
func extractScheme(s string) (string, string) {
for _, prefix := range []string{sshScheme, httpsScheme, httpScheme, fileScheme} {
if rest, found := trimPrefixIgnoreCase(s, prefix); found {
return prefix, rest
}
}
return "", s
}
func extractUsername(s string) (string, string) {
var userRegexp = regexp.MustCompile(`^([a-zA-Z][a-zA-Z0-9-]*)@`)
if m := userRegexp.FindStringSubmatch(s); m != nil {
username := m[1] + "@"
return username, s[len(username):]
}
return "", s
}
func isStandardGithubHost(s string) bool {
lowerCased := strings.ToLower(s)
return strings.HasPrefix(lowerCased, "github.com/") ||
strings.HasPrefix(lowerCased, "github.com:")
}
// trimPrefixIgnoreCase returns the rest of s and true if prefix, ignoring case, prefixes s.
// Otherwise, trimPrefixIgnoreCase returns s and false.
func trimPrefixIgnoreCase(s, prefix string) (string, bool) {
if len(prefix) <= len(s) && strings.ToLower(s[:len(prefix)]) == prefix {
return s[len(prefix):], true
}
return s, false
}
func findPathSeparator(hostPath string, acceptSCP bool) int {
sepIndex := strings.Index(hostPath, pathSeparator)
if acceptSCP {
colonIndex := strings.Index(hostPath, ":")
// The colon acts as a delimiter in scp-style ssh URLs only if not prefixed by '/'.
if sepIndex == -1 || (colonIndex > 0 && colonIndex < sepIndex) {
sepIndex = colonIndex
}
}
return sepIndex
}
func normalizeGithubHostParts(scheme, username string) (string, string, string) {
if strings.HasPrefix(scheme, sshScheme) || username != "" {
return "", username, "github.com:"
}
return httpsScheme, "", "github.com/"
}