From 117ce2820b7482c2c55a4970dea9050482e356fe Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 18 Sep 2018 18:28:18 -0700 Subject: [PATCH 1/4] config/libvirt/cache: Caching for libvirt pulls Checking our RHCOS source against ETag [1] / If-None-Match [2] and Last-Modified [3] / If-Modified-Since [4], it seems to support In-None-Match well, but only supports If-Modified-Since for exact matches: $ URL=http://aos-ostree.rhev-ci-vms.eng.rdu2.redhat.com/rhcos/images/cloud/latest/rhcos-qemu.qcow2.gz $ curl -I "${URL}" HTTP/1.1 200 OK Server: nginx/1.8.0 Date: Wed, 19 Sep 2018 04:32:19 GMT Content-Type: application/octet-stream Content-Length: 684934062 Last-Modified: Tue, 18 Sep 2018 20:05:24 GMT Connection: keep-alive ETag: "5ba15a84-28d343ae" Accept-Ranges: bytes $ curl -sIH 'If-None-Match: "5ba15a84-28d343ae"' "${URL}" | head -n1 HTTP/1.1 304 Not Modified $ curl -sIH 'If-Modified-Since: Tue, 18 Sep 2018 20:05:24 GMT' "${URL}" | head -n1 HTTP/1.1 304 Not Modified $ curl -sIH 'If-Modified-Since: Tue, 18 Sep 2015 20:05:24 GMT' "${URL}" | head -n1 HTTP/1.1 200 OK $ curl -sIH 'If-Modified-Since: Tue, 18 Sep 2018 20:05:25 GMT' "${URL}" | grep 'HTTP\|Last-Modified' HTTP/1.1 200 OK Last-Modified: Tue, 18 Sep 2018 20:05:24 GMT That last entry should have 304ed, although the spec has [4]: When used for cache updates, a cache will typically use the value of the cached message's Last-Modified field to generate the field value of If-Modified-Since. This behavior is most interoperable for cases where clocks are poorly synchronized or when the server has chosen to only honor exact timestamp matches (due to a problem with Last-Modified dates that appear to go "back in time" when the origin server's clock is corrected or a representation is restored from an archived backup). However, caches occasionally generate the field value based on other data, such as the Date header field of the cached message or the local clock time that the message was received, particularly when the cached message does not contain a Last-Modified field. ... When used for cache updates, a cache will typically use the value of the cached message's Last-Modified field to generate the field value of If-Modified-Since. This behavior is most interoperable for cases where clocks are poorly synchronized or when the server has chosen to only honor exact timestamp matches (due to a problem with Last-Modified dates that appear to go "back in time" when the origin server's clock is corrected or a representation is restored from an archived backup). However, caches occasionally generate the field value based on other data, such as the Date header field of the cached message or the local clock time that the message was received, particularly when the cached message does not contain a Last-Modified field. So the server is violating the SHOULD by not 304ing dates greater than Last-Modified, but it's not violating a MUST-level requirement. The server requirements around If-None-Match are MUST-level [2], so using it should be more portable. The RFC also seems to prefer clients use If(-None)-Match [4,5]. I'm using gregjones/httpcache for the caching, since that implemenation seems reasonably popular and the repo's been around for a few years. That library uses the belt-and-suspenders approach of setting both If-None-Match (to the cached ETag) and If-Modified-Since (to the cached Last-Modified) [6], so we should be fine. UserCacheDir requires Go 1.11 [7,8,9]. [1]: https://tools.ietf.org/html/rfc7232#section-2.3 [2]: https://tools.ietf.org/html/rfc7232#section-3.2 [3]: https://tools.ietf.org/html/rfc7232#section-2.2 [4]: https://tools.ietf.org/html/rfc7232#section-3.3 [5]: https://tools.ietf.org/html/rfc7232#section-2.4 [6]: https://github.com/gregjones/httpcache/blob/9cad4c3443a7200dd6400aef47183728de563a38/httpcache.go#L169-L181 [7]: https://github.com/golang/go/issues/22536 [8]: https://github.com/golang/go/commit/816154b06553a4cf8ee7ad089f5e444b37bed43d [9]: https://github.com/golang/go/commit/50bd1c4d4eb4fac8ddeb5f063c099daccfb71b26 --- installer/pkg/workflow/init.go | 13 +++++- pkg/types/config/libvirt/cache.go | 68 +++++++++++++++++++++++++++++++ pkg/types/config/parser.go | 1 + 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 pkg/types/config/libvirt/cache.go diff --git a/installer/pkg/workflow/init.go b/installer/pkg/workflow/init.go index 69c3f68f738..9e0c7daee2f 100644 --- a/installer/pkg/workflow/init.go +++ b/installer/pkg/workflow/init.go @@ -10,7 +10,6 @@ import ( yaml "gopkg.in/yaml.v2" configgenerator "github.com/openshift/installer/installer/pkg/config-generator" - "github.com/openshift/installer/installer/pkg/copy" "github.com/openshift/installer/pkg/types/config" ) @@ -93,6 +92,12 @@ func prepareWorspaceStep(m *metadata) error { return err } + if cluster.Platform == config.PlatformLibvirt { + if err := cluster.Libvirt.UseCachedImage(); err != nil { + return err + } + } + // generate clusterDir folder clusterDir := filepath.Join(dir, cluster.Name) m.clusterDir = clusterDir @@ -106,7 +111,11 @@ func prepareWorspaceStep(m *metadata) error { // put config file under the clusterDir folder configFilePath := filepath.Join(clusterDir, configFileName) - if err := copy.Copy(m.configFilePath, configFilePath); err != nil { + configContent, err := yaml.Marshal(cluster) + if err != nil { + return err + } + if err := ioutil.WriteFile(configFilePath, configContent, 0666); err != nil { return fmt.Errorf("failed to create cluster config at %q: %v", clusterDir, err) } diff --git a/pkg/types/config/libvirt/cache.go b/pkg/types/config/libvirt/cache.go new file mode 100644 index 00000000000..8a8ca6eb5b5 --- /dev/null +++ b/pkg/types/config/libvirt/cache.go @@ -0,0 +1,68 @@ +package libvirt + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + + "github.com/gregjones/httpcache" + "github.com/gregjones/httpcache/diskcache" +) + +// UseCachedImage leaves non-file:// image URIs unalterered. +// Other URIs are retrieved with a local cache at +// $XDG_CACHE_HOME/openshift-install/libvirt [1]. This allows you to +// use the same remote image URI multiple times without needing to +// worry about redundant downloads, although you will want to +// periodically blow away your cache. +// +// [1]: https://standards.freedesktop.org/basedir-spec/basedir-spec-0.7.html +func (libvirt *Libvirt) UseCachedImage() (err error) { + // FIXME: set the default URI here? Leave it elsewhere? + + if strings.HasPrefix(libvirt.Image, "file://") { + return nil + } + + baseCacheDir, err := os.UserCacheDir() + if err != nil { + return err + } + + cacheDir := filepath.Join(baseCacheDir, "openshift-install", "libvirt") + err = os.MkdirAll(cacheDir, 0777) + if err != nil { + return err + } + + cache := diskcache.New(cacheDir) + transport := httpcache.NewTransport(cache) + resp, err := transport.Client().Get(libvirt.Image) + if err != nil { + return err + } + if resp.StatusCode != 200 { + return fmt.Errorf("%s while getting %s", resp.Status, libvirt.Image) + } + defer resp.Body.Close() + + // FIXME: diskcache's diskv backend doesn't expose direct file access. + // We can write our own cache implementation to get around this, + // but for now just dump this into /tmp without cleanup. + file, err := ioutil.TempFile("", "openshift-install-") + if err != nil { + return err + } + defer file.Close() + + _, err = io.Copy(file, resp.Body) + if err != nil { + return err + } + + libvirt.Image = fmt.Sprintf("file://%s", filepath.ToSlash(file.Name())) + return nil +} diff --git a/pkg/types/config/parser.go b/pkg/types/config/parser.go index ab0d98667c1..050b9c94485 100644 --- a/pkg/types/config/parser.go +++ b/pkg/types/config/parser.go @@ -31,6 +31,7 @@ func ParseConfig(data []byte) (*Cluster, error) { return nil, err } cluster.PullSecret = string(data) + cluster.PullSecretPath = "" } if cluster.Platform == PlatformAWS && cluster.EC2AMIOverride == "" { From 899366403d72261e23ffe1f5d516963c15771cb5 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 19 Sep 2018 09:15:52 -0700 Subject: [PATCH 2/4] config/libvirt/cache: Stub in $XDG_CACHE_HOME Assume the caller has HOME set and XDG_CACHE_HOME not set, because that's easy. The ~/.cache default is from [1]. Once we bump to Go 1.11, we can revert this commit and get the more-robust stdlib implementation, which is why I haven't bothered with a robust implementation here. [1]: https://standards.freedesktop.org/basedir-spec/basedir-spec-0.7.html --- pkg/types/config/libvirt/cache.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/types/config/libvirt/cache.go b/pkg/types/config/libvirt/cache.go index 8a8ca6eb5b5..429c0652356 100644 --- a/pkg/types/config/libvirt/cache.go +++ b/pkg/types/config/libvirt/cache.go @@ -27,10 +27,12 @@ func (libvirt *Libvirt) UseCachedImage() (err error) { return nil } - baseCacheDir, err := os.UserCacheDir() - if err != nil { - return err - } + // FIXME: Use os.UserCacheDir() once we bump to Go 1.11 + // baseCacheDir, err := os.UserCacheDir() + // if err != nil { + // return err + // } + baseCacheDir := filepath.Join(os.Getenv("HOME"), ".cache") cacheDir := filepath.Join(baseCacheDir, "openshift-install", "libvirt") err = os.MkdirAll(cacheDir, 0777) From 7abe94d4cb365434279fb16749b7e5eb842781a4 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 21 Sep 2018 21:58:21 -0700 Subject: [PATCH 3/4] config/libvirt/cache: Decompress when URI has ".gz" suffix Passing the compressed images to libvirt was giving me "no bootable device" errors with my: $ rpm -q libvirt qemu-kvm-rhev libvirt-3.9.0-14.el7_5.7.x86_64 qemu-kvm-rhev-2.9.0-16.el7_4.13.x86_64 Ideally, the HTTP headers would tell us that the image was compressed (via Content-Type [1] or Content-Encoding [2]), but aos-ostree.rhev-ci-vms.eng.rdu2.redhat.com isn't configured to do that at the moment: $ curl -I http://aos-ostree.rhev-ci-vms.eng.rdu2.redhat.com/rhcos/images/cloud/latest/rhcos-qemu.qcow2.gz HTTP/1.1 200 OK Server: nginx/1.8.0 Date: Sat, 22 Sep 2018 04:59:53 GMT Content-Type: application/octet-stream Content-Length: 684751099 Last-Modified: Fri, 21 Sep 2018 13:56:07 GMT Connection: keep-alive ETag: "5ba4f877-28d078fb" Accept-Ranges: bytes $ curl -I http://aos-ostree.rhev-ci-vms.eng.rdu2.redhat.com/rhcos/images/cloud/latest/rhcos-qemu.qcow2 HTTP/1.1 404 Not Found Server: nginx/1.8.0 Date: Sat, 22 Sep 2018 04:59:56 GMT Content-Type: text/html Content-Length: 168 Connection: keep-alive I've opened [3] about getting Content-Encoding support via gzip_static [4], but in the meantime, just assume that anything with a ".gz" suffix is gzipped. Unzipping is also somewhat expensive (although not as expensive as network requests). It would be nice for callers to be able to configure whether we cache the compressed images (less disk consumption) or the uncompressed images (less CPU load when launching clusters). For now, we're just caching the network response. [1]: https://tools.ietf.org/html/rfc7231#section-3.1.1.5 [2]: https://tools.ietf.org/html/rfc7231#section-3.1.2.2 [3]: https://projects.engineering.redhat.com/browse/COREOS-593 [4]: http://nginx.org/en/docs/http/ngx_http_gzip_static_module.html --- pkg/types/config/libvirt/cache.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/types/config/libvirt/cache.go b/pkg/types/config/libvirt/cache.go index 429c0652356..95c7298e152 100644 --- a/pkg/types/config/libvirt/cache.go +++ b/pkg/types/config/libvirt/cache.go @@ -1,6 +1,7 @@ package libvirt import ( + "compress/gzip" "fmt" "io" "io/ioutil" @@ -51,6 +52,16 @@ func (libvirt *Libvirt) UseCachedImage() (err error) { } defer resp.Body.Close() + var reader io.Reader + if strings.HasSuffix(libvirt.Image, ".gz") { + reader, err = gzip.NewReader(resp.Body) + if err != nil { + return err + } + } else { + reader = resp.Body + } + // FIXME: diskcache's diskv backend doesn't expose direct file access. // We can write our own cache implementation to get around this, // but for now just dump this into /tmp without cleanup. @@ -60,7 +71,7 @@ func (libvirt *Libvirt) UseCachedImage() (err error) { } defer file.Close() - _, err = io.Copy(file, resp.Body) + _, err = io.Copy(file, reader) if err != nil { return err } From 85286c578135fe9822833818535283bbdc63d983 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 21 Sep 2018 21:42:21 -0700 Subject: [PATCH 4/4] *: Update BUILD.bazel Generated with: $ bazel run //:gazelle using: $ bazel version Build label: 0.17.2- (@non-git) Build target: bazel-out/k8-opt/bin/src/main/java/com/google/devtools/build/lib/bazel/BazelServer_deploy.jar Build time: Fri Sep 21 15:04:25 2018 (1537542265) Build timestamp: 1537542265 Build timestamp as int: 1537542265 --- installer/pkg/workflow/BUILD.bazel | 1 - pkg/types/config/libvirt/BUILD.bazel | 11 +++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/installer/pkg/workflow/BUILD.bazel b/installer/pkg/workflow/BUILD.bazel index f73e3990091..43b0c4645e5 100644 --- a/installer/pkg/workflow/BUILD.bazel +++ b/installer/pkg/workflow/BUILD.bazel @@ -16,7 +16,6 @@ go_library( visibility = ["//visibility:public"], deps = [ "//installer/pkg/config-generator:go_default_library", - "//installer/pkg/copy:go_default_library", "//pkg/types/config:go_default_library", "//vendor/github.com/Sirupsen/logrus:go_default_library", "//vendor/gopkg.in/yaml.v2:go_default_library", diff --git a/pkg/types/config/libvirt/BUILD.bazel b/pkg/types/config/libvirt/BUILD.bazel index d93f70647ee..a3c2d4a20aa 100644 --- a/pkg/types/config/libvirt/BUILD.bazel +++ b/pkg/types/config/libvirt/BUILD.bazel @@ -2,8 +2,15 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", - srcs = ["libvirt.go"], + srcs = [ + "cache.go", + "libvirt.go", + ], importpath = "github.com/openshift/installer/pkg/types/config/libvirt", visibility = ["//visibility:public"], - deps = ["//vendor/github.com/apparentlymart/go-cidr/cidr:go_default_library"], + deps = [ + "//vendor/github.com/apparentlymart/go-cidr/cidr:go_default_library", + "//vendor/github.com/gregjones/httpcache:go_default_library", + "//vendor/github.com/gregjones/httpcache/diskcache:go_default_library", + ], )