diff --git a/001-resources-v2/branch-gen.yml b/001-resources-v2/branch-gen.yml new file mode 100644 index 00000000..c0ea55c4 --- /dev/null +++ b/001-resources-v2/branch-gen.yml @@ -0,0 +1,39 @@ +resources: +- name: atc + type: git + source: {uri: https://github.com/concourse/atc} + space: master + +- name: atc-gen + type: git + source: {uri: "https://github.com/concourse/atc"} + +jobs: +- name: gen + plan: + - get: atc + trigger: true + - task: gen + file: atc/ci/gen.yml + config: + platform: ... + image_resource: ... + outputs: + - name: generated-repo + # has the generated code committed to the repo + - name: branch-name + # has a 'name' file with `gen-(some deterministic hash)` + - put: atc-gen + params: {branch_name: branch-name/name, repository: generated-repo} + +- name: test + plan: + - get: atc + passed: [gen] + trigger: true + - get: atc-gen + passed: [gen] + spaces: [gen-*] + trigger: true + - task: test + file: atc/ci/test.yml diff --git a/001-resources-v2/git-example/.gitignore b/001-resources-v2/git-example/.gitignore new file mode 100644 index 00000000..bbb00daa --- /dev/null +++ b/001-resources-v2/git-example/.gitignore @@ -0,0 +1,2 @@ +check-repo +dot diff --git a/001-resources-v2/git-example/Gemfile b/001-resources-v2/git-example/Gemfile new file mode 100644 index 00000000..10d1a910 --- /dev/null +++ b/001-resources-v2/git-example/Gemfile @@ -0,0 +1,4 @@ +source :rubygems + +gem 'rugged' +gem 'pry' diff --git a/001-resources-v2/git-example/Gemfile.lock b/001-resources-v2/git-example/Gemfile.lock new file mode 100644 index 00000000..0d15428b --- /dev/null +++ b/001-resources-v2/git-example/Gemfile.lock @@ -0,0 +1,19 @@ +GEM + remote: http://rubygems.org/ + specs: + coderay (1.1.2) + method_source (0.9.0) + pry (0.11.3) + coderay (~> 1.1.0) + method_source (~> 0.9.0) + rugged (0.27.2) + +PLATFORMS + ruby + +DEPENDENCIES + pry + rugged + +BUNDLED WITH + 1.16.2 diff --git a/001-resources-v2/git-example/README.md b/001-resources-v2/git-example/README.md 
new file mode 100644 index 00000000..cc5b51f8 --- /dev/null +++ b/001-resources-v2/git-example/README.md @@ -0,0 +1,13 @@ +# Git Resource v2 + +This implementation is done in Ruby using the `rugged` gem. I chose Ruby +over Bash because having a real language with more accessible data +structures is probably going to be more important with this new +interface, and Ruby feels pretty well suited (really just need a bit more +than Bash). + +Please leave comments on parts you like/don't like! But bear in mind the +goal here isn't necessarily the prettiness of the code, it's to see what +kinds of things the resource has to do. I'll be using Ruby purely as a +scripting language, hacking things together where needed in the interest +of brevity. diff --git a/001-resources-v2/git-example/artifact b/001-resources-v2/git-example/artifact new file mode 100755 index 00000000..792e47cb --- /dev/null +++ b/001-resources-v2/git-example/artifact @@ -0,0 +1,121 @@ +#!/usr/bin/env ruby + +require "json" +require "rugged" +require "pry" +require "benchmark" + +$request = JSON.parse(STDIN.read, symbolize_names: true) + +def commit_version(c, s) + JSON.dump({ + space: s, + version: {ref: c.oid}, + metadata: [ + {name: "author", value: enc(c, c.author[:name])}, + {name: "author_date", value: c.author[:time].to_s}, + {name: "committer", value: enc(c, c.committer[:name])}, + {name: "committer_date", value: c.committer[:time].to_s}, + {name: "message", value: enc(c, c.message)} + ] + }) +end + +def bench(label, &blk) + time = Benchmark.realtime(&blk) + $stderr.puts "#{label}: #{time}s" +end + +def enc(c, str) + str = str.force_encoding("ISO-8859-1") unless c.header_field("Encoding") + str.encode("UTF-8") +end + +case ARGV[0] +when "check" + repo_dir = File.basename($request[:config][:uri]) + + repo = + if Dir.exists?(repo_dir) + Rugged::Repository.new(repo_dir).tap do |r| + r.fetch("origin") + end + else + Rugged::Repository.clone_at( + $request[:config][:uri], + repo_dir, + bare: true, +
progress: lambda { |t| $stderr.print t }) + end + + spaces = [] + default_branch = nil + file = File.new $request[:response_path], 'w' + + repo.branches.each do |b| + unless b.remote? + # assume the only local branch is the default one + default_branch = b.name + + file.puts JSON.dump({ + default_space: default_branch + }) + next + end + + space_name = b.name.sub("#{b.remote_name}/", "") + + commits = [] + + walker = Rugged::Walker.new(repo) + walker.sorting(Rugged::SORT_TOPO|Rugged::SORT_REVERSE) + walker.simplify_first_parent + walker.push(b.target) + + from = $request[:from][space_name.to_sym] + + if from && repo.include?(from[:ref]) + commit = repo.lookup(from[:ref]) + walker.hide(commit) + + file.puts commit_version(commit, space_name) + end + + bench("#{space_name} walk") do + walker.walk do |c| + # TODO: test if commit satisfies paths/ignore_paths + file.puts commit_version(c, space_name) + end + end + + next if commits.empty? + + $stderr.puts "#{space_name} commits: #{commits.size} (latest: #{has_latest})" + end + + file.close + +when "get" + $request = { + config: {uri: "https://github.com/vito/booklit"}, + space: "master", + version: {ref: "f828f2758256b0e93dc3c101f75604efe92ca07e"} + } + + repo = + Rugged::Repository.clone_at( + $request[:config][:uri], + "dot", # TODO: irl this would be '.' + checkout_branch: $request[:space]) + + repo.checkout($request[:version][:ref]) + + # TODO: update/init submodules recursively + + # TODO: draw the rest of the owl + # + # most of this is uninteresting. 
+ +when "put" + puts "putting" +end diff --git a/001-resources-v2/git-example/collect-all-versions b/001-resources-v2/git-example/collect-all-versions new file mode 100755 index 00000000..d7579958 --- /dev/null +++ b/001-resources-v2/git-example/collect-all-versions @@ -0,0 +1,83 @@ +#!/usr/bin/env ruby + +require "json" +require "subprocess" +require "stringio" + +# concourse (~15 branches, ~8500 commits) +$request = { + config: {uri: "https://github.com/concourse/concourse"}, + from: {} +} + +# booklit (~10 branches, ~200 commits) +$request = { + config: {uri: "https://github.com/vito/booklit"}, + from: {} +} + +# rails (~36 branches, ~70k commits) +$request = { + config: {uri: "https://github.com/rails/rails"}, + from: {} +} + +# linux (~2 branches, ~766k commits) +$request = { + config: {uri: "https://github.com/torvalds/linux"}, + from: {} +} + +def check_all + all_start = Time.now + check_has_latest = false + done_last_check = false + + total_versions = {} + + while true + check_start = Time.now + + Subprocess::Process.new( + ["bundle", "exec", "./artifact", "check"], + stdin: Subprocess::PIPE, + stdout: Subprocess::PIPE) do |check| + out, _ = check.communicate(JSON.dump($request)) + + done_last_check = true if check_has_latest + + res = JSON.parse(out, symbolize_names: true) + + res[:spaces].each do |s| + space = s[:space] + versions = s[:versions] + + if versions.first[:version] == versions.last[:version] + puts "checked #{space}: #{versions.first[:version][:ref]}" + else + puts "checked #{space}: #{versions.first[:version][:ref]}..#{versions.last[:version][:ref]}" + end + + total_versions[space] ||= 0 + total_versions[space] += versions.size + if versions.first[:version] == $request[:from][space] + total_versions[space] -= 1 + end + + $request[:from][space] = versions.last[:version] + end + + check_has_latest = res[:spaces].all? 
{ |s| s[:has_latest] } + end + + if done_last_check + puts "time for stable check: #{Time.now - check_start}s" + break + end + end + + puts "total time for all checks: #{Time.now - all_start}s" + puts "total versions: #{total_versions.to_json}" +end + +check_all diff --git a/001-resources-v2/git-example/info b/001-resources-v2/git-example/info new file mode 100755 index 00000000..be0e0398 --- /dev/null +++ b/001-resources-v2/git-example/info @@ -0,0 +1,12 @@ +#!/usr/bin/env ruby + +require "json" + +puts JSON.dump({ + artifacts: { + api_version: "2.0", + check: "artifact check", + get: "artifact get", + put: "artifact put" + } +}) diff --git a/001-resources-v2/main-pipeline.yml b/001-resources-v2/main-pipeline.yml new file mode 100644 index 00000000..7be40cc3 --- /dev/null +++ b/001-resources-v2/main-pipeline.yml @@ -0,0 +1,121 @@ +# In this example, we demonstrate that today's model of an +# always-rolling-forward pipeline doesn't need to mention spaces at all, so +# long as the semver-git resource continuously rolls its "default space" +# forward to the latest major.minor version available. +# +# Interesting note: the bin-rc and bosh-rc jobs will dynamically switch the +# space they run against because they have a `get` of the resource's default +# space, which will change as the version is bumped. This would result in the +# box's color changing to 'pending' in the UI until a build in that space runs. 
+ +--- +resources: +- name: booklit + type: git + source: + uri: https://github.com/vito/booklit + +- name: version + type: semver-git + source: + uri: https://github.com/vito/booklit + +- name: bin-rc + type: s3 + +- name: bosh-rc + type: s3 + +jobs: +- name: unit + plan: + - get: booklit + trigger: true + - task: unit + file: booklit/ci/test.yml + +- name: major + plan: + - get: booklit + - put: version + params: {bump: major, pre: rc, repo: booklit} + +- name: minor + plan: + - get: booklit + - put: version + params: {bump: minor, pre: rc, repo: booklit} + +- name: rc + plan: + - get: booklit + passed: [unit] + trigger: true + - put: version + params: {pre: rc, repo: booklit} + +- name: bin-rc + plan: + - get: booklit + passed: [rc] + trigger: true + - get: version + passed: [rc] + trigger: true + - task: build-rc + file: booklit/ci/bin-rc.yml + - put: bin-rc + +- name: bin-testflight + plan: + - get: booklit + passed: [bin-rc] + trigger: true + - get: bin-rc + passed: [bin-rc] + trigger: true + - task: integration + file: booklit/ci/testflight.yml + +- name: bosh-rc + plan: + - get: booklit + passed: [rc] + trigger: true + - get: version + passed: [rc] + trigger: true + - task: build-rc + file: booklit/ci/bosh-rc.yml + - put: bosh-rc + +- name: bosh-testflight + plan: + - get: booklit + passed: [bosh-rc] + trigger: true + - get: bosh-rc + passed: [bosh-rc] + trigger: true + - task: integration + file: booklit/ci/testflight.yml + +- name: ship + plan: + - get: booklit + passed: [bin-testflight, bosh-testflight] + trigger: true + - get: bin-rc + passed: [bin-testflight] + - get: bosh-rc + passed: [bosh-testflight] + - put: version + params: {bump: final, repo: booklit} + +- name: patch + plan: + - get: booklit + passed: [ship] + trigger: true + - put: version + params: {bump: patch, pre: rc, repo: booklit} diff --git a/001-resources-v2/notifications.yml b/001-resources-v2/notifications.yml new file mode 100644 index 00000000..1305434a --- /dev/null +++ 
b/001-resources-v2/notifications.yml @@ -0,0 +1,32 @@ +resource_types: +- name: slack-notifier + type: docker-image + source: {repository: concourse/slack-notifier-resource} + +resources: +- name: atc + type: git + source: + uri: https://github.com/concourse/atc + +- name: slack-alert + type: slack-notifier + source: + url: https://hooks.slack.com/services/XXXX + notify: + - on: build_finished + params: + template: + succeeded: | + Yay! <{{.ExternalURL}}/builds/{{.BuildID}}|{{.BuildName}}> succeeded. :) + failed: | + Oh no! <{{.ExternalURL}}/builds/{{.BuildID}}|{{.BuildName}}> failed. :( + +jobs: +- name: atc-pr-unit + plan: + - get: atc-pr + trigger: true + spaces: all + - task: unit + file: atc/ci/pr.yml diff --git a/001-resources-v2/proposal.md b/001-resources-v2/proposal.md new file mode 100644 index 00000000..c6dc358d --- /dev/null +++ b/001-resources-v2/proposal.md @@ -0,0 +1,626 @@ +# Summary + +This RFC proposes a new resource interface to replace the existing resource +interface. + +As part of this proposal, the interface will now be versioned, starting at 2.0. +Today's resource interface (documented +[here](https://github.com/concourse/docs/blob/b9d291e5a821046b8a5de48c50b5ccba5a977493/lit/reference/resource-types/implementing.lit)) +will be called version 1, even though it was never really versioned. + +The introduction of this new interface will be gradual, allowing Concourse +users to use a mix of v1 and v2 resources throughout their pipelines. While the +new interface is defined in terms of entirely new concepts like +[spaces](https://github.com/concourse/concourse/issues/1707), v1 resources will +be silently 'adapted' to v2 automatically. 
+ + +# Motivation + +* Support for multi-branch workflows and build matrixes: + * https://github.com/concourse/concourse/issues/1172 + * https://github.com/concourse/concourse/issues/1707 + +* Support for creating new branches dynamically (as spaces): + * https://github.com/concourse/git-resource/pull/172 + +* Support for creating multiple versions at once: + * https://github.com/concourse/concourse/issues/535 + * https://github.com/concourse/concourse/issues/2660 + +* Support for deleting versions: + * https://github.com/concourse/concourse/issues/362 + +* Having resource metadata immediately available via `check`: + * https://github.com/concourse/git-resource/issues/193 + +* Unifying `source` and `params` as just `config` so that resources don't have + to care where configuration is being set in pipelines: + * https://github.com/concourse/concourse/issues/310 + +* Improving stability of reattaching to builds by reading resource responses + from files instead of `stdout`: + * https://github.com/concourse/concourse/issues/1580 + +* Ensuring resource version history is always correct and up-to-date, enabling + it to be [deduped](https://github.com/concourse/concourse/issues/2386) and + removing the need for [purging + history](https://github.com/concourse/concourse/issues/145) and + [removing/renaming + resources](https://github.com/concourse/concourse/issues/372). + +* Closing gaps in the resource interface that turned them into a "local maxima" + and resulted in their being used in somewhat cumbersome ways (notifications, + partially-implemented resources, etc.) + + +# Proposal + +* TODO: document 'info' +* TODO: make metadata more flexible? + * https://github.com/concourse/concourse/issues/310 + * https://github.com/concourse/concourse/issues/2900 + + +## General Types + +```go +// Space is a name of a space, e.g. "master", "release/3.14", "1.0". 
+type Space string + +// Config is a black box containing all user-supplied configuration, combining +// `source` in the resource definition with `params` from the step (in the +// case of `get` or `put`). +type Config map[string]interface{} + +// Version is a key-value identifier for a version of a resource, e.g. +// `{"ref":"abcdef"}`, `{"version":"1.2.3"}`. +type Version map[string]string + +// Metadata is an ordered list of metadata fields to display to the user about +// a resource version. It's ordered so that the resource can decide the best +// way to show it. +type Metadata []MetadataField + +// MetadataField is an arbitrary key-value to display to the user about a +// version of a resource. +type MetadataField struct { + Name string `json:"name"` + Value string `json:"value"` +} +``` + +## Versioned Artifacts interface + +### `check`: Detect versions across spaces. + +The `check` command will be invoked with the following JSON structure on +`stdin`: + +```go +// CheckRequest contains the resource's configuration and latest version +// associated to each space. +type CheckRequest struct { + Config Config `json:"config"` + From map[Space]Version `json:"from"` + ResponsePath string `json:"response_path"` +} +``` + +The `check` script responds by writing JSON objects ("events") to a file +specified by `response_path`. Each JSON object has an `action` and a different +set of fields based on the action. + +The following event types may be emitted by `check`: + +* `default_space`: Emitted when the resource has learned of a space which + should be considered the "default", e.g. the default branch of a `git` repo + or the latest version available for a semver'd resource. + + Required fields for this event: + + * `space`: The name of the space. + +* `discovered`: Emitted when a version is discovered for a given space. These + must be emitted in chronological order (relative to other `discovered` events + for the given space - other events may be intermixed). 
+ + Required fields for this event: + + * `space`: The space the version is in. + * `version`: The version object. + * `metadata`: A list of JSON objects with `name` and `value`, shown to the + user. + +* `reset`: Emitted when a given space's "current version" is no longer present + (e.g. someone ran `git push -f`). This has the effect of marking all + currently-recorded versions of the space 'deleted', after which the resource + will emit any and all versions from the beginning, thus 'un-deleting' + anything that's actually still there. + + Required fields for this event: + + * `space`: The name of the space. + +The first request will have an empty object as `from`. + +Any spaces discovered by the resource but not present in `from` should emit +versions from the very first version. + +For each space and associated version in `from`, the resource should emit all +versions that appear *after* the given version (not including the given +version). + +If a space or given version in `from` is no longer present (in the case of `git +push -f` or branch deletion), the resource should emit a `reset` event for the +space. If the space is still there, but the version is gone, it should follow +the `reset` event with all versions detected from the beginning, as if the +`from` value was never specified. + +The resource should determine a "default space", if any. Having a default space +is useful for things like Git repos which have a default branch, or version +spaces (e.g. `1.8`, `2.0`) which can point to the latest version line by +default. If there is no default space, the user must specify it explicitly in +the pipeline, either by configuring one on the resource (`default_space: foo`) +or on every `get` step using the resource (`spaces: [foo]`).
+ +#### example + +Given the following request on `stdin`: + +```json +{ + "config": { + "uri": "https://github.com/concourse/concourse" + }, + "from": { + "master": {"ref": "abc123"}, + "feature/foo": {"ref":"def456"}, + "feature/bar": {"ref":"987cia"} + }, + "response_path": "/tmp/check-response.json" +} +``` + +If the `feature/foo` branch has new commits, `master` is the default branch and +has no new commits, and `feature/bar` has been `push -f`ed, you may see +something like the following in `/tmp/check-response.json`: + +```json +{"action":"discovered","space":"feature/foo","version":{"ref":"abcdf8"},"metadata":[{"name":"message","value":"fix thing"}]} +{"action":"reset","space":"feature/bar"} +{"action":"discovered","space":"feature/bar","version":{"ref":"abcde0"},"metadata":[{"name":"message","value":"initial commit"}]} +{"action":"discovered","space":"feature/bar","version":{"ref":"abcde1"},"metadata":[{"name":"message","value":"add readme"}]} +{"action":"default_space","space":"master"} +{"action":"discovered","space":"feature/foo","version":{"ref":"abcdf9"},"metadata":[{"name":"message","value":"fix thing even more"}]} +{"action":"discovered","space":"feature/bar","version":{"ref":"abcde2"},"metadata":[{"name":"message","value":"finish the feature"}]} +``` + +A few things to note: + +* A `reset` event is emitted immediately upon detecting that the given version + for `feature/bar` (`987cia`) is no longer available, followed by a + `discovered` event for every commit going back to the initial commit on the + branch. + +* No versions are emitted for `master`, because it's already up to date + (`abc123` is the latest commit). + +* The versions detected for `feature/foo` may appear between events for + `feature/bar`, as they're for unrelated spaces. The order only matters within + the space. + + +### `get`: Fetch a version from the resource's space. 
+ +The `get` command will be invoked with the following JSON structure on `stdin`: + +```go +type GetRequest struct { + Config Config `json:"config"` + Space Space `json:"space"` + Version Version `json:"version"` +} +``` + +The command will be invoked with a completely empty working directory. The +command should populate this directory with the requested bits. The `git` +resource, for example, would clone directly into the working directory. + +If the requested version is unavailable, the command should exit nonzero. + +No response is expected. + +Anything printed to `stdout` and `stderr` will propagate to the build logs. + + +### `put`: Idempotently create or destroy resource versions across spaces. + +The `put` command will be invoked with the following JSON structure on `stdin`: + +```go +type PutRequest struct { + Config Config `json:"config"` + ResponsePath string `json:"response_path"` +} +``` + +The command will be invoked with all of the build plan's artifacts present in +the working directory, each as `./(artifact name)`. + +The `put` script responds by writing JSON objects ("events") to a file +specified by `response_path`, just like `check`. Each JSON object has an +`action` and a different set of fields based on the action. + +Anything printed to `stdout` and `stderr` will propagate to the build logs. + +The following event types may be emitted by `put`: + +* `created`: Emitted when the resource has created (perhaps idempotently) a + version. The version will be recorded as an output of the build. + + Versions produced by `put` will *not* be directly inserted into the + resource's version history in the pipeline, as they were with v1 resources. + This enables one-off versions to be created and fetched within a build + without disrupting the normal detection of resource versions across the pipeline. + + Required fields for this event: + + * `space`: The space the version is in. + * `version`: The version object.
+ * `metadata`: A list of JSON objects with `name` and `value`, shown to the + user. Note that this is returned by both `put` and `check`, because there's a + chance that `put` produces a version that wouldn't normally be discovered + by `check`. + +* `deleted`: Emitted when a version has been deleted. The version record will + remain in the database for archival purposes, but it will no longer be a + candidate for any builds. + + Required fields for this event: + + * `space`: The space the version is in. + * `version`: The version object. + +Because the space is included on each event, `put` allows new spaces to be +generated dynamically based on params and/or the bits in its working directory +and propagated to the rest of the pipeline. + + +# Examples + +## Resource Implementations + +I've started cooking up new resources using this interface. I've left `TODO`s +for parts that need more thinking or discussion. Please leave comments! + +### `git` + +[Code](https://github.com/vito/rfcs/tree/resources-v2/01-resources-v2/git-example) + +This resource models the original `git` resource. It represents each branch as a space. + +### `semver-git` + +[Code](https://github.com/vito/rfcs/tree/resources-v2/01-resources-v2/semver-example) + +This is a whole new semver resource intended to replace the original `semver` +resource with a better model that supports concurrent version lines (i.e. +supporting multiple major/minor releases with patches). It does this by managing +tags in an existing Git repository. + +### `s3` + +[Code](https://github.com/vito/rfcs/tree/resources-v2/01-resources-v2/s3-example) + +This resource models the original `s3` resource. Only regex versions were +implemented; each space corresponds to a major.minor version. For example, 1.2.0 +and 1.2.1 are in the same space but 1.3.0 is in a different space. Single numbers are +also supported with a default minor of 0. The default space is set to the latest +minor version.
+ + +## Pipeline Usage + +TODO: + +- Pull Requests +- Feature branches +- Build matrixes +- Generating branches (and propagating them downstream) +- Semver artifacts +- Fanning out against multiple IaaSes +- Pool resource? +- BOSH deploys + + +# Summary of Changes + +## Overarching Changes + +* Add an `info` script which returns a JSON object indicating the supported + interfaces, their protocol versions, and any other interface-specific + meta-configuration (for example, which commands to execute for the + interface's hooks). + +* The first supported interface will be called `artifacts`, and its version + will start at `2.0` as it's really the next iteration of the existing + "resources" concept, but with a more specific name. + +* There are no more hardcoded paths (`/opt/resource/X`) - instead there's the + single `info` entrypoint, which is run in the container's working directory. + This is more platform-agnostic. + + +## Changes to Versioned Artifact resources + +* Remove the distinction between `source` and `params`; resources will receive + a single `config`. The distinction will remain in the pipeline. This makes it + easier to implement a resource without planning ahead for dynamic vs. static + usage patterns. This will become more powerful if concourse/concourse#684 is + implemented. + +* Change `check` to run against all spaces. It will be given a mapping of each + space to its current latest version, and return the set of all spaces, along + with any new versions in each space. + + This is all done as one batch call so that resources can decide how to + efficiently perform the check. It also keeps the container overhead down to + one per resource, rather than one per space. + +* Remove the implicit `get` after every `put`, now requiring the pipeline to + explicitly configure a `get` field on the same step. 
This is necessary now + that `put` can potentially perform an operation resulting solely in `deleted` + events, in which case there is nothing to fetch. + + This has also been requested by users for quite a while, for the sake of + optimizing jobs that have no need for the implicit `get`. + +* Change `put` to emit a sequence of created versions, rather than just one. + + Technically the `git` resource may push many commits, so returning more than + one version is necessary to track them all as outputs of a build. This could + also support batch creation. + + To ensure `check` is the source of truth for ordering, the versions emitted + by `put` are not directly inserted into the database. Instead, they are + simply recorded as outputs of the build. The order does matter, however - if + a user configures a `get` on the `put` step, the last version emitted will be + fetched. For this reason they should be emitted in chronological order. + +* Change `put` to additionally return a sequence of *deleted* versions. + + There has long been a call for a batch `delete` or `destroy` action. Adding + this to `put` alongside the set of created versions allows `put` to become a + general idempotent side-effect performer, rather than implying that each + resource must support a separate `delete` action. + +* Change `get` to always run against a particular space, given by + the request payload. + +* Change `check` to include metadata for each version. Change `get` to no + longer return it. + + This way metadata is always immediately available, which could enable us to + have a richer UI for the version history page. + + The original thought was that metadata collection may be expensive, but so + far we haven't seen that to be the case. + +* Change `get` script to no longer return a version, since it's always given + one now. As a result, `get` no longer has a response; it just succeeds or + fails. 
+ +* Change `get` and `put` to run with the bits as their working directory, + rather than taking the path as an argument. This was something people would + trip up on when implementing a resource. + +* Change `check` and `put` to write its JSON response to a specified file, + rather than `stdout`, so that we don't have to be attached to process its + response. + + This is one of the few ways a build can error after the ATC reattaches + (`unexpected end of JSON`). With it written to a file, we can just try to + read the file when we re-attach after seeing that the process exited. This + also frees up `stdout`/`stderr` for normal logging, which has been an + occasional pitfall during resource development/debugging. + + Another motivation for this is safety: with `check` emitting a ton of data, + there is danger in Garden losing chunks of the output due to a slow consumer. + Writing to a file circumvents this issue. + + +# New Implications + +## The `get` after the `put` in Concourse pipelines + +With v1 resources, every `put` in a Concourse pipeline implied a `get` of the +version that was created. With v2, the `get` will be made opt-in. This has been +a long-time ask, and one objective reason to make it opt-in is that Concourse +can't know ahead of time that there will even be anything to `get` - for +example, the `put` could emit only `deleted` events. + +So, to `get` the latest version that was produced by the `put`, you would +configure something like: + +```yaml +- put: my-resource + get: my-created-resource +- task: use-my-created-resource +``` + +The value for the `get` field is the name under which the artifact will be +saved (just like `get` steps). When specified, the last version emitted will be +fetched (from whichever space it was in). + +## Single-state resources + +Resources that really only have a "current state", such as deployments, can now +represent their state more clearly because old versions that are no longer +there will be marked 'deleted'. 
+ +## Non-linearly versioned artifact storage + +This can be done by representing each non-linear version in a separate space. +For example, generated code could be pushed to a generated (but deterministic) +branch name, and that space could then be passed along. + +## Build-local Versions + +Now that `put` doesn't directly modify the resource's version history, it can +be used to provide explicitly versioned 'variants' of original versions without +doubling up the version history. One use case for this is pull-requests: you +may want a build to pull in one resource for the PR itself, another resource +for the base branch of the upstream repo, and then `put` to produce a +"combined" version of the two, representing the PR merged into the upstream +repo: + +```yaml +jobs: +- name: run-pr + plan: + - get: concourse-pr # pr: 123, ref: deadbeef + trigger: true + - get: concourse # ref: abcdef + - put: concourse-pr + get: merged-pr + params: + merge_base: concourse + status: pending + + # the `put` will learn the base ref from `concourse` input and param, and emit + # a 'created' event with the following version: + # + # pr: 123, ref: deadbeef, base: abcdef + # + # the `get` will then run with that version and know to merge onto the + # given base ref + + - task: unit + # uses 'merged-pr' as an input +``` + + +# Open Questions + +## Are there examples of `put`ing to multiple spaces at once? + +Initially there was a limitation that `put` could only emit versions pertaining +to a single space. This was to prevent ambiguity with "`get` after `put`" - +which space would the `get` fetch from? We loosened this constraint because it +felt somewhat arbitrary, as the protocol allows it easily, and recording +outputs and marking versions as deleted across spaces isn't any harder than +with a single space. + +To loosen the constraint we've instead constrained the `get` to only fetch the +last version, from whichever space it was in.
But are there any good examples +of this being useful, or have we just moved the arbitrary restriction +elsewhere? (At least we've moved it out of the resource interface - technically +this is a pipeline concern, not a resource interface concern.) + +Would users want to fetch multiple spaces that were created? Would they want to +do this statically (at pipeline definition time) or dynamically (at runtime)? +Static would be relatively easy as the build plan would just result in +multiple `get` steps, but dynamic would run into the same challenges as with +[dynamic build plan +generation](https://github.com/concourse/concourse/issues/684). However users +could always just separate it into a different job spanning the spaces +dynamically with wildcards. + +Here's a mockup for static configuration: + +```yaml +- put: foo + params: bar + get: {artifact-name-a: space-a, artifact-name-b: space-b} +``` + +...but is that useful? + +This is really in need of a use case to define it further, but for now the +constraint has been lifted from the resource interface, and it's up to the rest +of Concourse's pipeline mechanics to determine what's possible from there. + + +# Answered Questions + +
Can we reduce the `check` overhead? + +

+With spaces there will be more `check`s than ever. Right now, there's one +container per recurring `check`. Can we reduce the container overhead here by +requiring that resource `check`s be side-effect free and able to run in +parallel? +

+ +

+There may be substantial security implications for this. +

+ +

+This is now done as one big `check` across all spaces, run in a single +container. Resources can choose how to perform this efficiently and safely. +This may mean GraphQL requests or just iterating over local shared state in +series. Even in the worst-case, where no parallelism is involved, it will at +least consume only one container. +

+
+ +
Is `destroy` general enough to be a part of the interface? + +

+It may be the case that most resources cannot easily support `destroy`. One +example is the `git` resource. It doesn't really make sense to `destroy` a +commit. Even if it did (`push -f`?), it's a kind of weird workflow to support +out of the box. +

+ +

+Could we instead just have `put` and ensure that we `check` in such a way that +deleted versions are automatically noticed? What would the overhead of this +be? This only works if the versions are "chained", as with the `git` case. +

+ +

+Decided against introducing `destroy` in favor of having `put` return two sets +for each space: versions created and versions deleted. This generalizes `put` +into an idempotent versioned artifact side effect performer. +

+
+ +
Should `put` be given a space or return the space? + +

+The verb `PUT` in HTTP implies an idempotent action against a given resource. So +it's intuitive that the `put` verb here would do the same. +

+

+However, many of today's uses of `put` would be against a dynamically +determined space. For example, most semver workflows involve `put`ing with the +version determined by a file (often coming from the `semver` resource). So the +space isn't known statically at pipeline configuration time. +

+

+What's more, the resulting space for a semver push would only be `MAJOR.MINOR`, +excluding the final patch segment. This is annoying to have to explicitly +configure in your build. +

+

+If we instead have `put` return both the space and the versions, this would be a +lot simpler. +

+

+Answered this at the same time as having `put` return a set of deleted +versions. It'll return multiple spaces and versions created/deleted for them. +

+
+ + +# Implementation Notes + +## Performance Implications + +Now that we're going to be collecting all versions of every resource, we should +be careful not to be scanning the entire table all the time, and even make an +effort to share data when possible. We have implemented this with +https://github.com/concourse/concourse/issues/2386. diff --git a/001-resources-v2/prs.yml b/001-resources-v2/prs.yml new file mode 100644 index 00000000..64e8aa83 --- /dev/null +++ b/001-resources-v2/prs.yml @@ -0,0 +1,22 @@ +resource_types: +- name: github-pr + type: docker-image + source: {repository: concourse/github-pr-resource} + +resources: +- name: atc-pr + type: github-pr + source: + repository: concourse/atc + access_token: ((token)) + +jobs: +- name: atc-pr-unit + plan: + - get: atc-pr + trigger: true + spaces: all + - task: unit + file: atc/ci/pr.yml + + # TODO: this currently doesn't do any commit status indication diff --git a/001-resources-v2/release-pipeline.yml b/001-resources-v2/release-pipeline.yml new file mode 100644 index 00000000..336cf4ba --- /dev/null +++ b/001-resources-v2/release-pipeline.yml @@ -0,0 +1,58 @@ +# This example shows a subset of the main pipeline for supporting release +# branches. There's quite a bit of duplication, but in principle you may have a +# different set of checks and balances for patch releases. 
+ +--- +resources: +- name: booklit + type: git + source: + uri: https://github.com/vito/booklit + +- name: version + type: semver-git + source: + uri: https://github.com/vito/booklit + +jobs: +- name: unit + plan: + - get: booklit + spaces: [release/*] + trigger: true + - task: unit + file: booklit/ci/test.yml + +- name: rc + plan: + - get: booklit + spaces: [release/*] + passed: [unit] + trigger: true + - put: version + params: {pre: rc, repo: booklit} + +- name: integration + plan: + - get: booklit + spaces: [release/*] + passed: [rc] + trigger: true + - task: integration + file: booklit/ci/integration.yml + +- name: ship + plan: + - get: booklit + spaces: [release/*] + passed: [integration] + trigger: true + - put: version + params: {bump: final, repo: booklit} + +- name: patch + plan: + - get: booklit + spaces: [release/*] + - put: version + params: {bump: patch, pre: rc, repo: booklit} diff --git a/001-resources-v2/s3-example/Gemfile b/001-resources-v2/s3-example/Gemfile new file mode 100644 index 00000000..b64884b3 --- /dev/null +++ b/001-resources-v2/s3-example/Gemfile @@ -0,0 +1,4 @@ +source :rubygems + +gem 'aws-sdk-s3' +gem 'semverly' diff --git a/001-resources-v2/s3-example/Gemfile.lock b/001-resources-v2/s3-example/Gemfile.lock new file mode 100644 index 00000000..1858f0ae --- /dev/null +++ b/001-resources-v2/s3-example/Gemfile.lock @@ -0,0 +1,30 @@ +GEM + remote: http://rubygems.org/ + specs: + aws-eventstream (1.0.1) + aws-partitions (1.106.0) + aws-sdk-core (3.33.0) + aws-eventstream (~> 1.0) + aws-partitions (~> 1.0) + aws-sigv4 (~> 1.0) + jmespath (~> 1.0) + aws-sdk-kms (1.9.0) + aws-sdk-core (~> 3, >= 3.26.0) + aws-sigv4 (~> 1.0) + aws-sdk-s3 (1.21.0) + aws-sdk-core (~> 3, >= 3.26.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.0) + aws-sigv4 (1.0.3) + jmespath (1.4.0) + semverly (1.0.0) + +PLATFORMS + ruby + +DEPENDENCIES + aws-sdk-s3 + semverly + +BUNDLED WITH + 1.16.2 diff --git a/001-resources-v2/s3-example/artifact 
b/001-resources-v2/s3-example/artifact new file mode 100755 index 00000000..7705fc84 --- /dev/null +++ b/001-resources-v2/s3-example/artifact @@ -0,0 +1,114 @@ +#!/usr/bin/env ruby + +require 'json' +require 'aws-sdk-s3' +require 'semverly' + +Aws.config.update( + region: 'us-east-1', + credentials: Aws::Credentials.new(ENV['ACCESS_KEY_ID'], + ENV['SECRET_ACCESS_KEY']) +) + +client = Aws::S3::Client.new + +case ARGV[0] +when 'check' + request = { + config: { + bucket: 'concourse-s3-test', + regex: 'concourse-(.*).pivotal' + }, + response_path: '/tmp/response' + } + + bucket = request[:config][:bucket] + prefix = request[:config][:regex].partition('(').first + file = File.new request[:response_path], 'w' + + response = client.list_objects_v2( + bucket: bucket, + prefix: prefix + ) + + versions = [] + + response.contents.each do |object| + matches = object.key.match request[:config][:regex] + continue unless matches + + semver = SemVer.parse(matches[1]) + space = "#{semver.major}.#{semver.minor}" + + versions << { space: space, path: object.key, version: matches[1] } + end + + versions = versions.sort_by do |version| + [version[:space], SemVer.parse(version[:version])] + end + + latest_version = versions.last + if latest_version + file.puts JSON.dump( + default_space: latest_version[:space] + ) + else + file.puts JSON.dump( + default_space: nil + ) + end + + versions.each do |version| + file.puts JSON.dump( + space: version[:space], + version: { path: version[:path] }, + metadata: [] + ) + end + + file.close + +when 'get' + request = { + config: { + bucket: 'concourse-s3-test', + regex: 'concourse-(.*).pivotal' + }, + space: '1.0', + version: { path: 'concourse-1.0.0.pivotal' } + } + + client.get_object( + response_target: request[:version][:path], + bucket: request[:config][:bucket], + key: request[:version][:path] + ) + +when 'put' + request = { + config: { + bucket: 'concourse-s3-test', + regex: 'concourse-(.*).pivotal', + file: 'concourse-3.0.0.pivotal' + }, + 
response_path: '/tmp/response' + } + + matches = request[:config][:file].match request[:config][:regex] + raise 'file path does not match regex' unless matches + + semver = SemVer.parse(matches[1]) + space = "#{semver.major}.#{semver.minor}" + + File.open(request[:config][:file], 'rb') do |f| + client.put_object(bucket: request[:config][:bucket], + key: request[:config][:file], body: f) + end + + File.open(request[:response_path], 'w') do |f| + f.puts JSON.dump( + space: space, + Created: [{ path: request[:config][:file] }] + ) + end +end diff --git a/001-resources-v2/s3-example/info b/001-resources-v2/s3-example/info new file mode 100755 index 00000000..be0e0398 --- /dev/null +++ b/001-resources-v2/s3-example/info @@ -0,0 +1,12 @@ +#!/usr/bin/env ruby + +require "json" + +puts JSON.dump({ + artifacts: { + api_version: "2.0", + check: "artifact check", + get: "artifact get", + put: "artifact put" + } +}) diff --git a/001-resources-v2/semver-example/.gitignore b/001-resources-v2/semver-example/.gitignore new file mode 100644 index 00000000..bbb00daa --- /dev/null +++ b/001-resources-v2/semver-example/.gitignore @@ -0,0 +1,2 @@ +check-repo +dot diff --git a/001-resources-v2/semver-example/Gemfile b/001-resources-v2/semver-example/Gemfile new file mode 100644 index 00000000..72115298 --- /dev/null +++ b/001-resources-v2/semver-example/Gemfile @@ -0,0 +1,5 @@ +source :rubygems + +gem 'git' +gem 'semantic' +gem 'pry' diff --git a/001-resources-v2/semver-example/Gemfile.lock b/001-resources-v2/semver-example/Gemfile.lock new file mode 100644 index 00000000..44bb5be8 --- /dev/null +++ b/001-resources-v2/semver-example/Gemfile.lock @@ -0,0 +1,21 @@ +GEM + remote: http://rubygems.org/ + specs: + coderay (1.1.2) + git (1.4.0) + method_source (0.9.0) + pry (0.11.3) + coderay (~> 1.1.0) + method_source (~> 0.9.0) + semantic (1.6.1) + +PLATFORMS + ruby + +DEPENDENCIES + git + pry + semantic + +BUNDLED WITH + 1.16.2 diff --git a/001-resources-v2/semver-example/README.md 
b/001-resources-v2/semver-example/README.md new file mode 100644 index 00000000..cc5b51f8 --- /dev/null +++ b/001-resources-v2/semver-example/README.md @@ -0,0 +1,13 @@ +# Git Resource v2 + +This implementation is done in Ruby using the `git` gem. I chose Ruby +over Bash because having a real language with more accessible data +structures is probably going to be more important with this new +interface, and Ruby feels pretty well suited (really just need a bit more +than Bash). + +Please leave comments on parts you like/don't like! But bear in mind the +goal here isn't necessarily the prettiness of the code, it's to see what +kinds of things the resource has to do. I'll be using Ruby purely as a +scripting language, hacking things together where needed in the interest +of brevity. diff --git a/001-resources-v2/semver-example/artifact b/001-resources-v2/semver-example/artifact new file mode 100755 index 00000000..35d3d2e0 --- /dev/null +++ b/001-resources-v2/semver-example/artifact @@ -0,0 +1,150 @@ +#!/usr/bin/env ruby + +require "json" +require "git" +require "semantic" + +# $request = JSON.parse(STDIN.read, symbolize_names: true) + +def commit_versions(log) + log.collect do |c| + { + version: {ref: c.sha}, + metadata: [ + {name: "author", value: c.author.name}, + {name: "author_date", value: c.author_date}, + {name: "commit", value: c.sha}, + {name: "committer", value: c.committer.name}, + {name: "committer_date", value: c.committer_date}, + {name: "message", value: c.message} + ] + } + end +end + +case ARGV[0] +when "check" + $request = { + config: {uri: "https://github.com/concourse/concourse"}, + from: { + "2.7": {version: "2.7.5"}, + "3.0": {version: "3.0.0"}, + "3.9": {version: "3.9.1"} + } + } + + tags = Git.ls_remote($request[:config][:uri])["tags"] + + versions = [] + tags.each do |tag, _| + next unless tag =~ /^v\d/ + + # TODO: skip non-matches + versions << Semantic::Version.new(tag[1..-1]) + end + + versions.sort! 
+ + from = {} + $request[:from].each do |space, version| + from[space] = Semantic::Version.new(version[:version]) + end + + latest_space = nil + + space_versions = {} + versions.each do |v| + name = "#{v.major}.#{v.minor}" + latest_space = name + next if from[name] && v < from[name] + + space_versions[name] ||= [] + space_versions[name] << { + version: {version: v.to_s}, + metadata: [] + } + end + + spaces = [] + space_versions.each do |space, vs| + spaces << { + space: space, + versions: vs, + has_latest: true + } + end + + puts JSON.dump({ + spaces: spaces, + default_space: latest_space + }) + +when "get" + File.open("version", "w") do |file| + file.write($request[:version][:version]) + end + +when "put" + $request = { + config: { + uri: "https://github.com/concourse/concourse", + file: "version/version", + repo: "concourse" + }, + response: "./response.json" + } + + # TODO: set up auth/etc + repo = Git.open($request[:config][:repo]).fetch("origin", tags: true) + + if file_path = $config[:file] + version = Semantic::Version.new(File.read(file_path)) + else + # get current version from latest tag + latest_tag = repo.describe("HEAD", tags: true, abbrev: 0, match: "v*") + + version = Semantic::Version.new(latest_tag) + + case bump = $config[:bump] + when "final" + version.pre = nil + else + # this ends with a ! but doesn't mutate it... weird. 
+ version = version.increment!(bump) + end + + if pre = $config[:pre] + num = + if version.pre + current_pre, current_num = version.identifiers(version.pre) + if pre == current_pre + current_num + 1 + else + 1 + end + else + 1 + end + + version.pre = "#{pre}.#{num}" + end + end + + space = "#{version.major}.#{version.minor}" + + tag = repo.add_tag("v#{version}", force: true) + + # TODO: set up auth/etc + repo.push("origin", "refs/tags/#{tag.name}") + + response = JSON.dump({ + space => { + created: [{version: tag.name}], + deleted: [] + } + }) + + File.open($request[:response], "w") do |f| + f.write(response) + end +end diff --git a/001-resources-v2/semver-example/info b/001-resources-v2/semver-example/info new file mode 100755 index 00000000..be0e0398 --- /dev/null +++ b/001-resources-v2/semver-example/info @@ -0,0 +1,12 @@ +#!/usr/bin/env ruby + +require "json" + +puts JSON.dump({ + artifacts: { + api_version: "2.0", + check: "artifact check", + get: "artifact get", + put: "artifact put" + } +})