From b97636183d0ed3c93ddae421995b110b02531d4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Tue, 17 Apr 2018 16:26:32 +0200 Subject: [PATCH] sync: do not remove repositories on some errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit is more explicit on what to do when a catalog error happens. In this case, if a plain catalog error happens (fetching the list of repositories and their manifests), then nothing will be done. If fetching the catalog worked, and fetching the list of tags then suddenly fails, before this commit that repo would've get nuked because it would've get a repository with no tags. This commit instructs to skip that repository when considering whether to remove or not some repositories that are found to be dangling. See #1293 See #1599 Signed-off-by: Miquel Sabaté Solà --- app/models/repository.rb | 24 ++++++++++----- lib/portus/background/sync.rb | 26 +++++++++++----- lib/portus/registry_client.rb | 39 ++++++++++++++--------- spec/lib/portus/background/sync_spec.rb | 41 ++++++++++++++++++++++++- spec/lib/portus/registry_client_spec.rb | 30 +++++++++++++++++- 5 files changed, 129 insertions(+), 31 deletions(-) diff --git a/app/models/repository.rb b/app/models/repository.rb index 9c03dcf87..000206aad 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -151,6 +151,20 @@ def self.id_and_digest_from_event(event, repo) [id, digest] end + # Returns the repository for the given full repository name. If it cannot be + # found, then it returns nil. + def self.from_catalog(name, create_if_missing = false) + # If the namespace does not exist, get out. + namespace, name = Namespace.get_from_name(name) + return if namespace.nil? + + if create_if_missing + Repository.find_or_create_by!(name: name, namespace: namespace) + else + Repository.find_by(name: name, namespace: namespace) + end + end + # Create or update the given repository in JSON format. The given repository # follows the same JSON format as in the one used by the Catalog API. # Therefore, it's a hash with two keys: @@ -164,17 +178,13 @@ def self.id_and_digest_from_event(event, repo) # # Returns the final repository object. def self.create_or_update!(repo) - # If the namespace does not exist, get out. - namespace, name = Namespace.get_from_name(repo["name"]) - return if namespace.nil? + repository = Repository.from_catalog(repo["name"], true) + return unless repository + tags = repository.tags.pluck(:name) # The portus user is the author for the created tags. portus = User.find_by(username: "portus") - # Add the needed tags. - repository = Repository.find_or_create_by!(name: name, namespace: namespace) - tags = repository.tags.pluck(:name) - to_be_deleted_tags = tags - repo["tags"] client = Registry.get.client diff --git a/lib/portus/background/sync.rb b/lib/portus/background/sync.rb index 6c47fc924..c97be22fe 100644 --- a/lib/portus/background/sync.rb +++ b/lib/portus/background/sync.rb @@ -50,6 +50,14 @@ def enabled? # Simply fetches the catalog from the registry and calls # `update_registry!`. + # + # If an exception was raised when fetching the catalog (e.g. timeout), + # then it will handle it by logging such exception. Moreover, sometimes + # the given catalog object contains nil values for the `tags` field. This + # happens when there were exceptions being raised when fetching the list + # of tags for that particular repository. In these cases the + # `update_registry!` method will never remove them to avoid false + # positives. def execute! ::Registry.find_each do |registry| cat = registry.client.catalog @@ -89,19 +97,23 @@ def to_s protected # This method updates the database of this application with the given - # registry contents. + # registry contents. If a repository had a blank `tags` field, then this + # method will not removed (because maybe the image is still uploading, or + # there was a temporary error when fetching the tags). def update_registry!(catalog) dangling_repos = Repository.all.pluck(:id) # In this loop we will create/update all the repos from the catalog. # Created/updated repos will be removed from the "repos" array. catalog.each do |r| - if r["tags"].blank? - Rails.logger.debug "skip upload not finished repo #{r["name"]}" - else - repository = Repository.create_or_update!(r) - dangling_repos.delete repository.id unless repository.nil? - end + repository = if r["tags"].blank? + Rails.logger.debug "skipping repo with no tags: #{r["name"]}" + Repository.from_catalog(r["name"]) + else + Repository.create_or_update!(r) + end + + dangling_repos.delete repository.id unless repository.nil? end # At this point, the remaining items in the "repos" array are repos that diff --git a/lib/portus/registry_client.rb b/lib/portus/registry_client.rb index 7a05432b2..f25e1f2e7 100644 --- a/lib/portus/registry_client.rb +++ b/lib/portus/registry_client.rb @@ -85,9 +85,16 @@ def manifest(repository, tag = "latest") # corresponding tags. If something goes wrong while fetching the repos from # the catalog (e.g. authorization error), it will raise an exception. # - # Returns an array of hashes which contain two keys: - # - name: a string containing the name of the repository. - # - tags: an array containing the available tags for the repository. + # Returns an array of hashes where each hash contains a `name` and a `tags` + # field. The given repository name is fully qualified and the `tags` field + # simply contains an array of strings for each tag. + # + # The list of tags for each repository is taken by calling `#tags`, and it + # handles the exceptions that might be raised. If an exception was raised + # when fetching the tags (e.g. a timeout), then it will set the `tags` field + # of the currently evaluated repository to nil. This is done this way + # because setting an empty value would be ambiguous, and leaving exception + # handling to upper layers might be confusing. # # Three different exceptions might be raised: # @@ -185,24 +192,26 @@ def fetch_link(header) link.strip[1, link.size - 2] end - # Adds the available tags for each of the given repositories. If there is a - # problem while fetching a repository's tag, it will return an empty array. - # Otherwise it will return an array with the results as specified in the - # documentation of the `catalog` method. + # Adds the available tags for each of the given repositories. If the given + # repository object is nil, then it returns an empty array. # - # It rescues the exceptions that might be raised by `#tags`, so if a fetch - # fails for a particular repository, this method tries to fetch the tags for - # other methods. + # The returned object on success (or partial success) is explained in the + # `#catalog` method. def add_tags(repositories) return [] if repositories.nil? result = [] repositories.each do |repo| - ts = tags(repo) - result << { "name" => repo, "tags" => ts } if ts.present? - rescue ::Portus::RequestError, ::Portus::Errors::NotFoundError, - ::Portus::RegistryClient::RegistryError => e - Rails.logger.debug "Could not get tags for repo: #{repo}: #{e.message}." + ts = nil + + begin + ts = tags(repo) + rescue ::Portus::RequestError, ::Portus::Errors::NotFoundError, + ::Portus::RegistryClient::RegistryError => e + Rails.logger.debug "Could not get tags for repo: #{repo}: #{e.message}." + end + + result << { "name" => repo, "tags" => ts } end result end diff --git a/spec/lib/portus/background/sync_spec.rb b/spec/lib/portus/background/sync_spec.rb index 3f2eda23e..a15ea71c6 100644 --- a/spec/lib/portus/background/sync_spec.rb +++ b/spec/lib/portus/background/sync_spec.rb @@ -165,7 +165,7 @@ def update_registry!(catalog) end describe "Database already filled with repos" do - let!(:registry) { create(:registry) } + let!(:registry) { create(:registry, "hostname" => "registry.test.lan") } let!(:owner) { create(:user) } let!(:namespace) { create(:namespace, registry: registry) } let!(:repo1) { create(:repository, name: "repo1", namespace: namespace) } @@ -227,6 +227,45 @@ def update_registry!(catalog) tags = Repository.find_by(name: "repo2").tags expect(tags.map(&:name)).to match_array(%w[tag2 tag3]) end + + it "does not remove a repository with nil tags on update-delete" do + APP_CONFIG["background"]["sync"]["strategy"] = "update-delete" + + allow_any_instance_of(::Portus::RegistryClient).to receive(:manifest).and_return(["", ""]) + + # repo2 is not removed because it exists and the tags is nil. This + # happens when fetching tags raised a handled exception. + sync = SyncMock.new + sync.update_registry!([{ "name" => "busybox", "tags" => ["latest", "0.1"] }, + { "name" => "#{namespace.name}/repo1", "tags" => ["latest"] }, + { "name" => "#{namespace.name}/repo2", "tags" => nil }]) + + r = Repository.find_by(name: "repo2") + expect(r.tags.size).to eq 2 + end + + it "does not remove a repository when its tags raised an exception" do + APP_CONFIG["background"]["sync"]["strategy"] = "update-delete" + + VCR.turn_on! + + allow_any_instance_of(::Portus::RegistryClient).to receive(:manifest).and_return(["", ""]) + allow_any_instance_of(::Portus::RegistryClient).to receive(:tags) do + raise ::Portus::Errors::NotFoundError, "I AM ERROR" + end + + ns = Namespace.where(global: true).first + busybox = create(:repository, name: "busybox", namespace: ns) + create(:tag, name: "doesnotexist", repository: busybox) + + VCR.use_cassette("registry/get_registry_catalog", record: :none) do + sync = SyncMock.new + sync.execute! + end + + expect(Repository.count).to eq 1 + expect(Repository.first.tags.size).to eq 1 + end end describe "uploading repository whose tags is nil" do diff --git a/spec/lib/portus/registry_client_spec.rb b/spec/lib/portus/registry_client_spec.rb index b89f173c7..b98571691 100644 --- a/spec/lib/portus/registry_client_spec.rb +++ b/spec/lib/portus/registry_client_spec.rb @@ -297,9 +297,14 @@ def fetch_link_test(header) ) catalog = registry.catalog - expect(catalog.length).to be 1 + expect(catalog.length).to be 2 expect(catalog[0]["name"]).to eq "busybox" expect(catalog[0]["tags"]).to match_array(["latest"]) + + # The second repo failed at fetching tags. Thus, it will be set to nil + # to reflect this situation. + expect(catalog[1]["name"]).to eq "another" + expect(catalog[1]["tags"]).to be_nil end end @@ -347,6 +352,29 @@ def fetch_link_test(header) link = "; rel=\"next\"" expect(registry.fetch_link_test(link)).to eq "v2/_catalog?last=mssola%2Fbusybox89&n=100" end + + it "returns nil for tags when an exception happened" do + create(:registry) + create(:admin, username: "portus") + + allow_any_instance_of(::Portus::RegistryClient).to receive(:tags) do + raise ::Portus::Errors::NotFoundError, "I AM ERROR" + end + + VCR.use_cassette("registry/get_registry_catalog", record: :none) do + registry = described_class.new( + registry_server, + false, + "portus", + Rails.application.secrets.portus_password + ) + + catalog = registry.catalog + expect(catalog.length).to be 1 + expect(catalog[0]["name"]).to eq "busybox" + expect(catalog[0]["tags"]).to be_nil + end + end end context "fetching lists from the catalog" do