Skip to content
This repository has been archived by the owner on Apr 17, 2023. It is now read-only.

Commit

Permalink
sync: do not remove repositories on some errors
Browse files Browse the repository at this point in the history
This commit is more explicit on what to do when a catalog error happens.
In this case, if a plain catalog error happens (fetching the list of
repositories and their manifests), then nothing will be done. If
fetching the catalog worked, and fetching the list of tags then suddenly
fails, before this commit that repo would've get nuked because it
would've get a repository with no tags.

This commit instructs to skip that repository when considering whether
to remove or not some repositories that are found to be dangling.

See #1293
See #1599

Signed-off-by: Miquel Sabaté Solà <[email protected]>
  • Loading branch information
mssola committed Apr 18, 2018
1 parent 79bf527 commit b976361
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 31 deletions.
24 changes: 17 additions & 7 deletions app/models/repository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,20 @@ def self.id_and_digest_from_event(event, repo)
[id, digest]
end

# Returns the repository for the given full repository name. If it cannot be
# found, then it returns nil.
def self.from_catalog(name, create_if_missing = false)
# If the namespace does not exist, get out.
namespace, name = Namespace.get_from_name(name)
return if namespace.nil?

if create_if_missing
Repository.find_or_create_by!(name: name, namespace: namespace)
else
Repository.find_by(name: name, namespace: namespace)
end
end

# Create or update the given repository in JSON format. The given repository
# follows the same JSON format as in the one used by the Catalog API.
# Therefore, it's a hash with two keys:
Expand All @@ -164,17 +178,13 @@ def self.id_and_digest_from_event(event, repo)
#
# Returns the final repository object.
def self.create_or_update!(repo)
# If the namespace does not exist, get out.
namespace, name = Namespace.get_from_name(repo["name"])
return if namespace.nil?
repository = Repository.from_catalog(repo["name"], true)
return unless repository
tags = repository.tags.pluck(:name)

# The portus user is the author for the created tags.
portus = User.find_by(username: "portus")

# Add the needed tags.
repository = Repository.find_or_create_by!(name: name, namespace: namespace)
tags = repository.tags.pluck(:name)

to_be_deleted_tags = tags - repo["tags"]

client = Registry.get.client
Expand Down
26 changes: 19 additions & 7 deletions lib/portus/background/sync.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ def enabled?

# Simply fetches the catalog from the registry and calls
# `update_registry!`.
#
# If an exception was raised when fetching the catalog (e.g. timeout),
# then it will handle it by logging such exception. Moreover, sometimes
# the given catalog object contains nil values for the `tags` field. This
# happens when there were exceptions being raised when fetching the list
# of tags for that particular repository. In these cases the
# `update_registry!` method will never remove them to avoid false
# positives.
def execute!
::Registry.find_each do |registry|
cat = registry.client.catalog
Expand Down Expand Up @@ -89,19 +97,23 @@ def to_s
protected

# This method updates the database of this application with the given
# registry contents.
# registry contents. If a repository had a blank `tags` field, then this
# method will not removed (because maybe the image is still uploading, or
# there was a temporary error when fetching the tags).
def update_registry!(catalog)
dangling_repos = Repository.all.pluck(:id)

# In this loop we will create/update all the repos from the catalog.
# Created/updated repos will be removed from the "repos" array.
catalog.each do |r|
if r["tags"].blank?
Rails.logger.debug "skip upload not finished repo #{r["name"]}"
else
repository = Repository.create_or_update!(r)
dangling_repos.delete repository.id unless repository.nil?
end
repository = if r["tags"].blank?
Rails.logger.debug "skipping repo with no tags: #{r["name"]}"
Repository.from_catalog(r["name"])
else
Repository.create_or_update!(r)
end

dangling_repos.delete repository.id unless repository.nil?
end

# At this point, the remaining items in the "repos" array are repos that
Expand Down
39 changes: 24 additions & 15 deletions lib/portus/registry_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,16 @@ def manifest(repository, tag = "latest")
# corresponding tags. If something goes wrong while fetching the repos from
# the catalog (e.g. authorization error), it will raise an exception.
#
# Returns an array of hashes which contain two keys:
# - name: a string containing the name of the repository.
# - tags: an array containing the available tags for the repository.
# Returns an array of hashes where each hash contains a `name` and a `tags`
# field. The given repository name is fully qualified and the `tags` field
# simply contains an array of strings for each tag.
#
# The list of tags for each repository is taken by calling `#tags`, and it
# handles the exceptions that might be raised. If an exception was raised
# when fetching the tags (e.g. a timeout), then it will set the `tags` field
# of the currently evaluated repository to nil. This is done this way
# because setting an empty value would be ambiguous, and leaving exception
# handling to upper layers might be confusing.
#
# Three different exceptions might be raised:
#
Expand Down Expand Up @@ -185,24 +192,26 @@ def fetch_link(header)
link.strip[1, link.size - 2]
end

# Adds the available tags for each of the given repositories. If there is a
# problem while fetching a repository's tag, it will return an empty array.
# Otherwise it will return an array with the results as specified in the
# documentation of the `catalog` method.
# Adds the available tags for each of the given repositories. If the given
# repository object is nil, then it returns an empty array.
#
# It rescues the exceptions that might be raised by `#tags`, so if a fetch
# fails for a particular repository, this method tries to fetch the tags for
# other methods.
# The returned object on success (or partial success) is explained in the
# `#catalog` method.
def add_tags(repositories)
return [] if repositories.nil?

result = []
repositories.each do |repo|
ts = tags(repo)
result << { "name" => repo, "tags" => ts } if ts.present?
rescue ::Portus::RequestError, ::Portus::Errors::NotFoundError,
::Portus::RegistryClient::RegistryError => e
Rails.logger.debug "Could not get tags for repo: #{repo}: #{e.message}."
ts = nil

begin
ts = tags(repo)
rescue ::Portus::RequestError, ::Portus::Errors::NotFoundError,
::Portus::RegistryClient::RegistryError => e
Rails.logger.debug "Could not get tags for repo: #{repo}: #{e.message}."
end

result << { "name" => repo, "tags" => ts }
end
result
end
Expand Down
41 changes: 40 additions & 1 deletion spec/lib/portus/background/sync_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def update_registry!(catalog)
end

describe "Database already filled with repos" do
let!(:registry) { create(:registry) }
let!(:registry) { create(:registry, "hostname" => "registry.test.lan") }
let!(:owner) { create(:user) }
let!(:namespace) { create(:namespace, registry: registry) }
let!(:repo1) { create(:repository, name: "repo1", namespace: namespace) }
Expand Down Expand Up @@ -227,6 +227,45 @@ def update_registry!(catalog)
tags = Repository.find_by(name: "repo2").tags
expect(tags.map(&:name)).to match_array(%w[tag2 tag3])
end

it "does not remove a repository with nil tags on update-delete" do
APP_CONFIG["background"]["sync"]["strategy"] = "update-delete"

allow_any_instance_of(::Portus::RegistryClient).to receive(:manifest).and_return(["", ""])

# repo2 is not removed because it exists and the tags is nil. This
# happens when fetching tags raised a handled exception.
sync = SyncMock.new
sync.update_registry!([{ "name" => "busybox", "tags" => ["latest", "0.1"] },
{ "name" => "#{namespace.name}/repo1", "tags" => ["latest"] },
{ "name" => "#{namespace.name}/repo2", "tags" => nil }])

r = Repository.find_by(name: "repo2")
expect(r.tags.size).to eq 2
end

it "does not remove a repository when its tags raised an exception" do
APP_CONFIG["background"]["sync"]["strategy"] = "update-delete"

VCR.turn_on!

allow_any_instance_of(::Portus::RegistryClient).to receive(:manifest).and_return(["", ""])
allow_any_instance_of(::Portus::RegistryClient).to receive(:tags) do
raise ::Portus::Errors::NotFoundError, "I AM ERROR"
end

ns = Namespace.where(global: true).first
busybox = create(:repository, name: "busybox", namespace: ns)
create(:tag, name: "doesnotexist", repository: busybox)

VCR.use_cassette("registry/get_registry_catalog", record: :none) do
sync = SyncMock.new
sync.execute!
end

expect(Repository.count).to eq 1
expect(Repository.first.tags.size).to eq 1
end
end

describe "uploading repository whose tags is nil" do
Expand Down
30 changes: 29 additions & 1 deletion spec/lib/portus/registry_client_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,14 @@ def fetch_link_test(header)
)

catalog = registry.catalog
expect(catalog.length).to be 1
expect(catalog.length).to be 2
expect(catalog[0]["name"]).to eq "busybox"
expect(catalog[0]["tags"]).to match_array(["latest"])

# The second repo failed at fetching tags. Thus, it will be set to nil
# to reflect this situation.
expect(catalog[1]["name"]).to eq "another"
expect(catalog[1]["tags"]).to be_nil
end
end

Expand Down Expand Up @@ -347,6 +352,29 @@ def fetch_link_test(header)
link = "<v2/_catalog?last=mssola%2Fbusybox89&n=100>; rel=\"next\""
expect(registry.fetch_link_test(link)).to eq "v2/_catalog?last=mssola%2Fbusybox89&n=100"
end

it "returns nil for tags when an exception happened" do
create(:registry)
create(:admin, username: "portus")

allow_any_instance_of(::Portus::RegistryClient).to receive(:tags) do
raise ::Portus::Errors::NotFoundError, "I AM ERROR"
end

VCR.use_cassette("registry/get_registry_catalog", record: :none) do
registry = described_class.new(
registry_server,
false,
"portus",
Rails.application.secrets.portus_password
)

catalog = registry.catalog
expect(catalog.length).to be 1
expect(catalog[0]["name"]).to eq "busybox"
expect(catalog[0]["tags"]).to be_nil
end
end
end

context "fetching lists from the catalog" do
Expand Down

0 comments on commit b976361

Please sign in to comment.