From a37d2e69becfa5ad4f43ee799f8142c61935c120 Mon Sep 17 00:00:00 2001
From: Brian Austin <13002992+brianjaustin@users.noreply.github.com>
Date: Thu, 9 Jan 2025 13:39:07 -0500
Subject: [PATCH] AO3-5578 Try to be nicer to the database when copying icons (#5019)

* Test listing in s3

* Other tasks & fixes

* Experiment with delayed upload

* Fixes

* Upload after txn

* Rubocop things

* Fixes

* Avoid duplicate attachments

* Revert "Avoid duplicate attachments"

This reverts commit 476bd029e0e01cfad64d7767262e3352f3bc1d73.
---
 lib/tasks/after_tasks.rake | 185 +++++++++++++++++++++++++------------
 1 file changed, 126 insertions(+), 59 deletions(-)

diff --git a/lib/tasks/after_tasks.rake b/lib/tasks/after_tasks.rake
index 762d243cee..597a0dc59f 100644
--- a/lib/tasks/after_tasks.rake
+++ b/lib/tasks/after_tasks.rake
@@ -325,101 +325,168 @@ namespace :After do
 
   desc "Migrate collection icons to ActiveStorage paths"
   task(migrate_collection_icons: :environment) do
+    require "aws-sdk-s3"
     require "open-uri"
 
     return unless Rails.env.staging? || Rails.env.production?
 
+    bucket_name = ENV["S3_BUCKET"]
+    prefix = "collections/icons/"
+    s3 = Aws::S3::Resource.new(
+      region: ENV["S3_REGION"],
+      access_key_id: ENV["S3_ACCESS_KEY_ID"],
+      secret_access_key: ENV["S3_SECRET_ACCESS_KEY"]
+    )
+    old_bucket = s3.bucket(bucket_name)
+    new_bucket = s3.bucket(ENV["TARGET_BUCKET"])
+
     Collection.no_touching do
-      Collection.find_in_batches.with_index do |batch, index|
-        batch.each do |collection|
-          next if collection.icon_file_name.blank?
-
-          image = collection.icon_file_name
-          ext = File.extname(image)
-          image_original = "original#{ext}"
-
-          # Collection icons are co-mingled in production and staging...
-          icon_url = "https://s3.amazonaws.com/otw-ao3-icons/collections/icons/#{collection.id}/#{image_original}"
-          begin
-            collection.icon.attach(io: URI.parse(icon_url).open,
-                                   filename: image_original,
-                                   content_type: collection.icon_content_type)
-          rescue StandardError => e
-            puts "Error '#{e}' copying #{icon_url}"
+      old_bucket.objects(prefix: prefix).each do |object|
+        # Path example: staging/icons/108621/original.png
+        path_parts = object.key.split("/")
+        next unless path_parts[-1]&.include?("original")
+        next if ActiveStorage::Attachment.where(record_type: "Collection", record_id: path_parts[-2]).any?
+
+        collection_id = path_parts[-2]
+        old_icon = URI.open("https://s3.amazonaws.com/#{bucket_name}/#{object.key}")
+        checksum = OpenSSL::Digest.new("MD5").tap do |result|
+          while (chunk = old_icon.read(5.megabytes))
+            result << chunk
           end
+          old_icon.rewind
+        end.base64digest
+
+        key = nil
+        ActiveRecord::Base.transaction do
+          blob = ActiveStorage::Blob.create_before_direct_upload!(
+            filename: path_parts[-1],
+            byte_size: old_icon.size,
+            checksum: checksum,
+            content_type: Marcel::MimeType.for(old_icon)
+          )
+          key = blob.key
+          blob.attachments.create(
+            name: "icon",
+            record_type: "Collection",
+            record_id: collection_id
+          )
         end
 
-        puts "Finished batch #{index + 1}" && $stdout.flush
-        sleep 10
+        new_bucket.put_object(key: key, body: old_icon, acl: "bucket-owner-full-control")
+        puts "Finished collection #{collection_id}"
+        $stdout.flush
       end
     end
   end
 
   desc "Migrate pseud icons to ActiveStorage paths"
   task(migrate_pseud_icons: :environment) do
+    require "aws-sdk-s3"
     require "open-uri"
 
     return unless Rails.env.staging? || Rails.env.production?
 
+    bucket_name = ENV["S3_BUCKET"]
+    prefix = Rails.env.production? ? "icons/" : "staging/icons/"
+    s3 = Aws::S3::Resource.new(
+      region: ENV["S3_REGION"],
+      access_key_id: ENV["S3_ACCESS_KEY_ID"],
+      secret_access_key: ENV["S3_SECRET_ACCESS_KEY"]
+    )
+    old_bucket = s3.bucket(bucket_name)
+    new_bucket = s3.bucket(ENV["TARGET_BUCKET"])
+
     Pseud.no_touching do
-      Pseud.find_in_batches.with_index do |batch, index|
-        ActiveRecord::Base.transaction do
-          batch.each do |pseud|
-            next if pseud.icon_file_name.blank?
-
-            image = pseud.icon_file_name
-            ext = File.extname(image)
-            image_original = "original#{ext}"
-
-            icon_url = if Rails.env.production?
-                         "https://s3.amazonaws.com/otw-ao3-icons/icons/#{pseud.id}/#{image_original}"
-                       else
-                         "https://s3.amazonaws.com/otw-ao3-icons/staging/icons/#{pseud.id}/#{image_original}"
-                       end
-            begin
-              pseud.icon.attach(io: URI.parse(icon_url).open,
-                                filename: image_original,
-                                content_type: pseud.icon_content_type)
-            rescue StandardError => e
-              puts "Error '#{e}' copying #{icon_url}"
-            end
+      old_bucket.objects(prefix: prefix).each do |object|
+        # Path example: staging/icons/108621/original.png
+        path_parts = object.key.split("/")
+        next unless path_parts[-1]&.include?("original")
+        next if ActiveStorage::Attachment.where(record_type: "Pseud", record_id: path_parts[-2]).any?
+
+        pseud_id = path_parts[-2]
+        old_icon = URI.open("https://s3.amazonaws.com/#{bucket_name}/#{object.key}")
+        checksum = OpenSSL::Digest.new("MD5").tap do |result|
+          while (chunk = old_icon.read(5.megabytes))
+            result << chunk
          end
+          old_icon.rewind
+        end.base64digest
+
+        key = nil
+        ActiveRecord::Base.transaction do
+          blob = ActiveStorage::Blob.create_before_direct_upload!(
+            filename: path_parts[-1],
+            byte_size: old_icon.size,
+            checksum: checksum,
+            content_type: Marcel::MimeType.for(old_icon)
+          )
+          key = blob.key
+          blob.attachments.create(
+            name: "icon",
+            record_type: "Pseud",
+            record_id: pseud_id
+          )
         end
 
-        puts "Finished batch #{index + 1}" && $stdout.flush
-        sleep 10
+        new_bucket.put_object(key: key, body: old_icon, acl: "bucket-owner-full-control")
+        puts "Finished pseud #{pseud_id}"
+        $stdout.flush
       end
     end
   end
 
   desc "Migrate skin icons to ActiveStorage paths"
   task(migrate_skin_icons: :environment) do
+    require "aws-sdk-s3"
     require "open-uri"
 
     return unless Rails.env.staging? || Rails.env.production?
 
+    bucket_name = ENV["S3_BUCKET"]
+    prefix = "skins/icons/"
+    s3 = Aws::S3::Resource.new(
+      region: ENV["S3_REGION"],
+      access_key_id: ENV["S3_ACCESS_KEY_ID"],
+      secret_access_key: ENV["S3_SECRET_ACCESS_KEY"]
+    )
+    old_bucket = s3.bucket(bucket_name)
+    new_bucket = s3.bucket(ENV["TARGET_BUCKET"])
+
     Skin.no_touching do
-      Skin.find_in_batches.with_index do |batch, index|
-        batch.each do |skin|
-          next if skin.icon_file_name.blank?
-
-          image = skin.icon_file_name
-          ext = File.extname(image)
-          image_original = "original#{ext}"
-
-          # Skin icons are co-mingled in production and staging...
-          icon_url = "https://s3.amazonaws.com/otw-ao3-icons/skins/icons/#{skin.id}/#{image_original}"
-          begin
-            skin.icon.attach(io: URI.parse(icon_url).open,
-                             filename: image_original,
-                             content_type: skin.icon_content_type)
-          rescue StandardError => e
-            puts "Error '#{e}' copying #{icon_url}"
+      old_bucket.objects(prefix: prefix).each do |object|
+        # Path example: staging/icons/108621/original.png
+        path_parts = object.key.split("/")
+        next unless path_parts[-1]&.include?("original")
+        next if ActiveStorage::Attachment.where(record_type: "Skin", record_id: path_parts[-2]).any?
+
+        skin_id = path_parts[-2]
+        old_icon = URI.open("https://s3.amazonaws.com/#{bucket_name}/#{object.key}")
+        checksum = OpenSSL::Digest.new("MD5").tap do |result|
+          while (chunk = old_icon.read(5.megabytes))
+            result << chunk
          end
+          old_icon.rewind
+        end.base64digest
+
+        key = nil
+        ActiveRecord::Base.transaction do
+          blob = ActiveStorage::Blob.create_before_direct_upload!(
+            filename: path_parts[-1],
+            byte_size: old_icon.size,
+            checksum: checksum,
+            content_type: Marcel::MimeType.for(old_icon)
+          )
+          key = blob.key
+          blob.attachments.create(
+            name: "icon",
+            record_type: "Skin",
+            record_id: skin_id
+          )
        end
 
-        puts "Finished batch #{index + 1}" && $stdout.flush
-        sleep 10
+        new_bucket.put_object(key: key, body: old_icon, acl: "bucket-owner-full-control")
+        puts "Finished skin #{skin_id}"
+        $stdout.flush
      end
    end
  end
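
The three tasks above differ only in the model name, the key prefix, and the log label. As a minimal sketch (not part of this patch), the shared pattern could be factored into one helper; the helper name migrate_icons_from_s3 and its keyword arguments below are hypothetical, while the environment variables, ActiveStorage calls, and the upload-after-commit ordering are taken directly from the patch.

# Illustrative only -- not part of the patch. Helper name and parameters are
# hypothetical; the calls inside mirror the tasks above.
def migrate_icons_from_s3(record_type:, prefix:, label:)
  require "aws-sdk-s3"
  require "open-uri"

  bucket_name = ENV["S3_BUCKET"]
  s3 = Aws::S3::Resource.new(
    region: ENV["S3_REGION"],
    access_key_id: ENV["S3_ACCESS_KEY_ID"],
    secret_access_key: ENV["S3_SECRET_ACCESS_KEY"]
  )
  old_bucket = s3.bucket(bucket_name)
  new_bucket = s3.bucket(ENV["TARGET_BUCKET"])

  old_bucket.objects(prefix: prefix).each do |object|
    path_parts = object.key.split("/")
    next unless path_parts[-1]&.include?("original")
    # Skip records that already have an icon attachment, so the task can be re-run.
    next if ActiveStorage::Attachment.where(record_type: record_type, record_id: path_parts[-2]).any?

    record_id = path_parts[-2]
    old_icon = URI.open("https://s3.amazonaws.com/#{bucket_name}/#{object.key}")

    # MD5 checksum, computed in 5 MB chunks as in the patch.
    checksum = OpenSSL::Digest.new("MD5").tap do |digest|
      while (chunk = old_icon.read(5.megabytes))
        digest << chunk
      end
      old_icon.rewind
    end.base64digest

    key = nil
    # Only the blob and attachment rows are written inside the transaction.
    ActiveRecord::Base.transaction do
      blob = ActiveStorage::Blob.create_before_direct_upload!(
        filename: path_parts[-1],
        byte_size: old_icon.size,
        checksum: checksum,
        content_type: Marcel::MimeType.for(old_icon)
      )
      key = blob.key
      blob.attachments.create(name: "icon", record_type: record_type, record_id: record_id)
    end

    # The copy to the target bucket happens only after the transaction commits.
    new_bucket.put_object(key: key, body: old_icon, acl: "bucket-owner-full-control")
    puts "Finished #{label} #{record_id}"
    $stdout.flush
  end
end

# Hypothetical usage, mirroring the three tasks:
#   Collection.no_touching { migrate_icons_from_s3(record_type: "Collection", prefix: "collections/icons/", label: "collection") }
#   Pseud.no_touching { migrate_icons_from_s3(record_type: "Pseud", prefix: Rails.env.production? ? "icons/" : "staging/icons/", label: "pseud") }
#   Skin.no_touching { migrate_icons_from_s3(record_type: "Skin", prefix: "skins/icons/", label: "skin") }

This split is what the commit subject is getting at: the database transaction now contains only the blob and attachment inserts, and the actual file transfer to the new bucket runs after it commits, so copying icons no longer keeps transactions open for the duration of an upload.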