diff --git a/app/services/decidim/repair_comments_service.rb b/app/services/decidim/repair_comments_service.rb new file mode 100644 index 00000000..284f2ffa --- /dev/null +++ b/app/services/decidim/repair_comments_service.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Decidim + class RepairCommentsService + include Decidim::TranslatableAttributes + + def self.run + new.execute + end + + def execute + return [] if ok? + + update_comments! + end + + def ok? + invalid_comments.empty? + end + + def invalid_comments + return @invalid_comments if @invalid_comments + + invalid_comments = [] + Decidim::Comments::Comment.find_each do |comment| + next if translated_attribute(comment.body).is_a?(String) + + invalid_comments << [comment, comment.body.values.first] + end + @invalid_comments = invalid_comments + end + + private + + # Update each users with new nickname + # Returns Array of updated User ID + def update_comments! + invalid_comments.map do |comment, new_body| + comment.body = new_body + + comment.id if comment.save!(validate: false) # Validation is skipped to allow updating comments from root that don't accepts new comments + end.compact + end + end +end diff --git a/app/services/decidim/repair_url_in_content_service.rb b/app/services/decidim/repair_url_in_content_service.rb new file mode 100644 index 00000000..f13161ac --- /dev/null +++ b/app/services/decidim/repair_url_in_content_service.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +require "decidim/content_fixer" + +module Decidim + # Looks for any occurence of "@deprecated_endpoint" in every database columns of type COLUMN_TYPES + # For each field containing @deprecated_endpoint: + # - Looks for the current ActiveStorage::Blob with the given filename + # - Find the blob's service_url + # - Replace the @deprecated_endpoint with the blob's service_url in text + # - Update the column + # Context: + # After S3 assets migration with rake task "bundle exec rake scaleway:storage:migrate_from_local", every linked documents URL were well updated. + # However every links added to text fields redirecting to an uploaded file were outdated and still redirects to the old S3 bucket + class RepairUrlInContentService + COLUMN_TYPES = [:string, :jsonb, :text].freeze + + # @param [String] deprecated_endpoint + # @param [Logger] logger + def self.run(deprecated_endpoint, logger = nil) + new(deprecated_endpoint, logger).run + end + + # @param [String] deprecated_endpoint + # @param [Logger] logger + def initialize(deprecated_endpoint, logger = nil) + @logger = logger || Rails.logger + @deprecated_endpoint = deprecated_endpoint&.gsub(%r{https?://}, "") + end + + def run + # Find all models that have a column of type string jsonb or text + # For each model, find all records that have a column of type string jsonb or text + # For each record, replace all urls contained in content with the new url + # Save the record + return false if @deprecated_endpoint.blank? + + models.each do |model| + next unless model.respond_to?(:columns) + + @logger.info("Checking model #{model} for deprecated endpoints #{@deprecated_endpoint}") + records = records_for model + next if records.blank? + + @logger.info "Found #{records.count} records to update for #{model}" + records.each do |record| + columns = model.columns.select { |column| column.type.in? COLUMN_TYPES } + record = update_each_column(record, columns) + + save_record!(record) + end + end + end + + def save_record!(record) + if record.invalid? + @logger.warn "Invalid record #{record.class}##{record.id}: #{record.errors.full_messages.join(", ")}" + return + end + + if record.has_changes_to_save? + record.class.transaction do + record.save! + end + else + @logger.info "No changes to save for #{record.class}##{record.id}" + end + end + + # In some cases, the column returns a settings object, + # therefore we need to update each of its attributes before saving the column + # @param [Object] record + # @param [[ActiveRecord::ConnectionAdapters::PostgreSQL::Column]] columns + # @return record | nil + def update_each_column(record, columns) + columns.each do |column| + current_content = current_content_for(record, column) + next if current_content.blank? + + column_name = column.try(:name) ? column.name : column + + @logger.info "Updating ##{[record.class, record.try(:id), column_name].compact.join("# ")}" + + if current_content.is_a?(Hash) || current_content.is_a?(Array) || current_content.is_a?(String) + next unless current_content.to_s.include?(@deprecated_endpoint) + + new_content = Decidim::ContentFixer.repair(current_content, @deprecated_endpoint, @logger) + + @logger.info "Old content: #{current_content}" + @logger.info "New content: #{new_content}" + + write_attribute(record, column, new_content) + else + # If the column is a settings object, we need to update each of its attributes using a recursive call + write_attribute(record, column, update_each_column(current_content, current_content.instance_variables)) + end + end + + record + end + + def write_attribute(record, column, value) + if column.try(:name) + record.write_attribute(:"#{column.name}", value) + else + record.instance_variable_set(column, value) + end + end + + def current_content_for(record, column) + if column.try(:name) + record.send(column.name) + else + record.instance_variable_get(column) + end + end + + def records_for(model) + model.columns.map do |col| + next unless col.type.in?(COLUMN_TYPES) + + model.where("#{col.name}::text LIKE ?", "%#{@deprecated_endpoint}%") + end.compact.reduce(&:or) + rescue StandardError => e + @logger.warn "Error while fetching records from #{model}: #{e.message}" + [] + end + + def models + ActiveRecord::Base.connection.tables.map do |table| + next unless table.starts_with?("decidim_") + + classify_model(table) + end.compact + end + + # Because of the way decidim models are named, we need to try to find the model by subbing _ with / and then classify it + # For example "decidim_comments_comments" becomes "Decidim::CommentsComment", then "Decidim::Comments::Comment" + # This helps us find models that are namespaced + # @param [String] table + def classify_model(table) + if table.include?("_") + new_table = table.sub("_", "/") + model = new_table.classify.safe_constantize + + return model if model + + classify_model(new_table) + else + @logger.warn "Could not find model for table #{table}" + + nil + end + end + end +end diff --git a/lib/decidim/content_fixer.rb b/lib/decidim/content_fixer.rb new file mode 100644 index 00000000..0614f8b3 --- /dev/null +++ b/lib/decidim/content_fixer.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +module Decidim + class ContentFixer + TAGS_TO_FIX = %w(a img).freeze + + def initialize(content, deprecated_endpoint, logger) + @content = content + @logger = logger + @deprecated_endpoint = deprecated_endpoint + end + + def self.repair(content, deprecated_endpoint, logger) + new(content, deprecated_endpoint, logger).repair + end + + def repair + case @content + when Hash + @content.transform_values do |value| + find_and_replace(value) + end + when String + find_and_replace(@content) + when Array + @content.map do |value| + find_and_replace(value) + end + else + @logger.warn("Unsupported type #{@content.class}") + + nil + end + end + + def find_and_replace(content) + return content unless content.is_a?(String) && content.include?(@deprecated_endpoint) + + wrapper = nokogiri_will_wrap_with_p?(content) ? "p" : "body" + + doc = Nokogiri::HTML(content) + + TAGS_TO_FIX.each do |tag| + replace_urls(doc, tag) + end + + doc.css(wrapper).inner_html + end + + def blobs + @blobs ||= ActiveStorage::Blob.pluck(:filename, :id) + end + + def replace_urls(doc, tag) + attribute = tag == "img" ? "src" : "href" + + doc.css(tag).each do |source| + next unless source[attribute].include?(@deprecated_endpoint) + + new_source = new_source(source[attribute]) + + next unless new_source + + @logger.info "Replacing #{source[attribute]} with #{new_source}" + source[attribute] = new_source + end + end + + def new_source(source) + uri = URI.parse(source) + filename = if source.include?("response-content-disposition") + CGI.parse(uri.query)["response-content-disposition"].first.match(/filename=("?)(.+)\1/)[2] + else + uri.path.split("/").last + end + _filename, id = blobs.select { |blob, _id| ActiveSupport::Inflector.transliterate(blob) == filename }.first + + find_service_url_for_blob(id) + rescue URI::InvalidURIError + @logger.warn "Invalid URI for #{source}" + nil + end + + def find_service_url_for_blob(blob_id) + Rails.application.routes.url_helpers.rails_blob_path(ActiveStorage::Blob.find(blob_id), only_path: true) + rescue ActiveRecord::RecordNotFound + @logger.warn "Blob #{blob_id} not found" + nil + end + + def nokogiri_will_wrap_with_p?(content) + !content.start_with?("<") + end + end +end diff --git a/lib/tasks/repair_data.rake b/lib/tasks/repair_data.rake index b6f26372..9d0a527a 100644 --- a/lib/tasks/repair_data.rake +++ b/lib/tasks/repair_data.rake @@ -18,5 +18,37 @@ namespace :decidim do logger.info("Operation terminated") end + + desc "Check for malformed comments body and repair them if needed" + task comments: :environment do + logger = Logger.new($stdout) + logger.info("Checking all comments...") + + updated_comments_ids = Decidim::RepairCommentsService.run + + if updated_comments_ids.blank? + logger.info("No comments updated") + else + logger.info("#{updated_comments_ids} comments updated") + logger.info("Updated comments ID : #{updated_comments_ids.join(",")}") + end + + logger.info("Operation terminated") + end + + desc "Add all missing translation for translatable resources" + + task url_in_content: :environment do + logger = Logger.new($stdout) + deprecated_hosts = ENV["DEPRECATED_OBJECTSTORE_S3_HOSTS"].to_s.split(",").map(&:strip) + + if deprecated_hosts.blank? + logger.warn("DEPRECATED_OBJECTSTORE_S3_HOSTS env variable is not set") + else + deprecated_hosts.each do |host| + Decidim::RepairUrlInContentService.run(host, logger) + end + end + end end end diff --git a/spec/lib/decidim/content_fixer_spec.rb b/spec/lib/decidim/content_fixer_spec.rb new file mode 100644 index 00000000..3f5e7147 --- /dev/null +++ b/spec/lib/decidim/content_fixer_spec.rb @@ -0,0 +1,161 @@ +# frozen_string_literal: true + +require "spec_helper" + +require "decidim/content_fixer" + +describe Decidim::ContentFixer do + subject { described_class.new(content, deprecated_endpoint, logger) } + + let(:logger) { Rails.logger } + let(:deprecated_endpoint) { "s3.decidim.org" } + let(:invalid_resource) { create(:comment, body: invalid_body_comment) } + let(:invalid_body_comment) { { en: "
Here is a not valid comment with Link text
" } } + let(:content) { "Here is a not valid comment with Link text
" } + let(:deprecated_url) { "https://#{deprecated_endpoint}/xxxx?response-content-disposition=inline%3Bfilename%3D\"BuPa23_reglement-interieur.pdf\"%3Bfilename*%3DUTF-8''BuPa23_r%25C3%25A8glement-int%25C3%25A9rieur.pdf&response-content-type=application%2Fpdf" } + let!(:blob) { ActiveStorage::Blob.create_after_upload!(filename: "BuPa23_reglement-interieur.pdf", io: File.open("spec/fixtures/BuPa23_reglement-interieur.pdf"), content_type: "application/pdf") } + let(:blob_path) { Rails.application.routes.url_helpers.rails_blob_path(ActiveStorage::Blob.find(blob.id), only_path: true) } + + describe "#repair" do + it "returns the repaired content" do + replaced_content = subject.repair + + expect(replaced_content).to include(blob_path) + expect(replaced_content).not_to include(deprecated_endpoint) + end + + context "when content is a hash" do + let(:content) { { en: "Here is a not valid comment with Link text
" } } + + it "returns the repaired content" do + replaced_content = subject.repair + + expect(replaced_content[:en]).to include(blob_path) + expect(replaced_content[:en]).not_to include(deprecated_endpoint) + end + end + + context "when content is an array" do + let(:content) { ["Here is a not valid comment with Link text
"] } + + it "returns the repaired content" do + replaced_content = subject.repair + + expect(replaced_content.first).to include(blob_path) + expect(replaced_content.first).not_to include(deprecated_endpoint) + end + end + + context "when content is not a string, hash or array" do + let(:content) { 1 } + + it "raises an error" do + expect(subject.repair).to eq(nil) + end + end + end + + describe "#find_and_replace" do + it "replaces the deprecated url with the new url" do + replaced_content = subject.find_and_replace(content) + + expect(replaced_content).to start_with("" } + + it "replaces the deprecated url with the new url" do + replaced_content = subject.find_and_replace(content) + + expect(replaced_content).to start_with("Link text" } + + it "replaces the deprecated url with the new url" do + replaced_content = subject.find_and_replace(content) + + expect(replaced_content).not_to start_with("Link text" } + + it "returns false" do + expect(subject.nokogiri_will_wrap_with_p?(content)).to eq(true) + end + end + end + + describe "#find_service_url_for_blob" do + it "returns the service url for the given blob" do + expect(subject.find_service_url_for_blob(blob.id)).to eq(blob_path) + end + + context "when blob is not found" do + it "returns nil" do + expect(subject.find_service_url_for_blob(blob.id + 1)).to eq(nil) + end + end + end +end diff --git a/spec/lib/tasks/repair_data_url_in_content_spec.rb b/spec/lib/tasks/repair_data_url_in_content_spec.rb new file mode 100644 index 00000000..7ddce020 --- /dev/null +++ b/spec/lib/tasks/repair_data_url_in_content_spec.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require "spec_helper" + +describe "rake decidim:repair:url_in_content", type: :task do + let(:task) { Rake::Task[:"decidim:repair:url_in_content"] } + let(:deprecated_hosts) { "https://s3.example.org,https://www.s3.example.org" } + + before do + clear_enqueued_jobs + clear_performed_jobs + end + + after do + clear_enqueued_jobs + clear_performed_jobs + end + + it "preloads the Rails environment" do + expect(task.prerequisites).to include "environment" + end + + it "calls the service" do + with_modified_env DEPRECATED_OBJECTSTORE_S3_HOSTS: deprecated_hosts do + expect(Decidim::RepairUrlInContentService).to receive(:run).at_least(:twice).and_return(true) + + task.execute + end + end + + context "when env variable is not set" do + ["", nil].each do |value| + it "raises an error" do + with_modified_env deprecated_hosts: value do + expect(Decidim::RepairUrlInContentService).not_to receive(:run) + + task.execute + end + end + end + end +end diff --git a/spec/services/decidim/repair_comments_service_spec.rb b/spec/services/decidim/repair_comments_service_spec.rb new file mode 100644 index 00000000..1c9da68d --- /dev/null +++ b/spec/services/decidim/repair_comments_service_spec.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require "spec_helper" + +describe Decidim::RepairCommentsService do + subject { described_class.new } + + let!(:comments) { create_list(:comment, 10) } + + describe "#execute" do + it "returns empty array" do + expect(subject.execute).to be_empty + end + + it "does not change valid comments" do + expect { subject.execute }.not_to(change { comments.map(&:body) }) + end + + context "when invalid bodys" do + let(:invalid) { build :comment } + + before do + invalid.body = { "en" => { "en" => "foobar" } } + invalid.save!(validate: false) + end + + it "returns array of invalid comments IDs" do + expect(subject.execute).to eq([invalid.id]) + invalid.reload.body.delete("machine_translations") + expect(invalid.body).to eq({ "en" => "foobar" }) + end + end + end + + describe "#ok?" do + it "returns true" do + expect(subject).to be_ok + end + end + + describe "#invalid_comments" do + let(:invalid) { build :comment } + + before do + invalid.body = { "en" => { "en" => "foobar" } } + invalid.save!(validate: false) + end + + it "returns array of invalid comments" do + expect(subject.invalid_comments).to eq([[invalid, { "en" => "foobar" }]]) + end + end +end diff --git a/spec/services/decidim/repair_url_in_content_service_spec.rb b/spec/services/decidim/repair_url_in_content_service_spec.rb new file mode 100644 index 00000000..36ab5aad --- /dev/null +++ b/spec/services/decidim/repair_url_in_content_service_spec.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require "spec_helper" + +describe Decidim::RepairUrlInContentService do + subject { described_class.run(deprecated_endpoint) } + + let(:deprecated_endpoint) { "s3.decidim.org" } + let(:invalid_resource1) { create(:comment, body: invalid_body_comment) } + let(:invalid_resource2) { create(:comment) } + let(:invalid_body_comment) { { en: "
Here is a not valid comment with Link text
" } } + let(:deprecated_url) { "https://#{deprecated_endpoint}/xxxx?response-content-disposition=inline%3Bfilename%3D\"BuPa23_reglement-interieur.pdf\"%3Bfilename*%3DUTF-8''BuPa23_r%25C3%25A8glement-int%25C3%25A9rieur.pdf&response-content-type=application%2Fpdf" } + let!(:blob) { ActiveStorage::Blob.create_after_upload!(filename: "BuPa23_reglement-interieur.pdf", io: File.open("spec/fixtures/BuPa23_reglement-interieur.pdf"), content_type: "application/pdf") } + let(:blob_path) { Rails.application.routes.url_helpers.rails_blob_path(ActiveStorage::Blob.find(blob.id), only_path: true) } + + describe "#run" do + it "updates values from comments" do + expect do + subject + invalid_resource1.reload + end.to change(invalid_resource1, :body) + + expect(invalid_resource1.body["en"]).to include(blob_path) + end + + context "when invalid contains an image" do + let(:invalid_body_comment) { { en: "Here is a not valid comment with
" } } + + it "updates values from comments" do + expect do + subject + invalid_resource1.reload + end.to change(invalid_resource1, :body) + + expect(invalid_resource1.body["en"]).to include(blob_path) + end + end + + context "when deprecated url is not a direct link" do + let(:deprecated_url) { "https://#{deprecated_endpoint}/rails/active_storage/representations/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBY3c9IiwiZXhwIjpudWxsLCJwdXIiOiJibG9iX2lkIn19--0dd7fba2bf600153aca7a8ada9d0b568010c7d1c/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaDdCam9TY21WemFYcGxYM1J2WDJacGRGc0hNR2tCN1E9PSIsImV4cCI6bnVsbCwicHVyIjoidmFyaWF0aW9uIn19--4e2c28b6f31f9da4a43a726c999148c0062324fa/BuPa23_reglement-interieur.pdf" } + + it "updates values from comments" do + expect do + subject + invalid_resource1.reload + end.to change(invalid_resource1, :body) + + expect(invalid_resource1.body["en"]).to include(blob_path) + end + end + + context "when deprecated_endpoint is blank" do + let(:deprecated_endpoint) { nil } + + it "returns false" do + expect(subject).to be_falsey + end + end + + context "when resource is a ContentBlock" do + let(:invalid_resource1) { create(:content_block, manifest_name: :html, scope_name: :homepage) } + let(:invalid_html_content) { "" } + + let(:settings) do + Decidim.available_locales.each_with_object({}) do |locale, hash| + hash["html_content_#{locale}"] = invalid_html_content + end + end + + before do + form = Decidim::Admin::ContentBlockForm.from_params( + { + content_block: { + settings: settings, + images: {} + } + } + ) + Decidim::Admin::UpdateContentBlock.new(form, invalid_resource1, invalid_resource1.scope_name).call + end + + it "updates values from content blocks" do + expect do + subject + invalid_resource1.reload + end.to change(invalid_resource1, :settings) + + expect(invalid_resource1.settings.html_content[:en]).to include(blob_path) + end + end + end + + describe "#models" do + subject { described_class.new(deprecated_endpoint) } + + it "returns models" do + [ + Decidim::Comments::Comment, + Decidim::Proposals::Proposal, + Decidim::ContentBlock + ].each do |model| + expect(subject.models).to include(model) + end + end + end + + describe "#records_for" do + subject { described_class.new(deprecated_endpoint).records_for(model) } + + let(:model) { Decidim::Comments::Comment } + + it "returns all records that have a column of type string jsonb or text" do + expect(subject).to include(invalid_resource1) + expect(subject).not_to include(invalid_resource2) + end + + it "generates a unique SQL query" do + expect(subject.to_sql).to eq("SELECT \"decidim_comments_comments\".* FROM \"decidim_comments_comments\" WHERE (((((decidim_commentable_type::text LIKE '%#{deprecated_endpoint}%') OR (decidim_root_commentable_type::text LIKE '%#{deprecated_endpoint}%')) OR (decidim_author_type::text LIKE '%#{deprecated_endpoint}%')) OR (body::text LIKE '%#{deprecated_endpoint}%')) OR (decidim_participatory_space_type::text LIKE '%#{deprecated_endpoint}%'))") + end + + context "when model cannot be constantized" do + let(:model) { "Decidim::Comments::NotExistingModel" } + + it "returns an empty array" do + expect(subject).to eq([]) + end + end + end +end