Skip to content

Commit

Permalink
Backport repair services for comment and url
Browse files Browse the repository at this point in the history
  • Loading branch information
moustachu committed Sep 28, 2023
1 parent 04107a5 commit ca43a22
Show file tree
Hide file tree
Showing 9 changed files with 713 additions and 0 deletions.
45 changes: 45 additions & 0 deletions app/services/decidim/repair_comments_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# frozen_string_literal: true

module Decidim
  # Finds comments whose translated body does not resolve to a String and
  # rewrites their body with the first value stored in the body hash.
  class RepairCommentsService
    include Decidim::TranslatableAttributes

    # Builds a service instance and executes it.
    # Returns Array of updated Comment ID (empty when nothing had to be repaired)
    def self.run
      new.execute
    end

    # Repairs every invalid comment.
    # Returns Array of updated Comment ID, or an empty Array when all comments are fine
    def execute
      ok? ? [] : update_comments!
    end

    # True when no comment needs to be repaired
    def ok?
      invalid_comments.empty?
    end

    # Memoized list of [comment, replacement_body] pairs, one per comment whose
    # translated body is not a String.
    def invalid_comments
      @invalid_comments ||= collect_invalid_comments
    end

    private

    # Walks every comment in batches and keeps the ones whose translated body is not a String,
    # paired with the first value of their body hash (the body that will be written back).
    def collect_invalid_comments
      [].tap do |found|
        Decidim::Comments::Comment.find_each do |comment|
          found << [comment, comment.body.values.first] unless translated_attribute(comment.body).is_a?(String)
        end
      end
    end

    # Update each invalid comment with its replacement body
    # Returns Array of updated Comment ID
    def update_comments!
      invalid_comments.each_with_object([]) do |(comment, repaired_body), updated_ids|
        comment.body = repaired_body

        # Validation is skipped to allow updating comments from root that don't accepts new comments
        updated_ids << comment.id if comment.save!(validate: false)
      end
    end
  end
end
156 changes: 156 additions & 0 deletions app/services/decidim/repair_url_in_content_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# frozen_string_literal: true

require "decidim/content_fixer"

module Decidim
  # Looks for any occurrence of "@deprecated_endpoint" in every database column whose type is listed in COLUMN_TYPES
  # For each field containing @deprecated_endpoint:
  # - Looks for the current ActiveStorage::Blob with the given filename
  # - Finds the blob's service_url
  # - Replaces the @deprecated_endpoint with the blob's service_url in text
  # - Updates the column
  # Context:
  # After the S3 assets migration with rake task "bundle exec rake scaleway:storage:migrate_from_local", the URLs of linked documents were correctly updated.
  # However, links added to text fields and pointing to an uploaded file were outdated and still redirected to the old S3 bucket
  class RepairUrlInContentService
    COLUMN_TYPES = [:string, :jsonb, :text].freeze

    # Builds the service and runs it.
    # @param [String] deprecated_endpoint
    # @param [Logger] logger
    def self.run(deprecated_endpoint, logger = nil)
      new(deprecated_endpoint, logger).run
    end

    # @param [String] deprecated_endpoint any "http://" or "https://" found in it is removed
    # @param [Logger] logger falls back to Rails.logger when nil
    def initialize(deprecated_endpoint, logger = nil)
      @logger = logger || Rails.logger
      @deprecated_endpoint = deprecated_endpoint&.gsub(%r{https?://}, "")
    end

    # Find all models that have a column of type string jsonb or text
    # For each model, find all records that have a column of type string jsonb or text
    # For each record, replace all urls contained in content with the new url
    # Save the record
    # @return false when no deprecated endpoint was given, otherwise the list of inspected models
    def run
      return false if @deprecated_endpoint.blank?

      models.each do |model|
        next unless model.respond_to?(:columns)

        @logger.info("Checking model #{model} for deprecated endpoints #{@deprecated_endpoint}")
        records = records_for model
        next if records.blank?

        # `size` reuses the records already fetched instead of issuing an extra COUNT query
        @logger.info "Found #{records.size} records to update for #{model}"

        # The column list only depends on the model, so it is computed once per model
        columns = searchable_columns(model)
        records.each do |record|
          save_record!(update_each_column(record, columns))
        end
      end
    end

    # Saves the record when it is valid and has pending changes; otherwise only logs why nothing was saved.
    # @param [Object] record
    def save_record!(record)
      if record.invalid?
        @logger.warn "Invalid record #{record.class}##{record.id}: #{record.errors.full_messages.join(", ")}"
        return
      end

      if record.has_changes_to_save?
        record.class.transaction do
          record.save!
        end
      else
        @logger.info "No changes to save for #{record.class}##{record.id}"
      end
    end

    # In some cases, the column returns a settings object,
    # therefore we need to update each of its attributes before saving the column
    # @param [Object] record
    # @param [[ActiveRecord::ConnectionAdapters::PostgreSQL::Column]] columns column objects, or instance variable names on recursive calls
    # @return record | nil
    def update_each_column(record, columns)
      columns.each do |column|
        current_content = current_content_for(record, column)
        next if current_content.blank?

        column_name = column.try(:name) ? column.name : column

        @logger.info "Updating ##{[record.class, record.try(:id), column_name].compact.join("# ")}"

        if current_content.is_a?(Hash) || current_content.is_a?(Array) || current_content.is_a?(String)
          next unless current_content.to_s.include?(@deprecated_endpoint)

          new_content = Decidim::ContentFixer.repair(current_content, @deprecated_endpoint, @logger)

          @logger.info "Old content: #{current_content}"
          @logger.info "New content: #{new_content}"

          write_attribute(record, column, new_content)
        else
          # If the column is a settings object, we need to update each of its attributes using a recursive call
          write_attribute(record, column, update_each_column(current_content, current_content.instance_variables))
        end
      end

      record
    end

    # Writes value either through the model attribute (when column responds to name)
    # or directly into the instance variable named by column.
    def write_attribute(record, column, value)
      if column.try(:name)
        record.write_attribute(:"#{column.name}", value)
      else
        record.instance_variable_set(column, value)
      end
    end

    # Reads the current value either through the attribute reader (when column responds to name)
    # or directly from the instance variable named by column.
    def current_content_for(record, column)
      if column.try(:name)
        record.send(column.name)
      else
        record.instance_variable_get(column)
      end
    end

    # Fetches the records of model that contain the deprecated endpoint in at least one searchable column.
    # The query is executed here (load) so that any database error is caught by the rescue below
    # instead of surfacing later, outside of this method, when the relation is first used.
    # @param [Class] model
    # @return loaded relation, nil when the model has no searchable column, [] when the lookup failed
    def records_for(model)
      connection = model.connection
      conditions = searchable_columns(model).map do |col|
        # Column names are quoted so that reserved words do not break the query
        model.where("#{connection.quote_column_name(col.name)}::text LIKE ?", "%#{@deprecated_endpoint}%")
      end

      conditions.reduce(&:or)&.load
    rescue StandardError => e
      @logger.warn "Error while fetching records from #{model}: #{e.message}"
      []
    end

    # Resolves a model class for every table whose name starts with "decidim_"; unresolved tables are dropped.
    def models
      ActiveRecord::Base.connection.tables.map do |table|
        next unless table.starts_with?("decidim_")

        classify_model(table)
      end.compact
    end

    # Because of the way decidim models are named, we need to try to find the model by subbing _ with / and then classify it
    # For example "decidim_comments_comments" becomes "Decidim::CommentsComment", then "Decidim::Comments::Comment"
    # This helps us find models that are namespaced
    # @param [String] table
    # @return model class, or nil when no underscore is left to substitute
    def classify_model(table)
      if table.include?("_")
        new_table = table.sub("_", "/")
        model = new_table.classify.safe_constantize

        return model if model

        classify_model(new_table)
      else
        @logger.warn "Could not find model for table #{table}"

        nil
      end
    end

    private

    # Columns of model whose type is one of COLUMN_TYPES
    def searchable_columns(model)
      model.columns.select { |column| COLUMN_TYPES.include?(column.type) }
    end
  end
end
95 changes: 95 additions & 0 deletions lib/decidim/content_fixer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# frozen_string_literal: true

module Decidim
  # Rewrites the links (<a href>) and images (<img src>) of a content that still point to a deprecated
  # endpoint so that they target the blob path of the ActiveStorage::Blob having the same filename.
  class ContentFixer
    TAGS_TO_FIX = %w(a img).freeze

    # @param [Hash, String, Array] content content to repair
    # @param [String] deprecated_endpoint substring identifying the URLs to replace
    # @param [Logger] logger
    def initialize(content, deprecated_endpoint, logger)
      @content = content
      @logger = logger
      @deprecated_endpoint = deprecated_endpoint
    end

    # Builds a fixer and repairs content.
    def self.repair(content, deprecated_endpoint, logger)
      new(content, deprecated_endpoint, logger).repair
    end

    # Repairs every value of a Hash, every element of an Array, or a single String.
    # @return content of the same shape, or nil (with a warning) for any other type
    def repair
      case @content
      when Hash
        @content.transform_values { |value| find_and_replace(value) }
      when String
        find_and_replace(@content)
      when Array
        @content.map { |value| find_and_replace(value) }
      else
        @logger.warn("Unsupported type #{@content.class}")

        nil
      end
    end

    # Replaces the deprecated URLs found in the TAGS_TO_FIX tags of content.
    # Anything that is not a String containing the deprecated endpoint is returned untouched.
    def find_and_replace(content)
      return content unless content.is_a?(String) && content.include?(@deprecated_endpoint)

      wrapper = nokogiri_will_wrap_with_p?(content) ? "p" : "body"

      doc = Nokogiri::HTML(content)

      TAGS_TO_FIX.each do |tag|
        replace_urls(doc, tag)
      end

      # NOTE(review): with the "p" wrapper this selects every <p> of the document; content mixing
      # leading text and <p> tags presumably loses its <p> boundaries here - confirm.
      doc.css(wrapper).inner_html
    end

    # Memoized [filename, id] pairs of every ActiveStorage::Blob
    def blobs
      @blobs ||= ActiveStorage::Blob.pluck(:filename, :id)
    end

    # Updates, in place, the URL attribute of every tag node of doc pointing to the deprecated endpoint.
    # @param doc parsed document
    # @param [String] tag "img" (src attribute) or any other tag (href attribute)
    def replace_urls(doc, tag)
      attribute = tag == "img" ? "src" : "href"

      doc.css(tag).each do |source|
        current_url = source[attribute]
        # Tags without the attribute (e.g. <a name="...">) have no URL to fix
        next unless current_url&.include?(@deprecated_endpoint)

        replacement = new_source(current_url)

        next unless replacement

        @logger.info "Replacing #{current_url} with #{replacement}"
        source[attribute] = replacement
      end
    end

    # Finds the blob whose transliterated filename matches the file targeted by source.
    # @param [String] source deprecated URL
    # @return blob path, or nil when the URL is invalid, has no filename or matches no blob
    def new_source(source)
      uri = URI.parse(source)
      filename = filename_from(source, uri)

      if filename.nil?
        @logger.warn "Could not extract a filename from #{source}"
        return nil
      end

      _filename, id = blobs.find { |blob_filename, _id| ActiveSupport::Inflector.transliterate(blob_filename) == filename }

      find_service_url_for_blob(id)
    rescue URI::InvalidURIError
      @logger.warn "Invalid URI for #{source}"
      nil
    end

    # @return path of the blob, or nil (with a warning) when no blob has this id
    def find_service_url_for_blob(blob_id)
      Rails.application.routes.url_helpers.rails_blob_path(ActiveStorage::Blob.find(blob_id), only_path: true)
    rescue ActiveRecord::RecordNotFound
      @logger.warn "Blob #{blob_id} not found"
      nil
    end

    # Content that does not start with a tag is handled as if Nokogiri wrapped it in a <p>
    def nokogiri_will_wrap_with_p?(content)
      !content.start_with?("<")
    end

    private

    # Extracts the filename from the "response-content-disposition" query parameter when the URL
    # mentions it, otherwise from the last segment of the path.
    # @return [String, nil] nil when no filename can be found
    def filename_from(source, uri)
      return uri.path.to_s.split("/").last unless source.include?("response-content-disposition")

      disposition = CGI.parse(uri.query.to_s)["response-content-disposition"].first
      disposition.to_s[/filename=("?)(.+)\1/, 2]
    end
  end
end
32 changes: 32 additions & 0 deletions lib/tasks/repair_data.rake
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,37 @@ namespace :decidim do

logger.info("Operation terminated")
end

desc "Check for malformed comments body and repair them if needed"
# Runs Decidim::RepairCommentsService and reports on stdout how many comments were updated, and which ones.
task comments: :environment do
  logger = Logger.new($stdout)
  logger.info("Checking all comments...")

  updated_comments_ids = Decidim::RepairCommentsService.run

  if updated_comments_ids.blank?
    logger.info("No comments updated")
  else
    # Log the number of updated comments here; the IDs themselves are listed on the next line
    logger.info("#{updated_comments_ids.count} comments updated")
    logger.info("Updated comments ID : #{updated_comments_ids.join(",")}")
  end

  logger.info("Operation terminated")
end

desc "Replace URLs pointing to deprecated object storage hosts in content"
# Reads a comma-separated list of hosts from the DEPRECATED_OBJECTSTORE_S3_HOSTS env variable
# and runs Decidim::RepairUrlInContentService once per host.
task url_in_content: :environment do
  logger = Logger.new($stdout)
  deprecated_hosts = ENV["DEPRECATED_OBJECTSTORE_S3_HOSTS"].to_s.split(",").map(&:strip)

  if deprecated_hosts.blank?
    logger.warn("DEPRECATED_OBJECTSTORE_S3_HOSTS env variable is not set")
  else
    deprecated_hosts.each do |host|
      Decidim::RepairUrlInContentService.run(host, logger)
    end
  end
end
end
end
Binary file added spec/fixtures/BuPa23_reglement-interieur.pdf
Binary file not shown.
Loading

0 comments on commit ca43a22

Please sign in to comment.