From 4aff4104085daaa540287370d67ddab5f1305dd8 Mon Sep 17 00:00:00 2001 From: Jay Varner Date: Mon, 26 Aug 2024 11:01:09 -0400 Subject: [PATCH] Clean up before starting update. --- .gitignore | 3 +- .rubocop.yml | 1 + Gemfile.lock | 5 ++- app/jobs/load_big_sam_job.rb | 46 +++++++++++++++++-------- app/models/entity.rb | 7 ++-- app/models/repository.rb | 2 ++ app/views/letters/_letter.json.jbuilder | 2 +- spec/models/repository_spec.rb | 1 + tmp/.keep | 0 tmp/pids/.keep | 0 tmp/storage/.keep | 0 11 files changed, 46 insertions(+), 21 deletions(-) delete mode 100644 tmp/.keep delete mode 100644 tmp/pids/.keep delete mode 100644 tmp/storage/.keep diff --git a/.gitignore b/.gitignore index fe07f13..b65cb4f 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,5 @@ BigSam* *.csv *.zip -token.yaml \ No newline at end of file +token.yaml +lib/snippets.rb \ No newline at end of file diff --git a/.rubocop.yml b/.rubocop.yml index fdd16e9..ace8566 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -35,6 +35,7 @@ Metrics/BlockLength: Exclude: - spec/**/* - db/**/* + - lib/snippets.rb Layout/SpaceAroundEqualsInParameterDefault: EnforcedStyle: space diff --git a/Gemfile.lock b/Gemfile.lock index 8dbd930..ecfa248 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -271,13 +271,15 @@ GEM timeout net-ssh (7.0.1) nio4r (2.5.8) + nokogiri (1.13.6-x86_64-darwin) + racc (~> 1.4) nokogiri (1.13.6-x86_64-linux) racc (~> 1.4) os (1.1.4) parallel (1.22.1) parser (3.1.2.0) ast (~> 2.4.1) - pg (1.4.1) + pg (1.4.2) public_suffix (4.0.7) puma (5.6.4) nio4r (~> 2.0) @@ -431,6 +433,7 @@ GEM zeitwerk (2.6.0) PLATFORMS + x86_64-darwin-21 x86_64-linux DEPENDENCIES diff --git a/app/jobs/load_big_sam_job.rb b/app/jobs/load_big_sam_job.rb index 88b5fcb..9ed5b95 100644 --- a/app/jobs/load_big_sam_job.rb +++ b/app/jobs/load_big_sam_job.rb @@ -111,8 +111,8 @@ def load_letters(rows) row[:reg_recipient]&.split(';')&.each do |recipient| recipient = recipient.strip.titleize entity = Entity.find_by(label: recipient) - entity = get_entity(label: recipient, type: 'organization', return_nil: true) if entity.nil? entity = get_person(recipient) if entity.nil? + entity = get_entity(label: recipient, type: 'organization', return_nil: true) if entity.nil? entity = Entity.create(label: recipient) if entity.nil? LetterRecipient.find_or_create_by(letter:, entity:) @@ -124,7 +124,7 @@ def load_letters(rows) if row[:reg_place_sent] begin - destination = get_entity(label: row[:reg_sent_sent], type: 'place') + destination = get_entity(label: row[:reg_place_sent], type: 'place') letter.destinations << destination rescue ActiveRecord::RecordInvalid, Elasticsearch::Transport::Transport::Errors::BadRequest, Elasticsearch::Transport::Transport::Errors::NotFound @@ -151,6 +151,7 @@ def load_letters(rows) if row[:first_repository] repository = Repository.find_or_create_by(label: row[:first_repository]) + logger.error("Firts Repository #{repository.label}") repository.format = row[:first_format] repository.american = row[:euro_or_am].downcase == 'american' if row[:euro_or_am] # repository.published = row[:first_public].downcase == 'public' if row[:first_public] @@ -255,8 +256,8 @@ def load_letters(rows) lang = Language.find_or_create_by(label: language.downcase) letter.languages << lang unless letter.languages.include?(lang) rescue ActiveRecord::RecordInvalid, - Elasticsearch::Transport::Transport::Errors::BadRequest, - Elasticsearch::Transport::Transport::Errors::NotFound + Elasticsearch::Transport::Transport::Errors::BadRequest, + Elasticsearch::Transport::Transport::Errors::NotFound end letter.typed = row[:autograph_or_typed] == 'T' @@ -286,6 +287,7 @@ def get_letter(row) end def get_entity(label: nil, type: nil, return_nil: false) + logger.error("Get Entity with label: #{label} or type #{type}") label = label.strip.gsub(/[\[!@%&?"\]]/, '').titleize label = mac?(label) entity = Entity.public_send(type) @@ -298,32 +300,39 @@ def get_entity(label: nil, type: nil, return_nil: false) return nil if entity.nil? && return_nil entity = Entity.find_or_create_by(label:, e_type: type) if entity.nil? + logger.error("Found or created entity #{entity.label}") entity end def get_person(name) + entity = nil names = Namae.parse(name).first if names&.given && names&.family names.family = "van #{names.family}" if names.particle&.downcase == 'van' names = o?(names) + names = o?(names) entity = Entity.find_by(first_name: names.given, last_name: names.family) end - entity = Entity.find_or_create_by(name:, e_type: type) if entity.nil? + if entity.nil? && entity.nil? + entity = Entity.find_or_create_by(first_name: names.given, last_name: names.family, + e_type: 'person') + end + logger.debug "Found person #{entity.label} from #{name}" entity end -def fix_date(row) - row[:day] = '1' if row[:day] == '0' - row[:month] = '1' if row[:month] == '0' - row[:year] = '99' if row[:year] == '0' - row[:day] = row[:day].gsub(/[\[!@%&?"\]]/, '').to_i if row[:day].is_a?(String) - row[:month] = row[:month].gsub(/[\[!@%&?"\]]/, '').to_i if row[:month].is_a?(String) - row[:year] = "19#{row[:year]}".gsub(/[\[!@%&?"\]]/, '').to_i if row[:year].is_a?(String) - row[:year] = row[:year] + 1900 if row[:year].to_s.size == 2 + def fix_date(row) + row[:day] = '1' if row[:day] == '0' + row[:month] = '1' if row[:month] == '0' + row[:year] = '99' if row[:year] == '0' + row[:day] = row[:day].gsub(/[\[!@%&?"\]]/, '').to_i if row[:day].is_a?(String) + row[:month] = row[:month].gsub(/[\[!@%&?"\]]/, '').to_i if row[:month].is_a?(String) + row[:year] = "19#{row[:year]}".gsub(/[\[!@%&?"\]]/, '').to_i if row[:year].is_a?(String) + row[:year] = row[:year] + 1900 if row[:year].to_s.size == 2 - row -end + row + end def mac?(label) parts = label.split @@ -347,6 +356,13 @@ def mac?(label) parts.join(' ') end + def mac_name?(names) + if names.family.starts_with?('Mac ') || names.family.starts_with?('Mc ') + names.family = names.family.split.map(&:titleize).join + end + names + end + def o?(names) return names unless names.family.starts_with?("O'") diff --git a/app/models/entity.rb b/app/models/entity.rb index fa9f7ae..91927e8 100644 --- a/app/models/entity.rb +++ b/app/models/entity.rb @@ -7,7 +7,7 @@ class Entity < ApplicationRecord include EntityCommon before_save :check_published, :remove_blank_values, :add_full_stops, :remove_div, :concat_label, :to_plain_text - after_destroy :remove_published + before_destroy :remove_published after_commit :reindex_published has_many :mentions, dependent: :destroy @@ -116,6 +116,7 @@ def concat_label self.life_dates = nil if life_dates == 'nd' self.life_dates = life_dates.gsub(/[()]/, '') if life_dates end + # self.label = 'ZZZ' if self.label.nil? end # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity @@ -194,8 +195,8 @@ def remove_blank_values end def reindex_published - if published - published_entity = PublishedEntity.find(id) + published_entity = PublishedEntity.find_by(id:) + if published && published_entity published_entity&.reindex PublishedEntity.reindex if ENV['RAILS_ENV'] == 'test' else diff --git a/app/models/repository.rb b/app/models/repository.rb index 989b1a1..c31199e 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -17,6 +17,8 @@ class Repository < ApplicationRecord private def reindex_published + return unless saved_change_to_attribute?(:published) + letters.each do |letter| letter.update(published:) letter.all_entities.map(&:save) diff --git a/app/views/letters/_letter.json.jbuilder b/app/views/letters/_letter.json.jbuilder index 2802e28..90a567a 100644 --- a/app/views/letters/_letter.json.jbuilder +++ b/app/views/letters/_letter.json.jbuilder @@ -65,7 +65,7 @@ json.repositories do second_repo = [ letter.second_repository.repository.label, letter.second_repository.format, - letter.second_repository.collection.label + letter.second_repository.collection&.label ] json.set! 'second_repository', second_repo.compact.join(', ') end diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index dd5f32c..f3fd41f 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -13,6 +13,7 @@ end repo.update(published: true) + repo.save expect(repo.letters.map(&:published)).to all(be true) repo.letters.each do |letter| diff --git a/tmp/.keep b/tmp/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/tmp/pids/.keep b/tmp/pids/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/tmp/storage/.keep b/tmp/storage/.keep deleted file mode 100644 index e69de29..0000000