Skip to content

Commit

Permalink
Refactoring FEMA import job to unarchive archived shelters if they ar…
Browse files Browse the repository at this point in the history
…e received via the import.
  • Loading branch information
omnilord committed Oct 13, 2018
1 parent 375ae8f commit a16b7a1
Showing 1 changed file with 51 additions and 30 deletions.
81 changes: 51 additions & 30 deletions app/jobs/import_fema_shelters_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,70 @@ class ImportFemaSheltersJob < ApplicationJob
# Job entry point: logs a start message, loads the shelter records returned by
# FemaImporter.shelters, processes each record, and logs a completion summary.
#
# NOTE(review): this span reads like a rendered diff with removed and added
# lines interleaved (two import loops and two completion log lines) -- confirm
# which lines belong to the current revision before running it as written.
#
# @param args [Array] accepted but not referenced anywhere in the body
def perform(*args)
logger.info "Starting ImportFemaSheltersJob #{Time.now}"
fema_data = FemaImporter.shelters
duplicates = 0
fema_data.each do |data|
if duplicate?(data)
duplicates += 1
else
Shelter.create!(data)
end
end
logger.info "ImportFemaSheltersJob Complete - Imported Shelters: #{fema_data.count - duplicates}"
imported = 0
fema_data.each { |data| imported += deduplicated_import!(data) }
logger.info "ImportFemaSheltersJob Complete - Records received: #{fema_data.length}, Imported Shelters: #{imported}"
end

private

# Deduplicating import of a single incoming record.
#
# The find_by_* lookups are tried in order (address, coordinates,
# shelter/city/state/zip, source); the first lookup whose `.first` is non-nil
# supplies the matching shelter and logs which rule matched. When no shelter
# matches, a new one is created with Shelter.create!(data) and 1 is returned;
# otherwise the match is passed to unarchive! and 0 is returned.
#
# NOTE(review): this span appears to interleave the removed `duplicate?` body
# (the `cnt`-based counting queries and the true/false result) with the added
# `deduplicated_import!` body, so the `def`/`end` pairs do not balance as
# shown -- confirm against the real file before editing the code itself.
def duplicate?(data)
def deduplicated_import!(data)
# This is a very naive deduplication effort, yes it does
# an unindexed scan of the database against several columns of text
#
# TODO: Use Arel for the where named functions
#
# arel = Shelter.arel_table

cnt = Shelter.unscope(:where).where('LOWER(TRIM(address)) = ?', data[:address].strip.downcase).count
unless cnt.positive?
lat = data[:latitude].to_f
lon = data[:longitude].to_f
delta = 0.0002
cnt += Shelter.unscope(:where).where(
'(latitude between ? and ?) AND (longitude between ? and ?)',
lat - delta, lat + delta, lon - delta, lon + delta
).count
end
cnt += Shelter.unscope(:where).where(
'LOWER(TRIM(shelter)) = ? AND LOWER(TRIM(city)) = ? AND LOWER(TRIM(state)) = ? AND LOWER(TRIM(zip)) = ?',
data[:shelter].strip.downcase, data[:city].strip.downcase, data[:state].strip.downcase, data[:zip].strip.downcase
).count unless cnt.positive?
cnt += Shelter.unscope(:where).where('LOWER(TRIM(source)) = ?', data[:source].strip.downcase).count unless cnt.positive?
shelter = nil

if shelter = find_by_address(data).first
logger.info "Duplicate found by address: #{data[:shelter]} @ #{data[:address]}"
elsif shelter = find_by_coordinates(data).first
logger.info "Duplicate found by coordinates: #{data[:shelter]} @ [#{data[:latitude]}, #{data[:longitude]}]"
elsif shelter = find_by_location_fields(data).first
logger.info "Duplicate found by fields: #{data[:shelter]} @ [#{data[:city]}, #{data[:state]}, #{data[:zip]}]"
elsif shelter = find_by_source(data).first
logger.info "Duplicate found by source: #{data[:shelter]} @ [#{data[:source]}]"
end

if cnt > 0
logger.info "Duplicate: #{data[:shelter]} @ #{data[:address]}"
true
if shelter.nil?
Shelter.create!(data)
1
else
false
unarchive!(shelter)
0
end
end

# Builds a query for shelters whose address equals the incoming record's
# address once both sides are trimmed and lower-cased.
#
# unscope(:where) strips existing WHERE conditions from the relation first
# (presumably so shelters hidden by a default scope are searched too -- confirm
# against the Shelter model).
#
# @param data [Hash] incoming record; only data[:address] is read and it must
#   respond to #strip
# @return whatever Shelter.unscope(:where).where(...) returns
def find_by_address(data)
  normalized_address = data[:address].strip.downcase
  Shelter.unscope(:where).where('LOWER(TRIM(address)) = ?', normalized_address)
end

# Builds a query for shelters whose stored latitude and longitude both fall
# within +delta+ of the incoming record's coordinates (a square bounding box).
#
# unscope(:where) strips existing WHERE conditions from the relation first
# (presumably so shelters hidden by a default scope are searched too -- confirm
# against the Shelter model).
#
# @param data [Hash] incoming record; reads data[:latitude] and
#   data[:longitude], each converted with #to_f
# @param delta [Numeric] half-width of the bounding box, in the same units as
#   the stored coordinates; defaults to the previously hard-coded 0.0002
# @return whatever Shelter.unscope(:where).where(...) returns
def find_by_coordinates(data, delta: 0.0002)
  # to_f turns nil into 0.0, so a record without coordinates is compared
  # against the area around (0, 0) rather than raising.
  lat = data[:latitude].to_f
  lon = data[:longitude].to_f

  # Return the relation directly; the original assigned it to a local that was
  # never read.
  Shelter.unscope(:where).where(
    '(latitude between ? and ?) AND (longitude between ? and ?)',
    lat - delta, lat + delta, lon - delta, lon + delta
  )
end

# Builds a query for shelters whose name, city, state and zip all equal the
# incoming record's values once both sides are trimmed and lower-cased.
#
# unscope(:where) strips existing WHERE conditions from the relation first
# (presumably so shelters hidden by a default scope are searched too -- confirm
# against the Shelter model).
#
# @param data [Hash] incoming record; reads :shelter, :city, :state and :zip,
#   each of which must respond to #strip
# @return whatever Shelter.unscope(:where).where(...) returns
def find_by_location_fields(data)
  # Order matters: the values bind positionally to the placeholders below.
  bindings = %i[shelter city state zip].map { |field| data[field].strip.downcase }

  Shelter.unscope(:where).where(
    'LOWER(TRIM(shelter)) = ? AND LOWER(TRIM(city)) = ? AND LOWER(TRIM(state)) = ? AND LOWER(TRIM(zip)) = ?',
    *bindings
  )
end

# Builds a query for shelters whose source equals the incoming record's source
# once both sides are trimmed and lower-cased.
#
# unscope(:where) strips existing WHERE conditions from the relation first
# (presumably so shelters hidden by a default scope are searched too -- confirm
# against the Shelter model).
#
# @param data [Hash] incoming record; only data[:source] is read and it must
#   respond to #strip
# @return whatever Shelter.unscope(:where).where(...) returns
def find_by_source(data)
  normalized_source = data[:source].strip.downcase
  Shelter.unscope(:where).where('LOWER(TRIM(source)) = ?', normalized_source)
end

# Marks a shelter as active again when it is not already.
#
# Does nothing (and returns nil) when shelter.active is truthy. Otherwise logs
# the shelter's ID and writes active/updated_at through update_columns.
#
# @param shelter [#active, #id, #update_columns] the matched shelter record
# @return [nil, Object] nil when already active, else update_columns' result
def unarchive!(shelter)
  return if shelter.active

  logger.info "Unarchiving pre-existing shelter with ID #{shelter.id}"
  shelter.update_columns(active: true, updated_at: Time.now)
end
end

0 comments on commit a16b7a1

Please sign in to comment.