Skip to content

Commit

Permalink
Merge pull request #64 from omnilord/managing-duplicates
Browse files Browse the repository at this point in the history
Better control of deduplication through Deleting resources via UI and finer import logic
  • Loading branch information
miklb authored Oct 15, 2018
2 parents aaed403 + 0389853 commit 187413b
Show file tree
Hide file tree
Showing 15 changed files with 652 additions and 605 deletions.
7 changes: 6 additions & 1 deletion app/controllers/distribution_points_controller.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class DistributionPointsController < ApplicationController
before_action :authenticate_admin!, only: [:archive, :unarchive]
before_action :authenticate_admin!, only: [:archive, :unarchive, :destroy]
before_action :authenticate_user!, only: [:mark_current]
before_action :set_headers, except: [:index]
before_action :set_index_headers, only: [:index]
Expand Down Expand Up @@ -65,6 +65,11 @@ def update
end

def destroy
if @distribution_point.trash!(current_user, params[:reason])
redirect_to distribution_points_path, notice: "'#{@distribution_point.facility_name}' has been moved to the trash."
else
redirect_to @distribution_point, notice: "Something went wrong, '#{@distribution_point.facility_name}' has not been trashed."
end
end

def archived
Expand Down
7 changes: 6 additions & 1 deletion app/controllers/shelters_controller.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class SheltersController < ApplicationController
before_action :authenticate_admin!, only: [:archive, :unarchive]
before_action :authenticate_admin!, only: [:archive, :unarchive, :destroy]
before_action :authenticate_user!, only: [:mark_current]
before_action :set_headers, except: [:index]
before_action :set_index_headers, only: [:index]
Expand Down Expand Up @@ -65,6 +65,11 @@ def update
end

def destroy
if @shelter.trash!(current_user, params[:reason])
redirect_to shelters_path, notice: "'#{@shelter.shelter}' has been moved to the trash."
else
redirect_to @shelter, notice: "Something went wrong, '#{@shelter.shelter}' has not been trashed."
end
end

def archived
Expand Down
76 changes: 54 additions & 22 deletions app/jobs/import_fema_shelters_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,70 @@ class ImportFemaSheltersJob < ApplicationJob
def perform(*args)
logger.info "Starting ImportFemaSheltersJob #{Time.now}"
fema_data = FemaImporter.shelters
duplicates = 0
fema_data.each do |data|
if duplicate?(data)
duplicates += 1
else
Shelter.create!(data)
end
end
logger.info "ImportFemaSheltersJob Complete - Imported Shelters: #{fema_data.count - duplicates}"
imported = 0
fema_data.each { |data| imported += deduplicated_import!(data) }
logger.info "ImportFemaSheltersJob Complete - Records received: #{fema_data.length}, Imported Shelters: #{imported}"
end

private

def duplicate?(data)
def deduplicated_import!(data)
# This is a very naive deduplication effort, yes it does
# an unindexed scan of the database against several columns of text
#
# TODO: Use Arel for the where named functions
# arel = Shelter.arel_table

shelter = nil

if shelter = find_by_address(data).first
logger.info "Duplicate found by address: #{data[:shelter]} @ #{data[:address]}"
elsif shelter = find_by_coordinates(data).first
logger.info "Duplicate found by coordinates: #{data[:shelter]} @ [#{data[:latitude]}, #{data[:longitude]}]"
elsif shelter = find_by_location_fields(data).first
logger.info "Duplicate found by fields: #{data[:shelter]} @ [#{data[:city]}, #{data[:state]}, #{data[:zip]}]"
elsif shelter = find_by_source(data).first
logger.info "Duplicate found by source: #{data[:shelter]} @ [#{data[:source]}]"
end

arel = Shelter.arel_table
cnt = Shelter.unscope(:where).where(
'LOWER(TRIM(shelter)) = ? AND LOWER(TRIM(city)) = ? AND LOWER(TRIM(state)) = ? AND LOWER(TRIM(zip)) = ?',
data[:shelter].strip.downcase, data[:city].strip.downcase, data[:state].strip.downcase, data[:zip].strip.downcase
).count
cnt += Shelter.unscope(:where).where('LOWER(TRIM(address)) = ?', data[:address].strip.downcase).count
cnt += Shelter.unscope(:where).where('LOWER(TRIM(source)) = ?', data[:source].strip.downcase).count

if cnt > 0
logger.info "Duplicate: #{data[:shelter]} @ #{data[:address]}"
true
if shelter.nil?
Shelter.create!(data)
1
else
false
unarchive!(shelter)
0
end
end

def find_by_address(data)
Shelter.unscope(:where).where('LOWER(TRIM(address)) = ?', data[:address].strip.downcase)
end

def find_by_coordinates(data)
lat = data[:latitude].to_f
lon = data[:longitude].to_f
delta = 0.0002
shelter = Shelter.unscope(:where).where(
'(latitude between ? and ?) AND (longitude between ? and ?)',
lat - delta, lat + delta, lon - delta, lon + delta
)
end

def find_by_location_fields(data)
Shelter.unscope(:where).where(
'LOWER(TRIM(shelter)) = ? AND LOWER(TRIM(city)) = ? AND LOWER(TRIM(state)) = ? AND LOWER(TRIM(zip)) = ?',
data[:shelter].strip.downcase, data[:city].strip.downcase, data[:state].strip.downcase, data[:zip].strip.downcase
)
end

def find_by_source(data)
Shelter.unscope(:where).where('LOWER(TRIM(source)) = ?', data[:source].strip.downcase)
end

def unarchive!(shelter)
unless shelter.active
logger.info "Unarchiving pre-existing shelter with ID #{shelter.id}"
shelter.update_columns(active: true, updated_at: Time.now)
end
end
end
2 changes: 1 addition & 1 deletion app/models/concerns/geocodable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module Geocodable
# latitude, longitude, address, city, state, zip, county

included do
after_commit :schedule_reverse_geocode
after_commit :schedule_reverse_geocode, on: [:create, :update]

scope :incomplete_geocoding, -> { where(<<~SQL
county IS NULL OR TRIM(county) = '' OR
Expand Down
21 changes: 21 additions & 0 deletions app/models/concerns/trashable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module Trashable
extend ActiveSupport::Concern

included do
has_one :trash, as: :trashable
end

def trash!(user, reason = nil)
transaction do
trash = Trash.create(trashable: self,
user: user,
data: self.attributes,
reason: reason)
unless trash && self.destroy
trash = nil
raise ActiveRecord::Rollback
end
end
trash
end
end
3 changes: 2 additions & 1 deletion app/models/distribution_point.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class DistributionPoint < ApplicationRecord
include Geocodable
include Trashable

default_scope { where(archived: false) }

Expand Down Expand Up @@ -37,7 +38,7 @@ class DistributionPoint < ApplicationRecord

has_many :drafts, as: :record

after_commit do
after_commit on: [:create, :update] do
DistributionPointUpdateNotifierJob.perform_later self
end

Expand Down
3 changes: 2 additions & 1 deletion app/models/shelter.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class Shelter < ApplicationRecord
include Geocodable
include Trashable

default_scope { where(active: true) }

Expand Down Expand Up @@ -55,7 +56,7 @@ class Shelter < ApplicationRecord

has_many :drafts, as: :record

after_commit do
after_commit on: [:create, :update] do
ShelterUpdateNotifierJob.perform_later self
end

Expand Down
16 changes: 16 additions & 0 deletions app/models/trash.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
class Trash < ApplicationRecord
belongs_to :user
belongs_to :trashable, polymorphic: true, optional: true

def restore!
restoring = nil
transaction do
restoring = trashable_type.constantize.create(data)
unless restoring && self.destroy
restoring = nil
raise ActiveRecord::Rollback
end
end
restoring
end
end
6 changes: 5 additions & 1 deletion app/views/distribution_points/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
method: :post,
data: { confirm: 'Are you sure?' },
class: 'button button-clear' %>
<% end %>
<% end %> |
<%= link_to 'Delete', @distribution_point,
method: :delete,
data: { confirm: 'Are you sure?' },
class: 'button button-clear' %>
<% end %>
<% if user_signed_in? && @distribution_point.outdated? %> |
<%= link_to 'Mark Current', mark_current_distribution_point_path(@distribution_point),
Expand Down
6 changes: 5 additions & 1 deletion app/views/shelters/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
method: :delete,
data: { confirm: 'Are you sure?' },
class: 'button button-clear' %>
<% end %>
<% end %> |
<%= link_to 'Delete', @shelter,
method: :delete,
data: { confirm: 'Are you sure?' },
class: 'button button-clear' %>
<% end %>
<% if user_signed_in? && @shelter.outdated? %> |
<%= link_to 'Mark Current', mark_current_shelter_path(@shelter),
Expand Down
11 changes: 11 additions & 0 deletions db/migrate/20181013222410_create_trashes.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
class CreateTrashes < ActiveRecord::Migration[5.1]
def change
create_table :trashes do |t|
t.references :trashable, polymorphic: true, null: false
t.jsonb :data
t.references :user, foreign_key: true
t.text :reason
t.timestamps
end
end
end
75 changes: 74 additions & 1 deletion db/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,41 @@ CREATE SEQUENCE public.shelters_id_seq
ALTER SEQUENCE public.shelters_id_seq OWNED BY public.shelters.id;


--
-- Name: trashes; Type: TABLE; Schema: public; Owner: -
--

CREATE TABLE public.trashes (
id bigint NOT NULL,
trashable_type character varying NOT NULL,
trashable_id bigint NOT NULL,
data jsonb,
user_id bigint,
reason text,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);


--
-- Name: trashes_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--

CREATE SEQUENCE public.trashes_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;


--
-- Name: trashes_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
--

ALTER SEQUENCE public.trashes_id_seq OWNED BY public.trashes.id;


--
-- Name: users; Type: TABLE; Schema: public; Owner: -
--
Expand Down Expand Up @@ -679,6 +714,13 @@ ALTER TABLE ONLY public.pages ALTER COLUMN id SET DEFAULT nextval('public.pages_
ALTER TABLE ONLY public.shelters ALTER COLUMN id SET DEFAULT nextval('public.shelters_id_seq'::regclass);


--
-- Name: trashes id; Type: DEFAULT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.trashes ALTER COLUMN id SET DEFAULT nextval('public.trashes_id_seq'::regclass);


--
-- Name: users id; Type: DEFAULT; Schema: public; Owner: -
--
Expand Down Expand Up @@ -797,6 +839,14 @@ ALTER TABLE ONLY public.shelters
ADD CONSTRAINT shelters_pkey PRIMARY KEY (id);


--
-- Name: trashes trashes_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.trashes
ADD CONSTRAINT trashes_pkey PRIMARY KEY (id);


--
-- Name: users users_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
Expand Down Expand Up @@ -862,6 +912,20 @@ CREATE INDEX index_drafts_on_denied_by_id ON public.drafts USING btree (denied_b
CREATE INDEX index_drafts_on_record_type_and_record_id ON public.drafts USING btree (record_type, record_id);


--
-- Name: index_trashes_on_trashable_type_and_trashable_id; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX index_trashes_on_trashable_type_and_trashable_id ON public.trashes USING btree (trashable_type, trashable_id);


--
-- Name: index_trashes_on_user_id; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX index_trashes_on_user_id ON public.trashes USING btree (user_id);


--
-- Name: index_users_on_confirmation_token; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -899,6 +963,14 @@ ALTER TABLE ONLY public.drafts
ADD CONSTRAINT fk_rails_687f4d113b FOREIGN KEY (denied_by_id) REFERENCES public.users(id);


--
-- Name: trashes fk_rails_7d97d072f5; Type: FK CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.trashes
ADD CONSTRAINT fk_rails_7d97d072f5 FOREIGN KEY (user_id) REFERENCES public.users(id);


--
-- PostgreSQL database dump complete
--
Expand Down Expand Up @@ -940,6 +1012,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20170910045633'),
('20180914132709'),
('20180915035427'),
('20180919043949');
('20180919043949'),
('20181013222410');


27 changes: 27 additions & 0 deletions test/controllers/distribution_points_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,31 @@ class DistributionPointsControllerTest < ActionDispatch::IntegrationTest
pod.reload
refute_equal oldtimestamp, pod.updated_at
end

test "guests may not delete" do
pod = distribution_points(:nrg)
sign_in users(:guest)
delete distribution_point_path(pod)
assert_response :redirect
assert_redirected_to root_path
end

test "admins can delete" do
pod = distribution_points(:nrg)
sign_in users(:admin)
delete distribution_point_path(pod)
assert_response :redirect
assert_redirected_to distribution_points_path
end

test "deleted items get moved to trash" do
pod = distribution_points(:nrg)
expected_pods = DistributionPoint.count - 1
expected_trash = Trash.count + 1
sign_in users(:admin)
delete distribution_point_path(pod)
assert_response :redirect
assert_equal expected_pods, DistributionPoint.count
assert_equal expected_trash, Trash.count
end
end
Loading

0 comments on commit 187413b

Please sign in to comment.