Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes broken importer from CSV #10

Merged
merged 4 commits into from
Aug 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,7 @@ Performance/RegexpMatch:
Exclude:
- 'lib/importer/csv_parser.rb'
- 'app/services/solr_config_uploader.rb'
- 'app/models/account.rb'
- 'app/models/account.rb'

ClassLength:
Max: 120
Binary file added lib/assets/batch-upload-test/example3.tiff
Binary file not shown.
Binary file added lib/assets/batch-upload-test/sample.docx
Binary file not shown.
3 changes: 3 additions & 0 deletions lib/assets/csv_test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,type,title,description,resource_type,contributor,contributor,date_created,file
11111111-2465-1111-5468-000123456789,ETD,Hammock Tacos,,image,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,example3.tiff
22222222-0971-2222-1353-000123456789,ETD,Freegan Intelligentsia,,text,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,sample.docx
1 change: 0 additions & 1 deletion lib/importer/csv_parser.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
module Importer
# rubocop:disable Metrics/ClassLength
class CSVParser
include Enumerable

Expand Down
2 changes: 1 addition & 1 deletion lib/importer/factory/image_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module Factory
class ImageFactory < ObjectFactory
include WithAssociatedCollection

self.klass = GenericWork
self.klass = Image
# A way to identify objects that are not Hydra minted identifiers
self.system_identifier_field = :identifier

Expand Down
29 changes: 25 additions & 4 deletions lib/importer/factory/object_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,37 @@ def transform_attributes
.merge(file_attributes)
end

# NOTE: This approach is probably broken since the actor that handled `:files` attribute was removed:
# https://github.com/samvera/hyrax/commit/3f1b58195d4381c51fde8b9149016c5b09f0c9b4
# Find existing file or upload new file. This assumes a Work will have unique file titles;
# could filter by URIs instead (slower).
# When an uploaded_file already exists we do not want to pass its id in `file_attributes`
# otherwise it gets reuploaded by `work_actor`.
def upload_ids
work_files_titles = object.file_sets.map(&:title) if object.present? && object.file_sets.present?
work_files_titles && work_files_titles.include?(attributes[:file]) ? [] : [import_file(file_paths.first)]
end

def file_attributes
files_directory.present? && files.present? ? { files: file_paths } : {}
hash = {}
hash[:uploaded_files] = upload_ids if files_directory.present? && attributes[:file].present?
hash[:remote_files] = attributes[:remote_files] if attributes[:remote_files].present?
hash
end

def file_paths
files.map { |file_name| File.join(files_directory, file_name) }
attributes[:file].map { |file_name| File.join(files_directory, file_name) } if attributes[:file]
end

def import_file(path)
u = Hyrax::UploadedFile.new
u.user_id = User.find_by_user_key(User.batch_user_key).id if User.find_by_user_key(User.batch_user_key)
u.file = CarrierWave::SanitizedFile.new(path)
u.save
u.id
end

## TO DO: handle invalid file in CSV
## currently the importer stops if no file corresponding to a given file_name is found

# Regardless of what the MODS Parser gives us, these are the properties we are prepared to accept.
def permitted_attributes
klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility]
Expand Down
1 change: 1 addition & 0 deletions lib/importer/factory/with_associated_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def create_attributes
# Strip out the :collection key, and add the member_of_collection_ids,
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
def update_attributes
return super if attributes[:collection].nil?
super.except(:collection).merge(member_of_collection_ids: [collection.id])
end

Expand Down
82 changes: 41 additions & 41 deletions spec/lib/importer/factory/etd_factory_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,50 @@

RSpec.describe Importer::Factory::ETDFactory, :clean do
let(:factory) { described_class.new(attributes) }
let(:actor) { double }
before do
allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor)
end
let(:files) { [] }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end

context 'with files' do
let(:factory) { described_class.new(attributes, 'tmp/files', files) }
let(:files) { ['img.png'] }
let(:coll) { create(:collection) }

context "for a new image" do
it 'calls the actor with the files' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
let(:work) { GenericWork }

context "for an existing image without files" do
let(:work) { create(:generic_work) }
let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }

it 'creates file sets' do
expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
end
# context 'with files' do
# let(:factory) { described_class.new(attributes, 'tmp/files', files) }
# let(:files) { ['img.png'] }
# let(:coll) { create(:collection) }
#
# context "for a new image" do
# it 'calls the actor with the files' do
# expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
#
# context "for an existing image without files" do
# let(:work) { create(:generic_work) }
# let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }
#
# it 'creates file sets' do
# expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
# end

context 'when a collection already exists' do
let!(:coll) { create(:collection) }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end
let(:actor) { Hyrax::CurationConcern.actor }

it 'does not create a new collection' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
Expand All @@ -58,4 +56,6 @@
end.to change(Collection, :count).by(0)
end
end

include_examples("csv_importer")
end
82 changes: 41 additions & 41 deletions spec/lib/importer/factory/image_factory_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,50 @@

RSpec.describe Importer::Factory::ImageFactory, :clean do
let(:factory) { described_class.new(attributes) }
let(:actor) { double }
before do
allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor)
end
let(:files) { [] }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end

context 'with files' do
let(:factory) { described_class.new(attributes, 'tmp/files', files) }
let(:files) { ['img.png'] }
let!(:coll) { create(:collection) }

context "for a new image" do
it 'creates file sets with access controls' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
let(:work) { Image }

context "for an existing image without files" do
let(:work) { create(:generic_work) }
let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }

it 'creates file sets' do
expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
end
# context 'with files' do
# let(:factory) { described_class.new(attributes, 'tmp/files', files) }
# let(:files) { ['img.png'] }
# let!(:coll) { create(:collection) }
#
# context "for a new image" do
# it 'creates file sets with access controls' do
# expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
#
# context "for an existing image without files" do
# let(:work) { create(:generic_work) }
# let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }
#
# it 'creates file sets' do
# expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
# end

context 'when a collection already exists' do
let!(:coll) { create(:collection) }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end
let(:actor) { Hyrax::CurationConcern.actor }

it 'does not create a new collection' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
Expand All @@ -58,4 +56,6 @@
end.to change(Collection, :count).by(0)
end
end

include_examples("csv_importer")
end
57 changes: 57 additions & 0 deletions spec/support/shared_examples_for_csv_importer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
RSpec.shared_examples "csv_importer" do
context "with a file" do
let(:attributes) do
{
id: "123",
title: ["Gluten-free umami"],
file: ["world.png"]
}
end
let(:factory) { described_class.new(attributes, 'spec/fixtures/images') }

before { factory.run }

describe "#run" do
it "uploads the content of the file" do
expect(Hyrax::UploadedFile.last[:file]).to eq("world.png")
end
end

describe "when a work with the same id already exists" do
let(:new_attr) do
{
id: "123",
title: ["Squid tofu banjo"],
file: ["nypl-hydra-of-lerna.jpg"]
}
end

it "updates metadata" do
new_factory = described_class.new(new_attr, 'spec/fixtures/images')
new_factory.run
expect(work.last.title).to eq(["Squid tofu banjo"])
end
end
end

context "without a file" do
## the csv_importer still creates a Work when no file is provided.
## TO DO: handle invalid file in CSV; current behavior:
## the importer stops if no file corresponding to a given file_name is found
let(:attributes) { { id: "345", title: ["Artisan succulents"] } }
let(:factory) { described_class.new(attributes) }

before { factory.run }

it "creates a Work with supplied metadata" do
expect(work.find("345").title).to eq(["Artisan succulents"])
end

it "updates a Work with supplied metadata" do
new_attr = { id: "345", title: ["Retro humblebrag"] }
new_factory = described_class.new(new_attr)
new_factory.run
expect(work.find("345").title).to eq(["Retro humblebrag"])
end
end
end