diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 6b2e39e37..1a5e3a7b1 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -114,4 +114,7 @@ Performance/RegexpMatch: Exclude: - 'lib/importer/csv_parser.rb' - 'app/services/solr_config_uploader.rb' - - 'app/models/account.rb' \ No newline at end of file + - 'app/models/account.rb' + +ClassLength: + Max: 120 diff --git a/lib/assets/batch-upload-test/example3.tiff b/lib/assets/batch-upload-test/example3.tiff new file mode 100644 index 000000000..cbf4002dc Binary files /dev/null and b/lib/assets/batch-upload-test/example3.tiff differ diff --git a/lib/assets/batch-upload-test/sample.docx b/lib/assets/batch-upload-test/sample.docx new file mode 100644 index 000000000..f56721b91 Binary files /dev/null and b/lib/assets/batch-upload-test/sample.docx differ diff --git a/lib/assets/csv_test.csv b/lib/assets/csv_test.csv new file mode 100644 index 000000000..f1ae96556 --- /dev/null +++ b/lib/assets/csv_test.csv @@ -0,0 +1,3 @@ +id,type,title,description,resource_type,contributor,contributor,date_created,file +11111111-2465-1111-5468-000123456789,ETD,Hammock Tacos,,image,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,example3.tiff +22222222-0971-2222-1353-000123456789,ETD,Freegan Intelligentsia,,text,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,sample.docx diff --git a/lib/importer/csv_parser.rb b/lib/importer/csv_parser.rb index 74476d779..e084be044 100644 --- a/lib/importer/csv_parser.rb +++ b/lib/importer/csv_parser.rb @@ -1,5 +1,4 @@ module Importer - # rubocop:disable Metrics/ClassLength class CSVParser include Enumerable diff --git a/lib/importer/factory/image_factory.rb b/lib/importer/factory/image_factory.rb index fec9d4acc..fe0993005 100644 --- a/lib/importer/factory/image_factory.rb +++ b/lib/importer/factory/image_factory.rb @@ -3,7 +3,7 @@ module Factory class ImageFactory < ObjectFactory include WithAssociatedCollection - self.klass = GenericWork + self.klass = Image # A way to identify objects that are not Hydra minted identifiers self.system_identifier_field = :identifier diff --git a/lib/importer/factory/object_factory.rb b/lib/importer/factory/object_factory.rb index 2d7520ef4..ae7d78b41 100644 --- a/lib/importer/factory/object_factory.rb +++ b/lib/importer/factory/object_factory.rb @@ -107,16 +107,37 @@ def transform_attributes .merge(file_attributes) end - # NOTE: This approach is probably broken since the actor that handled `:files` attribute was removed: - # https://github.com/samvera/hyrax/commit/3f1b58195d4381c51fde8b9149016c5b09f0c9b4 + # Find existing file or upload new file. This assumes a Work will have unique file titles; + # could filter by URIs instead (slower). + # When an uploaded_file already exists we do not want to pass its id in `file_attributes` + # otherwise it gets reuploaded by `work_actor`. + def upload_ids + work_files_titles = object.file_sets.map(&:title) if object.present? && object.file_sets.present? + work_files_titles && work_files_titles.include?(attributes[:file]) ? [] : [import_file(file_paths.first)] + end + def file_attributes - files_directory.present? && files.present? ? { files: file_paths } : {} + hash = {} + hash[:uploaded_files] = upload_ids if files_directory.present? && attributes[:file].present? + hash[:remote_files] = attributes[:remote_files] if attributes[:remote_files].present? + hash end def file_paths - files.map { |file_name| File.join(files_directory, file_name) } + attributes[:file].map { |file_name| File.join(files_directory, file_name) } if attributes[:file] end + def import_file(path) + u = Hyrax::UploadedFile.new + u.user_id = User.find_by_user_key(User.batch_user_key).id if User.find_by_user_key(User.batch_user_key) + u.file = CarrierWave::SanitizedFile.new(path) + u.save + u.id + end + + ## TO DO: handle invalid file in CSV + ## currently the importer stops if no file corresponding to a given file_name is found + # Regardless of what the MODS Parser gives us, these are the properties we are prepared to accept. def permitted_attributes klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility] diff --git a/lib/importer/factory/with_associated_collection.rb b/lib/importer/factory/with_associated_collection.rb index e40edd98a..013fee154 100644 --- a/lib/importer/factory/with_associated_collection.rb +++ b/lib/importer/factory/with_associated_collection.rb @@ -13,6 +13,7 @@ def create_attributes # Strip out the :collection key, and add the member_of_collection_ids, # which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor def update_attributes + return super if attributes[:collection].nil? super.except(:collection).merge(member_of_collection_ids: [collection.id]) end diff --git a/spec/lib/importer/factory/etd_factory_spec.rb b/spec/lib/importer/factory/etd_factory_spec.rb index 470e31d54..b81cf4de7 100644 --- a/spec/lib/importer/factory/etd_factory_spec.rb +++ b/spec/lib/importer/factory/etd_factory_spec.rb @@ -2,52 +2,50 @@ RSpec.describe Importer::Factory::ETDFactory, :clean do let(:factory) { described_class.new(attributes) } - let(:actor) { double } - before do - allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor) - end let(:files) { [] } - let(:attributes) do - { - collection: { id: coll.id }, - files: files, - identifier: ['123'], - title: ['Test image'], - read_groups: ['public'], - depositor: 'bob', - edit_users: ['bob'] - } - end - - context 'with files' do - let(:factory) { described_class.new(attributes, 'tmp/files', files) } - let(:files) { ['img.png'] } - let(:coll) { create(:collection) } - - context "for a new image" do - it 'calls the actor with the files' do - expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k| - expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) - end - factory.run - end - end + let(:work) { GenericWork } - context "for an existing image without files" do - let(:work) { create(:generic_work) } - let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) } - - it 'creates file sets' do - expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k| - expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) - end - factory.run - end - end - end + # context 'with files' do + # let(:factory) { described_class.new(attributes, 'tmp/files', files) } + # let(:files) { ['img.png'] } + # let(:coll) { create(:collection) } + # + # context "for a new image" do + # it 'calls the actor with the files' do + # expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k| + # expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) + # end + # factory.run + # end + # end + # + # context "for an existing image without files" do + # let(:work) { create(:generic_work) } + # let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) } + # + # it 'creates file sets' do + # expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k| + # expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) + # end + # factory.run + # end + # end + # end context 'when a collection already exists' do let!(:coll) { create(:collection) } + let(:attributes) do + { + collection: { id: coll.id }, + files: files, + identifier: ['123'], + title: ['Test image'], + read_groups: ['public'], + depositor: 'bob', + edit_users: ['bob'] + } + end + let(:actor) { Hyrax::CurationConcern.actor } it 'does not create a new collection' do expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k| @@ -58,4 +56,6 @@ end.to change(Collection, :count).by(0) end end + + include_examples("csv_importer") end diff --git a/spec/lib/importer/factory/image_factory_spec.rb b/spec/lib/importer/factory/image_factory_spec.rb index a7d1e388e..cc50a388f 100644 --- a/spec/lib/importer/factory/image_factory_spec.rb +++ b/spec/lib/importer/factory/image_factory_spec.rb @@ -2,52 +2,50 @@ RSpec.describe Importer::Factory::ImageFactory, :clean do let(:factory) { described_class.new(attributes) } - let(:actor) { double } - before do - allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor) - end let(:files) { [] } - let(:attributes) do - { - collection: { id: coll.id }, - files: files, - identifier: ['123'], - title: ['Test image'], - read_groups: ['public'], - depositor: 'bob', - edit_users: ['bob'] - } - end - - context 'with files' do - let(:factory) { described_class.new(attributes, 'tmp/files', files) } - let(:files) { ['img.png'] } - let!(:coll) { create(:collection) } - - context "for a new image" do - it 'creates file sets with access controls' do - expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k| - expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) - end - factory.run - end - end + let(:work) { Image } - context "for an existing image without files" do - let(:work) { create(:generic_work) } - let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) } - - it 'creates file sets' do - expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k| - expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) - end - factory.run - end - end - end + # context 'with files' do + # let(:factory) { described_class.new(attributes, 'tmp/files', files) } + # let(:files) { ['img.png'] } + # let!(:coll) { create(:collection) } + # + # context "for a new image" do + # it 'creates file sets with access controls' do + # expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k| + # expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) + # end + # factory.run + # end + # end + # + # context "for an existing image without files" do + # let(:work) { create(:generic_work) } + # let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) } + # + # it 'creates file sets' do + # expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k| + # expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png']) + # end + # factory.run + # end + # end + # end context 'when a collection already exists' do let!(:coll) { create(:collection) } + let(:attributes) do + { + collection: { id: coll.id }, + files: files, + identifier: ['123'], + title: ['Test image'], + read_groups: ['public'], + depositor: 'bob', + edit_users: ['bob'] + } + end + let(:actor) { Hyrax::CurationConcern.actor } it 'does not create a new collection' do expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k| @@ -58,4 +56,6 @@ end.to change(Collection, :count).by(0) end end + + include_examples("csv_importer") end diff --git a/spec/support/shared_examples_for_csv_importer_spec.rb b/spec/support/shared_examples_for_csv_importer_spec.rb new file mode 100644 index 000000000..b4e15a7b4 --- /dev/null +++ b/spec/support/shared_examples_for_csv_importer_spec.rb @@ -0,0 +1,57 @@ +RSpec.shared_examples "csv_importer" do + context "with a file" do + let(:attributes) do + { + id: "123", + title: ["Gluten-free umami"], + file: ["world.png"] + } + end + let(:factory) { described_class.new(attributes, 'spec/fixtures/images') } + + before { factory.run } + + describe "#run" do + it "uploads the content of the file" do + expect(Hyrax::UploadedFile.last[:file]).to eq("world.png") + end + end + + describe "when a work with the same id already exists" do + let(:new_attr) do + { + id: "123", + title: ["Squid tofu banjo"], + file: ["nypl-hydra-of-lerna.jpg"] + } + end + + it "updates metadata" do + new_factory = described_class.new(new_attr, 'spec/fixtures/images') + new_factory.run + expect(work.last.title).to eq(["Squid tofu banjo"]) + end + end + end + + context "without a file" do + ## the csv_importer still creates a Work when no file is provided. + ## TO DO: handle invalid file in CSV; current behavior: + ## the importer stops if no file corresponding to a given file_name is found + let(:attributes) { { id: "345", title: ["Artisan succulents"] } } + let(:factory) { described_class.new(attributes) } + + before { factory.run } + + it "creates a Work with supplied metadata" do + expect(work.find("345").title).to eq(["Artisan succulents"]) + end + + it "updates a Work with supplied metadata" do + new_attr = { id: "345", title: ["Retro humblebrag"] } + new_factory = described_class.new(new_attr) + new_factory.run + expect(work.find("345").title).to eq(["Retro humblebrag"]) + end + end +end