diff --git a/README.md b/README.md index 44f2a52..b604102 100644 --- a/README.md +++ b/README.md @@ -1,81 +1,139 @@ ## Introduction -This module contains plugins and some example migrations to import data from a Fedora 3 Islandora instance -into an Islandora CLAW instance. - -This is a base setup, it requires adjustments to the default Repository Object and configuration changes -for your setup. - -## Required changes -The default Repository Object provided with Islandora CLAW requires one additional field to allow for -these migrations (or you can comment out these field migrations). - -1. A large text field called `field_mods_text`, this will store the MODS datastream from the source object. - -This is defined in the `config/install/migrate_plus.migration.islandora_basic_image.yml` and -can be commented out there. - -## Example usage -To use this migration, clone this repo into your Drupal 8 instance `modules/contrib` directory. - -DO NOT INSTALL THE MODULE YET!!! - -You will need to edit the 3 `migrate_plus.migration.islandora_basic_image*` files in the `config/install` directory. - -At a minimum you'll need to set: -1. `solr_base_url: http://10.0.2.2:9080/solr` to your Solr instance -1. `fedora_base_url: &fedora_base_url http://10.0.2.2:9080/fedora` to your Fedora, please leave the `&fedora_base_url` -this is a placeholder and saves re-typing this value in other locations. -1. The `username` and `password` in the block +This module contains plugins to import data from a Fedora 3 Islandora instance +into an Islandora CLAW instance. It also contains a feature as a submodule +that contains some example migrations. The example migrations are based on forms from vanilla Islandora 7.x solution +packs, and are meant to work with the fields defined in `islandora_demo`. If you customized your MODS forms, then you +will also need to customize the example migration and `islandora_demo`. 
+ +Currently, the following content models can be migrated over with full functionality: + +- Collection +- Basic Image +- Large Image +- Audio +- Video +- PDF +- Binary + +If you want some sample Basic Image objects with metadata made from stock forms, check out [this zip +file](docs/examples/sample_objects.zip) that you can use with `islandora_zip_batch_importer`. All the images were +obtained from [Pexels](https://www.pexels.com/) and are free to use for personal or business purposes, with the +original photographers attributed in the MODS. + +## Installation + +Download this module, its feature, and its dependencies with composer + +``` +composer require islandora/migrate_7x_claw +``` + +Install the module and example migrations at the same time using drush + +``` +drush en islandora_migrate_7x_claw_feature +``` + +## Configuration + +By default, the migrations are configured to work with an `islandora_vagrant` instance running on the same host as a +`claw-playbook` instance, which is convenient for development and testing. But for your Islandora 7.x instance, the +following config will need to be set the same way on the source plugin of each migration (except for the +"7.x Tags Migration from CSV" migration): + +- `solr_base_url` should point to your Islandora 7.x Solr instance (e.g. `http://example.org:8080/solr`) +- `fedora_base_url` should point to your Fedora 3 instance (e.g. `http://example.org:8080/fedora`) +- The `username` and `password` for your Fedora 3 instance in the block ``` - authentication: &fedora_auth plugin: basic username: fedoraAdmin password: fedoraAdmin ``` +- `q` is used to define a Solr query that selects which objects get migrated. 
From a fresh clone, the +migrations are configured to look for `islandora:sp_basic_image_collection` and all its children with the following query: + ``` + RELS_EXT_isMemberOfCollection_uri_ms:"info:fedora/islandora:sp_basic_image_collection" OR PID:"islandora:sp_basic_image_collection" + ``` +You can easily import a collection of your own by changing the PID in the above query, or you can provide your own +query to migrate over objects in other ways (such as per content model, in order by date created, etc...). If you can write a Solr select query for it, you can migrate it into CLAW. Omitting `q` from configuration will default to `*:*` +for the Solr query. + +Once you've updated the configuration, you need to re-import the feature to load your changes. You can do this with `drush`: +``` +drush -y fim islandora_migrate_7x_claw_feature +``` + +You can also use the UI to import the feature if you go to `admin/config/development/features` and click on the `Changed` link next to "Migrate 7x Claw Feature". + +![Changed Link](docs/images/feature_click_changed.png) -You may also need (or want) to alter the content model field name in Solr. -`content_model_field: RELS_EXT_hasModel_uri_ms` -and the content model to migrate. -`content_model: islandora:sp_basic_image` +From there, you can select all changes and click "Import Changes" -These changes need to be made in all 3 migration configuration files. +![Import Changes](docs/images/feature_import_changes.png) -Now you can install the `migrate_7x_claw` module. +## Running the migrations -If you have installed the `migrate_ui` module you can review the process in the `Admin -> Structure -> Migrations`. +You can quickly run all migrations using `drush`: +``` +drush -y mim --group islandora_7x +``` -You can then see. -![List of Migrations](docs/images/migrations.jpg) +If you want to go through the UI, you can visit `admin/structure/migrate` to see a list of migration groups. 
The migrations provided by this module have the machine name `islandora_7x`. -If you click **List Migrations** you will see 3 migrations. +![Migrations Groups](docs/images/migrate_groups.png) -![Migration](docs/images/migrate1.jpg) +You will see 8 migrations. _The "7.x Tags Migration from CSV" needs to be run first_. -The _Basic Image Objects OBJ Media_ migration requires the other two be completed first, if you try to run this one it -will run the other two first. +![Migrations](docs/images/migrations.png) -Clicking **Execute** on the _Basic Image Objects_ displays a page like. +Clicking **Execute** on "7.x Tags Migration from CSV" migration displays a page like -![Migration Execute](docs/images/migrate2.jpg) +![Execute Migration](docs/images/execute_migration.png) -The operations you can run are -* **Import** - import the objects +The operations you can run for a migration are +* **Import** - import un-migrated objects (check the "Update" checkbox to re-run previously migrated objects) * **Rollback** - delete all the objects (if any) previously imported * **Stop** - stop a long running import. * **Reset** - reset an import that might have failed. -With _Import_ selected press **Execute**. +If you select "Import", and then click "Execute", it will run the migration. It should process 5 items. + +Then you can run the "Islandora Media" migration, which depends on the remaining migrations. Running it effectively +runs the entire group of migrations other than the "7.x Tags Migration from CSV" migration. After they're all done, +you should be able to navigate to the home page of your CLAW instance and see your content brought over from +Islandora 7.x! -When complete, you should see something like below (your number will be different). +![Content in CLAW](docs/images/content_in_claw.png) -![Migration result](docs/images/migrate_result1.jpg) +If you click on any node you should see all its metadata, which has been extracted from its MODS and Solr documents. 
+Here's the original object in Islandora 7.x: -Once you have completed all 3 +![Free Smells in 7x](docs/images/free_smells_in_7x.png) + +And here it is in Islandora CLAW: + +![Free Smells in CLAW](docs/images/free_smells_in_claw.png) + +Clicking on the Media tab will reveal all of the datastreams migrated over from 7.x, which you can now manage through CLAW. Here are the original datastreams in Islandora 7.x: + +![Free Smells Datastreams](docs/images/free_smells_datastreams.png) + +And here they are in Islandora CLAW as Media: + +![Free Smells Media](docs/images/free_smells_media.png) + +You can also check out the collection itself, which should have its "Members" block populated: + +![Collection in CLAW](docs/images/collection_in_claw.png) ## How this migration works -To allow for the magic Danny content modelling overhaul. +You provide a query, as `q` in the source plugin configuration, that defines which objects get migrated. For each +result in the query, you can choose to use either the Solr doc for an object, the FOXML file for an object, or +a particular datastream for an object by setting the `row_type` configuration. The migrations for subjects, geographics, and agents all target the MODS file of an object. The migration for datastreams uses FOXML, and the migration for the objects themselves uses the Solr doc. + +All datastreams are migrated over as-is, regardless of what data is extracted by the migrations and applied as fields. + +Collection hierarchy is preserved so long as all the collections are in the `q` query results. -1. The migration searches Solr for all of the content models specified. -1. Each is migrated to a new node in Drupal. -Then it creates a file for the OBJ datastream -of each of these objects. Lastly it creates a media object that links the file to the node. \ No newline at end of file +Subject, geographic, and person/corporate agents from MODS all get transformed into taxonomy terms, and content +is tagged with these terms. 
diff --git a/docs/examples/sample_objects.zip b/docs/examples/sample_objects.zip new file mode 100644 index 0000000..1ae6835 Binary files /dev/null and b/docs/examples/sample_objects.zip differ diff --git a/docs/images/collection_in_claw.png b/docs/images/collection_in_claw.png new file mode 100644 index 0000000..c419f0b Binary files /dev/null and b/docs/images/collection_in_claw.png differ diff --git a/docs/images/content_in_claw.png b/docs/images/content_in_claw.png new file mode 100644 index 0000000..3b7713d Binary files /dev/null and b/docs/images/content_in_claw.png differ diff --git a/docs/images/execute_migration.png b/docs/images/execute_migration.png new file mode 100644 index 0000000..ccfb968 Binary files /dev/null and b/docs/images/execute_migration.png differ diff --git a/docs/images/feature_click_changed.png b/docs/images/feature_click_changed.png new file mode 100644 index 0000000..c6dc87b Binary files /dev/null and b/docs/images/feature_click_changed.png differ diff --git a/docs/images/feature_import_changes.png b/docs/images/feature_import_changes.png new file mode 100644 index 0000000..4d6a38a Binary files /dev/null and b/docs/images/feature_import_changes.png differ diff --git a/docs/images/free_smells_datastreams.png b/docs/images/free_smells_datastreams.png new file mode 100644 index 0000000..df52472 Binary files /dev/null and b/docs/images/free_smells_datastreams.png differ diff --git a/docs/images/free_smells_in_7x.png b/docs/images/free_smells_in_7x.png new file mode 100644 index 0000000..0fa9e00 Binary files /dev/null and b/docs/images/free_smells_in_7x.png differ diff --git a/docs/images/free_smells_in_claw.png b/docs/images/free_smells_in_claw.png new file mode 100644 index 0000000..3614c20 Binary files /dev/null and b/docs/images/free_smells_in_claw.png differ diff --git a/docs/images/free_smells_media.png b/docs/images/free_smells_media.png new file mode 100644 index 0000000..22a74de Binary files /dev/null and 
b/docs/images/free_smells_media.png differ diff --git a/docs/images/list_all_migration.png b/docs/images/list_all_migration.png new file mode 100644 index 0000000..eebb37e Binary files /dev/null and b/docs/images/list_all_migration.png differ diff --git a/docs/images/migrate1.jpg b/docs/images/migrate1.jpg deleted file mode 100644 index 2523f09..0000000 Binary files a/docs/images/migrate1.jpg and /dev/null differ diff --git a/docs/images/migrate2.jpg b/docs/images/migrate2.jpg deleted file mode 100644 index 5dfdcd5..0000000 Binary files a/docs/images/migrate2.jpg and /dev/null differ diff --git a/docs/images/migrate_groups.png b/docs/images/migrate_groups.png new file mode 100644 index 0000000..236d309 Binary files /dev/null and b/docs/images/migrate_groups.png differ diff --git a/docs/images/migrate_result1.jpg b/docs/images/migrate_result1.jpg deleted file mode 100644 index d793147..0000000 Binary files a/docs/images/migrate_result1.jpg and /dev/null differ diff --git a/docs/images/migrations.jpg b/docs/images/migrations.jpg deleted file mode 100644 index 7eb0f72..0000000 Binary files a/docs/images/migrations.jpg and /dev/null differ diff --git a/docs/images/migrations.png b/docs/images/migrations.png new file mode 100644 index 0000000..76bced8 Binary files /dev/null and b/docs/images/migrations.png differ diff --git a/migrate/tags.csv b/migrate/tags.csv new file mode 100644 index 0000000..73b2f12 --- /dev/null +++ b/migrate/tags.csv @@ -0,0 +1,7 @@ +vid,name,description,external_uri +islandora_media_use,"RELS-EXT File","A RELS-EXT file from an Islandora 7.x instance",http://islandora.ca/ontology/relsext +islandora_media_use,"Dublin Core File","Dublin Core Elements 1.1",http://purl.org/dc/elements/1.1 +islandora_media_use,"MODS File","Metadata Object Description Schema",http://www.loc.gov/mods/v3 +islandora_media_use,"FITS File","Technical metadata generated by FITS",http://hul.harvard.edu/ois/xml/ns/fits/fits_output +islandora_media_use,"Audit Trail","Audit 
trail generated by Fedora 3",http://islandora.ca/audit-trail +islandora_media_use,"Collection Policy","Islandora 7.x Collection Policy File",http://islandora.ca/collection-policy diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_7x_tags.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_7x_tags.yml new file mode 100644 index 0000000..a3cc204 --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_7x_tags.yml @@ -0,0 +1,31 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - islandora_migrate_7x_claw_feature +id: islandora_7x_tags +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: islandora_tags +migration_group: islandora_7x +label: '7.x Tags Migration from CSV' +source: + plugin: csv + path: modules/contrib/migrate_7x_claw/migrate/tags.csv + header_row_count: 1 + keys: + - external_uri +process: + name: name + vid: vid + description: description + field_external_uri: + plugin: urlencode + source: external_uri +destination: + plugin: 'entity:taxonomy_term' +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_audit_file.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_audit_file.yml new file mode 100644 index 0000000..7f05efa --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_audit_file.yml @@ -0,0 +1,85 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_audit_file +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'AUDIT File' +source: + plugin: islandora + solr_base_url: 
'http://97.107.189.65:8080/solr' + q: 'RELS_EXT_isMemberOfCollection_uri_ms:"info:fedora/islandora:sp_basic_image_collection" OR PID:"islandora:sp_basic_image_collection"' + fedora_base_url: 'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + data_parser_plugin: authenticated_xml + item_selector: '/foxml:digitalObject' + constants: + destination_directory: 'fedora://masters' + mimetype: application/xml + extension: xml + dsid: AUDIT + creator_uid: 1 + fields: + - + name: PID + label: PID + selector: '@PID' + - + name: audit_ds + label: Audit Datastream + selector: 'foxml:datastream[@ID = "AUDIT"]/foxml:datastreamVersion/foxml:xmlContent/*' + ids: + PID: + type: string +process: + digital_id: + - + plugin: concat + delimiter: _ + source: + - PID + - constants/dsid + - + plugin: str_replace + search: ':' + replace: _ + filemime: constants/mimetype + uid: constants/creator_uid + filename: + plugin: concat + delimiter: . 
+ source: + - '@digital_id' + - constants/extension + destination: + plugin: concat + delimiter: / + source: + - constants/destination_directory + - '@filename' + uri: + - + plugin: flatten + source: + - '@destination' + - audit_ds + - + plugin: file_blob +destination: + plugin: 'entity:file' + default_bundle: file +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_audit_media.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_audit_media.yml new file mode 100644 index 0000000..78b4a2b --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_audit_media.yml @@ -0,0 +1,87 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_audit_media +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'AUDIT Media' +source: + plugin: islandora + solr_base_url: 'http://97.107.189.65:8080/solr' + fedora_base_url: 'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + content_model_field: RELS_EXT_hasModel_uri_ms + content_model: 'islandora:sp_basic_image' + data_parser_plugin: authenticated_xml + item_selector: '/foxml:digitalObject' + constants: + destination_directory: 'fedora://masters' + mimetype: application/xml + extension: xml + dsid: AUDIT + fedora_base_url: 'http://97.107.189.65:8080/fedora' + creator_uid: 1 + audit_url: http://islandora.ca/audit-trail + fields: + - + name: PID + label: PID + selector: '@PID' + ids: + PID: + type: string +process: + digital_id: + - + plugin: concat + delimiter: _ + source: + - PID + - constants/dsid + - + plugin: str_replace + search: ':' + replace: _ + name: + plugin: concat + delimiter: . 
+ source: + - '@digital_id' + - constants/extension + field_media_use: + plugin: migration_lookup + migration: islandora_7x_tags + source: constants/audit_url + no_stub: true + field_media_file: + plugin: migration_lookup + migration: islandora_audit_file + source: PID + no_stub: true + field_media_of: + plugin: migration_lookup + migration: islandora_objects + source: PID + no_stub: true + uid: constants/creator_uid +destination: + plugin: 'entity:media' + default_bundle: file +migration_dependencies: + required: + - migrate_plus.migration.islandora_objects + - migrate_plus.migration.islandora_audit_file + - migrate_plus.migration.islandora_7x_tags + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image.yml deleted file mode 100644 index 282097c..0000000 --- a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image.yml +++ /dev/null @@ -1,109 +0,0 @@ -langcode: en -status: true -dependencies: - enforced: - module: - - migrate_7x_claw - - migrate_plus - - islandora -id: islandora_basic_image -class: null -field_plugin_method: null -cck_plugin_method: null -migration_tags: null -migration_group: islandora_7x -label: 'Basic Image Objects' -source: - plugin: islandora - solr_base_url: 'http://10.0.2.2:18080/solr' - fedora_base_url: 'http://10.0.2.2:18080/fedora' - data_fetcher_plugin: http - authentication: - plugin: basic - username: fedoraAdmin - password: fedoraAdmin - content_model_field: RELS_EXT_hasModel_uri_ms - content_model: 'islandora:sp_basic_image' - data_parser_plugin: authenticated_xml - item_selector: '/foxml:digitalObject' - constants: - creator_uid: 1 - image: Image - fields: - - - name: PID - label: PID - selector: '@PID' - - - name: created - label: 'Created Date' - selector: 
'foxml:objectProperties/foxml:property[@NAME="info:fedora/fedora-system:def/model#createdDate"]/@VALUE' - - - name: lastmodified - label: 'Last Modified Date' - selector: 'foxml:objectProperties/foxml:property[@NAME="info:fedora/fedora-system:def/view#lastModifiedDate"]/@VALUE' - - - name: label - label: 'Object label' - selector: 'foxml:objectProperties/foxml:property[@NAME="info:fedora/fedora-system:def/model#label"]/@VALUE' - - - name: member_of_collection - label: 'Member of Collections' - selector: 'foxml:datastream[@ID = "RELS-EXT" and @CONTROL_GROUP = "X" ]/foxml:datastreamVersion[position() = last()]/foxml:xmlContent/rdf:RDF/rdf:Description/fedora:isMemberOfCollection/@rdf:resource' - - - name: member_of - label: 'Member Of' - selector: 'foxml:datastream[@ID = "RELS-EXT" and @CONTROL_GROUP = "X" ]/foxml:datastreamVersion[position() = last()]/foxml:xmlContent/rdf:RDF/rdf:Description/fedora:isMemberOf/@rdf:resource' - ids: - PID: - type: string -process: - title: label - type: - plugin: default_value - default_value: islandora_object - uid: constants/creator_uid - created: - plugin: format_date - from_format: 'Y-m-d\TH:i:s.u\Z' - to_format: U - source: created - settings: - validate_format: false - updated: - plugin: format_date - from_format: 'Y-m-d\TH:i:s.u\Z' - to_format: U - source: lastmodified - settings: - validate_format: false - field_member_of: - - - plugin: skip_on_empty - method: process - source: member_of - - - plugin: substr - source: member_of - start: 11 - field_member_of_collection: - - - plugin: skip_on_empty - method: process - source: member_of_collection - - - plugin: substr - source: member_of_collection - start: 11 - field_tags: - plugin: entity_lookup - source: constants/image - value_key: name - bundle_key: vid - bundle: tags - entity_type: taxonomy_term - ignore_case: true -destination: - plugin: 'entity:node' - default_bundle: islandora_object -migration_dependencies: null diff --git 
a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_corporate.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_corporate.yml new file mode 100644 index 0000000..ad90755 --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_corporate.yml @@ -0,0 +1,46 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_corporate +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'Islandora Corporate' +source: + plugin: islandora + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'fedora_datastreams_ms:MODS' + row_type: MODS + fedora_base_url: 'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + data_parser_plugin: authenticated_xml + item_selector: '/mods:mods/mods:name[@type = "corporate"]' + constants: + creator_uid: 1 + fields: + - + name: name + label: Name + selector: 'mods:namePart' + ids: + name: + type: string +process: + name: name +destination: + plugin: 'entity:taxonomy_term' + default_bundle: corporate_body +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image_files.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_files.yml similarity index 77% rename from modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image_files.yml rename to modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_files.yml index 05d9219..41484ae 100644 --- a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image_files.yml 
+++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_files.yml @@ -6,17 +6,18 @@ dependencies: - migrate_7x_claw - migrate_plus - islandora -id: islandora_basic_image_files +id: islandora_files class: null field_plugin_method: null cck_plugin_method: null migration_tags: null migration_group: islandora_7x -label: 'Basic Image Objects OBJ' +label: 'Islandora Files' source: plugin: islandora - solr_base_url: 'http://10.0.2.2:18080/solr' - fedora_base_url: 'http://10.0.2.2:18080/fedora' + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'RELS_EXT_isMemberOfCollection_uri_ms:"info:fedora/islandora:sp_basic_image_collection" OR PID:"islandora:sp_basic_image_collection"' + fedora_base_url: 'http://97.107.189.65:8080/fedora' islandora_type: datastreams datastream_solr_field: fedora_datastreams_ms data_fetcher_plugin: http @@ -24,14 +25,12 @@ source: plugin: basic username: fedoraAdmin password: fedoraAdmin - content_model_field: RELS_EXT_hasModel_uri_ms - content_model: 'islandora:sp_basic_image' data_parser_plugin: tuque_datastreams item_selector: '/foxml:digitalObject' constants: - destination_directory: 'fedora://masters' + fedora_base_url: 'http://97.107.189.65:8080/fedora' + destination_directory: 'fedora://' extension: jpg - fedora_base_url: 'http://10.0.2.2:18080/fedora' objects_string: objects datastreams_string: datastreams content_string: content @@ -79,12 +78,20 @@ process: text/xml: xml application/xml: xml application/rdf+xml: xml + application/pdf: pdf image/jpeg: jpg image/tiff: tiff image/tif: tiff image/jpg: jpg image/png: png image/gif: gif + image/jp2: jp2 + audio/mpeg: mp3 + audio/wav: wav + audio/aac: aac + video/mp4: mp4 + video/x-matroska: mkv + text/plain: txt filename: plugin: concat delimiter: . 
@@ -111,4 +118,6 @@ process: destination: plugin: 'entity:file' default_bundle: image -migration_dependencies: null +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_geographic.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_geographic.yml new file mode 100644 index 0000000..307e818 --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_geographic.yml @@ -0,0 +1,46 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_geographic +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'Islandora Geographic' +source: + plugin: islandora + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'fedora_datastreams_ms:MODS' + row_type: MODS + fedora_base_url: 'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + data_parser_plugin: authenticated_xml + item_selector: '/mods:mods/mods:subject/mods:geographic[node()] | /mods:mods/mods:subject/mods:hierarchicalGeographic/*[node()] | /mods:mods/mods:originInfo/mods:place/mods:placeTerm[node()]' + constants: + creator_uid: 1 + fields: + - + name: name + label: Name + selector: '.' 
+ ids: + name: + type: string +process: + name: name +destination: + plugin: 'entity:taxonomy_term' + default_bundle: geo_location +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image_media.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_media.yml similarity index 50% rename from modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image_media.yml rename to modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_media.yml index 1cd59bb..ffba397 100644 --- a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_basic_image_media.yml +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_media.yml @@ -6,17 +6,18 @@ dependencies: - migrate_7x_claw - migrate_plus - islandora -id: islandora_basic_image_media +id: islandora_media class: null field_plugin_method: null cck_plugin_method: null migration_tags: null migration_group: islandora_7x -label: 'Basic Image Objects OBJ Media' +label: 'Islandora Media' source: plugin: islandora - solr_base_url: 'http://10.0.2.2:18080/solr' - fedora_base_url: 'http://10.0.2.2:18080/fedora' + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'RELS_EXT_isMemberOfCollection_uri_ms:"info:fedora/islandora:sp_basic_image_collection" OR PID:"islandora:sp_basic_image_collection"' + fedora_base_url: 'http://97.107.189.65:8080/fedora' islandora_type: datastreams datastream_solr_field: fedora_datastreams_ms data_fetcher_plugin: http @@ -24,13 +25,9 @@ source: plugin: basic username: fedoraAdmin password: fedoraAdmin - content_model_field: RELS_EXT_hasModel_uri_ms - content_model: 'islandora:sp_basic_image' data_parser_plugin: tuque_datastreams item_selector: '/foxml:digitalObject' constants: - preservation_master: 'Preservation 
Master' - other_tag: Datastream creator_uid: 1 fields: - @@ -72,12 +69,22 @@ process: source: filesize field_media_image/target_id: plugin: migration_lookup - migration: islandora_basic_image_files + migration: islandora_files source: PID_DSID no_stub: true field_media_file/target_id: plugin: migration_lookup - migration: islandora_basic_image_files + migration: islandora_files + source: PID_DSID + no_stub: true + field_media_audio_file/target_id: + plugin: migration_lookup + migration: islandora_files + source: PID_DSID + no_stub: true + field_media_video_file/target_id: + plugin: migration_lookup + migration: islandora_files source: PID_DSID no_stub: true field_media_image/display: @@ -86,29 +93,58 @@ process: field_media_file/display: plugin: default_value default_value: 1 + field_media_audio_file/display: + plugin: default_value + default_value: 1 + field_media_video_file/display: + plugin: default_value + default_value: 1 field_media_image/description: plugin: default_value default_value: '' field_media_file/description: plugin: default_value default_value: '' + field_media_audio_file/description: + plugin: default_value + default_value: '' + field_media_video_file/description: + plugin: default_value + default_value: '' field_media_of: plugin: migration_lookup - migration: islandora_basic_image + migration: islandora_objects source: PID no_stub: true - field_tags: + field_media_use: - plugin: static_map source: DSID map: - OBJ: 'Preservation Master' + OBJ: http://pcdm.org/use#OriginalFile + PDFA: http://pcdm.org/use#PreservationMasterFile + OCR: http://pcdm.org/use#ExtractedText + HOCR: http://pcdm.org/use#ExtractedText + FULL_TEXT: http://pcdm.org/use#ExtractedText + TN: http://pcdm.org/use#ThumbnailImage + JPG: http://pcdm.org/use#ServiceFile + MEDIUM_SIZE: http://pcdm.org/use#ServiceFile + PREVIEW: http://pcdm.org/use#ServiceFile + MP4: http://pcdm.org/use#ServiceFile + JP2: http://pcdm.org/use#IntermediateFile + MKV: http://pcdm.org/use#IntermediateFile + 
RELS-EXT: http://islandora.ca/ontology/relsext + DC: http://purl.org/dc/elements/1.1 + MODS: http://www.loc.gov/mods/v3 + TECHMD: http://hul.harvard.edu/ois/xml/ns/fits/fits_output + PROXY_MP3: http://pcdm.org/use#ServiceFile + COLLECTION_POLICY: http://islandora.ca/collection-policy default_value: Datastream - plugin: entity_lookup - value_key: name + value_key: field_external_uri bundle_key: vid - bundle: tags + bundle: islandora_media_use entity_type: taxonomy_term ignore_case: true bundle: @@ -116,15 +152,20 @@ process: source: mimetype map: image/jpeg: image - image/tiff: image - image/tif: image image/jpg: image image/png: image image/gif: image + audio/mpeg: audio + audio/wav: audio + audio/aac: audio + video/mp4: video default_value: file destination: plugin: 'entity:media' migration_dependencies: required: - - islandora_basic_image_files - - islandora_basic_image + - migrate_plus.migration.islandora_files + - migrate_plus.migration.islandora_objects + - migrate_plus.migration.islandora_7x_tags + - migrate_plus.migration.islandora_audit_media + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_objects.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_objects.yml new file mode 100644 index 0000000..c0aff68 --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_objects.yml @@ -0,0 +1,315 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_objects +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'Islandora Objects' +source: + plugin: islandora + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'RELS_EXT_isMemberOfCollection_uri_ms:"info:fedora/islandora:sp_basic_image_collection" OR PID:"islandora:sp_basic_image_collection"' + 
row_type: solr + fedora_base_url: 'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + data_parser_plugin: json_list + item_selector: '/response/docs' + constants: + creator_uid: 1 + fields: + - + name: PID + label: PID + selector: 'PID' + - + name: model + label: 'Content Model' + selector: 'RELS_EXT_hasModel_uri_s' + - + name: created + label: 'Created Date' + selector: 'fgs_createdDate_dt' + - + name: lastmodified + label: 'Last Modified Date' + selector: 'fgs_lastModifiedDate_dt' + - + name: title + label: 'Title' + selector: 'fgs_label_s' + - + name: subtitle + label: 'Subtitle' + selector: 'mods_titleInfo_subTitle_s' + - + name: resource_type + label: 'Type of Resource' + selector: 'mods_typeOfResource_s' + - + name: identifier + label: 'Identifier' + selector: 'mods_identifier_local_s' + - + name: description + label: 'Description' + selector: 'mods_abstract_s' + - + name: extent + label: 'Physical Extent' + selector: 'mods_physicalDescription_extent_s' + - + name: member_of + label: 'Member Of' + selector: 'RELS_EXT_isMemberOfCollection_uri_ms' + - + name: person_agents + label: "Person agents" + selector: 'mods_name_personal_namePart_ms' + - + name: person_roles + label: "Person roles" + selector: 'mods_name_personal_role_roleTerm_text_ms' + - + name: corporate_agents + label: "Corporate agents" + selector: 'mods_name_corporate_namePart_ms' + - + name: corporate_roles + label: "Corporate roles" + selector: 'mods_name_corporate_role_roleTerm_text_ms' + - + name: topic + label: "Topics" + selector: 'mods_subject_topic_ms' + - + name: temporal + label: "Temporal" + selector: 'mods_subject_temporal_ms' + - + name: geographic + label: "Geographic" + selector: 'mods_subject_geographic_ms' + - + name: continent + label: "Continent" + selector: 'mods_subject_hierarchicalGeographic_continent_ms' + - + name: country + label: "Country" + selector: 
'mods_subject_hierarchicalGeographic_country_ms' + - + name: province + label: "Province" + selector: 'mods_subject_hierarchicalGeographic_province_ms' + - + name: region + label: "Region" + selector: 'mods_subject_hierarchicalGeographic_region_ms' + - + name: county + label: "County" + selector: 'mods_subject_hierarchicalGeographic_county_ms' + - + name: city + label: "City" + selector: 'mods_subject_hierarchicalGeographic_city_ms' + - + name: city_section + label: "City Section" + selector: 'mods_subject_hierarchicalGeographic_citySection_ms' + ids: + PID: + type: string +process: + title: title + field_alternative_title: subtitle + field_identifier: identifier + field_pid: PID + field_description: description + field_extent: extent + type: + plugin: default_value + default_value: islandora_object + uid: constants/creator_uid + created: + plugin: format_date + from_format: 'Y-m-d\TH:i:s.u\Z' + to_format: U + source: created + settings: + validate_format: false + updated: + plugin: format_date + from_format: 'Y-m-d\TH:i:s.u\Z' + to_format: U + source: lastmodified + settings: + validate_format: false + field_member_of: + - + plugin: skip_on_empty + method: process + source: member_of + - + plugin: substr + start: 12 + - + plugin: migration_lookup + migration: islandora_objects + no_stub: true + field_model: + - + plugin: skip_on_empty + method: process + source: model + - + plugin: static_map + map: + info:fedora/islandora:collectionCModel: http://purl.org/dc/dcmitype/Collection + info:fedora/islandora:sp_basic_image: http://purl.org/coar/resource_type/c_c513 + info:fedora/islandora:sp-audioCModel: http://purl.org/coar/resource_type/c_18cc + info:fedora/islandora:sp_videoCModel: http://purl.org/coar/resource_type/c_12ce + default_value: http://purl.org/coar/resource_type/c_1843 + - + plugin: entity_lookup + value_key: field_external_uri + bundle_key: vid + bundle: islandora_models + entity_type: taxonomy_term + ignore_case: true + field_resource_type: + - + 
plugin: skip_on_empty + method: process + source: resource_type + - + plugin: static_map + map: + 'still image': http://purl.org/dc/dcmitype/StillImage + 'text': http://purl.org/dc/dcmitype/Text + 'cartographic': http://purl.org/dc/dcmitype/Image + 'notated music': http://purl.org/dc/dcmitype/Text + 'sound recording': http://purl.org/dc/dcmitype/Sound + 'sound recording-musical': http://purl.org/dc/dcmitype/Sound + 'sound recording-nonmusical': http://purl.org/dc/dcmitype/Sound + 'moving image': http://purl.org/dc/dcmitype/MovingImage + 'three dimensional object': http://purl.org/dc/dcmitype/Image + 'software, multimedia': http://purl.org/dc/dcmitype/Software + # Need to figure out what to do with 'mixed material' + # There is no obvious mapping at https://www.loc.gov/standards/mods/mods-dcsimple.html + - + plugin: entity_lookup + value_key: field_external_uri + bundle_key: vid + bundle: resource_types + entity_type: taxonomy_term + ignore_case: true + temp_person: + - + plugin: migration_lookup + migration: islandora_person + source: person_agents + no_stub: true + - + plugin: typed_relation + role_source: person_roles + temp_corporate: + - + plugin: migration_lookup + migration: islandora_corporate + source: corporate_agents + no_stub: true + - + plugin: typed_relation + role_source: corporate_roles + field_linked_agent: + plugin: merge_ignore_empty + source: + - '@temp_person' + - '@temp_corporate' + temp_topic: + plugin: migration_lookup + migration: islandora_subject + source: topic + no_stub: true + temp_temporal: + plugin: migration_lookup + migration: islandora_subject + source: temporal + no_stub: true + temp_geographic: + plugin: migration_lookup + migration: islandora_geographic + source: geographic + no_stub: true + temp_continent: + plugin: migration_lookup + migration: islandora_geographic + source: continent + no_stub: true + temp_country: + plugin: migration_lookup + migration: islandora_geographic + source: country + no_stub: true + temp_province: 
+ plugin: migration_lookup + migration: islandora_geographic + source: province + no_stub: true + temp_region: + plugin: migration_lookup + migration: islandora_geographic + source: region + no_stub: true + temp_county: + plugin: migration_lookup + migration: islandora_geographic + source: county + no_stub: true + temp_city: + plugin: migration_lookup + migration: islandora_geographic + source: city + no_stub: true + temp_city_section: + plugin: migration_lookup + migration: islandora_geographic + source: city_section + no_stub: true + field_subject: + plugin: merge_ignore_empty + source: + - '@temp_topic' + - '@temp_temporal' + - '@temp_geographic' + - '@temp_continent' + - '@temp_country' + - '@temp_province' + - '@temp_region' + - '@temp_county' + - '@temp_city' + - '@temp_city_section' +destination: + plugin: 'entity:node' + default_bundle: islandora_object +migration_dependencies: + required: + - migrate_plus.migration.islandora_person + - migrate_plus.migration.islandora_corporate + - migrate_plus.migration.islandora_subject + - migrate_plus.migration.islandora_geographic + - migrate_plus.migration.islandora_7x_tags + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_person.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_person.yml new file mode 100644 index 0000000..221a238 --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_person.yml @@ -0,0 +1,46 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_person +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'Islandora Person' +source: + plugin: islandora + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'fedora_datastreams_ms:MODS' + row_type: MODS + fedora_base_url: 
'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + data_parser_plugin: authenticated_xml + item_selector: '/mods:mods/mods:name[@type = "personal"]' + constants: + creator_uid: 1 + fields: + - + name: name + label: Name + selector: 'mods:namePart' + ids: + name: + type: string +process: + name: name +destination: + plugin: 'entity:taxonomy_term' + default_bundle: person +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_subject.yml b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_subject.yml new file mode 100644 index 0000000..6208f4a --- /dev/null +++ b/modules/islandora_migrate_7x_claw_feature/config/install/migrate_plus.migration.islandora_subject.yml @@ -0,0 +1,46 @@ +langcode: en +status: true +dependencies: + enforced: + module: + - migrate_7x_claw + - migrate_plus + - islandora +id: islandora_subject +class: null +field_plugin_method: null +cck_plugin_method: null +migration_tags: null +migration_group: islandora_7x +label: 'Islandora Subject' +source: + plugin: islandora + solr_base_url: 'http://97.107.189.65:8080/solr' + q: 'fedora_datastreams_ms:MODS' + row_type: MODS + fedora_base_url: 'http://97.107.189.65:8080/fedora' + data_fetcher_plugin: http + authentication: + plugin: basic + username: fedoraAdmin + password: fedoraAdmin + data_parser_plugin: authenticated_xml + item_selector: '/mods:mods/mods:subject/mods:temporal[node()] | /mods:mods/mods:subject/mods:topic[node()]' + constants: + creator_uid: 1 + fields: + - + name: name + label: Name + selector: '.' 
+ ids: + name: + type: string +process: + name: name +destination: + plugin: 'entity:taxonomy_term' + default_bundle: subject +migration_dependencies: + required: { } + optional: { } diff --git a/modules/islandora_migrate_7x_claw_feature/islandora_migrate_7x_claw_feature.info.yml b/modules/islandora_migrate_7x_claw_feature/islandora_migrate_7x_claw_feature.info.yml index a009c48..ab3b7b5 100644 --- a/modules/islandora_migrate_7x_claw_feature/islandora_migrate_7x_claw_feature.info.yml +++ b/modules/islandora_migrate_7x_claw_feature/islandora_migrate_7x_claw_feature.info.yml @@ -4,5 +4,6 @@ type: module core: 8.x dependencies: - migrate_plus + - migrate_7x_claw version: 8.x-1.x package: Islandora diff --git a/src/Plugin/migrate/process/FedoraDatastream.php b/src/Plugin/migrate/process/FedoraDatastream.php index e40edf7..b3bf6aa 100644 --- a/src/Plugin/migrate/process/FedoraDatastream.php +++ b/src/Plugin/migrate/process/FedoraDatastream.php @@ -95,6 +95,7 @@ public static function create(ContainerInterface $container, array $configuratio */ public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) { if (!is_null($value) && $value instanceof \SimpleXMLElement) { +echo ("I MADE IT!"); foreach ($value->attributes() as $key => $attribute) { if (strtolower($key) == 'size') { $size = (int) $attribute; diff --git a/src/Plugin/migrate/process/MergeIgnoreEmpty.php b/src/Plugin/migrate/process/MergeIgnoreEmpty.php new file mode 100644 index 0000000..f993306 --- /dev/null +++ b/src/Plugin/migrate/process/MergeIgnoreEmpty.php @@ -0,0 +1,40 @@ + 1, 'rel_type' => 'relators:pbl'], + * ['target_id' => 2, 'rel_type' => 'relators:ctb'], + * ] + * @endcode + * + * @see \Drupal\migrate\Plugin\MigrateProcessInterface + * + * @MigrateProcessPlugin( + * id = "typed_relation", + * handle_multiples = TRUE + * * ) + */ +class TypedRelation extends ProcessPluginBase { + + /** + * {@inheritdoc} + */ + public function transform($value, 
MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) { + // No-op is empty array. + if (empty($value)) { + return []; + } + + // We're expecting an array of ids. + if (!is_array($value)) { + throw new MigrateException("Process input is not an array"); + } + + // Make sure there's a role array, and that it lines up with the ids. + if (isset($this->configuration['role_source'])) { + $roles = $row->getSourceProperty($this->configuration['role_source']); + if (count($roles) != count($value)) { + throw new MigrateException("Input and roles arrays must be parallel"); + } + } + else { + throw new MigrateException("Required configuration: role_source"); + } + + // Build an array for a Typed Relation field. + $out = []; + for ($i = 0; $i < count($value); ++$i) { + $out[] = ['target_id' => $value[$i], 'rel_type' => $roles[$i]]; + } + + return $out; + } +} diff --git a/src/Plugin/migrate/source/Islandora.php b/src/Plugin/migrate/source/Islandora.php index d48affe..fff1ca3 100644 --- a/src/Plugin/migrate/source/Islandora.php +++ b/src/Plugin/migrate/source/Islandora.php @@ -17,32 +17,25 @@ class Islandora extends SourcePluginExtension { /** - * The content model to restrict this search to. - * - * @var string - */ - private $contentModel; - - /** - * The Solr field to use for content model matching. + * The base URL of the Fedora repo. * * @var string */ - private $contentModelField; + private $fedoraBase; /** - * The base URL of the Fedora repo. + * The base URL for the Solr instance. * * @var string */ - private $fedoraBase; + private $solrBase; /** - * The base URL for the Solr instance. + * Solr query string. * * @var string */ - private $solrBase; + private $q; /** * The number of batches to run for this source. @@ -72,6 +65,11 @@ class Islandora extends SourcePluginExtension { */ private $count; + /** + * Type of URL to generate a list of. E.g. foxml, solr, MODS. + */ + private $row_type; + /** * Internal client for Solr queries. 
* @@ -136,11 +134,6 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition throw new MigrateException("Islandora source plugin requires a \"solr_base_url\" be defined."); } $this->solrBase = rtrim($configuration['solr_base_url'], '/'); - if (!isset($configuration['content_model']) || !isset($configuration['content_model_field'])) { - throw new MigrateException("Islandora source plugin requires a \"content_model_field\" and \"content_model\" be defined."); - } - $this->contentModel = $configuration['content_model']; - $this->contentModelField = $configuration['content_model_field']; if (isset($configuration['batch_size'])) { if (is_int($this->configuration['batch_size']) && ($this->configuration['batch_size']) > 0) { $this->batchSize = $this->configuration['batch_size']; @@ -166,6 +159,16 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition $this->datastreamSolrField = $configuration['datastream_solr_field']; } $this->httpClient = \Drupal::httpClient(); + + $this->q = "*:*"; + if (isset($configuration['q']) && !empty($configuration['q'])) { + $this->q = $configuration['q']; + } + + $this->row_type = 'foxml'; + if (isset($configuration['row_type']) && !empty($configuration['row_type'])) { + $this->row_type = $configuration['row_type']; + } } /** @@ -178,7 +181,15 @@ protected function initializeIterator() { $start = $this->batchCounter * $this->batchSize; $pids = $this->getPids($start); $current_batch = array_map(function ($i) { - return "{$this->fedoraBase}/objects/{$i}/objectXML"; + if ($this->row_type == 'solr') { + return "{$this->solrBase}/select?q=PID%3A\"" . urlencode($i) . 
"\"&wt=json"; + } + else if ($this->row_type != 'foxml') { + return "{$this->fedoraBase}/objects/{$i}/datastreams/{$this->row_type}/content"; + } + else { + return "{$this->fedoraBase}/objects/{$i}/objectXML"; + } }, $pids); $this->configuration['urls'] = $current_batch; $this->getDataParserPlugin()->updateUrls($current_batch); @@ -313,15 +324,10 @@ private function getQuery($start = 0, $rows = 200) { $params = []; $params['rows'] = $rows; $params['start'] = $start; - if (isset($this->datastreamSolrField)) { - $params['fl'] = 'PID,' . $this->datastreamSolrField; - } - else { - $params['fl'] = 'PID'; - } + $params['fl'] = 'PID'; + $params['q'] = $this->q; $params['wt'] = 'json'; - $params['q'] = "{$this->contentModelField}:(\"{$this->contentModel}\" OR \"info:fedora/{$this->contentModel}\")"; - $params['sort'] = 'PID+asc'; + $params['sort'] = 'PID+desc'; return $this->solrBase . "/select?" . build_query($params, FALSE); } diff --git a/src/Plugin/migrate_plus/data_parser/AuthenticatedXml.php b/src/Plugin/migrate_plus/data_parser/AuthenticatedXml.php index 00f97e4..775dc0a 100644 --- a/src/Plugin/migrate_plus/data_parser/AuthenticatedXml.php +++ b/src/Plugin/migrate_plus/data_parser/AuthenticatedXml.php @@ -2,7 +2,7 @@ namespace Drupal\migrate_7x_claw\Plugin\migrate_plus\data_parser; -use Drupal\migrate_plus\Plugin\migrate_plus\data_parser\Xml; +use Drupal\migrate_plus\Plugin\migrate_plus\data_parser\SimpleXml; /** * Obtain XML data for migration using the XMLReader pull parser. @@ -12,7 +12,7 @@ * title = @Translation("Authenticated XML") * ) */ -class AuthenticatedXml extends Xml { +class AuthenticatedXml extends SimpleXml { /** * Update the configuration for the dataparserplugin. @@ -33,29 +33,31 @@ public function updateUrls($urls) { /** * {@inheritdoc} */ - protected function openSourceUrl($url) { - // (Re)open the provided URL. - $this->reader->close(); - - // Clear XML error buffer. 
Other Drupal code that executed during the - // migration may have polluted the error buffer and could create false - // positives in our error check below. We are only concerned with errors - // that occur from attempting to load the XML string into an object here. - libxml_clear_errors(); - - if (is_null($url)) { - // No URL means no source. - return FALSE; + protected function fetchNextRow() { + $target_element = array_shift($this->matches); + // If we've found the desired element, populate the currentItem and + // currentId with its data. + if ($target_element !== FALSE && !is_null($target_element)) { + foreach ($this->fieldSelectors() as $field_name => $xpath) { + foreach ($target_element->xpath($xpath) as $value) { + if ($value->children() && !trim((string) $value)) { + $this->currentItem[$field_name] = $value; + } + elseif (!trim((string) $value)){ + $this->currentItem[$field_name][] = $value->asXML(); + } + else { + $this->currentItem[$field_name][] = (string) $value; + } + } + } + // Reduce single-value results to scalars. + foreach ($this->currentItem as $field_name => $values) { + if (count($values) == 1) { + $this->currentItem[$field_name] = reset($values); + } + } } - - // Get the XML using the data fetcher to allow us to access URLs requiring - // authentication. - $xml = $this->getDataFetcherPlugin() - ->getResponseContent($url) - ->getContents(); - - return $this->reader->XML($xml, NULL, \LIBXML_NOWARNING); - } /** diff --git a/src/Plugin/migrate_plus/data_parser/JsonList.php b/src/Plugin/migrate_plus/data_parser/JsonList.php new file mode 100644 index 0000000..42126ca --- /dev/null +++ b/src/Plugin/migrate_plus/data_parser/JsonList.php @@ -0,0 +1,45 @@ +urls = $urls; + } + + /** + * {@inheritdoc} + * + * Islandora Source can provide 0 urls, we need to exit or it throws an + * error. + */ + protected function nextSource() { + if (count($this->urls) == 0) { + return FALSE; + } + return parent::nextSource(); + } +}