Skip to content

Commit

Permalink
wip, proof-of-concept for fetching mods and inserting link-back into …
Browse files Browse the repository at this point in the history
…<abstract>
  • Loading branch information
Jason Peak committed Aug 2, 2017
1 parent 826ee6e commit 6613f56
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 7 deletions.
13 changes: 6 additions & 7 deletions extras/samples/lsu_tulane_oai.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,27 @@ oai_endpoint = "https://digitallibrary.tulane.edu/oai2"
;set_spec = tulane_p16313coll73
set_spec = tulane_p16313coll75
;set_spec = tulane_p16313coll64
metadata_prefix = oai_dc
metadata_prefix = mods
;date-ranging
;from = 2005-09-13
;until = 2005-09-14
temp_directory = "/tmp/oaitest_temp"
temp_directory = "tulane/tmp"

[METADATA_PARSER]
class = dc\OaiToDc

[FILE_GETTER]
class = OaipmhIslandoraObj
datastream_ids[] = "TN"
temp_directory = "/tmp/oaitest_temp"
temp_directory = "tulane/tmp"


[WRITER]
class = LsuOaipmh
output_directory = "/tmp/oaitest_output"
class = OAIAggregator
output_directory = "tulane/out"
;had to fix the path on this one...
;postwritehooks[] = "/usr/bin/php extras/scripts/postwritehooks/oai_dc_to_mods.php"


[MANIPULATORS]
;fetchermanipulator = "RandomSet|10"
;fetchermanipulator = "SpecificSet|kora_specific_set.txt"
Expand All @@ -47,4 +46,4 @@ output_directory = "/tmp/oaitest_output"

[LOGGING]
; Path to the MIK log file
path_to_log = "/tmp/oaitest_output/mik.log"
path_to_log = "tulane/out/mik.log"
72 changes: 72 additions & 0 deletions src/writers/OAIAggregator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?php
namespace mik\writers;

use GuzzleHttp\Client;
use mik\exceptions\MikErrorException;
use Monolog\Logger;

/**
 * Writer that adds a link back to the original repository object into the
 * MODS <abstract> of each harvested record before the metadata is written.
 *
 * The link is derived from the OAI identifier found in the file
 * "<temp_directory>/<record_id>.metadata".
 */
class OAIAggregator extends Oaipmh
{
    /**
     * Directory in which "<record_id>.metadata" files are looked up; read
     * from the [FILE_GETTER] temp_directory setting.
     *
     * Declared explicitly so it is not created as a dynamic property
     * (deprecated since PHP 8.2).
     *
     * @var string
     */
    public $temp_directory;

    /**
     * Id of the record most recently passed to writePackages().
     *
     * @var string
     */
    public $record_id;

    /**
     * @param array $settings Parsed configuration; must contain
     *                        $settings['FILE_GETTER']['temp_directory'].
     */
    public function __construct($settings)
    {
        // Assigned before the parent constructor runs, as in the original
        // ordering, in case the parent relies on it.
        $this->temp_directory = $settings['FILE_GETTER']['temp_directory'];
        parent::__construct($settings);
    }

    /**
     * Remembers the record id, which writeMetadataFile() needs to locate the
     * record's temp file, then delegates to the parent writer.
     *
     * @param string $metadata  Metadata for the record.
     * @param mixed  $pages     Passed through to the parent unchanged.
     * @param string $record_id Id of the record being written.
     */
    public function writePackages($metadata, $pages, $record_id)
    {
        $this->record_id = $record_id;
        parent::writePackages($metadata, $pages, $record_id);
    }

    /**
     * Returns the last DIRECTORY_SEPARATOR-delimited segment of a path.
     *
     * @param string $path
     *
     * @return string
     */
    public function getTempFile($path)
    {
        $segments = explode(DIRECTORY_SEPARATOR, $path);
        return array_pop($segments);
    }

    /**
     * Builds the URL of the original object from the OAI identifier stored
     * in a record file.
     *
     * The identifier is expected to look like
     * "oai:http://digitallibrary.tulane.edu/:tulane_70", which yields
     * "http://digitallibrary.tulane.edu/islandora/object/tulane:70".
     *
     * NOTE(review): the identifier is split on ":", so a host with an explicit
     * port would be parsed incorrectly, and every "_" in the last segment is
     * turned into ":" - confirm both against the identifiers actually
     * harvested.
     *
     * @param string $temp_file Path to the XML file holding the OAI record.
     *
     * @return string
     *
     * @throws \RuntimeException         If the file cannot be read.
     * @throws \UnexpectedValueException If the identifier does not have the
     *                                   expected four ":"-separated parts.
     * @throws \Exception                If the file is not well-formed XML
     *                                   (raised by SimpleXMLElement).
     */
    public function parseIdentifierForUrl($temp_file)
    {
        $contents = file_get_contents($temp_file);
        if ($contents === false) {
            throw new \RuntimeException('Could not read OAI record file: ' . $temp_file);
        }
        $xml = new \SimpleXMLElement($contents);

        // Cast explicitly: the property access yields a SimpleXMLElement.
        $id = (string) $xml->header->identifier;
        $parts = explode(":", $id);
        if (count($parts) < 4) {
            throw new \UnexpectedValueException(
                'Unexpected OAI identifier "' . $id . '" in ' . $temp_file
            );
        }

        $protocol = $parts[1];
        $rawPid = $parts[3];
        $pid = str_replace('_', ':', $rawPid);

        return $protocol . ":" . $parts[2] . "islandora/object" . '/' . $pid;
    }

    /**
     * Adds the link back to the original record to the metadata and writes
     * the result to $path.
     *
     * NOTE(review): $overwrite is accepted for signature compatibility but is
     * not consulted here; an existing file at $path is always overwritten.
     *
     * @param string $metadata  MODS XML for the record.
     * @param string $path      Destination file; nothing is written when empty.
     * @param bool   $overwrite Unused, see note above.
     */
    public function writeMetadataFile($metadata, $path, $overwrite = true)
    {
        $temp_file = $this->temp_directory . DIRECTORY_SEPARATOR . $this->record_id . '.metadata';
        $url = $this->parseIdentifierForUrl($temp_file);
        $metadata = $this->insertOriginalRecordLink($metadata, $url);

        if ((string) $path !== '') {
            $fileCreationStatus = file_put_contents($path, $metadata);
            if ($fileCreationStatus === false) {
                $this->log->addWarning(
                    "There was a problem writing the metadata to a file",
                    array('file' => $path)
                );
            }
        }
    }

    /**
     * Prefixes the first MODS <abstract> with "(Original record: <url>)".
     *
     * The existing element is updated in place, so its namespace, prefix and
     * attributes are kept, and the text is added as a text node so that
     * characters such as "&" are escaped. When the record has no abstract,
     * one containing only the link is appended to the MODS root. If the
     * metadata cannot be parsed or has no MODS root element, a warning is
     * logged and the metadata is returned unchanged.
     *
     * @param string $metadata MODS XML.
     * @param string $url      URL of the original record.
     *
     * @return string The serialized, updated XML (or the untouched input).
     */
    private function insertOriginalRecordLink($metadata, $url)
    {
        $mods_ns = 'http://www.loc.gov/mods/v3';
        $doc = new \DOMDocument('1.0');
        $doc->formatOutput = true;

        if ((string) $metadata === '' || !$doc->loadXML($metadata)) {
            $this->log->addWarning(
                "Could not parse the metadata as XML; original record link not added",
                array('record_id' => $this->record_id)
            );
            return $metadata;
        }

        $root = $doc->getElementsByTagNameNS($mods_ns, 'mods')->item(0);
        if ($root === null) {
            $this->log->addWarning(
                "No MODS root element found; original record link not added",
                array('record_id' => $this->record_id)
            );
            return $metadata;
        }

        $abstract = $doc->getElementsByTagNameNS($mods_ns, 'abstract')->item(0);
        if ($abstract === null) {
            // No abstract to extend: add one that carries just the link,
            // using the same prefix as the root so it serializes consistently.
            $prefix = (string) $root->prefix;
            $qualifiedName = $prefix !== '' ? $prefix . ':abstract' : 'abstract';
            $abstract = $doc->createElementNS($mods_ns, $qualifiedName);
            $abstract->appendChild(
                $doc->createTextNode(sprintf("(Original record: %s)", $url))
            );
            $root->appendChild($abstract);
        } else {
            $abstractText = sprintf("(Original record: %s) %s", $url, $abstract->textContent);
            while ($abstract->firstChild !== null) {
                $abstract->removeChild($abstract->firstChild);
            }
            $abstract->appendChild($doc->createTextNode($abstractText));
        }

        return $doc->saveXML();
    }
}

0 comments on commit 6613f56

Please sign in to comment.