Skip to content

Commit

Permalink
Updated ICL finding aid
Browse files Browse the repository at this point in the history
  • Loading branch information
BobHanson committed Nov 21, 2024
1 parent f362b59 commit efc0cfb
Show file tree
Hide file tree
Showing 8 changed files with 14,433 additions and 4,027 deletions.
4,702 changes: 2,914 additions & 1,788 deletions docs/examples/v5-icl-repository-DOI-crawl/10.14469_hpc_10386/IFD.findingaid.json

Large diffs are not rendered by default.

10,034 changes: 9,628 additions & 406 deletions docs/examples/v5-icl-repository-DOI-crawl/10.14469_hpc_10386/crawler.log

Large diffs are not rendered by default.

3,659 changes: 1,847 additions & 1,812 deletions docs/examples/v5-icl-repository-DOI-crawl/10.14469_hpc_10386/ifd-fileURLMap.txt

Large diffs are not rendered by default.

39 changes: 30 additions & 9 deletions src/main/java/com/integratedgraphics/extractor/DOICrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -361,20 +361,20 @@ protected void processEndElement(String localName) {
Map<String, String> attrs;
switch (localName) {
case "description":
if (s.length() > 0 && !customizeText("description", s)) {
if (s.length() > 0 && !crawler.customizeText("description", s)) {
crawler.addAttr(IFDConst.IFD_PROPERTY_DESCRIPTION, s);
}
break;
case "title":
if (s.length() > 0) {
if (!customizeText("title", s)) {
if (!crawler.customizeText("title", s)) {
crawler.addAttr(IFDConst.IFD_PROPERTY_LABEL, s);
}
}
break;
case "subject":
attrs = getAttributes(true);
addSubjects(attrs, s);
addSubject(attrs, s);
break;
case "relatedidentifier":
if (s.length() > 0) {
Expand All @@ -392,7 +392,7 @@ protected void processEndElement(String localName) {
crawler.xmlDepth--;
}

private void addSubjects(Map<String, String> attrs, String s) {
private void addSubject(Map<String, String> attrs, String s) {
// <subjects>
// <subject
// schemeURI="http://iupac.org/ifd"
Expand All @@ -401,7 +401,9 @@ private void addSubjects(Map<String, String> attrs, String s) {
// </subjects>
//
String key = attrs.get("subjectscheme");
if (key != null) {
if (key == null) {
crawler.customizeText("subject", s);
} else {
switch (key) {
case FAIRDATA_SUBJECT_SCHEME:
key = attrs.get("valueuri");
Expand All @@ -426,9 +428,6 @@ private String customizeKey(String key) {
return (customizer == null ? key : customizer.customizeKey(key));
}

/**
 * Offers a key/value text pair to the customizer, if one is set.
 *
 * @param key the kind of text being offered (callers in this file pass
 *            element names such as "description" and "title")
 * @param val the text content
 * @return false when there is no customizer; otherwise whatever the
 *         customizer's customizeText(key, val) returns
 */
private boolean customizeText(String key, String val) {
    if (customizer == null) {
        return false;
    }
    return customizer.customizeText(key, val);
}

}

Expand Down Expand Up @@ -630,6 +629,24 @@ private boolean ignoreURL(String url) {
return false;
}

/**
 * Applies the crawler's built-in text rules and then, if no rule has
 * already returned, defers to the customizer.
 *
 * Built-in rules:
 * - key "subject", value "Crystal Structure" (the CCDC subject): sets the
 *   data object type to "xrd" and returns false without consulting the
 *   customizer.
 * - key "References", value containing "/ccdc.": returns true without
 *   consulting the customizer.
 *
 * @param key the kind of text being offered
 * @param val the text content
 * @return the result described above for the built-in rules; otherwise
 *         false when there is no customizer, or the customizer's
 *         customizeText(key, val) result
 */
protected boolean customizeText(String key, String val) {
    switch (key) {
    case "References":
        if (val.indexOf("/ccdc.") >= 0) {
            return true;
        }
        break;
    case "subject":
        // ccdc subject
        if (val.equals("Crystal Structure")) {
            setDataObjectType("xrd");
            return false;
        }
        break;
    default:
        break;
    }
    if (customizer == null) {
        return false;
    }
    return customizer.customizeText(key, val);
}


private void popURLStack(String currentPath) {
pidPath = currentPath;
urlStack.pop();
Expand Down Expand Up @@ -766,8 +783,12 @@ private void addRelatedIdentifier(Map<String, String> attrs, String s) {
break;
case "References":
// only interested in JournalArticle
if (generalType == null)
if (generalType == null) {
if (customizeText("References", s)) {
break; // treat as "HasPart"
}
return;
}
switch (generalType) {
case "JournalArticle":
if ("DOI".equals(type))
Expand Down
19 changes: 11 additions & 8 deletions src/main/java/com/integratedgraphics/extractor/ICLDOICrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ public class ICLDOICrawler extends DOICrawler {
public static class ICLCustomizer implements DOICustomizer {

private static String[] ignoreURLs = new String[] {
"10.14469/hpc/14300",
"10.14469/HPC/11652",
"https://data.hpc.imperial.ac.uk/resolve/?doi=11597&file=1",
"https://data.hpc.imperial.ac.uk/resolve/?doi=11597&file=2"
// "10.14469/hpc/14300",
// "10.14469/HPC/11652",
// "https://data.hpc.imperial.ac.uk/resolve/?doi=11597&file=1",
// "https://data.hpc.imperial.ac.uk/resolve/?doi=11597&file=2"
};

private static Map<String, String> hackMap = new HashMap<>();
Expand Down Expand Up @@ -82,6 +82,13 @@ public boolean customizeText(String key, String val) {
if (val.length() < 3)
return false;
switch (key) {
case "subject":
switch (val) {
case "Crystal Structure":
crawler.setDataObjectType("xrd");
break;
}
break;
case "description":
break;
case "title":
Expand Down Expand Up @@ -116,8 +123,4 @@ public static void main(String[] args) {
crawler.setCustomizer(new ICLCustomizer(crawler));
crawler.crawl();
}




}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public FAIRSpecDataObject() {
public static FAIRSpecDataObject createFAIRSpecObject(String key) {
// backward compatibility:
//
if (key == null)
key = "unknown";
String type = key.substring(key.lastIndexOf(".") + 1);
String ucType = type.toUpperCase();
String className = FAIRSpecDataObject.class.getName();
Expand Down
1 change: 0 additions & 1 deletion src/main/java/org/iupac/fairdata/core/IFDCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ protected void serializeList(IFDSerializerI serializer) {
get(i).setID("" + (i + 1));
}
}

serializer.addCollection(byid ? "itemsByID" : "items", this, byid);
}
if (haveCommonClass) {
Expand Down
4 changes: 1 addition & 3 deletions src/main/java/org/iupac/fairdata/core/IFDReference.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ public String getURL() {

/**
 * Sets the URL of this reference.
 *
 * The unconditional "IFDREF" trace to System.out that used to be emitted
 * here was debugging output and has been removed; the setter now only
 * stores the value.
 *
 * @param url the URL to store
 */
public void setURL(String url) {
    this.url = url;
}

private String doi;
Expand Down Expand Up @@ -77,7 +76,6 @@ public IFDReference() {
*/
public IFDReference(String resourceID, Object originPath, String localDir, String localName) {
this.index = ++test;
System.out.println("IFDREF. " + index + " " + localName);
this.resourceID = resourceID;
this.originPath = originPath;
this.localDir = localDir;
Expand Down Expand Up @@ -126,7 +124,7 @@ public void serialize(IFDSerializerI serializer) {
if (originPath != null && !originPath.equals(doi) && !originPath.equals(url))
serializer.addAttr("originPath", originPath.toString());
if (localName != null) {
if (localDir == null) {
if (url != null || doi != null || localDir == null) {
serializer.addAttr("localName", localName);
} else {
serializer.addAttr("localPath", getLocalPath());
Expand Down

0 comments on commit efc0cfb

Please sign in to comment.