From 3a8259538571b2b020ac09729adbaf3b3a08c189 Mon Sep 17 00:00:00 2001 From: dexagod Date: Tue, 3 Aug 2021 12:14:21 +0200 Subject: [PATCH] Add compatibility with related specs. Create dummy relations for next links --- Readme.md | 86 ++++++++++++-------- src/lib/metadataExtraction.ts | 135 ++++++++++++++++++++++++------- src/util/NameSpaces.ts | 2 + src/util/Util.ts | 69 +++++++++++----- tests/MetadataExtraction.test.js | 19 ++++- 5 files changed, 228 insertions(+), 83 deletions(-) diff --git a/Readme.md b/Readme.md index 922c01c..9585607 100644 --- a/Readme.md +++ b/Readme.md @@ -39,43 +39,59 @@ for (const relationId of metadata.relations.keys()) { In this section, the extracted fields per class are listed. In the case an Object is returned, it can have arbitrary fields that are not listed below. -| Object | Metadata field | type | -|-----------------------|---------------------|---------------| -| Collection | @id | string -| | @type | [ string ] -| | view | [ URI ] # References a Node object -| | member | [ URI ] -| | shape | [ object ] -| | import | [ URI ] -| | importStream | [ URI ] -| | conditionalImport | [ ConditionalImport ] -| Node | @id | string -| | @type | [ string ] -| | search | [ object ] -| | relation | [ URI ] # References a Relation object -| | import | [ object ] -| | importStream | [ object ] -| | conditionalImport | [ ConditionalImport ] -| Relation | @id | string -| | @type | [ string ] -| | remainingItems | [ Literal ] -| | path | [ object ] -| | value | [ object ] -| | node | [ object ] -| | import | [ URI ] -| | importStream | [ URI ] -| | conditionalImport | [ ConditionalImport ] -| ConditionalImport | path | [ object ] -| | import | [ URI ] -| | importStream | [ URI ] -| Literal | @value | [ string ] -| | @type | [ string ] -| | @language | [ string ] -| URI | @id | string -| object | @id | string -| | ... | [ any ] +| Object | Metadata field | type | referenced object type | +|-----------------------|---------------------|-----------------------|------------------------| +| Collection | @id | string | | +| | @type | [ string ] | | +| | view | [ URI ] | Node | +| | member | [ URI ] | Member | +| | shape | [ Shape ] | | +| | totalItems | [ Literal ] | | +| | import | [ URI ] | | +| | importStream | [ URI ] | | +| | conditionalImport | [ ConditionalImport ] | | +| | ... | [ any ] | | +| Node | @id | string | | +| | @type | [ string ] | | +| | relation | [ URI ] | Relation | +| | search | [ object ] | | +| | retentionPolicy | [ RetentionPolicy ] | | +| | import | [ object ] | | +| | importStream | [ object ] | | +| | conditionalImport | [ ConditionalImport ] | | +| | ... | [ any ] | | +| Relation | @id | string | | +| | @type | [ string ] | | +| | remainingItems | [ Literal ] | | +| | path | [ object ] | shacl:propertyPath | +| | value | [ object ] | | +| | node | [ URI ] | Node | +| | import | [ URI ] | | +| | importStream | [ URI ] | | +| | conditionalImport | [ ConditionalImport ] | | +| Member | ... | [ any ] | | +| Shape | ... | [ any ] | shacl:NodeShape / shex:Shape | +| IriTemplate | ... | [ any ] | hydra:IriTemplate | +| ConditionalImport | path | [ object ] | shacl:propertyPath | +| | import | [ URI ] | | +| | importStream | [ URI ] | | +| RetentionPolicy | @type | [ string ] | | +| | amount | [ Literal ] | | +| | versionKey | [ object ] | | +| | path | [ object ] | shacl:propertyPath | +| | value | [ object ] | | +| Literal | @value | [ string ] | | +| | @type | [ string ] | | +| | @language | [ string ] | | +| URI | @id | string | | +| object | @id | string | | +| | ... | [ any ] | | +``` +Note: Blank node URIs can be returned as a URI. +We expect any additional data used in combination with the extracted metadata to be retrieved from the same rdfjs data factory. +``` ### Examples diff --git a/src/lib/metadataExtraction.ts b/src/lib/metadataExtraction.ts index 5a73edf..1f6de6d 100644 --- a/src/lib/metadataExtraction.ts +++ b/src/lib/metadataExtraction.ts @@ -1,7 +1,7 @@ import * as RDF from 'rdf-js' import * as N3 from 'n3' import ns from '../util/NameSpaces' -import { RelationType, Literal, Collection, Node, Relation, ConditionalImport } from '../util/Util'; +import { RelationType, Literal, Collection, Node, Relation, ConditionalImport, RetentionPolicy } from '../util/Util'; const context = { "@vocab": ns.tree('') } @@ -18,12 +18,16 @@ export async function extractMetadata (quads: RDF.Quad[]) { const relationsMetadata = new Map(); for (let id of collectionIds) { - const metadata = await extractCollectionMetadata(store, id) + const metadata = await extractCollectionData(store, id) collectionsMetadata.set(id, metadata) } + for (let id of nodeIds) { - const metadata = await extractNodeMetadata(store, id) - nodesMetadata.set(id, metadata) + const metadata = await extractNodeData(store, id) + nodesMetadata.set(id, metadata.node) + + // Set the dummy relations generated for the hydra:next and as:next predicates + metadata.relations.map(relation => relationsMetadata.set(relation["@id"], relation)) } for (let id of relationIds) { const metadata = await extractRelationMetadata(store, id) @@ -39,16 +43,27 @@ export async function extractMetadata (quads: RDF.Quad[]) { */ function extractCollectionids(store: N3.Store) { let ids: string[] = [] - // Search for collection ids + // Search for collection ids on type ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.tree('Collection'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.ldes('EventStream'), null).map(quad => quad.subject.id) ); ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.hydra('Collection'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.as('Collection'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.as('OrderedCollection'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.dct('Collection'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.ldp('Container'), null).map(quad => quad.subject.id) ); + + // Search for collection ids on view ids = ids.concat( store.getQuads(null, ns.tree('view'), null, null).map(quad => quad.subject.id) ); ids = ids.concat( store.getQuads(null, ns.hydra('view'), null, null).map(quad => quad.subject.id) ); ids = ids.concat( store.getQuads(null, ns.void('subset'), null, null).map(quad => quad.subject.id) ); - // Match on dct:isPartOf property -> collection id is object here + // reverse view properties ids = ids.concat( store.getQuads(null, ns.dct('isPartOf'), null, null).map(quad => quad.object.id) ); + ids = ids.concat( store.getQuads(null, ns.as('partOf'), null, null).map(quad => quad.object.id) ); + + // Search for collection ids on members ids = ids.concat( store.getQuads(null, ns.tree('member'), null, null).map(quad => quad.subject.id) ); ids = ids.concat( store.getQuads(null, ns.hydra('member'), null, null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.ldp('contains'), null, null).map(quad => quad.subject.id) ); return Array.from(new Set(ids)) } @@ -58,11 +73,28 @@ function extractCollectionids(store: N3.Store) { */ function extractNodeIds(store: N3.Store) { let ids: string[] = [] - // Search for node ids + + // Search for node ids on type ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.tree('Node'), null).map(quad => quad.subject.id) ); ids = ids.concat( store.getQuads(null, ns.rdf('type'), ns.hydra('PartialCollectionView'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('as'), ns.hydra('CollectionPage'), null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.rdf('as'), ns.hydra('OrderedCollectionPage'), null).map(quad => quad.subject.id) ); + + // Searching on view causes nodes to be displayed that still need to be retrieved form another page + // // Search for node ids on view + // ids = ids.concat( store.getQuads(null, ns.tree('view'), null, null).map(quad => quad.object.id) ); + // ids = ids.concat( store.getQuads(null, ns.hydra('view'), null, null).map(quad => quad.object.id) ); + // ids = ids.concat( store.getQuads(null, ns.void('subset'), null, null).map(quad => quad.object.id) ); + // // reverse view properties + // ids = ids.concat( store.getQuads(null, ns.dct('isPartOf'), null, null).map(quad => quad.subject.id) ); + // ids = ids.concat( store.getQuads(null, ns.as('partOf'), null, null).map(quad => quad.subject.id) ); + + // Search for node ids on their properties ids = ids.concat( store.getQuads(null, ns.tree('search'), null, null).map(quad => quad.subject.id) ); ids = ids.concat( store.getQuads(null, ns.tree('relation'), null, null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.ldes('retentionPolicy'), null, null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.hydra('next'), null, null).map(quad => quad.subject.id) ); + ids = ids.concat( store.getQuads(null, ns.as('next'), null, null).map(quad => quad.subject.id) ); return Array.from(new Set(ids)) } @@ -78,7 +110,7 @@ function extractRelationIds(store: N3.Store) { return Array.from(new Set(ids)) } -function extractCollectionMetadata(store: N3.Store, id: string) { +function extractCollectionData(store: N3.Store, id: string) { const c : Collection = { "@context": context, "@id": id, @@ -88,25 +120,37 @@ function extractCollectionMetadata(store: N3.Store, id: string) { setField(c, "@type", store.getQuads(id, ns.rdf('type'), null, null).map(quad => quad.object.id)); // Extract view ids - setField(c, "view", store.getQuads(id, ns.tree('view'), null, null).map(quad => retrieveBaseObject(store, quad.object))); - setField(c, "view", store.getQuads(id, ns.hydra('view'), null, null).map(quad => retrieveBaseObject(store, quad.object))); - setField(c, "view", store.getQuads(id, ns.void('subset'), null, null).map(quad => retrieveBaseObject(store, quad.object))); - setField(c, "view", store.getQuads(null, ns.dct('isPartOf'), id, null).map(quad => retrieveBaseObject(store, quad.subject))); + setField(c, "view", store.getQuads(id, ns.tree('view'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "view", store.getQuads(id, ns.hydra('view'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "view", store.getQuads(id, ns.void('subset'), null, null).map(quad => retrieveTerm(store, quad.object))); + // reverse properties + setField(c, "view", store.getQuads(null, ns.dct('isPartOf'), id, null).map(quad => retrieveTerm(store, quad.subject))); + setField(c, "view", store.getQuads(null, ns.as('partOf'), id, null).map(quad => retrieveTerm(store, quad.subject))); + // Extract member ids - setField(c, "member", store.getQuads(id, ns.tree('member'), null, null).map(quad => retrieveBaseObject(store, quad.object))); - setField(c, "member", store.getQuads(id, ns.hydra('member'), null, null).map(quad => retrieveBaseObject(store, quad.object))); - + setField(c, "member", store.getQuads(id, ns.tree('member'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "member", store.getQuads(id, ns.hydra('member'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "member", store.getQuads(id, ns.as('items'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "member", store.getQuads(id, ns.hydra('contains'), null, null).map(quad => retrieveTerm(store, quad.object))); + // Extract shape objects setField(c, "shape", store.getQuads(id, ns.tree('shape'), null, null).map(quad => retrieveFullObject(store, quad.object))); + setField(c, "shape", store.getQuads(id, ns.st('validatedBy'), null, null).map(quad => retrieveFullObject(store, quad.object))); // Extract full import objects - setField(c, "import", store.getQuads(id, ns.tree('import'), null, null).map(quad => retrieveFullObject(store, quad.object))); - setField(c, "importStream", store.getQuads(id, ns.tree('importStream'), null, null).map(quad => retrieveFullObject(store, quad.object))); - setField(c, "conditionalImport", store.getQuads(id, ns.tree('conditionalImport'), null, null).map(quad => extractConditionalImportMetadata(store, quad.object))); + setField(c, "import", store.getQuads(id, ns.tree('import'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "importStream", store.getQuads(id, ns.tree('importStream'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(c, "conditionalImport", store.getQuads(id, ns.tree('conditionalImport'), null, null).map(quad => retrieveConditionalImportData(store, quad.object))); + + // Extract totalItems + setField(c, "totalItems", store.getQuads(id, ns.hydra('totalItems'), null, null).map(quad => retrieveTerm(store, quad.object))); + + // TODO:: extract additional metadata? + return c; } -function extractNodeMetadata(store: N3.Store, id: string) { +function extractNodeData(store: N3.Store, id: string) { const n : Node = { "@context": context, "@id": id, @@ -119,13 +163,24 @@ function extractNodeMetadata(store: N3.Store, id: string) { setField(n, "search", store.getQuads(id, ns.tree('search'), null, null).map(quad => retrieveFullObject(store, quad.object))); // Extract relation ids - setField(n, "relation", store.getQuads(id, ns.tree('relation'), null, null).map(quad => retrieveBaseObject(store, quad.object))); + setField(n, "relation", store.getQuads(id, ns.tree('relation'), null, null).map(quad => retrieveTerm(store, quad.object))); + + // Extract retentionPolicy + setField(n, "retentionPolicy", store.getQuads(id, ns.ldes('retentionPolicy'), null, null).map(quad => retrieveRetentionPolicyData(store, quad.object))); // Extract full import objects setField(n, "import", store.getQuads(id, ns.tree('import'), null, null).map(quad => retrieveFullObject(store, quad.object))); setField(n, "importStream", store.getQuads(id, ns.tree('importStream'), null, null).map(quad => retrieveFullObject(store, quad.object))); - setField(n, "conditionalImport", store.getQuads(id, ns.tree('conditionalImport'), null, null).map(quad => extractConditionalImportMetadata(store, quad.object))); - return n; + setField(n, "conditionalImport", store.getQuads(id, ns.tree('conditionalImport'), null, null).map(quad => retrieveConditionalImportData(store, quad.object))); + + // Extract next links + let nextQuads = store.getQuads(id, ns.hydra('next'), null, null).concat(store.getQuads(id, ns.as('next'), null, null)) + let generatedNextDummyRelations = nextQuads.map((quad, index) => createNextDummyRelation(quad.subject.id, quad.object.id, index)) + + setField(n, "relation", generatedNextDummyRelations.map(relation => { return({"@id": relation["@id"] }) }) ); + + // TODO:: extract additional metadata? + return {node: n, relations: generatedNextDummyRelations}; } function extractRelationMetadata(store: N3.Store, id: string) { @@ -147,17 +202,31 @@ function extractRelationMetadata(store: N3.Store, id: string) { setField(r, "value", store.getQuads(id, ns.tree('value'), null, null).map(quad => retrieveFullObject(store, quad.object))); // Extract node id - setField(r, "node", store.getQuads(id, ns.tree('node'), null, null).map(quad => retrieveBaseObject(store, quad.object))); + setField(r, "node", store.getQuads(id, ns.tree('node'), null, null).map(quad => retrieveTerm(store, quad.object))); // Extract full import objects setField(r, "import", store.getQuads(id, ns.tree('import'), null, null).map(quad => retrieveFullObject(store, quad.object))); setField(r, "importStream", store.getQuads(id, ns.tree('importStream'), null, null).map(quad => retrieveFullObject(store, quad.object))); - setField(r, "conditionalImport", store.getQuads(id, ns.tree('conditionalImport'), null, null).map(quad => extractConditionalImportMetadata(store, quad.object))); + setField(r, "conditionalImport", store.getQuads(id, ns.tree('conditionalImport'), null, null).map(quad => retrieveConditionalImportData(store, quad.object))); return r; } +function retrieveRetentionPolicyData(store: N3.Store, term: N3.Term) { + const rp : RetentionPolicy = { } + if (N3.Util.isNamedNode(term)) { + rp ["@id"] = term.value + } + const id = term.id + // Extract retention policy data + setField(rp, "@type", store.getQuads(id, ns.rdf('type'), null, null).map(quad => quad.object.id)); + setField(rp, "amount", store.getQuads(id, ns.ldes('amount'), null, null).map(quad => retrieveTerm(store, quad.object))); + setField(rp, "versionKey", store.getQuads(id, ns.ldes('versionKey'), null, null).map(quad => retrieveFullObject(store, quad.object))); + setField(rp, "path", store.getQuads(id, ns.tree('path'), null, null).map(quad => retrieveFullObject(store, quad.object))); + setField(rp, "value", store.getQuads(id, ns.tree('value'), null, null).map(quad => retrieveFullObject(store, quad.object))); + return rp +} -function extractConditionalImportMetadata(store: N3.Store, term: N3.Term) { +function retrieveConditionalImportData(store: N3.Store, term: N3.Term) { const ci : ConditionalImport = { } if (N3.Util.isNamedNode(term)) { ci ["@id"] = term.value @@ -177,7 +246,7 @@ function extractConditionalImportMetadata(store: N3.Store, term: N3.Term) { * @param recursive * @param processedIds */ -function retrieveBaseObject(store: N3.Store, term: N3.Term) { +function retrieveTerm(store: N3.Store, term: N3.Term) { return retrieveFullObject(store, term, false) } @@ -205,11 +274,23 @@ function retrieveFullObject(store: N3.Store, term: N3.Term, recursive = true, pr return { '@id': term.id } } default: - // We do not process variables in metadata extraction. + // We do not process variables in metadata extraction.Literal return {}; } } +function createNextDummyRelation (sourceURI: string, targetURI: string, index:number): Relation { + // Generate unique blank Id + let id = `_:nextRelation-${index}` + const r : Relation = { + "@context": context, + "@id": id, + } + setField(r, "@type", [ns.tree('Relation')] ) ; + setField(r, "node", [{"@id": targetURI}] ); + return r; +} + /** * Create a literal object * @param store diff --git a/src/util/NameSpaces.ts b/src/util/NameSpaces.ts index a1bc014..ceebdae 100644 --- a/src/util/NameSpaces.ts +++ b/src/util/NameSpaces.ts @@ -60,6 +60,8 @@ enum aliases { dbo = 'http://dbpedia.org/ontology/', ex = 'http://example.com/', tree = 'https://w3id.org/tree#', + ldes = 'https://w3id.org/ldes#', + st = 'http://www.w3.org/ns/shapetrees#', hydra = 'http://www.w3.org/ns/hydra/core#', void = 'http://rdfs.org/ns/void#', shacl = 'http://www.w3.org/ns/shacl#', diff --git a/src/util/Util.ts b/src/util/Util.ts index 57737fb..02bca40 100644 --- a/src/util/Util.ts +++ b/src/util/Util.ts @@ -12,25 +12,30 @@ export enum RelationType { } export interface Collection { - "@context": any, + "@context"?: string | object, "@id": string, - "view"?: any[], - "member"?: any[], - "shape"?: any[], - "import"?: any[], - "importStream"?: any[], - "conditionalImport"?: any[], + "@type"?: string[], + "view"?: URI[], + "member"?: Member[], + "shape"?: Shape[], + "totalItems"?: Literal[], + "import"?: URI[], + "importStream"?: URI[], + "conditionalImport"?: ConditionalImport[], + [property: string]: any; } export interface Node { - "@context": any, + "@context"?: string | object, "@id": string, "@type"?: string[], - "search"?: any[], - "relation"?: any[], - "import"?: any[], - "importStream"?: any[], - "conditionalImport"?: any[], + "relation"?: URI[], // Note hydra:next / as:next links are added as Relations of type tree:relation with the target node as the target of the next relation + "search"?: IriTemplate[], + "retentionPolicy"?: RetentionPolicy[], + "import"?: URI[], + "importStream"?: URI[], + "conditionalImport"?: ConditionalImport[], + [property: string]: any; } @@ -38,23 +43,51 @@ export interface Relation { "@context": any, "@id": string, "@type"?: string[], - "remainingItems"?: any[], + "remainingItems"?: Literal[], "path"?: any[], "value"?: any[], - "node"?: any[], - "import"?: any[], - "importStream"?: any[], - "conditionalImport"?: any[], + "node"?: URI[], + "import"?: URI[], + "importStream"?: URI[], + "conditionalImport"?: ConditionalImport[], +} + +export interface Member { + "@id"?: string, + [property: string]: any; } +export interface Shape { + "@id"?: string, + [property: string]: any; +} + +export interface IriTemplate { + "@id"?: string, + [property: string]: any; +} export interface ConditionalImport { "@id"?: string, "import"?: any[], "importStream"?: any[], "conditionalImport"?: any[], } + +export interface RetentionPolicy { + "@id"?: string, + "@type"?: string[], + "amount"?: Literal[], + "versionKey"?: string[], + "path"?: any[], + "value"?: any[], +} + export interface Literal { "@value": string, "@type"?: string, "@language"?: string, } + +export interface URI { + "@id": string, +} diff --git a/tests/MetadataExtraction.test.js b/tests/MetadataExtraction.test.js index 2e8a483..875ab44 100644 --- a/tests/MetadataExtraction.test.js +++ b/tests/MetadataExtraction.test.js @@ -25,6 +25,7 @@ async function test(turtleString, result, message) { async function evaluateMetadataExtraction(input, result) { const extractedMetadataPerType = await extractMetadata(input) // Check if output is valid JSONLD + console.log(result, extractedMetadataPerType) for (let type of ['collections', 'nodes', 'relations']) { let extractedMapping = extractedMetadataPerType[type] let resultMapping = result[type] @@ -607,9 +608,13 @@ describe('testing elaborate extraction', sensorNodes.set("https://streams.datapiloten.be/sensors?page=1", { "@context": context, "@id": "https://streams.datapiloten.be/sensors?page=1", - "relation": [{ - "@id": "_:b8_b13" - }] + "relation": [ + { + "@id": "_:b8_b13" + }, { + "@id": "_:nextRelation-0" + } + ] }) // blank node ids are required to reference the relevant objects @@ -629,6 +634,14 @@ describe('testing elaborate extraction', "@value": "2020-06-30T14:48:52.013Z" }] }) + sensorRelations.set('_:nextRelation-0', { + "@context": context, + "@id": "_:nextRelation-0", + "@type": [ns.tree("Relation")], + "node": [{ + "@id": "https://streams.datapiloten.be/sensors?page=2" + }] + }) const sensordataResult = { collections: sensorCollections,