From 9a89f6d53baf0f96c59172e1fd6a64305b4402be Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 14 Feb 2024 15:17:31 -0500 Subject: [PATCH 1/4] feat: Define JSON Schemas for ontology data release artifacts --- artifact-schemas/all_ontology_schema.json | 76 +++++++++++++++++++ artifact-schemas/cell_class_list_schema.json | 12 +++ .../cell_subclass_list_schema.json | 12 +++ .../cell_type_descendants_schema.json | 17 +++++ artifact-schemas/organ_list_schema.json | 12 +++ artifact-schemas/system_list_schema_.json | 12 +++ .../tissue_descendants_schema.json | 17 +++++ .../tissue_general_list_schema.json | 12 +++ 8 files changed, 170 insertions(+) create mode 100644 artifact-schemas/all_ontology_schema.json create mode 100644 artifact-schemas/cell_class_list_schema.json create mode 100644 artifact-schemas/cell_subclass_list_schema.json create mode 100644 artifact-schemas/cell_type_descendants_schema.json create mode 100644 artifact-schemas/organ_list_schema.json create mode 100644 artifact-schemas/system_list_schema_.json create mode 100644 artifact-schemas/tissue_descendants_schema.json create mode 100644 artifact-schemas/tissue_general_list_schema.json diff --git a/artifact-schemas/all_ontology_schema.json b/artifact-schemas/all_ontology_schema.json new file mode 100644 index 00000000..3dc5dd10 --- /dev/null +++ b/artifact-schemas/all_ontology_schema.json @@ -0,0 +1,76 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Valid Ontology Term JSON Schema", + "description": "Schema for file containing metadata for Ontology Terms accepted in dataset submissions to CZ CellXGene Data Portal.", + "type": "object", + "properties": { + "EFO": { "$ref": "#/definitions/ontologyCategory" }, + "UBERON": { "$ref": "#/definitions/ontologyCategory" }, + "CL": { "$ref": "#/definitions/ontologyCategory" }, + "HANCESTRO": { "$ref": "#/definitions/ontologyCategory" }, + "HsapDv": { "$ref": "#/definitions/ontologyCategory" }, + "MmusDv": { "$ref": 
"#/definitions/ontologyCategory" }, + "PATO": { "$ref": "#/definitions/ontologyCategory" }, + "NCBITaxon": { "$ref": "#/definitions/ontologyCategory" }, + "MONDO": { "$ref": "#/definitions/ontologyCategory" } + }, + "additionalProperties": false, + "definitions": { + "ontologyCategory": { + "type": "object", + "patternProperties": { + "^(EFO|UBERON|CL|HANCESTRO|HsapDv|MmusDv|PATO|NCBITaxon|MONDO):[0-9]{7}$": { + "type": "object", + "properties": { + "label": { + "type": "string", + "description": "human-readable name for the ontology entry." + }, + "deprecated": { + "type": "boolean", + "description": "Indicates whether the ontology entry is deprecated." + }, + "ancestors": { + "type": "array", + "items": { + "type": "string", + "pattern": "^(EFO|UBERON|CL|HANCESTRO|HsapDv|MmusDv|PATO|NCBITaxon|MONDO):[0-9]{7}$", + "description": "List of ancestor IDs for the ontology entry." + }, + "description": "An array of ancestor ontology terms that this term is a subclass of." + }, + "comments": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "description": "Optional comments regarding the ontology entry from ontology curators." + }, + "term_tracker": { + "type": "string", + "format": "uri", + "description": "Optional URL to track discussion around the term's history and changes." + }, + "consider": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "description": "Suggests alternative IDs to consider in place of this ontology entry." + }, + "replaced_by": { + "type": "string", + "pattern": "^(EFO|UBERON|CL|HANCESTRO|HsapDv|MmusDv|PATO|NCBITaxon|MONDO):[0-9]{7}$", + "description": "If deprecated, the ID of the ontology entry that should canonically replace this one." 
+ } + }, + "required": ["label", "deprecated", "ancestors"], + "additionalProperties": false + } + }, + "additionalProperties": false + } + } +} \ No newline at end of file diff --git a/artifact-schemas/cell_class_list_schema.json b/artifact-schemas/cell_class_list_schema.json new file mode 100644 index 00000000..0fd0e14f --- /dev/null +++ b/artifact-schemas/cell_class_list_schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Curated 'Cell Class' Ontology Terms Schema", + "description": "A schema for validating an array of CL ontology term IDs representing high-level 'Cell Classes', curated for CZ CellxGene use.", + "type": "array", + "items": { + "type": "string", + "pattern": "^CL:[0-9]{7}$" + }, + "minItems": 1, + "uniqueItems": true +} \ No newline at end of file diff --git a/artifact-schemas/cell_subclass_list_schema.json b/artifact-schemas/cell_subclass_list_schema.json new file mode 100644 index 00000000..08811cbb --- /dev/null +++ b/artifact-schemas/cell_subclass_list_schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Curated 'Cell Subclass' Ontology Terms Schema", + "description": "A schema for validating an array of CL ontology term IDs representing high-level 'Cell Subclasses', curated for CZ CellxGene use.", + "type": "array", + "items": { + "type": "string", + "pattern": "^CL:[0-9]{7}$" + }, + "minItems": 1, + "uniqueItems": true +} \ No newline at end of file diff --git a/artifact-schemas/cell_type_descendants_schema.json b/artifact-schemas/cell_type_descendants_schema.json new file mode 100644 index 00000000..a7acfc1f --- /dev/null +++ b/artifact-schemas/cell_type_descendants_schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Cell Type Ontology Descendants Schema", + "description": "Schema for Mapping Cell Type Ontology Terms to its Descendant Cell Type Ontology Terms", + "type": "object", +
"patternProperties": { + "^CL:[0-9]{7}$": { + "type": "array", + "items": { + "type": "string", + "pattern": "^CL:[0-9]{7}$" + }, + "description": "An array of CL ontology term IDs that are subclasses of the key CL ontology term ID." + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/artifact-schemas/organ_list_schema.json b/artifact-schemas/organ_list_schema.json new file mode 100644 index 00000000..d4782f6f --- /dev/null +++ b/artifact-schemas/organ_list_schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Curated 'Organ' Ontology Terms Schema", + "description": "A schema for validating an array of UBERON ontology term IDs representing 'Organ' ontology terms, curated for CZ CellxGene use.", + "type": "array", + "items": { + "type": "string", + "pattern": "^UBERON:[0-9]{7}$" + }, + "minItems": 1, + "uniqueItems": true +} \ No newline at end of file diff --git a/artifact-schemas/system_list_schema_.json b/artifact-schemas/system_list_schema_.json new file mode 100644 index 00000000..edc9c39a --- /dev/null +++ b/artifact-schemas/system_list_schema_.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Curated 'System' Ontology Terms Schema", + "description": "A schema for validating an array of UBERON ontology term IDs representing organ 'System' ontology terms, curated for CZ CellxGene use.", + "type": "array", + "items": { + "type": "string", + "pattern": "^UBERON:[0-9]{7}$" + }, + "minItems": 1, + "uniqueItems": true +} \ No newline at end of file diff --git a/artifact-schemas/tissue_descendants_schema.json b/artifact-schemas/tissue_descendants_schema.json new file mode 100644 index 00000000..b14c989f --- /dev/null +++ b/artifact-schemas/tissue_descendants_schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Tissue Ontology Descendants Schema", + "description": "Schema for Mapping Tissue Ontology 
Terms to its Descendant Tissue Ontology Terms", + "type": "object", + "patternProperties": { + "^UBERON:[0-9]{7}$": { + "type": "array", + "items": { + "type": "string", + "pattern": "^UBERON:[0-9]{7}$" + }, + "description": "An array of UBERON ontology term IDs that are subclasses of the key UBERON ontology term ID." + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/artifact-schemas/tissue_general_list_schema.json b/artifact-schemas/tissue_general_list_schema.json new file mode 100644 index 00000000..6928b472 --- /dev/null +++ b/artifact-schemas/tissue_general_list_schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Curated 'Tissue General' Ontology Terms Schema", + "description": "A schema for validating an array of high-level UBERON ontology term IDs representing 'general tissue' ontology terms, curated for CZ CellxGene use.", + "type": "array", + "items": { + "type": "string", + "pattern": "^UBERON:[0-9]{7}$" + }, + "minItems": 1, + "uniqueItems": true +} \ No newline at end of file From d1be291a210e534cc674d6c1f404b69dad3392fc Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 14 Feb 2024 15:43:01 -0500 Subject: [PATCH 2/4] add ontology_info.yml schema --- artifact-schemas/ontology_info_schema.json | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 artifact-schemas/ontology_info_schema.json diff --git a/artifact-schemas/ontology_info_schema.json b/artifact-schemas/ontology_info_schema.json new file mode 100644 index 00000000..437a6aa3 --- /dev/null +++ b/artifact-schemas/ontology_info_schema.json @@ -0,0 +1,40 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Ontology Version and Source Schema", + "description": "A schema for the set of valid ontology reference files mapping to a CZ CellXGene Dataset Schema Version", + "type": "object", + "properties": { + "CL": { "$ref": "#/definitions/ontologyEntry" }, + "EFO": { 
"$ref": "#/definitions/ontologyEntry" }, + "HANCESTRO": { "$ref": "#/definitions/ontologyEntry" }, + "HsapDv": { "$ref": "#/definitions/ontologyEntry" }, + "MONDO": { "$ref": "#/definitions/ontologyEntry" }, + "MmusDv": { "$ref": "#/definitions/ontologyEntry" }, + "NCBITaxon": { "$ref": "#/definitions/ontologyEntry" }, + "UBERON": { "$ref": "#/definitions/ontologyEntry" }, + "PATO": { "$ref": "#/definitions/ontologyEntry" } + }, + "additionalProperties": false, + "definitions": { + "ontologyEntry": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "version of ontology canonical to this data release, as defined in its source repo" + }, + "source": { + "type": "string", + "format": "uri", + "description": "Source to find and download canonical ontology version for this data release" + }, + "filetype": { + "type": "string", + "description": "filetype used to build generated artifacts for this ontology data release" + } + }, + "required": ["version", "source", "filetype"], + "additionalProperties": false + } + } +} \ No newline at end of file From 4be0356cc8124d1eeb1d4368ce65838fba300723 Mon Sep 17 00:00:00 2001 From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> Date: Wed, 14 Feb 2024 15:44:57 -0500 Subject: [PATCH 3/4] Update artifact-schemas/cell_type_descendants_schema.json Co-authored-by: Trent Smith <1429913+Bento007@users.noreply.github.com> --- artifact-schemas/cell_type_descendants_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact-schemas/cell_type_descendants_schema.json b/artifact-schemas/cell_type_descendants_schema.json index a7acfc1f..04fc2e55 100644 --- a/artifact-schemas/cell_type_descendants_schema.json +++ b/artifact-schemas/cell_type_descendants_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Cell Type Ontology Descendants Schema", - "description": "Schema for Mapping Cell Type Ontology Terms to its 
Descendant Cell Type Ontology Terms", + "description": "A schema for mapping cell type ontology terms to their descendant cell type ontology terms", "type": "object", "patternProperties": { "^CL:[0-9]{7}$": { From e164ccac2ddaff4d651fa97befba72fa3e8a69f4 Mon Sep 17 00:00:00 2001 From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> Date: Wed, 14 Feb 2024 15:46:34 -0500 Subject: [PATCH 4/4] Update artifact-schemas/tissue_descendants_schema.json --- artifact-schemas/tissue_descendants_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifact-schemas/tissue_descendants_schema.json b/artifact-schemas/tissue_descendants_schema.json index b14c989f..6d540383 100644 --- a/artifact-schemas/tissue_descendants_schema.json +++ b/artifact-schemas/tissue_descendants_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Tissue Ontology Descendants Schema", - "description": "Schema for Mapping Tissue Ontology Terms to its Descendant Tissue Ontology Terms", + "description": "A schema for mapping tissue ontology terms to their descendant tissue ontology terms", "type": "object", "patternProperties": { "^UBERON:[0-9]{7}$": {