Skip to content

Commit

Permalink
Merge pull request #215 from microbiomedata/issue-214
Browse files Browse the repository at this point in the history
Issue 214: get_nmdc_jsonschema_dict not returning correct data
  • Loading branch information
wdduncan authored Nov 22, 2021
2 parents 800b89d + 9afea98 commit 8a52878
Show file tree
Hide file tree
Showing 7 changed files with 2,786 additions and 766 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ Changes to the schema are documented in this file.
### Removed
- N/A

## [2021.11.22rc1](https://github.com/microbiomedata/nmdc-schema/releases/tag/2021.11.22rc1)
### Added
- N/A
### Fixed
- get_nmdc_jsonschema_dict not returning correct data (#214)
- typo in NMDC Schema description
### Changed
- N/A
### Removed
- N/A

## [2021.11.19rc3](https://github.com/microbiomedata/nmdc-schema/releases/tag/2021.11.19rc3)
### Added
- N/A
Expand Down
2 changes: 1 addition & 1 deletion jsonld-context/nmdc.context.jsonld
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"_comments": "Auto generated from nmdc.yaml by jsonldcontextgen.py version: 0.1.1\n Generation date: 2021-11-15 12:55\n Schema: NMDC\n \n id: https://microbiomedata/schema\n description: Schema for National Microbiome Data Collaborative (NMDC). This schem is organized into 3 separate modules:\n \nThis schema is organized into distinct modules:\n \n * a set of core types for representing data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation schema\n * the NMDC schema itself\n license: https://creativecommons.org/publicdomain/zero/1.0/\n ",
"_comments": "Auto generated from nmdc.yaml by jsonldcontextgen.py version: 0.1.1\n Generation date: 2021-11-22T18:20:03\n Schema: NMDC\n \n id: https://microbiomedata/schema\n description: Schema for National Microbiome Data Collaborative (NMDC).\n \nThis schema is organized into distinct modules:\n \n * a set of core types for representing data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation schema\n * the NMDC schema itself\n license: https://creativecommons.org/publicdomain/zero/1.0/\n ",
"@context": {
"CAS": "http://identifiers.org/cas/",
"CATH": "http://identifiers.org/cath/",
Expand Down
3,444 changes: 2,701 additions & 743 deletions jsonschema/nmdc.schema.json

Large diffs are not rendered by default.

31 changes: 26 additions & 5 deletions python/nmdc.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Auto generated from nmdc.yaml by pythongen.py version: 0.9.0
# Generation date: 2021-11-15 12:56
# Generation date: 2021-11-22T18:20:38
# Schema: NMDC
#
# id: https://microbiomedata/schema
# description: Schema for National Microbiome Data Collaborative (NMDC). This schem is organized into 3 separate
# modules: This schema is organized into distinct modules: * a set of core types for representing
# data values * the mixs schema (auto-translated from mixs excel) * annotation schema * the NMDC
# schema itself
# description: Schema for National Microbiome Data Collaborative (NMDC). This schema is organized into distinct
# modules: * a set of core types for representing data values * the mixs schema (auto-translated from
# mixs excel) * annotation schema * the NMDC schema itself
# license: https://creativecommons.org/publicdomain/zero/1.0/

import dataclasses
Expand Down Expand Up @@ -3110,6 +3109,24 @@ def _addvals(cls):
setattr(cls, "QC Statistics",
PermissibleValue(text="QC Statistics",
description="Reads QC summary statistics") )
setattr(cls, "TIGRFam Annotation GFF",
PermissibleValue(text="TIGRFam Annotation GFF",
description="GFF3 format file with TIGRfam") )
setattr(cls, "Clusters of Orthologous Groups (COG) Annotation GFF",
PermissibleValue(text="Clusters of Orthologous Groups (COG) Annotation GFF",
description="GFF3 format file with COGs") )
setattr(cls, "CATH FunFams (Functional Families) Annotation GFF",
PermissibleValue(text="CATH FunFams (Functional Families) Annotation GFF",
description="GFF3 format file with CATH FunFams") )
setattr(cls, "SUPERFam Annotation GFF",
PermissibleValue(text="SUPERFam Annotation GFF",
description="GFF3 format file with SUPERFam") )
setattr(cls, "SMART Annotation GFF",
PermissibleValue(text="SMART Annotation GFF",
description="GFF3 format file with SMART") )
setattr(cls, "Pfam Annotation GFF",
PermissibleValue(text="Pfam Annotation GFF",
description="GFF3 format file with Pfam") )

class CreditEnum(EnumDefinitionImpl):

Expand Down Expand Up @@ -6245,6 +6262,10 @@ class slots:
slots.reaction_participant_stoichiometry = Slot(uri=NMDC.stoichiometry, name="reaction participant_stoichiometry", curie=NMDC.curie('stoichiometry'),
model_uri=NMDC.reaction_participant_stoichiometry, domain=ReactionParticipant, range=Optional[int])

slots.functional_annotation_has_function = Slot(uri="str(uriorcurie)", name="functional annotation_has function", curie=None,
model_uri=NMDC.functional_annotation_has_function, domain=FunctionalAnnotation, range=Optional[str],
pattern=re.compile(r'^(KEGG.PATHWAY:\w{2,4}\d{5}|KEGG.REACTION:R\d+|RHEA:\d{5}|MetaCyc:[A-Za-z0-9+_.%-:]+|EC:\d{1,2}(\.\d{0,3}){0,3}|GO:\d{7}|MetaNetX:(MNXR\d+|EMPTY)|SEED:\w+|KEGG\.ORTHOLOGY:K\d+|EGGNOG:\w+|PFAM:PF\d{5}|TIGRFAM:TIGR\d+|SUPFAM:\w+|CATH:[1-6]\.[0-9]+\.[0-9]+\.[0-9]+|PANTHER.FAMILY:PTHR\d{5}(\:SF\d{1,3})?)$'))

slots.functional_annotation_type = Slot(uri=NMDC.type, name="functional annotation_type", curie=NMDC.curie('type'),
model_uri=NMDC.functional_annotation_type, domain=FunctionalAnnotation, range=Optional[Union[str, OntologyClassId]])

Expand Down
3 changes: 1 addition & 2 deletions src/schema/nmdc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ title: NMDC Schema

description: >-
Schema for National Microbiome Data Collaborative (NMDC).
This schem is organized into 3 separate modules:
This schema is organized into distinct modules:
Expand All @@ -14,7 +13,7 @@ description: >-
* the NMDC schema itself
license: https://creativecommons.org/publicdomain/zero/1.0/
version: 2021-11-19
version: 2021-11-22

prefixes:
nmdc: https://microbiomedata/meta/
Expand Down
57 changes: 44 additions & 13 deletions util/nmdc_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,53 @@ def get_nmdc_yaml_string() -> str:
return nmdc_yaml.decode("utf-8")


def get_nmdc_dict() -> Dict:
"""Parses the nmdc.yaml file into a dict.
def get_nmdc_jsonschema_bytesIO() -> io.BytesIO:
"""Returns the nmdc.schema.json file as a bytes stream.
This function is not intended to be used directly, but is used by other functions
Returns
-------
dict
The dict of the keys and value in the nmdc.yaml file.
BytesIO
A bytes stream of nmdc.schema.json file.
"""
# get nmdc.yaml file from the package data
nmdc_yaml = get_nmdc_yaml_bytesIO()
return io.BytesIO(pkgutil.get_data("nmdc_schema", "nmdc.schema.json"))


def get_nmdc_jsonschema_bytes() -> bytes:
    """Return the nmdc.schema.json file as bytes.

    Returns
    -------
    bytes
        The full contents of the stream produced by
        get_nmdc_jsonschema_bytesIO().
    """
    # Pull the whole buffer out of the BytesIO wrapper in one step.
    return get_nmdc_jsonschema_bytesIO().getvalue()


# convert yaml to dict
nmdc_dict = yaml.load(nmdc_yaml, Loader=yaml.CLoader)
def get_nmdc_jsonschema_string() -> str:
"""Returns the nmdc.schema.json file as a string.
# return dict
return nmdc_dict
Returns
-------
str
A string containing the contents of nmdc.schema.json file.
"""
nmdc_json = get_nmdc_jsonschema_bytes()
return nmdc_json.decode("utf-8")


def get_nmdc_jsonschema_dict() -> Dict:
    """Parse the nmdc.schema.json file into a dict.

    Returns
    -------
    dict
        The result of JSON-decoding the bytes of the nmdc.schema.json file.
    """
    # json.loads accepts the raw bytes directly, so no explicit decode step.
    return json.loads(get_nmdc_jsonschema_bytes())


def get_nmdc_jsonschema() -> str:
Expand All @@ -79,12 +110,12 @@ def get_nmdc_jsonschema() -> str:
str
JSON string representation of the NMDC jsonschema (nmdc.schema.json).
"""
nmdc_schema = get_nmdc_dict()
nmdc_schema = get_nmdc_jsonschema_dict()
return json.dumps(nmdc_schema, indent=2)


def get_nmdc_schema_definition() -> SchemaDefinition:
"""Returns a SchemaDefintion object created from the nmdc.yaml file.
"""Returns a LinkML SchemaDefinition object created from the nmdc.yaml file.
Returns
-------
Expand Down Expand Up @@ -180,7 +211,7 @@ def get_gold_sssom() -> str:
yaml returns the nmdc.yaml file as a string
jsonschema returns the NMDC jsonschema as json
dict returns the NMDC jsonschema as a dict
schemadef returns the SchemaDefintion created from the nmdc.yaml file
schemadef returns the LinkML SchemaDefintion created from the nmdc.yaml file
filetypeenums returns informaton about the NMDC file type enums as json
goldsssom returns the gold-to-mixs.sssom.tsv file contents
""",
Expand All @@ -195,7 +226,7 @@ def cli(ctx, fetch):
elif "jsonschema" == fetch:
click.echo(get_nmdc_jsonschema())
elif "dict" == fetch:
click.echo(get_nmdc_dict())
click.echo(get_nmdc_jsonschema_dict())
elif "schemadef" == fetch:
click.echo(get_nmdc_schema_definition())
elif "filetypeenums" == fetch:
Expand Down
4 changes: 2 additions & 2 deletions util/validate_nmdc_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""Provides CLI to validate json files against the NMDC jsonschema."""

import json, jsonschema, io, click
from .nmdc_data import get_nmdc_jsonschema, get_nmdc_dict
from .nmdc_data import get_nmdc_jsonschema, get_nmdc_jsonschema_dict
from deprecated import deprecated


Expand All @@ -18,7 +18,7 @@ def get_nmdc_schema() -> dict:
dict
Dict representation of the nmdc.schema.json package data file.
"""
return get_nmdc_dict()
return get_nmdc_jsonschema_dict()


@deprecated(reason="functionality moved to nmdc_data.get_nmdc_jsonschema()")
Expand Down

0 comments on commit 8a52878

Please sign in to comment.