Skip to content

Commit

Permalink
Merge branch 'main' into adding-docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
cmungall authored Jul 30, 2024
2 parents ec4fd2c + 649af7e commit c550c1e
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 17 deletions.
58 changes: 44 additions & 14 deletions schemasheets/schemamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import csv
import logging
from urllib.request import urlopen
from copy import copy

import click
import yaml
Expand All @@ -19,7 +20,7 @@

from schemasheets.schemasheet_datamodel import ColumnConfig, TableConfig, get_configmodel, get_metamodel, COL_NAME, \
DESCRIPTOR, \
tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet, T_SETTING
tmap, T_CLASS, T_PV, T_SLOT, T_ATTRIBUTE, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet, T_SETTING
from schemasheets.conf.configschema import Cardinality
from schemasheets.utils.google_sheets import gsheets_download_url
from schemasheets.utils.prefixtool import guess_prefix_expansion
Expand Down Expand Up @@ -59,6 +60,8 @@ class SchemaMaker:
table_config_path: str = None
"""Path to table configuration file."""

base_schema_path: str = None

def create_schema(self, csv_files: Union[str, List[str]], **kwargs) -> SchemaDefinition:
"""
Create a LinkML schema from one or more Schema Sheets.
Expand All @@ -67,8 +70,13 @@ def create_schema(self, csv_files: Union[str, List[str]], **kwargs) -> SchemaDef
:param kwargs:
:return: generated schema
"""
n = self.default_name
if n is None:
self.base_view = SchemaView(self.base_schema_path) if self.base_schema_path else None

if self.default_name:
n = self.default_name
elif self.base_view and self.base_view.schema.name:
n = self.base_view.schema.name
else:
n = 'TEMP'
self.schema = SchemaDefinition(id=n, name=n, default_prefix=n, default_range='string')
if not isinstance(csv_files, list):
Expand All @@ -82,6 +90,9 @@ def create_schema(self, csv_files: Union[str, List[str]], **kwargs) -> SchemaDef
if prefix not in self.schema.prefixes:
logging.error(f'Prefix {prefix} not declared: using default')
self.schema.prefixes[prefix] = Prefix(prefix, f'https://example.org/{prefix}/')

if self.base_view:
SchemaView(self.schema).merge_schema(self.base_view.schema)
return self.schema

def _tidy_slot_usage(self):
Expand Down Expand Up @@ -202,7 +213,14 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig):
logging.warning(f'Overwriting value for {k}, was {curr_val}, now {v}')
raise ValueError(f'Cannot reset value for {k}, was {curr_val}, now {v}')
if cc.settings.inner_key:
getattr(actual_element, cc.maps_to)[cc.settings.inner_key] = v
if isinstance(getattr(actual_element, cc.maps_to), list):
if '|' in v:
vs = v.split('|')
else:
vs = [v]
setattr(actual_element, cc.maps_to, [{cc.settings.inner_key: v} for v in vs])
else:
getattr(actual_element, cc.maps_to)[cc.settings.inner_key] = v
else:
setattr(actual_element, cc.maps_to, v)
elif cc.is_element_type:
Expand All @@ -214,7 +232,7 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig):
else:
raise ValueError(f'No mapping for {k}; cc={cc}')

def get_current_element(self, elt: Element) -> Union[Element, PermissibleValue]:
def get_current_element(self, elt: Element, is_attr=False) -> Union[Element, PermissibleValue]:
"""
Look up an element in the current schema using a stub element as key
Expand All @@ -237,6 +255,7 @@ def get_current_element(self, elt: Element) -> Union[Element, PermissibleValue]:
This time the existing "foo" class from the schema, with its adornments, will be returned
:param elt: proxy for element to look up
:param is_attr: if True, then the element is an attribute, not a slot
:return:
"""
sc = self.schema
Expand All @@ -249,7 +268,10 @@ def get_current_element(self, elt: Element) -> Union[Element, PermissibleValue]:
if isinstance(elt, ClassDefinition):
ix = sc.classes
elif isinstance(elt, SlotDefinition):
ix = sc.slots
if self.use_attributes or is_attr:
ix = copy(sc.slots)
else:
ix = sc.slots
elif isinstance(elt, EnumDefinition):
ix = sc.enums
elif isinstance(elt, TypeDefinition):
Expand Down Expand Up @@ -323,6 +345,8 @@ def row_focal_element(self, row: Dict[str, Any], table_config: TableConfig,
raise ValueError(f'Cardinality of schema col must be 1; got: {vs}')
self.schema.name = vs[0]
vmap[k] = [self.schema]
elif k == T_ATTRIBUTE:
vmap[k] = [self.get_current_element(elt_cls(v), is_attr=True) for v in vs]
else:
vmap[k] = [self.get_current_element(elt_cls(v)) for v in vs]

Expand All @@ -339,19 +363,22 @@ def check_excess(descriptors):
else:
cls = self.get_current_element(ClassDefinition(cc.settings.applies_to_class))
vmap[T_CLASS] = [cls]
if T_SLOT in vmap:
check_excess([T_SLOT, T_CLASS])
if len(vmap[T_SLOT]) != 1:
raise ValueError(f'Cardinality of slot field must be 1; got {vmap[T_SLOT]}')
main_elt = vmap[T_SLOT][0]
if T_SLOT in vmap or T_ATTRIBUTE in vmap:
if T_SLOT in vmap and T_ATTRIBUTE in vmap:
raise ValueError(f'Cannot have both slot and attribute in same row')
T_SLOT_ATTR = T_SLOT if T_SLOT in vmap else T_ATTRIBUTE
check_excess([T_SLOT_ATTR, T_CLASS])
if len(vmap[T_SLOT_ATTR]) != 1:
raise ValueError(f'Cardinality of slot field must be 1; got {vmap[T_SLOT_ATTR]}')
main_elt = vmap[T_SLOT_ATTR][0]
if T_CLASS in vmap:
# The sheet does double duty, representing both a class and a slot.
# Here *both* the "class" and "slot" columns are populated, so
# this is translated to slot_usage.
# TODO: add option to allow to instead represent these as attributes
c: ClassDefinition
for c in vmap[T_CLASS]:
if self.use_attributes:
if self.use_attributes or T_SLOT_ATTR == T_ATTRIBUTE:
# slots always belong to a class;
# no separate top level slots
a = SlotDefinition(main_elt.name)
Expand Down Expand Up @@ -656,10 +683,12 @@ def ensure_csvreader(self, file_name: str, delimiter=None) -> str:
help="Auto-repair schema")
@click.option("--gsheet-id",
help="Google sheets ID. If this is specified then the arguments MUST be sheet names")
@click.option("--base-schema-path",
help="Base schema yaml file, the base-schema will be merged with the generated schema")
@click.option("-v", "--verbose", count=True)
@click.argument('tsv_files', nargs=-1)
def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool,
unique_slots: bool, verbose: int, sort_keys: bool):
unique_slots: bool, verbose: int, sort_keys: bool, base_schema_path: str):
"""
Convert schemasheets to a LinkML schema
Expand All @@ -684,7 +713,8 @@ def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_pat
unique_slots=unique_slots,
gsheet_id=gsheet_id,
default_name=name,
table_config_path=table_config_path)
table_config_path=table_config_path,
base_schema_path=base_schema_path)
schema = sm.create_schema(list(tsv_files))
if repair:
schema = sm.repair_schema(schema)
Expand Down
2 changes: 2 additions & 0 deletions schemasheets/schemasheet_datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
T_SCHEMA = 'schema'
T_CLASS = 'class'
T_SLOT = 'slot'
T_ATTRIBUTE = 'attribute'
T_ENUM = 'enum'
T_PV = 'permissible_value'
T_TYPE = 'type'
Expand All @@ -34,6 +35,7 @@
T_SCHEMA: SchemaDefinition,
T_CLASS: ClassDefinition,
T_SLOT: SlotDefinition,
T_ATTRIBUTE: SlotDefinition,
T_ENUM: EnumDefinition,
T_PV: PermissibleValue,
T_TYPE: TypeDefinition,
Expand Down
6 changes: 3 additions & 3 deletions tests/test_121/input/class_defs.tsv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class description
>class description
Vehicle A machine, with or wihtout its own power source, that eases the transportation of people, materials, etc.
Airplane "A vehicle which flies through the air, obtaining lif from air flowing acoss fixed wings"
Boat A vehicle which moves through water
Vehicle A machine, with or without its own power source, that eases the transportation of people, materials, etc.
Airplane "A vehicle which flies through the air, obtaining lift from air flowing across fixed wings"
Boat A vehicle which moves through water.

0 comments on commit c550c1e

Please sign in to comment.