diff --git a/Makefile b/Makefile index b9cebf0f..359670e5 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,18 @@ .PHONY: all clean test post_clone_submodule_steps serializastion_vs_pattern negative_case target/soil_biosample_modular.yaml: clean post_clone_submodule_steps serializastion_vs_pattern + poetry run gen-yaml mixs-source/model/schema/mixs.yaml > target/mixs_generated.yaml + yq eval 'del(.imports)' target/mixs_generated.yaml > target/mixs_generated_no_imports.yaml + poetry run gen-yaml nmdc-schema/src/schema/nmdc.yaml > target/nmdc_generated.yaml + yq eval 'del(.imports)' target/nmdc_generated.yaml > target/nmdc_generated_no_imports.yaml poetry run python use_modular_gd.py > $@ - poetry run linkml_to_dh_light --model_file target/soil_biosample_modular.yaml --selected_class soil_biosample + poetry run enum_annotator \ + --modelfile target/soil_biosample_modular.yaml \ + --requested_enum_name fao_class_enum \ + --ontology_string ENVO \ + --max_cosine 0.1 > target/soil_biosample_modular_annotated.yaml + poetry run linkml_to_dh_light --model_file target/soil_biosample_modular_annotated.yaml --selected_class soil_biosample + # test needs work all: clean post_clone_submodule_steps serializastion_vs_pattern target/data.tsv diff --git a/data_all.tsv b/data_all.tsv new file mode 100644 index 00000000..36768b82 --- /dev/null +++ b/data_all.tsv @@ -0,0 +1,251 @@ +Ontology ID label parent class description guidance datatype pattern requirement examples source capitalize data status max value min value EXPORT_dev + nmdc:default +nmdc:id id nmdc:default A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI xs:unique required default id +nmdc:add_date add_date nmdc:default The date on which the information was added to the database. xs:token default add_date +nmdc:alternative identifiers alternative identifiers nmdc:default A list of alternative identifiers for the entity. 
xs:token default alternative identifiers +nmdc:community community nmdc:default xs:token default community +nmdc:depth2 depth2 nmdc:default xs:token \d+[.\d+] \S+ default depth2 +nmdc:habitat habitat nmdc:default xs:token default habitat +nmdc:host_name host_name nmdc:default xs:token default host_name +nmdc:identifier identifier nmdc:default xs:token default identifier +nmdc:location location nmdc:default xs:token default location +nmdc:mod_date mod_date nmdc:default The last date on which the database information was modified. xs:token default mod_date +nmdc:name name nmdc:default A human readable label for an entity xs:token default name +nmdc:ncbi_taxonomy_name ncbi_taxonomy_name nmdc:default xs:token default ncbi_taxonomy_name +nmdc:proport_woa_temperature proport_woa_temperature nmdc:default xs:token default proport_woa_temperature +nmdc:salinity_category salinity_category nmdc:default Categorical description of the sample's salinity. Examples: halophile, halotolerant, hypersaline, euryhaline xs:token default salinity_category +nmdc:sample_collection_site sample_collection_site nmdc:default xs:token default sample_collection_site +nmdc:soluble_iron_micromol soluble_iron_micromol nmdc:default xs:token default soluble_iron_micromol +nmdc:subsurface_depth subsurface_depth nmdc:default xs:token \d+[.\d+] \S+ default subsurface_depth +nmdc:subsurface_depth2 subsurface_depth2 nmdc:default xs:token \d+[.\d+] \S+ default subsurface_depth2 +nmdc:type type nmdc:default An optional string that specifies the type object. This is used to allow for searches for different kinds of objects. 
xs:token default type + MIXS:core field + MIXS:environment field + dcterms:default + nmdc:attribute + nmdc:gold_path_field + nmdc:sample identifiers +nmdc:GOLD sample identifiers GOLD sample identifiers nmdc:sample identifiers ['identifiers for corresponding sample in GOLD'] |^GOLD:Gb[0-9]+$ xs:token ^GOLD:Gb[0-9]+$ default GOLD sample identifiers +nmdc:INSDC biosample identifiers INSDC biosample identifiers nmdc:sample identifiers ['identifiers for corresponding sample in INSDC'] |^biosample:SAM[NED]([A-Z])?[0-9]+$ xs:token ^biosample:SAM[NED]([A-Z])?[0-9]+$ default INSDC biosample identifiers +nmdc:INSDC secondary sample identifiers INSDC secondary sample identifiers nmdc:sample identifiers ['secondary identifiers for corresponding sample in INSDC'] ENA redirects these to primary IDs, e.g. https://www.ebi.ac.uk/ena/browser/view/DRS166340 -> SAMD00212331|MGnify uses these as their primary sample IDs|^biosample:(E|D|S)RS[0-9]{6,}$ xs:token ^biosample:(E|D|S)RS[0-9]{6,}$ default INSDC secondary sample identifiers +MIXS:0000639 history/agrochemical additions MIXS:core field Addition of fertilizers, pesticides, etc. - amount and time of applications Expected value: agrochemical name;agrochemical amount;timestamp|Preferred unit: gram, mole per liter, milligram per liter xs:token \d+[.\d+] \S+ default agrochem_addition +MIXS:0000607 extreme_unusual_properties/Al saturation MIXS:core field Aluminum saturation (esp. 
For tropical soils) Expected value: measurement value|Preferred unit: percentage|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default al_sat +MIXS:0000324 extreme_unusual_properties/Al saturation method MIXS:core field Reference or method used in determining Al saturation Expected value: PMID,DOI or URL xs:token default al_sat_meth +nmdc:alkalinity alkalinity nmdc:attribute """Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default alkalinity +nmdc:alkalinity_method alkalinity_method nmdc:attribute Method used for alkalinity measurement xs:token default alkalinity_method +nmdc:alkyl_diethers alkyl_diethers nmdc:attribute Concentration of alkyl diethers |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default alkyl_diethers +MIXS:0000094 altitude MIXS:environment field Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air Expected value: measurement value|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default alt +nmdc:aminopept_act aminopept_act nmdc:attribute Measurement of aminopeptidase activity |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default aminopept_act +nmdc:ammonium ammonium nmdc:attribute Concentration of ammonium in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default ammonium +MIXS:0000644 mean annual precipitation MIXS:core field The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps. 
Expected value: measurement value|Preferred unit: millimeter|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default annual_precpt +MIXS:0000642 mean annual temperature MIXS:core field Mean annual temperature Expected value: measurement value|Preferred unit: degree Celsius|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default annual_temp +nmdc:bacteria_carb_prod bacteria_carb_prod nmdc:attribute Measurement of bacterial carbon production |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default bacteria_carb_prod +nmdc:bishomohopanol bishomohopanol nmdc:attribute Concentration of bishomohopanol |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default bishomohopanol +nmdc:bromide bromide nmdc:attribute Concentration of bromide |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default bromide +nmdc:calcium calcium nmdc:attribute Concentration of calcium in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default calcium +nmdc:carb_nitro_ratio carb_nitro_ratio nmdc:attribute Ratio of amount or concentrations of carbon to nitrogen |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default carb_nitro_ratio +nmdc:chem_administration chem_administration nmdc:attribute """List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi""" xs:token default chem_administration +nmdc:chloride chloride nmdc:attribute Concentration of chloride in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default chloride +nmdc:chlorophyll chlorophyll nmdc:attribute Concentration of chlorophyll |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default chlorophyll +MIXS:0000011 collection date MIXS:environment field The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant Expected value: date and time xs:date default collection_date +MIXS:0000318 history/crop rotation MIXS:core field Whether or not crop is rotated, and if yes, rotation schedule Expected value: crop rotation status;schedule xs:token default crop_rotation +MIXS:0001080 current land use MIXS:core field Present state of sample site Expected value: enumeration|[cities|farmstead|industrial areas|roads\/railroads|rock|sand|gravel|mudflats|salt flats|badlands|permanent snow or ice|saline seeps|mines\/quarries|oil waste areas|small grains|row crops|vegetable crops|horticultural plants (e.g. tulips)|marshlands (grass,sedges,rushes)|tundra (mosses,lichens)|rangeland|pastureland (grasslands used for livestock grazing)|hayland|meadows (grasses,alfalfa,fescue,bromegrass,timothy)|shrub land (e.g. mesquite,sage\-brush,creosote bush,shrub oak,eucalyptus)|successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries)|shrub crops (blueberries,nursery ornamentals,filberts)|vine crops (grapes)|conifers (e.g. pine,spruce,fir,cypress)|hardwoods (e.g. oak,hickory,elm,aspen)|intermixed hardwood and conifers|tropical (e.g. 
mangrove,palms)|rainforest (evergreen forest receiving >406 cm annual rainfall)|swamp (permanent or semi\-permanent water body dominated by woody plants)|crop trees (nuts,fruit,christmas trees,nursery trees)] select default cur_land_use +MIXS:0000312 current vegetation MIXS:core field Vegetation classification from one or more standard classification systems, or agricultural crop Expected value: current vegetation type xs:token default cur_vegetation +MIXS:0000314 current vegetation method MIXS:core field Reference or method used in vegetation classification Expected value: PMID,DOI or url xs:token default cur_vegetation_meth +nmdc:density density nmdc:attribute """Density of the sample, which is its mass per unit volume (aka volumetric mass density)""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default density +MIXS:0000018 depth MIXS:environment field MIxS_soil:The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples.|NMDC_biosample:Please refer to the definitions of depth in the environmental packages Expected value: measurement value xs:token \d+[.\d+] \S+ required default depth +dcterms:description description dcterms:default a human-readable description of a thing xs:token default description +nmdc:diss_carb_dioxide diss_carb_dioxide nmdc:attribute Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_carb_dioxide +nmdc:diss_hydrogen diss_hydrogen nmdc:attribute Concentration of dissolved hydrogen |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_hydrogen +nmdc:diss_inorg_carb diss_inorg_carb nmdc:attribute """Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_inorg_carb +nmdc:diss_inorg_phosp diss_inorg_phosp nmdc:attribute Concentration of dissolved inorganic phosphorus in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_inorg_phosp +nmdc:diss_org_carb diss_org_carb nmdc:attribute """Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_org_carb +nmdc:diss_org_nitro diss_org_nitro nmdc:attribute Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2 |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_org_nitro +nmdc:diss_oxygen diss_oxygen nmdc:attribute Concentration of dissolved oxygen |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default diss_oxygen +MIXS:0001085 drainage classification MIXS:core field Drainage classification from a standard system such as the USDA system Expected value: enumeration|[very poorly|poorly|somewhat poorly|moderately well|well|excessively drained] select default drainage_class +nmdc:ecosystem ecosystem 
nmdc:gold_path_field TODO xs:token default ecosystem +nmdc:ecosystem_category ecosystem_category nmdc:gold_path_field TODO xs:token default ecosystem_category +nmdc:ecosystem_subtype ecosystem_subtype nmdc:gold_path_field TODO xs:token default ecosystem_subtype +nmdc:ecosystem_type ecosystem_type nmdc:gold_path_field TODO xs:token default ecosystem_type +MIXS:0000093 elevation MIXS:environment field Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit. Expected value: measurement value|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ required default elev +MIXS:0000012 broad-scale environmental context MIXS:environment field Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO’s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS Expected value: The major environment type(s) where the sample was collected. Recommend subclasses of biome [ENVO:00000428]. Multiple terms can be separated by one or more pipes.|.* \S+:\S+ xs:token .* \S+:\S+ required default env_broad_scale +MIXS:0000013 local environmental context MIXS:environment field Report the entity or entities which are in the sample or specimen’s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS. Expected value: Environmental entities having causal influences upon the entity at time of sampling.|.* \S+:\S+ xs:token .* \S+:\S+ required default env_local_scale +MIXS:0000014 environmental medium MIXS:environment field Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top). Expected value: The material displaced by the entity at time of sampling. Recommend subclasses of environmental material [ENVO:00010483].|.* \S+:\S+ xs:token .* \S+:\S+ required default env_medium +nmdc:env_package env_package nmdc:attribute """MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. 
By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported""" |[air|built environment|host\-associated|human\-associated|human\-skin|human\-oral|human\-gut|human\-vaginal|hydrocarbon resources\-cores|hydrocarbon resources\-fluids\/swabs|microbial mat\/biofilm|misc environment|plant\-associated|sediment|soil|wastewater\/sludge|water] xs:token [air|built environment|host\-associated|human\-associated|human\-skin|human\-oral|human\-gut|human\-vaginal|hydrocarbon resources\-cores|hydrocarbon resources\-fluids\/swabs|microbial mat\/biofilm|misc environment|plant\-associated|sediment|soil|wastewater\/sludge|water] default env_package +MIXS:0000320 history/extreme events MIXS:core field Unusual physical events that may have affected microbial populations Expected value: date xs:date default extreme_event +MIXS:0000651 extreme_unusual_properties/salinity MIXS:core field Measured salinity xs:token \d+[.\d+] \S+ default extreme_salinity +MIXS:0001083 soil_taxonomic/FAO classification MIXS:core field Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups Expected value: enumeration|[Acrisols|Andosols|Arenosols|Cambisols|Chernozems|Ferralsols|Fluvisols|Gleysols|Greyzems|Gypsisols|Histosols|Kastanozems|Lithosols|Luvisols|Nitosols|Phaeozems|Planosols|Podzols|Podzoluvisols|Rankers|Regosols|Rendzinas|Solonchaks|Solonetz|Vertisols|Yermosols] select default fao_class +MIXS:0001086 history/fire MIXS:core field Historical and/or physical evidence of fire Expected value: date xs:date default fire +MIXS:0000319 history/flooding MIXS:core field Historical and/or physical evidence of flooding Expected value: date xs:date default flooding +MIXS:0000010 geographic location (country and/or sea,region) MIXS:environment field The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ) Expected value: country or sea name (INSDC or GAZ): region(GAZ), specific location name xs:token default geo_loc_name +nmdc:glucosidase_act glucosidase_act nmdc:attribute Measurement of glucosidase activity |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default glucosidase_act +MIXS:0000652 extreme_unusual_properties/heavy metals MIXS:core field MIxS_soil:Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field.|NMDC_biosample:Heavy metals present and concentrationsany drug used by subject and the frequency of usage; can include multiple heavy metals and concentrations Expected value: heavy metal name;measurement value unit|Preferred unit: microgram per gram xs:token \d+[.\d+] \S+ default heavy_metals +MIXS:0000343 extreme_unusual_properties/heavy metals method MIXS:core field Reference or method used in determining heavy metals Expected value: PMID,DOI or url xs:token default heavy_metals_meth +nmdc:horizon horizon nmdc:attribute Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath |[O horizon|A horizon|E horizon|B horizon|C horizon|R layer|Permafrost] xs:token [O horizon|A horizon|E horizon|B horizon|C horizon|R layer|Permafrost] default horizon +MIXS:0000321 horizon method MIXS:core field Reference or method used in determining the horizon Expected value: PMID,DOI or url xs:token default horizon_meth +MIXS:0000009 geographic location (latitude and longitude) MIXS:environment field MIxS:The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system|NMDC: This is currently a required field but it's not clear if this should be required for human hosts Expected value: decimal degrees, limit to 8 decimal points|\d+[.\d+] \d+[.\d+] xs:token \d+[.\d+] \d+[.\d+] default lat_lon +MIXS:0000340 links to additional analysis MIXS:core field Link to additional analysis results performed on the sample Expected value: PMID,DOI or url xs:token default link_addit_analys +MIXS:0000329 link to classification information MIXS:core field Link to digitized soil maps or other soil classification information Expected value: PMID,DOI or url xs:token default link_class_info +MIXS:0000328 link to climate information MIXS:core field Link to climate resource Expected value: PMID,DOI or url xs:token default link_climate_info +MIXS:0000330 soil_taxonomic/local classification MIXS:core field Soil classification based on local soil classification system Expected value: local classification name xs:token default local_class +MIXS:0000331 soil_taxonomic/local classification method MIXS:core field Reference or method used in determining the local soil classification Expected value: PMID,DOI or url xs:token default local_class_meth +nmdc:magnesium magnesium nmdc:attribute Concentration of magnesium in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default magnesium +nmdc:mean_frict_vel mean_frict_vel nmdc:attribute Measurement of mean friction velocity |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default mean_frict_vel +nmdc:mean_peak_frict_vel mean_peak_frict_vel nmdc:attribute Measurement of mean peak friction velocity |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default mean_peak_frict_vel +MIXS:0000339 microbial biomass method MIXS:core field Reference or method used in determining microbial biomass xs:token default micro_biomass_meth +MIXS:0000650 microbial biomass MIXS:core field The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 
micrometer. If you keep this, you would need to have correction factors used for conversion to the final units Expected value: measurement value|Preferred unit: ton, kilogram, gram per kilogram soil|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default microbial_biomass +nmdc:microbial_biomass_meth microbial_biomass_meth nmdc:attribute Reference or method used in determining microbial biomass xs:token default microbial_biomass_meth +MIXS:0000752 miscellaneous parameter MIXS:core field Any other measurement performed or parameter collected, that is not listed here Expected value: parameter name;measurement value xs:token \d+[.\d+] \S+ default misc_param +nmdc:n_alkanes n_alkanes nmdc:attribute Concentration of n-alkanes; can include multiple n-alkanes xs:token \d+[.\d+] \S+ default n_alkanes +nmdc:nitrate nitrate nmdc:attribute Concentration of nitrate in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default nitrate +nmdc:nitrite nitrite nmdc:attribute Concentration of nitrite in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default nitrite +nmdc:org_matter org_matter nmdc:attribute Concentration of organic matter |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default org_matter +nmdc:org_nitro org_nitro nmdc:attribute Concentration of organic nitrogen |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default org_nitro +nmdc:organism_count organism_count nmdc:attribute """Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)""" xs:token \d+[.\d+] \S+ default organism_count +nmdc:oxy_stat_samp oxy_stat_samp nmdc:attribute Oxygenation status of sample |[aerobic|anaerobic|other] xs:token [aerobic|anaerobic|other] default oxy_stat_samp +dcterms:isPartOf part of dcterms:default Links a resource to another resource that either logically or physically includes it. xs:token default part of +nmdc:part_org_carb part_org_carb nmdc:attribute Concentration of particulate organic carbon |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default part_org_carb +nmdc:perturbation perturbation nmdc:attribute """Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types""" xs:token default perturbation +nmdc:petroleum_hydrocarb petroleum_hydrocarb nmdc:attribute Concentration of petroleum hydrocarbon |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default petroleum_hydrocarb +MIXS:0001001 pH MIXS:core field Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid Expected value: measurement value|\d+[.\d+] xs:token \d+[.\d+] default ph +MIXS:0001106 pH method MIXS:core field Reference or method used in determining ph Expected value: PMID,DOI or url xs:token default ph_meth +nmdc:phaeopigments phaeopigments nmdc:attribute Concentration of phaeopigments; can include multiple phaeopigments xs:token \d+[.\d+] \S+ default phaeopigments +nmdc:phosplipid_fatt_acid phosplipid_fatt_acid nmdc:attribute Concentration of phospholipid fatty acids; can include multiple values xs:token \d+[.\d+] \S+ default phosplipid_fatt_acid +MIXS:0000325 pooling of DNA extracts (if done) MIXS:core field Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given Expected value: pooling status;number of pooled extracts xs:token default pool_dna_extracts +nmdc:potassium potassium nmdc:attribute Concentration of potassium in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default potassium +nmdc:pressure pressure nmdc:attribute """Pressure to which the sample is subjected, in atmospheres""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default pressure +MIXS:0000316 history/previous land use method MIXS:core field Reference or method used in determining previous land use and dates xs:token default prev_land_use_meth +MIXS:0000315 history/previous land use MIXS:core field Previous land use and dates Expected value: land use name;date xs:token default previous_land_use +nmdc:previous_land_use_meth previous_land_use_meth nmdc:attribute Reference or method used in determining previous land use and dates xs:token default previous_land_use_meth +MIXS:0001084 profile position MIXS:core field Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas Expected value: enumeration|[summit|shoulder|backslope|footslope|toeslope] select default profile_position +nmdc:redox_potential redox_potential nmdc:attribute """Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default redox_potential +nmdc:salinity salinity nmdc:attribute """Salinity is the total concentration of all dissolved salts in a water sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default salinity +MIXS:0000341 salinity method MIXS:core field Reference or method used in determining salinity Expected value: PMID,DOI or url xs:token default salinity_meth +nmdc:samp_collect_device samp_collect_device nmdc:attribute The method or device employed for collecting the sample xs:token default samp_collect_device +nmdc:samp_mat_process samp_mat_process nmdc:attribute """Any processing applied to the sample during or after retrieving the sample from environment. This field accepts OBI, for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI""" xs:token default samp_mat_process +nmdc:samp_store_dur samp_store_dur nmdc:attribute Duration for which the sample was stored xs:token default samp_store_dur +nmdc:samp_store_loc samp_store_loc nmdc:attribute """Location at which sample was stored, usually name of a specific freezer/room""" xs:token default samp_store_loc +nmdc:samp_store_temp samp_store_temp nmdc:attribute """Temperature at which sample was stored, e.g. -80 degree Celsius""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default samp_store_temp +nmdc:samp_vol_we_dna_ext samp_vol_we_dna_ext nmdc:attribute """Volume (ml), weight (g) of processed sample, or surface area swabbed from sample for DNA extraction""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default samp_vol_we_dna_ext +MIXS:0000645 mean seasonal precipitation MIXS:core field The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps. 
Expected value: measurement value|Preferred unit: millimeter|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default season_precpt +MIXS:0000643 mean seasonal temperature MIXS:core field Mean seasonal temperature Expected value: measurement value|Preferred unit: degree Celsius|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default season_temp +MIXS:0000322 composite design/sieving MIXS:core field Collection design of pooled samples and/or sieve size and amount of sample sieved Expected value: design name and/or size;amount xs:token \d+[.\d+] \S+ default sieving +nmdc:size_frac_low size_frac_low nmdc:attribute Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default size_frac_low +nmdc:size_frac_up size_frac_up nmdc:attribute Refers to the mesh/pore size used to retain the sample. Materials smaller than the size threshold are excluded from the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default size_frac_up +MIXS:0000647 slope aspect MIXS:core field The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration. Expected value: measurement value|Preferred unit: degree|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default slope_aspect +MIXS:0000646 slope gradient MIXS:core field Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer Expected value: measurement value|Preferred unit: percentage|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default slope_gradient +nmdc:sodium sodium nmdc:attribute Sodium concentration in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default sodium +MIXS:0001082 soil horizon MIXS:core field Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath select default soil_horizon +MIXS:0000335 soil texture measurement MIXS:core field The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional. xs:token \d+[.\d+] \S+ default soil_text_measure +MIXS:0000336 soil texture method MIXS:core field Reference or method used in determining soil texture xs:token default soil_texture_meth +MIXS:0000332 soil type MIXS:core field MIxS_soil:Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes.|NMDC_biosample:Soil series name or other lower-level classification Expected value: ENVO_00001998 xs:token default soil_type +MIXS:0000334 soil type method MIXS:core field Reference or method used in determining soil series name or other lower-level classification Expected value: PMID,DOI or url xs:token default soil_type_meth +nmdc:specific_ecosystem specific_ecosystem nmdc:gold_path_field TODO xs:token default specific_ecosystem +MIXS:0000327 storage conditions MIXS:core field MIxS_soil:Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other).|NMDC_biosample:Explain how and for how long the soil sample was stored before DNA extraction Expected value: storage condition type;duration xs:token default store_cond +nmdc:sulfate sulfate nmdc:attribute Concentration of sulfate in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default sulfate +nmdc:sulfide sulfide nmdc:attribute Concentration of sulfide in the sample |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default sulfide +MIXS:0000113 temperature MIXS:environment field MIxS_soil:Temperature of the sample at the time of sampling.|NMDC_biosample:Temperature of the sample at the time of sampling Expected value: measurement value|Preferred unit: degree Celsius|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default temp +nmdc:texture texture nmdc:attribute """The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional.""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default texture +nmdc:texture_meth texture_meth nmdc:attribute Reference or method used in determining soil texture xs:token default texture_meth +nmdc:tidal_stage tidal_stage nmdc:attribute Stage of tide |[low tide|ebb tide|flood tide|high tide] xs:token [low tide|ebb tide|flood tide|high tide] default tidal_stage 
+MIXS:0001081 history/tillage MIXS:core field Note method(s) used for tilling Expected value: enumeration|[drill|cutting disc|ridge till|strip tillage|zonal tillage|chisel|tined|mouldboard|disc plough] multiple default tillage +nmdc:tot_carb tot_carb nmdc:attribute Total carbon content |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default tot_carb +nmdc:tot_depth_water_col tot_depth_water_col nmdc:attribute Measurement of total depth of water column |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default tot_depth_water_col +nmdc:tot_diss_nitro tot_diss_nitro nmdc:attribute """Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default tot_diss_nitro +MIXS:0000338 total nitrogen content method MIXS:core field Reference or method used in determining the total nitrogen xs:token default tot_nitro_cont_meth +MIXS:0000530 total nitrogen content MIXS:core field Total nitrogen content of the sample Expected value: measurement value|Preferred unit: microgram per liter, micromole per liter, milligram per liter|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default tot_nitro_content +nmdc:tot_nitro_content_meth tot_nitro_content_meth nmdc:attribute Reference or method used in determining the total nitrogen xs:token default tot_nitro_content_meth +MIXS:0000337 total organic carbon method MIXS:core field Reference or method used in determining total organic carbon Expected value: PMID,DOI or url xs:token default tot_org_c_meth +MIXS:0000533 total organic carbon MIXS:core field Definition for soil: total organic carbon content of the soil, definition otherwise: total organic carbon content Expected value: measurement value|Preferred unit: gram Carbon per kilogram sample material|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default tot_org_carb +nmdc:tot_phosp tot_phosp nmdc:attribute """Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved 
phosphorus + particulate phosphorus""" |\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default tot_phosp +MIXS:0000323 water content method MIXS:core field Reference or method used in determining the water content of soil xs:token default water_cont_soil_meth +MIXS:0000185 water content MIXS:core field Water content measurement Expected value: measurement value|Preferred unit: gram per gram or cubic centimeter per cubic centimeter|\d+[.\d+] \S+ xs:token \d+[.\d+] \S+ default water_content +nmdc:water_content_soil_meth water_content_soil_meth nmdc:attribute Reference or method used in determining the water content of soil xs:token default water_content_soil_meth + badlands current land use + cities current land use + conifers (e.g. pine,spruce,fir,cypress) current land use + crop trees (nuts,fruit,christmas trees,nursery trees) current land use + farmstead current land use + gravel current land use + hardwoods (e.g. oak,hickory,elm,aspen) current land use + hayland current land use + horticultural plants (e.g. tulips) current land use + industrial areas current land use + intermixed hardwood and conifers current land use + marshlands (grass,sedges,rushes) current land use + meadows (grasses,alfalfa,fescue,bromegrass,timothy) current land use + mines/quarries current land use + mudflats current land use + oil waste areas current land use + pastureland (grasslands used for livestock grazing) current land use + permanent snow or ice current land use + rainforest (evergreen forest receiving >406 cm annual rainfall) current land use + rangeland current land use + roads/railroads current land use + rock current land use + row crops current land use + saline seeps current land use + salt flats current land use + sand current land use + shrub crops (blueberries,nursery ornamentals,filberts) current land use + shrub land (e.g. 
mesquite,sage-brush,creosote bush,shrub oak,eucalyptus) current land use + small grains current land use + successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries) current land use + swamp (permanent or semi-permanent water body dominated by woody plants) current land use + tropical (e.g. mangrove,palms) current land use + tundra (mosses,lichens) current land use + vegetable crops current land use + vine crops (grapes) current land use + excessively drained drainage classification + moderately well drainage classification + poorly drainage classification + somewhat poorly drainage classification + very poorly drainage classification + well drainage classification + Acrisols soil_taxonomic/FAO classification + Andosols soil_taxonomic/FAO classification + Arenosols soil_taxonomic/FAO classification + Cambisols soil_taxonomic/FAO classification + Chernozems soil_taxonomic/FAO classification + Ferralsols soil_taxonomic/FAO classification + Fluvisols soil_taxonomic/FAO classification + Gleysols soil_taxonomic/FAO classification + Greyzems soil_taxonomic/FAO classification + Gypsisols soil_taxonomic/FAO classification + Histosols soil_taxonomic/FAO classification + Kastanozems soil_taxonomic/FAO classification + Lithosols soil_taxonomic/FAO classification + Luvisols soil_taxonomic/FAO classification + Nitosols soil_taxonomic/FAO classification + Phaeozems soil_taxonomic/FAO classification + Planosols soil_taxonomic/FAO classification + Podzols soil_taxonomic/FAO classification + Podzoluvisols soil_taxonomic/FAO classification + Rankers soil_taxonomic/FAO classification + Regosols soil_taxonomic/FAO classification + Rendzinas soil_taxonomic/FAO classification + Solonchaks soil_taxonomic/FAO classification + Solonetz soil_taxonomic/FAO classification + Vertisols soil_taxonomic/FAO classification + Yermosols soil_taxonomic/FAO classification + backslope profile position + footslope profile position + shoulder profile position + summit 
profile position + toeslope profile position + A horizon soil horizon + B horizon soil horizon + C horizon soil horizon + E horizon soil horizon + O horizon soil horizon + Permafrost soil horizon + R layer soil horizon + chisel history/tillage + cutting disc history/tillage + disc plough history/tillage + drill history/tillage + mouldboard history/tillage + ridge till history/tillage + strip tillage history/tillage + tined history/tillage + zonal tillage history/tillage diff --git a/linkml_round_trips/linkml_to_dh_light.py b/linkml_round_trips/linkml_to_dh_light.py index 3f3d06d5..1394c93e 100644 --- a/linkml_round_trips/linkml_to_dh_light.py +++ b/linkml_round_trips/linkml_to_dh_light.py @@ -1,5 +1,6 @@ from linkml_runtime.utils.schemaview import SchemaView import pandas as pd +import re import click @@ -38,6 +39,30 @@ def linkml_to_dh_light(model_file, selected_class, default_section, default_sour model_sv = SchemaView(model_file) + # ---- + # trying to get term requirement within class slot usages + classes = model_sv.all_classes() + class_names = list(classes.keys()) + class_names.sort() + reqs_from_usage = [] + for cc in class_names: + current_class = classes[cc] + ccsu = current_class.slot_usage + ccsu_names = list(ccsu.keys()) + ccsu_names.sort() + for current_usage in ccsu_names: + current_row_dict = {"class": cc, "slot": current_usage, "required": ccsu[current_usage].required, + "recommended": ccsu[current_usage].recommended} + reqs_from_usage.append(current_row_dict) + + reqs_from_usage_frame = pd.DataFrame(reqs_from_usage) + reqs_from_usage_frame['required'] = reqs_from_usage_frame['required'].fillna(False) + reqs_from_usage_frame['recommended'] = reqs_from_usage_frame['recommended'].fillna(False) + req_from_usage = list(reqs_from_usage_frame.loc[reqs_from_usage_frame.required, 'slot']) + rec_from_usage = list(reqs_from_usage_frame.loc[reqs_from_usage_frame.recommended, 'slot']) + + # ---- + model_enums = model_sv.all_enums() model_enum_names = 
list(model_enums.keys()) model_enum_names.sort() @@ -92,8 +117,18 @@ def linkml_to_dh_light(model_file, selected_class, default_section, default_sour # useless parent classes: attribute, , current_row["parent class"] = isa_dict[i] # description: quote and or bracket wrappers, TODO, empty - current_row["description"] = current_sd.description - # guidance: I have moved slot used in... and Occurrence out of the MIxS comments + if current_sd.description is None: + pass + else: + # these are of type linkml_runtime.utils.yamlutils.extended_str + # even though GOLD sample identifiers ['identifiers for corresponding sample in GOLD'] looks like a list + # current_row["description"] = current_sd.description[0] + temp = current_sd.description + temp = re.sub(r"^[\['\"]*", "", temp) + temp = re.sub(r"['\]\"]*$", "", temp) + current_row["description"] = temp + # guidance: I have moved slot used in... out of the MIxS comments + # Occurrence is still in there # ~ half of the MixS soil/NMDC biosample fields lack comments for "guidance" # Montana provides her own, to be concatenated on # Damion's latest LinkML -> JS approach lays the comments and examples out nicer @@ -133,12 +168,14 @@ def linkml_to_dh_light(model_file, selected_class, default_section, default_sour pv_row = blank_row.copy() pv_row["label"] = pvk pv_row["parent class"] = current_sd.title + # use term meaning as ontology ID if possible + pv_row["Ontology ID"] = pvs_obj[pvk].meaning pv_list.append(pv_row) # seeing fewer required than I expected # current_row["requirement"] = "" - if current_sd.recommended: + if current_sd.recommended or current_sd.name in rec_from_usage: current_row["requirement"] = "recommended" - elif current_sd.required: + elif current_sd.required or current_sd.name in req_from_usage: current_row["requirement"] = "required" # --- examples example_list = [] @@ -147,7 +184,7 @@ def linkml_to_dh_light(model_file, selected_class, default_section, default_sour if exmpl.value is not None and 
len(exmpl.value) > 0: example_list.append(exmpl.value) example_cat = "|".join(example_list) - current_row["source"] = example_cat + current_row["examples"] = example_cat current_row["source"] = default_source # for reuse of enums? current_row["capitalize"] = default_capitalize current_row["data status"] = default_data_status diff --git a/poetry.lock b/poetry.lock index 79fab280..dc977bb6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -102,6 +102,14 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "bcp47" +version = "0.0.4" +description = "Language tags made easy" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "black" version = "21.12b0" @@ -151,11 +159,11 @@ python-versions = "*" [[package]] name = "cachetools" -version = "5.0.0" +version = "4.2.4" description = "Extensible memoizing collections and decorators" category = "main" optional = false -python-versions = "~=3.7" +python-versions = "~=3.5" [[package]] name = "certifi" @@ -243,7 +251,7 @@ python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" [[package]] name = "decorator" -version = "5.1.0" +version = "5.1.1" description = "Decorators for Humans" category = "main" optional = false @@ -285,6 +293,14 @@ wrapt = ">=1.10,<2" [package.extras] dev = ["tox", "bump2version (<1)", "sphinx (<2)", "importlib-metadata (<3)", "importlib-resources (<4)", "configparser (<5)", "sphinxcontrib-websupport (<2)", "zipp (<2)", "PyTest (<5)", "PyTest-Cov (<2.6)", "pytest", "pytest-cov"] +[[package]] +name = "docopt" +version = "0.6.2" +description = "Pythonic argument parser, that will make you smile" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "docutils" version = "0.17.1" @@ -336,6 +352,20 @@ category = "main" optional = false python-versions = ">=3.6" +[[package]] +name = "funowl" +version = "0.1.10" +description = "Python rendering of the OWL Functional syntax" +category = "main" +optional = false 
+python-versions = ">=3.7" + +[package.dependencies] +bcp47 = "*" +pyjsg = ">=0.11.6" +rdflib = ">=5.0.0,<6.0" +rfc3987 = "*" + [[package]] name = "ghp-import" version = "2.0.2" @@ -368,55 +398,57 @@ yaml = ["pyyaml"] [[package]] name = "google-api-core" -version = "1.16.0" +version = "2.3.2" description = "Google API client core library" category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" +python-versions = ">=3.6" [package.dependencies] -google-auth = ">=0.4.0,<2.0dev" -googleapis-common-protos = ">=1.6.0,<2.0dev" -protobuf = ">=3.4.0" -pytz = "*" +google-auth = ">=1.25.0,<3.0dev" +googleapis-common-protos = ">=1.52.0,<2.0dev" +protobuf = ">=3.12.0" requests = ">=2.18.0,<3.0.0dev" -six = ">=1.10.0" [package.extras] -grpc = ["grpcio (>=1.8.2,<2.0dev)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)"] grpcgcp = ["grpcio-gcp (>=0.2.2)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2)"] [[package]] name = "google-api-python-client" -version = "1.8.4" +version = "2.34.0" description = "Google API Client Library for Python" category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" +python-versions = ">=3.6" [package.dependencies] -google-api-core = ">=1.13.0,<2dev" -google-auth = ">=1.4.1" -google-auth-httplib2 = ">=0.0.3" -httplib2 = ">=0.9.2,<1dev" -six = ">=1.6.1,<2dev" -uritemplate = ">=3.0.0,<4dev" +google-api-core = ">=1.21.0,<3.0.0dev" +google-auth = ">=1.16.0,<3.0.0dev" +google-auth-httplib2 = ">=0.1.0" +httplib2 = ">=0.15.0,<1dev" +uritemplate = ">=3.0.1,<5" [[package]] name = "google-auth" -version = "1.6.3" +version = "2.3.3" description = "Google Authentication Library" category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" [package.dependencies] -cachetools = ">=2.0.0" +cachetools = ">=2.0.0,<5.0" pyasn1-modules = ">=0.2.1" -rsa = ">=3.1.4" +rsa = 
{version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} six = ">=1.9.0" +[package.extras] +aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"] +pyopenssl = ["pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] + [[package]] name = "google-auth-httplib2" version = "0.1.0" @@ -534,6 +566,18 @@ docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] perf = ["ipython"] testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] +[[package]] +name = "inflect" +version = "5.3.0" +description = "Correctly generate plurals, singular nouns, ordinals, indefinite articles; convert numbers to words" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-cov", "pytest-enabler", "pygments", "pytest-black (>=0.3.7)", "pytest-mypy"] + [[package]] name = "iniconfig" version = "1.1.1" @@ -887,6 +931,32 @@ linkml = ">=1.0,<2.0" linkml-runtime = ">=1.0.10,<2.0" mkdocs = "*" +[[package]] +name = "linkml-model-enrichment" +version = "0.0.2" +description = "Infer models, enrich with meaning for terms including enum permissible values" +category = "main" +optional = false +python-versions = ">=3.9,<4.0" + +[package.dependencies] +click = ">=7.1.2,<8.0.0" +click-log = ">=0.3.2,<0.4.0" +funowl = ">=0.1.10,<0.2.0" +jsonpatch = ">=1.32,<2.0" +linkml = ">=1.1.13,<2.0.0" +linkml-runtime = ">=1.1.10,<2.0.0" +mkdocs = ">=1.2.3,<2.0.0" +pandas = ">=1.3.5,<2.0.0" +psycopg2-binary = ">=2.9.2,<3.0.0" +pytest = ">=6.2.5,<7.0.0" +python-dateutil = ">=2.8.2,<3.0.0" +PyYAML = ">=5.3.1,<6.0.0" +quantulum3 = ">=0.7.9,<0.8.0" +rdflib = "5.0.0" +requests = ">=2.26.0,<3.0.0" +strsimpy = 
">=0.2.1,<0.3.0" + [[package]] name = "linkml-runtime" version = "1.1.15" @@ -914,7 +984,7 @@ shexjsg = ">=0.7.0,<1.0.0" [[package]] name = "linkml-runtime-api" -version = "0.0.4" +version = "0.0.6" description = "LinkML Runtime Environment API" category = "main" optional = false @@ -924,7 +994,7 @@ python-versions = ">=3.7" jinja2 = "*" jsonpatch = "*" jsonpath-ng = "*" -linkml-runtime = "*" +linkml-runtime = ">=1.1.6" "ruamel.yaml" = "*" [[package]] @@ -1193,6 +1263,17 @@ docs = ["sphinx", "nbsphinx", "sphinxcontrib-github-alt", "sphinx-rtd-theme", "m json-logging = ["json-logging"] test = ["pytest", "coverage", "requests", "nbval", "selenium", "pytest-cov", "requests-unixsocket"] +[[package]] +name = "num2words" +version = "0.5.10" +description = "Modules to convert numbers to words. Easily extensible." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +docopt = ">=0.6.2" + [[package]] name = "numpy" version = "1.22.0" @@ -1418,6 +1499,14 @@ category = "main" optional = false python-versions = ">=3.5" +[[package]] +name = "psycopg2-binary" +version = "2.9.3" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "ptyprocess" version = "0.7.0" @@ -1709,22 +1798,36 @@ packaging = "*" [package.extras] test = ["pytest (>=6.0.0,<7.0)", "pytest-cov (>=2.11.0)"] +[[package]] +name = "quantulum3" +version = "0.7.9" +description = "Extract quantities from unstructured text." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +inflect = "*" +num2words = "*" + [[package]] name = "rdflib" -version = "6.1.1" +version = "5.0.0" description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." 
category = "main" optional = false -python-versions = ">=3.7" +python-versions = "*" [package.dependencies] isodate = "*" pyparsing = "*" +six = "*" [package.extras] -docs = ["sphinx (<5)", "sphinxcontrib-apidoc"] +docs = ["sphinx (<3)", "sphinxcontrib-apidoc"] html = ["html5lib"] -tests = ["berkeleydb", "html5lib", "networkx", "pytest", "pytest-cov", "pytest-subtests"] +sparql = ["requests"] +tests = ["html5lib", "networkx", "nose", "doctest-ignore-unicode"] [[package]] name = "rdflib-jsonld" @@ -1795,6 +1898,14 @@ requests = ">=2.0.0" [package.extras] rsa = ["oauthlib[signedtoken] (>=3.0.0)"] +[[package]] +name = "rfc3987" +version = "1.3.8" +description = "Parsing and validation of URIs (RFC 3986) and IRIs (RFC 3987)" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "rsa" version = "4.8" @@ -2151,15 +2262,15 @@ python-versions = ">=3.6" [[package]] name = "uritemplate" -version = "3.0.1" -description = "URI templates" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.6" [[package]] name = "urllib3" -version = "1.26.7" +version = "1.26.8" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "main" optional = false @@ -2231,7 +2342,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "b01678f1ec8894af1f6c1044527038e88147e355fa6c41b449b82069aa51f00e" +content-hash = "e825b36177927529cbab86c29e8e0b0f7bc8c23fae8398183400b2011993256f" [metadata.files] alabaster = [ @@ -2292,6 +2403,10 @@ backcall = [ {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +bcp47 = [ + {file = "bcp47-0.0.4-py3-none-any.whl", hash = "sha256:309d3bbaef8d6c9ac59d37ba2167cc6620b4e7467ec8f1e09641b659bb1c0c6d"}, + {file = "bcp47-0.0.4.tar.gz", hash = "sha256:4878d2f3e697ef39ef3891a147280705e4377d5a8d7eb0702129b8d4a3718702"}, +] black = [ {file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"}, {file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"}, @@ -2305,8 +2420,8 @@ boltons = [ {file = "boltons-21.0.0.tar.gz", hash = "sha256:65e70a79a731a7fe6e98592ecfb5ccf2115873d01dbc576079874629e5c90f13"}, ] cachetools = [ - {file = "cachetools-5.0.0-py3-none-any.whl", hash = "sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4"}, - {file = "cachetools-5.0.0.tar.gz", hash = "sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6"}, + {file = "cachetools-4.2.4-py3-none-any.whl", hash = "sha256:92971d3cb7d2a97efff7c7bb1657f21a8f5fb309a37530537c71b1774189f2d1"}, + {file = "cachetools-4.2.4.tar.gz", hash = "sha256:89ea6f1b638d5a73a4f9226be57ac5e4f399d22770b92355f92dcb0f7f001693"}, ] certifi = [ {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, @@ 
-2411,8 +2526,8 @@ debugpy = [ {file = "debugpy-1.5.1.zip", hash = "sha256:d2b09e91fbd1efa4f4fda121d49af89501beda50c18ed7499712c71a4bf3452e"}, ] decorator = [ - {file = "decorator-5.1.0-py3-none-any.whl", hash = "sha256:7b12e7c3c6ab203a29e157335e9122cb03de9ab7264b137594103fd4a683b374"}, - {file = "decorator-5.1.0.tar.gz", hash = "sha256:e59913af105b9860aa2c8d3272d9de5a56a4e608db9a2f167a8480b323d529a7"}, + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] deepdiff = [ {file = "deepdiff-5.7.0-py3-none-any.whl", hash = "sha256:1ffb38c3b5d9174eb2df95850c93aee55ec00e19396925036a2e680f725079e0"}, @@ -2426,6 +2541,9 @@ deprecated = [ {file = "Deprecated-1.2.13-py2.py3-none-any.whl", hash = "sha256:64756e3e14c8c5eea9795d93c524551432a0be75629f8f29e67ab8caf076c76d"}, {file = "Deprecated-1.2.13.tar.gz", hash = "sha256:43ac5335da90c31c24ba028af536a91d41d53f9e6901ddb021bcc572ce44e38d"}, ] +docopt = [ + {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, +] docutils = [ {file = "docutils-0.17.1-py2.py3-none-any.whl", hash = "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"}, {file = "docutils-0.17.1.tar.gz", hash = "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125"}, @@ -2450,6 +2568,10 @@ frozendict = [ {file = "frozendict-2.1.3-py3-none-any.whl", hash = "sha256:cfa47860932d549947157d68f246898f60b05da392f67219a913b85d8c36b1b2"}, {file = "frozendict-2.1.3.tar.gz", hash = "sha256:7a12336ba271066e1261a70d8ba97d5178392f3b317b31c6686e401423e70670"}, ] +funowl = [ + {file = "funowl-0.1.10-py3-none-any.whl", hash = "sha256:ced04e6a6c6b3c3b2da7b0a3189103c577a32b5d707d1a51a5228bd11bb7f1b3"}, + {file = "funowl-0.1.10.tar.gz", hash = 
"sha256:b860076676df746fc5a5cfde6a7fc9dd2a3c627cd704be843097f9ea9776c63c"}, +] ghp-import = [ {file = "ghp-import-2.0.2.tar.gz", hash = "sha256:947b3771f11be850c852c64b561c600fdddf794bab363060854c1ee7ad05e071"}, {file = "ghp_import-2.0.2-py3-none-any.whl", hash = "sha256:5f8962b30b20652cdffa9c5a9812f7de6bcb56ec475acac579807719bf242c46"}, @@ -2459,16 +2581,16 @@ glom = [ {file = "glom-20.11.0.tar.gz", hash = "sha256:54051072bccc9cdb3ebbd8af0559195137a61d308f04bff19678e4b61350eb12"}, ] google-api-core = [ - {file = "google-api-core-1.16.0.tar.gz", hash = "sha256:92e962a087f1c4b8d1c5c88ade1c1dfd550047dcffb320c57ef6a534a20403e2"}, - {file = "google_api_core-1.16.0-py2.py3-none-any.whl", hash = "sha256:859f7392676761f2b160c6ee030c3422135ada4458f0948c5690a6a7c8d86294"}, + {file = "google-api-core-2.3.2.tar.gz", hash = "sha256:c8889f45cf58deca522888ae1d39b2a25e93e7d1b019ae8cee6456d5c726a40c"}, + {file = "google_api_core-2.3.2-py2.py3-none-any.whl", hash = "sha256:3c562d393aed7e3d2011fcd1f103b490c411dcf5644b6312ca11a166a6ea8faf"}, ] google-api-python-client = [ - {file = "google-api-python-client-1.8.4.tar.gz", hash = "sha256:bbe212611fdc05364f3d20271cae53971bf4d485056e6c0d40748eddeeda9a19"}, - {file = "google_api_python_client-1.8.4-py3-none-any.whl", hash = "sha256:e7980ba66288f815b41f10c4561b37f45cd568d302b0d801709e51f75b21f61b"}, + {file = "google-api-python-client-2.34.0.tar.gz", hash = "sha256:f4c602d1b49f7de53dca105fa1e021718f73ab8a40957e66226f7b0504e65e7c"}, + {file = "google_api_python_client-2.34.0-py2.py3-none-any.whl", hash = "sha256:31eab065a8d01409af13632006a0bac4eae2ef02f0539092a4dd9575cdd724f0"}, ] google-auth = [ - {file = "google-auth-1.6.3.tar.gz", hash = "sha256:0f7c6a64927d34c1a474da92cfc59e552a5d3b940d3266606c6a28b72888b9e4"}, - {file = "google_auth-1.6.3-py2.py3-none-any.whl", hash = "sha256:20705f6803fd2c4d1cc2dcb0df09d4dfcb9a7d51fd59e94a3a28231fd93119ed"}, + {file = "google-auth-2.3.3.tar.gz", hash = 
"sha256:d83570a664c10b97a1dc6f8df87e5fdfff012f48f62be131e449c20dfc32630e"}, + {file = "google_auth-2.3.3-py2.py3-none-any.whl", hash = "sha256:a348a50b027679cb7dae98043ac8dbcc1d7951f06d8387496071a1e05a2465c0"}, ] google-auth-httplib2 = [ {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, @@ -2558,6 +2680,10 @@ importlib-metadata = [ {file = "importlib_metadata-4.10.0-py3-none-any.whl", hash = "sha256:b7cf7d3fef75f1e4c80a96ca660efbd51473d7e8f39b5ab9210febc7809012a4"}, {file = "importlib_metadata-4.10.0.tar.gz", hash = "sha256:92a8b58ce734b2a4494878e0ecf7d79ccd7a128b5fc6014c401e0b61f006f0f6"}, ] +inflect = [ + {file = "inflect-5.3.0-py3-none-any.whl", hash = "sha256:42560be16af702a21d43d59427f276b5aed79efb1ded9b713468c081f4353d10"}, + {file = "inflect-5.3.0.tar.gz", hash = "sha256:41a23f6788962e9775e40e2ecfb1d6455d02de315022afeedd3c5dc070019d73"}, +] iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, @@ -2652,13 +2778,17 @@ linkml-model = [ {file = "linkml_model-1.0.6-py3-none-any.whl", hash = "sha256:61686e1dfb63fc060b4a15628c5690910bfa2343aa7c5e86ab9761af6b862f32"}, {file = "linkml_model-1.0.6.tar.gz", hash = "sha256:eafac7eaadfc8140cb0826f170f51dba539eecc15c8e082cc8798d222d0b6fb6"}, ] +linkml-model-enrichment = [ + {file = "linkml-model-enrichment-0.0.2.tar.gz", hash = "sha256:52428879839191a313c706b6eb57c94bb2013295ecd29270de3ce757d57859d6"}, + {file = "linkml_model_enrichment-0.0.2-py3-none-any.whl", hash = "sha256:ae54ae5688bf7df4e7c1efd0c1d8e5e026fcc73ba0fcf8387c1ef6f529783713"}, +] linkml-runtime = [ {file = "linkml_runtime-1.1.15-py3-none-any.whl", hash = "sha256:fdd1307e08bb71c7ba93d340c7355020e2d29188b51f3b8b788c5b3f35a9e999"}, {file = "linkml_runtime-1.1.15.tar.gz", hash = 
"sha256:2baa757f1925b1cd58fa8f7de167941247597334eaaa988b2f9e064ffc95f01b"}, ] linkml-runtime-api = [ - {file = "linkml_runtime_api-0.0.4-py3-none-any.whl", hash = "sha256:b18541410dabedfe61bb90ad2830e794c72417a4451563a2a02c486783df2ef4"}, - {file = "linkml_runtime_api-0.0.4.tar.gz", hash = "sha256:fb619e0e10a878502051eb316af431bd81849dc1966a926ee09c83b90ba24e59"}, + {file = "linkml_runtime_api-0.0.6-py3-none-any.whl", hash = "sha256:b567a8f678f96fa723f0ba2f225712ab989930f023f5bb84df2ae2ea19db04f5"}, + {file = "linkml_runtime_api-0.0.6.tar.gz", hash = "sha256:298d446cc4e5875d4622f6be794a521f1a67bbb1743902dd36b5917ea12c34d3"}, ] lxml = [ {file = "lxml-4.7.1-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:d546431636edb1d6a608b348dd58cc9841b81f4116745857b6cb9f8dadb2725f"}, @@ -2853,6 +2983,10 @@ notebook = [ {file = "notebook-6.4.6-py3-none-any.whl", hash = "sha256:5cad068fa82cd4fb98d341c052100ed50cd69fbfb4118cb9b8ab5a346ef27551"}, {file = "notebook-6.4.6.tar.gz", hash = "sha256:7bcdf79bd1cda534735bd9830d2cbedab4ee34d8fe1df6e7b946b3aab0902ba3"}, ] +num2words = [ + {file = "num2words-0.5.10-py3-none-any.whl", hash = "sha256:0b6e5f53f11d3005787e206d9c03382f459ef048a43c544e3db3b1e05a961548"}, + {file = "num2words-0.5.10.tar.gz", hash = "sha256:37cd4f60678f7e1045cdc3adf6acf93c8b41bf732da860f97d301f04e611cc57"}, +] numpy = [ {file = "numpy-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d22662b4b10112c545c91a0741f2436f8ca979ab3d69d03d19322aa970f9695"}, {file = "numpy-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:11a1f3816ea82eed4178102c56281782690ab5993251fdfd75039aad4d20385f"}, @@ -3001,6 +3135,64 @@ protobuf = [ {file = "protobuf-3.19.1-py2.py3-none-any.whl", hash = "sha256:e813b1c9006b6399308e917ac5d298f345d95bb31f46f02b60cd92970a9afa17"}, {file = "protobuf-3.19.1.tar.gz", hash = "sha256:62a8e4baa9cb9e064eb62d1002eca820857ab2138440cb4b3ea4243830f94ca7"}, ] +psycopg2-binary = [ + {file = "psycopg2-binary-2.9.3.tar.gz", hash = 
"sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_ppc64le.whl", hash = "sha256:0a29729145aaaf1ad8bafe663131890e2111f13416b60e460dae0a96af5905c9"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a79d622f5206d695d7824cbf609a4f5b88ea6d6dab5f7c147fc6d333a8787e4"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:090f3348c0ab2cceb6dfbe6bf721ef61262ddf518cd6cc6ecc7d334996d64efa"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a9e1f75f96ea388fbcef36c70640c4efbe4650658f3d6a2967b4cc70e907352e"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c3ae8e75eb7160851e59adc77b3a19a976e50622e44fd4fd47b8b18208189d42"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-win32.whl", hash = "sha256:7b1e9b80afca7b7a386ef087db614faebbf8839b7f4db5eb107d0f1a53225029"}, + {file = "psycopg2_binary-2.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:8b344adbb9a862de0c635f4f0425b7958bf5a4b927c8594e6e8d261775796d53"}, + {file = 
"psycopg2_binary-2.9.3-cp36-cp36m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:e847774f8ffd5b398a75bc1c18fbb56564cda3d629fe68fd81971fece2d3c67e"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68641a34023d306be959101b345732360fc2ea4938982309b786f7be1b43a4a1"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3303f8807f342641851578ee7ed1f3efc9802d00a6f83c101d21c608cb864460"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_24_aarch64.whl", hash = "sha256:e3699852e22aa68c10de06524a3721ade969abf382da95884e6a10ff798f9281"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_24_ppc64le.whl", hash = "sha256:526ea0378246d9b080148f2d6681229f4b5964543c170dd10bf4faaab6e0d27f"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:b1c8068513f5b158cf7e29c43a77eb34b407db29aca749d3eb9293ee0d3103ca"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:15803fa813ea05bef089fa78835118b5434204f3a17cb9f1e5dbfd0b9deea5af"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:152f09f57417b831418304c7f30d727dc83a12761627bb826951692cc6491e57"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:404224e5fef3b193f892abdbf8961ce20e0b6642886cfe1fe1923f41aaa75c9d"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-win32.whl", hash = "sha256:1f6b813106a3abdf7b03640d36e24669234120c72e91d5cbaeb87c5f7c36c65b"}, + {file = "psycopg2_binary-2.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:2d872e3c9d5d075a2e104540965a1cf898b52274a5923936e5bfddb58c59c7c2"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = 
"sha256:10bb90fb4d523a2aa67773d4ff2b833ec00857f5912bafcfd5f5414e45280fb1"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a52ecab70af13e899f7847b3e074eeb16ebac5615665db33bce8a1009cf33"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a29b3ca4ec9defec6d42bf5feb36bb5817ba3c0230dd83b4edf4bf02684cd0ae"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:12b11322ea00ad8db8c46f18b7dfc47ae215e4df55b46c67a94b4effbaec7094"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_24_ppc64le.whl", hash = "sha256:53293533fcbb94c202b7c800a12c873cfe24599656b341f56e71dd2b557be063"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c381bda330ddf2fccbafab789d83ebc6c53db126e4383e73794c74eedce855ef"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d29409b625a143649d03d0fd7b57e4b92e0ecad9726ba682244b73be91d2fdb"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:183a517a3a63503f70f808b58bfbf962f23d73b6dccddae5aa56152ef2bcb232"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:15c4e4cfa45f5a60599d9cec5f46cd7b1b29d86a6390ec23e8eebaae84e64554"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"}, + {file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_ppc64le.whl", hash = "sha256:63638d875be8c2784cfc952c9ac34e2b50e43f9f0a0660b65e2a87d656b3116c"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ffb7a888a047696e7f8240d649b43fb3644f14f0ee229077e7f6b9f9081635bd"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0c9d5450c566c80c396b7402895c4369a410cab5a82707b11aee1e624da7d004"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:d1c1b569ecafe3a69380a94e6ae09a4789bbb23666f3d3a08d06bbd2451f5ef1"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8fc53f9af09426a61db9ba357865c77f26076d48669f2e1bb24d85a22fb52307"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"}, + {file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_ppc64le.whl", hash = "sha256:7af0dd86ddb2f8af5da57a976d27cd2cd15510518d582b478fbb2292428710b4"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93cd1967a18aa0edd4b95b1dfd554cf15af657cb606280996d393dadc88c3c35"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bda845b664bb6c91446ca9609fc69f7db6c334ec5e4adc87571c34e4f47b7ddb"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:01310cf4cf26db9aea5158c217caa92d291f0500051a6469ac52166e1a16f5b7"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:99485cab9ba0fa9b84f1f9e1fef106f44a46ef6afdeec8885e0b88d0772b49e8"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-win32.whl", hash = "sha256:46f0e0a6b5fa5851bbd9ab1bc805eef362d3a230fbdfbc209f4a236d0a7a990d"}, + {file = "psycopg2_binary-2.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:accfe7e982411da3178ec690baaceaad3c278652998b2c45828aaac66cd8285f"}, +] ptyprocess = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, @@ -3256,9 +3448,13 @@ qtpy = [ {file = "QtPy-2.0.0-py3-none-any.whl", hash = "sha256:74bf26be3288aadc843cf3381d5ef0b82f11417ecdcbf26718a408f32590f1ac"}, {file = "QtPy-2.0.0.tar.gz", hash = "sha256:777e333df4d711b2ec9743117ab319dadfbd743a5a0eee35923855ca3d35cd9d"}, ] +quantulum3 = [ + {file = "quantulum3-0.7.9-py3-none-any.whl", hash = "sha256:be2c3f6a79a9c01e440e0ee9390c15aba010a5cc01723e5aea48702bcf7e7dad"}, + {file = "quantulum3-0.7.9.tar.gz", 
hash = "sha256:8ac9664734133d7c3ff2796d69f64440e4ab424ced98f791b34a86e86d87b352"}, +] rdflib = [ - {file = "rdflib-6.1.1-py3-none-any.whl", hash = "sha256:fc81cef513cd552d471f2926141396b633207109d0154c8e77926222c70367fe"}, - {file = "rdflib-6.1.1.tar.gz", hash = "sha256:8dbfa0af2990b98471dacbc936d6494c997ede92fd8ed693fb84ee700ef6f754"}, + {file = "rdflib-5.0.0-py3-none-any.whl", hash = "sha256:88208ea971a87886d60ae2b1a4b2cdc263527af0454c422118d43fe64b357877"}, + {file = "rdflib-5.0.0.tar.gz", hash = "sha256:78149dd49d385efec3b3adfbd61c87afaf1281c30d3fcaf1b323b34f603fb155"}, ] rdflib-jsonld = [ {file = "rdflib-jsonld-0.6.1.tar.gz", hash = "sha256:eda5a42a2e09f80d4da78e32b5c684bccdf275368f1541e6b7bcddfb1382a0e0"}, @@ -3281,6 +3477,10 @@ requests-oauthlib = [ {file = "requests_oauthlib-1.3.0-py2.py3-none-any.whl", hash = "sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d"}, {file = "requests_oauthlib-1.3.0-py3.7.egg", hash = "sha256:fa6c47b933f01060936d87ae9327fead68768b69c6c9ea2109c48be30f2d4dbc"}, ] +rfc3987 = [ + {file = "rfc3987-1.3.8-py2.py3-none-any.whl", hash = "sha256:10702b1e51e5658843460b189b185c0366d2cf4cff716f13111b0ea9fd2dce53"}, + {file = "rfc3987-1.3.8.tar.gz", hash = "sha256:d3c4d257a560d544e9826b38bc81db676890c79ab9d7ac92b39c7a253d5ca733"}, +] rsa = [ {file = "rsa-4.8-py3-none-any.whl", hash = "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"}, {file = "rsa-4.8.tar.gz", hash = "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17"}, @@ -3489,12 +3689,12 @@ typing-extensions = [ {file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"}, ] uritemplate = [ - {file = "uritemplate-3.0.1-py2.py3-none-any.whl", hash = "sha256:07620c3f3f8eed1f12600845892b0e036a2420acf513c53f7de0abd911a5894f"}, - {file = "uritemplate-3.0.1.tar.gz", hash = "sha256:5af8ad10cec94f215e3f48112de2022e1d5a37ed427fbd88652fa908f2ab7cae"}, + {file = 
"uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, ] urllib3 = [ - {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, - {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, + {file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"}, + {file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"}, ] watchdog = [ {file = "watchdog-2.1.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9693f35162dc6208d10b10ddf0458cc09ad70c30ba689d9206e02cd836ce28a3"}, diff --git a/pyproject.toml b/pyproject.toml index a85416b9..c3447cf5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ strsimpy = "*" pytest = "*" dpath = "^2.0.5" pandasql = "^0.7.3" +linkml-model-enrichment = "^0.0.2" [tool.poetry.dev-dependencies] diff --git a/use_modular_gd.py b/use_modular_gd.py index caa0a528..0adc0463 100644 --- a/use_modular_gd.py +++ b/use_modular_gd.py @@ -1,6 +1,7 @@ import linkml.generators.yamlgen as yg import linkml_round_trips.modular_gd as mgd from linkml_runtime.linkml_model import Prefix +from linkml_runtime.dumpers import yaml_dumper sheet_id = '1pSmxX6XGOxmoA7S7rKyj5OaEl3PmAl4jAOlROuNHrU0' client_secret_json = "local/client_secret.apps.googleusercontent.com.json" @@ -16,7 +17,14 @@ new_schema = mgd.construct_schema(constructed_schema_name, constructed_schema_id, constructed_class_name, additional_prefixes) -tasks = {"nmdc": {"yaml": "nmdc-schema/src/schema/nmdc.yaml", "title": "nmdc_biosample_slots", +# nmdc-schema/src/schema/nmdc.yaml +# mixs-source/model/schema/mixs.yaml +# target/nmdc_generated.yaml +# 
target/mixs_generated.yaml +# target/mixs_generated_no_imports.yaml +# target/nmdc_generated_no_imports.yaml + +tasks = {"nmdc": {"yaml": "target/nmdc_generated_no_imports.yaml", "title": "nmdc_biosample_slots", "focus_class": "biosample", "query": """ SELECT @@ -26,7 +34,7 @@ where from_schema != 'https://microbiomedata/schema/mixs' and disposition != 'skip'; -"""}, "mixs": {"yaml": "mixs-source/model/schema/mixs.yaml", "title": "mixs_packages_x_slots", "focus_class": "soil", +"""}, "mixs": {"yaml": "target/mixs_generated_no_imports.yaml", "title": "mixs_packages_x_slots", "focus_class": "soil", "query": """ SELECT slot as slot @@ -37,7 +45,7 @@ and ( disposition = 'use as-is' or disposition = 'borrowed as-is' ) -"""}} +"""}, } for title, task in tasks.items(): pysqldf_slot_list = mgd.subset_slots_from_sheet(client_secret_json, sheet_id, task['title'], task['query']) @@ -45,16 +53,18 @@ new_schema = mgd.wrapper(task['yaml'], title, task['focus_class'], pysqldf_slot_list, new_schema, constructed_class_name) -generated = yg.YAMLGenerator(new_schema) +# generated = yg.YAMLGenerator(new_schema) # 1 WARNING:Namespaces:MIXS namespace is already mapped to https://w3id.org/gensc/ - Mapping to https://w3id.org/mixs/terms/ ignored # 276 WARNING:YAMLGenerator:File "" Prefix case mismatch - supplied: MIXS expected: mixs # 1 WARNING:YAMLGenerator:Overlapping subset and class names: soil -serialized = generated.serialize() +# serialized = generated.serialize() # # todo use the "with" wrapper (if we want to write to a files instead of STDOUT) # file = open("use_modular_gd.yaml", "w") # yaml.safe_dump(serialized, file) -print(serialized) +dumped = yaml_dumper.dumps(new_schema) + +print(dumped)