Skip to content

Commit

Permalink
Merge pull request #1696 from microbiomedata/1695-move-yq-commands-fr…
Browse files Browse the repository at this point in the history
…om-localmixs_regenmixs_subset_modifiedyaml-target-into-a-separate-file

move lists of yq commands from project.Makefile target into separate files
  • Loading branch information
turbomam authored Jan 22, 2024
2 parents d30dfed + be758bb commit 05aea1c
Show file tree
Hide file tree
Showing 11 changed files with 8,550 additions and 6,313 deletions.
154 changes: 154 additions & 0 deletions assets/yq-for-mixs_subset_modified.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Slot range overrides: each expression below sets .slots.<slot>.range to the quoted type name.
# One yq expression per line, sorted alphabetically by slot name; each slot should appear only once.
'.slots.agrochem_addition.range |= "TextValue"'
'.slots.air_temp_regm.range |= "TextValue"'
'.slots.antibiotic_regm.range |= "TextValue"'
'.slots.aromatics_pc.range |= "TextValue"'
'.slots.asphaltenes_pc.range |= "TextValue"'
'.slots.atmospheric_data.range |= "TextValue"'
'.slots.avg_occup.range |= "TextValue"'
'.slots.bathroom_count.range |= "TextValue"'
'.slots.bedroom_count.range |= "TextValue"'
'.slots.biocide_admin_method.range |= "TextValue"'
'.slots.biomass.range |= "TextValue"'
'.slots.chem_administration.range |= "ControlledTermValue"'
'.slots.chem_mutagen.range |= "TextValue"'
'.slots.chem_treat_method.range |= "string"'
'.slots.collection_date.range |= "TimestampValue"'
'.slots.cool_syst_id.range |= "TextValue"'
'.slots.date_last_rain.range |= "TimestampValue"'
'.slots.diether_lipids.range |= "TextValue"'
'.slots.elevator.range |= "TextValue"'
'.slots.emulsions.range |= "TextValue"'
'.slots.env_broad_scale.range |= "ControlledIdentifiedTermValue"'
'.slots.env_local_scale.range |= "ControlledIdentifiedTermValue"'
'.slots.env_medium.range |= "ControlledIdentifiedTermValue"'
'.slots.escalator.range |= "TextValue"'
'.slots.exp_pipe.range |= "QuantityValue"'
'.slots.experimental_factor.range |= "ControlledTermValue"'
'.slots.ext_door.range |= "TextValue"'
'.slots.extreme_event.range |= "TimestampValue"'
'.slots.fertilizer_regm.range |= "TextValue"'
'.slots.fire.range |= "TimestampValue"'
'.slots.flooding.range |= "TimestampValue"'
'.slots.floor_count.range |= "TextValue"'
'.slots.freq_clean.range |= "QuantityValue"'
'.slots.freq_cook.range |= "QuantityValue"'
'.slots.fungicide_regm.range |= "TextValue"'
'.slots.gaseous_environment.range |= "TextValue"'
'.slots.gaseous_substances.range |= "TextValue"'
'.slots.gravity.range |= "TextValue"'
'.slots.growth_facil.range |= "ControlledTermValue"'
'.slots.growth_hormone_regm.range |= "TextValue"'
'.slots.hall_count.range |= "TextValue"'
'.slots.hcr_pressure.range |= "TextValue"'
'.slots.hcr_temp.range |= "TextValue"'
'.slots.heat_sys_deliv_meth.range |= "string"'
'.slots.heat_system_id.range |= "TextValue"'
'.slots.heavy_metals.range |= "TextValue"'
'.slots.herbicide_regm.range |= "TextValue"'
'.slots.host_body_product.range |= "ControlledTermValue"'
'.slots.host_body_site.range |= "ControlledTermValue"'
'.slots.host_family_relation.range |= "string"'
'.slots.host_phenotype.range |= "ControlledTermValue"'
'.slots.host_subspecf_genlin.range |= "string"'
'.slots.host_symbiont.range |= "string"'
'.slots.humidity_regm.range |= "TextValue"'
'.slots.inorg_particles.range |= "TextValue"'
'.slots.iw_bt_date_well.range |= "TimestampValue"'
'.slots.last_clean.range |= "TimestampValue"'
'.slots.lat_lon.range |= "GeolocationValue"'
'.slots.light_regm.range |= "TextValue"'
'.slots.max_occup.range |= "QuantityValue"'
'.slots.micro_biomass_meth.range |= "string"'
'.slots.mineral_nutr_regm.range |= "TextValue"'
'.slots.misc_param.range |= "TextValue"'
'.slots.n_alkanes.range |= "TextValue"'
'.slots.non_min_nutr_regm.range |= "string"'
'.slots.number_pets.range |= "QuantityValue"'
'.slots.number_plants.range |= "QuantityValue"'
'.slots.number_resident.range |= "QuantityValue"'
'.slots.occup_density_samp.range |= "QuantityValue"'
'.slots.occup_samp.range |= "QuantityValue"'
'.slots.org_count_qpcr_info.range |= "string"'
'.slots.org_particles.range |= "TextValue"'
'.slots.organism_count.range |= "QuantityValue"'
'.slots.particle_class.range |= "TextValue"'
'.slots.permeability.range |= "TextValue"'
'.slots.pesticide_regm.range |= "TextValue"'
'.slots.phaeopigments.range |= "TextValue"'
'.slots.phosplipid_fatt_acid.range |= "TextValue"'
'.slots.plant_growth_med.range |= "ControlledTermValue"'
'.slots.plant_struc.range |= "ControlledTermValue"'
'.slots.pollutants.range |= "TextValue"'
'.slots.porosity.range |= "TextValue"'
'.slots.pres_animal_insect.range |= "string"'
'.slots.prev_land_use_meth.range |= "string"'
'.slots.prod_start_date.range |= "TimestampValue"'
'.slots.radiation_regm.range |= "TextValue"'
'.slots.rainfall_regm.range |= "TextValue"'
'.slots.resins_pc.range |= "TextValue"'
'.slots.room_architec_elem.range |= "string"'
'.slots.room_count.range |= "TextValue"'
'.slots.room_dim.range |= "TextValue"'
'.slots.room_door_dist.range |= "TextValue"'
'.slots.room_net_area.range |= "TextValue"'
'.slots.room_occup.range |= "QuantityValue"'
'.slots.room_vol.range |= "TextValue"'
'.slots.root_med_carbon.range |= "TextValue"'
'.slots.root_med_macronutr.range |= "TextValue"'
'.slots.root_med_micronutr.range |= "TextValue"'
'.slots.root_med_ph.range |= "QuantityValue"'
'.slots.root_med_regl.range |= "TextValue"'
'.slots.root_med_suppl.range |= "TextValue"'
'.slots.salt_regm.range |= "TextValue"'
'.slots.samp_collec_device.range |= "string"'
'.slots.samp_collec_method.range |= "string"'
'.slots.samp_loc_corr_rate.range |= "TextValue"'
'.slots.samp_mat_process.range |= "ControlledTermValue"'
'.slots.samp_md.range |= "QuantityValue"'
'.slots.samp_name.range |= "string"'
'.slots.samp_preserv.range |= "TextValue"'
'.slots.samp_room_id.range |= "TextValue"'
'.slots.samp_time_out.range |= "TextValue"'
'.slots.samp_transport_cond.range |= "TextValue"'
'.slots.samp_tvdss.range |= "TextValue"'
'.slots.saturates_pc.range |= "TextValue"'
'.slots.shad_dev_water_mold.range |= "string"'
'.slots.sieving.range |= "TextValue"'
'.slots.size_frac.range |= "TextValue"'
'.slots.soil_texture_meth.range |= "string"'
'.slots.soluble_inorg_mat.range |= "TextValue"'
'.slots.soluble_org_mat.range |= "TextValue"'
'.slots.suspend_solids.range |= "TextValue"'
'.slots.tot_nitro_cont_meth.range |= "string"'
'.slots.viscosity.range |= "TextValue"'
'.slots.volatile_org_comp.range |= "TextValue"'
'.slots.water_cont_soil_meth.range |= "string"'
'.slots.water_temp_regm.range |= "TextValue"'
'.slots.watering_regm.range |= "TextValue"'
'.slots.window_open_freq.range |= "TextValue"'
'.slots.window_size.range |= "TextValue"'

# structural clean-up: remove the whole classes section, then strip the name key from every
# enum, slot and subset, and the text key from every permissible value
# (presumably these keys just repeat their parent mapping keys -- TODO confirm)
'del(.classes)'
'del(.enums.[].name)'
'del(.enums.[].permissible_values.[].text)'
'del(.slots.[].name)'
# only the add_recov_method slot has its pattern removed in this file
'del(.slots.add_recov_method.pattern)'
'del(.subsets.[].name)'

# set the schema id to the raw URL of src/schema/mixs.yaml on the nmdc-schema main branch
'.id |= "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/src/schema/mixs.yaml"'

# update host_taxid and samp_taxon_id: drop their examples and string_serialization, then
# replace their comments and set their range to ControlledIdentifiedTermValue.
# may want to flatten these to a string or URIORCURIE range eventually
'del(.slots.host_taxid.examples)'
'del(.slots.host_taxid.string_serialization)'
'del(.slots.samp_taxon_id.examples)'
'del(.slots.samp_taxon_id.string_serialization)'

# each comments value is a one-element list giving an example has_raw_value (label plus bracketed NCBITaxon CURIE)
'.slots.host_taxid.comments |= ["Homo sapiens [NCBITaxon:9606] would be a reasonable has_raw_value"]'
'.slots.host_taxid.range = "ControlledIdentifiedTermValue"'
'.slots.samp_taxon_id.comments |= ["coal metagenome [NCBITaxon:1260732] would be a reasonable has_raw_value"]'
'.slots.samp_taxon_id.range = "ControlledIdentifiedTermValue"'


# add "M horizon" to soil_horizon_enum as a permissible value; it is assigned an empty
# mapping, so no description or meaning is supplied for it
'.enums.soil_horizon_enum.permissible_values.["M horizon"] = {}'
38 changes: 38 additions & 0 deletions assets/yq-for-nmdc_schema_accepting_legacy_ids.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# For each class named below, set the slot_usage pattern and structured_pattern.syntax of the
# id slot to ".*" (match anything); going by this file's name, the intent is to accept legacy ids.
# Biosample (part_of) and OmicsProcessing (part_of, has_input, has_output) also have the
# pattern of those slots set to ".*".
# TODO: probably should have made a list of classes and then looped over a parameterized
# version of this expression; could also assert that the range is string
'(.classes[] | select(.name == "Biosample") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "Biosample") | .slot_usage.part_of.pattern) = ".*"'
'(.classes[] | select(.name == "Biosample") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "DataObject") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "DataObject") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MagsAnalysisActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MagsAnalysisActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetabolomicsAnalysisActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetabolomicsAnalysisActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetagenomeAnnotationActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetagenomeAnnotationActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetagenomeAssembly") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetagenomeAssembly") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetagenomeSequencingActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetagenomeSequencingActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetaproteomicsAnalysisActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetaproteomicsAnalysisActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetatranscriptomeActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetatranscriptomeActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetatranscriptomeAnnotationActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetatranscriptomeAnnotationActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "MetatranscriptomeAssembly") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "MetatranscriptomeAssembly") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "NomAnalysisActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "NomAnalysisActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "OmicsProcessing") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "OmicsProcessing") | .slot_usage.part_of.pattern) = ".*"'
'(.classes[] | select(.name == "OmicsProcessing") | .slot_usage.has_input.pattern) = ".*"'
'(.classes[] | select(.name == "OmicsProcessing") | .slot_usage.has_output.pattern) = ".*"'
'(.classes[] | select(.name == "OmicsProcessing") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "ReadBasedTaxonomyAnalysisActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "ReadBasedTaxonomyAnalysisActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "ReadQcAnalysisActivity") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "ReadQcAnalysisActivity") | .slot_usage.id.structured_pattern.syntax) = ".*"'
'(.classes[] | select(.name == "Study") | .slot_usage.id.pattern) = ".*"'
'(.classes[] | select(.name == "Study") | .slot_usage.id.structured_pattern.syntax) = ".*"'
Loading

0 comments on commit 05aea1c

Please sign in to comment.