Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorporate ACDD and IOOS standards #35

Merged
merged 10 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 53 additions & 6 deletions .vscode/schema/dataset-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,20 @@
"format": "uri",
"type": "string"
},
"conventions": {
"Conventions": {
"title": "Conventions",
"description": "The data conventions the dataset follows.",
"description": "A comma-separated list of the conventions that are followed by the dataset.",
"default": "CF-1.6",
"type": "string"
},
"featureType": {
"title": "Feature Type",
"description": "CF attribute for identifying the featureType.",
"default": "timeSeries",
"type": "string"
},
"doi": {
"title": "Doi",
"title": "DOI",
"description": "The DOI that has been registered for this dataset, if applicable.",
"type": "string"
},
Expand All @@ -91,7 +98,7 @@
"dataset_name": {
"title": "Dataset Name",
"description": "A string used to identify the data being produced. Ideally resembles a shortened lowercase version of the title. Only lowercase alphanumeric characters and '_' are allowed.",
"minLength": 3,
"minLength": 2,
"pattern": "^[a-z0-9_]+$",
"type": "string"
},
Expand All @@ -104,14 +111,14 @@
},
"temporal": {
"title": "Temporal",
"description": "An optional string which describes the temporal resolution of the data (if it spaced in regular intervals). This string should be formated as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
"description": "An optional string which describes the temporal resolution of the data (if spaced in regular intervals). This string should be formatted as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
"minLength": 2,
"pattern": "^[0-9]+[a-zA-Z]+$",
"type": "string"
},
"data_level": {
"title": "Data Level",
"description": "A string used to indicate the level of processing of the output data. It should be formated as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
"description": "A string used to indicate the level of processing of the output data. It should be formatted as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
"maxLength": 3,
"minLength": 2,
"pattern": "^[a-z0-9]+$",
Expand Down Expand Up @@ -163,6 +170,46 @@
"description": "A string exactly matching a value in the CF Standard Name table which is used to provide a standardized way of identifying variables and measurements across heterogeneous datasets and domains. If a suitable match does not exist, then this attribute should be omitted. The full list of CF Standard Names is at: https://cfconventions.org/Data/cf-standard-names.",
"type": "string"
},
"coverage_content_type": {
"title": "Coverage Content Type",
"description": "An ISO 19115-1 code to indicate the source of the data (image, thematicClassification, physicalMeasurement, auxiliaryInformation, qualityInformation, referenceInformation, modelResult, or coordinate).",
"type": "string"
},
"cf_role": {
"title": "CF Role",
"description": "Allowed values are defined in Chapter 9.5 of the CF conventions and consist of: timeseries_id, profile_id, and trajectory_id, depending on the featureType represented in the dataset, as specified by the featureType global attribute.",
"type": "string"
},
"accuracy": {
"title": "Accuracy",
"description": "The sensor accuracy is the closeness of the measurements to the variable's true value. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent accuracy should be provided here (see instrument_variable:calibration_date).",
"type": "number"
},
"precision": {
"title": "Precision",
"description": "The sensor precision is the closeness of the measurements to each other. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent precision should be provided here (see instrument_variable:calibration_date).",
"type": "number"
},
"resolution": {
"title": "Resolution",
"description": "The sensor resolution is the smallest change it can represent in the quantity that it is measuring. It should be given in the same units as the measured variable.",
"type": "number"
},
"instrument": {
"title": "Instrument",
"description": "Variable attribute to be specified on each geophysical variable to identify the instrument that collected the data. The value of the attribute should be set to another variable which contains the details of the instrument. There can be multiple instruments involved, depending on whether all the instances of the featureType in the collection come from the same instrument. If multiple instruments are involved, a variable should be defined for each instrument and referenced from the geophysical variable in a comma-separated string.",
"type": "string"
},
"make_model": {
"title": "Make and Model",
"description": "The make and model of the instrument.",
"type": "string"
},
"calibration_date": {
"title": "Calibration Date",
"description": "The date the instrument was last calibrated. The value should be specified as an ISO 8601-compatible string.",
"type": "string"
},
"comment": {
"title": "Comment",
"description": "A user-friendly description of what the variable represents, how it was measured or derived, or any other relevant information that increases the ability of users to understand and use this data. This field plays a considerable role in creating self-documenting data, so we highly recommend including this field, especially for any variables which are particularly important for your dataset. Additionally, if the units for an attribute are unknown, then this field must include the phrase: 'Unknown units.' so that users know there is some uncertainty around this property. Variables that are unitless (e.g., categorical data or ratios) should set the 'units' to '1'.",
Expand Down
61 changes: 54 additions & 7 deletions .vscode/schema/pipeline-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
"properties": {
"name": {
"title": "Name",
"description": "The exact name or list of names of the variable in the raw datasetreturned by the DataReader.",
"description": "The exact name or list of names of the variable in the raw dataset returned by the DataReader.",
"anyOf": [
{
"type": "string"
Expand Down Expand Up @@ -290,13 +290,20 @@
"format": "uri",
"type": "string"
},
"conventions": {
"Conventions": {
"title": "Conventions",
"description": "The data conventions the dataset follows.",
"description": "A comma-separated list of the conventions that are followed by the dataset.",
"default": "CF-1.6",
"type": "string"
},
"featureType": {
"title": "Feature Type",
"description": "CF attribute for identifying the featureType.",
"default": "timeSeries",
"type": "string"
},
"doi": {
"title": "Doi",
"title": "DOI",
"description": "The DOI that has been registered for this dataset, if applicable.",
"type": "string"
},
Expand All @@ -320,7 +327,7 @@
"dataset_name": {
"title": "Dataset Name",
"description": "A string used to identify the data being produced. Ideally resembles a shortened lowercase version of the title. Only lowercase alphanumeric characters and '_' are allowed.",
"minLength": 3,
"minLength": 2,
"pattern": "^[a-z0-9_]+$",
"type": "string"
},
Expand All @@ -333,14 +340,14 @@
},
"temporal": {
"title": "Temporal",
"description": "An optional string which describes the temporal resolution of the data (if it spaced in regular intervals). This string should be formated as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
"description": "An optional string which describes the temporal resolution of the data (if spaced in regular intervals). This string should be formatted as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
"minLength": 2,
"pattern": "^[0-9]+[a-zA-Z]+$",
"type": "string"
},
"data_level": {
"title": "Data Level",
"description": "A string used to indicate the level of processing of the output data. It should be formated as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
"description": "A string used to indicate the level of processing of the output data. It should be formatted as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
"maxLength": 3,
"minLength": 2,
"pattern": "^[a-z0-9]+$",
Expand Down Expand Up @@ -392,6 +399,46 @@
"description": "A string exactly matching a value in the CF Standard Name table which is used to provide a standardized way of identifying variables and measurements across heterogeneous datasets and domains. If a suitable match does not exist, then this attribute should be omitted. The full list of CF Standard Names is at: https://cfconventions.org/Data/cf-standard-names.",
"type": "string"
},
"coverage_content_type": {
"title": "Coverage Content Type",
"description": "An ISO 19115-1 code to indicate the source of the data (image, thematicClassification, physicalMeasurement, auxiliaryInformation, qualityInformation, referenceInformation, modelResult, or coordinate).",
"type": "string"
},
"cf_role": {
"title": "CF Role",
"description": "Allowed values are defined in Chapter 9.5 of the CF conventions and consist of: timeseries_id, profile_id, and trajectory_id, depending on the featureType represented in the dataset, as specified by the featureType global attribute.",
"type": "string"
},
"accuracy": {
"title": "Accuracy",
"description": "The sensor accuracy is the closeness of the measurements to the variable's true value. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent accuracy should be provided here (see instrument_variable:calibration_date).",
"type": "number"
},
"precision": {
"title": "Precision",
"description": "The sensor precision is the closeness of the measurements to each other. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent precision should be provided here (see instrument_variable:calibration_date).",
"type": "number"
},
"resolution": {
"title": "Resolution",
"description": "The sensor resolution is the smallest change it can represent in the quantity that it is measuring. It should be given in the same units as the measured variable.",
"type": "number"
},
"instrument": {
"title": "Instrument",
"description": "Variable attribute to be specified on each geophysical variable to identify the instrument that collected the data. The value of the attribute should be set to another variable which contains the details of the instrument. There can be multiple instruments involved, depending on whether all the instances of the featureType in the collection come from the same instrument. If multiple instruments are involved, a variable should be defined for each instrument and referenced from the geophysical variable in a comma-separated string.",
"type": "string"
},
"make_model": {
"title": "Make and Model",
"description": "The make and model of the instrument.",
"type": "string"
},
"calibration_date": {
"title": "Calibration Date",
"description": "The date the instrument was last calibrated. The value should be specified as an ISO 8601-compatible string.",
"type": "string"
},
"comment": {
"title": "Comment",
"description": "A user-friendly description of what the variable represents, how it was measured or derived, or any other relevant information that increases the ability of users to understand and use this data. This field plays a considerable role in creating self-documenting data, so we highly recommend including this field, especially for any variables which are particularly important for your dataset. Additionally, if the units for an attribute are unknown, then this field must include the phrase: 'Unknown units.' so that users know there is some uncertainty around this property. Variables that are unitless (e.g., categorical data or ratios) should set the 'units' to '1'.",
Expand Down
Loading
Loading