tsdat · jmcvey3 · Oct 11, 2023 · Oct 5, 2023 · Oct 5, 2023 · Oct 9, 2023
diff --git a/.vscode/schema/dataset-schema.json b/.vscode/schema/dataset-schema.json
@@ -61,13 +61,20 @@
                     "format": "uri",
                     "type": "string"
                 },
-                "conventions": {
+                "Conventions": {
                     "title": "Conventions",
-                    "description": "The data conventions the dataset follows.",
+                    "description": "A comma-separated list of the conventions that are followed by the dataset.",
+                    "default": "CF-1.6",
+                    "type": "string"
+                },
+                "featureType": {
+                    "title": "Feature Type",
+                    "description": "CF attribute identifying the type of discrete sampling geometry the dataset represents (e.g., timeSeries, profile, or trajectory).",
+                    "default": "timeSeries",
                     "type": "string"
                 },
                 "doi": {
-                    "title": "Doi",
+                    "title": "DOI",
                     "description": "The DOI that has been registered for this dataset, if applicable.",
                     "type": "string"
                 },
@@ -91,7 +98,7 @@
                 "dataset_name": {
                     "title": "Dataset Name",
                     "description": "A string used to identify the data being produced. Ideally resembles a shortened lowercase version of the title. Only lowercase alphanumeric characters and '_' are allowed.",
-                    "minLength": 3,
+                    "minLength": 2,
                     "pattern": "^[a-z0-9_]+$",
                     "type": "string"
                 },
@@ -104,14 +111,14 @@
                 },
                 "temporal": {
                     "title": "Temporal",
-                    "description": "An optional string which describes the temporal resolution of the data (if it spaced in regular intervals). This string should be formated as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
+                    "description": "An optional string which describes the temporal resolution of the data (if spaced in regular intervals). This string should be formatted as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only alphanumeric characters are allowed.",
                     "minLength": 2,
                     "pattern": "^[0-9]+[a-zA-Z]+$",
                     "type": "string"
                 },
                 "data_level": {
                     "title": "Data Level",
-                    "description": "A string used to indicate the level of processing of the output data. It should be formated as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
+                    "description": "A string used to indicate the level of processing of the output data. It should be formatted as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
                     "maxLength": 3,
                     "minLength": 2,
                     "pattern": "^[a-z0-9]+$",
@@ -163,6 +170,46 @@
                     "description": "A string exactly matching a value in the CF Standard Name table which is used to provide a standardized way of identifying variables and measurements across heterogeneous datasets and domains. If a suitable match does not exist, then this attribute should be omitted. The full list of CF Standard Names is at: https://cfconventions.org/Data/cf-standard-names.",
                     "type": "string"
                 },
+                "coverage_content_type": {
+                    "title": "Coverage Content Type",
+                    "description": "An ISO 19115-1 code to indicate the source of the data (image, thematicClassification, physicalMeasurement, auxiliaryInformation, qualityInformation, referenceInformation, modelResult, or coordinate).",
+                    "type": "string"
+                },
+                "cf_role": {
+                    "title": "CF Role",
+                    "description": "Allowed values are defined in Chapter 9.5 of the CF Conventions and consist of: timeseries_id, profile_id, and trajectory_id, depending on the featureType represented in the dataset, as specified by the featureType global attribute.",
+                    "type": "string"
+                },
+                "accuracy": {
+                    "title": "Accuracy",
+                    "description": "The sensor accuracy is the closeness of the measurements to the variable's true value. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent accuracy should be provided here (see instrument_variable:calibration_date).",
+                    "type": "number"
+                },
+                "precision": {
+                    "title": "Precision",
+                    "description": "The sensor precision is the closeness of the measurements to each other. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent precision should be provided here (see instrument_variable:calibration_date).",
+                    "type": "number"
+                },
+                "resolution": {
+                    "title": "Resolution",
+                    "description": "The sensor resolution is the smallest change it can represent in the quantity that it is measuring. It should be given in the same units as the measured variable.",
+                    "type": "number"
+                },
+                "instrument": {
+                    "title": "Instrument",
+                    "description": "Variable attribute to be specified on each geophysical variable to identify the instrument that collected the data. The value of the attribute should be set to the name of another variable which contains the details of the instrument. There can be multiple instruments involved, depending on whether all the instances of the featureType in the collection come from the same instrument or not. If multiple instruments are involved, a variable should be defined for each instrument and referenced from the geophysical variable in a comma-separated string.",
+                    "type": "string"
+                },
+                "make_model": {
+                    "title": "Make and Model",
+                    "description": "The make and model of the instrument.",
+                    "type": "string"
+                },
+                "calibration_date": {
+                    "title": "Calibration Date",
+                    "description": "The date the instrument was last calibrated. The value should be specified as an ISO 8601 compatible string.",
+                    "type": "string"
+                },
                 "comment": {
                     "title": "Comment",
                     "description": "A user-friendly description of what the variable represents, how it was measured or derived, or any other relevant information that increases the ability of users to understand and use this data. This field plays a considerable role in creating self-documenting data, so we highly recommend including this field, especially for any variables which are particularly important for your dataset. Additionally, if the units for an attribute are unknown, then this field must include the phrase: 'Unknown units.' so that users know there is some uncertainty around this property. Variables that are unitless (e.g., categorical data or ratios), should set the 'units' to '1'.",

diff --git a/.vscode/schema/pipeline-schema.json b/.vscode/schema/pipeline-schema.json
@@ -150,7 +150,7 @@
             "properties": {
                 "name": {
                     "title": "Name",
-                    "description": "The exact name or list of names of the variable in the raw datasetreturned by the DataReader.",
+                    "description": "The exact name or list of names of the variable in the raw dataset returned by the DataReader.",
                     "anyOf": [
                         {
                             "type": "string"
@@ -290,13 +290,20 @@
                     "format": "uri",
                     "type": "string"
                 },
-                "conventions": {
+                "Conventions": {
                     "title": "Conventions",
-                    "description": "The data conventions the dataset follows.",
+                    "description": "A comma-separated list of the conventions that are followed by the dataset.",
+                    "default": "CF-1.6",
+                    "type": "string"
+                },
+                "featureType": {
+                    "title": "Feature Type",
+                    "description": "CF attribute identifying the type of discrete sampling geometry the dataset represents (e.g., timeSeries, profile, or trajectory).",
+                    "default": "timeSeries",
                     "type": "string"
                 },
                 "doi": {
-                    "title": "Doi",
+                    "title": "DOI",
                     "description": "The DOI that has been registered for this dataset, if applicable.",
                     "type": "string"
                 },
@@ -320,7 +327,7 @@
                 "dataset_name": {
                     "title": "Dataset Name",
                     "description": "A string used to identify the data being produced. Ideally resembles a shortened lowercase version of the title. Only lowercase alphanumeric characters and '_' are allowed.",
-                    "minLength": 3,
+                    "minLength": 2,
                     "pattern": "^[a-z0-9_]+$",
                     "type": "string"
                 },
@@ -333,14 +340,14 @@
                 },
                 "temporal": {
                     "title": "Temporal",
-                    "description": "An optional string which describes the temporal resolution of the data (if it spaced in regular intervals). This string should be formated as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only lowercase alphanumeric characters are allowed.",
+                    "description": "An optional string which describes the temporal resolution of the data (if spaced in regular intervals). This string should be formatted as a number followed by a unit of measurement, e.g., '10m' would indicate the data is sampled every ten minutes. Only alphanumeric characters are allowed.",
                     "minLength": 2,
                     "pattern": "^[0-9]+[a-zA-Z]+$",
                     "type": "string"
                 },
                 "data_level": {
                     "title": "Data Level",
-                    "description": "A string used to indicate the level of processing of the output data. It should be formated as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
+                    "description": "A string used to indicate the level of processing of the output data. It should be formatted as a letter followed by a number. Typical values for this include: a1 - data is ingested (no qc), b1 - data is ingested and quality checks applied, c1 (or higher) - one or more a* or b* datastreams used to create a higher-level data product. Only lowercase alphanumeric characters are allowed.",
                     "maxLength": 3,
                     "minLength": 2,
                     "pattern": "^[a-z0-9]+$",
@@ -392,6 +399,46 @@
                     "description": "A string exactly matching a value in the CF Standard Name table which is used to provide a standardized way of identifying variables and measurements across heterogeneous datasets and domains. If a suitable match does not exist, then this attribute should be omitted. The full list of CF Standard Names is at: https://cfconventions.org/Data/cf-standard-names.",
                     "type": "string"
                 },
+                "coverage_content_type": {
+                    "title": "Coverage Content Type",
+                    "description": "An ISO 19115-1 code to indicate the source of the data (image, thematicClassification, physicalMeasurement, auxiliaryInformation, qualityInformation, referenceInformation, modelResult, or coordinate).",
+                    "type": "string"
+                },
+                "cf_role": {
+                    "title": "CF Role",
+                    "description": "Allowed values are defined in Chapter 9.5 of the CF Conventions and consist of: timeseries_id, profile_id, and trajectory_id, depending on the featureType represented in the dataset, as specified by the featureType global attribute.",
+                    "type": "string"
+                },
+                "accuracy": {
+                    "title": "Accuracy",
+                    "description": "The sensor accuracy is the closeness of the measurements to the variable's true value. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent accuracy should be provided here (see instrument_variable:calibration_date).",
+                    "type": "number"
+                },
+                "precision": {
+                    "title": "Precision",
+                    "description": "The sensor precision is the closeness of the measurements to each other. It should be given in the same units as the measured variable. If the instrument has been calibrated multiple times with different results, the most recent precision should be provided here (see instrument_variable:calibration_date).",
+                    "type": "number"
+                },
+                "resolution": {
+                    "title": "Resolution",
+                    "description": "The sensor resolution is the smallest change it can represent in the quantity that it is measuring. It should be given in the same units as the measured variable.",
+                    "type": "number"
+                },
+                "instrument": {
+                    "title": "Instrument",
+                    "description": "Variable attribute to be specified on each geophysical variable to identify the instrument that collected the data. The value of the attribute should be set to the name of another variable which contains the details of the instrument. There can be multiple instruments involved, depending on whether all the instances of the featureType in the collection come from the same instrument or not. If multiple instruments are involved, a variable should be defined for each instrument and referenced from the geophysical variable in a comma-separated string.",
+                    "type": "string"
+                },
+                "make_model": {
+                    "title": "Make and Model",
+                    "description": "The make and model of the instrument.",
+                    "type": "string"
+                },
+                "calibration_date": {
+                    "title": "Calibration Date",
+                    "description": "The date the instrument was last calibrated. The value should be specified as an ISO 8601 compatible string.",
+                    "type": "string"
+                },
                 "comment": {
                     "title": "Comment",
                     "description": "A user-friendly description of what the variable represents, how it was measured or derived, or any other relevant information that increases the ability of users to understand and use this data. This field plays a considerable role in creating self-documenting data, so we highly recommend including this field, especially for any variables which are particularly important for your dataset. Additionally, if the units for an attribute are unknown, then this field must include the phrase: 'Unknown units.' so that users know there is some uncertainty around this property. Variables that are unitless (e.g., categorical data or ratios), should set the 'units' to '1'.",