Skip to content

Commit

Permalink
Merge fa09067 into 2f9ec48
Browse files Browse the repository at this point in the history
  • Loading branch information
mmwinther authored Nov 16, 2023
2 parents 2f9ec48 + fa09067 commit f825299
Show file tree
Hide file tree
Showing 2 changed files with 360 additions and 0 deletions.
66 changes: 66 additions & 0 deletions .github/workflows/generate-python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

# Regenerates the Pydantic models from the JSON Schema on every pull request
# and, when the generated file changed, commits and pushes the result back to
# the pull request's head branch.
on:
  pull_request:

jobs:
  generate-python:
    runs-on: ubuntu-latest
    steps:
      # A GitHub App installation token is used for both checkout and push.
      # Unlike the default GITHUB_TOKEN, pushes made with it trigger workflows
      # again, so the generation step below must be deterministic.
      - name: Generate token
        id: generate_token
        uses: actions/create-github-app-token@v1
        with:
          app-id: ${{ secrets.DAPLA_BOT_APP_ID }}
          private-key: ${{ secrets.DAPLA_BOT_PRIVATE_KEY }}

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          token: ${{ steps.generate_token.outputs.token }}

      - name: Configure Git
        run: |
          git config user.name "dapla-bot[bot]"
          git config user.email "143391972+dapla-bot[bot]@users.noreply.github.com"

      - uses: actions/setup-python@v2
        with:
          # Quoted so the version can never be re-typed by a YAML parser.
          python-version: "3.11.5"

      - name: Install pipx
        run: |
          python -m pip install --upgrade pipx
          python -m pipx ensurepath

      - name: Install datamodel-codegen
        # We must manually install pyyaml as a workaround for https://github.com/koxudaxi/datamodel-code-generator/issues/1697
        run: |
          python -m pipx install datamodel-code-generator==0.24.0
          python -m pipx inject datamodel-code-generator pyyaml

      - name: Generate Python
        # --disable-timestamp keeps the file header stable between runs.
        # Without it every run produces a diff (the header timestamp), which
        # is committed and pushed, which re-triggers this workflow.
        run: >
          datamodel-codegen
          --input-file-type jsonschema
          --input src/datadoc-json-schema/datadoc-json-schema.json
          --output-model-type pydantic_v2.BaseModel
          --force-optional
          --use-default
          --disable-timestamp
          --output generated/python/datadoc_model/datadoc_model/model.py

      - name: Check for modified files
        id: git-check
        # Exposes `modified` as the string "true" or "false" to later steps.
        run: |
          if [ -n "$(git status --porcelain)" ]; then
            echo "modified=true" >> "$GITHUB_OUTPUT"
          else
            echo "modified=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Commit
        if: steps.git-check.outputs.modified == 'true'
        run: |
          git add -A
          git commit -m '[GENERATE] Pydantic models from JSON Schema'

      - name: Push
        # Only push when the Commit step actually created a commit.
        if: steps.git-check.outputs.modified == 'true'
        # NOTE(review): the version tag of this action was lost to e-mail
        # obfuscation in the extracted source. `@master` is the form shown in
        # the action's README; pin it to the release tag this repository
        # actually uses.
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ steps.generate_token.outputs.token }}
          branch: ${{ github.head_ref }}
294 changes: 294 additions & 0 deletions generated/python/datadoc_model/datadoc_model/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
# generated by datamodel-codegen:
# filename: datadoc-json-schema.json
# timestamp: 2023-11-16T09:52:30+00:00

from __future__ import annotations

from datetime import datetime
from enum import Enum
from typing import List, Optional
from uuid import UUID

from pydantic import AnyUrl, BaseModel, Field
from typing_extensions import Literal


class Assessment(Enum):
    # Allowed values for ``Dataset.assessment`` (the data set's sensitivity
    # classification). Every member's value is its own name as a string.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    SENSITIVE = "SENSITIVE"
    PROTECTED = "PROTECTED"
    OPEN = "OPEN"


class DatasetStatus(Enum):
    # Allowed values for ``Dataset.dataset_status`` (life cycle of the data
    # set). Every member's value is its own name as a string.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    DRAFT = "DRAFT"
    INTERNAL = "INTERNAL"
    EXTERNAL = "EXTERNAL"
    DEPRECATED = "DEPRECATED"


class DatasetState(Enum):
    # Allowed values for ``Dataset.dataset_state`` (described there as the
    # "steady state of data"). Every member's value is its own name.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    SOURCE_DATA = "SOURCE_DATA"
    INPUT_DATA = "INPUT_DATA"
    PROCESSED_DATA = "PROCESSED_DATA"
    STATISTICS = "STATISTICS"
    OUTPUT_DATA = "OUTPUT_DATA"


class DataType(Enum):
    # Allowed values for ``Variable.data_type``. Every member's value is its
    # own name as a string.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    STRING = "STRING"
    INTEGER = "INTEGER"
    DATETIME = "DATETIME"
    BOOLEAN = "BOOLEAN"


class VariableRole(Enum):
    # Allowed values for ``Variable.variable_role`` (role of the instance
    # variable in the data set). Every member's value is its own name.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    IDENTIFIER = "IDENTIFIER"
    MEASURE = "MEASURE"
    START_TIME = "START_TIME"
    STOP_TIME = "STOP_TIME"
    ATTRIBUTE = "ATTRIBUTE"


class LanguageStringType(BaseModel):
    # One optional string per language key; every key defaults to None.
    # Used as the type of the multi-language text fields on Dataset and
    # Variable (name, description, ...).
    # Documented with comments because Pydantic copies a class docstring into
    # the generated JSON Schema.
    en: Optional[str] = Field(default=None)
    nn: Optional[str] = Field(default=None)
    nb: Optional[str] = Field(default=None)


class UnitType(Enum):
    # Allowed values for ``Dataset.unit_type``. That field's description
    # points to the definitions of unit types at
    # https://www.ssb.no/en/metadata/definisjoner-av-statistiske-enheter
    # Every member's value is its own name as a string.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    ARBEIDSULYKKE = "ARBEIDSULYKKE"
    BOLIG = "BOLIG"
    BYGNING = "BYGNING"
    EIENDOM = "EIENDOM"
    FAMILIE = "FAMILIE"
    FORETAK = "FORETAK"
    FYLKE = "FYLKE"
    HAVNEANLOEP = "HAVNEANLOEP"
    HUSHOLDNING = "HUSHOLDNING"
    KJOERETOEY = "KJOERETOEY"
    KOMMUNE = "KOMMUNE"
    KURS = "KURS"
    LOVBRUDD = "LOVBRUDD"
    PERSON = "PERSON"
    STAT = "STAT"
    STORFE = "STORFE"
    TRAFIKKULYKKE = "TRAFIKKULYKKE"
    TRANSAKSJON = "TRANSAKSJON"
    VARE_TJENESTE = "VARE_TJENESTE"
    VERDIPAPIR = "VERDIPAPIR"
    VIRKSOMHET = "VIRKSOMHET"


class TemporalityTypeType(Enum):
    # Allowed values for the ``temporality_type`` field of both Dataset and
    # Variable. Every member's value is its own name as a string.
    # Documented with comments, not a docstring, so ``__doc__`` is unchanged.
    FIXED = "FIXED"
    STATUS = "STATUS"
    ACCUMULATED = "ACCUMULATED"
    EVENT = "EVENT"


class Dataset(BaseModel):
    # Metadata describing a data set as a whole. Every field is optional and
    # defaults to None; the human-readable meaning of each field is carried in
    # its ``description``/``title`` arguments.
    # Documented with comments because Pydantic copies a class docstring into
    # the generated JSON Schema.

    # --- Identification and classification ---
    short_name: Optional[str] = Field(
        default=None,
        description="Name of (physical) data file, data table or dataset",
        title="Short name",
    )
    assessment: Optional[Assessment] = Field(
        default=None,
        description="Value assessment (sensitivity classification) for the data set",
        title="Assessment",
    )
    dataset_status: Optional[DatasetStatus] = Field(
        default=None,
        description="Life cycle for data set",
        title="Data set status",
    )
    dataset_state: Optional[DatasetState] = Field(
        default=None,
        description="Steady state of data",
        title="Data set state",
    )

    # --- Descriptive, multi-language text ---
    name: Optional[LanguageStringType] = Field(
        default=None,
        description="Name of data set",
        title="Name",
    )
    description: Optional[LanguageStringType] = Field(
        default=None,
        description="Free text description of the data set",
        title="Description",
    )
    data_source: Optional[LanguageStringType] = Field(
        default=None,
        description="Data source. Set either for the data set or instance variable.",
        title="Data source",
    )
    register_uri: Optional[LanguageStringType] = Field(
        default=None,
        description="Link to register",
        title="Register",
    )
    population_description: Optional[LanguageStringType] = Field(
        default=None,
        description="Description of the population covered in the data set. Includes unit type, spatial coverage and period of time.",
        title="Description of population",
    )

    # --- Versioning ---
    version: Optional[str] = Field(
        default=None,
        description="Version of data set",
        title="Version",
    )
    version_description: Optional[LanguageStringType] = Field(
        default=None,
        description="Dataset version information in the form of descriptive text",
        title="Version description",
    )

    # --- Statistical classification ---
    unit_type: Optional[UnitType] = Field(
        default=None,
        description="Unit Type for data file, table or data set. See Definitions of Unit Types https://www.ssb.no/en/metadata/definisjoner-av-statistiske-enheter",
        title="Unit type",
    )
    temporality_type: Optional[TemporalityTypeType] = Field(
        default=None,
        description="Temporality type. Either for the instance variable or the data set",
        title="Temporality type",
    )
    subject_field: Optional[LanguageStringType] = Field(
        default=None,
        description="Primary area of statistics in which the data set is included",
        title="Subject field",
    )
    keyword: Optional[List[str]] = Field(
        default=None,
        description="A list of searchable keywords that can contribute to the development of effective filtering and search services.",
        title="Keyword(s)",
    )
    spatial_coverage_description: Optional[LanguageStringType] = Field(
        default=None,
        description="Description of the data set's spatial coverage",
        title="Spatial coverage description",
    )

    # --- Identity, ownership and location ---
    id: Optional[UUID] = Field(
        default=None,
        description="Unique identifier for the data set",
        title="Identifier",
    )
    owner: Optional[LanguageStringType] = Field(
        default=None,
        description="Owner of the data set (responsible division in Statistics Norway). See also Classification of organisational units https://www.ssb.no/en/klass/klassifikasjoner/83",
        title="Owner",
    )
    file_path: Optional[str] = Field(
        default=None,
        description="The file path contains the data set's name and the path to where it is stored",
        title="File path",
    )

    # --- Audit trail for the metadata itself ---
    metadata_created_date: Optional[datetime] = Field(
        default=None,
        description="Created date for metadata about the data set",
        title="Metadata created date",
    )
    metadata_created_by: Optional[str] = Field(
        default=None,
        description="Created by identifiable person.",
        title="Metadata created by",
    )
    metadata_last_updated_date: Optional[datetime] = Field(
        default=None,
        description="Last updated date for metadata about the dataset",
        title="Metadata last updated date",
    )
    metadata_last_updated_by: Optional[str] = Field(
        default=None,
        description="Last change made by identifiable person. ",
        title="Metadata last updated by",
    )

    # --- Period covered by the data ---
    # NOTE(review): these two are typed ``str`` here but ``datetime`` on
    # Variable; confirm against the JSON Schema whether that is intended.
    contains_data_from: Optional[str] = Field(
        default=None,
        description="The data set contains data from date/time",
        title="Contains data from",
    )
    contains_data_until: Optional[str] = Field(
        default=None,
        description="The data set contains data up until date/time",
        title="Contains data up until",
    )


class Variable(BaseModel):
    # Metadata describing a single instance variable of a data set. Every
    # field is optional and defaults to None; the human-readable meaning of
    # each field is carried in its ``description``/``title`` arguments.
    # Documented with comments because Pydantic copies a class docstring into
    # the generated JSON Schema.

    # --- Identification and typing ---
    short_name: Optional[str] = Field(
        default=None,
        description="Physical name of the variable in the dataset. Should match the recommended short name.",
        title="Short name",
    )
    name: Optional[LanguageStringType] = Field(
        default=None,
        description="Variable names can be inherited from VarDef, but can also be documented/changed here",
        title="Name",
    )
    data_type: Optional[DataType] = Field(
        default=None,
        description="Data type",
        title="Data type",
    )
    variable_role: Optional[VariableRole] = Field(
        default=None,
        description="Role of the instance variable in the data set",
        title="Variable role",
    )
    definition_uri: Optional[AnyUrl] = Field(
        default=None,
        description="A link (URI) to the variable's definition in Vardok/VarDef",
        title="Definition URI",
    )
    # The \u200b escapes (zero-width spaces) in some descriptions below are
    # part of the runtime strings and are kept exactly as generated.
    direct_person_identifying: Optional[bool] = Field(
        default=None,
        description="Direct Person identifying Information (DPI). Some of the values \u200b\u200bin an instance variable kan be DPI, others not. In this case, DPI is set equal to true. For example, the variable exporter where some of the values \u200b\u200bcan be organization number, others social security numbers (sole proprietorships).",
        title="Direct Person identifying Information (DPI)",
    )

    # --- Descriptive, multi-language text ---
    data_source: Optional[LanguageStringType] = Field(
        default=None,
        description="Data source. Set at data set level, but can be overwritten at variable instance level.",
        title="Data source",
    )
    population_description: Optional[LanguageStringType] = Field(
        default=None,
        description="The population the variable describes can be specified in more detail here. Set at dataset level, but can be overwritten at instance variable level.",
        title="Population description",
    )
    comment: Optional[LanguageStringType] = Field(
        default=None,
        description="Further clarification of the variables definition",
        title="Comment",
    )

    # --- Value semantics ---
    temporality_type: Optional[TemporalityTypeType] = Field(
        default=None,
        description="Temporality type. Set either for variable instance or dataset.",
        title="Temporality type",
    )
    measurement_unit: Optional[str] = Field(
        default=None,
        description="Measurement unit",
        title="Measurement unit",
    )
    format: Optional[str] = Field(
        default=None,
        description="The format of the values \u200b\u200b(physical format or regular expression) in machine-readable form for validation. This can be used as a further specification of the data type (dataType) in those cases where this is relevant.",
        title="Format",
    )
    classification_uri: Optional[AnyUrl] = Field(
        default=None,
        description="Link (URI) to valid classification or code list",
        title="Classification or codelist URI",
    )
    sentinel_value_uri: Optional[AnyUrl] = Field(
        default=None,
        description="A link (URI) to an overview of sentinel values included in the variable. Proposals for the standardization of sentinel values \u200b\u200bhave been drafted, but this has not yet been processed by the Standards Committee in Statistics Norway: 01 - In total, 02 - Sum, 03 - Subtotal, 04 - Other, 05 - Rest, 06 - Invalid value, 07 - Unspecified, 08 - Not relevant",
        title="Sentinel value URI",
    )
    invalid_value_description: Optional[LanguageStringType] = Field(
        default=None,
        description="Invalid value(s) description used in addition (or as an alternative) to standard sentinel values.",
        title="Invalid value(s) description",
    )

    # --- Identity ---
    id: Optional[UUID] = Field(
        default=None,
        description="Unique SSB identifier for the instance variable in the data set",
        title="Identifier",
    )

    # --- Period covered by the variable's data ---
    contains_data_from: Optional[datetime] = Field(
        default=None,
        description="The instance variable in the data set contains data from and including this date. This can be useful information for data sets that contain many instance variables in addition to data for many periods/years. In many cases, it will then be the case that some variables only contain data for the most recent periods/years, e.g. if the entire data set contains data from 1970 to 2020, while some instance variables only contain data from 1998 onwards.",
        title="Contains data from",
    )
    contains_data_until: Optional[datetime] = Field(
        default=None,
        description="The instance variable in the data set contains data up to and including this date. This can be useful information for data sets that contain many instance variables in addition to data for many periods/years. In many cases, it will then be the case that some of the instance variables in the data set are terminated (no longer updated) after a given point in time.",
        title="Contains data up until",
    )


class Model(BaseModel):
    # Root document: one optional Dataset plus an optional list of Variable
    # entries, with a completion percentage and the document version.
    # Every field defaults to None.
    # Documented with comments because Pydantic copies a class docstring into
    # the generated JSON Schema.
    percentage_complete: Optional[int] = None
    # Wrapped in Optional so the None default is a value the field accepts.
    # With a bare Literal['2.0.0'] the default was never validated, but an
    # explicit None (for example when re-validating the output of
    # ``Model().model_dump()``) was rejected. '2.0.0' is still the only
    # accepted non-None value.
    document_version: Optional[Literal['2.0.0']] = None
    dataset: Optional[Dataset] = None
    variables: Optional[List[Variable]] = None

0 comments on commit f825299

Please sign in to comment.