Skip to content

Commit

Permalink
Resolve merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
JanhSander committed Apr 9, 2024
2 parents bd914c5 + 1606204 commit 9b2fe5c
Show file tree
Hide file tree
Showing 15 changed files with 380 additions and 187 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ssb-datadoc"
version = "0.8.3"
version = "0.9.0"
description = "Document dataset metadata. For use in Statistics Norway's metadata system."
authors = ["Statistics Norway <[email protected]>"]
license = "MIT"
Expand Down
4 changes: 4 additions & 0 deletions src/datadoc/assets/workspace_style.css
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,7 @@
.form-check{
padding-left: 0;
}

/* Add breathing room between a checkbox control and its label text. */
label.form-check-label{
padding-left: 0.5rem;
}
88 changes: 72 additions & 16 deletions src/datadoc/backend/dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,26 +292,21 @@ def __init__(self, dataset_path: str | os.PathLike[str]) -> None:
self._period_strings = self._extract_period_strings(self.dataset_name_sections)

@staticmethod
def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
"""Extract period strings from dataset name sections.
Iterates over the dataset name sections and returns a list of strings
that match the year regex, stripping the first character. This extracts
the year periods from the dataset name.
def _get_period_string_indices(dataset_name_sections: list[str]) -> list[int]:
"""Get all the indices at which period strings are found.
Examples:
>>> DaplaDatasetPathInfo._extract_period_strings(['p2022', 'kommune', 'v1'])
['2022']
>>> DaplaDatasetPathInfo._get_period_string_indices(['kommune', 'p2022', 'v1'])
[1]
>>> DaplaDatasetPathInfo._extract_period_strings(['p2022-01', 'p2023-06', 'kommune', 'v1'])
['2022-01', '2023-06']
>>> DaplaDatasetPathInfo._get_period_string_indices(['kommune', 'p2022-01', 'p2023-06', 'v1'])
[1, 2]
>>> DaplaDatasetPathInfo._extract_period_strings(['p1990Q1', 'kommune', 'v1'])
['1990Q1']
>>> DaplaDatasetPathInfo._get_period_string_indices(['kommune', 'p1990Q1', 'v1'])
[1]
>>> DaplaDatasetPathInfo._extract_period_strings(['varehandel','v1']) # No date will return empty string
>>> DaplaDatasetPathInfo._get_period_string_indices(['varehandel','v1'])
[]
"""

def insert_p(regex: str) -> str:
Expand All @@ -324,14 +319,43 @@ def insert_p(regex: str) -> str:
return regex[:1] + "p" + regex[1:]

return [
x[1:]
for x in dataset_name_sections
i
for i, x in enumerate(dataset_name_sections)
if any(
re.match(insert_p(date_format.regex_pattern), x)
for date_format in SUPPORTED_DATE_FORMATS
)
]

@staticmethod
def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
    """Extract period strings from dataset name sections.

    Iterates over the dataset name sections and returns a list of strings
    that match the year regex, stripping the first character. This extracts
    the year periods from the dataset name.

    Examples:
        >>> DaplaDatasetPathInfo._extract_period_strings(['p2022', 'kommune', 'v1'])
        ['2022']

        >>> DaplaDatasetPathInfo._extract_period_strings(['p2022-01', 'p2023-06', 'kommune', 'v1'])
        ['2022-01', '2023-06']

        >>> DaplaDatasetPathInfo._extract_period_strings(['p1990Q1', 'kommune', 'v1'])
        ['1990Q1']

        >>> DaplaDatasetPathInfo._extract_period_strings(['varehandel','v1'])
        []
    """
    period_indices = DaplaDatasetPathInfo._get_period_string_indices(
        dataset_name_sections,
    )
    # Strip the leading 'p' marker from each matched section.
    return [dataset_name_sections[index][1:] for index in period_indices]

def _extract_period_string_from_index(self, index: int) -> str | None:
try:
return self._period_strings[index]
Expand All @@ -347,6 +371,38 @@ def _extract_norwegian_dataset_state_path_part(
).lower()
return {norwegian_dataset_state_path_part.replace(" ", x) for x in ["-", "_"]}

@property
def dataset_short_name(self) -> str | None:
    """Extract the dataset short name from the filepath.

    The dataset short name is defined as the first section(s) of the file
    stem, up to (but not including) the period information, or up to the
    version information if no period information is present.

    Examples:
        >>> DaplaDatasetPathInfo('prosjekt/befolkning/klargjorte_data/person_data_v1.parquet').dataset_short_name
        'person_data'

        >>> DaplaDatasetPathInfo('befolkning/inndata/sykepenger_p2022Q1_p2022Q2_v23.parquet').dataset_short_name
        'sykepenger'

        >>> DaplaDatasetPathInfo('my_data/simple_dataset_name.parquet').dataset_short_name
        'simple_dataset_name'
    """
    if self.contains_data_from or self.contains_data_until:
        # Period info present: keep everything before the first period
        # string. (Period detection succeeded, so at least one index
        # exists and min() is safe.)
        first_period_index = min(
            DaplaDatasetPathInfo._get_period_string_indices(
                self.dataset_name_sections,
            ),
        )
        short_name_sections = self.dataset_name_sections[:first_period_index]
    elif self.dataset_version:
        # No period info: drop only the trailing version section.
        short_name_sections = self.dataset_name_sections[:-1]
    else:
        # Neither period nor version info: the whole stem is the short name.
        short_name_sections = self.dataset_name_sections

    return "_".join(short_name_sections)

@property
def contains_data_from(self) -> datetime.date | None:
"""The earliest date from which data in the dataset is relevant for."""
Expand Down
5 changes: 2 additions & 3 deletions src/datadoc/backend/datadoc_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def __init__(
self.metadata_document: pathlib.Path | CloudPath | None = None
self.container: model.MetadataContainer | None = None
self.dataset_path: pathlib.Path | CloudPath | None = None
self.short_name: str | None = None
self.dataset = model.Dataset()
self.variables: list = []
self.variables_lookup: dict[str, model.Variable] = {}
Expand Down Expand Up @@ -176,7 +175,7 @@ def extract_metadata_from_dataset(
)

self.dataset = model.Dataset(
short_name=self.dataset_path.stem if self.dataset_path else None,
short_name=dapla_dataset_path_info.dataset_short_name,
dataset_state=dapla_dataset_path_info.dataset_state,
dataset_status=DataSetStatus.DRAFT,
assessment=self.get_assessment_by_state(
Expand All @@ -185,7 +184,7 @@ def extract_metadata_from_dataset(
version=dapla_dataset_path_info.dataset_version,
contains_data_from=str(dapla_dataset_path_info.contains_data_from),
contains_data_until=str(dapla_dataset_path_info.contains_data_until),
data_source_path=self.dataset_path,
file_path=str(self.dataset_path),
metadata_created_by=user_info.get_user_info_for_current_platform().short_email,
# TODO @mmwinther: Remove multiple_language_support once the model is updated.
# https://github.com/statisticsnorway/ssb-datadoc-model/issues/41
Expand Down
46 changes: 14 additions & 32 deletions src/datadoc/frontend/callbacks/register_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@
from datadoc.frontend.callbacks.utils import update_global_language_state
from datadoc.frontend.callbacks.variables import accept_variable_metadata_date_input
from datadoc.frontend.callbacks.variables import accept_variable_metadata_input
from datadoc.frontend.callbacks.variables import populate_variables_workspace
from datadoc.frontend.components.builders import build_dataset_edit_section
from datadoc.frontend.components.builders import build_edit_section
from datadoc.frontend.components.builders import build_ssb_accordion
from datadoc.frontend.components.dataset_tab import SECTION_WRAPPER_ID
from datadoc.frontend.components.variables_tab import ACCORDION_WRAPPER_ID
from datadoc.frontend.components.variables_tab import VARIABLES_INFORMATION_ID
Expand All @@ -39,8 +38,6 @@
from datadoc.frontend.fields.display_dataset import OBLIGATORY_EDITABLE_DATASET_METADATA
from datadoc.frontend.fields.display_dataset import OPTIONAL_DATASET_METADATA
from datadoc.frontend.fields.display_dataset import DatasetIdentifiers
from datadoc.frontend.fields.display_variables import OBLIGATORY_VARIABLES_METADATA
from datadoc.frontend.fields.display_variables import OPTIONAL_VARIABLES_METADATA
from datadoc.frontend.fields.display_variables import VariableIdentifiers

if TYPE_CHECKING:
Expand Down Expand Up @@ -143,39 +140,24 @@ def callback_populate_variables_info_section(
@app.callback(
Output(ACCORDION_WRAPPER_ID, "children"),
Input("language-dropdown", "value"),
Input("search-variables", "value"),
prevent_initial_call=True,
)
def callback_populate_variables_workspace(
language: str,
search_query: str,
) -> list:
"""Create variable workspace with accordions for variables."""
"""Create variable workspace with accordions for variables.
Allows for filtering which variables are displayed via the search box.
"""
update_global_language_state(SupportedLanguages(language))
logger.info("Populating new variables workspace")
return [
build_ssb_accordion(
variable.short_name,
{
"type": "variables-accordion",
"id": f"{variable.short_name}-{language}", # Insert language into the ID to invalidate browser caches
},
variable.short_name,
children=[
build_edit_section(
OBLIGATORY_VARIABLES_METADATA,
"Obligatorisk",
variable,
state.current_metadata_language.value,
),
build_edit_section(
OPTIONAL_VARIABLES_METADATA,
"Anbefalt",
variable,
state.current_metadata_language.value,
),
],
)
for variable in list(state.metadata.variables)
]
logger.debug("Populating variables workspace. Search query: %s", search_query)
return populate_variables_workspace(
state.metadata.variables,
state.current_metadata_language,
search_query,
)

@app.callback(
Output(SECTION_WRAPPER_ID, "children"),
Expand All @@ -189,7 +171,7 @@ def callback_populate_dataset_workspace(
) -> list:
"""Create dataset workspace with sections."""
update_global_language_state(SupportedLanguages(language))
logger.info("Populating new dataset workspace")
logger.debug("Populating dataset workspace")
if n_clicks:
return [
build_dataset_edit_section(
Expand Down
44 changes: 44 additions & 0 deletions src/datadoc/frontend/callbacks/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,61 @@
from datadoc.frontend.callbacks.utils import MetadataInputTypes
from datadoc.frontend.callbacks.utils import find_existing_language_string
from datadoc.frontend.callbacks.utils import parse_and_validate_dates
from datadoc.frontend.components.builders import build_edit_section
from datadoc.frontend.components.builders import build_ssb_accordion
from datadoc.frontend.fields.display_variables import (
MULTIPLE_LANGUAGE_VARIABLES_METADATA,
)
from datadoc.frontend.fields.display_variables import OBLIGATORY_VARIABLES_METADATA
from datadoc.frontend.fields.display_variables import OPTIONAL_VARIABLES_METADATA
from datadoc.frontend.fields.display_variables import VariableIdentifiers

if TYPE_CHECKING:
from datadoc_model import model
from datadoc_model.model import LanguageStringType

from datadoc.enums import SupportedLanguages

logger = logging.getLogger(__name__)


def populate_variables_workspace(
    variables: list[model.Variable],
    language: SupportedLanguages,
    search_query: str,
) -> list:
    """Create variable workspace with accordions for variables.

    Allows for filtering which variables are displayed via the search box.
    """
    accordions = []
    for variable in variables:
        short_name = variable.short_name or ""
        # Only show variables whose short name matches the search query.
        if search_query not in short_name:
            continue
        edit_sections = [
            build_edit_section(
                OBLIGATORY_VARIABLES_METADATA,
                "Obligatorisk",
                variable,
                language.value,
            ),
            build_edit_section(
                OPTIONAL_VARIABLES_METADATA,
                "Anbefalt",
                variable,
                language.value,
            ),
        ]
        accordions.append(
            build_ssb_accordion(
                short_name,
                {
                    "type": "variables-accordion",
                    # Insert language into the ID to invalidate browser caches
                    "id": f"{variable.short_name}-{language.value}",
                },
                short_name,
                children=edit_sections,
            ),
        )
    return accordions


def handle_multi_language_metadata(
metadata_field: str,
new_value: MetadataInputTypes | LanguageStringType,
Expand Down
4 changes: 2 additions & 2 deletions src/datadoc/frontend/components/variables_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def build_variables_tab() -> dbc.Tab:
ssb.Input(
label="Søk i variabler",
searchField=True,
disabled=True,
placeholder="Kommer...",
disabled=False,
placeholder="Variabel kortnavn...",
id="search-variables",
n_submit=0,
value="",
Expand Down
9 changes: 4 additions & 5 deletions src/datadoc/frontend/fields/display_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import TYPE_CHECKING
from typing import Any

import dash_bootstrap_components as dbc
import ssb_dash_components as ssb
from dash import dcc

Expand Down Expand Up @@ -228,16 +227,16 @@ def render(
variable_id: dict,
language: str, # noqa: ARG002 Required by Dash
variable: model.Variable,
) -> dbc.Checkbox:
) -> ssb.Checkbox:
"""Build Checkbox component."""
value = self.value_getter(variable, self.identifier)
return dbc.Checkbox(
return ssb.Checkbox(
label=self.display_name,
id=variable_id,
disabled=not self.editable,
label_class_name="ssb-checkbox checkbox-label",
class_name="ssb-checkbox",
value=value,
showDescription=True,
description=self.description,
)


Expand Down
1 change: 1 addition & 0 deletions src/datadoc/frontend/fields/display_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ class DatasetIdentifiers(str, Enum):
description="Filstien inneholder datasettets navn og stien til hvor det er lagret.",
obligatory=True,
editable=False,
value_getter=get_metadata_and_stringify,
),
DatasetIdentifiers.METADATA_CREATED_DATE: MetadataInputField(
identifier=DatasetIdentifiers.METADATA_CREATED_DATE.value,
Expand Down
2 changes: 1 addition & 1 deletion src/datadoc/frontend/fields/display_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class VariableIdentifiers(str, Enum):
VariableIdentifiers.DIRECT_PERSON_IDENTIFYING: MetadataCheckboxField(
identifier=VariableIdentifiers.DIRECT_PERSON_IDENTIFYING.value,
display_name="Direkte personidentifiserende informasjon",
description="Direkte personidentifiserende informasjon (DPI)",
description="Velges hvis variabelen inneholder informasjon som innebærer at enkeltpersoner kan identifiseres. Gjelder ikke hvis kolonnen er pseudonymisert eller anonymisert.",
obligatory=True,
),
VariableIdentifiers.DATA_SOURCE: MetadataInputField(
Expand Down
8 changes: 8 additions & 0 deletions tests/backend/test_datadoc_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ def test_existing_metadata_valid_id(
assert post_write_id == pre_open_id


def test_dataset_short_name(metadata: DataDocMetadata) -> None:
    # Short name is derived from the dataset path (stem up to the
    # period/version sections), not the raw file stem.
    assert metadata.dataset.short_name == "person_data"

def test_dataset_file_path(metadata: DataDocMetadata) -> None:
    # file_path must be the stringified dataset path, mirroring how
    # extract_metadata_from_dataset stores it on the model.
    assert metadata.dataset.file_path == str(metadata.dataset_path)


def test_variable_role_default_value(metadata: DataDocMetadata):
assert all(
v.variable_role == VariableRole.MEASURE.value for v in metadata.variables
Expand Down
Loading

0 comments on commit 9b2fe5c

Please sign in to comment.