Skip to content

Commit

Permalink
Shortnames can have non ascii characters (#432)
Browse files Browse the repository at this point in the history
* Updated dapla toolbet metadata package

* Added fix to error

* Update .vscode/settings.json

* Update .vscode/settings.json

* Removed unessasary code

* Fixed end of file

* Fixed end of file

* Small change to test

---------

Co-authored-by: Jorgen-5 <[email protected]>
  • Loading branch information
Jorgen-5 and Jorgen-5 authored Nov 6, 2024
1 parent e3b1b9f commit 0fbf501
Show file tree
Hide file tree
Showing 11 changed files with 1,967 additions and 1,302 deletions.
2,741 changes: 1,445 additions & 1,296 deletions poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/datadoc/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@
CHECK_OBLIGATORY_METADATA_DATASET_MESSAGE = "Følgende datasett felt har ikke verdi:"
CHECK_OBLIGATORY_METADATA_VARIABLES_MESSAGE = "Følgende variabler har felt uten verdi:"
DAPLA_MANUAL_TEXT = "Dapla manual navnestandard"

ILLEGAL_SHORTNAME_WARNING = (
"Noen av variablene i datasetter følger ikke navnestandard for kortnavn"
)
ILLEGAL_SHORTNAME_WARNING_MESSAGE = "Følgende navnestandard er utarbeidet for variabler: Alfanumerisk begrenset til a-z, A-Z, 0-9, - (bindestrek) og (understrek). Kortnavn som ikke følger standarden:"
28 changes: 28 additions & 0 deletions src/datadoc/frontend/callbacks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from datadoc import state
from datadoc.constants import CHECK_OBLIGATORY_METADATA_DATASET_MESSAGE
from datadoc.constants import CHECK_OBLIGATORY_METADATA_VARIABLES_MESSAGE
from datadoc.constants import ILLEGAL_SHORTNAME_WARNING
from datadoc.constants import ILLEGAL_SHORTNAME_WARNING_MESSAGE
from datadoc.constants import MISSING_METADATA_WARNING
from datadoc.frontend.components.builders import AlertTypes
from datadoc.frontend.components.builders import build_ssb_alert
Expand Down Expand Up @@ -364,6 +366,31 @@ def variables_control(error_message: str | None, variables: list) -> dbc.Alert |
)


def check_variable_names(
variables: list,
) -> dbc.Alert | None:
"""Checks if a variable shortname complies with the naming standard.
Returns:
An ssb alert with a message saying that what names dont comply with the naming standard.
"""
illegal_names: list = [
v.short_name
for v in variables
if not re.match(r"^[a-z0-9_]{3,}$", v.short_name)
]

if not illegal_names:
return None
return build_ssb_alert(
AlertTypes.WARNING,
ILLEGAL_SHORTNAME_WARNING,
ILLEGAL_SHORTNAME_WARNING_MESSAGE,
None,
illegal_names,
)


def save_metadata_and_generate_alerts(metadata: Datadoc) -> list:
"""Save the metadata document to disk and check obligatory metadata.
Expand Down Expand Up @@ -397,4 +424,5 @@ def save_metadata_and_generate_alerts(metadata: Datadoc) -> list:
success_alert,
dataset_control(missing_obligatory_dataset),
variables_control(missing_obligatory_variables, metadata.variables),
check_variable_names(metadata.variables),
]
5 changes: 3 additions & 2 deletions src/datadoc/frontend/callbacks/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import logging
import urllib.parse
from typing import TYPE_CHECKING

from datadoc import state
Expand Down Expand Up @@ -84,7 +85,7 @@ def handle_multi_language_metadata(

if isinstance(new_value, str):
return find_existing_language_string(
state.metadata.variables_lookup[updated_row_id],
state.metadata.variables_lookup[urllib.parse.unquote(updated_row_id)],
new_value,
metadata_field,
language,
Expand Down Expand Up @@ -128,7 +129,7 @@ def accept_variable_metadata_input(

# Write the value to the variables structure
setattr(
state.metadata.variables_lookup[variable_short_name],
state.metadata.variables_lookup[urllib.parse.unquote(variable_short_name)],
metadata_field,
new_value,
)
Expand Down
14 changes: 14 additions & 0 deletions src/datadoc/frontend/fields/display_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import logging
import urllib
from abc import ABC
from abc import abstractmethod
from dataclasses import dataclass
Expand Down Expand Up @@ -141,6 +142,13 @@ class DisplayMetadata(ABC):
obligatory: bool = False
editable: bool = True

def url_encode_shortname_ids(self, component_id: dict) -> None:
"""Encodes id to hanlde non ascii values."""
if "variable_short_name" in component_id:
component_id["variable_short_name"] = urllib.parse.quote(
component_id["variable_short_name"],
)

@abstractmethod
def render(
self,
Expand All @@ -164,6 +172,7 @@ def render(
metadata: BaseModel,
) -> ssb.Input:
"""Build an Input component."""
self.url_encode_shortname_ids(component_id)
return ssb.Input(
label=self.display_name,
id=component_id,
Expand All @@ -189,6 +198,7 @@ def render(
metadata: BaseModel,
) -> ssb.Dropdown:
"""Build Dropdown component."""
self.url_encode_shortname_ids(component_id)
return ssb.Dropdown(
header=self.display_name,
id=component_id,
Expand All @@ -211,6 +221,7 @@ def render(
metadata: BaseModel,
) -> ssb.Input:
"""Build Input date component."""
self.url_encode_shortname_ids(component_id)
return ssb.Input(
label=self.display_name,
id=component_id,
Expand Down Expand Up @@ -240,6 +251,7 @@ def render(
) -> ssb.Input:
"""Build Input date component."""
component_id["type"] = self.id_type
self.url_encode_shortname_ids(component_id)
return ssb.Input(
label=self.display_name,
id=component_id,
Expand Down Expand Up @@ -269,6 +281,7 @@ def render_input_group(
metadata: BaseModel,
) -> html.Section:
"""Build section with Input components for each language."""
self.url_encode_shortname_ids(component_id)
if "variable_short_name" in component_id:
return html.Section(
children=[
Expand Down Expand Up @@ -353,6 +366,7 @@ def render(
metadata: BaseModel,
) -> ssb.Checkbox:
"""Build Checkbox component."""
self.url_encode_shortname_ids(component_id)
return ssb.Checkbox(
label=self.display_name,
id=component_id,
Expand Down
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@

from .utils import TEST_EXISTING_METADATA_DIRECTORY
from .utils import TEST_PARQUET_FILE_NAME
from .utils import TEST_PARQUET_FILE_NAME_ILLEGAL_SHORTNAMES
from .utils import TEST_PARQUET_FILEPATH
from .utils import TEST_PARQUET_FILEPATH_ILLEGAL_SHORTNAMES
from .utils import TEST_RESOURCES_DIRECTORY

if TYPE_CHECKING:
Expand Down Expand Up @@ -89,6 +91,23 @@ def metadata(
)


@pytest.fixture()
def metadata_illegal_shortnames(
_mock_timestamp: None,
_mock_user_info: None,
subject_mapping_fake_statistical_structure: StatisticSubjectMapping,
tmp_path: Path,
) -> Datadoc:
shutil.copy(
TEST_PARQUET_FILEPATH_ILLEGAL_SHORTNAMES,
tmp_path / TEST_PARQUET_FILE_NAME_ILLEGAL_SHORTNAMES,
)
return Datadoc(
str(tmp_path / TEST_PARQUET_FILE_NAME_ILLEGAL_SHORTNAMES),
statistic_subject_mapping=subject_mapping_fake_statistical_structure,
)


@pytest.fixture()
def existing_metadata_path() -> Path:
return TEST_EXISTING_METADATA_DIRECTORY
Expand Down
32 changes: 28 additions & 4 deletions tests/frontend/callbacks/test_callbacks_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dataclasses import dataclass
from unittest import mock

import dash_bootstrap_components as dbc
Expand Down Expand Up @@ -76,16 +77,39 @@ def test_render_tabs(tab: str, identifier: str):

def test_save_and_generate_alerts():
mock_metadata = mock.Mock()

@dataclass
class Variable:
short_name: str

mock_metadata.variables = [
"var1",
"var2",
Variable(short_name="var"),
Variable(short_name="var"),
]
state.metadata = mock_metadata
result = save_metadata_and_generate_alerts(
mock_metadata,
)
assert (result[1] and result[2] and result[3]) is None
assert isinstance(result[0], dbc.Alert)


def test_save_and_generate_illegal_shortname_alert():
mock_metadata = mock.Mock()

@dataclass
class MockVariable:
short_name: str

num_list_of_alerts = 3
assert len(result) == num_list_of_alerts
mock_metadata.variables = [
MockVariable(short_name="var illegal"),
MockVariable(short_name="rådyr"),
]
state.metadata = mock_metadata
result = save_metadata_and_generate_alerts(
mock_metadata,
)
assert (result[1] and result[2]) is None
assert isinstance(result[0], dbc.Alert)
assert isinstance(result[3], dbc.Alert)
assert result[3].color == "warning"
21 changes: 21 additions & 0 deletions tests/frontend/callbacks/test_variables_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,3 +634,24 @@ def test_variables_metadata_control_dont_return_alert(metadata: Datadoc):
missing_metadata = str(w[0].message)
result = variables_control(missing_metadata, metadata.variables)
assert result is None


def test_accept_variable_metadata_input_when_shortname_is_non_ascii(
metadata_illegal_shortnames: Datadoc,
):
state.metadata = metadata_illegal_shortnames
assert metadata_illegal_shortnames.variables[-1].short_name == "rådyr"
assert (
accept_variable_metadata_input(
"Format value",
metadata_illegal_shortnames.variables[-1].short_name,
metadata_field=VariableIdentifiers.FORMAT.value,
language="nb",
)
is None
)

assert (
getattr(state.metadata.variables[-1], VariableIdentifiers.FORMAT.value)
== "Format value"
)
Binary file not shown.
Loading

0 comments on commit 0fbf501

Please sign in to comment.