Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BDRSPS-766 Make frictionless assume every csv input is utf-8 encoded #179

Merged
Merged 1 commit on Aug 19, 2024.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Comments failed to load; loading…
Jump to
Jump to file
Files failed to load; loading…
Diff view
Diff view
1 change: 1 addition & 0 deletions abis_mapping/base/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def extra_fields_schema(
resource = frictionless.Resource(
source=data,
format="csv",
encoding="utf-8",
)
resource.infer()

Expand Down
4 changes: 3 additions & 1 deletion abis_mapping/templates/incidental_occurrence_data/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def apply_validation(
resource = frictionless.Resource(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema
schema=schema,
encoding="utf-8",
)

# Validate
Expand Down Expand Up @@ -145,6 +146,7 @@ def apply_mapping(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Initialise Graph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def apply_validation(
resource = frictionless.Resource(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema
schema=schema,
encoding="utf-8",
)

# Validate
Expand Down Expand Up @@ -152,6 +153,7 @@ def apply_mapping(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Initialise Graph
Expand Down
2 changes: 2 additions & 0 deletions abis_mapping/templates/survey_metadata/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def apply_validation(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Validate
Expand Down Expand Up @@ -149,6 +150,7 @@ def apply_mapping(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Initialise Graph
Expand Down
3 changes: 3 additions & 0 deletions abis_mapping/templates/survey_occurrence_data/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def apply_validation(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Validate
Expand Down Expand Up @@ -178,6 +179,7 @@ def extract_site_id_keys(
source=data,
format="csv",
schema=schema,
encoding="utf-8",
)

# Iterate over rows to extract values
Expand Down Expand Up @@ -226,6 +228,7 @@ def apply_mapping(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Initialise Graph
Expand Down
3 changes: 3 additions & 0 deletions abis_mapping/templates/survey_site_data/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def apply_validation(
source=data,
format="csv",
schema=schema,
encoding="utf-8",
)

# Validate
Expand Down Expand Up @@ -142,6 +143,7 @@ def extract_geometry_defaults(
source=data,
format="csv",
schema=schema,
encoding="utf-8",
)

# Context manager for row streaming
Expand Down Expand Up @@ -218,6 +220,7 @@ def apply_mapping(
source=data,
format="csv", # TODO -> Hardcoded to csv for now
schema=schema,
encoding="utf-8",
)

# Initialise Graph
Expand Down
12 changes: 10 additions & 2 deletions tests/base/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,12 @@ def test_extract_extra_fields(mocker: pytest_mock.MockerFixture) -> None:
schema = base.mapper.ABISMapper.extra_fields_schema(csv_data, full_schema=True)

# Construct resource
resource = frictionless.Resource(source=csv_data, format="csv", schema=schema)
resource = frictionless.Resource(
source=csv_data,
format="csv",
schema=schema,
encoding="utf-8",
)

# Open resource for row streaming
with resource.open() as r:
Expand Down Expand Up @@ -215,6 +220,7 @@ def test_add_extra_fields_json(mocker: pytest_mock.MockerFixture) -> None:
resource = frictionless.Resource(
source=csv_data,
format="csv",
encoding="utf-8",
)

# Open resource for row streaming
Expand Down Expand Up @@ -266,6 +272,7 @@ def test_add_extra_fields_json_no_data(mocker: pytest_mock.MockerFixture) -> Non
resource = frictionless.Resource(
source=csv_data,
format="csv",
encoding="utf-8",
)

# Open resource for row streaming
Expand Down Expand Up @@ -311,7 +318,8 @@ def test_extra_fields_middle(mocker: pytest_mock.MockerFixture) -> None:
resource = frictionless.Resource(
source=csv_data,
format="csv",
schema=frictionless.Schema.from_descriptor(descriptor)
schema=frictionless.Schema.from_descriptor(descriptor),
encoding="utf-8",
)

# These errors must be skipped to enable extra columns
Expand Down
1 change: 1 addition & 0 deletions tests/plugins/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def test_checks_not_empty() -> None:
# Construct Fake Resource
resource = frictionless.Resource(
source=__file__,
encoding="utf-8",
)

# Validate
Expand Down
1 change: 1 addition & 0 deletions tests/plugins/test_tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def test_checks_is_tabular() -> None:
# Construct Fake Resource
resource = frictionless.Resource(
source=__file__,
encoding="utf-8",
)

# Validate
Expand Down
2 changes: 1 addition & 1 deletion tests/templates/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,4 @@ def test_allows_extra_cols(
mocked_validate.assert_called_once()
# The following asserts determine that the extra fields schema was used in creating the resource
mocked_extra_fields_schema.assert_called_once()
mocked_resource.assert_called_with(source=data, schema=schema, format='csv')
mocked_resource.assert_called_with(source=data, schema=schema, format='csv', encoding="utf-8")