Skip to content

Commit

Permalink
[Issue #2429] Various small fixes to get ELT process working in prod (#…
Browse files Browse the repository at this point in the history
…2430)

## Summary
Fixes #2429

### Time to review: __5 mins__

## Changes proposed
Actually set the default chunk size to 800

Handle cases where lookup values in the legacy system are ` `
(whitespace) and treat them the same as null/empty strings

Handle deleting the `current_opportunity_summary` record when an
`opportunity_summary` is deleted

## Context for reviewers
All issues found during testing in the prod ELT runs I was doing today

Chunk size affects how "big" the batches are when we do queries to the
Oracle foreign data wrapper as Oracle has a hard limit on the size of
queries that we were hitting.

For the lookup values, found a dozen or so opportunities where the
opportunity category was literally a ` ` (space) and we couldn't process
them. This fixes that and pre-emptively does it for other similar rows
(although I think most of them haven't had the issue).

The delete just takes advantage of SQLAlchemy's approach to deletes
which will automatically delete through relationships when you do
`db_session.delete(some_record)`. Now it'll know it needs to first
delete the current opportunity summary record as part of that.

## Additional information
Tests updated accordingly
  • Loading branch information
chouinar authored Oct 9, 2024
1 parent 219053d commit 96d864f
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 12 deletions.
2 changes: 1 addition & 1 deletion api/src/data_migration/command/load_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"--set-current/--no-set-current", default=True, help="run SetCurrentOpportunitiesTask"
)
@click.option(
"--insert-chunk-size", default=4000, help="chunk size for load inserts", show_default=True
"--insert-chunk-size", default=800, help="chunk size for load inserts", show_default=True
)
@click.option("--tables-to-load", "-t", help="table to load", multiple=True)
@flask_db.with_db_session()
Expand Down
26 changes: 17 additions & 9 deletions api/src/data_migration/transformation/transform_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@
}


def is_empty_str(value: str | None) -> bool:
# null/None, "" and any amount of just whitespace
# are treated as an empty string
return value is None or value.strip() == ""


def transform_opportunity(
source_opportunity: Topportunity, existing_opportunity: Opportunity | None
) -> Opportunity:
Expand Down Expand Up @@ -125,7 +131,7 @@ def transform_opportunity(


def transform_opportunity_category(value: str | None) -> OpportunityCategory | None:
if value is None or value == "":
if is_empty_str(value):
return None

if value not in OPPORTUNITY_CATEGORY_MAP:
Expand All @@ -135,7 +141,7 @@ def transform_opportunity_category(value: str | None) -> OpportunityCategory | N


def transform_applicant_type(value: str | None) -> ApplicantType | None:
if value is None or value == "":
if is_empty_str(value):
return None

if value not in APPLICANT_TYPE_MAP:
Expand All @@ -145,7 +151,7 @@ def transform_applicant_type(value: str | None) -> ApplicantType | None:


def transform_funding_category(value: str | None) -> FundingCategory | None:
if value is None or value == "":
if is_empty_str(value):
return None

if value not in FUNDING_CATEGORY_MAP:
Expand All @@ -155,7 +161,7 @@ def transform_funding_category(value: str | None) -> FundingCategory | None:


def transform_funding_instrument(value: str | None) -> FundingInstrument | None:
if value is None or value == "":
if is_empty_str(value):
return None

if value not in FUNDING_INSTRUMENT_MAP:
Expand Down Expand Up @@ -428,7 +434,7 @@ def transform_update_create_timestamp(
def convert_yn_bool(value: str | None) -> bool | None:
# Booleans in the Oracle database are stored as varchar/char
# columns with the values as Y/N (very rarely Yes/No)
if value is None or value == "":
if is_empty_str(value):
return None

if value in TRUTHY:
Expand All @@ -442,7 +448,7 @@ def convert_yn_bool(value: str | None) -> bool | None:


def convert_true_false_bool(value: str | None) -> bool | None:
if value is None or value == "":
if is_empty_str(value):
return None

return value == "TRUE"
Expand All @@ -463,7 +469,7 @@ def convert_action_type_to_is_deleted(value: str | None) -> bool:
# however many older records seem to not have this set at all
# The legacy system looks like it treats anything that isn't D
# the same, so we'll go with that assumption as well.
if value is None or value == "":
if is_empty_str(value):
return False

if value == "D": # D = Delete
Expand All @@ -476,11 +482,13 @@ def convert_action_type_to_is_deleted(value: str | None) -> bool:


def convert_numeric_str_to_int(value: str | None) -> int | None:
if value is None or value == "":
if is_empty_str(value):
return None

try:
return int(value)
# We know the value is a string and not None from the above
# function call but mypy can't see that, so ignore it here.
return int(value) # type: ignore
except ValueError:
# From what we've found in the legacy data, some of these numeric strings
# are written out as "none", "not available", "n/a" or similar. All of these
Expand Down
7 changes: 7 additions & 0 deletions api/src/db/models/opportunity_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,13 @@ class OpportunitySummary(ApiSchemaTable, TimestampMixin):
creator=lambda obj: LinkOpportunitySummaryApplicantType(applicant_type=obj),
)

# We configure a relationship from a summary to the current opportunity summary
# Just in case we delete this record, we can cascade to deleting the current_opportunity_summary
# record as well automatically.
current_opportunity_summary: Mapped["CurrentOpportunitySummary | None"] = relationship(
back_populates="opportunity_summary", single_parent=True, cascade="delete"
)

def for_json(self) -> dict:
json_valid_dict = super().for_json()

Expand Down
7 changes: 6 additions & 1 deletion api/tests/src/data_migration/transformation/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def setup_synopsis_forecast(
opportunity: Opportunity | None,
is_delete: bool = False,
is_already_processed: bool = False,
is_existing_current_opportunity_summary: bool = False,
source_values: dict | None = None,
):
if source_values is None:
Expand Down Expand Up @@ -128,9 +129,13 @@ def setup_synopsis_forecast(
)

if create_existing:
f.OpportunitySummaryFactory.create(
opportunity_summary = f.OpportunitySummaryFactory.create(
opportunity=opportunity, is_forecast=is_forecast, revision_number=revision_number
)
if is_existing_current_opportunity_summary:
f.CurrentOpportunitySummaryFactory.create(
opportunity=opportunity, opportunity_summary=opportunity_summary
)

return source_summary

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_process_opportunity_summaries(self, db_session, transform_opportunity_s
create_existing=True,
is_delete=True,
opportunity=opportunity3,
is_existing_current_opportunity_summary=True,
)
synopsis_delete1 = setup_synopsis_forecast(
is_forecast=False,
Expand Down Expand Up @@ -98,6 +99,7 @@ def test_process_opportunity_summaries(self, db_session, transform_opportunity_s
create_existing=False,
is_delete=True,
opportunity=opportunity4,
is_existing_current_opportunity_summary=True,
)
synopsis_update_invalid_yn_field = setup_synopsis_forecast(
is_forecast=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
("M", OpportunityCategory.MANDATORY),
("O", OpportunityCategory.OTHER),
(None, None),
(" ", None),
("", None),
],
)
Expand Down Expand Up @@ -90,6 +91,7 @@ def test_transform_update_create_timestamp(
("yes", True),
("no", False),
("", None),
(" ", None),
(None, None),
],
)
Expand All @@ -104,7 +106,7 @@ def test_convert_yn_boolean_unexpected_value(value):


@pytest.mark.parametrize(
"value,expected_value", [("D", True), ("U", False), ("", False), (None, False)]
"value,expected_value", [("D", True), ("U", False), ("", False), (" ", False), (None, False)]
)
def test_convert_action_type_to_is_deleted(value, expected_value):
assert transform_util.convert_action_type_to_is_deleted(value) == expected_value
Expand All @@ -124,6 +126,7 @@ def test_convert_action_type_to_is_deleted_unexpected_value(value):
("123123123", 123123123),
("-5", -5),
("", None),
(" ", None),
(None, None),
("words", None),
("zero", None),
Expand Down

0 comments on commit 96d864f

Please sign in to comment.