Skip to content

Commit

Permalink
avoid many counts
Browse files (browse the repository at this point in the history)
  • Loading branch information
JesperGomes committed Nov 29, 2024
1 parent 92dcf03 commit db954f5
Showing 1 changed file with 32 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,20 @@ def assert_dataframes_equal(actual: DataFrame, expected: DataFrame) -> None:
actual_excess = actual.subtract(expected)
expected_excess = expected.subtract(actual)

if actual_excess.count() > 0:
# compute each count once here; every count() call would otherwise re-materialize the DataFrame
actual_excess_count = actual_excess.count()
expected_excess_count = expected_excess.count()

if actual_excess_count > 0:
print("Actual excess:")
actual_excess.show(3000, False)

if expected_excess.count() > 0:
if expected_excess_count > 0:
print("Expected excess:")
expected_excess.show(3000, False)

assert (
actual_excess.count() == 0 and expected_excess.count() == 0
actual_excess_count == 0 and expected_excess_count == 0
), "Dataframes data are not equal"


Expand Down Expand Up @@ -108,31 +112,31 @@ def assert_dataframe_and_schema(
print("EXPECTED:")
expected.show(3000, False)

try:
_assert_no_duplicates(actual)
except AssertionError:

if (
not feature_tests_configuration.show_columns_when_actual_and_expected_are_equal
):
actual, expected = drop_columns_if_the_same(actual, expected)

print("DUPLICATED ROWS IN ACTUAL:")
_show_duplicates(actual).show(3000, False)
raise

try:
_assert_no_duplicates(expected)
except AssertionError:

if (
not feature_tests_configuration.show_columns_when_actual_and_expected_are_equal
):
actual, expected = drop_columns_if_the_same(actual, expected)

print("DUPLICATED ROWS IN EXPECTED:")
_show_duplicates(expected).show(3000, False)
raise
# try:
# _assert_no_duplicates(actual)
# except AssertionError:
#
# if (
# not feature_tests_configuration.show_columns_when_actual_and_expected_are_equal
# ):
# actual, expected = drop_columns_if_the_same(actual, expected)
#
# print("DUPLICATED ROWS IN ACTUAL:")
# _show_duplicates(actual).show(3000, False)
# raise
#
# try:
# _assert_no_duplicates(expected)
# except AssertionError:
#
# if (
# not feature_tests_configuration.show_columns_when_actual_and_expected_are_equal
# ):
# actual, expected = drop_columns_if_the_same(actual, expected)
#
# print("DUPLICATED ROWS IN EXPECTED:")
# _show_duplicates(expected).show(3000, False)
# raise

try:
assert_dataframes_equal(actual, expected)
Expand Down

0 comments on commit db954f5

Please sign in to comment.