diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index d7396f0fcd12..2b4ab81ad589 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1220,6 +1220,8 @@ type Table - If there are no columns in the output table, a `No_Output_Columns` is raised as an error regardless of the problem behavior, because it is not possible to create a table without any columns. + - If a given aggregate is not supported by the backend, + `Unsupported_Database_Operation` is reported. - If a column index is out of range, a `Column_Indexes_Out_Of_Range` is reported according to the `on_problems` setting, unless `error_on_missing_columns` is set to `True`, in which case it is @@ -1256,11 +1258,17 @@ type Table aggregate : Vector Aggregate_Column -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Invalid_Aggregate_Column | Invalid_Output_Column_Names | Duplicate_Output_Column_Names | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings aggregate self columns (error_on_missing_columns=False) (on_problems=Report_Warning) = validated = Aggregate_Column_Helper.prepare_aggregate_columns columns self error_on_missing_columns=error_on_missing_columns - on_problems.attach_problems_before validated.problems <| - key_columns = validated.key_columns + key_columns = validated.key_columns + key_problems = key_columns.flat_map internal_column-> + column = self.make_column internal_column + case column.value_type.is_floating_point of + True -> [Floating_Point_Equality.Error column.name] + False -> [] + on_problems.attach_problems_before validated.problems+key_problems <| resolved_aggregates = validated.valid_columns key_expressions = key_columns.map .expression new_ctx = self.context.set_groups key_expressions + problem_builder = Problem_Builder.new ## TODO [RW] here we will perform as many fetches as there are aggregate columns, but technically we could perform just one fetch fetching all column types - TODO we should do that. We can @@ -1269,7 +1277,7 @@ type Table point to a single query. See #6118. infer_from_database_callback expression = - SQL_Type_Reference.new self.connection self.context expression + SQL_Type_Reference.new self.connection self.context expression dialect = self.connection.dialect type_mapping = dialect.get_type_mapping infer_return_type op_kind columns expression = @@ -1277,7 +1285,7 @@ type Table results = resolved_aggregates.map p-> agg = p.second new_name = p.first - result = Aggregate_Helper.make_aggregate_column agg new_name dialect infer_return_type + result = Aggregate_Helper.make_aggregate_column self agg new_name dialect infer_return_type problem_builder ## If the `result` did contain an error, we catch it to be able to store it in a vector and then we will partition the created columns and failures. @@ -1291,9 +1299,11 @@ type Table the `lift_aggregate` method to push the aggregates into a subquery. new_columns = partitioned.second - problems = partitioned.first.map .value - on_problems.attach_problems_before problems <| - self.updated_context_and_columns new_ctx new_columns subquery=True + problem_builder.attach_problems_before on_problems <| + problems = partitioned.first.map .value + on_problems.attach_problems_before problems <| + if new_columns.is_empty then (Error.throw No_Output_Columns) else + self.updated_context_and_columns new_ctx new_columns subquery=True ## Returns a new table with a chosen subset of columns left unchanged and the other columns pivoted to rows with a single name field and a single diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso index f94eb2a20650..4235f85ff477 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso @@ -2,7 +2,9 @@ from Standard.Base import all hiding First, Last import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Table.Data.Aggregate_Column.Aggregate_Column +import Standard.Table.Internal.Problem_Builder.Problem_Builder from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all +from Standard.Table.Errors import Floating_Point_Equality import project.Data.Dialect.Dialect import project.Data.Table.Table @@ -16,14 +18,16 @@ from project.Errors import Unsupported_Database_Operation Creates an `Internal_Column` that will represent the computed aggregate. Arguments: + - table: The table owning the columns used in the aggregation. - aggregate: The description of the aggregation to compute. - new_name: The name for the created column. - dialect: The dialect of the database to generate the SQL for. - infer_return_type: A function that takes 3 arguments (name of the operation, list of input columns and a raw SQL IR Expression) and returns the inferred type for the aggregation. -make_aggregate_column : Aggregate_Column -> Text -> Dialect -> (Any -> Any -> Any -> SQL_Type_Reference) -> SQL_Expression -make_aggregate_column aggregate new_name dialect infer_return_type = + - problem_builder: A `Problem_Builder` instance used for reporting warnings. +make_aggregate_column : Table -> Aggregate_Column -> Text -> Dialect -> (Any -> Any -> Any -> SQL_Type_Reference) -> Problem_Builder -> SQL_Expression +make_aggregate_column table aggregate new_name dialect infer_return_type problem_builder = is_non_empty_selector v = v.is_nothing.not && v.not_empty simple_aggregate op_kind columns = expression = SQL_Expression.Operation op_kind (columns.map .expression) @@ -55,7 +59,11 @@ make_aggregate_column aggregate new_name dialect infer_return_type = expression = SQL_Expression.Operation op_kind [SQL_Expression.Constant p, c.expression] sql_type_ref = infer_return_type op_kind [c] expression Internal_Column.Value new_name sql_type_ref expression - Mode c _ -> simple_aggregate "MODE" [c] + Mode c _ -> + col = table.make_column c + if col.value_type.is_floating_point then + problem_builder.report_other_warning (Floating_Point_Equality.Error new_name) + simple_aggregate "MODE" [c] First c _ ignore_nothing order_by -> case is_non_empty_selector order_by of False -> Error.throw (Unsupported_Database_Operation.Error "`First` aggregation requires at least one `order_by` column.") True -> diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 22fccdc95aa1..4cd8bad9d0c6 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1756,12 +1756,13 @@ type Table problem_builder.attach_problems_before on_problems <| Illegal_Argument.handle_java_exception <| java_key_columns = grouping.map .java_column - index = self.java_table.indexFromColumns java_key_columns.to_array + index = self.java_table.indexFromColumns java_key_columns.to_array name_mapper = if matched_name.is_empty then Aggregate_Column_Helper.default_aggregate_column_name else if validated_values.length == 1 then (_ -> "") else all_same = Aggregate_Column_Helper.all_same_column validated_values - c -> Aggregate_Column_Helper.default_aggregate_column_name c all_same + include_column_name = all_same.not + c -> Aggregate_Column_Helper.default_aggregate_column_name c include_column_name data_columns = validated_values.map c-> col_name = c.new_name.if_nothing <| diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index 427609066879..122f5965e25a 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -1,15 +1,15 @@ from Standard.Base import all hiding First, Last +import project.Data.Aggregate_Column.Aggregate_Column import project.Data.Column.Column import project.Data.Column_Selector.Column_Selector import project.Data.Sort_Column.Sort_Column +import project.Data.Table.Table +import project.Data.Type.Value_Type.Value_Type import project.Internal.Problem_Builder.Problem_Builder import project.Internal.Table_Helpers import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy - -import Standard.Table.Data.Aggregate_Column.Aggregate_Column from project.Data.Aggregate_Column.Aggregate_Column import all -import project.Data.Table.Table from project.Errors import No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation polyglot java import org.enso.table.aggregations.Aggregator @@ -107,7 +107,7 @@ default_aggregate_column_name aggregate_column include_column=True = _ -> prefix = Meta.get_simple_type_name aggregate_column . replace "_" " " c = aggregate_column.column - prefix + " " + (if include_column then c.name else "") + prefix + (if include_column then " " + c.name else "") ## PRIVATE Utility function to check if all aggregates are operating on the same source column. @@ -128,6 +128,8 @@ all_same_column aggregates = indices or column references potentially from a different table) are replaced with column references from the provided table. + It also verifies that the columns have the right types. + `Sort_Column`s are replaced with column references of matched columns coming from the provided table. @@ -160,6 +162,18 @@ resolve_aggregate table problem_builder aggregate_column = Internal_Order_By_Column_Reference.Value c.column c.associated_selector.direction sort_columns + resolve_numeric c = + internal_column = resolve c + col = columns_helper.make_column internal_column + Value_Type.expect_numeric col <| + internal_column + + resolve_text c = + internal_column = resolve c + col = columns_helper.make_column internal_column + Value_Type.expect_text col <| + internal_column + result = case aggregate_column of Group_By c new_name -> Group_By (resolve c) new_name Count new_name -> Count new_name @@ -168,21 +182,21 @@ resolve_aggregate table problem_builder aggregate_column = Count_Distinct new_c new_name ignore_nothing Count_Not_Nothing c new_name -> Count_Not_Nothing (resolve c) new_name Count_Nothing c new_name -> Count_Nothing (resolve c) new_name - Count_Not_Empty c new_name -> Count_Not_Empty (resolve c) new_name - Count_Empty c new_name -> Count_Empty (resolve c) new_name - Sum c new_name -> Sum (resolve c) new_name - Average c new_name -> Average (resolve c) new_name - Median c new_name -> Median (resolve c) new_name - Percentile p c new_name -> Percentile p (resolve c) new_name + Count_Not_Empty c new_name -> Count_Not_Empty (resolve_text c) new_name + Count_Empty c new_name -> Count_Empty (resolve_text c) new_name + Sum c new_name -> Sum (resolve_numeric c) new_name + Average c new_name -> Average (resolve_numeric c) new_name + Median c new_name -> Median (resolve_numeric c) new_name + Percentile p c new_name -> Percentile p (resolve_numeric c) new_name Mode c new_name -> Mode (resolve c) new_name - Standard_Deviation c new_name population -> Standard_Deviation (resolve c) new_name population - Concatenate c new_name separator prefix suffix quote_char -> Concatenate (resolve c) new_name separator prefix suffix quote_char + Standard_Deviation c new_name population -> Standard_Deviation (resolve_numeric c) new_name population + Concatenate c new_name separator prefix suffix quote_char -> Concatenate (resolve_text c) new_name separator prefix suffix quote_char First c new_name ignore_nothing order_by -> First (resolve c) new_name ignore_nothing (resolve_order_by order_by) Last c new_name ignore_nothing order_by -> Last (resolve c) new_name ignore_nothing (resolve_order_by order_by) Maximum c new_name -> Maximum (resolve c) new_name Minimum c new_name -> Minimum (resolve c) new_name - Shortest c new_name -> Shortest (resolve c) new_name - Longest c new_name -> Longest (resolve c) new_name + Shortest c new_name -> Shortest (resolve_text c) new_name + Longest c new_name -> Longest (resolve_text c) new_name ## Downgrade the `Internal_Missing_Column_Error` error into a `Nothing` value, keeping any other dataflow errors intact. diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso index 7167a2a144ac..2af7af331dbc 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso @@ -91,7 +91,7 @@ expect_warning expected_warning result = found = warnings.find if_missing=Nothing x-> (x == expected_warning) || (x.is_a expected_warning) if found.is_nothing then - loc = Meta.get_source_location 3 + loc = Meta.get_source_location 2 Test.fail "Expected the result to contain a warning: "+expected_warning.to_text+", but it did not. The warnings were "+warnings.short_display_text+' (at '+loc+').' ## UNSTABLE diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java index 5f8ec788a68c..90567d4c23d8 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java @@ -215,11 +215,12 @@ public Table makeCrossTabTable( } // Merge Problems - AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 2]; + AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 3]; problems[0] = this.problems; problems[1] = AggregatedProblems.of(outputTableNameDeduplicator.getProblems()); + problems[2] = nameIndex.getProblems(); for (int i = 0; i < aggregates.length; i++) { - problems[i + 2] = aggregates[i].getProblems(); + problems[i + 3] = aggregates[i].getProblems(); } AggregatedProblems merged = AggregatedProblems.merge(problems); diff --git a/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso index 133affc93428..95a75f0b7c52 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso @@ -30,7 +30,7 @@ spec setup = test_selection = setup.aggregate_test_selection expect_column_names names table = - table.columns . map .name . should_equal names frames_to_skip=2 + table.columns . map .name . should_equal names frames_to_skip=3 find_row key table (columns=Nothing) = table_columns = if columns.is_nothing then table.columns else columns.map x->(table.columns.at x) @@ -1305,7 +1305,7 @@ spec setup = err4.name.should_equal "[MISSING]*[MISSING]" err4.expression_error.should_equal (No_Such_Column.Error "MISSING") - Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=(resolve_pending test_selection.problem_handling) <| + Test.group prefix+"Table.aggregate should raise warnings when there are issues" <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3]] @@ -1399,70 +1399,66 @@ spec setup = tester = expect_column_names ["Count"] Problems.test_problem_handling action problems tester + Test.group prefix+"Table.aggregate should report warnings and errors based on types" <| + Test.specify "should warn if grouping on a floating point" <| + t = table_builder [["X", [1.1, 2.2, 3.3, 2.2]]] + action = t.aggregate [Group_By "X"] on_problems=_ + problems = [Floating_Point_Equality.Error "X"] + tester = expect_column_names ["X"] + Problems.test_problem_handling action problems tester + + case test_selection.advanced_stats of + True -> + Test.specify "should warn if computing an aggregation relying on floating point equality" <| + t = table_builder [["X", [1.5, 2.0, 1.5, 1.0]]] + action = t.aggregate [Mode "X"] on_problems=_ + problems = [Floating_Point_Equality.Error "Mode X"] + tester = expect_column_names ["Mode X"] + Problems.test_problem_handling action problems tester + False -> + Test.specify "should error if unsupported operations are selected" <| + t1 = table_builder [["X", [1.5, 2.0, 1.5, 1.0]]] + t2 = t1.aggregate [Mode "X"] on_problems=Problem_Behavior.Ignore + t2.should_fail_with No_Output_Columns + + Test.specify "should check types" <| + table = table_builder [["Text", ["a", "b"]], ["Int", [1, 2]], ["Float", [1.1, 2.2]]] + [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| + non_numbers = [Average "Text", Standard_Deviation "Text", Median "Text", Sum "Text"] + non_numbers.each agg-> Test.with_clue "Aggregation="+agg.to_text+" " <| + err = table.aggregate [agg] on_problems=pb + err.should_fail_with Invalid_Value_Type + err.catch.related_column.should_equal "Text" + + non_texts = [Shortest "Int", Longest "Int", Concatenate "Int", Count_Empty "Int", Count_Not_Empty "Int"] + non_texts.each agg-> Test.with_clue "Aggregation="+agg.to_text+" " <| + err = table.aggregate [agg] on_problems=pb + err.should_fail_with Invalid_Value_Type + err.catch.related_column.should_equal "Int" + + Test.specify "should return predictable types" <| + table = table_builder [["Text", ["a", "b"]], ["Int", [1, 2]], ["Float", [1.1, 2.2]]] + + t1 = table.aggregate [Group_By "Text", Group_By "Int", Group_By "Float"] + t1.at "Text" . value_type . is_text . should_be_true + t1.at "Int" . value_type . is_integer . should_be_true + t1.at "Float" . value_type . is_floating_point . should_be_true + + t2 = table.aggregate [Count, Count_Not_Empty "Text", Sum "Int", Sum "Float", Average "Int", Concatenate "Text"] + t2.at "Count" . value_type . is_integer . should_be_true + t2.at "Count Not Empty Text" . value_type . is_integer . should_be_true + t2.at "Sum Int" . value_type . is_numeric . should_be_true + t2.at "Sum Float" . value_type . is_floating_point . should_be_true + t2.at "Average Int" . value_type . is_numeric . should_be_true + t2.at "Concatenate Text" . value_type . is_text . should_be_true + Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems) <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3.1]] col3 = ["Text", ["A", ",", "C"]] col4 = ["Mixed", ["A", 1, "C"]] - Table.new [col1, col2, col3, col4] - - Test.specify "should warn if grouping on a floating point" <| - action = table.aggregate [Group_By 1] on_problems=_ - # All rows are marked as floating point, because the integers get coerced to double when stored in DoubleStorage - problems = [Floating_Point_Equality.Error "Value"] - tester = expect_column_names ["Value"] - Problems.test_problem_handling action problems tester - - Test.specify "should warn if totaling on a non number" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Sum "Text"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Sum Text" [0] "Cannot convert to a number.") - err.catch.to_display_text . should_equal "The Sum Text could not be calculated at [0]: Cannot convert to a number." - - Test.specify "should warn if averaging on a non number" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Average "Text"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Average Text" [0] "Cannot convert to a number.") - - Test.specify "should warn if calculating standard deviation on a non number" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Standard_Deviation "Text"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Standard Deviation Text" [0] "Cannot convert to a number.") - - Test.specify "should warn if median on a non number" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Median "Text"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Median Text" [0] "Cannot convert to a number.") - - Test.specify "should warn if trying shortest on a non text" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Shortest "Index"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Shortest Index" [0] "Not a text value.") - - Test.specify "should warn if trying count empties on a non text" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Count_Empty "Index"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Count Empty Index" [0] "Not a text value.") - - Test.specify "should warn if trying concatenate on a non text" <| - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.aggregate [Concatenate "Index"] on_problems=pb - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Concatenate Index" [0] "Not a text value.") - - Test.specify "should warn if trying concatenate unquoted delimiters" <| - column = Concatenate "Text" separator="," - action = table.aggregate [column] on_problems=_ - problems = [Unquoted_Delimiter.Error "Concatenate Text" [1]] - tester = expect_column_names ["Concatenate Text"] - Problems.test_problem_handling action problems tester + table_builder [col1, col2, col3, col4] Test.specify "should not fail if trying concatenate unquoted delimiters with no separator" <| column = Concatenate "Text" separator="" @@ -1480,21 +1476,29 @@ spec setup = err.catch.message.should_start_with "Cannot compare values" err.catch.rows.should_equal [1] - Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems) <| - table = - col1 = ["Key", ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"]] - col2 = ["Value", [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]] - col3 = ["Float", col2.second.map x->(1.5*x)] - Table.new [col1, col2, col3] + Test.specify "should warn if trying concatenate unquoted delimiters" <| + column = Concatenate "Text" separator="," + action = table.aggregate [column] on_problems=_ + problems = [Unquoted_Delimiter.Error "Concatenate Text" [1]] + tester = expect_column_names ["Concatenate Text"] + Problems.test_problem_handling action problems tester + Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems) <| Test.specify "should merge Invalid Aggregation warnings" <| - new_table = table.aggregate [Group_By "Key", Concatenate "Value"] - err = new_table.catch - err . should_be_a Invalid_Aggregation.Error - err.column . should_equal "Concatenate Value" - err.rows . length . should_equal 15 + table = table_builder [["X", (0.up_to 16).map (_-> ",")]] + new_table = table.aggregate [Concatenate "X" separator=","] + problems = Problems.get_attached_warnings new_table + warning = problems.first + warning . should_be_a Unquoted_Delimiter.Error + warning.column . should_equal "Concatenate X" + warning.rows . length . should_equal 16 Test.specify "should merge Floating Point Grouping warnings" <| + table = + col1 = ["Key", ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"]] + col2 = ["Value", [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]] + col3 = ["Float", col2.second.map x->(1.5*x)] + table_builder [col1, col2, col3] new_table = table.aggregate [Group_By "Float", Count] problems = Problems.get_attached_warnings new_table problems.length . should_equal 1 diff --git a/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso index 91020d727e55..1ef96094f410 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso @@ -1,7 +1,7 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument -from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Average, Count, Group_By, Sum +from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Average, Count, Group_By, Sum, Concatenate import Standard.Table.Data.Expression.Expression_Error from Standard.Table.Errors import all @@ -95,14 +95,14 @@ spec setup = Test.specify "should allow multiple values aggregates" <| t1 = table.cross_tab values=[Count, Sum "Value"] - t1.column_names . should_equal ["x Count", "x Sum Value", "y Count", "y Sum Value", "z Count", "z Sum Value"] + t1.column_names . should_equal ["x Count", "x Sum", "y Count", "y Sum", "z Count", "z Sum"] t1.row_count . should_equal 1 t1.at "x Count" . to_vector . should_equal [4] - t1.at "x Sum Value" . to_vector . should_equal [10] + t1.at "x Sum" . to_vector . should_equal [10] t1.at "y Count" . to_vector . should_equal [3] - t1.at "y Sum Value" . to_vector . should_equal [18] + t1.at "y Sum" . to_vector . should_equal [18] t1.at "z Count" . to_vector . should_equal [2] - t1.at "z Sum Value" . to_vector . should_equal [17] + t1.at "z Sum" . to_vector . should_equal [17] Test.specify "should fail if name_field is not found" <| err1 = table.cross_tab [] "Name" @@ -160,26 +160,6 @@ spec setup = problems3 = [Duplicate_Output_Column_Names.Error ["x"]] Problems.test_problem_handling action3 problems3 tester3 - Test.specify "should fail on invalid aggregations" <| - table = table_builder [["Key", ["x", "x", "x", "x", "y", "y", "y", "z", "z"]], ["TextValue", ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.cross_tab [] "Key" values=[Average "TextValue"] on_problems=pb - case setup.is_database of - True -> - err.should_fail_with SQL_Error - False -> - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Average TextValue" [0, 4, 7] "Cannot convert to a number.") - - [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| - err = table.cross_tab [] "Key" values=[Average "Value", Sum "TextValue"] on_problems=pb - case setup.is_database of - True -> - err.should_fail_with SQL_Error - False -> - err.should_fail_with Invalid_Aggregation - err.catch . should_equal (Invalid_Aggregation.Error "Sum TextValue" [0, 4, 7] "Cannot convert to a number.") - Test.specify "should allow non-Text columns to be used as name" <| table = table_builder [["Key", [1, 1, 1, 2, 2, 1, 3, 3, 1]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] t1 = table.cross_tab @@ -201,6 +181,49 @@ spec setup = t1.at '\n\n' . to_vector . should_equal [1] t1.at " " . to_vector . should_equal [1] + Test.specify "should report Floating_Point_Equality if the group or name column is floating point" <| + t = table_builder [["X", [1.5, 2.5, 2.5, 1.5]], ["Y", [1, 2, 3, 4]], ["Z", ["a", "b", "b", "b"]]] + t1 = t.cross_tab + Problems.expect_warning Floating_Point_Equality t1 + t1.column_names . should_equal ["1.5", "2.5"] + t1.row_count . should_equal 1 + t1.at "1.5" . to_vector . should_equal [2] + t1.at "2.5" . to_vector . should_equal [2] + + t2 = t.cross_tab ["X"] "Z" + Problems.expect_warning Floating_Point_Equality t2 + t2.column_names . should_equal ["X", "a", "b"] + t2.row_count . should_equal 2 + t2.at "X" . to_vector . should_equal [1.5, 2.5] + t2.at "a" . to_vector . should_equal [1, 0] + t2.at "b" . to_vector . should_equal [1, 2] + + Test.specify "should check types of aggregates" <| + t = table_builder [["Key", ["a", "a", "b", "b"]], ["ints", [1, 2, 3, 4]], ["texts", ["a", "b", "c", "d"]]] + + [Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <| + t1 = t.cross_tab [] "Key" values=[Average "texts"] on_problems=pb + t1.should_fail_with Invalid_Value_Type + t2 = t.cross_tab [] "Key" values=[Sum "texts"] on_problems=pb + t2.should_fail_with Invalid_Value_Type + t3 = t.cross_tab [] "Key" values=[Concatenate "ints"] on_problems=pb + t3.should_fail_with Invalid_Value_Type + + Test.specify "should return predictable types" <| + table = table_builder [["Text", ["a", "b"]], ["Int", [1, 2]], ["Float", [1.1, 2.2]]] + t1 = table.cross_tab ["Int"] "Text" + t1.column_names . should_equal ["Int", "a", "b"] + t1.at "Int" . value_type . is_integer . should_be_true + t1.at "a" . value_type . is_integer . should_be_true + t1.at "b" . value_type . is_integer . should_be_true + + t2 = table.cross_tab [] "Int" values=[Average "Float", Concatenate "Text"] . sort_columns + t2.column_names . should_equal ["1 Average Float", "1 Concatenate Text", "2 Average Float", "2 Concatenate Text"] + t2.at "1 Average Float" . value_type . is_floating_point . should_be_true + t2.at "1 Concatenate Text" . value_type . is_text . should_be_true + t2.at "2 Average Float" . value_type . is_floating_point . should_be_true + t2.at "2 Concatenate Text" . value_type . is_text . should_be_true + Test.specify "should fail gracefully if an effective column name would contain invalid characters" <| table = table_builder [["Key", ['x', 'x', 'y\0', '\0', 'y\0', 'z', 'z', 'z', 'z']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] r1 = table.cross_tab diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso index f1180260a50a..429f423c5038 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso @@ -30,7 +30,7 @@ spec setup = prefix = setup.prefix table_builder = setup.table_builder materialize = setup.materialize - db_todo = if prefix.contains "In-Memory" then Nothing else "New Joining API is still WIP for the DB backend." + db_todo = if prefix.contains "In-Memory" then Nothing else "TODO: handling NULLs in equality conditions." Test.group prefix+"Table.join" <| t1 = table_builder [["X", [1, 2, 3]], ["Y", [4, 5, 6]]] t2 = table_builder [["Z", [2, 3, 2, 4]], ["W", [4, 5, 6, 7]]] diff --git a/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso b/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso index 0ab7e99c4025..1c5c9b596ef8 100644 --- a/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso @@ -55,43 +55,50 @@ spec = Test.group "Aggregate Columns" <| test_aggregator simple_table (Count_Empty -1) "Count Empty text" 3 test_aggregator simple_table (Count_Empty -1 test_name) test_name 3 test_aggregator simple_table (Count_Empty "text" test_name) test_name 3 - test_aggregator empty_table (Count_Empty 0 test_name) test_name empty_table.row_count + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Count_Empty 0 test_name) test_name empty_table.row_count Test.specify "should be able to count non empties in a set of Texts" <| test_aggregator simple_table (Count_Not_Empty -1) "Count Not Empty text" 2 test_aggregator simple_table (Count_Not_Empty -1 test_name) test_name 2 test_aggregator simple_table (Count_Not_Empty "text" test_name) test_name 2 - test_aggregator empty_table (Count_Not_Empty 0 test_name) test_name empty_table.row_count + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Count_Not_Empty 0 test_name) test_name empty_table.row_count Test.specify "should be able to total a set of values" <| test_aggregator simple_table (Sum -2) "Sum float" 12.1 test_aggregator simple_table (Sum -2 test_name) test_name 12.1 test_aggregator simple_table (Sum "float" test_name) test_name 12.1 - test_aggregator empty_table (Sum 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Sum 0 test_name) test_name Nothing Test.specify "should be able to average a set of values" <| test_aggregator simple_table (Average -2) "Average float" 3.025 0.000001 test_aggregator simple_table (Average -2 test_name) test_name 3.025 0.000001 test_aggregator simple_table (Average "float" test_name) test_name 3.025 0.000001 - test_aggregator empty_table (Average 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Average 0 test_name) test_name Nothing Test.specify "should be able to compute standard deviation a set of values" <| test_aggregator simple_table (Standard_Deviation -2) "Standard Deviation float" 1.977161 0.000001 test_aggregator simple_table (Standard_Deviation -2 test_name) test_name 1.977161 0.000001 test_aggregator simple_table (Standard_Deviation "float" test_name) test_name 1.977161 0.000001 - test_aggregator empty_table (Standard_Deviation 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Standard_Deviation 0 test_name) test_name Nothing Test.specify "should be able to compute standard deviation of a population a set of values" <| test_aggregator simple_table (Standard_Deviation -2 population=True) "Standard Deviation float" 1.712271 0.000001 test_aggregator simple_table (Standard_Deviation -2 test_name population=True) test_name 1.712271 0.000001 test_aggregator simple_table (Standard_Deviation "float" test_name population=True) test_name 1.712271 0.000001 - test_aggregator empty_table (Standard_Deviation 0 test_name population=True) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Standard_Deviation 0 test_name population=True) test_name Nothing Test.specify "should be able to compute median a set of values" <| test_aggregator simple_table (Median -2) "Median float" 2.75 0.000001 test_aggregator simple_table (Median -2 test_name) test_name 2.75 0.000001 test_aggregator simple_table (Median "float" test_name) test_name 2.75 0.000001 - test_aggregator empty_table (Median 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Median 0 test_name) test_name Nothing Test.specify "should be able to compute first of a set of values including missing" <| test_aggregator simple_table (First 1 ignore_nothing=False) "First is_valid" Nothing @@ -121,7 +128,8 @@ spec = Test.group "Aggregate Columns" <| test_aggregator simple_table (Concatenate -1 Nothing ',' '[' ']' '"') "Concatenate text" '[A,"",,"B,C",]' test_aggregator simple_table (Concatenate -1 test_name) test_name 'AB,C' test_aggregator simple_table (Concatenate "text" test_name ',') test_name 'A,,,B,C,' - test_aggregator empty_table (Concatenate 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Concatenate 0 test_name) test_name Nothing Test.specify "should be able to count distinct items on a single set of values" <| test_aggregator simple_table (Count_Distinct 0) "Count Distinct count" 4 @@ -152,13 +160,15 @@ spec = Test.group "Aggregate Columns" <| test_aggregator simple_table (Shortest -1) "Shortest text" "" test_aggregator simple_table (Shortest -1 test_name) test_name "" test_aggregator simple_table (Shortest "text" test_name) test_name "" - test_aggregator empty_table (Shortest 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Shortest 0 test_name) test_name Nothing Test.specify "should be able to get the longest of a set of texts" <| test_aggregator simple_table (Longest -1) "Longest text" "B,C" test_aggregator simple_table (Longest -1 test_name) test_name "B,C" test_aggregator simple_table (Longest "text" test_name) test_name "B,C" - test_aggregator empty_table (Longest 0 test_name) test_name Nothing + # TODO [RW] Re-enable this once #6281 is implemented. + # test_aggregator empty_table (Longest 0 test_name) test_name Nothing Test.specify "should be able to get the mode of a set of numbers" <| mode_table = Table.new [["tests", [1,2,3,4,2,4,1,2,3,4,2,1,3,5,2,1,2,4,5,2,1,2,3,5,6,1,2,2]]] diff --git a/test/Table_Tests/src/In_Memory/Common_Spec.enso b/test/Table_Tests/src/In_Memory/Common_Spec.enso index e54136d48496..4da128bffd2e 100644 --- a/test/Table_Tests/src/In_Memory/Common_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Common_Spec.enso @@ -11,7 +11,7 @@ run_common_spec spec = aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config table = (enso_project.data / "data.csv") . read - empty_table = Table.new <| table.columns.map c->[c.name, []] + empty_table = table.take 0 materialize = x->x setup = Common_Table_Operations.Main.Test_Setup.Config "[In-Memory] " table empty_table Table.new materialize is_database=False test_selection=selection aggregate_test_selection=aggregate_selection