From 1fb60df61b5bf143dffa7e2003e8567b65dfc6fc Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Sun, 9 Jul 2023 18:03:05 +0000 Subject: [PATCH] Fixes from the live demo. (#7243) - Removed defaults from `cross_tab`. They caused an out-of-heap-space error when it attempted to build a 205k x 205k table. It now has a hard limit of 10,000 columns - we can increase this once we have more concrete test data. ![image](https://github.com/enso-org/enso/assets/4699705/bc38d41c-56dc-41bd-8a7c-fa89ecfa7f79) - Adjusted the widgets on `Aggregate_Column` for `columns` and `order_by` to be single-value dropdowns, as nested Vector editors are not supported. ![image](https://github.com/enso-org/enso/assets/4699705/f4a7c7cc-6a21-462c-a39e-65fbab82c367) - Altered `Aggregate_Column` so `new_name` is now `new_name:Text=""` and no longer accepts `Nothing`. This makes it appear correctly in the IDE. ![image](https://github.com/enso-org/enso/assets/4699705/196a49ba-4274-44bb-b876-0372c8f62746) - Added dropdowns for `fill_empty`, `fill_nothing` and `replace` on `Table`. ![image](https://github.com/enso-org/enso/assets/4699705/9ee5cec2-82d5-4452-b650-67015ac9fee5) - Added `replace` to the Database table, throwing `Unsupported_Database_Operation`. 
--- .../Database/0.0.0-dev/src/Data/Table.enso | 56 ++++++++++++++++--- .../0.0.0-dev/src/Data/Aggregate_Column.enso | 40 ++++++------- .../Table/0.0.0-dev/src/Data/Table.enso | 44 ++++++++------- .../Standard/Table/0.0.0-dev/src/Errors.enso | 2 +- .../src/Internal/Aggregate_Column_Helper.enso | 4 +- .../src/Internal/Widget_Helpers.enso | 14 +++-- .../table/data/index/MultiValueIndex.java | 13 +++++ .../table/error/TooManyColumnsException.java | 20 +++++++ .../Aggregate_Spec.enso | 6 -- .../Cross_Tab_Spec.enso | 34 ++++++----- .../src/Database/Codegen_Spec.enso | 2 +- .../src/Database/Common/Common_Spec.enso | 3 +- .../src/In_Memory/Aggregate_Column_Spec.enso | 8 +-- 13 files changed, 163 insertions(+), 83 deletions(-) create mode 100644 std-bits/table/src/main/java/org/enso/table/error/TooManyColumnsException.java diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index deab21f8ddce..e9cdc3630cce 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1512,7 +1512,7 @@ type Table @name_column Widget_Helpers.make_column_name_selector @values (Widget_Helpers.make_aggregate_column_selector include_group_by=False) cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings - cross_tab self group_by=[] name_column=self.column_names.first values=Aggregate_Column.Count (on_problems=Report_Warning) = + cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) = ## Avoid unused arguments warning. 
We cannot rename arguments to `_`, because we need to keep the API consistent with the in-memory table. _ = [group_by, name_column, values, on_problems] @@ -1981,8 +1981,7 @@ type Table been replaced with the provided default(s). Arguments: - - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + - columns: The column(s) to fill missing values of. - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -1991,10 +1990,11 @@ type Table Fill missing values in two columns with the value 20.5. fill_nothing = table.fill_nothing ["col0", "col1"] 20.5 - fill_nothing : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Column | Any -> Table - fill_nothing self selectors default = + @columns Widget_Helpers.make_column_name_vector_selector + fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_nothing self columns default = transformer col = col.fill_nothing default - Table_Helpers.replace_columns_with_transformed_columns self selectors transformer + Table_Helpers.replace_columns_with_transformed_columns self columns transformer ## ALIAS Fill Empty, if_empty @@ -2002,6 +2002,7 @@ type Table provided default(s). Arguments: + - columns: The column(s) to fill empty values. - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -2010,10 +2011,47 @@ type Table Fill empty values in two columns with the value "hello". 
 fill_empty = table.fill_empty ["col0", "col1"] "hello" - fill_empty : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Column | Any -> Table - fill_empty self selectors default = + @columns Widget_Helpers.make_column_name_vector_selector + fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_empty self columns default = transformer col = col.fill_empty default - Table_Helpers.replace_columns_with_transformed_columns self selectors transformer + Table_Helpers.replace_columns_with_transformed_columns self columns transformer + + ## Replaces the first, or all occurrences of `term` with `new_text` in each + row of the specified column. If `term` is empty, the function returns the + table unchanged. + + This method follows the exact replacement semantics of the + `Text.replace` method. + + Arguments: + - columns: The column(s) to replace values on. + - term: The term to find. Can be `Text`, `Regex`, or a `Column` of + strings. + - new_text: The text to replace matches with. + - case_sensitivity: Specifies if the text values should be compared case + sensitively. + - only_first: If True, only replace the first match. + + > Example + Replace dashes with underscores. + + table.replace "-" "_" + + > Example + Remove leading and trailing spaces from cells. + + column.replace "^\s*(.*?)\s*$".to_regex "$1" + + > Example + Replace texts in quotes with parentheses. 
+ + column.replace '"(.*?)"'.to_regex '($1)' + @columns Widget_Helpers.make_column_name_vector_selector + replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column + replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = + _ = [columns, term, new_text, case_sensitivity, only_first] + Error.throw (Unsupported_Database_Operation.Error "Text replace is currently not supported in the database backend.") ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso index 1c691fa326a6..83cf98b22638 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso @@ -12,14 +12,14 @@ type Aggregate_Column - column: the column (specified by name, expression or index) to group by. - new_name: name of new column. - Group_By (column:Text|Integer|Column|Any) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Group_By (column:Text|Integer|Column|Any) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the row count of each group. If no rows, evaluates to 0. Arguments: - new_name: name of new column. - Count (new_name:Text|Nothing=Nothing) + Count (new_name:Text="") ## Creates a new column with the count of unique items in the selected column(s) within each group. If no rows, evaluates to 0. @@ -32,7 +32,7 @@ type Aggregate_Column multiple selection. - new_name: name of new column. - ignore_nothing: if all values are Nothing won't be included. 
- Count_Distinct (columns:(Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector | Column))=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False) # Column needed because of 6866 + Count_Distinct (columns:(Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866 ## ALIAS Count_Not_Null @@ -42,7 +42,7 @@ type Aggregate_Column Arguments: - column: the column (specified by name, expression or index) to count. - new_name: name of new column. - Count_Not_Nothing (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Count_Not_Nothing (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## ALIAS Count_Null, Count_Missing @@ -52,7 +52,7 @@ type Aggregate_Column Arguments: - column: the column (specified by name, expression or index) to count. - new_name: name of new column. - Count_Nothing (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Count_Nothing (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the count of not `Nothing` (null) and non-empty ("") values of the column within each group. If no rows, evaluates to 0. @@ -60,7 +60,7 @@ type Aggregate_Column Arguments: - column: the column (specified by name, expression or index) to count. - new_name: name of new column. - Count_Not_Empty (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Count_Not_Empty (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the count of `Nothing` (null) or empty ("") text values of the column within each group. If no rows, evaluates to 0. 
@@ -68,7 +68,7 @@ type Aggregate_Column Arguments: - column: the column (specified by name, expression or index) to count. - new_name: name of new column. - Count_Empty (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Count_Empty (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the sum of values (ignoring missing values) of the column within each group. If no rows, evaluates to `Nothing`. @@ -76,7 +76,7 @@ type Aggregate_Column Arguments: - column: the column (specified by name, expression or index) to total. - new_name: name of new column. - Sum (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Sum (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the mean of values (ignoring missing values) of the column within each group. If no rows, evaluates to `Nothing`. @@ -84,7 +84,7 @@ type Aggregate_Column Arguments: - column: the column (specified by name, expression or index) to average. - new_name: name of new column. - Average (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Average (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the median of values (ignoring missing values) of the column within each group. If no rows, evaluates to `Nothing`. @@ -93,7 +93,7 @@ type Aggregate_Column - column: column (specified by name, expression or index) to calculate median on. - new_name: name of new column. - Median (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Median (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the median of values (ignoring missing values) of the column within each group. 
If no rows, evaluates to `Nothing`. @@ -103,7 +103,7 @@ type Aggregate_Column - column: column (specified by name, expression or index) to compute percentile. - new_name: name of new column. - Percentile (percentile:Number=0.5) (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Percentile (percentile:Number=0.5) (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the mode of values (ignoring missing values) of the column within each group. If no rows, evaluates to `Nothing`. @@ -112,7 +112,7 @@ type Aggregate_Column - column: column (specified by name, expression or index) to find the most common value. - new_name: name of new column. - Mode (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Mode (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the standard deviation of values (ignoring missing values) of the column within each group. If no rows, evaluates to @@ -123,7 +123,7 @@ type Aggregate_Column standard deviation. - new_name: name of new column. - population: specifies if group is a sample or the population - Standard_Deviation (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) (population:Boolean=False) # Column needed because of 6866 + Standard_Deviation (column:Text|Integer|Column|Any=0) (new_name:Text="") (population:Boolean=False) # Column needed because of 6866 ## Creates a new column with the values concatenated together. `Nothing` values will become an empty string. If no rows, evaluates to `Nothing`. @@ -136,7 +136,7 @@ type Aggregate_Column - suffix: added at the end of the result. - quote_char: character used to quote the values if the value is `Empty` or contains the separator. 
- Concatenate (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="") # Column needed because of 6866 + Concatenate (column:Text|Integer|Column|Any=0) (new_name:Text="") (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="") # Column needed because of 6866 ## Creates a new column with the first value in each group. If no rows, evaluates to `Nothing`. @@ -149,7 +149,7 @@ type Aggregate_Column not missing value returned. - order_by: required for database tables. Specifies how to order the results within the group. - First (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:(Text | Vector (Text | Sort_Column) | Nothing)=Nothing) # Column needed because of 6866 + First (column:Text|Integer|Column|Any=0) (new_name:Text="") (ignore_nothing:Boolean=True) (order_by:(Text | Vector (Text | Sort_Column) | Nothing)=Nothing) # Column needed because of 6866 ## Creates a new column with the last value in each group. If no rows, evaluates to `Nothing`. @@ -162,7 +162,7 @@ type Aggregate_Column not missing value returned. - order_by: required for database tables. Specifies how to order the results within the group. - Last (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:(Text | Vector (Text | Sort_Column) | Nothing)=Nothing) # Column needed because of 6866 + Last (column:Text|Integer|Column|Any=0) (new_name:Text="") (ignore_nothing:Boolean=True) (order_by:(Text | Vector (Text | Sort_Column) | Nothing)=Nothing) # Column needed because of 6866 ## Creates a new column with the maximum value in each group. If no rows, evaluates to `Nothing`. @@ -171,7 +171,7 @@ type Aggregate_Column - column: column (specified by name, expression or index) to find the group maximum. - new_name: name of new column. 
- Maximum (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Maximum (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the maximum value in each group. If no rows, evaluates to `Nothing`. @@ -180,7 +180,7 @@ type Aggregate_Column - column: column (specified by name, expression or index) to find the group minimum. - new_name: name of new column. - Minimum (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Minimum (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the shortest text in each group. If no rows, evaluates to `Nothing`. @@ -189,7 +189,7 @@ type Aggregate_Column - column: column (specified by name, expression or index) to find the group shortest value. - new_name: name of new column. - Shortest (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Shortest (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 ## Creates a new column with the longest text in each group. If no rows, evaluates to `Nothing`. @@ -198,4 +198,4 @@ type Aggregate_Column - column: column (specified by name, expression or index) to find the group longest value. - new_name: name of new column. 
- Longest (column:Text|Integer|Column|Any=0) (new_name:Text|Nothing=Nothing) # Column needed because of 6866 + Longest (column:Text|Integer|Column|Any=0) (new_name:Text="") # Column needed because of 6866 diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 0173be632438..d6a078d2dbba 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -56,6 +56,7 @@ polyglot java import org.enso.table.data.table.join.Between as Java_Join_Between polyglot java import org.enso.table.data.table.join.Equals as Java_Join_Equals polyglot java import org.enso.table.data.table.join.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case polyglot java import org.enso.table.data.table.Table as Java_Table +polyglot java import org.enso.table.error.TooManyColumnsException polyglot java import org.enso.table.operations.OrderBuilder ## Represents a column-oriented table data structure. @@ -1849,7 +1850,7 @@ type Table @name_column Widget_Helpers.make_column_name_selector @values (Widget_Helpers.make_aggregate_column_selector include_group_by=False) cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! 
Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings - cross_tab self group_by=[] name_column=self.column_names.first values=Aggregate_Column.Count (on_problems=Report_Warning) = + cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) = columns_helper = self.columns_helper problem_builder = Problem_Builder.new error_on_missing_columns=True @@ -1890,7 +1891,7 @@ type Table c -> Aggregate_Column_Helper.default_aggregate_column_name c include_column_name data_columns = validated_values.map c-> - col_name = c.new_name.if_nothing <| + col_name = if c.new_name != "" then c.new_name else Aggregate_Column_Helper.default_aggregate_column_name c Aggregate_Column_Helper.java_aggregator col_name c @@ -1900,8 +1901,13 @@ type Table index.makeTable (group_by + data_columns) False -> aggregate_names = validated_values.map c-> - c.new_name.if_nothing (name_mapper c) - index.makeCrossTabTable java_key_columns matched_name.first.java_column data_columns aggregate_names + if c.new_name != "" then c.new_name else (name_mapper c) + + too_many_columns caught_panic = + inner_panic = caught_panic.payload + Error.throw (Column_Count_Exceeded.Error inner_panic.getMaximumColumnCount inner_panic.getColumnCount) + Panic.catch TooManyColumnsException handler=too_many_columns <| + index.makeCrossTabTable java_key_columns matched_name.first.java_column data_columns aggregate_names on_problems.attach_problems_after (Table.Value result) <| problems = result.getProblems @@ -2025,8 +2031,7 @@ type Table been replaced with the provided default(s). Arguments: - - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + - columns: The column(s) to fill Nothing values. - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position will be used. 
@@ -2035,10 +2040,11 @@ type Table Fill missing values in two columns with the value 20.5. fill_nothing = table.fill_nothing ["col0", "col1"] 20.5 - fill_nothing : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Column | Any -> Table - fill_nothing self selectors default = + @columns Widget_Helpers.make_column_name_vector_selector + fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_nothing self columns default = transformer col = col.fill_nothing default - Table_Helpers.replace_columns_with_transformed_columns self selectors transformer + Table_Helpers.replace_columns_with_transformed_columns self columns transformer ## ALIAS Fill Empty, if_empty @@ -2046,8 +2052,7 @@ type Table provided default(s). Arguments: - - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + - columns: The column(s) to fill empty values. - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -2056,10 +2061,11 @@ type Table Fill empty values in two columns with the value "hello". fill_empty = table.fill_empty ["col0", "col1"] "hello" - fill_empty : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Column | Any -> Table - fill_empty self selectors default = + @columns Widget_Helpers.make_column_name_vector_selector + fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_empty self columns default = transformer col = col.fill_empty default - Table_Helpers.replace_columns_with_transformed_columns self selectors transformer + Table_Helpers.replace_columns_with_transformed_columns self columns transformer ## Replaces the first, or all occurrences of `term` with `new_text` in each row of the specified column. 
If `term` is empty, the function returns the @@ -2069,8 +2075,7 @@ type Table `Text.replace` method. Arguments: - - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + - columns: The column(s) to replace values on. - term: The term to find. Can be `Text`, `Regex`, or a `Column` of strings. - replacement: The text to replace matches with. @@ -2092,10 +2097,11 @@ type Table Replace texts in quotes with parentheses. column.replace '"(.*?)"'.to_regex '($1)' - replace : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column - replace self selectors term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = + @columns Widget_Helpers.make_column_name_vector_selector + replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column + replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = transformer col = col.replace term new_text case_sensitivity only_first - Table_Helpers.replace_columns_with_transformed_columns self selectors transformer + Table_Helpers.replace_columns_with_transformed_columns self columns transformer ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index c24a07477ebb..bb9a2e5b29f5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -596,4 +596,4 @@ type Column_Count_Exceeded Create a human-readable version of the error. to_display_text : Text to_display_text self = - "The operation produced more columns than the specified limit. The limit is "+self.limit.to_text+" and the number of new columns was "+self.column_count.to_text+". 
The limit may be turned off by setting the `limit` option to `Nothing`." + "The operation produced more columns than the specified limit. The limit is "+self.limit.to_text+" and the number of new columns was "+self.column_count.to_text+"." diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index 04843a9afc84..e0b5f8794137 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -70,10 +70,10 @@ prepare_aggregate_columns aggregates table error_on_missing_columns = The second pass resolves the default names, ensuring that they do not clash with the user-specified names (ensuring that user-specified names take precedence). - pass_1 = valid_resolved_aggregate_columns.map c->(if c.new_name.is_nothing then Nothing else unique.make_unique c.new_name) + pass_1 = valid_resolved_aggregate_columns.map c-> if c.new_name == "" then "" else unique.make_unique c.new_name renamed_columns = pass_1.map_with_index i->name-> agg = valid_resolved_aggregate_columns.at i - new_name = name.if_nothing (unique.make_unique (default_aggregate_column_name agg)) + new_name = if name != "" then name else unique.make_unique (default_aggregate_column_name agg) Pair.new new_name agg # Build Problems Output diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso index 0340ffbb7bc9..4819407bd219 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso @@ -20,14 +20,18 @@ make_aggregate_column_selector table display=Display.Always include_group_by=Tru col_names_selector = make_column_name_selector table 
display=Display.Always column_widget = ["column", col_names_selector] - col_list_selector = make_column_name_vector_selector table display=Display.Always - fqn = Meta.get_qualified_type_name Aggregate_Column group_by = if include_group_by then [Option "Group By" fqn+".Group_By" [column_widget]] else [] count = Option "Count" fqn+".Count" - count_distinct = Option "Count Distinct" fqn+".Count_Distinct" [["columns", col_list_selector]] - first = Option "First" fqn+".First" [column_widget, ["order_by" , col_list_selector]] - last = Option "Last" fqn+".Last" [column_widget, ["order_by" , col_list_selector]] + + ## Currently can't support nested vector editors so using single picker + ## col_list_selector = make_column_name_vector_selector table display=Display.Always + count_distinct = Option "Count Distinct" fqn+".Count_Distinct" [["columns", col_names_selector]] + + ## Currently can't support nested vector editors so using single order by picker + order_by_selector = make_order_by_selector table . 
item_editor + first = Option "First" fqn+".First" [column_widget, ["order_by" , order_by_selector]] + last = Option "Last" fqn+".Last" [column_widget, ["order_by" , order_by_selector]] count_not_nothing = Option "Count Not Nothing" fqn+".Count_Not_Nothing" [column_widget] count_nothing = Option "Count Nothing" fqn+".Count_Nothing" [column_widget] diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java index e95c5680961c..ab6683de7f42 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java @@ -11,12 +11,15 @@ import org.enso.table.data.table.Column; import org.enso.table.data.table.Table; import org.enso.table.data.table.problems.FloatingPointGrouping; +import org.enso.table.error.TooManyColumnsException; import org.enso.table.problems.AggregatedProblems; import org.enso.table.util.ConstantList; import org.enso.table.util.NameDeduplicator; import org.graalvm.polyglot.Context; public class MultiValueIndex { + private static final int MAXIMUM_CROSS_TAB_COLUMN_COUNT = 10000; + private final int keyColumnsLength; private final Map> locs; private final AggregatedProblems problems; @@ -140,6 +143,16 @@ public Table makeCrossTabTable( nameColumn.getSize(), TextFoldingStrategy.unicodeNormalizedFold); final int columnCount = groupingColumns.length + nameIndex.locs.size() * aggregates.length; + if (columnCount > MAXIMUM_CROSS_TAB_COLUMN_COUNT) { + throw new TooManyColumnsException( + "The cross_tab contained too many columns. 
Maximum allowed is " + + MAXIMUM_CROSS_TAB_COLUMN_COUNT + + " but was " + + columnCount + + ".", + columnCount, + MAXIMUM_CROSS_TAB_COLUMN_COUNT); + } // Create the storage Builder[] storage = new Builder[columnCount]; diff --git a/std-bits/table/src/main/java/org/enso/table/error/TooManyColumnsException.java b/std-bits/table/src/main/java/org/enso/table/error/TooManyColumnsException.java new file mode 100644 index 000000000000..f59e76d47636 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/error/TooManyColumnsException.java @@ -0,0 +1,20 @@ +package org.enso.table.error; + +public class TooManyColumnsException extends RuntimeException { + private final int columnCount; + private final int maximumColumnCount; + + public TooManyColumnsException(String message, int columnCount, int maximumColumnCount) { + super(message); + this.columnCount = columnCount; + this.maximumColumnCount = maximumColumnCount; + } + + public int getColumnCount() { + return columnCount; + } + + public int getMaximumColumnCount() { + return maximumColumnCount; + } +} diff --git a/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso index debe577c718a..17b064559536 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso @@ -1363,12 +1363,6 @@ spec setup = err6 = table.aggregate [Group_By "Index", Sum "Value", Sum 42] on_problems=Problem_Behavior.Ignore error_on_missing_columns=True err6.catch . 
should_equal (Missing_Input_Columns.Error [42]) - Test.specify "should raise a warning when an invalid output name" <| - action = table.aggregate [Group_By "Index" ""] on_problems=_ - problems = [Invalid_Output_Column_Names.Error [""]] - tester = expect_column_names ["Column 1"] - Problems.test_problem_handling action problems tester - Test.specify "should raise a warning when a duplicate column name" <| action = table.aggregate [Group_By "Index", Group_By 0] on_problems=_ problems = [Duplicate_Output_Column_Names.Error ["Index"]] diff --git a/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso index 7e835290d271..c5c10c019bdd 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso @@ -22,7 +22,7 @@ spec setup = table = table_builder [["Key", ["x", "x", "x", "x", "y", "y", "y", "z", "z"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] table2 = table_builder [["Group", ["A","B","A","B","A","B","A","B","A"]], ["Key", ["x", "x", "x", "x", "y", "y", "y", "z", "z"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] Test.specify "should cross_tab counts by default using first column as names" <| - t1 = table.cross_tab + t1 = table.cross_tab [] "Key" t1.column_names . should_equal ["x", "y", "z"] t1.row_count . should_equal 1 t1.at "x" . to_vector . should_equal [4] @@ -30,7 +30,7 @@ spec setup = t1.at "z" . to_vector . should_equal [2] Test.specify "should allow a different aggregate" <| - t1 = table.cross_tab values=[Sum "Value"] + t1 = table.cross_tab [] "Key" values=[Sum "Value"] t1.column_names . should_equal ["x", "y", "z"] t1.row_count . should_equal 1 t1.at "x" . to_vector . should_equal [10] @@ -38,7 +38,7 @@ spec setup = t1.at "z" . to_vector . 
should_equal [17] Test.specify "should allow a custom expression for the aggregate" <| - t1 = table.cross_tab values=[Sum "[Value]*[Value]"] + t1 = table.cross_tab [] "Key" values=[Sum "[Value]*[Value]"] t1.column_names . should_equal ["x", "y", "z"] t1.row_count . should_equal 1 t1.at "x" . to_vector . should_equal [30] @@ -94,7 +94,7 @@ spec setup = t2.column_names . should_equal ["Group", "x", "y", "z"] Test.specify "should allow multiple values aggregates" <| - t1 = table.cross_tab values=[Count, Sum "Value"] + t1 = table.cross_tab [] "Key" values=[Count, Sum "Value"] t1.column_names . should_equal ["x Count", "x Sum", "y Count", "y Sum", "z Count", "z Sum"] t1.row_count . should_equal 1 t1.at "x Count" . to_vector . should_equal [4] @@ -123,21 +123,21 @@ spec setup = err2.catch.criteria . should_equal [42] Test.specify "should fail if aggregate values contain missing columns" <| - err1 = table.cross_tab values=[Count, Sum "Nonexistent Value", Sum "Value", Sum "OTHER"] + err1 = table.cross_tab [] "Key" values=[Count, Sum "Nonexistent Value", Sum "Value", Sum "OTHER"] err1.should_fail_with Invalid_Aggregate_Column err1.catch.name . should_equal "Nonexistent Value" - err2 = table.cross_tab values=[Count, Sum "Nonexistent Value", Sum "Value", Sum 42] + err2 = table.cross_tab [] "Key" values=[Count, Sum "Nonexistent Value", Sum "Value", Sum 42] err2.should_fail_with Missing_Input_Columns err2.catch.criteria . should_equal [42] Test.specify "should fail if aggregate values contain invalid expressions" <| - err1 = table.cross_tab values=[Sum "[MISSING]*10"] + err1 = table.cross_tab [] "Key" values=[Sum "[MISSING]*10"] err1.should_fail_with Invalid_Aggregate_Column err1.catch.name . should_equal "[MISSING]*10" err1.catch.expression_error . should_equal (No_Such_Column.Error "MISSING") - err2 = table.cross_tab values=[Sum "[[["] + err2 = table.cross_tab [] "Key" values=[Sum "[[["] err2.should_fail_with Invalid_Aggregate_Column err2.catch.name . 
should_equal "[[[" err2.catch.expression_error . should_be_a Expression_Error.Syntax_Error @@ -162,7 +162,7 @@ spec setup = Test.specify "should allow non-Text columns to be used as name" <| table = table_builder [["Key", [1, 1, 1, 2, 2, 1, 3, 3, 1]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] - t1 = table.cross_tab + t1 = table.cross_tab [] "Key" t1.column_names . should_equal ["1", "2", "3"] t1.row_count . should_equal 1 t1.at "1" . to_vector . should_equal [5] @@ -171,7 +171,7 @@ spec setup = Test.specify "should correctly handle uncommon characters in fields becoming column names" <| table = table_builder [["Key", ["💡🎉🌻", "ąęź", "ąęź", '\n\n', "😊", "😊", "🌻", "😊", "🌻", " "]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]] - t1 = table.cross_tab . sort_columns + t1 = table.cross_tab [] "Key" . sort_columns t1.column_names . should_equal ['\n\n', ' ', 'ąęź', '🌻', '💡🎉🌻', '😊'] t1.row_count . should_equal 1 t1.at "💡🎉🌻" . to_vector . should_equal [1] @@ -183,7 +183,7 @@ spec setup = Test.specify "should report Floating_Point_Equality if the group or name column is floating point" <| t = table_builder [["X", [1.5, 2.5, 2.5, 1.5]], ["Y", [1, 2, 3, 4]], ["Z", ["a", "b", "b", "b"]]] - t1 = t.cross_tab + t1 = t.cross_tab [] "X" Problems.expect_warning Floating_Point_Equality t1 t1.column_names . should_equal ["1.5", "2.5"] t1.row_count . should_equal 1 @@ -226,7 +226,7 @@ spec setup = Test.specify "should fail gracefully if an effective column name would contain invalid characters" <| table = table_builder [["Key", ['x', 'x', 'y\0', '\0', 'y\0', 'z', 'z', 'z', 'z']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] - r1 = table.cross_tab + r1 = table.cross_tab [] "Key" r1.should_fail_with Illegal_Argument r1.catch.to_display_text . 
should_contain "must not contain the NUL character" @@ -237,11 +237,17 @@ spec setup = Test.specify "should fail gracefully if an effective column name would be empty or null" <| table = table_builder [["Key", [" ", "x", "x", "x", "", "", "", "y", "y"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] - r1 = table.cross_tab + r1 = table.cross_tab [] "Key" r1.should_fail_with Illegal_Argument r1.catch.to_display_text . should_contain "cannot be empty" table2 = table_builder [["Key", [" ", "x", "x", "x", Nothing, Nothing, Nothing, "y", "y"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]] - r2 = table2.cross_tab + r2 = table2.cross_tab [] "Key" r2 . should_fail_with Illegal_Argument r2.catch.to_display_text . should_contain "cannot be Nothing" + + Test.specify "should fail gracefully if producing too many columns in a table" <| + table = table_builder [["Key", 0.up_to 25000 . to_vector]] + r1 = table.cross_tab [] "Key" + r1 . should_fail_with Column_Count_Exceeded + r1.catch.column_count . should_equal 25000 diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 5ea85ef3ac9d..aa21f3ca01f5 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -125,7 +125,7 @@ spec = code . should_equal ['SELECT "T1"."A grp" AS "A grp", "T1"."counter" AS "counter" FROM (SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A") AS "T1"', []] Test.specify "should allow to group by multiple fields" <| - code = t1.aggregate [Sum "A" "sum_a", Group_By "C" Nothing, Group_By "B" "B grp"] . to_sql . prepare + code = t1.aggregate [Sum "A" "sum_a", Group_By "C", Group_By "B" "B grp"] . to_sql . prepare code . 
should_equal ['SELECT "T1"."sum_a" AS "sum_a", "T1"."C" AS "C", "T1"."B grp" AS "B grp" FROM (SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B") AS "T1"', []] main = Test_Suite.run_main spec diff --git a/test/Table_Tests/src/Database/Common/Common_Spec.enso b/test/Table_Tests/src/Database/Common/Common_Spec.enso index 99b783e53f69..608999c1f83c 100644 --- a/test/Table_Tests/src/Database/Common/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common/Common_Spec.enso @@ -260,7 +260,7 @@ run_tests prefix connection upload = aggregates = [Sum "price" "sum price", Sum "quantity" "sum quantity", Average "price" "avg price"] ## TODO can check the datatypes - t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . read) + t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . read) t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"] t1.at "sum price" . to_vector . should_equal [100.5, 6.7, 1, Nothing, 2] t1.at "sum quantity" . to_vector . should_equal [80, 40, 120, 70, 2] @@ -270,6 +270,7 @@ run_tests prefix connection upload = t2.at "sum price" . to_vector . should_equal [110.2] t2.at "sum quantity" . to_vector . should_equal [312] t2.at "avg price" . to_vector . should_equal [(110.2 / 11)] + Test.group prefix+"Table.filter" <| Test.specify "report error when trying to filter by a custom predicate" <| t1.filter "a" (x -> x % 2 == 0) . 
should_fail_with Unsupported_Database_Operation diff --git a/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso b/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso index 6790797cc15d..2f736d16d5c2 100644 --- a/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso @@ -19,10 +19,8 @@ spec = Test.group "Aggregate Columns" <| test_aggregator table col expected_name expected_result epsilon=False = problem_builder = Problem_Builder.new resolved = Aggregate_Column_Helper.resolve_aggregate table problem_builder col - name = case resolved.new_name of - Nothing -> - Aggregate_Column_Helper.default_aggregate_column_name resolved - overridden -> overridden + name = if resolved.new_name != "" then resolved.new_name else + Aggregate_Column_Helper.default_aggregate_column_name resolved name . should_equal expected_name result = @@ -125,7 +123,7 @@ spec = Test.group "Aggregate Columns" <| test_aggregator empty_table (Last 0 test_name) test_name Nothing Test.specify "should be able to concatenate a set of values excluding missing" <| - test_aggregator simple_table (Concatenate -1 Nothing ',' '[' ']' '"') "Concatenate text" '[A,"",,"B,C",]' + test_aggregator simple_table (Concatenate -1 "" ',' '[' ']' '"') "Concatenate text" '[A,"",,"B,C",]' test_aggregator simple_table (Concatenate -1 test_name) test_name 'AB,C' test_aggregator simple_table (Concatenate "text" test_name ',') test_name 'A,,,B,C,' # TODO [RW] Re-enable this once #6281 is implemented.