From 434a021b16ccc183559a4167bc6d54673bb8cd49 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 1 Mar 2022 15:03:37 +0000 Subject: [PATCH] Count Distinct Support for Booleans and Nothing in the keys --- .../0.0.0-dev/src/Data/Aggregate_Column.enso | 13 ++++-- .../Table/0.0.0-dev/src/Data/Group_By.enso | 46 +++++++++++++++++-- .../Table/0.0.0-dev/src/Data/Table.enso | 4 +- .../src/Aggregate_Column_Spec.enso | 21 +++++++-- 4 files changed, 70 insertions(+), 14 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso index 3727af0404236..6303c9f563cf3 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso @@ -1,7 +1,7 @@ from Standard.Base import all from Standard.Table.Data.Column as Column_Module import Column -from Standard.Table.Data.Group_By import Group_By_Key +import Standard.Table.Data.Group_By ## Defines an Aggregate Column type Aggregate_Column @@ -86,7 +86,10 @@ type Aggregate_Column get_name c = (this.resolve_column table c).java_column.getName case this of Count _ -> "Count" - Count_Distinct c _ -> "Count Distinct " + (get_name c) + Count_Distinct c _ -> + case c of + Vector.Vector _ -> "Count Distinct " + ((c.map get_name).join " ") + _ -> "Count Distinct " + (get_name c) Count_Not_Nothing c _ -> "Count Not Nothing " + (get_name c) Count_Nothing c _ -> "Count Nothing " + (get_name c) Count_Not_Empty c _ -> "Count Not Empty " + (get_name c) @@ -169,8 +172,10 @@ type Aggregate_Column val = col.at i if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0))) Count_Distinct columns _ -> - resolved = columns.map c->(this.resolve_column table c) - key_maker i = Group_By_Key (resolved.map c->(c.at i)) + resolved = case columns of + Vector.Vector _ -> columns.map c->(this.resolve_column table c) + _ -> [this.resolve_column table columns] + key_maker i = Group_By.key (resolved.map c->(c.at i)) map->i->(map.insert (key_maker i) 1) evaluate : Any->Any diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso index b64e2fa40d8ea..fb5fc384a6d4b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso @@ -1,14 +1,54 @@ from Standard.Base import all import Standard.Base.Data.Ordering.Vector_Lexicographic_Order -## Represents a comparable vector of element which is used as key for grouping. +## Create a key structure for grouping operations +key : Vector -> Group_By_Key +key values = + mapper c = case c of + Boolean -> Comparable_Boolean c + Nothing -> Comparable_Nothing + _ -> c + + Group_By_Key <| values.map mapper + +## PRIVATE + Represents a comparable vector of element which is used as key for grouping. type Group_By_Key type Group_By_Key values ## See if two keys are equal == : Group_By_Key->Boolean - == that = this.values == that.values + == that = (this.compare_to that) == Ordering.Equal ## Compares two keys compare_to : Group_By_Key->Ordering - compare_to that = Vector_Lexicographic_Order.compare this.values that.values + compare_to that = + Vector_Lexicographic_Order.compare this.values that.values + +## PRIVATE + Temporary workaround until Boolean compare_to completed +type Comparable_Boolean + type Comparable_Boolean value + + == : Comparable_Boolean->Boolean + == that = (this.compare_to that) == Ordering.Equal + + compare_to : Any->Ordering + compare_to that = + if this.value == that.value then Ordering.Equal else + if this.value then Ordering.Greater else Ordering.Less + +## PRIVATE + Temporary workaround allowing Nothing to be in a Group_By +type Comparable_Nothing + type Comparable_Nothing + + == : Comparable_Nothing->Boolean + == that = (this.compare_to that) == Ordering.Equal + + compare_to : Any->Ordering + compare_to that = + case that of + Comparable_Nothing -> Ordering.Equal + Nothing -> Ordering.Equal + _ -> Ordering.Less diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 0e9925584d541..3c8b51cc2d5bd 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -17,7 +17,7 @@ import Standard.Table.Data.Column_Mapping import Standard.Table.Data.Position import Standard.Base.Error.Warnings -from Standard.Table.Data.Group_By import Group_By_Key +import Standard.Table.Data.Group_By import Standard.Table.Data.Aggregate_Column polyglot java import org.enso.table.data.table.Table as Java_Table @@ -518,7 +518,7 @@ type Table key_columns = if selector.is_nothing then [] else Table_Helpers.select_columns internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings key_length = key_columns.length - make_key = if (key_length == 0) then _->(Group_By_Key [1]) else i->(Group_By_Key (key_columns.map v->(v.at i))) + make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i))) # New Table Accumulator new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[n, Vector.new_builder] diff --git a/test/Table_Tests/src/Aggregate_Column_Spec.enso b/test/Table_Tests/src/Aggregate_Column_Spec.enso index 1b351fb5fcaa3..920fe6f5c2fcf 100644 --- a/test/Table_Tests/src/Aggregate_Column_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Column_Spec.enso @@ -10,6 +10,7 @@ spec = Test.group "Aggregate Columns" <| text_col = simple_table.at "text" bool_col = simple_table.at "is_valid" float_col = simple_table.at "float" + int_col = simple_table.at "count" empty_table = Table.new [["count", []], ["is_valid", []], ["text", []]] test_name = "Test Column" @@ -92,39 +93,49 @@ spec = Test.group "Aggregate Columns" <| test_aggregator simple_table (Median float_col test_name) test_name 2.75 0.000001 test_aggregator empty_table (Median 0 test_name) test_name Nothing - Test.specify "should be able to compute first of a set of values including missing values" <| + Test.specify "should be able to compute first of a set of values including missing" <| test_aggregator simple_table (First 1 ignore_nothing=False) "First is_valid" Nothing test_aggregator simple_table (First 1 test_name ignore_nothing=False) test_name Nothing test_aggregator simple_table (First "is_valid" test_name ignore_nothing=False) test_name Nothing test_aggregator simple_table (First bool_col test_name ignore_nothing=False) test_name Nothing test_aggregator empty_table (First 0 test_name ignore_nothing=False) test_name Nothing - Test.specify "should be able to compute first of a set of values excluding missing values" <| + Test.specify "should be able to compute first of a set of values excluding missing" <| test_aggregator simple_table (First 1) "First is_valid" False test_aggregator simple_table (First 1 test_name) test_name False test_aggregator simple_table (First "is_valid" test_name) test_name False test_aggregator simple_table (First bool_col test_name) test_name False test_aggregator empty_table (First 0 test_name) test_name Nothing - Test.specify "should be able to compute last of a set of values including missing values" <| + Test.specify "should be able to compute last of a set of values including missing" <| test_aggregator simple_table (Last 1 ignore_nothing=False) "Last is_valid" Nothing test_aggregator simple_table (Last 1 test_name ignore_nothing=False) test_name Nothing test_aggregator simple_table (Last "is_valid" test_name ignore_nothing=False) test_name Nothing test_aggregator simple_table (Last bool_col test_name ignore_nothing=False) test_name Nothing test_aggregator empty_table (Last 0 test_name ignore_nothing=False) test_name Nothing - Test.specify "should be able to compute last of a set of values excluding missing values" <| + Test.specify "should be able to compute last of a set of values excluding missing" <| test_aggregator simple_table (Last 1) "Last is_valid" False test_aggregator simple_table (Last 1 test_name) test_name False test_aggregator simple_table (Last "is_valid" test_name) test_name False test_aggregator simple_table (Last bool_col test_name) test_name False test_aggregator empty_table (Last 0 test_name) test_name Nothing - Test.specify "should be able to concatenate a set of values excluding missing values" <| + Test.specify "should be able to concatenate a set of values excluding missing" <| test_aggregator simple_table (Concatenate -1 Nothing ',' '[' ']' '"') "Concatenate text" '[A,"",,"B,C",]' test_aggregator simple_table (Concatenate -1 test_name) test_name 'AB,C' test_aggregator simple_table (Concatenate "text" test_name ',') test_name 'A,,,B,C,' test_aggregator simple_table (Concatenate text_col test_name) test_name 'AB,C' test_aggregator empty_table (Concatenate 0 test_name) test_name Nothing + Test.specify "should be able to count distinct items on a single set of values" <| + test_aggregator simple_table (Count_Distinct 0) "Count Distinct count" 4 + test_aggregator simple_table (Count_Distinct 0 test_name) test_name 4 + test_aggregator simple_table (Count_Distinct "count" test_name) test_name 4 + test_aggregator simple_table (Count_Distinct int_col test_name) test_name 4 + test_aggregator empty_table (Count_Distinct 0 test_name) test_name 0 + + Test.specify "should be able to count distinct items on a multiple sets of values" <| + test_aggregator simple_table (Count_Distinct [0, 1]) "Count Distinct count is_valid" 5 + main = Test.Suite.run_main here.spec