Skip to content

Commit

Permalink
Everything except Aggregate_Spec updated.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley committed Feb 12, 2024
1 parent 2c2e774 commit b8a531a
Show file tree
Hide file tree
Showing 14 changed files with 75 additions and 81 deletions.
45 changes: 22 additions & 23 deletions test/Benchmarks/src/Table/Aggregate.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from Standard.Base import all hiding First, Last

from Standard.Table import Table
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table import Table, Aggregate_Column

from Standard.Test import Bench, Faker

Expand Down Expand Up @@ -34,67 +33,67 @@ collect_benches = Bench.build builder->

builder.group "Table_Aggregate" options group_builder->
group_builder.specify "Count_table" <|
data.table.aggregate [Count]
data.table.aggregate [Aggregate_Column.Count]

group_builder.specify "Max_table" <|
data.table.aggregate [Maximum "ValueWithNothing"]
data.table.aggregate [Aggregate_Column.Maximum "ValueWithNothing"]

group_builder.specify "Sum_table" <|
data.table.aggregate [Sum "ValueWithNothing"]
data.table.aggregate [Aggregate_Column.Sum "ValueWithNothing"]

group_builder.specify "Count_Distinct_table" <|
data.table.aggregate [Count_Distinct "Index"]
data.table.aggregate [Aggregate_Column.Count_Distinct "Index"]

group_builder.specify "StDev_table" <|
data.table.aggregate [Standard_Deviation "Value"]
data.table.aggregate [Aggregate_Column.Standard_Deviation "Value"]

group_builder.specify "Median_table" <|
data.table.aggregate [Median "Value"]
data.table.aggregate [Aggregate_Column.Median "Value"]

group_builder.specify "Mode_table" <|
data.table.aggregate [Mode "Index"]
data.table.aggregate [Aggregate_Column.Mode "Index"]

group_builder.specify "Count_grouped" <|
data.table.aggregate ["Index"] [Count]
data.table.aggregate ["Index"] [Aggregate_Column.Count]

group_builder.specify "Max_grouped" <|
data.table.aggregate ["Index"] [Maximum "ValueWithNothing"]
data.table.aggregate ["Index"] [Aggregate_Column.Maximum "ValueWithNothing"]

group_builder.specify "Sum_grouped" <|
data.table.aggregate ["Index"] [Sum "ValueWithNothing"]
data.table.aggregate ["Index"] [Aggregate_Column.Sum "ValueWithNothing"]

group_builder.specify "Count_Distinct_grouped" <|
data.table.aggregate ["Index"] [Count_Distinct "Code"]
data.table.aggregate ["Index"] [Aggregate_Column.Count_Distinct "Code"]

group_builder.specify "StDev_grouped" <|
data.table.aggregate ["Index"] [Standard_Deviation "Value"]
data.table.aggregate ["Index"] [Aggregate_Column.Standard_Deviation "Value"]

group_builder.specify "Median_grouped" <|
data.table.aggregate ["Index"] [Median "Value"]
data.table.aggregate ["Index"] [Aggregate_Column.Median "Value"]

group_builder.specify "Mode_grouped" <|
data.table.aggregate ["Index"] [Mode "Index"]
data.table.aggregate ["Index"] [Aggregate_Column.Mode "Index"]

group_builder.specify "Count_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Count]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Count]

group_builder.specify "Max_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Maximum "ValueWithNothing"]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Maximum "ValueWithNothing"]

group_builder.specify "Sum_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Sum "ValueWithNothing"]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Sum "ValueWithNothing"]

group_builder.specify "Count_Distinct_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Count_Distinct "Code"]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Count_Distinct "Code"]

group_builder.specify "StDev_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Standard_Deviation "Value"]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Standard_Deviation "Value"]

group_builder.specify "Median_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Median "Value"]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Median "Value"]

group_builder.specify "Mode_2_level_groups" <|
data.table.aggregate ["Index", "Flag"] [Mode "Index"]
data.table.aggregate ["Index", "Flag"] [Aggregate_Column.Mode "Index"]


# Entry point: evaluates `collect_benches` and invokes `run_main` on the result.
main = collect_benches . run_main
7 changes: 3 additions & 4 deletions test/Benchmarks/src/Table/Cross_Tab.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from Standard.Base import all

from Standard.Table import Table
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count, Sum
from Standard.Table import Table, Aggregate_Column

from Standard.Test import Bench, Faker

Expand Down Expand Up @@ -42,7 +41,7 @@ collect_benches = Bench.build builder->
data = Data.create num_rows

builder.group ("CrossTab_" + num_rows.to_text) options group_builder->
specify group_by name_column values=[Count] =
specify group_by name_column values=[Aggregate_Column.Count] =
name = (group_by.join '_') + "_" + name_column + "_" + (values.map .to_text . join "_")
group_builder.specify (normalize_name name) <|
data.table.cross_tab group_by name_column values
Expand All @@ -53,7 +52,7 @@ collect_benches = Bench.build builder->
specify ["type"] "size"
specify ["store"] "size"
specify ["size"] "store"
specify ["size"] "store" values=[Count, Sum "price"]
specify ["size"] "store" values=[Aggregate_Column.Count, Aggregate_Column.Sum "price"]


normalize_name : Text -> Text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ add_specs suite_builder setup =

group_builder.specify "case insensitive name collisions - aggregate" <|
t1 = table_builder [["X", [2, 1, 3, 2]]]
t2 = t1.aggregate [Aggregate_Column.Maximum "X" "A", Aggregate_Column.Minimum "X" "a"]
t2 = t1.aggregate columns=[Aggregate_Column.Maximum "X" "A", Aggregate_Column.Minimum "X" "a"]

case is_case_sensitive of
True ->
Expand Down
36 changes: 18 additions & 18 deletions test/Table_Tests/src/Common_Table_Operations/Cross_Tab_Spec.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Average, Count, Group_By, Sum, Concatenate
from Standard.Table import Aggregate_Column
import Standard.Table.Data.Expression.Expression_Error
from Standard.Table.Errors import all

Expand Down Expand Up @@ -53,15 +53,15 @@ add_specs suite_builder setup =
t1.at "z" . to_vector . should_equal [2]

group_builder.specify "should allow a different aggregate" <|
t1 = data.table.cross_tab [] "Key" values=[Sum "Value"]
t1 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Sum "Value"]
t1.column_names . should_equal ["x", "y", "z"]
t1.row_count . should_equal 1
t1.at "x" . to_vector . should_equal [10]
t1.at "y" . to_vector . should_equal [18]
t1.at "z" . to_vector . should_equal [17]

group_builder.specify "should allow a custom expression for the aggregate" <|
t1 = data.table.cross_tab [] "Key" values=[Sum "[Value]*[Value]"]
t1 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Sum "[Value]*[Value]"]
t1.column_names . should_equal ["x", "y", "z"]
t1.row_count . should_equal 1
t1.at "x" . to_vector . should_equal [30]
Expand Down Expand Up @@ -94,19 +94,19 @@ add_specs suite_builder setup =
t1.at "z" . to_vector . should_equal [1, 1]

group_builder.specify "should allow a grouping by Aggregate_Column" <|
t1 = data.table2.cross_tab [Group_By "Group"] "Key"
t1 = data.table2.cross_tab [Aggregate_Column.Group_By "Group"] "Key"
t1.column_names . should_equal ["Group", "x", "y", "z"]
t1.row_count . should_equal 2
t1.at "Group" . to_vector . should_equal ["A", "B"]
t1.at "x" . to_vector . should_equal [2, 2]
t1.at "y" . to_vector . should_equal [2, 1]
t1.at "z" . to_vector . should_equal [1, 1]

data.table2.cross_tab [Sum "Group"] "Key" . should_fail_with Illegal_Argument
data.table2.cross_tab [Aggregate_Column.Sum "Group"] "Key" . should_fail_with Illegal_Argument

group_builder.specify "should allow a grouping by Aggregate_Colum, with some empty bins" <|
table3 = table_builder [["Group", ["B","A","B","A","A"]], ["Key", ["x", "y", "y", "y", "z"]], ["Value", [4, 5, 6, 7, 8]]]
t1 = table3.cross_tab [Group_By "Group"] "Key"
t1 = table3.cross_tab [Aggregate_Column.Group_By "Group"] "Key"
t1.column_names . should_equal ["Group", "x", "y", "z"]
t1.row_count . should_equal 2
t1.at "Group" . to_vector . should_equal ["A", "B"]
Expand All @@ -127,7 +127,7 @@ add_specs suite_builder setup =
t2.column_names . should_equal ["Group", "x", "y", "z"]

group_builder.specify "should allow multiple values aggregates" <|
t1 = data.table.cross_tab [] "Key" values=[Count, Sum "Value"]
t1 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Count, Aggregate_Column.Sum "Value"]
t1.column_names . should_equal ["x Count", "x Sum", "y Count", "y Sum", "z Count", "z Sum"]
t1.row_count . should_equal 1
t1.at "x Count" . to_vector . should_equal [4]
Expand Down Expand Up @@ -156,31 +156,31 @@ add_specs suite_builder setup =
err2.catch.criteria . should_equal [42]

group_builder.specify "should fail if aggregate values contain missing columns" <|
err1 = data.table.cross_tab [] "Key" values=[Count, Sum "Nonexistent Value", Sum "Value", Sum "OTHER"]
err1 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Count, Aggregate_Column.Sum "Nonexistent Value", Aggregate_Column.Sum "Value", Aggregate_Column.Sum "OTHER"]
err1.should_fail_with Invalid_Aggregate_Column
err1.catch.name . should_equal "Nonexistent Value"

err2 = data.table.cross_tab [] "Key" values=[Count, Sum "Nonexistent Value", Sum "Value", Sum 42]
err2 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Count, Aggregate_Column.Sum "Nonexistent Value", Aggregate_Column.Sum "Value", Aggregate_Column.Sum 42]
err2.should_fail_with Missing_Input_Columns
err2.catch.criteria . should_equal [42]

group_builder.specify "should fail if aggregate values contain invalid expressions" <|
err1 = data.table.cross_tab [] "Key" values=[Sum "[MISSING]*10"]
err1 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Sum "[MISSING]*10"]
err1.should_fail_with Invalid_Aggregate_Column
err1.catch.name . should_equal "[MISSING]*10"
err1.catch.expression_error . should_equal (No_Such_Column.Error "MISSING")

err2 = data.table.cross_tab [] "Key" values=[Sum "[[["]
err2 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Sum "[[["]
err2.should_fail_with Invalid_Aggregate_Column
err2.catch.name . should_equal "[[["
err2.catch.expression_error . should_be_a Expression_Error.Syntax_Error

group_builder.specify "should not allow Group_By for values" <|
err1 = data.table.cross_tab [] "Key" values=[Count, Group_By "Value"] on_problems=Problem_Behavior.Ignore
err1 = data.table.cross_tab [] "Key" values=[Aggregate_Column.Count, Aggregate_Column.Group_By "Value"] on_problems=Problem_Behavior.Ignore
err1.should_fail_with Illegal_Argument

group_builder.specify "should gracefully handle duplicate aggregate names" <|
action = data.table.cross_tab [] "Key" values=[Count new_name="Agg1", Sum "Value" new_name="Agg1"] on_problems=_
action = data.table.cross_tab [] "Key" values=[Aggregate_Column.Count new_name="Agg1", Aggregate_Column.Sum "Value" new_name="Agg1"] on_problems=_
tester table =
table.column_names . should_equal ["x Agg1", "x Agg1 1", "y Agg1", "y Agg1 1", "z Agg1", "z Agg1 1"]
problems = [Duplicate_Output_Column_Names.Error ["x Agg1", "y Agg1", "z Agg1"]]
Expand Down Expand Up @@ -235,11 +235,11 @@ add_specs suite_builder setup =
t = table_builder [["Key", ["a", "a", "b", "b"]], ["ints", [1, 2, 3, 4]], ["texts", ["a", "b", "c", "d"]]]

[Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <|
t1 = t.cross_tab [] "Key" values=[Average "texts"] on_problems=pb
t1 = t.cross_tab [] "Key" values=[Aggregate_Column.Average "texts"] on_problems=pb
t1.should_fail_with Invalid_Value_Type
t2 = t.cross_tab [] "Key" values=[Sum "texts"] on_problems=pb
t2 = t.cross_tab [] "Key" values=[Aggregate_Column.Sum "texts"] on_problems=pb
t2.should_fail_with Invalid_Value_Type
t3 = t.cross_tab [] "Key" values=[Concatenate "ints"] on_problems=pb
t3 = t.cross_tab [] "Key" values=[Aggregate_Column.Concatenate "ints"] on_problems=pb
t3.should_fail_with Invalid_Value_Type

group_builder.specify "should return predictable types" <|
Expand All @@ -250,7 +250,7 @@ add_specs suite_builder setup =
t1.at "a" . value_type . is_integer . should_be_true
t1.at "b" . value_type . is_integer . should_be_true

t2 = table.cross_tab [] "Int" values=[Average "Float", Concatenate "Text"] . sort_columns
t2 = table.cross_tab [] "Int" values=[Aggregate_Column.Average "Float", Aggregate_Column.Concatenate "Text"] . sort_columns
t2.column_names . should_equal ["1 Average Float", "1 Concatenate Text", "2 Average Float", "2 Concatenate Text"]
t2.at "1 Average Float" . value_type . is_floating_point . should_be_true
t2.at "1 Concatenate Text" . value_type . is_text . should_be_true
Expand All @@ -263,7 +263,7 @@ add_specs suite_builder setup =
r1.should_fail_with Invalid_Column_Names
r1.catch.to_display_text . should_contain "cannot contain the NUL character"

r2 = data.table2.cross_tab [] "Key" values=[Average "Value" new_name='x\0']
r2 = data.table2.cross_tab [] "Key" values=[Aggregate_Column.Average "Value" new_name='x\0']
r2.print
r2.should_fail_with Invalid_Column_Names
r2.catch.to_display_text . should_contain "cannot contain the NUL character"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ from Standard.Base import all

# We hide the `Table` constructor because we are supposed to use `table_builder` instead, which is backend-agnostic.
from Standard.Table import all hiding Table
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Group_By, Count, Sum

from Standard.Test import all

Expand Down Expand Up @@ -54,7 +53,7 @@ add_specs suite_builder setup =
t1 = table_builder [["Count", [1, 2, 3]], ["Class", ["X", "Y", "Z"]]]
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "A", "C", "D", "D", "B", "B"]]]

t3 = t2.aggregate [Group_By "Letter", Count]
t3 = t2.aggregate ["Letter"] [Aggregate_Column.Count]
t4 = t3.join t1 on="Count" join_kind=Join_Kind.Left_Outer |> materialize |> _.order_by "Letter"
t4.columns.map .name . should_equal ["Letter", "Count", "Right Count", "Class"]
rows = t4.rows . map .to_vector
Expand All @@ -66,7 +65,7 @@ add_specs suite_builder setup =
group_builder.specify "aggregates and distinct" <|
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C"]], ["Points", [2, 5, 2, 1, 10, 3]]]

t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
t3 = t2.aggregate ["Letter"] [Aggregate_Column.Sum "Points"]
t4 = t3.distinct "Sum Points" |> materialize |> _.order_by "Sum Points"
t4.columns.map .name . should_equal ["Letter", "Sum Points"]
t4.row_count . should_equal 2
Expand All @@ -81,7 +80,7 @@ add_specs suite_builder setup =
group_builder.specify "aggregates and filtering" <|
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C", "B"]], ["Points", [2, 5, 2, 1, 10, 3, 0]]]

t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
t3 = t2.aggregate ["Letter"] [Aggregate_Column.Sum "Points"]
t4 = t3.filter "Sum Points" (Filter_Condition.Equal 5) |> materialize |> _.order_by "Letter"
t4.columns.map .name . should_equal ["Letter", "Sum Points"]
rows = t4.rows . map .to_vector
Expand All @@ -90,7 +89,7 @@ add_specs suite_builder setup =

group_builder.specify "aggregates and ordering" <|
t1 = table_builder [["Letter", ["C", "A", "B", "A", "A", "C", "C", "B"]], ["Points", [0, -100, 5, 2, 1, 10, 3, 0]]]
t2 = t1.aggregate [Group_By "Letter", Sum "Points"]
t2 = t1.aggregate ["Letter"] [Aggregate_Column.Sum "Points"]
t3 = t2.order_by "Sum Points" |> materialize
t3.columns.map .name . should_equal ["Letter", "Sum Points"]
t3.at "Letter" . to_vector . should_equal ["A", "B", "C"]
Expand Down Expand Up @@ -194,7 +193,7 @@ add_specs suite_builder setup =
vt1.should_be_a (Value_Type.Char ...)
vt1.variable_length.should_be_true

t4 = t3.aggregate [Aggregate_Column.Shortest "X", Aggregate_Column.Group_By "Y"]
t4 = t3.aggregate ["Y"] [Aggregate_Column.Shortest "X"]
vt2 = t4.at "Shortest X" . value_type
Test.with_clue "t4[X].value_type="+vt2.to_display_text+": " <|
vt2.should_be_a (Value_Type.Char ...)
Expand All @@ -219,7 +218,7 @@ add_specs suite_builder setup =
c.value_type.variable_length.should_be_true

t2 = t1.set c "C"
t3 = t2.aggregate [Aggregate_Column.Shortest "C"]
t3 = t2.aggregate columns=[Aggregate_Column.Shortest "C"]
t3.at "Shortest C" . to_vector . should_equal ["b"]
vt = t3.at "Shortest C" . value_type
Test.with_clue "t3[C].value_type="+vt.to_display_text+": " <|
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from Standard.Base import all

from Standard.Table import Value_Type, Column_Ref, Previous_Value, Blank_Selector
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct
from Standard.Table.Errors import all

from Standard.Database.Errors import Unsupported_Database_Operation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Group_By, Sum
from Standard.Table import Aggregate_Column
from Standard.Table.Errors import all

from Standard.Test import all
Expand Down Expand Up @@ -254,7 +254,7 @@ add_specs suite_builder setup =

group_builder.specify "Should work correctly after aggregation" <|
t0 = table_builder [["X", ["a", "b", "a", "c"]], ["Y", [1, 2, 4, 8]]]
t1 = t0.aggregate [Group_By "X", Sum "Y"]
t1 = t0.aggregate ["X"] [Aggregate_Column.Sum "Y"]

t2 = t1.order_by "X" . take 2
t2.at "X" . to_vector . should_equal ['a', 'b']
Expand Down
Loading

0 comments on commit b8a531a

Please sign in to comment.