From dd143803e2e9c92baee48b34d8b29200b74230ea Mon Sep 17 00:00:00 2001
From: Gregory Travis
Date: Mon, 8 May 2023 13:21:53 -0400
Subject: [PATCH 1/4] works

---
 .../src/Internal/Split_Tokenize.enso       | 19 ++++++++++++-------
 .../src/In_Memory/Split_Tokenize_Spec.enso | 16 ++++++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
index dfd842cd3972..102cb7c1616a 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
@@ -123,7 +123,7 @@ fan_out_to_columns table input_column_id function column_count=Nothing on_proble
     input_column = table.get input_column_id
     problem_builder = Problem_Builder.new
     new_columns_unrenamed = map_columns_to_multiple input_column function column_count problem_builder
-    new_columns = rename_new_columns table new_columns_unrenamed problem_builder
+    new_columns = rename_new_columns table input_column.name new_columns_unrenamed problem_builder
     new_table = replace_column_with_columns table input_column new_columns
     problem_builder.attach_problems_after on_problems new_table

@@ -315,17 +315,22 @@ map_columns_to_multiple input_column function column_count problem_builder =
         builders

+    # Name columns. If there's only one, use the original column name.
+    new_column_names = case builders.length of
+        1 -> [input_column.name]
+        _ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i
+
     # Build Columns.
-    builders.map .seal . map_with_index i-> storage->
-        name = default_column_namer input_column.name i
-        Column.from_storage name storage
+    sealed = builders.map .seal
+    new_column_names.zip sealed Column.from_storage

 ## PRIVATE
    Rename a vector of columns to be unique when added to a table.
-rename_new_columns : Table -> Vector Column -> Problem_Builder -> Vector Column
-rename_new_columns table columns problem_builder =
+rename_new_columns : Table -> Text -> Vector Column -> Problem_Builder -> Vector Column
+rename_new_columns table removed_column_name columns problem_builder =
     unique = Unique_Name_Strategy.new
-    unique.mark_used <| table.columns.map .name
+    remaining_columns = table.columns . filter (c-> c.name != removed_column_name) . map .name
+    unique.mark_used remaining_columns
     new_columns = columns.map column->
         new_name = unique.make_unique column.name
         column.rename new_name
diff --git a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
index 07f866df508c..83e68958deb2 100644
--- a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
@@ -42,6 +42,14 @@ spec =
             t2 = t.split_to_rows "bar" "b"
             t2.should_equal expected
+        Test.specify "can do split_to_columns with one output column, no column suffix added" <|
+            cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
+            t = Table.new cols
+            expected_rows = [[0, "abc"], [1, "cbdbef"], [2, "ghbijbu"]]
+            expected = Table.from_rows ["foo", "bar"] expected_rows
+            t2 = t.split_to_columns "bar" "q"
+            t2.should_equal expected
+
     Test.group "Table.tokenize" <|
         Test.specify "can do tokenize_to_columns" <|
             cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]]]
             t = Table.new cols
@@ -75,6 +83,14 @@ spec =
             t2 = t.tokenize_to_rows "bar" "\d+"
             t2.should_equal expected

+        Test.specify "can do tokenize_to_columns with one output column, no column suffix needed" <|
+            cols = [["foo", [0, 1, 2]], ["bar", ["a12b", "23", "2r"]]]
+            t = Table.new cols
+            expected_rows = [[0, "12"], [1, "23"], [2, "2"]]
+            expected = Table.from_rows ["foo", "bar"] expected_rows
+            t2 = t.tokenize_to_columns "bar" "\d+"
+            t2.should_equal expected
+
         Test.specify "can do tokenize_to_rows with some rows that have no matches" <|
             cols = [["foo", [0, 1, 2, 3]], ["bar", ["a12b34r5", "23", "q", "2r4r55"]]]
             t = Table.new cols

From ff292fe22656c1f0da667e475048e74c7fa386ba Mon Sep 17 00:00:00 2001
From: Gregory Travis
Date: Tue, 9 May 2023 09:54:22 -0400
Subject: [PATCH 2/4] review, number columns from 1

---
 .../src/Internal/Split_Tokenize.enso       |  8 +--
 .../src/In_Memory/Split_Tokenize_Spec.enso | 58 +++++++++----------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
index 102cb7c1616a..e79d0a189f45 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
@@ -97,7 +97,7 @@ regex_to_column_names pattern original_column_name =
     group_nums_to_names = pattern.group_nums_to_names

     unnamed_group_numbers = 1.up_to pattern.group_count . filter i-> group_nums_to_names.contains_key i . not
-    group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (0.up_to unnamed_group_numbers.length)
+    group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (1.up_to unnamed_group_numbers.length+1)

     Vector.new (pattern.group_count-1) i->
         # explicit groups start at 1
@@ -318,11 +318,11 @@ map_columns_to_multiple input_column function column_count problem_builder =
     # Name columns. If there's only one, use the original column name.
     new_column_names = case builders.length of
         1 -> [input_column.name]
-        _ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i
+        _ -> 1.up_to (builders.length+1) . map i-> default_column_namer input_column.name i

     # Build Columns.
-    sealed = builders.map .seal
-    new_column_names.zip sealed Column.from_storage
+    storages = builders.map .seal
+    new_column_names.zip storages Column.from_storage

 ## PRIVATE
    Rename a vector of columns to be unique when added to a table.
diff --git a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
index 83e68958deb2..ffd5f424f068 100644
--- a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
@@ -14,7 +14,7 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, "gh", "ij", "u"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.split_to_columns "bar" "b"
             t2.should_equal expected
@@ -30,7 +30,7 @@ spec =
             cols = [["foo", [0, 1, 2, 3]], ["bar", ["abc", "cbdbef", Nothing, "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, Nothing, Nothing, Nothing], [3, "gh", "ij", "u"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.split_to_columns "bar" "b"
             t2.should_equal expected
@@ -55,7 +55,7 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]]]
             t = Table.new cols
             expected_rows = [[0, "12", "34", "5"], [1, "23", Nothing, Nothing], [2, "2", "4", "55"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.tokenize_to_columns "bar" "\d+"
             t2.should_equal expected
@@ -71,7 +71,7 @@ spec =
             cols = [["foo", [0, 1, 2, 3]], ["bar", ["a12b34r5", Nothing, "23", "2r4r55"]]]
             t = Table.new cols
             expected_rows = [[0, "12", "34", "5"], [1, Nothing, Nothing, Nothing], [2, "23", Nothing, Nothing], [3, "2", "4", "55"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.tokenize_to_columns "bar" "\d+"
             t2.should_equal expected
@@ -103,7 +103,7 @@ spec =
             cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]]
             t = Table.new cols
             expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.tokenize_to_columns "bar" "([a-z]).(\d+)"
             t2.should_equal expected
@@ -119,7 +119,7 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["aBqcE", "qcBr", "cCb"]]]
             t = Table.new cols
             expected_rows = [[0, "B", "c", Nothing], [1, "c", "B", Nothing], [2, "c", "C", "b"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.tokenize_to_columns "bar" "[bc]" case_sensitivity=Case_Sensitivity.Insensitive
             t2.should_equal expected
@@ -136,16 +136,16 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing, Nothing], [1, "c", "d", "ef", Nothing], [2, "gh", "ij", "u", Nothing]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows
             t2 = t.split_to_columns "bar" "b" column_count=4
             t2.should_equal expected
-            t2.at "bar 3" . value_type . is_text . should_be_true
+            t2.at "bar 4" . value_type . is_text . should_be_true

         Test.specify "split should limit columns and return problems when exceeding the column limit" <|
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c"], [1, "c", "d"], [2, "gh", "ij"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows
             action = t.split_to_columns "bar" "b" column_count=2 on_problems=_
             tester = t-> t.should_equal expected
             problems = [Column_Count_Exceeded.Error 2 3]
@@ -155,7 +155,7 @@ spec =
             cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]]
             t = Table.new cols
             expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows
             action = t.tokenize_to_columns "bar" "([a-z]).(\d+)" column_count=2 on_problems=_
             tester = t-> t.should_equal expected
             problems = [Column_Count_Exceeded.Error 2 3]
@@ -165,10 +165,10 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["ghbijbu", "cbdbef", "abc"]]]
             t = Table.new cols
             expected_rows = [[0, "gh", "ij", "u", Nothing], [1, "c", "d", "ef", Nothing], [2, "a", "c", Nothing, Nothing]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows
             t2 = t.split_to_columns "bar" "b" column_count=4
             t2.should_equal expected
-            t2.at "bar 3" . value_type . is_text . should_be_true
+            t2.at "bar 4" . value_type . is_text . should_be_true

     Test.group "Table.split/tokenize errors" <|
         Test.specify "won't work on a non-text column" <|
@@ -199,23 +199,23 @@ spec =

     Test.group "Table.split/tokenize name conflicts" <|
         Test.specify "split will make column names unique" <|
-            cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 1", ["a", "b", "c"]]]
+            cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 2", ["a", "b", "c"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing, "a"], [1, "c", "d", "ef", "b"], [2, "gh", "ij", "u", "c"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows
             action = t.split_to_columns "bar" "b" on_problems=_
             tester = t-> t.should_equal expected
-            problems = [Duplicate_Output_Column_Names.Error ["bar 1"]]
+            problems = [Duplicate_Output_Column_Names.Error ["bar 2"]]
             Problems.test_problem_handling action problems tester

         Test.specify "tokenize will make column names unique" <|
-            cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 1", ["a", "b", "c"]]]
+            cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 2", ["a", "b", "c"]]]
             t = Table.new cols
             expected_rows = [[0, "12", "34", "5", "a"], [1, "23", Nothing, Nothing, "b"], [2, "2", "4", "55", "c"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows
             action = t.tokenize_to_columns "bar" "\d+" on_problems=_
             tester = t-> t.should_equal expected
-            problems = [Duplicate_Output_Column_Names.Error ["bar 1"]]
+            problems = [Duplicate_Output_Column_Names.Error ["bar 2"]]
             Problems.test_problem_handling action problems tester

     Test.group "Table.split/tokenize column order" <|
@@ -223,14 +223,14 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["baz", [1, 2, 3]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing, 1], [1, "c", "d", "ef", 2], [2, "gh", "ij", "u", 3]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] expected_rows
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] expected_rows
             t2 = t.split_to_columns "bar" "b"
             t2.should_equal expected

     Test.group "Table.parse_to_columns" <|
         Test.specify "can parse to columns" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected
@@ -248,25 +248,25 @@ spec =

         Test.specify "non-participating groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "q1", "y"], ["xx", "qp", "yy"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]]
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]]
             actual = t.parse_to_columns "bar" "q((\d)|([a-z]))"
             actual.should_equal expected

         Test.specify "case-insensitive" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "qq", "y"], ["xx", "qQ", "yy"]]
-            expected = Table.from_rows ["foo", "bar 0", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]]
+            expected = Table.from_rows ["foo", "bar 1", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]]
             actual = t.parse_to_columns "bar" "q(q)" case_sensitivity=Case_Sensitivity.Insensitive
             actual.should_equal expected

         Test.specify "no post-parsing" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]]
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]]
             actual = t.parse_to_columns "bar" "(\d)(\d)" parse_values=False
             actual.should_equal expected

         Test.specify "column name clash" <|
-            t = Table.from_rows ["foo", "bar", "bar 1"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 1"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
+            t = Table.from_rows ["foo", "bar", "bar 2"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
+            expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 2"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected
@@ -284,13 +284,13 @@ spec =

         Test.specify "empty table, with regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "a", "a", "y"]] . take 0
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "a", "a", "y"]] . take 0
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected

         Test.specify "empty table, with named and unnamed regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0
-            expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0
+            expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0
             actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)"
             actual.should_equal expected
@@ -302,13 +302,13 @@ spec =

         Test.specify "input with no matches, with regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]]
-            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] []
+            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] []
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected

         Test.specify "input with no matches, with named and unnamed regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]]
-            expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] []
+            expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] []
             actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)"
             actual.should_equal expected

From 87c69698807e289d570e94601040496773e55d9e Mon Sep 17 00:00:00 2001
From: Gregory Travis
Date: Tue, 9 May 2023 10:55:47 -0400
Subject: [PATCH 3/4] Revert "review, number columns from 1"

This reverts commit ff292fe22656c1f0da667e475048e74c7fa386ba.
---
 .../src/Internal/Split_Tokenize.enso       |  8 +--
 .../src/In_Memory/Split_Tokenize_Spec.enso | 58 +++++++++----------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
index e79d0a189f45..102cb7c1616a 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
@@ -97,7 +97,7 @@ regex_to_column_names pattern original_column_name =
     group_nums_to_names = pattern.group_nums_to_names

     unnamed_group_numbers = 1.up_to pattern.group_count . filter i-> group_nums_to_names.contains_key i . not
-    group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (1.up_to unnamed_group_numbers.length+1)
+    group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (0.up_to unnamed_group_numbers.length)

     Vector.new (pattern.group_count-1) i->
         # explicit groups start at 1
@@ -318,11 +318,11 @@ map_columns_to_multiple input_column function column_count problem_builder =
     # Name columns. If there's only one, use the original column name.
     new_column_names = case builders.length of
         1 -> [input_column.name]
-        _ -> 1.up_to (builders.length+1) . map i-> default_column_namer input_column.name i
+        _ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i

     # Build Columns.
-    storages = builders.map .seal
-    new_column_names.zip storages Column.from_storage
+    sealed = builders.map .seal
+    new_column_names.zip sealed Column.from_storage

 ## PRIVATE
    Rename a vector of columns to be unique when added to a table.
diff --git a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
index ffd5f424f068..83e68958deb2 100644
--- a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
@@ -14,7 +14,7 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, "gh", "ij", "u"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
             t2 = t.split_to_columns "bar" "b"
             t2.should_equal expected
@@ -30,7 +30,7 @@ spec =
             cols = [["foo", [0, 1, 2, 3]], ["bar", ["abc", "cbdbef", Nothing, "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, Nothing, Nothing, Nothing], [3, "gh", "ij", "u"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
             t2 = t.split_to_columns "bar" "b"
             t2.should_equal expected
@@ -55,7 +55,7 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]]]
             t = Table.new cols
             expected_rows = [[0, "12", "34", "5"], [1, "23", Nothing, Nothing], [2, "2", "4", "55"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
             t2 = t.tokenize_to_columns "bar" "\d+"
             t2.should_equal expected
@@ -71,7 +71,7 @@ spec =
             cols = [["foo", [0, 1, 2, 3]], ["bar", ["a12b34r5", Nothing, "23", "2r4r55"]]]
             t = Table.new cols
             expected_rows = [[0, "12", "34", "5"], [1, Nothing, Nothing, Nothing], [2, "23", Nothing, Nothing], [3, "2", "4", "55"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
             t2 = t.tokenize_to_columns "bar" "\d+"
             t2.should_equal expected
@@ -103,7 +103,7 @@ spec =
             cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]]
             t = Table.new cols
             expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
             t2 = t.tokenize_to_columns "bar" "([a-z]).(\d+)"
             t2.should_equal expected
@@ -119,7 +119,7 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["aBqcE", "qcBr", "cCb"]]]
             t = Table.new cols
             expected_rows = [[0, "B", "c", Nothing], [1, "c", "B", Nothing], [2, "c", "C", "b"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
             t2 = t.tokenize_to_columns "bar" "[bc]" case_sensitivity=Case_Sensitivity.Insensitive
             t2.should_equal expected
@@ -136,16 +136,16 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing, Nothing], [1, "c", "d", "ef", Nothing], [2, "gh", "ij", "u", Nothing]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.split_to_columns "bar" "b" column_count=4
             t2.should_equal expected
-            t2.at "bar 4" . value_type . is_text . should_be_true
+            t2.at "bar 3" . value_type . is_text . should_be_true

         Test.specify "split should limit columns and return problems when exceeding the column limit" <|
            cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c"], [1, "c", "d"], [2, "gh", "ij"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows
             action = t.split_to_columns "bar" "b" column_count=2 on_problems=_
             tester = t-> t.should_equal expected
             problems = [Column_Count_Exceeded.Error 2 3]
@@ -155,7 +155,7 @@ spec =
             cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]]
             t = Table.new cols
             expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows
             action = t.tokenize_to_columns "bar" "([a-z]).(\d+)" column_count=2 on_problems=_
             tester = t-> t.should_equal expected
             problems = [Column_Count_Exceeded.Error 2 3]
@@ -165,10 +165,10 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["ghbijbu", "cbdbef", "abc"]]]
             t = Table.new cols
             expected_rows = [[0, "gh", "ij", "u", Nothing], [1, "c", "d", "ef", Nothing], [2, "a", "c", Nothing, Nothing]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows
             t2 = t.split_to_columns "bar" "b" column_count=4
             t2.should_equal expected
-            t2.at "bar 4" . value_type . is_text . should_be_true
+            t2.at "bar 3" . value_type . is_text . should_be_true

     Test.group "Table.split/tokenize errors" <|
         Test.specify "won't work on a non-text column" <|
@@ -199,23 +199,23 @@ spec =

     Test.group "Table.split/tokenize name conflicts" <|
         Test.specify "split will make column names unique" <|
-            cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 2", ["a", "b", "c"]]]
+            cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 1", ["a", "b", "c"]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing, "a"], [1, "c", "d", "ef", "b"], [2, "gh", "ij", "u", "c"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows
             action = t.split_to_columns "bar" "b" on_problems=_
             tester = t-> t.should_equal expected
-            problems = [Duplicate_Output_Column_Names.Error ["bar 2"]]
+            problems = [Duplicate_Output_Column_Names.Error ["bar 1"]]
             Problems.test_problem_handling action problems tester

         Test.specify "tokenize will make column names unique" <|
-            cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 2", ["a", "b", "c"]]]
+            cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 1", ["a", "b", "c"]]]
             t = Table.new cols
             expected_rows = [[0, "12", "34", "5", "a"], [1, "23", Nothing, Nothing, "b"], [2, "2", "4", "55", "c"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows
             action = t.tokenize_to_columns "bar" "\d+" on_problems=_
             tester = t-> t.should_equal expected
-            problems = [Duplicate_Output_Column_Names.Error ["bar 2"]]
+            problems = [Duplicate_Output_Column_Names.Error ["bar 1"]]
             Problems.test_problem_handling action problems tester

     Test.group "Table.split/tokenize column order" <|
@@ -223,14 +223,14 @@ spec =
             cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["baz", [1, 2, 3]]]
             t = Table.new cols
             expected_rows = [[0, "a", "c", Nothing, 1], [1, "c", "d", "ef", 2], [2, "gh", "ij", "u", 3]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] expected_rows
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] expected_rows
             t2 = t.split_to_columns "bar" "b"
             t2.should_equal expected

     Test.group "Table.parse_to_columns" <|
         Test.specify "can parse to columns" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected
@@ -248,25 +248,25 @@ spec =

         Test.specify "non-participating groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "q1", "y"], ["xx", "qp", "yy"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]]
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]]
             actual = t.parse_to_columns "bar" "q((\d)|([a-z]))"
             actual.should_equal expected

         Test.specify "case-insensitive" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "qq", "y"], ["xx", "qQ", "yy"]]
-            expected = Table.from_rows ["foo", "bar 1", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]]
+            expected = Table.from_rows ["foo", "bar 0", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]]
             actual = t.parse_to_columns "bar" "q(q)" case_sensitivity=Case_Sensitivity.Insensitive
             actual.should_equal expected

         Test.specify "no post-parsing" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]]
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]]
             actual = t.parse_to_columns "bar" "(\d)(\d)" parse_values=False
             actual.should_equal expected

         Test.specify "column name clash" <|
-            t = Table.from_rows ["foo", "bar", "bar 2"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 2"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
+            t = Table.from_rows ["foo", "bar", "bar 1"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
+            expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 1"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected
@@ -284,13 +284,13 @@ spec =

         Test.specify "empty table, with regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "a", "a", "y"]] . take 0
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "a", "a", "y"]] . take 0
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected

         Test.specify "empty table, with named and unnamed regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0
-            expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0
+            expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0
             actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)"
             actual.should_equal expected
@@ -302,13 +302,13 @@ spec =

         Test.specify "input with no matches, with regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]]
-            expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] []
+            expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] []
             actual = t.parse_to_columns "bar" "(\d)(\d)"
             actual.should_equal expected

         Test.specify "input with no matches, with named and unnamed regex groups" <|
             t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]]
-            expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] []
+            expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] []
             actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)"
             actual.should_equal expected

From 31af9c5fb69075b66d629cdc5bf92e4848035646 Mon Sep 17 00:00:00 2001
From: Gregory Travis
Date: Tue, 9 May 2023 10:56:18 -0400
Subject: [PATCH 4/4] storages

---
 .../Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
index 102cb7c1616a..3ed3fbd54c1a 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso
@@ -321,8 +321,8 @@ map_columns_to_multiple input_column function column_count problem_builder =
         _ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i

     # Build Columns.
-    sealed = builders.map .seal
-    new_column_names.zip sealed Column.from_storage
+    storages = builders.map .seal
+    new_column_names.zip storages Column.from_storage

 ## PRIVATE
    Rename a vector of columns to be unique when added to a table.
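
Usage note: a minimal sketch of the behaviour exercised by the new tests above, assuming the spec file's usual imports (Standard.Base and Standard.Table); when a split produces exactly one output column, that column keeps the original name rather than a numbered suffix.

from Standard.Base import all
from Standard.Table import Table

example_single_column_split =
    t = Table.new [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
    # "q" never occurs in "bar", so the split yields a single column,
    # which is named "bar" instead of a suffixed name such as "bar 0".
    t.split_to_columns "bar" "q"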