diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso index 102cb7c1616a..e79d0a189f45 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso @@ -97,7 +97,7 @@ regex_to_column_names pattern original_column_name = group_nums_to_names = pattern.group_nums_to_names unnamed_group_numbers = 1.up_to pattern.group_count . filter i-> group_nums_to_names.contains_key i . not - group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (0.up_to unnamed_group_numbers.length) + group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (1.up_to unnamed_group_numbers.length+1) Vector.new (pattern.group_count-1) i-> # explicit groups start at 1 @@ -318,11 +318,11 @@ map_columns_to_multiple input_column function column_count problem_builder = # Name columns. If there's only one, use the original column name. new_column_names = case builders.length of 1 -> [input_column.name] - _ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i + _ -> 1.up_to (builders.length+1) . map i-> default_column_namer input_column.name i # Build Columns. - sealed = builders.map .seal - new_column_names.zip sealed Column.from_storage + storages = builders.map .seal + new_column_names.zip storages Column.from_storage ## PRIVATE Rename a vector of columns to be unique when added to a table. 
diff --git a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso index 83e68958deb2..ffd5f424f068 100644 --- a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso @@ -14,7 +14,7 @@ spec = cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]] t = Table.new cols expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, "gh", "ij", "u"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows t2 = t.split_to_columns "bar" "b" t2.should_equal expected @@ -30,7 +30,7 @@ spec = cols = [["foo", [0, 1, 2, 3]], ["bar", ["abc", "cbdbef", Nothing, "ghbijbu"]]] t = Table.new cols expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, Nothing, Nothing, Nothing], [3, "gh", "ij", "u"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows t2 = t.split_to_columns "bar" "b" t2.should_equal expected @@ -55,7 +55,7 @@ spec = cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]]] t = Table.new cols expected_rows = [[0, "12", "34", "5"], [1, "23", Nothing, Nothing], [2, "2", "4", "55"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows t2 = t.tokenize_to_columns "bar" "\d+" t2.should_equal expected @@ -71,7 +71,7 @@ spec = cols = [["foo", [0, 1, 2, 3]], ["bar", ["a12b34r5", Nothing, "23", "2r4r55"]]] t = Table.new cols expected_rows = [[0, "12", "34", "5"], [1, Nothing, Nothing, Nothing], [2, "23", Nothing, Nothing], [3, "2", "4", "55"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows t2 = 
t.tokenize_to_columns "bar" "\d+" t2.should_equal expected @@ -103,7 +103,7 @@ spec = cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]] t = Table.new cols expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows t2 = t.tokenize_to_columns "bar" "([a-z]).(\d+)" t2.should_equal expected @@ -119,7 +119,7 @@ spec = cols = [["foo", [0, 1, 2]], ["bar", ["aBqcE", "qcBr", "cCb"]]] t = Table.new cols expected_rows = [[0, "B", "c", Nothing], [1, "c", "B", Nothing], [2, "c", "C", "b"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows t2 = t.tokenize_to_columns "bar" "[bc]" case_sensitivity=Case_Sensitivity.Insensitive t2.should_equal expected @@ -136,16 +136,16 @@ spec = cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]] t = Table.new cols expected_rows = [[0, "a", "c", Nothing, Nothing], [1, "c", "d", "ef", Nothing], [2, "gh", "ij", "u", Nothing]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows t2 = t.split_to_columns "bar" "b" column_count=4 t2.should_equal expected - t2.at "bar 3" . value_type . is_text . should_be_true + t2.at "bar 4" . value_type . is_text . 
should_be_true Test.specify "split should limit columns and return problems when exceeding the column limit" <| cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]] t = Table.new cols expected_rows = [[0, "a", "c"], [1, "c", "d"], [2, "gh", "ij"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows action = t.split_to_columns "bar" "b" column_count=2 on_problems=_ tester = t-> t.should_equal expected problems = [Column_Count_Exceeded.Error 2 3] @@ -155,7 +155,7 @@ spec = cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]] t = Table.new cols expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]] - expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows action = t.tokenize_to_columns "bar" "([a-z]).(\d+)" column_count=2 on_problems=_ tester = t-> t.should_equal expected problems = [Column_Count_Exceeded.Error 2 3] @@ -165,10 +165,10 @@ spec = cols = [["foo", [0, 1, 2]], ["bar", ["ghbijbu", "cbdbef", "abc"]]] t = Table.new cols expected_rows = [[0, "gh", "ij", "u", Nothing], [1, "c", "d", "ef", Nothing], [2, "a", "c", Nothing, Nothing]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows t2 = t.split_to_columns "bar" "b" column_count=4 t2.should_equal expected - t2.at "bar 3" . value_type . is_text . should_be_true + t2.at "bar 4" . value_type . is_text . 
should_be_true Test.group "Table.split/tokenize errors" <| Test.specify "won't work on a non-text column" <| @@ -199,23 +199,23 @@ spec = Test.group "Table.split/tokenize name conflicts" <| Test.specify "split will make column names unique" <| - cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 1", ["a", "b", "c"]]] + cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 2", ["a", "b", "c"]]] t = Table.new cols expected_rows = [[0, "a", "c", Nothing, "a"], [1, "c", "d", "ef", "b"], [2, "gh", "ij", "u", "c"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows action = t.split_to_columns "bar" "b" on_problems=_ tester = t-> t.should_equal expected - problems = [Duplicate_Output_Column_Names.Error ["bar 1"]] + problems = [Duplicate_Output_Column_Names.Error ["bar 2"]] Problems.test_problem_handling action problems tester Test.specify "tokenize will make column names unique" <| - cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 1", ["a", "b", "c"]]] + cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 2", ["a", "b", "c"]]] t = Table.new cols expected_rows = [[0, "12", "34", "5", "a"], [1, "23", Nothing, Nothing, "b"], [2, "2", "4", "55", "c"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows action = t.tokenize_to_columns "bar" "\d+" on_problems=_ tester = t-> t.should_equal expected - problems = [Duplicate_Output_Column_Names.Error ["bar 1"]] + problems = [Duplicate_Output_Column_Names.Error ["bar 2"]] Problems.test_problem_handling action problems tester Test.group "Table.split/tokenize column order" <| @@ -223,14 +223,14 @@ spec = cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["baz", [1, 2, 3]]] t = 
Table.new cols expected_rows = [[0, "a", "c", Nothing, 1], [1, "c", "d", "ef", 2], [2, "gh", "ij", "u", 3]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] expected_rows + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] expected_rows t2 = t.split_to_columns "bar" "b" t2.should_equal expected Test.group "Table.parse_to_columns" <| Test.specify "can parse to columns" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]] + expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]] actual = t.parse_to_columns "bar" "(\d)(\d)" actual.should_equal expected @@ -248,25 +248,25 @@ spec = Test.specify "non-participating groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "q1", "y"], ["xx", "qp", "yy"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]] + expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]] actual = t.parse_to_columns "bar" "q((\d)|([a-z]))" actual.should_equal expected Test.specify "case-insensitive" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "qq", "y"], ["xx", "qQ", "yy"]] - expected = Table.from_rows ["foo", "bar 0", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]] + expected = Table.from_rows ["foo", "bar 1", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]] actual = t.parse_to_columns "bar" "q(q)" case_sensitivity=Case_Sensitivity.Insensitive actual.should_equal expected Test.specify "no post-parsing" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", 
"baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]] + expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]] actual = t.parse_to_columns "bar" "(\d)(\d)" parse_values=False actual.should_equal expected Test.specify "column name clash" <| - t = Table.from_rows ["foo", "bar", "bar 1"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 1"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]] + t = Table.from_rows ["foo", "bar", "bar 2"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]] + expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 2"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]] actual = t.parse_to_columns "bar" "(\d)(\d)" actual.should_equal expected @@ -284,13 +284,13 @@ spec = Test.specify "empty table, with regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0 - expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "a", "a", "y"]] . take 0 + expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "a", "a", "y"]] . take 0 actual = t.parse_to_columns "bar" "(\d)(\d)" actual.should_equal expected Test.specify "empty table, with named and unnamed regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0 - expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0 + expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] [["x", "a", "a", "a", "a", "y"]] . 
take 0 actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)" actual.should_equal expected @@ -302,13 +302,13 @@ spec = Test.specify "input with no matches, with regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] - expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [] + expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [] actual = t.parse_to_columns "bar" "(\d)(\d)" actual.should_equal expected Test.specify "input with no matches, with named and unnamed regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] - expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] [] + expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] [] actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)" actual.should_equal expected