Skip to content

Commit

Permalink
review, number columns from 1
Browse files (browse the repository at this point in the history)
  • Loading branch information
GregoryTravis committed May 9, 2023
1 parent dd14380 commit ff292fe
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 33 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -97,7 +97,7 @@ regex_to_column_names pattern original_column_name =
group_nums_to_names = pattern.group_nums_to_names

unnamed_group_numbers = 1.up_to pattern.group_count . filter i-> group_nums_to_names.contains_key i . not
group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (0.up_to unnamed_group_numbers.length)
group_number_to_column_name_suffix = Map.from_vector <| unnamed_group_numbers.zip (1.up_to unnamed_group_numbers.length+1)

Vector.new (pattern.group_count-1) i->
# explicit groups start at 1
Expand Down Expand Up @@ -318,11 +318,11 @@ map_columns_to_multiple input_column function column_count problem_builder =
# Name columns. If there's only one, use the original column name.
new_column_names = case builders.length of
1 -> [input_column.name]
_ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i
_ -> 1.up_to (builders.length+1) . map i-> default_column_namer input_column.name i

# Build Columns.
sealed = builders.map .seal
new_column_names.zip sealed Column.from_storage
storages = builders.map .seal
new_column_names.zip storages Column.from_storage

## PRIVATE
Rename a vector of columns to be unique when added to a table.
Expand Down
58 changes: 29 additions & 29 deletions test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,7 +14,7 @@ spec =
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
t = Table.new cols
expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, "gh", "ij", "u"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
t2 = t.split_to_columns "bar" "b"
t2.should_equal expected

Expand All @@ -30,7 +30,7 @@ spec =
cols = [["foo", [0, 1, 2, 3]], ["bar", ["abc", "cbdbef", Nothing, "ghbijbu"]]]
t = Table.new cols
expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, Nothing, Nothing, Nothing], [3, "gh", "ij", "u"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
t2 = t.split_to_columns "bar" "b"
t2.should_equal expected

Expand All @@ -55,7 +55,7 @@ spec =
cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]]]
t = Table.new cols
expected_rows = [[0, "12", "34", "5"], [1, "23", Nothing, Nothing], [2, "2", "4", "55"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
t2 = t.tokenize_to_columns "bar" "\d+"
t2.should_equal expected

Expand All @@ -71,7 +71,7 @@ spec =
cols = [["foo", [0, 1, 2, 3]], ["bar", ["a12b34r5", Nothing, "23", "2r4r55"]]]
t = Table.new cols
expected_rows = [[0, "12", "34", "5"], [1, Nothing, Nothing, Nothing], [2, "23", Nothing, Nothing], [3, "2", "4", "55"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
t2 = t.tokenize_to_columns "bar" "\d+"
t2.should_equal expected

Expand Down Expand Up @@ -103,7 +103,7 @@ spec =
cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]]
t = Table.new cols
expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
t2 = t.tokenize_to_columns "bar" "([a-z]).(\d+)"
t2.should_equal expected

Expand All @@ -119,7 +119,7 @@ spec =
cols = [["foo", [0, 1, 2]], ["bar", ["aBqcE", "qcBr", "cCb"]]]
t = Table.new cols
expected_rows = [[0, "B", "c", Nothing], [1, "c", "B", Nothing], [2, "c", "C", "b"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
t2 = t.tokenize_to_columns "bar" "[bc]" case_sensitivity=Case_Sensitivity.Insensitive
t2.should_equal expected

Expand All @@ -136,16 +136,16 @@ spec =
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
t = Table.new cols
expected_rows = [[0, "a", "c", Nothing, Nothing], [1, "c", "d", "ef", Nothing], [2, "gh", "ij", "u", Nothing]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows
t2 = t.split_to_columns "bar" "b" column_count=4
t2.should_equal expected
t2.at "bar 3" . value_type . is_text . should_be_true
t2.at "bar 4" . value_type . is_text . should_be_true

Test.specify "split should limit columns and return problems when exceeding the column limit" <|
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
t = Table.new cols
expected_rows = [[0, "a", "c"], [1, "c", "d"], [2, "gh", "ij"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows
action = t.split_to_columns "bar" "b" column_count=2 on_problems=_
tester = t-> t.should_equal expected
problems = [Column_Count_Exceeded.Error 2 3]
Expand All @@ -155,7 +155,7 @@ spec =
cols = [["foo", [0, 1]], ["bar", ["r a-1, b-12,qd-50", "ab-10:bc-20c"]]]
t = Table.new cols
expected_rows = [[0, "a1", "b12", "d50"], [1, "b10", "c20", Nothing]]
expected = Table.from_rows ["foo", "bar 0", "bar 1"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2"] expected_rows
action = t.tokenize_to_columns "bar" "([a-z]).(\d+)" column_count=2 on_problems=_
tester = t-> t.should_equal expected
problems = [Column_Count_Exceeded.Error 2 3]
Expand All @@ -165,10 +165,10 @@ spec =
cols = [["foo", [0, 1, 2]], ["bar", ["ghbijbu", "cbdbef", "abc"]]]
t = Table.new cols
expected_rows = [[0, "gh", "ij", "u", Nothing], [1, "c", "d", "ef", Nothing], [2, "a", "c", Nothing, Nothing]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "bar 3"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "bar 4"] expected_rows
t2 = t.split_to_columns "bar" "b" column_count=4
t2.should_equal expected
t2.at "bar 3" . value_type . is_text . should_be_true
t2.at "bar 4" . value_type . is_text . should_be_true

Test.group "Table.split/tokenize errors" <|
Test.specify "won't work on a non-text column" <|
Expand Down Expand Up @@ -199,38 +199,38 @@ spec =

Test.group "Table.split/tokenize name conflicts" <|
Test.specify "split will make column names unique" <|
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 1", ["a", "b", "c"]]]
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["bar 2", ["a", "b", "c"]]]
t = Table.new cols
expected_rows = [[0, "a", "c", Nothing, "a"], [1, "c", "d", "ef", "b"], [2, "gh", "ij", "u", "c"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows
action = t.split_to_columns "bar" "b" on_problems=_
tester = t-> t.should_equal expected
problems = [Duplicate_Output_Column_Names.Error ["bar 1"]]
problems = [Duplicate_Output_Column_Names.Error ["bar 2"]]
Problems.test_problem_handling action problems tester

Test.specify "tokenize will make column names unique" <|
cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 1", ["a", "b", "c"]]]
cols = [["foo", [0, 1, 2]], ["bar", ["a12b34r5", "23", "2r4r55"]], ["bar 2", ["a", "b", "c"]]]
t = Table.new cols
expected_rows = [[0, "12", "34", "5", "a"], [1, "23", Nothing, Nothing, "b"], [2, "2", "4", "55", "c"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 2", "bar 1"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 3", "bar 2"] expected_rows
action = t.tokenize_to_columns "bar" "\d+" on_problems=_
tester = t-> t.should_equal expected
problems = [Duplicate_Output_Column_Names.Error ["bar 1"]]
problems = [Duplicate_Output_Column_Names.Error ["bar 2"]]
Problems.test_problem_handling action problems tester

Test.group "Table.split/tokenize column order" <|
Test.specify "preserves column order" <|
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]], ["baz", [1, 2, 3]]]
t = Table.new cols
expected_rows = [[0, "a", "c", Nothing, 1], [1, "c", "d", "ef", 2], [2, "gh", "ij", "u", 3]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] expected_rows
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] expected_rows
t2 = t.split_to_columns "bar" "b"
t2.should_equal expected

Test.group "Table.parse_to_columns" <|
Test.specify "can parse to columns" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
actual = t.parse_to_columns "bar" "(\d)(\d)"
actual.should_equal expected

Expand All @@ -248,25 +248,25 @@ spec =

Test.specify "non-participating groups" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "q1", "y"], ["xx", "qp", "yy"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "bar 2", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]]
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3", "baz"] [["x", "1", 1, Nothing, "y"], ["xx", "p", Nothing, "p", "yy"]]
actual = t.parse_to_columns "bar" "q((\d)|([a-z]))"
actual.should_equal expected

Test.specify "case-insensitive" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "qq", "y"], ["xx", "qQ", "yy"]]
expected = Table.from_rows ["foo", "bar 0", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]]
expected = Table.from_rows ["foo", "bar 1", "baz"] [["x", "q", "y"], ["xx", "Q", "yy"]]
actual = t.parse_to_columns "bar" "q(q)" case_sensitivity=Case_Sensitivity.Insensitive
actual.should_equal expected

Test.specify "no post-parsing" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]]
expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "1", "2", "y"], ["x", "3", "4", "y"], ["x", "5", "6", "y"], ["xx", "4", "8", "yy"], ["xx", "5", "9", "yy"]]
actual = t.parse_to_columns "bar" "(\d)(\d)" parse_values=False
actual.should_equal expected

Test.specify "column name clash" <|
t = Table.from_rows ["foo", "bar", "bar 1"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1_1", "bar 1"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
t = Table.from_rows ["foo", "bar", "bar 2"] [["x", "12 34p q56", "y"], ["xx", "a48 59b", "yy"]]
expected = Table.from_rows ["foo", "bar 1", "bar 2_1", "bar 2"] [["x", 1, 2, "y"], ["x", 3, 4, "y"], ["x", 5, 6, "y"], ["xx", 4, 8, "yy"], ["xx", 5, 9, "yy"]]
actual = t.parse_to_columns "bar" "(\d)(\d)"
actual.should_equal expected

Expand All @@ -284,13 +284,13 @@ spec =

Test.specify "empty table, with regex groups" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0
expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] [["x", "a", "a", "y"]] . take 0
expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "a", "a", "y"]] . take 0
actual = t.parse_to_columns "bar" "(\d)(\d)"
actual.should_equal expected

Test.specify "empty table, with named and unnamed regex groups" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0
expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0
expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0
actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)"
actual.should_equal expected

Expand All @@ -302,13 +302,13 @@ spec =

Test.specify "input with no matches, with regex groups" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]]
expected = Table.from_rows ["foo", "bar 0", "bar 1", "baz"] []
expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] []
actual = t.parse_to_columns "bar" "(\d)(\d)"
actual.should_equal expected

Test.specify "input with no matches, with named and unnamed regex groups" <|
t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]]
expected = Table.from_rows ["foo", "quux", "bar 0", "foo_1", "bar 1", "baz"] []
expected = Table.from_rows ["foo", "quux", "bar 1", "foo_1", "bar 2", "baz"] []
actual = t.parse_to_columns "bar" "(?<quux>)(\d)(?<foo>\d)(\d)"
actual.should_equal expected

Expand Down

0 comments on commit ff292fe

Please sign in to comment.