From 22e7fbe6e272b87dc6a0790247df686834b8a36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Apr 2023 17:03:04 +0200 Subject: [PATCH 1/7] s/parse_values/parse/g --- .../Database/0.0.0-dev/src/Data/Table.enso | 4 +- .../Table/0.0.0-dev/src/Data/Table.enso | 12 +- .../src/Formatting/Parse_Values_Spec.enso | 130 +++++++++--------- 3 files changed, 73 insertions(+), 73 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 511f317236f1..4aa39dbdd912 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1382,8 +1382,8 @@ type Table ## Parsing values is not supported in database tables, the table has to be loaded into memory first with `read`. - parse_values : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table - parse_values columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = + parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table + parse columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ## Avoid unused arguments warning. We cannot rename arguments to `_`, because we need to keep the API consistent with the in-memory table. 
_ = [columns, type, format, error_on_missing_columns, on_problems] diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 2a0ea6396106..f596425003dc 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -796,24 +796,24 @@ type Table > Example Parse the first and last columns containing Yes/No values as booleans. - table.parse_values columns=[0, -1] type=Boolean format="Yes|No" + table.parse columns=[0, -1] type=Boolean format="Yes|No" > Example Parse dates in a column in the format `yyyy-MM-dd` (the default format). - table.parse_values "birthday" Date + table.parse "birthday" Date > Example Parse dates in a column in the format `dd/MM/yyyy`. - table.parse_values "birthday" Date 'dd/MM/yyyy' + table.parse "birthday" Date 'dd/MM/yyyy' > Example Parse all columns inferring their types, using `,` as the decimal point for numbers. - table.parse_values format=(Data_Formatter.Value.with_number_formatting decimal_point=',') - parse_values : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table - parse_values self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ensure_valid_parse_target type <| + table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',') + parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table + parse self columns=(self.columns . filter (c-> c.value_type.is_text) . 
map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ensure_valid_parse_target type <| formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index 01836f4a46df..8b9bb4198406 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -11,24 +11,24 @@ import Standard.Test.Extensions import project.Util spec = - Test.group "Table.parse_values" <| + Test.group "Table.parse" <| Test.specify "should correctly parse integers" <| t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]] - t2 = t1.parse_values type=Integer + t2 = t1.parse type=Integer t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing] Test.specify "should correctly parse decimals" <| t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]] - t2 = t1.parse_values type=Decimal + t2 = t1.parse type=Decimal t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing] t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"] t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]] - t4 = t3.parse_values type=Decimal + t4 = t3.parse type=Decimal t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345] t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]] - t6 = t5.parse_values type=Decimal + t6 = t5.parse type=Decimal t6.at "floats" . to_vector . 
should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111] Test.specify "should warn on leading zeros in numbers, if asked" <| @@ -37,56 +37,56 @@ spec = t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing] t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"] - t3 = t1.parse_values type=Integer + t3 = t1.parse type=Integer t3.at "ints" . to_vector . should_equal t1_parsed Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros] - t4 = t1.parse_values type=Decimal + t4 = t1.parse type=Decimal t4.at "ints" . to_vector . should_equal t1_parsed Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros] - t5 = t2.parse_values type=Decimal + t5 = t2.parse type=Decimal t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0] Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']] opts = Data_Formatter.Value allow_leading_zeros=True t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing] - t6 = t1.parse_values format=opts type=Integer + t6 = t1.parse format=opts type=Integer t6.at "ints" . to_vector . should_equal t1_parsed_zeros Problems.assume_no_problems t6 - t7 = t1.parse_values format=opts type=Decimal + t7 = t1.parse format=opts type=Decimal t7.at "ints" . to_vector . should_equal t1_parsed_zeros Problems.assume_no_problems t7 - t8 = t2.parse_values format=opts type=Decimal + t8 = t2.parse format=opts type=Decimal t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0] Problems.assume_no_problems t8 Test.specify "should correctly parse booleans" <| t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]] - t2 = t1.parse_values type=Boolean + t2 = t1.parse type=Boolean t2.at "bools" . to_vector . 
should_equal [True, False, True, True, False, Nothing, False] t3 = Table.new [["bools", ["1", "0", "true", "yes", "oui", "no", "NO!"]]] - t4 = t3.parse_values type=Boolean format="yes|no" + t4 = t3.parse type=Boolean format="yes|no" t4.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, True, Nothing, False, Nothing] Test.specify "should correctly parse date and time" <| t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]] - t2 = t1.parse_values type=Date + t2 = t1.parse type=Date t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31] t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]] - t4 = t3.parse_values type=Date_Time + t4 = t3.parse type=Date_Time t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56] t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]] - t6 = t5.parse_values type=Time_Of_Day + t6 = t5.parse type=Time_Of_Day t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56] t7 = Table.new [["dates", ["07/05/2022", "01/01/2001", "31/12/2010"]]] - t8 = t7.parse_values type=Date format="dd/MM/yyyy" + t8 = t7.parse type=Date format="dd/MM/yyyy" t8.at "dates" . value_type . should_equal Value_Type.Date t8.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2001 1 1, Date.new 2010 12 31] @@ -94,15 +94,15 @@ spec = opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"] t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]] - t2 = t1.parse_values format=opts type=Date + t2 = t1.parse format=opts type=Date t2.at "dates" . to_vector . 
should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3] t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]] - t4 = t3.parse_values format=opts type=Date_Time + t4 = t3.parse format=opts type=Date_Time t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33] t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]] - t6 = t5.parse_values format=opts type=Time_Of_Day + t6 = t5.parse format=opts type=Time_Of_Day t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0] Test.specify "should warn when cells do not fit the expected format" <| @@ -113,43 +113,43 @@ spec = times = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"] t = Table.new [ints, floats, bools, ["times", times]] - t0 = t.parse_values type=Boolean + t0 = t.parse type=Boolean t0.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing] t0.at "ints" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] Problems.expect_warning (Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]) t0 Problems.expect_warning (Invalid_Format.Error "ints" Boolean ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]) t0 - a1 = t.parse_values columns=["ints"] type=Integer on_problems=_ + a1 = t.parse columns=["ints"] type=Integer on_problems=_ t1 t = t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing] p1 = [Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]] Problems.test_problem_handling a1 p1 t1 - a2 = t.parse_values columns=["floats"] type=Decimal on_problems=_ + a2 = t.parse columns=["floats"] type=Decimal on_problems=_ t2 t = t.at "floats" . to_vector . 
should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing] p2 = [Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]] Problems.test_problem_handling a2 p2 t2 - a3 = t.parse_values columns=["bools"] type=Boolean on_problems=_ + a3 = t.parse columns=["bools"] type=Boolean on_problems=_ t3 t = t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing] p3 = [Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]] Problems.test_problem_handling a3 p3 t3 - a4 = t.parse_values columns=["times"] type=Date on_problems=_ + a4 = t.parse columns=["times"] type=Date on_problems=_ t4 t = t.at "times" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] p4 = [Invalid_Format.Error "times" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a4 p4 t4 - a5 = t.parse_values columns=["times"] type=Date_Time on_problems=_ + a5 = t.parse columns=["times"] type=Date_Time on_problems=_ t5 t = t.at "times" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] p5 = [Invalid_Format.Error "times" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a5 p5 t5 - a6 = t.parse_values columns=["times"] type=Time_Of_Day on_problems=_ + a6 = t.parse columns=["times"] type=Time_Of_Day on_problems=_ t6 t = t.at "times" . to_vector . 
should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10 0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] p6 = [Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] @@ -157,7 +157,7 @@ spec = Test.specify "should leave not selected columns unaffected" <| t1 = Table.new [["A", ["1", "2"]], ["B", ["3", "4"]]] - t2 = t1.parse_values columns="B" + t2 = t1.parse columns="B" t2.at "A" . to_vector . should_equal ["1", "2"] t2.at "B" . to_vector . should_equal [3, 4] @@ -174,7 +174,7 @@ spec = c10 = ["mixeddates", ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]] c11 = ["text+ints", ["1", "2", " foobar", Nothing]] t = Table.new [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11] - t2 = t.parse_values + t2 = t.parse Problems.assume_no_problems t2 t2.at "ints" . to_vector . should_equal [1, 2, -123, Nothing] @@ -191,19 +191,19 @@ spec = t2.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing] # In Auto mode, integers take precedence over booleans. - t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse_values format=(Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"]) + t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse format=(Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"]) t3.at "bools" . to_vector . should_equal [True, False, True] t3.at "ints" . to_vector . should_equal [1, 0, 0] - t4 = Table.new [c2] . parse_values format=(Data_Formatter.Value allow_leading_zeros=True) + t4 = Table.new [c2] . parse format=(Data_Formatter.Value allow_leading_zeros=True) t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1] - t5 = t.parse_values columns="ints" type=Decimal + t5 = t.parse columns="ints" type=Decimal t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing] # `ints` are requested to be parsed as decimals. t5.at "ints" . 
to_vector . first . should_be_a Decimal - t6 = t.parse_values columns=["floats", "text+ints"] type=Auto + t6 = t.parse columns=["floats", "text+ints"] type=Auto # `floats` are auto-detected as decimals. t6.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0] # `text+ints` is attempted to be parsed (hence whitespace is stripped), but it only fits the text type. @@ -214,25 +214,25 @@ spec = Test.specify "should allow to specify a thousands separator and a custom decimal point" <| opts = Data_Formatter.Value decimal_point=',' thousand_separator='_' t1 = Table.new [["floats", ["0,0", "+0,0", "-0,0", "+1,5", "-1,2", "1,0", "0,0000", "10_000,", ",0"]]] - t2 = t1.parse_values format=opts + t2 = t1.parse format=opts t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0] t3 = Table.new [["xs", ["1,2", "1.3", "_0", "0_", "1_0_0"]]] - t4 = t3.parse_values format=opts type=Decimal + t4 = t3.parse format=opts type=Decimal t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0] Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.3", "_0", "0_"]] - t5 = t3.parse_values format=opts type=Integer + t5 = t3.parse format=opts type=Integer t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100] Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.3", "_0", "0_"]] Test.specify "should allow to specify custom values for booleans" <| opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"] t1 = Table.new [["bools", ["1", "0", "YES", "1", "0"]]] - t2 = t1.parse_values format=opts_1 + t2 = t1.parse format=opts_1 t2.at "bools" . to_vector . should_equal [True, False, True, True, False] t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]] - t4 = t3.parse_values format=opts_1 type=Boolean + t4 = t3.parse format=opts_1 type=Boolean t4.at "bools" . to_vector . 
should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False] Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]] @@ -246,53 +246,53 @@ spec = Table.new [ints, floats, bools, dates, datetimes, times] Test.specify "should trim input values by default" <| - t1 = whitespace_table.parse_values columns="ints" type=Integer + t1 = whitespace_table.parse columns="ints" type=Integer t1.at "ints" . to_vector . should_equal [0, 1, Nothing, 2] Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["0 1"]) t1 - t2 = whitespace_table.parse_values columns="floats" type=Decimal + t2 = whitespace_table.parse columns="floats" type=Decimal t2.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0] Problems.expect_only_warning (Invalid_Format.Error "floats" Decimal ["- 1"]) t2 - t3 = whitespace_table.parse_values columns="bools" type=Boolean + t3 = whitespace_table.parse columns="bools" type=Boolean t3.at "bools" . to_vector . should_equal [True, False, Nothing, False] Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["t rue"]) t3 - t4 = whitespace_table.parse_values columns="dates" type=Date + t4 = whitespace_table.parse columns="dates" type=Date t4.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""]) t4 - t5 = whitespace_table.parse_values columns="datetimes" type=Date_Time + t5 = whitespace_table.parse columns="datetimes" type=Date_Time t5.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"]) t5 - t6 = whitespace_table.parse_values columns="times" type=Time_Of_Day + t6 = whitespace_table.parse columns="times" type=Time_Of_Day t6.at "times" . to_vector . 
should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"]) t6 Test.specify "should fail to parse if whitespace is present and trimming is turned off" <| opts = Data_Formatter.Value trim_values=False - t1 = whitespace_table.parse_values format=opts columns="ints" type=Integer + t1 = whitespace_table.parse format=opts columns="ints" type=Integer t1.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"]) t1 - t2 = whitespace_table.parse_values format=opts columns="floats" type=Decimal + t2 = whitespace_table.parse format=opts columns="floats" type=Decimal t2.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0] Problems.expect_only_warning (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"]) t2 - t3 = whitespace_table.parse_values format=opts columns="bools" type=Boolean + t3 = whitespace_table.parse format=opts columns="bools" type=Boolean t3.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False] Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"]) t3 - t4 = whitespace_table.parse_values format=opts columns="dates" type=Date + t4 = whitespace_table.parse format=opts columns="dates" type=Date t4.at "dates" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]) t4 - t5 = whitespace_table.parse_values format=opts columns="datetimes" type=Date_Time + t5 = whitespace_table.parse format=opts columns="datetimes" type=Date_Time t5.at "datetimes" . to_vector . 
should_equal [Nothing, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]) t5 - t6 = whitespace_table.parse_values format=opts columns="times" type=Time_Of_Day + t6 = whitespace_table.parse format=opts columns="times" type=Time_Of_Day t6.at "times" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"]) t6 @@ -301,7 +301,7 @@ spec = c2 = [" 1.0 ", "2.2", Nothing, "-1.0"] c3 = ["true", " False", Nothing, "True"] t = Table.new [["ints", c1], ["floats", c2], ["bools", c3]] - t2 = t.parse_values format=(Data_Formatter.Value trim_values=False) + t2 = t.parse format=(Data_Formatter.Value trim_values=False) Warning.get_all t2 . should_equal [] t2.at "ints" . to_vector . should_equal c1 @@ -310,7 +310,7 @@ spec = Test.specify "should allow selecting columns by regex" <| t1 = Table.new [["An", ["1", "2", "3"]], ["Am", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]] - r1 = t1.parse_values columns=[Column_Selector.By_Name "A.*" use_regex=True] + r1 = t1.parse columns=[Column_Selector.By_Name "A.*" use_regex=True] r1.at "An" . to_vector . should_equal [1, 2, 3] r1.at "Am" . to_vector . should_equal [4, 5, 6] r1.at "C" . to_vector . should_equal ["7", "8", "9"] @@ -318,15 +318,15 @@ spec = Test.specify "should correctly handle problems: missing input columns" <| t1 = Table.new [["A", ["1", "2", "3"]]] - r1 = t1.parse_values columns=["A", "B", "C", "E"] on_problems=Problem_Behavior.Ignore + r1 = t1.parse columns=["A", "B", "C", "E"] on_problems=Problem_Behavior.Ignore r1.should_fail_with Missing_Input_Columns r1.catch.criteria . 
should_equal ["B", "C", "E"] - r2 = t1.parse_values columns=[Column_Selector.By_Name "A.+" use_regex=True] + r2 = t1.parse columns=[Column_Selector.By_Name "A.+" use_regex=True] r2.should_fail_with Missing_Input_Columns r2.catch.criteria . should_equal ["A.+"] - action = t1.parse_values columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_ + action = t1.parse columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_ tester table = table.at "A" . to_vector . should_equal [1, 2, 3] problems = [Missing_Input_Columns.Error ["B", "C", "E"]] @@ -334,11 +334,11 @@ spec = Test.specify "should correctly handle problems: out of bounds indices" <| t1 = Table.new [["A", ["1", "2", "3"]]] - r1 = t1.parse_values columns=[0, -1, 42, -5] + r1 = t1.parse columns=[0, -1, 42, -5] r1.should_fail_with Column_Indexes_Out_Of_Range r1.catch.indexes . should_equal [42, -5] - action = t1.parse_values columns=[0, -1, 42, -5] error_on_missing_columns=False on_problems=_ + action = t1.parse columns=[0, -1, 42, -5] error_on_missing_columns=False on_problems=_ tester table = table.at "A" . to_vector . should_equal [1, 2, 3] problems = [Column_Indexes_Out_Of_Range.Error [42, -5]] @@ -346,7 +346,7 @@ spec = Test.specify "should allow mixed column selectors" <| t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]] - r1 = t1.parse_values columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"] + r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"] r1.at "Am" . to_vector . should_equal [1, 2, 3] r1.at "B" . to_vector . should_equal ["4", "5", "6"] r1.at "C" . to_vector . 
should_equal [7, 8, 9] @@ -354,7 +354,7 @@ spec = Test.specify "should handle edge-cases: overlapping selectors" <| t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]] - r1 = t1.parse_values columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3] + r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3] r1.at "Am" . to_vector . should_equal [1, 2, 3] r1.at "B" . to_vector . should_equal ["4", "5", "6"] r1.at "C" . to_vector . should_equal ["7", "8", "9"] @@ -362,29 +362,29 @@ spec = Test.specify "should error if invalid target type is provided" <| t1 = Table.new [["A", ["1", "2", "3"]]] - t1.parse_values type=Nothing . should_fail_with Illegal_Argument + t1.parse type=Nothing . should_fail_with Illegal_Argument Test.specify "should error if the input column is not text" <| t1 = Table.new [["A", [1, 2, 3]], ["B", ["4", "5", "6"]], ["C", [7, 8, 9]], ["D", ["10", "11", "12"]]] - r1 = t1.parse_values columns=["A", "B", "C"] + r1 = t1.parse columns=["A", "B", "C"] r1.should_fail_with Invalid_Value_Type r1.catch.related_column . should_equal "A" r1.catch.expected.is_text.should_be_true Test.specify "should error if no input columns selected, unless error_on_missing_columns=False" <| t1 = Table.new [["A", ["1", "2", "3"]]] - r1 = t1.parse_values columns=[] + r1 = t1.parse columns=[] r1.should_fail_with No_Input_Columns_Selected - r2 = t1.parse_values columns=[] error_on_missing_columns=False + r2 = t1.parse columns=[] error_on_missing_columns=False r2 . should_equal t1 Problems.expect_warning No_Input_Columns_Selected r2 - r3 = t1.parse_values columns=[] error_on_missing_columns=False on_problems=Problem_Behavior.Ignore + r3 = t1.parse columns=[] error_on_missing_columns=False on_problems=Problem_Behavior.Ignore r3 . 
should_equal t1 Problems.assume_no_problems r3 - r4 = t1.parse_values columns=["nonexistent column :D", -42] error_on_missing_columns=False on_problems=Problem_Behavior.Report_Warning + r4 = t1.parse columns=["nonexistent column :D", -42] error_on_missing_columns=False on_problems=Problem_Behavior.Report_Warning r4 . should_equal t1 Problems.expect_warning No_Input_Columns_Selected r4 Problems.expect_warning (Missing_Input_Columns.Error ["nonexistent column :D"]) r4 From 087dfb556944999cb38dccd5ed19ad1cda476457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Apr 2023 17:39:55 +0200 Subject: [PATCH 2/7] Switch the type argument from a sum type into Value_Type --- .../Database/0.0.0-dev/src/Data/Column.enso | 2 +- .../Database/0.0.0-dev/src/Data/Table.enso | 2 +- .../Table/0.0.0-dev/src/Data/Column.enso | 7 +++-- .../0.0.0-dev/src/Data/Data_Formatter.enso | 29 ++++++++++--------- .../Table/0.0.0-dev/src/Data/Table.enso | 8 ++--- .../src/Internal/Widget_Helpers.enso | 6 ++-- 6 files changed, 30 insertions(+), 24 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 878f259ec5ff..42afd4a5a820 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -902,7 +902,7 @@ type Column ## Parsing values is not supported in database columns. @type Widget_Helpers.parse_type_selector - parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Problem_Behavior -> Column + parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning = _ = [type, format, on_problems] Error.throw <| Unsupported_Database_Operation.Error "`Column.parse` is not implemented yet for the Database backends." 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 4aa39dbdd912..f6a8a4fd9d83 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1382,7 +1382,7 @@ type Table ## Parsing values is not supported in database tables, the table has to be loaded into memory first with `read`. - parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table + parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table parse columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ## Avoid unused arguments warning. We cannot rename arguments to `_`, because we need to keep the API consistent with the in-memory table. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 86b6032c36a1..5d9f3929ca80 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -1029,15 +1029,16 @@ type Column example_contains = Examples.text_column_1.parse Boolean 'Yes|No' @type Widget_Helpers.parse_type_selector - parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Problem_Behavior -> Column + parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning = Value_Type.expect_text self.value_type related_column=self.name <| ensure_valid_parse_target type <| formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format - _ -> format + _ : Data_Formatter -> format + _ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.") - parser = if type == Auto then formatter.make_auto_parser else formatter.make_datatype_parser type + parser = formatter.make_datatype_parser type storage = self.java_column.getStorage new_storage_and_problems = parser.parseColumn self.name storage diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso index 6e845183efc1..e42e3bf98364 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso @@ -67,16 +67,15 @@ type Data_Formatter Arguments: - text: Text value to parse. - - datatype: Text value to parse. + - value_type: The type to parse the value into. If set to `Auto`, the + type will be inferred automatically. - on_problems: Specifies the behavior when a problem occurs. 
By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. - parse : Text -> (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Problem_Behavior -> Any - parse self text datatype=Auto on_problems=Problem_Behavior.Report_Warning = - parser = case datatype of - Auto -> self.make_auto_parser - _ -> self.make_datatype_parser datatype + parse : Text -> Value_Type | Auto -> Problem_Behavior -> Any + parse self text value_type=Auto on_problems=Problem_Behavior.Report_Warning = + parser = self.make_datatype_parser value_type result = parser.parseIndependentValue text - problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem datatype) + problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem value_type) on_problems.attach_problems_after result.value problems @@ -210,13 +209,17 @@ type Data_Formatter ## PRIVATE make_datatype_parser self datatype = case datatype of - Integer -> self.make_integer_parser - Decimal -> self.make_decimal_parser - Boolean -> self.make_boolean_parser - Date -> self.make_date_parser - Date_Time -> self.make_date_time_parser - Time_Of_Day -> self.make_time_of_day_parser - _ -> Error.throw (Illegal_Argument.Error "Unsupported datatype: "+datatype.to_text) + # TODO once we implement #5159 we will need to add checks for bounds here and support 16/32-bit ints + Value_Type.Integer Bits.Bits_64 -> self.make_integer_parser + # TODO once we implement #6109 we can support 32-bit floats + Value_Type.Float Bits.Bits_64 -> self.make_decimal_parser + Value_Type.Boolean -> self.make_boolean_parser + Value_Type.Date -> self.make_date_parser + Value_Type.Date_Time True -> self.make_date_time_parser + Value_Type.Time -> self.make_time_of_day_parser + Auto -> self.make_auto_parser + _ -> + Error.throw (Illegal_Argument.Error "Unsupported value type: "+datatype.to_display_text) ## PRIVATE get_specific_type_parsers self = diff --git
a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index f596425003dc..a9fcf05a9277 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -812,15 +812,15 @@ type Table Parse all columns inferring their types, using `,` as the decimal point for numbers. table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',') - parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table + parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ensure_valid_parse_target type <| formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format - _ -> format + _ : Data_Formatter -> format + _ -> Error.throw (Illegal_Argument.Error "Invalid format type. 
Expected Text or Data_Formatter.") - parser = if type == Auto then formatter.make_auto_parser else - formatter.make_datatype_parser type + parser = formatter.make_datatype_parser type select_problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns selected_columns = self.columns_helper.select_columns_helper columns reorder=True select_problem_builder diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso index 1ed7844e213f..c6274fca3cb7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso @@ -16,8 +16,10 @@ make_column_name_selector table display=Display.Always = Selector for type argument on `Column.parse`. parse_type_selector : Single_Choice parse_type_selector = - choice = ['Auto', 'Integer', 'Decimal', 'Date', 'Date_Time', 'Time_Of_Day', 'Boolean'] - Single_Choice display=Display.Always values=(choice.map n->(Option n)) + choice = ['Auto', 'Value_Type.Integer', 'Value_Type.Float', 'Value_Type.Date', 'Value_Type.Date_Time', 'Value_Type.Time', 'Value_Type.Boolean'] + names = ['Auto', 'Integer', 'Float', 'Date', 'Date_Time', 'Time', 'Boolean'] + options = names.zip choice . map pair-> Option pair.first pair.second + Single_Choice display=Display.Always values=options ## PRIVATE Selector for type argument on `Column.parse`. 
From 866e20d8deaae3cf3e0765359a8d80ab298dca19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Apr 2023 18:09:40 +0200 Subject: [PATCH 3/7] Missing Data_Formatter.with_format alignment, align Parse Values test --- .../0.0.0-dev/src/Data/Data_Formatter.enso | 22 +-- .../Standard/Table/0.0.0-dev/src/Errors.enso | 9 +- .../src/Formatting/Parse_Values_Spec.enso | 159 ++++++++++-------- 3 files changed, 103 insertions(+), 87 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso index e42e3bf98364..24d9b63d22f7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso @@ -144,23 +144,25 @@ type Data_Formatter It is mostly a convenience function to easily specify a datatype format. Arguments: - - type: The datatype for which to change the format. The format can be - changed only for Date_Time, Date, Time_Of_Day and Boolean types. + - type: The value type for which to change the format. The format can be + changed only for `Date_Time`, `Date`, `Time` and `Boolean` value types. - format: The new format string to set. For dates, it is the usual date format notation, and for booleans it should be two values that represent true and false, separated by a `|`. 
- with_format : (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Text -> Data_Formatter + with_format : Value_Type | Auto -> Text -> Data_Formatter with_format self type format = case type of - Auto -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Auto`.") - Integer -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Integer`.") - Decimal -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Decimal`.") - Date -> self.with_datetime_formats date_formats=[format] - Date_Time -> self.with_datetime_formats datetime_formats=[format] - Time_Of_Day -> self.with_datetime_formats time_formats=[format] - Boolean -> + Value_Type.Date -> self.with_datetime_formats date_formats=[format] + Value_Type.Time -> self.with_datetime_formats time_formats=[format] + Value_Type.Date_Time _ -> + self.with_datetime_formats datetime_formats=[format] + Value_Type.Boolean -> formats = format.split "|" if formats.length != 2 then Error.throw (Illegal_Argument.Error "The `format` for Booleans must be a string with two values separated by `|`, for example: 'Yes|No'.") else self.with_boolean_values true_values=[formats.at 0] false_values=[formats.at 1] + Auto -> + Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Auto`.") + _ : Value_Type -> + Error.throw (Illegal_Argument.Error "Cannot specify a `format` for type `"+type.to_text+"`.") ## PRIVATE Clone the instance with some properties overridden. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index c55c222c9541..fb03f1fc6980 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -1,6 +1,7 @@ from Standard.Base import all import Standard.Table.Data.Expression.Expression_Error +import Standard.Table.Data.Type.Value_Type.Value_Type polyglot java import org.enso.table.error.ColumnCountMismatchException polyglot java import org.enso.table.error.ColumnNameMismatchException @@ -243,18 +244,18 @@ type Invalid_Location Arguments: - column: the column in which the problematic cells appeared, if applicable. It may be empty if the value is parsed outside of a context of a column. - - datatype: The expected datatype. + - value_type: The expected value type. - cells: Contents of the cells that did not match the expected datatype format. type Invalid_Format ## PRIVATE - Error column:(Text|Nothing) (datatype:(Integer|Number|Date|Time|Time_Of_Day|Boolean)) (cells:[Text]) + Error column:(Text|Nothing) (value_type:Value_Type) (cells:[Text]) ## PRIVATE Pretty print the invalid format error. to_display_text : Text to_display_text self = - self.cells.length+" cells in column "+self.column+" had invalid format for datatype "+self.datatype.to_text+"." + self.cells.length+" cells in column "+self.column+" had invalid format for datatype "+self.value_type.to_text+"." ## Indicates that some values contained leading zeros even though these were not allowed. @@ -270,7 +271,7 @@ type Leading_Zeros ## PRIVATE Pretty print the leading zeros error. to_display_text : Text - to_display_text self = "Leading zeros in column "+self.column+" with datatype "+self.datatype.to_text+"." + to_display_text self = "Leading zeros in column "+self.column+" with datatype "+self.value_type.to_text+"." ## Indicates that an empty file was encountered, so no data could be loaded. 
type Empty_File_Error diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index 8b9bb4198406..e1dc8f1a2c71 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -14,21 +14,21 @@ spec = Test.group "Table.parse" <| Test.specify "should correctly parse integers" <| t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]] - t2 = t1.parse type=Integer + t2 = t1.parse type=Value_Type.Integer t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing] Test.specify "should correctly parse decimals" <| t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]] - t2 = t1.parse type=Decimal + t2 = t1.parse type=Value_Type.Float t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing] t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"] t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]] - t4 = t3.parse type=Decimal + t4 = t3.parse type=Value_Type.Float t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345] t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]] - t6 = t5.parse type=Decimal + t6 = t5.parse type=Value_Type.Float t6.at "floats" . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111] Test.specify "should warn on leading zeros in numbers, if asked" <| @@ -37,56 +37,56 @@ spec = t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing] t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"] - t3 = t1.parse type=Integer + t3 = t1.parse type=Value_Type.Integer t3.at "ints" . to_vector . 
should_equal t1_parsed Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros] - t4 = t1.parse type=Decimal + t4 = t1.parse type=Value_Type.Float t4.at "ints" . to_vector . should_equal t1_parsed - Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros] + Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Value_Type.Float t1_zeros] - t5 = t2.parse type=Decimal + t5 = t2.parse type=Value_Type.Float t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0] - Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']] + Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Value_Type.Float ["00.", "01.0", '-0010.0000']] opts = Data_Formatter.Value allow_leading_zeros=True t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing] - t6 = t1.parse format=opts type=Integer + t6 = t1.parse format=opts type=Value_Type.Integer t6.at "ints" . to_vector . should_equal t1_parsed_zeros Problems.assume_no_problems t6 - t7 = t1.parse format=opts type=Decimal + t7 = t1.parse format=opts type=Value_Type.Float t7.at "ints" . to_vector . should_equal t1_parsed_zeros Problems.assume_no_problems t7 - t8 = t2.parse format=opts type=Decimal + t8 = t2.parse format=opts type=Value_Type.Float t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0] Problems.assume_no_problems t8 Test.specify "should correctly parse booleans" <| t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]] - t2 = t1.parse type=Boolean + t2 = t1.parse type=Value_Type.Boolean t2.at "bools" . to_vector . should_equal [True, False, True, True, False, Nothing, False] t3 = Table.new [["bools", ["1", "0", "true", "yes", "oui", "no", "NO!"]]] - t4 = t3.parse type=Boolean format="yes|no" + t4 = t3.parse type=Value_Type.Boolean format="yes|no" t4.at "bools" . 
to_vector . should_equal [Nothing, Nothing, Nothing, True, Nothing, False, Nothing] Test.specify "should correctly parse date and time" <| t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]] - t2 = t1.parse type=Date + t2 = t1.parse type=Value_Type.Date t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31] t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]] - t4 = t3.parse type=Date_Time + t4 = t3.parse type=Value_Type.Date_Time t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56] t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]] - t6 = t5.parse type=Time_Of_Day + t6 = t5.parse type=Value_Type.Time t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56] t7 = Table.new [["dates", ["07/05/2022", "01/01/2001", "31/12/2010"]]] - t8 = t7.parse type=Date format="dd/MM/yyyy" + t8 = t7.parse type=Value_Type.Date format="dd/MM/yyyy" t8.at "dates" . value_type . should_equal Value_Type.Date t8.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2001 1 1, Date.new 2010 12 31] @@ -94,15 +94,15 @@ spec = opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"] t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]] - t2 = t1.parse format=opts type=Date + t2 = t1.parse format=opts type=Value_Type.Date t2.at "dates" . to_vector . should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3] t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]] - t4 = t3.parse format=opts type=Date_Time + t4 = t3.parse format=opts type=Value_Type.Date_Time t4.at "datetimes" . to_vector . 
should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33] t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]] - t6 = t5.parse format=opts type=Time_Of_Day + t6 = t5.parse format=opts type=Value_Type.Time t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0] Test.specify "should warn when cells do not fit the expected format" <| @@ -113,46 +113,46 @@ spec = times = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"] t = Table.new [ints, floats, bools, ["times", times]] - t0 = t.parse type=Boolean + t0 = t.parse type=Value_Type.Boolean t0.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing] t0.at "ints" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] Problems.expect_warning (Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]) t0 Problems.expect_warning (Invalid_Format.Error "ints" Boolean ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]) t0 - a1 = t.parse columns=["ints"] type=Integer on_problems=_ + a1 = t.parse columns=["ints"] type=Value_Type.Integer on_problems=_ t1 t = t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing] p1 = [Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]] Problems.test_problem_handling a1 p1 t1 - a2 = t.parse columns=["floats"] type=Decimal on_problems=_ + a2 = t.parse columns=["floats"] type=Value_Type.Float on_problems=_ t2 t = t.at "floats" . to_vector . 
should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing] - p2 = [Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]] + p2 = [Invalid_Format.Error "floats" Value_Type.Float ["1e6", "foobar", "", "--1", "+-1", "-+1"]] Problems.test_problem_handling a2 p2 t2 - a3 = t.parse columns=["bools"] type=Boolean on_problems=_ + a3 = t.parse columns=["bools"] type=Value_Type.Boolean on_problems=_ t3 t = t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing] p3 = [Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]] Problems.test_problem_handling a3 p3 t3 - a4 = t.parse columns=["times"] type=Date on_problems=_ + a4 = t.parse columns=["times"] type=Value_Type.Date on_problems=_ t4 t = t.at "times" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] p4 = [Invalid_Format.Error "times" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a4 p4 t4 - a5 = t.parse columns=["times"] type=Date_Time on_problems=_ + a5 = t.parse columns=["times"] type=Value_Type.Date_Time on_problems=_ t5 t = t.at "times" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] p5 = [Invalid_Format.Error "times" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a5 p5 t5 - a6 = t.parse columns=["times"] type=Time_Of_Day on_problems=_ + a6 = t.parse columns=["times"] type=Value_Type.Time on_problems=_ t6 t = t.at "times" . to_vector . 
should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10 0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] - p6 = [Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] + p6 = [Invalid_Format.Error "times" Value_Type.Time ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a6 p6 t6 Test.specify "should leave not selected columns unaffected" <| @@ -198,10 +198,10 @@ spec = t4 = Table.new [c2] . parse format=(Data_Formatter.Value allow_leading_zeros=True) t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1] - t5 = t.parse columns="ints" type=Decimal + t5 = t.parse columns="ints" type=Value_Type.Float t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing] # `ints` are requested to be parsed as decimals. - t5.at "ints" . to_vector . first . should_be_a Decimal + t5.at "ints" . to_vector . first . should_be_a Value_Type.Float t6 = t.parse columns=["floats", "text+ints"] type=Auto # `floats` are auto-detected as decimals. @@ -218,10 +218,10 @@ spec = t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0] t3 = Table.new [["xs", ["1,2", "1.3", "_0", "0_", "1_0_0"]]] - t4 = t3.parse format=opts type=Decimal + t4 = t3.parse format=opts type=Value_Type.Float t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0] - Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.3", "_0", "0_"]] - t5 = t3.parse format=opts type=Integer + Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Value_Type.Float ["1.3", "_0", "0_"]] + t5 = t3.parse format=opts type=Value_Type.Integer t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100] Problems.get_attached_warnings t5 . 
should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.3", "_0", "0_"]] @@ -232,7 +232,7 @@ spec = t2.at "bools" . to_vector . should_equal [True, False, True, True, False] t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]] - t4 = t3.parse format=opts_1 type=Boolean + t4 = t3.parse format=opts_1 type=Value_Type.Boolean t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False] Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]] @@ -246,53 +246,53 @@ spec = Table.new [ints, floats, bools, dates, datetimes, times] Test.specify "should trim input values by default" <| - t1 = whitespace_table.parse columns="ints" type=Integer + t1 = whitespace_table.parse columns="ints" type=Value_Type.Integer t1.at "ints" . to_vector . should_equal [0, 1, Nothing, 2] Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["0 1"]) t1 - t2 = whitespace_table.parse columns="floats" type=Decimal + t2 = whitespace_table.parse columns="floats" type=Value_Type.Float t2.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0] - Problems.expect_only_warning (Invalid_Format.Error "floats" Decimal ["- 1"]) t2 + Problems.expect_only_warning (Invalid_Format.Error "floats" Value_Type.Float ["- 1"]) t2 - t3 = whitespace_table.parse columns="bools" type=Boolean + t3 = whitespace_table.parse columns="bools" type=Value_Type.Boolean t3.at "bools" . to_vector . should_equal [True, False, Nothing, False] Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["t rue"]) t3 - t4 = whitespace_table.parse columns="dates" type=Date + t4 = whitespace_table.parse columns="dates" type=Value_Type.Date t4.at "dates" . to_vector . 
should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""]) t4 - t5 = whitespace_table.parse columns="datetimes" type=Date_Time + t5 = whitespace_table.parse columns="datetimes" type=Value_Type.Date_Time t5.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"]) t5 - t6 = whitespace_table.parse columns="times" type=Time_Of_Day + t6 = whitespace_table.parse columns="times" type=Value_Type.Time t6.at "times" . to_vector . should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"]) t6 Test.specify "should fail to parse if whitespace is present and trimming is turned off" <| opts = Data_Formatter.Value trim_values=False - t1 = whitespace_table.parse format=opts columns="ints" type=Integer + t1 = whitespace_table.parse format=opts columns="ints" type=Value_Type.Integer t1.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"]) t1 - t2 = whitespace_table.parse format=opts columns="floats" type=Decimal + t2 = whitespace_table.parse format=opts columns="floats" type=Value_Type.Float t2.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0] - Problems.expect_only_warning (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"]) t2 + Problems.expect_only_warning (Invalid_Format.Error "floats" Value_Type.Float ["0 ", " 2.0", "- 1"]) t2 - t3 = whitespace_table.parse format=opts columns="bools" type=Boolean + t3 = whitespace_table.parse format=opts columns="bools" type=Value_Type.Boolean t3.at "bools" . to_vector . 
should_equal [Nothing, Nothing, Nothing, False] Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"]) t3 - t4 = whitespace_table.parse format=opts columns="dates" type=Date + t4 = whitespace_table.parse format=opts columns="dates" type=Value_Type.Date t4.at "dates" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]) t4 - t5 = whitespace_table.parse format=opts columns="datetimes" type=Date_Time + t5 = whitespace_table.parse format=opts columns="datetimes" type=Value_Type.Date_Time t5.at "datetimes" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]) t5 - t6 = whitespace_table.parse format=opts columns="times" type=Time_Of_Day + t6 = whitespace_table.parse format=opts columns="times" type=Value_Type.Time t6.at "times" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"]) t6 @@ -393,87 +393,93 @@ spec = Test.group "Column.parse" <| Test.specify "should correctly parse integers" <| c1 = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing] - c2 = c1.parse Integer + c2 = c1.parse type=Value_Type.Integer c2.name.should_equal c1.name c2 . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing] + c2.value_type.should_equal Value_Type.Integer Problems.expect_warning Leading_Zeros c2 - c3 = c1.parse Integer format=(Data_Formatter.Value.with_number_formatting allow_leading_zeros=True) + c3 = c1.parse type=Value_Type.Integer format=(Data_Formatter.Value.with_number_formatting allow_leading_zeros=True) c3.to_vector . 
should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing] Problems.assume_no_problems c3 Test.specify "should correctly parse decimals" <| c1 = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing] - c2 = c1.parse Decimal + c2 = c1.parse Value_Type.Float c2.name.should_equal c1.name - c2 . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing] + c2.to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing] + c2.value_type.should_equal Value_Type.Float c2.to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "Nothing", "Nothing", "12345.0", "Nothing"] Problems.expect_warning Leading_Zeros c2 c3 = Column.from_vector "floats" ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."] - c4 = c3.parse Decimal + c4 = c3.parse Value_Type.Float c4.to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345] c4.value_type.is_floating_point.should_be_true Problems.assume_no_problems c4 c5 = Column.from_vector "floats" [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"] - c6 = c5.parse Decimal + c6 = c5.parse Value_Type.Float c6.to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111] Problems.assume_no_problems c6 Test.specify "should correctly parse booleans" <| c1 = Column.from_vector "bools" ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"] - c2 = c1.parse Boolean + c2 = c1.parse type=Value_Type.Boolean c2.name.should_equal c1.name c2.to_vector . should_equal [True, False, True, True, False, Nothing, False] + c2.value_type.should_equal Value_Type.Boolean c1.parse . to_vector . should_equal [True, False, True, True, False, Nothing, False] c3 = Column.from_vector "bools" ["yes", "no", Nothing] - c4 = c3.parse Boolean "yes|no" + c4 = c3.parse type=Value_Type.Boolean "yes|no" c4.to_vector . 
should_equal [True, False, Nothing] c5 = Column.from_vector "bools" ["true", "yes", "false"] - c6 = c5.parse Boolean + c6 = c5.parse type=Value_Type.Boolean c6.to_vector . should_equal [True, Nothing, False] w = Problems.get_attached_warnings c6 . find w-> w.is_a Invalid_Format w.column.should_equal "bools" - w.datatype . should_equal Boolean + w.value_type . should_equal Value_Type.Boolean w.cells . should_equal ["yes"] Test.specify "should correctly parse date and time" <| c1 = Column.from_vector "date" ["2022-05-07", "2000-01-01", "2010-12-31"] - c2 = c1.parse Date + c2 = c1.parse type=Value_Type.Date c2.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31] + c2.value_type.should_equal Value_Type.Date c3 = Column.from_vector "datetimes" ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56", "2010-12-31T12:34:56", "2010-12-31 12:34:56.123"] - c4 = c3.parse Date_Time + c4 = c3.parse type=Value_Type.Date_Time c4.to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56, Date_Time.new 2010 12 31 12 34 56, Date_Time.new 2010 12 31 12 34 56 123] + c4.value_type.should_equal Value_Type.Date_Time c5 = Column.from_vector "times" ["23:59:59", "00:00:00", "12:34:56"] - c6 = c5.parse Time_Of_Day + c6 = c5.parse type=Value_Type.Time c6.to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56] + c6.value_type.should_equal Value_Type.Time c7 = Column.from_vector "foo" ["2022-05-07 23:59:59", "42", "2010-12-31"] - c8 = c7.parse Date_Time . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Nothing, Nothing] + c8 = c7.parse type=Value_Type.Date_Time . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Nothing, Nothing] w = Problems.get_attached_warnings c8 . find w-> w.is_a Invalid_Format w.column.should_equal "foo" - w.datatype . should_equal Date_Time + w.value_type . should_equal Value_Type.Date_Time w.cells . 
should_equal ["42", "2010-12-31"] Test.specify "should correctly parse date and time with format" <| c1 = Column.from_vector "date" ["5/7/2022", "1/1/2000", "12/31/2010"] - c2 = c1.parse Date "M/d/yyyy" + c2 = c1.parse type=Value_Type.Date "M/d/yyyy" c2.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31] c3 = Column.from_vector "datetimes" ["5/7/2022 23:59:59", "1/1/2000 00:00:00", "12/31/2010 12:34:56"] - c4 = c3.parse Date_Time "M/d/yyyy HH:mm:ss" + c4 = c3.parse type=Value_Type.Date_Time "M/d/yyyy HH:mm:ss" c4.to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56] Test.specify "should handle invalid format strings gracefully" <| c1 = Column.from_vector "date" ["5/7/2022", "1/1/2000", "12/31/2010"] - c1.parse Date "M/d/fqsrf" . should_fail_with Illegal_Argument - c1.parse Time_Of_Day "HH:mm:ss.fff" . should_fail_with Illegal_Argument - c1.parse Date_Time "M/d/fqsrf HH:mm:ss.fff" . should_fail_with Illegal_Argument + c1.parse type=Value_Type.Date "M/d/fqsrf" . should_fail_with Illegal_Argument + c1.parse type=Value_Type.Time "HH:mm:ss.fff" . should_fail_with Illegal_Argument + c1.parse type=Value_Type.Date_Time "M/d/fqsrf HH:mm:ss.fff" . should_fail_with Illegal_Argument Test.specify "should correctly work in Auto mode" <| c1 = Column.from_vector "A" ["1", "2", "3"] @@ -484,21 +490,26 @@ spec = c6 = Column.from_vector "F" ["this is here to ensure the column has type text... can be replaced one we have retyping"] c7 = Column.from_vector "G" ["true", "42"] c8 = Column.from_vector "H" ["text-to-force-value-type-to-be-text", Nothing, Nothing, Nothing] + c8.value_type . should_equal Value_Type.Char r1 = c1.parse r1.to_vector . should_equal [1, 2, 3] + r1.value_type.should_equal Value_Type.Integer Problems.assume_no_problems r1 r2 = c2.parse r2.to_vector . 
should_equal [1.0, 2.5, 3.0] + r2.value_type.should_equal Value_Type.Float Problems.assume_no_problems r2 r3 = c3.parse r3.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31] + r3.value_type.should_equal Value_Type.Date Problems.assume_no_problems r3 r4 = c4.parse r4.to_vector . should_equal [True, False, Nothing] + r4.value_type.should_equal Value_Type.Boolean Problems.assume_no_problems r4 r5 = c5.parse @@ -508,25 +519,27 @@ spec = c5.parse format="yes|no" . should_fail_with Illegal_Argument r5_2 = c5.parse format=(Data_Formatter.Value.with_boolean_values ["yes"] ["no"]) r5_2.to_vector . should_equal [True, False] + r5_2.value_type . should_equal Value_Type.Boolean Problems.assume_no_problems r5_2 r6 = (c6.drop 1).parse r6.to_vector . should_equal [] - Test.with_clue "r6.value_type == "+r6.value_type.to_text+"; " <| - r6.value_type.is_text . should_be_true + r6.value_type . should_equal Value_Type.Char Problems.assume_no_problems r6 r7 = c7.parse r7.to_vector . should_equal ["true", "42"] + r7.value_type . should_equal Value_Type.Char Problems.assume_no_problems r7 r8 = c8.drop 1 . parse + r8.value_type . should_equal Value_Type.Char r8.to_vector . should_equal [Nothing, Nothing, Nothing] Problems.assume_no_problems r8 Test.specify "should error if invalid target type is provided" <| c1 = Column.from_vector "A" ["1", "2", "3"] - c1.parse Nothing . should_fail_with Illegal_Argument + c1.parse type=Nothing . should_fail_with Illegal_Argument Test.specify "should error if the input column is not text" <| c1 = Column.from_vector "A" [1, 2, 3] From c26f5f1faecce97e16b60da878795d7380361571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Apr 2023 18:50:29 +0200 Subject: [PATCH 4/7] Keep Data_Formatter use the raw Enso types. 
--- .../Table/0.0.0-dev/src/Data/Column.enso | 2 +- .../0.0.0-dev/src/Data/Data_Formatter.enso | 26 ++++++++++++++----- .../Table/0.0.0-dev/src/Data/Table.enso | 2 +- .../src/Internal/Parse_Values_Helper.enso | 6 ++--- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 5d9f3929ca80..824b871abcef 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -1038,7 +1038,7 @@ type Column _ : Data_Formatter -> format _ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.") - parser = formatter.make_datatype_parser type + parser = formatter.make_value_type_parser type storage = self.java_column.getStorage new_storage_and_problems = parser.parseColumn self.name storage diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso index 24d9b63d22f7..88c10c848a7a 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso @@ -2,7 +2,7 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import project.Internal.Parse_Values_Helper -from project.Data.Type.Value_Type import Value_Type, Auto +from project.Data.Type.Value_Type import Value_Type, Auto, Bits polyglot java import org.enso.table.parsing.IntegerParser polyglot java import org.enso.table.parsing.DecimalParser @@ -67,17 +67,17 @@ type Data_Formatter Arguments: - text: Text value to parse. - - value_type: The type to parse the value into. If set to `Auto`, the - type will be inferred automatically. + - value_type: The expected Enso type to parse the value into. 
If set to + `Auto`, the type will be inferred automatically. - on_problems: Specifies the behavior when a problem occurs. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. - parse : Text -> Value_Type | Auto -> Problem_Behavior -> Any + parse : Text -> (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Problem_Behavior -> Any parse self text value_type=Auto on_problems=Problem_Behavior.Report_Warning = - parser = self.make_datatype_parser datatype + parser = self.make_datatype_parser value_type result = parser.parseIndependentValue text - problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem datatype) + problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem value_type) on_problems.attach_problems_after result.value problems ## PRIVATE @@ -211,6 +211,18 @@ type Data_Formatter ## PRIVATE make_datatype_parser self datatype = case datatype of + Integer -> self.make_integer_parser + Decimal -> self.make_decimal_parser + Boolean -> self.make_boolean_parser + Date -> self.make_date_parser + Date_Time -> self.make_date_time_parser + Time_Of_Day -> self.make_time_of_day_parser + Auto -> self.make_auto_parser + _ -> + Error.throw (Illegal_Argument.Error "Unsupported datatype: "+datatype.to_text) + + ## PRIVATE + make_value_type_parser self value_type = case value_type of # TODO once we implement #5159 we will need to add checks for bounds here and support 16/32-bit ints Value_Type.Integer Bits.Bits_64 -> self.make_integer_parser # TODO once we implement #6109 we can support 32-bit floats @@ -221,7 +233,7 @@ type Data_Formatter Value_Type.Time -> self.make_time_of_day_parser Auto -> self.make_auto_parser _ -> - Error.throw (Illegal_Argument.Error "Unsupported value type: "+datatype.to_display_text) + Error.throw 
(Illegal_Argument.Error "Unsupported value type: "+value_type.to_display_text) ## PRIVATE get_specific_type_parsers self = diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index a9fcf05a9277..0e1c1bbe7bf5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -820,7 +820,7 @@ type Table _ : Data_Formatter -> format _ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.") - parser = formatter.make_datatype_parser type + parser = formatter.make_value_type_parser type select_problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns selected_columns = self.columns_helper.select_columns_helper columns reorder=True select_problem_builder diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso index 20d957000b6d..fef6b288d2d7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso @@ -9,10 +9,10 @@ polyglot java import org.enso.table.parsing.problems.LeadingZeros ## PRIVATE Translates a parse related problem additionally enriching it with expected datatype information that is not originally present on the Java side. 
-translate_parsing_problem expected_datatype problem = case problem of +translate_parsing_problem expected_value_type problem = case problem of java_problem : InvalidFormat -> - Invalid_Format.Error java_problem.column expected_datatype (Vector.from_polyglot_array java_problem.cells) + Invalid_Format.Error java_problem.column expected_value_type (Vector.from_polyglot_array java_problem.cells) java_problem : LeadingZeros -> - Leading_Zeros.Error java_problem.column expected_datatype (Vector.from_polyglot_array java_problem.cells) + Leading_Zeros.Error java_problem.column expected_value_type (Vector.from_polyglot_array java_problem.cells) _ -> Panic.throw (Illegal_State.Error "Reported an unknown problem type: "+problem.to_text) From 062292e26a1182509b6a74e6bf86191b765934cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Apr 2023 19:01:47 +0200 Subject: [PATCH 5/7] fixes --- .../lib/Standard/Table/0.0.0-dev/src/Data/Column.enso | 3 +-- .../Table/0.0.0-dev/src/Data/Data_Formatter.enso | 10 +++++----- .../lib/Standard/Table/0.0.0-dev/src/Data/Table.enso | 3 +-- .../0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso | 8 -------- .../lib/Standard/Table/0.0.0-dev/src/Errors.enso | 2 +- test/Table_Tests/src/Formatting/Parse_Values_Spec.enso | 4 ++-- 6 files changed, 10 insertions(+), 20 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 824b871abcef..b8b59be0f611 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -16,7 +16,6 @@ import project.Internal.Widget_Helpers from project.Data.Table import print_table from project.Data.Type.Value_Type import Value_Type, Auto -from project.Data.Type.Value_Type_Helpers import ensure_valid_parse_target from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type polyglot 
java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder @@ -1031,7 +1030,7 @@ type Column @type Widget_Helpers.parse_type_selector parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning = - Value_Type.expect_text self.value_type related_column=self.name <| ensure_valid_parse_target type <| + Value_Type.expect_text self.value_type related_column=self.name <| formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso index 88c10c848a7a..3b383c863bde 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso @@ -67,17 +67,17 @@ type Data_Formatter Arguments: - text: Text value to parse. - - value_type: The expected Enso type to parse the value into. If set to - `Auto`, the type will be inferred automatically. + - type: The expected Enso type to parse the value into. If set to `Auto`, + the type will be inferred automatically. - on_problems: Specifies the behavior when a problem occurs. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. parse : Text -> (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Problem_Behavior -> Any - parse self text value_type=Auto on_problems=Problem_Behavior.Report_Warning = - parser = self.make_datatype_parser value_type + parse self text type=Auto on_problems=Problem_Behavior.Report_Warning = + parser = self.make_datatype_parser type result = parser.parseIndependentValue text - problems = Vector.from_polyglot_array result.problems . 
map (Parse_Values_Helper.translate_parsing_problem value_type) + problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem type) on_problems.attach_problems_after result.value problems ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 0e1c1bbe7bf5..8ffba43dc7c1 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -38,7 +38,6 @@ import project.Data.Expression.Expression_Error import project.Delimited.Delimited_Format.Delimited_Format from project.Data.Type.Value_Type import Value_Type, Auto -from project.Data.Type.Value_Type_Helpers import ensure_valid_parse_target from project.Internal.Rows_View import Rows_View from project.Errors import all @@ -813,7 +812,7 @@ type Table table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',') parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table - parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ensure_valid_parse_target type <| + parse self columns=(self.columns . filter (c-> c.value_type.is_text) . 
map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso index 238670122ddc..f75d4338bc3e 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso @@ -75,11 +75,3 @@ find_common_type types strict = # Double check if Mixed was really allowed to come out. if types.contains Value_Type.Mixed then Value_Type.Mixed else Nothing - -## PRIVATE - Checks if the given type is a valid target type for parsing. - - This will be replaced once we change parse to rely on `Value_Type` instead. -ensure_valid_parse_target type ~action = - expected_types = [Auto, Integer, Decimal, Date, Date_Time, Time_Of_Day, Boolean] - if expected_types.contains type . not then Error.throw (Illegal_Argument.Error "Unsupported target type "+type.to_text+".") else action diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index fb03f1fc6980..197116a68729 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -249,7 +249,7 @@ type Invalid_Location format. type Invalid_Format ## PRIVATE - Error column:(Text|Nothing) (value_type:Value_Type) (cells:[Text]) + Error column:(Text|Nothing) (value_type:Value_Type|Integer|Number|Date|Time|Time_Of_Day|Boolean) (cells:[Text]) ## PRIVATE Pretty print the invalid format error. 
diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index e1dc8f1a2c71..2f02f486186c 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -39,7 +39,7 @@ spec = t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"] t3 = t1.parse type=Value_Type.Integer t3.at "ints" . to_vector . should_equal t1_parsed - Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros] + Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Value_Type.Integer t1_zeros] t4 = t1.parse type=Value_Type.Float t4.at "ints" . to_vector . should_equal t1_parsed @@ -201,7 +201,7 @@ spec = t5 = t.parse columns="ints" type=Value_Type.Float t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing] # `ints` are requested to be parsed as decimals. - t5.at "ints" . to_vector . first . should_be_a Value_Type.Float + t5.at "ints" . to_vector . first . should_be_a Decimal t6 = t.parse columns=["floats", "text+ints"] type=Auto # `floats` are auto-detected as decimals. 
From e99f574756f6f8b9de0eecbd3b5d8cba86c77be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Apr 2023 19:28:07 +0200 Subject: [PATCH 6/7] more fixes --- .../0.0.0-dev/src/Data/Data_Formatter.enso | 15 ++++---- .../Standard/Table/0.0.0-dev/src/Errors.enso | 2 +- .../Standard/Test/0.0.0-dev/src/Problems.enso | 6 +++- .../table/parsing/TypeInferringParser.java | 12 +++---- .../src/Formatting/Data_Formatter_Spec.enso | 21 +++++++++++ .../src/Formatting/Parse_Values_Spec.enso | 36 +++++++++---------- 6 files changed, 60 insertions(+), 32 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso index 3b383c863bde..91b7190ddd3b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Data_Formatter.enso @@ -67,17 +67,17 @@ type Data_Formatter Arguments: - text: Text value to parse. - - type: The expected Enso type to parse the value into. If set to `Auto`, - the type will be inferred automatically. + - datatype: The expected Enso type to parse the value into. If set to + `Auto`, the type will be inferred automatically. - on_problems: Specifies the behavior when a problem occurs. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. parse : Text -> (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Problem_Behavior -> Any - parse self text type=Auto on_problems=Problem_Behavior.Report_Warning = - parser = self.make_datatype_parser type + parse self text datatype=Auto on_problems=Problem_Behavior.Report_Warning = + parser = self.make_datatype_parser datatype result = parser.parseIndependentValue text - problems = Vector.from_polyglot_array result.problems . 
map (Parse_Values_Helper.translate_parsing_problem type) + problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem datatype) on_problems.attach_problems_after result.value problems ## PRIVATE @@ -219,7 +219,10 @@ type Data_Formatter Time_Of_Day -> self.make_time_of_day_parser Auto -> self.make_auto_parser _ -> - Error.throw (Illegal_Argument.Error "Unsupported datatype: "+datatype.to_text) + type_name = case datatype.to_text of + text : Text -> text + _ -> Meta.meta datatype . to_text + Error.throw (Illegal_Argument.Error "Unsupported datatype: "+type_name) ## PRIVATE make_value_type_parser self value_type = case value_type of diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index 197116a68729..fb32620a742a 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -255,7 +255,7 @@ type Invalid_Format Pretty print the invalid format error. to_display_text : Text to_display_text self = - self.cells.length+" cells in column "+self.column+" had invalid format for datatype "+self.value_type.to_text+"." + self.cells.length+" cells in column "+self.column+" had invalid format for type "+self.value_type.to_text+"." ## Indicates that some values contained leading zeros even though these were not allowed. diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso index a5aff8e873d8..c5d90fa81b43 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso @@ -97,11 +97,14 @@ expect_warning expected_warning result = ## UNSTABLE Checks if the provided value has a specific warning attached and if there are no other warnings. + + As a utility, it also returns the found warning. 
+ Arguments: - expected_warning: The expected warning. It can either by a warning type or a concrete value. - result: The value to check. -expect_only_warning : Any -> Any -> Nothing +expect_only_warning : Any -> Any -> Any expect_only_warning expected_warning result = warnings = get_attached_warnings result is_expected x = @@ -114,6 +117,7 @@ expect_only_warning expected_warning result = if invalid.not_empty then loc = Meta.get_source_location 3 Test.fail "Expected the result to contain only the warning: "+found.to_text+", but it also contained: "+invalid.to_text+' (at '+loc+').' + found ## UNSTABLE diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java index 51c6d3d27cb8..6628f0588978 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java @@ -8,8 +8,8 @@ import org.enso.table.problems.WithProblems; /** - * The type inferring parser tries to parse the given column using a set of provided parsers. It - * returns the result of the first parser that succeeds without reporting any problems. + * The type inferring parser tries to parse the given column using a set of provided parsers. It returns the result of + * the first parser that succeeds without reporting any problems. * *

If all parsers from the set reported problems, the fallback parser is used and its result is * returned regardless of any problems. @@ -40,10 +40,10 @@ public Object parseSingleValue(String text, ProblemAggregator problemAggregator) @Override public WithProblems> parseColumn(String columnName, Storage sourceStorage) { - // If there are now rows, the Auto parser would guess some random type (the first one that is - // checked). Instead, - // we just return the empty column unchanged. - if (sourceStorage.size() == 0) { + // If there are no values, the Auto parser would guess some random type (the first one that is + // checked). Instead, we just return the empty column unchanged. + boolean hasNoValues = (sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size()); + if (hasNoValues) { return fallbackParser.parseColumn(columnName, sourceStorage); } diff --git a/test/Table_Tests/src/Formatting/Data_Formatter_Spec.enso b/test/Table_Tests/src/Formatting/Data_Formatter_Spec.enso index 6875b735382a..3e58c6d23c32 100644 --- a/test/Table_Tests/src/Formatting/Data_Formatter_Spec.enso +++ b/test/Table_Tests/src/Formatting/Data_Formatter_Spec.enso @@ -127,6 +127,27 @@ spec = And newlines toO! formatter.parse complex_text . should_equal complex_text + Test.specify "should report Invalid_Format errors" <| + formatter = Data_Formatter.Value + expect_warning r = + r.should_equal Nothing + Problems.expect_only_warning Invalid_Format r + + r1 = formatter.parse "Text" datatype=Decimal + w1 = expect_warning r1 + w1.value_type . should_equal Decimal + w1.column . 
should_equal Nothing + + expect_warning <| formatter.parse "Text" datatype=Integer + expect_warning <| formatter.parse "Text" datatype=Boolean + expect_warning <| formatter.parse "Text" datatype=Date + expect_warning <| formatter.parse "Text" datatype=Date_Time + expect_warning <| formatter.parse "Text" datatype=Time_Of_Day + + Test.specify "should not allow unexpected types" <| + formatter = Data_Formatter.Value + formatter.parse "Text" datatype=List . should_fail_with Illegal_Argument + Test.group "DataFormatter.format" <| Test.specify "should handle Nothing" <| Data_Formatter.Value.format Nothing . should_equal Nothing diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index 2f02f486186c..ff6ef4a29b67 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -116,13 +116,13 @@ spec = t0 = t.parse type=Value_Type.Boolean t0.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing] t0.at "ints" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] - Problems.expect_warning (Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]) t0 - Problems.expect_warning (Invalid_Format.Error "ints" Boolean ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]) t0 + Problems.expect_warning (Invalid_Format.Error "bools" Value_Type.Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]) t0 + Problems.expect_warning (Invalid_Format.Error "ints" Value_Type.Boolean ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]) t0 a1 = t.parse columns=["ints"] type=Value_Type.Integer on_problems=_ t1 t = t.at "ints" . to_vector . 
should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing] - p1 = [Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]] + p1 = [Invalid_Format.Error "ints" Value_Type.Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]] Problems.test_problem_handling a1 p1 t1 a2 = t.parse columns=["floats"] type=Value_Type.Float on_problems=_ @@ -134,19 +134,19 @@ spec = a3 = t.parse columns=["bools"] type=Value_Type.Boolean on_problems=_ t3 t = t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing] - p3 = [Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]] + p3 = [Invalid_Format.Error "bools" Value_Type.Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]] Problems.test_problem_handling a3 p3 t3 a4 = t.parse columns=["times"] type=Value_Type.Date on_problems=_ t4 t = t.at "times" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] - p4 = [Invalid_Format.Error "times" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] + p4 = [Invalid_Format.Error "times" Value_Type.Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a4 p4 t4 a5 = t.parse columns=["times"] type=Value_Type.Date_Time on_problems=_ t5 t = t.at "times" . to_vector . 
should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing] - p5 = [Invalid_Format.Error "times" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] + p5 = [Invalid_Format.Error "times" Value_Type.Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]] Problems.test_problem_handling a5 p5 t5 a6 = t.parse columns=["times"] type=Value_Type.Time on_problems=_ @@ -223,7 +223,7 @@ spec = Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Value_Type.Float ["1.3", "_0", "0_"]] t5 = t3.parse format=opts type=Value_Type.Integer t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100] - Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.3", "_0", "0_"]] + Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Value_Type.Integer ["1,2", "1.3", "_0", "0_"]] Test.specify "should allow to specify custom values for booleans" <| opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"] @@ -234,7 +234,7 @@ spec = t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]] t4 = t3.parse format=opts_1 type=Value_Type.Boolean t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False] - Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]] + Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Value_Type.Boolean ["NO", "False", "True", "no", "oui"]] whitespace_table = ints = ["ints", ["0", "1 ", "0 1", " 2"]] @@ -248,7 +248,7 @@ spec = Test.specify "should trim input values by default" <| t1 = whitespace_table.parse columns="ints" type=Value_Type.Integer t1.at "ints" . to_vector . 
should_equal [0, 1, Nothing, 2] - Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["0 1"]) t1 + Problems.expect_only_warning (Invalid_Format.Error "ints" Value_Type.Integer ["0 1"]) t1 t2 = whitespace_table.parse columns="floats" type=Value_Type.Float t2.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0] @@ -256,25 +256,25 @@ spec = t3 = whitespace_table.parse columns="bools" type=Value_Type.Boolean t3.at "bools" . to_vector . should_equal [True, False, Nothing, False] - Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["t rue"]) t3 + Problems.expect_only_warning (Invalid_Format.Error "bools" Value_Type.Boolean ["t rue"]) t3 t4 = whitespace_table.parse columns="dates" type=Value_Type.Date t4.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""]) t4 + Problems.expect_only_warning (Invalid_Format.Error "dates" Value_Type.Date ["2022 - 07 - 17", ""]) t4 t5 = whitespace_table.parse columns="datetimes" type=Value_Type.Date_Time t5.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"]) t5 + Problems.expect_only_warning (Invalid_Format.Error "datetimes" Value_Type.Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"]) t5 t6 = whitespace_table.parse columns="times" type=Value_Type.Time t6.at "times" . to_vector . 
should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"]) t6 + Problems.expect_only_warning (Invalid_Format.Error "times" Value_Type.Time ["00 : 00 : 00"]) t6 Test.specify "should fail to parse if whitespace is present and trimming is turned off" <| opts = Data_Formatter.Value trim_values=False t1 = whitespace_table.parse format=opts columns="ints" type=Value_Type.Integer t1.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"]) t1 + Problems.expect_only_warning (Invalid_Format.Error "ints" Value_Type.Integer ["1 ", "0 1", " 2"]) t1 t2 = whitespace_table.parse format=opts columns="floats" type=Value_Type.Float t2.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0] @@ -282,19 +282,19 @@ spec = t3 = whitespace_table.parse format=opts columns="bools" type=Value_Type.Boolean t3.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False] - Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"]) t3 + Problems.expect_only_warning (Invalid_Format.Error "bools" Value_Type.Boolean ["True ", " false", "t rue"]) t3 t4 = whitespace_table.parse format=opts columns="dates" type=Value_Type.Date t4.at "dates" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]) t4 + Problems.expect_only_warning (Invalid_Format.Error "dates" Value_Type.Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]) t4 t5 = whitespace_table.parse format=opts columns="datetimes" type=Value_Type.Date_Time t5.at "datetimes" . to_vector . 
should_equal [Nothing, Nothing, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]) t5 + Problems.expect_only_warning (Invalid_Format.Error "datetimes" Value_Type.Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]) t5 t6 = whitespace_table.parse format=opts columns="times" type=Value_Type.Time t6.at "times" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] - Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"]) t6 + Problems.expect_only_warning (Invalid_Format.Error "times" Value_Type.Time ["11:00:00 ", " 00:00:00", "00 : 00 : 00"]) t6 Test.specify "should fallback to text if whitespace is present and trimming is turned off" <| c1 = ["1", " +2", "-123", Nothing] From 2210dbec2af31536c70bc75c4cdbf72c4643d4a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 6 Apr 2023 10:54:09 +0200 Subject: [PATCH 7/7] javafmt --- .../java/org/enso/table/parsing/TypeInferringParser.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java index 6628f0588978..6e13caebffa9 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java @@ -8,8 +8,8 @@ import org.enso.table.problems.WithProblems; /** - * The type inferring parser tries to parse the given column using a set of provided parsers. It returns the result of - * the first parser that succeeds without reporting any problems. + * The type inferring parser tries to parse the given column using a set of provided parsers. 
It + * returns the result of the first parser that succeeds without reporting any problems. * *

If all parsers from the set reported problems, the fallback parser is used and its result is * returned regardless of any problems. @@ -42,7 +42,8 @@ public Object parseSingleValue(String text, ProblemAggregator problemAggregator) public WithProblems> parseColumn(String columnName, Storage sourceStorage) { // If there are no values, the Auto parser would guess some random type (the first one that is // checked). Instead, we just return the empty column unchanged. - boolean hasNoValues = (sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size()); + boolean hasNoValues = + (sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size()); if (hasNoValues) { return fallbackParser.parseColumn(columnName, sourceStorage); }