diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso index cbc488eb4c4a..9fc87fa0c070 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso @@ -86,6 +86,18 @@ type Vector a from_array : Array -> Vector from_array array = @Builtin_Method "Vector.from_array" + ## PRIVATE + A helper method that takes a vector or an array or a single element and + returns a vector. + + If it got a vector or array, a vector containing the same elements is + returned. + If it got a single element, a vector containing this element is returned. + unify_vector_or_element value = case value of + vec : Vector -> vec + arr : Array -> Vector.from_polyglot_array arr + single_element -> [single_element] + ## Creates a new vector of the given length, filling the elements with the provided constant. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Runtime/Debug.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Runtime/Debug.enso index f69dba53d650..8d35fe65f1db 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Runtime/Debug.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Runtime/Debug.enso @@ -15,7 +15,7 @@ import project.Nothing.Nothing > Example Dropping into a debugging REPL during execution. - Debug.breakpoint + Standard.Base.Runtime.Debug.breakpoint breakpoint : Nothing breakpoint = @Builtin_Method "Debug.breakpoint" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 236ea652187c..15e6beab0cb4 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -3,6 +3,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Table.Data.Type.Enso_Types +import Standard.Table.Data.Type.Value_Type_Helpers import Standard.Table.Data.Column.Column as Materialized_Column import Standard.Table.Internal.Java_Problems import Standard.Table.Internal.Problem_Builder.Problem_Builder @@ -173,8 +174,8 @@ type Column ## UNSTABLE Returns the number of missing items in this column. - count_missing : Integer - count_missing self = + count_nothing : Integer + count_nothing self = self.to_table.filter 0 Filter_Condition.Is_Nothing . row_count ## UNSTABLE @@ -228,9 +229,11 @@ type Column `other`. equals_ignore_case : Column | Any -> Locale -> Column equals_ignore_case self other locale=Locale.default = - Helpers.assume_default_locale locale <| - new_name = self.naming_helpers.function_name "equals_ignore_case" [self, other] - self.make_binary_op "equals_ignore_case" other new_name + Value_Type.expect_text self <| + Value_Type.expect_text other <| + Helpers.assume_default_locale locale <| + new_name = self.naming_helpers.function_name "equals_ignore_case" [self, other] + self.make_binary_op "equals_ignore_case" other new_name ## Element-wise non-equality comparison. @@ -276,7 +279,8 @@ type Column `other`. If `other` is a column, the comparison is performed pairwise between corresponding elements of `self` and `other`. >= : Column | Any -> Column - >= self other = self.make_binary_op ">=" other + >= self other = Value_Type.expect_comparable self other <| + self.make_binary_op ">=" other ## UNSTABLE @@ -289,7 +293,8 @@ type Column `other`. 
If `other` is a column, the comparison is performed pairwise between corresponding elements of `self` and `other`. <= : Column | Any -> Column - <= self other = self.make_binary_op "<=" other + <= self other = Value_Type.expect_comparable self other <| + self.make_binary_op "<=" other ## UNSTABLE @@ -302,7 +307,8 @@ type Column `other`. If `other` is a column, the comparison is performed pairwise between corresponding elements of `self` and `other`. > : Column | Any -> Column - > self other = self.make_binary_op ">" other + > self other = Value_Type.expect_comparable self other <| + self.make_binary_op ">" other ## UNSTABLE @@ -315,7 +321,8 @@ type Column `other`. If `other` is a column, the comparison is performed pairwise between corresponding elements of `self` and `other`. < : Column | Any -> Column - < self other = self.make_binary_op "<" other + < self other = Value_Type.expect_comparable self other <| + self.make_binary_op "<" other ## Element-wise inclusive bounds check. @@ -331,8 +338,10 @@ type Column column fit between the lower and upper bounds (both ends inclusive). between : (Column | Any) -> (Column | Any) -> Column between self lower upper = - new_name = self.naming_helpers.to_expression_text self + " between " + self.naming_helpers.to_expression_text lower + " and " + self.naming_helpers.to_expression_text upper - self.make_op "BETWEEN" [lower, upper] new_name + Value_Type.expect_comparable self lower <| + Value_Type.expect_comparable self upper <| + new_name = self.naming_helpers.to_expression_text self + " between " + self.naming_helpers.to_expression_text lower + " and " + self.naming_helpers.to_expression_text upper + self.make_op "BETWEEN" [lower, upper] new_name ## UNSTABLE @@ -346,14 +355,10 @@ type Column between corresponding elements of `self` and `other`. + : Column | Any -> Column + self other = - self_type = self.value_type - other_type = find_argument_type other - op = if self_type.is_numeric && (other_type.is_nothing || other_type.is_numeric) then 'ADD_NUMBER' else - if self_type.is_text && (other_type.is_nothing || other_type.is_text) then 'ADD_TEXT' else - Error.throw <| Illegal_Argument.Error <| - "Cannot perform addition on a pair of values of types " + self_type.to_text + " and " + other_type.to_text + ". Addition can only be performed if both columns are of some numeric type or are both text." - new_name = self.naming_helpers.binary_operation_name "+" self other - self.make_binary_op op other new_name + op = Value_Type_Helpers.resolve_addition_kind self other + op.if_not_error <| + new_name = self.naming_helpers.binary_operation_name "+" self other + self.make_binary_op op other new_name ## UNSTABLE @@ -366,7 +371,9 @@ type Column element of `self`. If `other` is a column, the operation is performed pairwise between corresponding elements of `self` and `other`. - : Column | Any -> Column - - self other = self.make_binary_op "-" other + - self other = + Value_Type_Helpers.check_binary_numeric_op self other <| + self.make_binary_op "-" other ## UNSTABLE @@ -379,7 +386,9 @@ type Column element of `self`. If `other` is a column, the operation is performed pairwise between corresponding elements of `self` and `other`. 
* : Column | Any -> Column - * self other = self.make_binary_op "*" other + * self other = + Value_Type_Helpers.check_binary_numeric_op self other <| + self.make_binary_op "*" other ## ALIAS Divide Columns @@ -413,7 +422,8 @@ type Column example_div = Examples.integer_column / 10 / : Column | Any -> Column / self other = - self.make_binary_op "/" other + Value_Type_Helpers.check_binary_numeric_op self other <| + self.make_binary_op "/" other ## Element-wise modulus. @@ -445,10 +455,12 @@ type Column example_mod = Examples.integer_column % 3 % : Column | Any -> Column % self other = - other_type = find_argument_type other - op = if self.value_type.is_integer && (other_type.is_nothing || other_type.is_integer) then "%" else "mod" - new_name = self.naming_helpers.binary_operation_name "%" self other - self.make_binary_op op other new_name + Value_Type_Helpers.check_binary_numeric_op self other <| + other_type = Value_Type_Helpers.find_argument_type other + # Different implementation may be used for integer types. + op = if self.value_type.is_integer && (other_type.is_nothing || other_type.is_integer) then "%" else "mod" + new_name = self.naming_helpers.binary_operation_name "%" self other + self.make_binary_op op other new_name ## ALIAS Power @@ -477,7 +489,8 @@ type Column example_div = Examples.decimal_column ^ Examples.integer_column ^ : Column | Any -> Column ^ self other = - self.make_binary_op '^' other + Value_Type_Helpers.check_binary_numeric_op self other <| + self.make_binary_op '^' other ## UNSTABLE @@ -492,8 +505,9 @@ type Column and `other`. && : Column | Any -> Column && self other = - new_name = self.naming_helpers.binary_operation_name "&&" self other - self.make_binary_op "AND" other new_name + Value_Type_Helpers.check_binary_boolean_op self other <| + new_name = self.naming_helpers.binary_operation_name "&&" self other + self.make_binary_op "AND" other new_name ## UNSTABLE @@ -508,16 +522,18 @@ type Column and `other`. || : Column | Any -> Column || self other = - new_name = self.naming_helpers.binary_operation_name "||" self other - self.make_binary_op "OR" other new_name + Value_Type_Helpers.check_binary_boolean_op self other <| + new_name = self.naming_helpers.binary_operation_name "||" self other + self.make_binary_op "OR" other new_name ## UNSTABLE Boolean negation of each element in this column. not : Column not self = - new_name = "not " + self.naming_helpers.to_expression_text self - self.make_unary_op "NOT" new_name + Value_Type.expect_boolean self <| + new_name = "not " + self.naming_helpers.to_expression_text self + self.make_unary_op "NOT" new_name ## ALIAS IF @@ -529,8 +545,12 @@ type Column - when_false: value or column when `self` is `False`. 
iif : Any -> Any -> Column iif self when_true when_false = - new_name = "if " + self.naming_helpers.to_expression_text self + " then " + self.naming_helpers.to_expression_text when_true + " else " + self.naming_helpers.to_expression_text when_false - self.make_op "IIF" [when_true, when_false] new_name + Value_Type.expect_boolean self <| + common_type = Value_Type_Helpers.find_common_type_for_arguments [when_true, when_false] + common_type.if_not_error <| + new_name = "if " + self.naming_helpers.to_expression_text self + " then " + self.naming_helpers.to_expression_text when_true + " else " + self.naming_helpers.to_expression_text when_false + op_result = self.make_op "IIF" [when_true, when_false] new_name + adapt_unified_column op_result common_type ## Returns a column of first non-`Nothing` value on each row of `self` and `values` list. @@ -545,17 +565,20 @@ type Column example_coalesce = Examples.decimal_column.coalesce Examples.integer_column coalesce : (Any | Vector Any) -> Column - coalesce self values = case values of - _ : Vector -> - new_name = self.naming_helpers.function_name "coalesce" [self]+values - self.make_op "COALESCE" values new_name - _ : Array -> self.coalesce (Vector.from_polyglot_array values) - _ -> self.coalesce [values] + coalesce self values = + vec = Vector.unify_vector_or_element values + args_with_self = [self]+vec + common_type = Value_Type_Helpers.find_common_type_for_arguments args_with_self + common_type.if_not_error <| + new_name = self.naming_helpers.function_name "coalesce" args_with_self + op_result = self.make_op "COALESCE" vec new_name + adapt_unified_column op_result common_type - ## Returns a column of minimum on each row of `self` and `values` list. + ## Returns a column of minimum on each row of `self` and `values`. Arguments: - - values: list of columns or values to minimum with `self`. + - values: list of columns or values to minimum with `self`, or a single + column or value. > Example Get the minimum value in two columns. @@ -564,17 +587,17 @@ type Column example_min = Examples.decimal_column.min Examples.integer_column min : (Any | Vector Any) -> Column - min self values = case values of - _ : Vector -> - new_name = self.naming_helpers.function_name "min" [self]+values - self.make_op "ROW_MIN" values new_name - _ : Array -> self.min (Vector.from_polyglot_array values) - _ -> self.min [values] + min self values = + Value_Type_Helpers.check_multi_argument_comparable_op self values <| + args = Vector.unify_vector_or_element values + new_name = self.naming_helpers.function_name "min" [self]+args + self.make_op "ROW_MIN" args new_name - ## Returns a column of maximum on each row of `self` and `values` list. + ## Returns a column of maximum on each row of `self` and `values`. Arguments: - - values: list of columns or values to maximum with `self`. + - values: list of columns or values to maximum with `self`, or a single + column or value. > Example Get the maximum value in two columns. 
@@ -583,12 +606,11 @@ type Column example_max = Examples.decimal_column.max Examples.integer_column max : (Any | Vector Any) -> Column - max self values = case values of - _ : Vector -> - new_name = self.naming_helpers.function_name "max" [self]+values - self.make_op "ROW_MAX" values new_name - _ : Array -> self.max (Vector.from_polyglot_array values) - _ -> self.max [values] + max self values = + Value_Type_Helpers.check_multi_argument_comparable_op self values <| + args = Vector.unify_vector_or_element values + new_name = self.naming_helpers.function_name "max" [self]+args + self.make_op "ROW_MAX" args new_name ## UNSTABLE @@ -603,7 +625,7 @@ type Column Returns a column of booleans, with `True` items at the positions where this column contains a NaN. This is only applicable to double columns. is_nan : Column - is_nan self = + is_nan self = Value_Type.expect_floating_point self <| new_name = self.naming_helpers.function_name "is_nan" [self] self.make_unary_op "IS_NAN" new_name @@ -611,7 +633,7 @@ type Column Returns a column of booleans, with `True` items at the positions where this column contains an empty string or `Nothing`. is_empty : Column - is_empty self = + is_empty self = Value_Type.expect_text self <| new_name = self.naming_helpers.to_expression_text self + " is empty" self.make_unary_op "IS_EMPTY" new_name @@ -646,7 +668,7 @@ type Column is_blank = case self_type.is_text of True -> self.is_empty False -> self.is_nothing - result = case treat_nans_as_blank && self_type.is_floating_point of + result = case treat_nans_as_blank && self_type.is_floating_point && self.connection.dialect.supports_separate_nan of True -> is_blank || self.is_nan False -> is_blank result.rename new_name @@ -657,8 +679,11 @@ type Column provided default. fill_nothing : Column | Any -> Column fill_nothing self default = - new_name = self.naming_helpers.function_name "fill_nothing" [self, default] - self.make_binary_op "FILL_NULL" default new_name + common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default] + common_type.if_not_error <| + new_name = self.naming_helpers.function_name "fill_nothing" [self, default] + op_result = self.make_binary_op "FILL_NULL" default new_name + adapt_unified_column op_result common_type ## ALIAS Fill Empty @@ -671,9 +696,11 @@ type Column will be used. fill_empty : Column | Any -> Column fill_empty self default = - new_name = self.naming_helpers.function_name "fill_empty" [self, default] - result = self.is_empty.iif default self - result.rename new_name + Value_Type.expect_text self <| + Value_Type.expect_text default <| + new_name = self.naming_helpers.function_name "fill_empty" [self, default] + result = self.is_empty.iif default self + result.rename new_name ## Returns a new column, containing the same elements as `self`, but with the given name. @@ -808,8 +835,10 @@ type Column example_contains = Examples.text_column_1.like "F%." like : Column | Text -> Column like self pattern = - new_name = self.naming_helpers.binary_operation_name "like" self pattern - self.make_binary_op "LIKE" pattern new_name + Value_Type.expect_text self <| + Value_Type.expect_text pattern <| + new_name = self.naming_helpers.binary_operation_name "like" self pattern + self.make_binary_op "LIKE" pattern new_name ## This function removes the specified characters, by default any whitespace, from the start, the end, or both ends of the input. @@ -820,14 +849,16 @@ type Column - what: A `Text` (or text `Column`) containing characters that should be removed. 
By default, spaces, tabs, returns and new lines are removed. trim : Location -> Column | Text -> Column - trim self where=Location.Both what='' = Value_Type.expect_text self.value_type <| check_text_argument what "what" <| - new_name = self.naming_helpers.function_name "trim" [self] - operator = case where of - Location.Both -> "TRIM" - Location.Start -> "LTRIM" - Location.End -> "RTRIM" - if self.connection.dialect.is_supported operator then self.make_binary_op operator what new_name else - Error.throw (Unsupported_Database_Operation.Error ("`Column.trim "+where.to_text+"` is not supported by this connection.")) + trim self where=Location.Both what='' = + Value_Type.expect_text self <| + Value_Type.expect_text what <| + new_name = self.naming_helpers.function_name "trim" [self] + operator = case where of + Location.Both -> "TRIM" + Location.Start -> "LTRIM" + Location.End -> "RTRIM" + if self.connection.dialect.is_supported operator then self.make_binary_op operator what new_name else + Error.throw (Unsupported_Database_Operation.Error ("`Column.trim "+where.to_text+"` is not supported by this connection.")) ## Replaces the first, or all occurrences of `term` with `new_text` in each row. If `term` is empty, the function returns the table unchanged. @@ -868,7 +899,7 @@ type Column Applies only to columns that hold the `Date` or `Date_Time` types. Returns a column of `Integer` type. year : Column ! Invalid_Value_Type - year self = Value_Type.expect_has_date self.value_type related_column=self.name <| + year self = Value_Type.expect_has_date self <| self.make_unary_op "year" ## Gets the month as a number (1-12) from the date stored in the column. @@ -876,7 +907,7 @@ type Column Applies only to columns that hold the `Date` or `Date_Time` types. Returns a column of `Integer` type. month : Column ! Invalid_Value_Type - month self = Value_Type.expect_has_date self.value_type related_column=self.name <| + month self = Value_Type.expect_has_date self <| self.make_unary_op "month" ## Gets the day of the month as a number (1-31) from the date stored in the @@ -885,7 +916,7 @@ type Column Applies only to columns that hold the `Date` or `Date_Time` types. Returns a column of `Integer` type. day : Column ! Invalid_Value_Type - day self = Value_Type.expect_has_date self.value_type related_column=self.name <| + day self = Value_Type.expect_has_date self <| self.make_unary_op "day" ## Checks for each element of the column if it is contained within the @@ -1046,7 +1077,7 @@ type Column - that: The column to zip with `self`. - function: A binary function that is applied to corresponding pairs of elements of `self` and `that` to produce a value. - - skip_missing: controls whether missing values should be passed to the + - skip_nothing: controls whether missing values should be passed to the `function`. The default value of `True` will skip the rows for which the value in either column is missing and automatically append `Nothing` to the result table. If set to `False`, every pair of values @@ -1060,8 +1091,8 @@ type Column example_zip = Examples.integer_column.zip Examples.text_column_1 [_, _] zip : Column -> (Any -> Any -> Any) -> Boolean -> Column - zip self that function skip_missing=True = - _ = [that, function, skip_missing] + zip self that function skip_nothing=True = + _ = [that, function, skip_nothing] Error.throw <| Unsupported_Database_Operation.Error "`Column.zip` is not supported in the Database backends." 
## PRIVATE @@ -1085,30 +1116,10 @@ type Column var_args_functions : Array var_args_functions = ['is_in', 'coalesce', 'min', 'max'] -## PRIVATE - Finds the type of an argument to a column operation. - - If the argument is a column, the type of that column is returned. If it is an - Enso value, the smallest `Value_Type` that can fit that value will be - returned (but the Database is free to widen it to the closest type that it - supports without warning). - - Since there is no special type for `Nothing` and `Nothing` technically can - fit any nullable type, it usually needs to be handled specially. This method - returns `Nothing` if the value is `Nothing` - so the caller can try to treat - this value as fitting any type, or accordingly to specific semantics of each - method. -find_argument_type : Any -> Value_Type | Nothing -find_argument_type value = case value of - _ : Column -> value.value_type - _ : Internal_Column -> Panic.throw (Illegal_State.Error "This path is not implemented. If this is ever reached, that is a bug in the Database library.") - Nothing -> Nothing - _ -> Enso_Types.most_specific_value_type value use_smallest=True - ## PRIVATE Helper for case case_sensitivity based text operations make_text_case_op left op other case_sensitivity new_name = - result = Value_Type.expect_text left.value_type <| case case_sensitivity of + result = Value_Type.expect_text left <| Value_Type.expect_text other <| case case_sensitivity of Case_Sensitivity.Default -> left.make_binary_op op other Case_Sensitivity.Sensitive -> make_sensitive column = @@ -1143,7 +1154,10 @@ make_equality_check_with_floating_point_handling column other op = Java_Problems.parse_aggregated_problems problem_builder.getProblems ## PRIVATE -check_text_argument val field ~action = case val of - _ : Text -> action - _ : Column -> Value_Type.expect_text val.value_type <| action - _ -> Error.throw (Illegal_Argument.Error "The `"+field+"` must be Text or a Text Column.") +adapt_unified_column : Column -> Value_Type -> Column +adapt_unified_column column expected_type = + dialect = column.connection.dialect + infer_return_type expression = + SQL_Type_Reference.new column.connection column.context expression + adapted = dialect.adapt_unified_column column.as_internal expected_type infer_return_type + Column.Value name=column.name connection=column.connection sql_type_reference=adapted.sql_type_reference expression=adapted.expression context=column.context diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index 4733a1948b47..c20ba26498e2 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -1,7 +1,7 @@ from Standard.Base import all import Standard.Base.Errors.Unimplemented.Unimplemented -from Standard.Table import Aggregate_Column, Join_Kind +from Standard.Table import Aggregate_Column, Join_Kind, Value_Type import Standard.Table.Internal.Naming_Helpers.Naming_Helpers import Standard.Table.Internal.Problem_Builder.Problem_Builder @@ -137,15 +137,22 @@ type Dialect Unimplemented.throw "This is an interface only." ## PRIVATE - Specifies if the cast used to reconcile column types should be done after - performing the union. If `False`, the cast will be done before the union. - - Most databases that care about column types will want to do the cast - before the union operation to ensure that types are aligned when merging. 
- For an SQLite workaround to work, it's better to do the cast after the - union operation. - cast_after_union : Boolean - cast_after_union self = + Specifies if the Database distinguishes a separate `NaN` value for + floating point columns. Some databases will not be able to distinguish + NaN from NULL. + supports_separate_nan : Boolean + supports_separate_nan self = + Unimplemented.throw "This is an interface only." + + ## PRIVATE + Performs any transformations on a column resulting from unifying other + columns. + + These transformations depend on the dialect. They can be used to align + the result types, for example. + adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column + adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback = + _ = [column, approximate_result_type, infer_result_type_from_database_callback] Unimplemented.throw "This is an interface only." ## PRIVATE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index f603843fc79a..d7396f0fcd12 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -17,6 +17,7 @@ import Standard.Table.Data.Match_Columns as Match_Columns_Helpers import Standard.Table.Data.Report_Unmatched.Report_Unmatched import Standard.Table.Data.Row.Row import Standard.Table.Data.Table.Table as Materialized_Table +import Standard.Table.Data.Type.Value_Type_Helpers import Standard.Table.Internal.Aggregate_Column_Helper import Standard.Table.Internal.Java_Exports import Standard.Table.Internal.Table_Helpers @@ -41,7 +42,6 @@ import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind import project.Internal.IR.Query.Query import project.Internal.SQL_Type_Reference.SQL_Type_Reference -from project.Data.Column import find_argument_type from project.Errors import Unsupported_Database_Operation, Integrity_Error, Unsupported_Name @@ -621,7 +621,9 @@ type Table resolved = case column of _ : Text -> self.compute column on_problems _ -> column - renamed = if new_name.is_nothing then resolved else resolved.rename new_name + renamed = case new_name of + Nothing -> resolved + _ : Text -> resolved.rename new_name renamed.if_not_error <| index = self.internal_columns.index_of (c -> c.name == renamed.name) @@ -662,7 +664,7 @@ type Table get_column name = self.at name type_mapping = self.connection.dialect.get_type_mapping make_constant value = - argument_value_type = find_argument_type value + argument_value_type = Value_Type_Helpers.find_argument_type value sql_type = case argument_value_type of Nothing -> SQL_Type.null _ -> type_mapping.value_type_to_sql argument_value_type Problem_Behavior.Ignore @@ -1151,31 +1153,28 @@ type Table sql_type.catch Inexact_Type_Coercion error-> Panic.throw <| Illegal_State.Error "Unexpected inexact type coercion in Union. The union logic should only operate in types supported by the given backend. This is a bug in the Database library. 
The coercion was: "+error.to_display_text cause=error - [column_set, sql_type] + [column_set, sql_type, result_type] good_columns = merged_columns.filter r-> r.is_nothing.not if good_columns.is_empty then Error.throw No_Output_Columns else problem_builder.attach_problems_before on_problems <| - cast_after_union = dialect.cast_after_union queries = all_tables.map_with_index i-> t-> columns_to_select = good_columns.map description-> column_set = description.first - result_type = description.second + sql_type = description.second column_name = column_set.name - input_column = case column_set.column_indices.at i of + case column_set.column_indices.at i of Nothing -> typ = SQL_Type_Reference.from_constant SQL_Type.null expr = SQL_Expression.Literal "NULL" - Internal_Column.Value column_name typ expr + null_column = Internal_Column.Value column_name typ expr + ## We assume that the type for this + expression will never be queried - it is + just used internally to build the Union + operation and never exposed externally. + infer_return_type _ = SQL_Type_Reference.null + dialect.make_cast null_column sql_type infer_return_type corresponding_column_index : Integer -> t.at corresponding_column_index . as_internal . rename column_name - ## We return `null` return type, as this type should - never be queried - we will just put it into the - union and the overall queried type will be taken - from there. This is just needed to create an - internal representation. - infer_return_type _ = SQL_Type_Reference.null - if cast_after_union then input_column else - dialect.make_cast input_column result_type infer_return_type pairs = columns_to_select.map c-> [c.name, c.expression] Query.Select pairs t.context @@ -1191,15 +1190,11 @@ type Table SQL_Type_Reference.new self.connection new_ctx expression new_columns = good_columns.map description-> column_set = description.first - result_type = description.second + result_type = description.at 2 name = column_set.name expression = SQL_Expression.Column union_alias name - case cast_after_union of - True -> - input_column = Internal_Column.Value name SQL_Type_Reference.null expression - dialect.make_cast input_column result_type infer_return_type - False -> - Internal_Column.Value name (infer_return_type expression) expression + input_column = Internal_Column.Value name (infer_return_type expression) expression + dialect.adapt_unified_column input_column result_type infer_return_type Table.Value union_alias self.connection new_columns new_ctx diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 6f4766c6ae2d..0d7ce471dbe1 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -6,11 +6,14 @@ import Standard.Base.Errors.Unimplemented.Unimplemented import Standard.Table.Data.Aggregate_Column.Aggregate_Column import Standard.Table.Internal.Naming_Helpers.Naming_Helpers import Standard.Table.Internal.Problem_Builder.Problem_Builder +from Standard.Table import Value_Type from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all +from Standard.Table.Errors import Inexact_Type_Coercion import project.Connection.Connection.Connection import project.Data.Dialect import project.Data.SQL.Builder +import project.Data.SQL.SQL_Fragment import project.Data.SQL_Statement.SQL_Statement 
import project.Data.SQL_Type.SQL_Type import project.Data.Table.Table @@ -148,8 +151,36 @@ type Postgres_Dialect needs_execute_query_for_type_inference self = False ## PRIVATE - cast_after_union : Boolean - cast_after_union self = False + supports_separate_nan : Boolean + supports_separate_nan self = True + + ## PRIVATE + There is a bug in Postgres type inference, where if we unify two + fixed-length char columns of length N and M, the result type is said to + be a **fixed-length** column of length max_int4. This is wrong, and in + practice the column is just a variable-length text. This method detects + these situations and overrides the type to make it correct. + adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column + adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback = + needs_char_length_override_check = case approximate_result_type of + Value_Type.Char _ _ -> True + _ -> False + case needs_char_length_override_check of + True -> + type_mapping = self.get_type_mapping + db_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get + case db_type of + Value_Type.Char _ _ -> case db_type == approximate_result_type of + True -> column + False -> + type_override = type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Report_Error + type_override.catch Inexact_Type_Coercion _-> + Panic.throw <| + Illegal_State.Error "The target type ("+db_type.to_display_text+") that we need to cast to seems to not be supported by the Dialect. This is not expected. It is a bug in the Database library." + self.make_cast column type_override infer_result_type_from_database_callback + _ -> Panic.throw <| + Illegal_State.Error "The type computed by our logic is Char, but the Database computed a non-text type ("+db_type.to_display_text+"). This should never happen and should be reported as a bug in the Database library." + False -> column ## PRIVATE prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement @@ -180,9 +211,52 @@ make_internal_generator_dialect = stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"] stats = [agg_median, agg_mode, agg_percentile, stddev_pop, stddev_samp] date_ops = [make_extract_as_int "year" "YEAR", make_extract_as_int "month" "MONTH", make_extract_as_int "day" "DAY"] - my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides = [is_null, is_empty] + my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides Base_Generator.base_dialect . extend_with my_mappings +## PRIVATE + This overrides the default behaviour, due to a weird behaviour of Postgres - + it wants to determine the type for the parameter provided to IS NULL. + + But when the parameter is NULL, the type is unspecified. This only occurs if + a constant-NULL column is created in an expression builder `make_constant` + when computing an expression. We do not want to give it a default type, as + it needs to be flexible - this NULL column may be used in expressions of + various types. Only with IS NULL, having no type associated will fail with + `ERROR: could not determine data type of parameter`. To circumvent this + issue, we simply check if the parameter to be provided there is a `Nothing` + interpolation. If it is, we will just rewrite the expression to `TRUE` since + that is the expected result of `NULL IS NULL`. 
+ + With the IR refactor, this should be done in some preprocess pass that still + works on SQL_Expression and not raw SQL. +is_null = Base_Generator.lift_unary_op "IS_NULL" arg-> + if represents_an_untyped_null_expression arg then Builder.code "TRUE" else + Builder.code "(" ++ arg.paren ++ " IS NULL)" + +## PRIVATE + See `is_null` above. + + It is a heuristic that will match generated expressions coming from + a NULL Literal or a Nothing constant. This should be enough, as any more + complex expression should have some type associated with it. +represents_an_untyped_null_expression arg = + vec = arg.fragments.build + if vec.length != 1 then False else + case vec.first of + SQL_Fragment.Code_Part "NULL" -> True + SQL_Fragment.Interpolation Nothing -> True + _ -> False + +## PRIVATE + The same issue as with `is_null` above, but here we can assume that `arg` + represents some `text` value, so we can just CAST it. +is_empty = Base_Generator.lift_unary_op "IS_EMPTY" arg-> + is_null = (arg.paren ++ "::text IS NULL").paren + is_empty = (arg ++ " = ''").paren + (is_null ++ " OR " ++ is_empty).paren + ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> Builder.code "COUNT(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 END)" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso index 0fd8d1f66205..5303dcc74690 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso @@ -9,6 +9,7 @@ import project.Data.SQL_Type.SQL_Type import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.SQL_Type_Mapping import project.Internal.SQL_Type_Reference.SQL_Type_Reference +from project.Errors import Unsupported_Database_Operation polyglot java import java.sql.Types @@ -37,9 +38,11 @@ type Postgres_Type_Mapping SQL_Type.Value Types.DECIMAL "decimal" precision scale Value_Type.Char size variable -> case variable of - True -> case size of - Nothing -> SQL_Type.Value Types.VARCHAR "text" - _ -> SQL_Type.Value Types.VARCHAR "varchar" size + True -> + is_unbounded = size.is_nothing || (size == max_precision) + case is_unbounded of + True -> SQL_Type.Value Types.VARCHAR "text" + False -> SQL_Type.Value Types.VARCHAR "varchar" size False -> SQL_Type.Value Types.CHAR "char" size Value_Type.Time -> SQL_Type.Value Types.TIME "time" @@ -51,7 +54,7 @@ type Postgres_Type_Mapping Value_Type.Binary _ _ -> SQL_Type.Value Types.BINARY "bytea" precision=max_precision Value_Type.Mixed -> - Error.throw (Illegal_Argument.Error "Postgres tables do not support Mixed types.") + Error.throw (Unsupported_Database_Operation.Error "Postgres tables do not support Mixed types.") Value_Type.Unsupported_Data_Type type_name underlying_type -> underlying_type.if_nothing <| Error.throw <| @@ -115,7 +118,8 @@ complex_types_map = Map.from_vector <| make_decimal sql_type = Value_Type.Decimal sql_type.precision sql_type.scale make_varchar sql_type = - Value_Type.Char size=sql_type.precision variable_length=True + effective_size = if sql_type.precision == max_precision then Nothing else sql_type.precision + Value_Type.Char size=effective_size variable_length=True make_char sql_type = Value_Type.Char size=sql_type.precision variable_length=False make_binary variable sql_type = diff --git 
a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso index fa5521dc5cd2..72a91eebf333 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Redshift/Redshift_Dialect.enso @@ -2,6 +2,7 @@ from Standard.Base import all import Standard.Table.Internal.Naming_Helpers.Naming_Helpers from Standard.Table import Aggregate_Column +from Standard.Table import Value_Type import project.Connection.Connection.Connection import project.Data.Dialect @@ -119,8 +120,14 @@ type Redshift_Dialect needs_execute_query_for_type_inference self = False ## PRIVATE - cast_after_union : Boolean - cast_after_union self = False + supports_separate_nan : Boolean + supports_separate_nan self = True + + ## PRIVATE + adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column + adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback = + _ = [approximate_result_type, infer_result_type_from_database_callback] + column ## PRIVATE prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index 6e903c5ee437..976aaad8a250 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -6,6 +6,7 @@ import Standard.Table.Data.Aggregate_Column.Aggregate_Column import Standard.Table.Internal.Naming_Helpers.Naming_Helpers import Standard.Table.Internal.Problem_Builder.Problem_Builder from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all +from Standard.Table import Value_Type import project.Connection.Connection.Connection import project.Data.SQL.Builder @@ -153,8 +154,20 @@ type SQLite_Dialect needs_execute_query_for_type_inference self = True ## PRIVATE - cast_after_union : Boolean - cast_after_union self = True + supports_separate_nan : Boolean + supports_separate_nan self = False + + ## PRIVATE + SQLite allows mixed type columns, but we want our columns to be uniform. + So after unifying columns with mixed types, we add a cast to ensure that. + adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column + adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback = + # TODO [RW] This may be revisited with #6281. 
+ case approximate_result_type of + Nothing -> column + _ -> + sql_type = self.get_type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Ignore + self.make_cast column sql_type infer_result_type_from_database_callback ## PRIVATE prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso index 797cd709a1b5..fb0a5fe2aebc 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso @@ -42,8 +42,9 @@ polyglot java import java.sql.Types allows to store any data in a column, we restrict the data to only what can match the column's affinity to be aligned with our other backends. - We map the BLOB affinity to our Mixed type to allow for Mixed type columns. - One can still store binary data in such a column. + As text is the most universal storage for a single type, we coerce our `Mixed` + type to a `Char` type. Values of mixed types can still be stored in + such a column, and they will be retrieved as their text representation. See `JDBC3ResultSet::getColumnType` method in the `org.xerial.sqlite-jdbc` module for the logic JDBC is using to map the SQLite types. @@ -62,7 +63,7 @@ type SQLite_Type_Mapping Value_Type.Date -> SQLite_Types.blob Value_Type.Date_Time _ -> SQLite_Types.blob Value_Type.Binary _ _ -> SQLite_Types.blob - Value_Type.Mixed -> SQLite_Types.blob + Value_Type.Mixed -> SQLite_Types.text Value_Type.Unsupported_Data_Type type_name underlying_type -> underlying_type.if_nothing <| Error.throw <| @@ -107,31 +108,32 @@ type SQLite_Type_Mapping SQLite_Type_Mapping.sql_type_to_value_type internal_column.sql_type_reference.get enso_value -> Enso_Types.most_specific_value_type enso_value use_smallest=True - handle_preserve_input_type _ = + reconcile_types types = + result = Value_Type_Helpers.find_common_type types strict=False + # We remap Mixed to Char, to be consistent with our main mapping. + if result == Value_Type.Mixed then default_text else result + + ## We actually re-use the logic from the in-memory backend, since the + SQLite types are a small subset of that and the logic for SQLite + essentially forms a proper sub-algebra (in the universal algebra + terms). + find_a_common_type _ = inputs_types = arguments.map find_type - if inputs_types.is_empty then infer_default_type else - first_type = inputs_types.first - if inputs_types.all (== first_type) then return first_type else - infer_default_type + return (reconcile_types inputs_types) handle_iif _ = if arguments.length != 3 then Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.") inputs_types = arguments.drop 1 . map find_type - if inputs_types.first == inputs_types.second then return inputs_types.first else - case Value_Type_Helpers.reconcile_types inputs_types.first inputs_types.second of ## Inference failed, fall back to default type. Ideally, should never happen. To be handled in #6106. 
- Value_Type.Mixed -> infer_default_type - common -> return common + return (reconcile_types inputs_types) always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_NAN", "IS_EMPTY", "LIKE", "IS_IN", "starts_with", "ends_with", "contains"] always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED"] - preserve_input_type_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FIRST", "LAST", "FIRST_NOT_NULL", "LAST_NOT_NULL", "FILL_NULL"] + merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FIRST", "LAST", "FIRST_NOT_NULL", "LAST_NOT_NULL", "FILL_NULL"] others = [["IIF", handle_iif]] mapping = Map.from_vector <| v1 = always_boolean_ops.map [_, const (return Value_Type.Boolean)] - v2 = preserve_input_type_ops.map [_, handle_preserve_input_type] + v2 = merge_input_types_ops.map [_, find_a_common_type] v3 = always_text_ops.map [_, const (return default_text)] v1 + v2 + v3 + others handler = mapping.get op_name (_ -> infer_default_type) @@ -162,7 +164,7 @@ simple_types_map = Map.from_vector <| # We treat numeric as a float, since that is what really sits in SQLite under the hood. numerics = [Types.DECIMAL, Types.NUMERIC] . map x-> [x, Value_Type.Float Bits.Bits_64] strings = [Types.CHAR, Types.VARCHAR] . map x-> [x, default_text] - blobs = [Types.BINARY, Types.BLOB, Types.CLOB] . map x-> [x, Value_Type.Mixed] + blobs = [Types.BINARY, Types.BLOB, Types.CLOB] . map x-> [x, Value_Type.Binary] special_types = [[Types.BOOLEAN, Value_Type.Boolean]] ints + floats + numerics + strings + blobs + special_types diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso index 622f227042b7..ad956f73ee19 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Statement_Setter.enso @@ -35,4 +35,10 @@ fill_hole_default stmt i value = case value of _ : Decimal -> stmt.setDouble i value _ : Text -> stmt.setString i value _ : Date_Time -> JDBCUtils.setZonedDateTime stmt i value - _ -> stmt.setObject i value + ## Time_Of_Day and Date sometimes work ok, but sometimes are passed as + `org.graalvm.polyglot.Value` to the JDBC driver which is then unable to + infer the correct type for them. Instead, we use these helper functions + which ensure the type is locked to the Java class known to JDBC. 
+ _ : Time_Of_Day -> JDBCUtils.setLocalTime stmt i value + _ : Date -> JDBCUtils.setLocalDate stmt i value + _ -> stmt.setObject i value diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index b7cc7439cde9..cbeaa6ca1c8d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -8,16 +8,19 @@ import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Data.Index_Sub_Range as Index_Sub_Range_Module import project.Data.Data_Formatter.Data_Formatter +import project.Data.Type.Enso_Types import project.Data.Type.Storage +import project.Data.Type.Value_Type_Helpers import project.Data.Table.Table import project.Internal.Java_Problems import project.Internal.Naming_Helpers.Naming_Helpers import project.Internal.Parse_Values_Helper import project.Internal.Widget_Helpers +import project.Data.Type.Value_Type_Helpers from project.Data.Table import print_table from project.Data.Type.Value_Type import Value_Type, Auto -from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type +from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type, Inexact_Type_Coercion polyglot java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage @@ -160,7 +163,7 @@ type Column problem_builder.reportFloatingPointEquality -1 a == b _ -> a == b - run_vectorized_binary_op_with_problem_handling self '==' fallback other new_name + run_vectorized_binary_op_with_problem_handling self '==' fallback other expected_result_type=Value_Type.Boolean new_name ## Element-wise case-insensitive text equality comparison. @@ -177,9 +180,13 @@ type Column cost of Java-to-Enso calls. We want to have a vectorized implementation, but we need to extend the architecture to allow passing the locale to it. - See: https://www.pivotaltracker.com/n/projects/2539304/stories/184093260 - new_name = Naming_Helpers.function_name "equals_ignore_case" [self, other] - run_vectorized_binary_op self "equals_ignore_case" (x-> y-> x.equals_ignore_case y locale=locale) other new_name + See: https://github.com/enso-org/enso/issues/5250 + fallback x y = + x.equals_ignore_case y locale=locale + Value_Type.expect_text self <| + Value_Type.expect_text other <| + new_name = Naming_Helpers.function_name "equals_ignore_case" [self, other] + run_vectorized_binary_op self "equals_ignore_case" fallback other expected_result_type=Value_Type.Boolean new_name ## Element-wise non-equality comparison. @@ -239,8 +246,8 @@ type Column example_geq = Examples.integer_column >= 1 >= : Column | Any -> Column - >= self other = - run_vectorized_binary_op self ">=" (>=) other + >= self other = Value_Type.expect_comparable self other <| + run_vectorized_binary_op self ">=" (>=) other expected_result_type=Value_Type.Boolean ## Element-wise order comparison. @@ -266,8 +273,8 @@ type Column example_leq = Examples.integer_column <= 1 <= : Column | Any -> Column - <= self other = - run_vectorized_binary_op self "<=" (<=) other + <= self other = Value_Type.expect_comparable self other <| + run_vectorized_binary_op self "<=" (<=) other expected_result_type=Value_Type.Boolean ## Element-wise order comparison. 
@@ -293,8 +300,8 @@ type Column example_gt = Examples.integer_column > 1 > : Column | Any -> Column - > self other = - run_vectorized_binary_op self ">" (>) other + > self other = Value_Type.expect_comparable self other <| + run_vectorized_binary_op self ">" (>) other expected_result_type=Value_Type.Boolean ## Element-wise order comparison. @@ -320,7 +327,8 @@ type Column example_lt = Examples.integer_column < 1 < : Column | Any -> Column - < self other = run_vectorized_binary_op self "<" (<) other + < self other = Value_Type.expect_comparable self other <| + run_vectorized_binary_op self "<" (<) other expected_result_type=Value_Type.Boolean ## Element-wise inclusive bounds check. @@ -336,9 +344,11 @@ type Column column fit between the lower and upper bounds (both ends inclusive). between : (Column | Any) -> (Column | Any) -> Column between self lower upper = - new_name = Naming_Helpers.to_expression_text self + " between " + Naming_Helpers.to_expression_text lower + " and " + Naming_Helpers.to_expression_text upper - result = (self >= lower) && (self <= upper) - result.rename new_name + Value_Type.expect_comparable self lower <| + Value_Type.expect_comparable self upper <| + new_name = Naming_Helpers.to_expression_text self + " between " + Naming_Helpers.to_expression_text lower + " and " + Naming_Helpers.to_expression_text upper + result = (self >= lower) && (self <= upper) + result.rename new_name ## ALIAS Add Columns @@ -366,7 +376,9 @@ type Column example_plus = Examples.integer_column + 10 + : Column | Any -> Column - + self other = run_vectorized_binary_op self '+' (+) other + + self other = + Value_Type_Helpers.resolve_addition_kind self other . if_not_error <| + run_vectorized_binary_op self '+' fallback_fn=Nothing other ## ALIAS Subtract Columns @@ -394,7 +406,9 @@ type Column example_minus = Examples.integer_column - 10 - : Column | Any -> Column - - self other = run_vectorized_binary_op self '-' (-) other + - self other = + Value_Type_Helpers.check_binary_numeric_op self other <| + run_vectorized_binary_op self '-' fallback_fn=Nothing other ## ALIAS Multiply Columns @@ -422,7 +436,9 @@ type Column example_mul = Examples.integer_column * 10 * : Column | Any -> Column - * self other = run_vectorized_binary_op self '*' (*) other + * self other = + Value_Type_Helpers.check_binary_numeric_op self other <| + run_vectorized_binary_op self '*' fallback_fn=Nothing other ## ALIAS Divide Columns @@ -456,13 +472,9 @@ type Column example_div = Examples.integer_column / 10 / : Column | Any -> Column / self other = - new_name = Naming_Helpers.binary_operation_name "/" self other - fallback problem_builder a b = - if b != 0 then a / b else - # TODO indices for fallback - problem_builder.reportDivisionByZero Nothing - Nothing - run_vectorized_binary_op_with_problem_handling self "/" fallback other new_name + Value_Type_Helpers.check_binary_numeric_op self other <| + new_name = Naming_Helpers.binary_operation_name "/" self other + run_vectorized_binary_op_with_problem_handling self "/" fallback_fn=Nothing other new_name ## Element-wise modulus. 
@@ -494,12 +506,9 @@ type Column example_mod = Examples.integer_column % 3 % : Column | Any -> Column % self other = - new_name = Naming_Helpers.binary_operation_name "%" self other - fallback problem_builder a b = - if b != 0 then a % b else - # TODO indices for fallback - problem_builder.reportDivisionByZero Nothing - run_vectorized_binary_op_with_problem_handling self "%" fallback other new_name + Value_Type_Helpers.check_binary_numeric_op self other <| + new_name = Naming_Helpers.binary_operation_name "%" self other + run_vectorized_binary_op_with_problem_handling self "%" fallback_fn=Nothing other new_name ## ALIAS Power Columns @@ -527,7 +536,9 @@ type Column example_div = Examples.decimal_column ^ Examples.integer_column ^ : Column | Any -> Column - ^ self other = run_vectorized_binary_op self '^' (^) other + ^ self other = + Value_Type_Helpers.check_binary_numeric_op self other <| + run_vectorized_binary_op self '^' fallback_fn=Nothing other ## ALIAS AND Columns @@ -557,7 +568,8 @@ type Column example_and = Examples.bool_column_1 && True && : Column | Any -> Column && self other = - run_vectorized_binary_op self "&&" (&&) other + Value_Type_Helpers.check_binary_boolean_op self other <| + run_vectorized_binary_op self "&&" fallback_fn=Nothing other ## ALIAS OR Columns @@ -587,7 +599,8 @@ type Column example_or = Examples.bool_column_1 || True || : Column | Any -> Column || self other = - run_vectorized_binary_op self "||" (||) other + Value_Type_Helpers.check_binary_boolean_op self other <| + run_vectorized_binary_op self "||" fallback_fn=Nothing other ## ALIAS NOT @@ -601,8 +614,9 @@ type Column example_not = Examples.bool_column_1.not not : Column not self = - new_name = "not " + Naming_Helpers.to_expression_text self - run_vectorized_unary_op self "not" .not new_name + Value_Type.expect_boolean self <| + new_name = "not " + Naming_Helpers.to_expression_text self + run_vectorized_unary_op self "not" fallback_fn=Nothing new_name ## ALIAS IF @@ -620,22 +634,24 @@ type Column example_if = Examples.bool_column_1.iif 1 0 iif : Any -> Any -> Column - iif self when_true when_false = case self.value_type of - Value_Type.Boolean -> - new_name = "if " + Naming_Helpers.to_expression_text self + " then " + Naming_Helpers.to_expression_text when_true + " else " + Naming_Helpers.to_expression_text when_false - s = self.java_column.getStorage + iif self when_true when_false = + Value_Type.expect_boolean self <| + common_type = Value_Type_Helpers.find_common_type_for_arguments [when_true, when_false] . 
if_nothing Value_Type.Mixed + storage_type = Storage.from_value_type_strict common_type + storage_type.if_not_error <| + new_name = "if " + Naming_Helpers.to_expression_text self + " then " + Naming_Helpers.to_expression_text when_true + " else " + Naming_Helpers.to_expression_text when_false + s = self.java_column.getStorage - true_val = case when_true of - _ : Column -> when_true.java_column.getStorage - _ -> when_true + true_val = case when_true of + _ : Column -> when_true.java_column.getStorage + _ -> when_true - false_val = case when_false of - _ : Column -> when_false.java_column.getStorage - _ -> when_false + false_val = case when_false of + _ : Column -> when_false.java_column.getStorage + _ -> when_false - rs = s.iif true_val false_val - Column.Value (Java_Column.new new_name rs) - _ -> Error.throw (Illegal_Argument.Error "`iif` can only be used with boolean columns.") + rs = s.iif true_val false_val storage_type + Column.Value (Java_Column.new new_name rs) ## Returns a column of first non-`Nothing` value on each row of `self` and `values` list. @@ -654,10 +670,11 @@ type Column fallback a b = a.if_nothing b run_vectorized_many_op self "coalesce" fallback values - ## Returns a column of minimum on each row of `self` and `values` list. + ## Returns a column of minimum on each row of `self` and `values`. Arguments: - - values: list of columns or values to minimum with `self`. + - values: list of columns or values to minimum with `self`, or a single + column or value. > Example Get the minimum value in two columns. @@ -670,12 +687,14 @@ type Column fallback a b = if a.is_nothing then b else if b.is_nothing then a else if b < a then b else a - run_vectorized_many_op self "min" fallback values + Value_Type_Helpers.check_multi_argument_comparable_op self values <| + run_vectorized_many_op self "min" fallback values - ## Returns a column of maximum on each row of `self` and `values` list. + ## Returns a column of maximum on each row of `self` and `values`. Arguments: - - values: list of columns or values to maximum with `self`. + - values: list of columns or values to maximum with `self`, or a single + column or value. > Example Get the maximum value in two columns. @@ -688,7 +707,8 @@ type Column fallback a b = if a.is_nothing then b else if b.is_nothing then a else if b > a then b else a - run_vectorized_many_op self "max" fallback values + Value_Type_Helpers.check_multi_argument_comparable_op self values <| + run_vectorized_many_op self "max" fallback values ## Returns a column of booleans, with `True` items at the positions where this column contains a `Nothing`. @@ -698,30 +718,41 @@ type Column import Standard.Examples - example_is_missing = Examples.decimal_column.is_nothing + example_is_nothing = Examples.decimal_column.is_nothing is_nothing : Column is_nothing self = new_name = Naming_Helpers.to_expression_text self + " is null" - run_vectorized_unary_op self "is_nothing" (== Nothing) new_name on_missing=True + run_vectorized_unary_op self "is_nothing" (== Nothing) expected_result_type=Value_Type.Boolean new_name on_missing=True ## UNSTABLE Returns a column of booleans, with `True` items at the positions where this column contains a NaN. This is only applicable to double columns. 
is_nan : Column is_nan self = + Value_Type.expect_floating_point self <| + self.internal_is_nan on_missing=Nothing + + ## PRIVATE + internal_is_nan : Column + internal_is_nan self on_missing = new_name = Naming_Helpers.function_name "is_nan" [self] - is_object_nan x = case x of + fallback x = case x of _ : Decimal -> x.is_nan - _ -> False - run_vectorized_unary_op self "is_nan" is_object_nan new_name on_missing=False + _ -> False + run_vectorized_unary_op self "is_nan" fallback expected_result_type=Value_Type.Boolean new_name on_missing=on_missing ## PRIVATE Returns a column of booleans, with `True` items at the positions where this column contains an empty string or `Nothing`. is_empty : Column - is_empty self = + is_empty self = Value_Type.expect_text self <| + self.internal_is_empty + + ## PRIVATE + internal_is_empty : Column + internal_is_empty self = new_name = Naming_Helpers.to_expression_text self + " is empty" - run_vectorized_unary_op self "is_empty" Filter_Condition.Is_Empty.to_predicate new_name on_missing=True + run_vectorized_unary_op self "is_empty" Filter_Condition.Is_Empty.to_predicate expected_result_type=Value_Type.Boolean new_name on_missing=True ## Returns a column of booleans, with `True` items at the positions where this column does not contain a `Nothing`. @@ -752,8 +783,10 @@ type Column new_name = Naming_Helpers.function_name "is_blank" [self] result = case self.value_type of Value_Type.Char _ _ -> self.is_empty - Value_Type.Float _ -> if treat_nans_as_blank then self.is_nothing || self.is_nan else self.is_nothing - Value_Type.Mixed -> if treat_nans_as_blank then self.is_empty || self.is_nan else self.is_empty + Value_Type.Float _ -> + if treat_nans_as_blank then self.is_nothing || (self.internal_is_nan on_missing=False) else self.is_nothing + Value_Type.Mixed -> + self.internal_is_empty || (if treat_nans_as_blank then self.is_nothing || (self.internal_is_nan on_missing=False) else self.is_nothing) _ -> self.is_nothing result.rename new_name @@ -775,16 +808,19 @@ type Column example_fill_missing = Examples.decimal_column.fill_nothing 20.5 fill_nothing : Column | Any -> Column fill_nothing self default = - new_name = Naming_Helpers.function_name "fill_nothing" [self, default] - storage = self.java_column.getStorage - new_st = case default of - Column.Value java_col -> - other_storage = java_col.getStorage - storage.fillMissingFrom other_storage - _ -> - storage.fillMissing default - col = Java_Column.new new_name new_st - Column.Value col + common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default] + common_type.if_not_error <| + new_name = Naming_Helpers.function_name "fill_nothing" [self, default] + storage = self.java_column.getStorage + storage_type = Storage.from_value_type_strict common_type + new_st = case default of + Column.Value java_col -> + other_storage = java_col.getStorage + storage.fillMissingFrom other_storage storage_type + _ -> + storage.fillMissing default + col = Java_Column.new new_name new_st + Column.Value col ## ALIAS Fill Empty @@ -797,9 +833,11 @@ type Column will be used. 
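As a hedged illustration of the common-type resolution now performed by `fill_nothing` (a sketch for the in-memory backend, reusing the `Standard.Examples` columns; the resulting value type is an expectation based on `find_common_type_for_arguments`, not a documented guarantee):

    import Standard.Examples

    # Filling an integer column with a fractional default should unify the
    # two argument types into a floating-point column instead of failing.
    example_fill = Examples.integer_column.fill_nothing 0.5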
fill_empty : Column | Any -> Column fill_empty self default = - new_name = Naming_Helpers.function_name "fill_empty" [self, default] - result = self.is_empty.iif default self - result.rename new_name + Value_Type.expect_text self <| + Value_Type.expect_text default <| + new_name = Naming_Helpers.function_name "fill_empty" [self, default] + result = self.is_empty.iif default self + result.rename new_name ## Checks for each element of the column if it starts with `other`. @@ -912,7 +950,9 @@ type Column example_contains = Examples.text_column_1.like "F%." like : Column | Text -> Column like self pattern = - run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State.Error "The `Like` operation should only be used on Text columns.")) pattern + Value_Type.expect_text self <| + Value_Type.expect_text pattern <| + run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State.Error "The `like` operation should only be used on text columns.")) pattern expected_result_type=Value_Type.Boolean ## This function removes the specified characters, by default any whitespace, from the start, the end, or both ends of the input. @@ -923,10 +963,10 @@ type Column - what: A `Text` (or text `Column`) containing characters that should be removed. By default, all whitespace is removed. trim : Location -> Column | Text -> Column - trim self where=Location.Both what='' = Value_Type.expect_text self.value_type <| + trim self where=Location.Both what='' = Value_Type.expect_text self <| new_name = Naming_Helpers.function_name "trim" [self] - trim_get = wrap_text_argument what 'what' + trim_get = wrap_text_argument_as_value_provider what trim_get.if_not_error <| trim_fn t w = if w.is_empty then t.trim where else @@ -966,9 +1006,9 @@ type Column column.replace '"(.*?)"' '($1)' use_regex=True replace : Text | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Boolean -> Column replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False use_regex=False = - Value_Type.expect_text self.value_type <| - term_fn = wrap_text_argument term 'term' - new_text_fn = wrap_text_argument new_text 'new_text' + Value_Type.expect_text self <| + term_fn = wrap_text_argument_as_value_provider term + new_text_fn = wrap_text_argument_as_value_provider new_text term_fn.if_not_error <| new_text_fn.if_not_error <| new_name = Naming_Helpers.function_name "replace" [self, term, new_text] @@ -988,7 +1028,7 @@ type Column Applies only to columns that hold the `Date` or `Date_Time` types. Returns a column of `Integer` type. year : Column ! Invalid_Value_Type - year self = Value_Type.expect_has_date self.value_type related_column=self.name <| + year self = Value_Type.expect_has_date self <| simple_unary_op self "year" ## Gets the month as a number (1-12) from the date stored in the column. @@ -996,7 +1036,7 @@ type Column Applies only to columns that hold the `Date` or `Date_Time` types. Returns a column of `Integer` type. month : Column ! Invalid_Value_Type - month self = Value_Type.expect_has_date self.value_type related_column=self.name <| + month self = Value_Type.expect_has_date self <| simple_unary_op self "month" ## Gets the day of the month as a number (1-31) from the date stored in the @@ -1005,7 +1045,7 @@ type Column Applies only to columns that hold the `Date` or `Date_Time` types. Returns a column of `Integer` type. day : Column ! 
Invalid_Value_Type - day self = Value_Type.expect_has_date self.value_type related_column=self.name <| + day self = Value_Type.expect_has_date self <| simple_unary_op self "day" ## Checks for each element of the column if it is contained within the @@ -1034,7 +1074,7 @@ type Column _ : Vector -> vector _ : Array -> Vector.from_polyglot_array vector column : Column -> column.to_vector - run_vectorized_binary_op self op_name fallback_fn true_vector skip_nulls=False new_name=result_name + run_vectorized_binary_op self op_name fallback_fn true_vector expected_result_type=Value_Type.Boolean skip_nulls=False new_name=result_name False -> ## We have custom code for the non-vectorized case, because usually a vectorized binary op will apply the fallback @@ -1118,7 +1158,7 @@ type Column @type Widget_Helpers.parse_type_selector parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning = - Value_Type.expect_text self.value_type related_column=self.name <| + Value_Type.expect_text self <| formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format @@ -1166,7 +1206,7 @@ type Column - that: The column to zip with `self`. - function: A binary function that is applied to corresponding pairs of elements of `self` and `that` to produce a value. - - skip_missing: controls whether missing values should be passed to the + - skip_nothing: controls whether missing values should be passed to the `function`. The default value of `True` will skip the rows for which the value in either column is missing and automatically append `Nothing` to the result table. If set to `False`, every pair of values @@ -1180,12 +1220,17 @@ type Column example_zip = Examples.integer_column.zip Examples.text_column_1 [_, _] zip : Column -> (Any -> Any -> Any) -> Boolean -> Column - zip self that function skip_missing=True = - s1 = self.java_column.getStorage - s2 = that.java_column.getStorage - rs = s1.zip Nothing function s2 skip_missing Nothing + zip self that function skip_nothing=True = + new_fn = case skip_nothing of + True -> + x -> y -> + if x.is_nothing then Nothing else + if y.is_nothing then Nothing else + function x y + False -> function new_name = Naming_Helpers.binary_operation_name "x" self that - Column.Value (Java_Column.new new_name rs) + vec = self.to_vector.zip that.to_vector new_fn + Column.from_vector new_name vec ## Returns a new column, containing the same elements as `self`, but with the given name. @@ -1232,9 +1277,9 @@ type Column import Standard.Examples - example_count_missing = Examples.text_column_2.count_missing - count_missing : Integer - count_missing self = self.java_column.getStorage.countMissing + example_count_nothing = Examples.text_column_2.count_nothing + count_nothing : Integer + count_nothing self = self.java_column.getStorage.countMissing ## Returns the number of non-null items in this column. @@ -1245,7 +1290,7 @@ type Column example_count = Examples.text_column_2.count count : Integer - count self = self.length - self.count_missing + count self = self.length - self.count_nothing ## Returns the value contained in this column at the given index. @@ -1494,29 +1539,29 @@ type Column - name: The name of the vectorized operation. - fallback_fn: A function used if the vectorized operation isn't available. - operands: The vector of operands to apply to the function after `column`. + - new_name: The name of the column created as the result of this operation. 
- skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null value results in null without passing it to the function. If set to `False`, the null values are passed as any other value and can have custom handling logic. - - new_name: The name of the column created as the result of this operation. run_vectorized_many_op : Column -> Text -> (Any -> Any -> Any) -> Vector -> Text|Nothing -> Boolean -> Column run_vectorized_many_op column name fallback_fn operands new_name=Nothing skip_nulls=False = - effective_operands = case operands of - _ : Vector -> operands - _ : Array -> Vector.from_polyglot_array operands - _ -> [operands] + effective_operands = Vector.unify_vector_or_element operands effective_new_name = new_name.if_nothing <| Naming_Helpers.function_name name [column]+effective_operands - problem_builder = MapOperationProblemBuilder.new effective_new_name - folded = effective_operands.fold column.java_column.getStorage current-> operand-> - case operand of - _ : Column -> - current.zip name fallback_fn operand.java_column.getStorage skip_nulls problem_builder - _ -> - current.bimap name fallback_fn operand skip_nulls problem_builder - result = Column.Value (Java_Column.new effective_new_name folded) - Problem_Behavior.Report_Warning.attach_problems_after result <| - Java_Problems.parse_aggregated_problems problem_builder.getProblems + common_type = Value_Type_Helpers.find_common_type_for_arguments effective_operands + common_type.if_not_error <| + problem_builder = MapOperationProblemBuilder.new effective_new_name + storage_type = resolve_storage_type common_type + folded = effective_operands.fold column.java_column.getStorage current-> operand-> + case operand of + _ : Column -> + current.zip name fallback_fn operand.java_column.getStorage skip_nulls storage_type problem_builder + _ -> + current.bimap name fallback_fn operand skip_nulls storage_type problem_builder + result = Column.Value (Java_Column.new effective_new_name folded) + Problem_Behavior.Report_Warning.attach_problems_after result <| + Java_Problems.parse_aggregated_problems problem_builder.getProblems ## PRIVATE @@ -1527,25 +1572,27 @@ run_vectorized_many_op column name fallback_fn operands new_name=Nothing skip_nu - name: The name of the vectorized operation. - fallback_fn: A function used if the vectorized operation isn't available. - operand: The operand to apply to the function after `column`. + - expected_result_type: The expected result type of the operation. - skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null value results in null without passing it to the function. If set to `False`, the null values are passed as any other value and can have custom handling logic. - new_name: The name of the column created as the result of this operation. 
-run_vectorized_binary_op : Column -> Text -> (Any -> Any -> Any) -> Any -> Text|Nothing -> Boolean -> Column -run_vectorized_binary_op column name fallback_fn operand new_name=Nothing skip_nulls=True = +run_vectorized_binary_op : Column -> Text -> (Any -> Any -> Any) -> Any -> Value_Type -> Text|Nothing -> Boolean -> Column +run_vectorized_binary_op column name fallback_fn operand expected_result_type=Nothing new_name=Nothing skip_nulls=True = effective_new_name = new_name.if_nothing <| Naming_Helpers.binary_operation_name name column operand problem_builder = MapOperationProblemBuilder.new column.name + storage_type = resolve_storage_type expected_result_type result = case operand of Column.Value col2 -> s1 = column.java_column.getStorage s2 = col2.getStorage - rs = s1.zip name fallback_fn s2 skip_nulls problem_builder + rs = s1.zip name fallback_fn s2 skip_nulls storage_type problem_builder Column.Value (Java_Column.new effective_new_name rs) _ -> s1 = column.java_column.getStorage - rs = s1.bimap name fallback_fn operand skip_nulls problem_builder + rs = s1.bimap name fallback_fn operand skip_nulls storage_type problem_builder Column.Value (Java_Column.new effective_new_name rs) Problem_Behavior.Report_Warning.attach_problems_after result <| Java_Problems.parse_aggregated_problems problem_builder.getProblems @@ -1560,23 +1607,27 @@ run_vectorized_binary_op column name fallback_fn operand new_name=Nothing skip_n - fallback_fn: A function used if the vectorized operation isn't available. It takes a `MapOperationProblemBuilder` as its first argument. - operand: The operand to apply to the function after `column`. + - expected_result_type: The expected result type of the operation. - new_name: The name of the column created as the result of this operation. - skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null value results in null without passing it to the function. If set to `False`, the null values are passed as any other value and can have custom handling logic. -run_vectorized_binary_op_with_problem_handling column name fallback_fn operand new_name skip_nulls=True = +run_vectorized_binary_op_with_problem_handling column name fallback_fn operand new_name expected_result_type=Nothing skip_nulls=True = problem_builder = MapOperationProblemBuilder.new new_name - applied_fn = fallback_fn problem_builder + applied_fn = case fallback_fn of + Nothing -> Nothing + _ -> fallback_fn problem_builder + storage_type = resolve_storage_type expected_result_type result = case operand of Column.Value col2 -> s1 = column.java_column.getStorage s2 = col2.getStorage - rs = s1.zip name applied_fn s2 skip_nulls problem_builder + rs = s1.zip name applied_fn s2 skip_nulls storage_type problem_builder Column.Value (Java_Column.new new_name rs) _ -> s1 = column.java_column.getStorage - rs = s1.bimap name applied_fn operand skip_nulls problem_builder + rs = s1.bimap name applied_fn operand skip_nulls storage_type problem_builder Column.Value (Java_Column.new new_name rs) Problem_Behavior.Report_Warning.attach_problems_after result <| Java_Problems.parse_aggregated_problems problem_builder.getProblems @@ -1589,15 +1640,17 @@ run_vectorized_binary_op_with_problem_handling column name fallback_fn operand n - column: The column to execute the operation over. - name: The name of the vectorized operation. - fallback_fn: A function used if the vectorized operation isn't available. + - expected_result_type: The expected result type of the operation. 
- new_name: The name of the column created as the result of this operation. - on_missing: The value to return for missing cells. Ideally it should be replaced with a `skip_nulls` parameter like elsewhere, but currently that is not possible due to a bug: https://github.com/oracle/graal/issues/4741 -run_vectorized_unary_op : Column -> Text -> (Any -> Any) -> Text -> Any -> Column -run_vectorized_unary_op column name fallback_fn new_name on_missing=Nothing = +run_vectorized_unary_op : Column -> Text -> (Any -> Any) -> Value_Type -> Text -> Any -> Column +run_vectorized_unary_op column name fallback_fn new_name expected_result_type=Nothing on_missing=Nothing = problem_builder = MapOperationProblemBuilder.new column.name s = column.java_column.getStorage - rs = s.map name fallback_fn on_missing problem_builder + storage_type = resolve_storage_type expected_result_type + rs = s.map name fallback_fn on_missing storage_type problem_builder result = Column.Value (Java_Column.new new_name rs) Problem_Behavior.Report_Warning.attach_problems_after result <| Java_Problems.parse_aggregated_problems problem_builder.getProblems @@ -1650,16 +1703,21 @@ make_storage_builder_for_type value_type on_problems initial_size=128 = Helper for case case_sensitivity based text operations run_vectorized_binary_case_text_op : Column -> Text -> (Text | Column) -> Case_Sensitivity -> (Text -> Text -> Any) -> Text -> Column run_vectorized_binary_case_text_op left op other case_sensitivity fallback new_name = - Value_Type.expect_text left.value_type <| case case_sensitivity of - Case_Sensitivity.Default -> run_vectorized_binary_op left op fallback other new_name - Case_Sensitivity.Sensitive -> run_vectorized_binary_op left op fallback other new_name + with_checks ~action = + Value_Type.expect_text left <| + Value_Type.expect_text other <| + action + result_type = Value_Type.Boolean + with_checks <| case case_sensitivity of + Case_Sensitivity.Default -> run_vectorized_binary_op left op fallback other result_type new_name + Case_Sensitivity.Sensitive -> run_vectorized_binary_op left op fallback other result_type new_name Case_Sensitivity.Insensitive _ -> ## TODO currently this always runs the fallback which is slow due to the cost of Java-to-Enso calls. We want to have a vectorized implementation, but we need to extend the architecture to allow passing the locale to it. See: https://www.pivotaltracker.com/n/projects/2539304/stories/184093260 - run_vectorized_binary_op left Nothing fallback other new_name + run_vectorized_binary_op left Nothing fallback other result_type new_name ## PRIVATE simple_unary_op column op_name = @@ -1667,9 +1725,18 @@ simple_unary_op column op_name = run_vectorized_unary_op column op_name (_ -> Error.throw (Illegal_State.Error "Missing vectorized implementation for `"+op_name+"`. This is a bug in the Table library.")) new_name ## PRIVATE -wrap_text_argument val field = case val of - _ : Text -> (_-> val) - _ : Column -> Value_Type.expect_text val.value_type <| - storage = val.java_column.getStorage - i-> storage.getItemBoxed i - _ -> Error.throw (Illegal_Argument.Error "The `"+field+"` must be Text or a Text Column.") + Converts the value type to a storage type, passing `Nothing` through. + + Sometimes we do not specify the storage type as it should be inferred by the + vectorized operation, in which case it can be `Nothing`. 
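A minimal sketch of how the reworked argument-based checks compose (hypothetical helper name; `my_column` stands for an in-memory `Column`):

    # `Value_Type.expect_text` now receives the column (or value) itself and
    # only runs the continuation when the check passes; otherwise it returns
    # an `Invalid_Value_Type` dataflow error.
    ensure_text_then_trim my_column =
        Value_Type.expect_text my_column <|
            my_column.trim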
+resolve_storage_type value_type = case value_type of + Nothing -> Nothing + _ -> Storage.from_value_type_strict value_type + +## PRIVATE +wrap_text_argument_as_value_provider val = + Value_Type.expect_text val <| case val of + text : Text -> (_-> text) + col : Column -> + storage = col.java_column.getStorage + i-> storage.getItemBoxed i diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index bb993556ce2b..22fccdc95aa1 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -75,10 +75,15 @@ type Table Table.new [first_column, second_column] new : Vector (Vector | Column) -> Table new columns = + invalid_input_shape = + Error.throw (Illegal_Argument.Error "Each column must be represented by a pair whose first element is the column name and the second element is a vector of elements that will constitute that column, or an existing column.") cols = columns.map c-> case c of - _ : Vector -> Column.from_vector (c.at 0) (c.at 1) . java_column + v : Vector -> + if v.length != 2 then invalid_input_shape else + Column.from_vector (v.at 0) (v.at 1) . java_column Column.Value java_col -> java_col + _ -> invalid_input_shape if cols.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else if (cols.all c-> c.getSize == cols.first.getSize).not then Error.throw (Illegal_Argument.Error "All columns must have the same row count.") else if cols.distinct .getName . length != cols.length then Error.throw (Illegal_Argument.Error "Column names must be distinct.") else @@ -856,7 +861,7 @@ type Table parse_problem_builder = Problem_Builder.new new_columns = self.columns.map column-> if selected_column_names.contains_key column.name . not then column else - Value_Type.expect_text column.value_type related_column=column.name <| + Value_Type.expect_text column <| storage = column.java_column.getStorage new_storage_and_problems = parser.parseColumn column.name storage new_storage = new_storage_and_problems.value @@ -867,6 +872,62 @@ type Table parse_problem_builder.attach_problems_before on_problems <| Table.new new_columns + ## PRIVATE + UNSTABLE + Cast the selected columns to a specific type. + + Returns a new table in which the selected columns are replaced with + columns having the new types. + + Arguments: + - columns: The selection of columns to cast. + - value_type: The `Value_Type` to cast the column to. + - on_problems: Specifies how to handle problems if they occur, reporting + them as warnings by default. + + TODO [RW] this is a prototype needed for debugging, proper implementation + and testing will come with #6112. + + In the Database backend, this will boil down to a CAST operation. + In the in-memory backend, a conversion will be performed according to + the following rules: + - Anything can be cast into the `Mixed` type. + - Converting to a `Char` type, the elements of the column will be + converted to text. If it is fixed length, the texts will be trimmed or + padded on the right with the space character to match the desired + length. + - Conversion between numeric types will replace values exceeding the + range of the target type with `Nothing`. + - Booleans may also be converted to numbers, with `True` being converted + to `1` and `False` to `0`. The reverse is not supported - use `iif` + instead. 
+ - A `Date_Time` may be converted into a `Date` or `Time` type - the + resulting value will be truncated to the desired type. + - If a `Date` is to be converted to `Date_Time`, it will be set at + midnight of the default system timezone. + + ? Conversion Precision + + In the in-memory backend, if the conversion is lossy, a + `Lossy_Conversion` warning will be reported. The only exception is when + truncating a column which is already a text column - as then the + truncation seems like an intended behaviour, so it is not reported. If + truncating needs to occur when converting a non-text column, a warning + will still be reported. + + Currently, the warning is not reported for Database backends. + + ? Inexact Target Type + + If the backend does not support the requested target type, the closest + supported type is chosen and a `Inexact_Type_Coercion` problem is + reported. + cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion + cast self columns=[0] value_type=Value_Type.Char on_problems=Problem_Behavior.Report_Warning = + _ = [columns, value_type, on_problems] + ## TODO [RW] actual implementation in #6112 + self + ## Splits a column of text into a set of new columns. The original column will be removed from the table. The new columns will be named with the name of the input column with a @@ -1127,7 +1188,9 @@ type Table resolved = case column of _ : Text -> self.compute column on_problems _ : Column -> column - renamed = if new_name.is_nothing then resolved else resolved.rename new_name + renamed = case new_name of + Nothing -> resolved + _ : Text -> resolved.rename new_name to_add = case set_mode of Set_Mode.Add_Or_Update -> True Set_Mode.Add -> if self.java_table.getColumnByName renamed.name . is_nothing then True else @@ -1291,29 +1354,29 @@ type Table join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning = if check_table "right" right then - # [left_unmatched, matched, right_unmatched] - rows_to_keep = case join_kind of - Join_Kind.Inner -> [False, True, False] - Join_Kind.Left_Outer -> [True, True, False] - Join_Kind.Right_Outer -> [False, True, True] - Join_Kind.Full -> [True, True, True] - Join_Kind.Left_Exclusive -> [True, False, False] - Join_Kind.Right_Exclusive -> [False, False, True] - - columns_to_keep = case join_kind of - Join_Kind.Left_Exclusive -> [True, False] - Join_Kind.Right_Exclusive -> [False, True] - _ -> [True, True] - - join_resolution = make_join_helpers self right . 
resolve on on_problems - right_columns_to_drop = join_resolution.redundant_column_names - - java_conditions = join_resolution.conditions - new_java_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix - - on_problems.attach_problems_after (Table.Value new_java_table) <| - problems = new_java_table.getProblems - Java_Problems.parse_aggregated_problems problems + # [left_unmatched, matched, right_unmatched] + rows_to_keep = case join_kind of + Join_Kind.Inner -> [False, True, False] + Join_Kind.Left_Outer -> [True, True, False] + Join_Kind.Right_Outer -> [False, True, True] + Join_Kind.Full -> [True, True, True] + Join_Kind.Left_Exclusive -> [True, False, False] + Join_Kind.Right_Exclusive -> [False, False, True] + + columns_to_keep = case join_kind of + Join_Kind.Left_Exclusive -> [True, False] + Join_Kind.Right_Exclusive -> [False, True] + _ -> [True, True] + + join_resolution = make_join_helpers self right . resolve on on_problems + right_columns_to_drop = join_resolution.redundant_column_names + + java_conditions = join_resolution.conditions + new_java_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix + + on_problems.attach_problems_after (Table.Value new_java_table) <| + problems = new_java_table.getProblems + Java_Problems.parse_aggregated_problems problems ## ALIAS Cartesian Join Joins tables by pairing every row of the left table with every row of the diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso index fdb07baf97d9..ba6aacb30dec 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso @@ -52,6 +52,8 @@ closest_storage_type value_type = case value_type of Value_Type.Mixed -> AnyObjectType.INSTANCE ## PRIVATE + Converts a value type to an in-memory storage type, possibly approximating it + to the closest supported type. from_value_type : Value_Type -> Problem_Behavior -> StorageType from_value_type value_type on_problems = approximate_storage = closest_storage_type value_type @@ -60,6 +62,16 @@ from_value_type value_type on_problems = [Inexact_Type_Coercion.Warning value_type approximated_value_type] on_problems.attach_problems_before problems approximate_storage +## PRIVATE + A strict variant of `from_value_type` that expects that we can represent the + provided value type exactly. +from_value_type_strict : Value_Type -> StorageType ! Illegal_State +from_value_type_strict value_type = + value_type.if_not_error <| + result = from_value_type value_type Problem_Behavior.Report_Error + result.catch Inexact_Type_Coercion cause-> + Panic.throw (Illegal_State.Error "No exact representation for the provided value has been found: "+cause.to_display_text+" This is a bug in the Table library.") + ## PRIVATE Creates a column storage builder for the given storage type. 
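A small usage sketch of the strict conversion (an assumption-level example: `Value_Type.Boolean` has an exact in-memory representation, so no `Inexact_Type_Coercion` should arise and no panic is expected):

    # Resolves the in-memory storage type for a value type, panicking with
    # `Illegal_State` if only an approximate representation exists.
    boolean_storage = Storage.from_value_type_strict Value_Type.Boolean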
make_builder : StorageType -> Integer -> Builder diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso index 8dce875f3574..a172ddcaf796 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso @@ -1,6 +1,7 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import project.Data.Type.Value_Type_Helpers from project.Errors import Invalid_Value_Type ## Type to represent the different sizes of integer or float storage. @@ -186,31 +187,93 @@ type Value_Type Value_Type.Date_Time _ -> True _ -> False + ## PRIVATE + Specifies if values of the given type can be compared for ordering. + has_ordering : Boolean + has_ordering self = case self of + Value_Type.Integer _ -> True + Value_Type.Float _ -> True + Value_Type.Decimal _ _ -> True + Value_Type.Byte -> True + Value_Type.Char _ _ -> True + Value_Type.Boolean -> True + Value_Type.Date -> True + Value_Type.Date_Time _ -> True + Value_Type.Time -> True + ## Not all mixed types are ordered, but some can, so we allow ordering + for mixed columns. + Value_Type.Mixed -> True + _ -> False + + ## PRIVATE + Specifies if values of the given type can be compared for ordering. + are_comparable : Value_Type -> Value_Type -> Boolean + are_comparable type_1 type_2 = + find_bucket typ = typ.if_not_error <| + buckets = [["Integer", "Byte", "Float", "Decimal"], ["Char"], ["Date"], ["Date_Time"], ["Time"], ["Boolean"], ["Mixed"]] + ctor_name = Meta.meta typ . constructor . name + buckets.index_of bucket-> + bucket.contains ctor_name + + bucket_1 = find_bucket type_1 + bucket_2 = find_bucket type_2 + if bucket_1.is_nothing || bucket_2.is_nothing then False else + if (type_1 == Value_Type.Mixed) || (type_2 == Value_Type.Mixed) then True else + bucket_1 == bucket_2 + + + ## PRIVATE + Checks that both arguments can be compared for ordering and runs the + following action or reports an error. + expect_comparable : Any -> Any -> Any -> Any ! Invalid_Value_Type + expect_comparable arg1 arg2 ~action = + type_1 = Value_Type_Helpers.find_argument_type arg1 + type_2 = Value_Type_Helpers.find_argument_type arg2 + if type_1.has_ordering.not then Error.throw (Invalid_Value_Type.Not_Ordered type_1) else + if type_2.is_nothing.not && type_2.has_ordering.not then Error.throw (Invalid_Value_Type.Not_Ordered type_2) else + if type_2.is_nothing.not && (Value_Type.are_comparable type_1 type_2 . not) then Error.throw (Invalid_Value_Type.Incomparable type_1 type_2) else + action + ## PRIVATE ADVANCED - Checks if the provided value type is a textual type (with any settings) - and runs the following action or reports a type error. - expect_text : Value_Type -> Any -> Text -> Any ! Invalid_Value_Type - expect_text value_type ~action related_column=Nothing = - if Value_Type.is_text value_type then action else - Error.throw (Invalid_Value_Type.Error Value_Type.Char value_type related_column) + Checks if the provided argument (which may be a value or a Column) is of + a text type and runs the following action or reports a type error. + expect_text : Any -> Any -> Any ! Invalid_Value_Type + expect_text argument ~action = + expect_type argument .is_text "Char" action ## PRIVATE ADVANCED - Checks if the provided value type is a boolean type and runs the - following action or reports a type error. - expect_boolean : Value_Type -> Any -> Any ! 
Invalid_Value_Type - expect_boolean value_type ~action = case value_type of - Value_Type.Boolean -> action - _ -> Error.throw (Invalid_Value_Type.Error Value_Type.Boolean value_type) + Checks if the provided argument (which may be a value or a Column) is of + a boolean type and runs the following action or reports a type error. + expect_boolean : Any -> Any -> Any ! Invalid_Value_Type + expect_boolean argument ~action = + expect_type argument .is_boolean Value_Type.Boolean action ## PRIVATE ADVANCED - Checks if the provided value type is a `Date` or `Date_Time`. - expect_has_date : Value_Type -> Any -> Text -> Any ! Invalid_Value_Type - expect_has_date value_type ~action related_column=Nothing = case value_type.has_date of - True -> action - False -> Error.throw (Invalid_Value_Type.Error "Date or Date_Time" value_type related_column) + Checks if the provided argument (which may be a value or a Column) is of + a numeric type and runs the following action or reports a type error. + expect_numeric : Any -> Any -> Any ! Invalid_Value_Type + expect_numeric argument ~action = + expect_type argument .is_numeric "a numeric" action + + ## PRIVATE + ADVANCED + Checks if the provided argument (which may be a value or a Column) is of + a floating point type and runs the following action or reports a type + error. + expect_floating_point : Any -> Any -> Any ! Invalid_Value_Type + expect_floating_point argument ~action = + expect_type argument .is_floating_point "Float" action + + ## PRIVATE + ADVANCED + Checks if the provided argument (which may be a value or a Column) has + type `Date` or `Date_Time`. + expect_has_date : Any -> Any -> Any ! Invalid_Value_Type + expect_has_date argument ~action = + expect_type argument .has_date "Date or Date_Time" action ## PRIVATE Provides a text representation of the `Value_Type` meant for @@ -268,3 +331,14 @@ type Value_Type - otherwise, `Text` is chosen as a fallback and the column is kept as-is without parsing. type Auto + +## PRIVATE + A helper for generating the `Value_Type.expect_` checks. +expect_type : Any -> (Value_Type -> Boolean) -> Text|Value_Type -> Any -> Any ! Invalid_Value_Type +expect_type value predicate type_kind ~action = case value of + # Special handling for `Nothing`. Likely, can be removed with #6281. + Nothing -> action + _ -> + typ = Value_Type_Helpers.find_argument_type value + if predicate typ then action else + Value_Type_Helpers.raise_unexpected_type type_kind value diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso index f75d4338bc3e..5d273d2df59f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type_Helpers.enso @@ -1,7 +1,11 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import project.Data.Column.Column +import project.Data.Type.Enso_Types from project.Data.Type.Value_Type import Value_Type, Auto +from project.Errors import Invalid_Value_Type, No_Common_Type +from project.Internal.Table_Helpers import is_column ## PRIVATE Finds a type that can fit both a current type and a new type.
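The hunk that follows removes the Boolean-to-number unification rules; a hedged sketch of the expected behaviour of the strict common-type search after this change:

    # Boolean and Byte no longer share a common type, so the strict search
    # should report that no common type exists.
    example_no_common = Value_Type_Helpers.find_common_type [Value_Type.Boolean, Value_Type.Byte] strict=True
    # example_no_common is expected to be Nothing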
@@ -11,7 +15,6 @@ reconcile_types current new = case current of Value_Type.Integer new_size -> Value_Type.Integer (Math.max size new_size) Value_Type.Byte -> Value_Type.Integer size - Value_Type.Boolean -> Value_Type.Integer size # If we unify integers with floats, we select the default Float 64 regardless of the input sizes. Value_Type.Float _ -> Value_Type.Float _ -> Value_Type.Mixed @@ -21,21 +24,15 @@ reconcile_types current new = case current of # If we unify integers with floats, we select the default Float 64 regardless of the input sizes. Value_Type.Integer _ -> Value_Type.Float Value_Type.Byte -> Value_Type.Float - Value_Type.Boolean -> Value_Type.Float _ -> Value_Type.Mixed Value_Type.Byte -> case new of Value_Type.Byte -> Value_Type.Byte Value_Type.Integer size -> Value_Type.Integer size - Value_Type.Boolean -> Value_Type.Byte Value_Type.Float _ -> Value_Type.Float _ -> Value_Type.Mixed Value_Type.Boolean -> case new of Value_Type.Boolean -> Value_Type.Boolean - Value_Type.Integer size -> - Value_Type.Integer size - Value_Type.Byte -> Value_Type.Byte - Value_Type.Float _ -> Value_Type.Float _ -> Value_Type.Mixed Value_Type.Char current_size current_variable -> case new of Value_Type.Char new_size new_variable -> @@ -75,3 +72,87 @@ find_common_type types strict = # Double check if Mixed was really allowed to come out. if types.contains Value_Type.Mixed then Value_Type.Mixed else Nothing + +## PRIVATE + Finds the type of an argument to a column operation. + + If the argument is a column, the type of that column is returned. If it + is an Enso value, the smallest `Value_Type` that can fit that value will + be returned (but the Database is free to widen it to the closest type + that it supports without warning). + + Since there is no special type for `Nothing` and `Nothing` technically + can fit any nullable type, it usually needs to be handled specially. This + method returns `Nothing` if the value is `Nothing` - so the caller can + try to treat this value as fitting any type, or according to the specific + semantics of each method. +find_argument_type : Any -> Value_Type | Nothing +find_argument_type value = case value of + Nothing -> Nothing + _ -> if is_column value then value.value_type else + Enso_Types.most_specific_value_type value use_smallest=True + +## PRIVATE + A helper which resolves if numeric addition or string concatenation should be + used when the `+` operator is used with the two provided types. + It will return an error if the provided types are incompatible. +resolve_addition_kind arg1 arg2 = + type_1 = find_argument_type arg1 + type_2 = find_argument_type arg2 + if type_1.is_numeric && (type_2.is_nothing || type_2.is_numeric) then 'ADD_NUMBER' else + if type_1.is_text && (type_2.is_nothing || type_2.is_text) then 'ADD_TEXT' else + Error.throw <| Illegal_Argument.Error <| + if type_2.is_nothing then "Cannot perform addition on a value of type " + type_1.to_display_text + ". Addition can only be performed if the column is of some numeric type or is text." else + "Cannot perform addition on a pair of values of types " + type_1.to_display_text + " and " + type_2.to_display_text + ". Addition can only be performed if both columns are of some numeric type or are both text." + +## PRIVATE + Checks that both provided arguments have numeric type and runs the action + if they do.
+check_binary_numeric_op arg1 arg2 ~action = + Value_Type.expect_numeric arg1 <| + Value_Type.expect_numeric arg2 <| + action + +## PRIVATE + Checks that both provided arguments have boolean type and runs the action + if they do. +check_binary_boolean_op arg1 arg2 ~action = + Value_Type.expect_boolean arg1 <| + Value_Type.expect_boolean arg2 <| + action + +## PRIVATE + Checks that all provided argument are comparable with the provided + column. + + Arguments: + - column: the column to compare the arguments to. + - arg_or_args: a single value or column or a vector of values or columns. + - action: the action to run if the arguments are compatible. +check_multi_argument_comparable_op column arg_or_args ~action = + args = Vector.unify_vector_or_element arg_or_args + checked = args.map arg-> + Value_Type.expect_comparable column arg <| + True + checked.if_not_error <| + action + +## PRIVATE +raise_unexpected_type expected_type argument = + error = case is_column argument of + True -> + Invalid_Value_Type.Column expected_type argument.value_type argument.name + False -> + Invalid_Value_Type.Value expected_type (find_argument_type argument) argument + Error.throw error + +## PRIVATE +find_common_type_for_arguments : Vector Any -> Value_Type | Nothing ! No_Common_Type +find_common_type_for_arguments arguments = + types = arguments.map find_argument_type . filter Filter_Condition.Not_Nothing + case types.is_empty of + True -> Nothing + False -> case find_common_type types strict=True of + common_type : Value_Type -> common_type + Nothing -> Error.throw <| + No_Common_Type.Error types related_column_name=Nothing diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index 0960ad936397..903bd9163fbd 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -392,20 +392,34 @@ type No_Index_Set_Error type Invalid_Value_Type ## PRIVATE - Error expected actual related_column=Nothing + Indicates that a column with an unexpected value type has been encountered. + Column (expected:Value_Type|Text) (actual:Value_Type) (related_column:Text) + + ## PRIVATE + Indicates that a value with an unexpected value type has been encountered. + Value (expected:Value_Type|Text) (actual:Value_Type) (value:Any) + + ## PRIVATE + Indicates that a value type does not support comparisons. + Not_Ordered (actual:Value_Type) + + ## PRIVATE + Indicates that the two value types cannot be compared with each other. + Incomparable (type_1:Value_Type) (type_2:Value_Type) ## PRIVATE Create a human-readable version of the error. to_display_text : Text - to_display_text self = - prefix = case self.related_column of - Nothing -> "Expected " - column_name -> "Expected " + column_name + " column to have " - expected_type = case self.expected of - msg : Text -> msg - other -> other.to_display_text - prefix + expected_type + " type, but got " + self.actual.to_display_text + "." + to_display_text self = case self of + Invalid_Value_Type.Column expected actual related_column -> + "Expected type "+expected.to_display_text+", but got a column ["+related_column+"] of type "+actual.to_display_text+"." + Invalid_Value_Type.Value expected actual value -> + "Expected type "+expected.to_display_text+", but got a value "+value.to_text+" of type "+actual.to_display_text+"." + Invalid_Value_Type.Not_Ordered actual -> + "Type "+actual.to_display_text+" does not support comparisons." 
+ Invalid_Value_Type.Incomparable type_1 type_2 -> + "Types "+type_1.to_display_text+" and "+type_2.to_display_text+" cannot be compared to each other." ## UNSTABLE @@ -435,16 +449,26 @@ type Column_Type_Mismatch type No_Common_Type ## PRIVATE - An error indicating that no common type could be found for the merged - columns. - Error (column_name : Text) + An error indicating that no common type could be found. + + Arguments: + - types: The types that were tried to be unified. + - related_column_name: The name of the resulting column that was being + unified, if applicable. + Error (types : Vector Value_Type) (related_column_name : Nothing|Text) ## PRIVATE Create a human-readable version of the error. to_display_text : Text to_display_text self = - "No common type could have been found for the columns corresponding to ["+self.column_name+"]. If you want to allow mixed types, please retype the columns to the `Mixed` before the concatenation (note however that most Database backends do not support `Mixed` types, so it may work only for the in-memory backend)." + types = self.types.map .to_display_text . join ", " + prefix = "No common type could have been found for the types: "+types + infix = case self.related_column_name of + column_name : Text -> " when unifying the column ["+column_name+"]." + _ -> "." + suffix = "If you want to allow mixed types, please retype the columns to the `Mixed` before the concatenation (note however that most Database backends do not support `Mixed` types, so it may work only for the in-memory backend)." + prefix + infix + suffix type Unmatched_Columns ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso index 395a6e4c2029..2e473a6a18ea 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso @@ -30,9 +30,8 @@ make_filter_column source_column filter_condition on_problems = case filter_cond Not_Nothing -> source_column.is_nothing.not # Boolean Is_True -> - Value_Type.expect_boolean source_column.value_type <| source_column - Is_False -> - Value_Type.expect_boolean source_column.value_type <| source_column.not + Value_Type.expect_boolean source_column <| source_column + Is_False -> source_column.not # Comparisons Less value -> (source_column < value) Equal_Or_Less value -> (source_column <= value) @@ -41,52 +40,21 @@ make_filter_column source_column filter_condition on_problems = case filter_cond Between lower upper -> source_column.between lower upper # Text Starts_With prefix case_sensitivity -> - Value_Type.expect_text source_column.value_type <| - expect_column_or_value_as_text "prefix" prefix <| - source_column.starts_with prefix case_sensitivity + source_column.starts_with prefix case_sensitivity Ends_With suffix case_sensitivity -> - Value_Type.expect_text source_column.value_type <| - expect_column_or_value_as_text "suffix" suffix <| - source_column.ends_with suffix case_sensitivity + source_column.ends_with suffix case_sensitivity Contains substring case_sensitivity -> - Value_Type.expect_text source_column.value_type <| - expect_column_or_value_as_text "substring" substring <| - source_column.contains substring case_sensitivity + source_column.contains substring case_sensitivity Not_Contains substring case_sensitivity -> - Value_Type.expect_text source_column.value_type <| - 
expect_column_or_value_as_text "substring" substring <| - source_column.contains substring case_sensitivity . not + source_column.contains substring case_sensitivity . not Is_Empty -> - Value_Type.expect_text source_column.value_type <| - source_column.is_empty + source_column.is_empty Not_Empty -> - Value_Type.expect_text source_column.value_type <| - source_column.is_empty.not + source_column.is_empty.not Like pattern -> - Value_Type.expect_text source_column.value_type <| - expect_column_or_value_as_text "pattern" pattern <| - source_column.like pattern + source_column.like pattern Not_Like pattern -> - Value_Type.expect_text source_column.value_type <| - expect_column_or_value_as_text "pattern" pattern <| - source_column.like pattern . not + source_column.like pattern . not # Vector Is_In values -> source_column.is_in values Not_In values -> source_column.is_in values . not - -## PRIVATE -expect_column_or_value_as_text field_name column_or_value ~action = case column_or_value of - _ : Text -> action - ## A bit of a hack, because due to lack of interfaces we cannot check if the - thing is a Column (as there are various column implementations based on - the backend). So we assume it is a column and if it doesn't quack like a - column, we fall back to a type error. - maybe_column -> - result = Panic.catch No_Such_Method (Value_Type.expect_text maybe_column.value_type True) _-> - Error.throw (Type_Error.Error Text (Meta.type_of maybe_column) field_name) - ## We don't run the action above, to avoid catching spurious - `No_Such_Method` from the action itself. Instead we just return - True there and if it went through successfully we can then execute - the action. If it fails, we forward the dataflow error instead. - case result of - True -> action diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Join_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Join_Helpers.enso index f748fb6cc27c..313ac6a9c705 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Join_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Join_Helpers.enso @@ -47,9 +47,10 @@ type Join_Condition_Resolver left = resolve_left left_selector right = resolve_right right_selector if is_nothing left || is_nothing right then Nothing else - if left.name == right.name then - redundant_names.append right.name - self.make_equals problem_builder left right + Value_Type.expect_comparable left right <| + if left.name == right.name then + redundant_names.append right.name + self.make_equals problem_builder left right converted = conditions_vector.map condition-> case condition of Join_Condition.Equals left_selector right_selector -> handle_equals left_selector right_selector @@ -58,15 +59,17 @@ type Join_Condition_Resolver left = resolve_left left_selector right = resolve_right right_selector if is_nothing left || is_nothing right then Nothing else - Value_Type.expect_text left.value_type <| - Value_Type.expect_text right.value_type <| + Value_Type.expect_text left <| + Value_Type.expect_text right <| self.make_equals_ignore_case problem_builder left right locale Join_Condition.Between left_selector right_lower_selector right_upper_selector -> left = resolve_left left_selector right_lower = resolve_right right_lower_selector right_upper = resolve_right right_upper_selector if is_nothing left || is_nothing right_lower || is_nothing right_upper then Nothing else - self.make_between problem_builder left right_lower right_upper + Value_Type.expect_comparable left 
right_lower <| + Value_Type.expect_comparable left right_upper <| + self.make_between problem_builder left right_lower right_upper problem_builder.attach_problems_before on_problems <| if converted.contains Nothing then Panic.throw (Illegal_State.Error "Impossible: unresolved columns remaining in the join resolution. This should have raised a dataflow error. This is a bug in the Table library.") else Join_Condition_Resolution.Result converted redundant_names.to_vector diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Naming_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Naming_Helpers.enso index c4ff1ecb201f..24b15e0fff32 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Naming_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Naming_Helpers.enso @@ -1,6 +1,8 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +from project.Internal.Table_Helpers import is_column + polyglot java import org.enso.table.data.table.Column as Java_Column ## PRIVATE @@ -43,10 +45,3 @@ type Naming_Helpers to_expression_text value = if is_column value then "[" + value.name.replace "]" "]]" + "]" else value.pretty - -## PRIVATE - Checks if the value is a column of any backend. -is_column value = - case Meta.get_qualified_type_name value of - Nothing -> False - typename : Text -> typename.ends_with ".Column" diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso index 364b4c70ab2f..952cfe57723b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Split_Tokenize.enso @@ -19,7 +19,7 @@ polyglot java import org.enso.table.data.mask.OrderMask split_to_columns : Table -> Text | Integer -> Text -> Integer | Nothing -> Problem_Behavior -> Table split_to_columns table input_column_id delimiter="," column_count=Nothing on_problems=Report_Error = column = table.at input_column_id - Value_Type.expect_text (column.value_type) related_column=column <| + Value_Type.expect_text column <| fan_out_to_columns table input_column_id (handle_nothing (_.split delimiter)) column_count on_problems ## PRIVATE @@ -28,7 +28,7 @@ split_to_columns table input_column_id delimiter="," column_count=Nothing on_pro split_to_rows : Table -> Text | Integer -> Text -> Table split_to_rows table input_column_id delimiter="," = column = table.at input_column_id - Value_Type.expect_text (column.value_type) related_column=column <| + Value_Type.expect_text column <| fan_out_to_rows table input_column_id (handle_nothing (_.split delimiter)) ## PRIVATE @@ -38,7 +38,7 @@ split_to_rows table input_column_id delimiter="," = tokenize_to_columns : Table -> Text | Integer -> Text -> Case_Sensitivity -> Integer | Nothing -> Problem_Behavior -> Table tokenize_to_columns table input_column_id pattern case_sensitivity column_count on_problems = column = table.at input_column_id - Value_Type.expect_text (column.value_type) related_column=column <| + Value_Type.expect_text column <| fan_out_to_columns table input_column_id (handle_nothing (_.tokenize pattern case_sensitivity)) column_count on_problems ## PRIVATE @@ -48,7 +48,7 @@ tokenize_to_columns table input_column_id pattern case_sensitivity column_count tokenize_to_rows : Table -> Text | Integer -> Text -> Case_Sensitivity -> Table tokenize_to_rows table input_column_id pattern="."
case_sensitivity=Case_Sensitivity.Sensitive = column = table.at input_column_id - Value_Type.expect_text (column.value_type) related_column=column <| + Value_Type.expect_text column <| fan_out_to_rows table input_column_id (handle_nothing (_.tokenize pattern case_sensitivity)) ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 5253cce40e9b..3c1a213c41ab 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -381,6 +381,15 @@ is_table obj = known_types = ["Standard.Table.Data.Table.Table", "Standard.Database.Data.Table.Table"] known_types.contains (Meta.get_qualified_type_name obj) +## PRIVATE + Checks if the given object is a column of any backend. + + Currently, it relies on comparing the qualified type name of the object + against the list of known Column types. +is_column obj = + known_types = ["Standard.Table.Data.Column.Column", "Standard.Database.Data.Column.Column"] + known_types.contains (Meta.get_qualified_type_name obj) + ## PRIVATE A helper method that resolves what should be the result type of a particular column set based on the union settings. @@ -391,7 +400,7 @@ unify_result_type_for_union column_set all_tables allow_type_widening problem_bu types = columns.filter Filter_Condition.Not_Nothing . map .value_type common_type = Value_Type_Helpers.find_common_type types strict=True if common_type.is_nothing then - problem_builder.report_other_warning (No_Common_Type.Error column_set.name) + problem_builder.report_other_warning (No_Common_Type.Error types related_column_name=column_set.name) common_type False -> is_not_nothing c = case c of diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso index c737353460f0..f007ab9011cf 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Extensions.enso @@ -47,7 +47,7 @@ Error.should_fail_with : Any -> Integer -> Test_Result Error.should_fail_with self matcher frames_to_skip=0 = caught = self.catch if caught == matcher || caught.is_a matcher then Nothing else - loc = Meta.get_source_location 3+frames_to_skip + loc = Meta.get_source_location 2+frames_to_skip matcher_text = case matcher.to_text of text : Text -> text _ -> Meta.meta matcher .
to_text diff --git a/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java b/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java index e0577750946e..d4d7f0028008 100644 --- a/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java +++ b/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java @@ -42,7 +42,7 @@ public static long coerceToLong(Object o) { case Integer x -> x.longValue(); case Short x -> x.longValue(); case Byte x -> x.longValue(); - default -> throw new UnsupportedOperationException(); + default -> throw new UnsupportedOperationException("Cannot coerce " + o + " to a numeric type."); }; } diff --git a/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java b/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java index d6eff297bd6d..bb6232feaef8 100644 --- a/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java +++ b/std-bits/database/src/main/java/org/enso/database/JDBCUtils.java @@ -4,6 +4,8 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; +import java.time.LocalDate; +import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZonedDateTime; @@ -28,4 +30,16 @@ public static void setZonedDateTime( PreparedStatement stmt, int columnIndex, ZonedDateTime zonedDateTime) throws SQLException { stmt.setObject(columnIndex, zonedDateTime.toOffsetDateTime(), Types.TIMESTAMP_WITH_TIMEZONE); } + + /** Sets a LocalTime in a PreparedStatement. */ + public static void setLocalTime(PreparedStatement stmt, int columnIndex, LocalTime localTime) + throws SQLException { + stmt.setObject(columnIndex, localTime, Types.TIME); + } + + /** Sets a LocalDate in a PreparedStatement. */ + public static void setLocalDate(PreparedStatement stmt, int columnIndex, LocalDate localDate) + throws SQLException { + stmt.setObject(columnIndex, localDate, Types.DATE); + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java index 575b66d40b47..c01cdb4cb6d5 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java @@ -29,8 +29,12 @@ public void appendNoGrow(Object o) { if (o == null) { isNa.set(size); } else { - if ((Boolean) o) { - vals.set(size); + if (o instanceof Boolean b) { + if (b) { + vals.set(size); + } + } else { + throw new UnsupportedOperationException("Cannot coerce " + o + " to a boolean type."); } } size++; diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java index 1728cb51a225..33bb747d3c8a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java @@ -79,7 +79,7 @@ public BoolStorage runZip(DoubleStorage storage, Storage arg, MapOperationPro BitSet newMissing = new BitSet(); for (int i = 0; i < storage.size(); i++) { if (!storage.isNa(i) && i < v.size() && !v.isNa(i)) { - if (doDouble(storage.getItem(i), v.getItem(i))) { + if (doDouble(storage.getItem(i), v.getItemDouble(i))) { newVals.set(i); } } else { diff --git 
a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringStringOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringStringOp.java new file mode 100644 index 000000000000..88b33f395bd1 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringStringOp.java @@ -0,0 +1,58 @@ +package org.enso.table.data.column.operation.map.text; + +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.map.MapOperation; +import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; +import org.enso.table.data.column.storage.SpecializedStorage; +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.column.storage.StringStorage; +import org.enso.table.error.UnexpectedTypeException; + +public abstract class StringStringOp extends MapOperation> { + public StringStringOp(String name) { + super(name); + } + + protected abstract String doString(String a, String b); + + @Override + public Storage runMap(SpecializedStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { + int size = storage.size(); + if (arg == null) { + StringBuilder builder = new StringBuilder(size); + builder.appendNulls(size); + return builder.seal(); + } else if (arg instanceof String argString) { + String[] newVals = new String[size]; + for (int i = 0; i < size; i++) { + if (storage.isNa(i)) { + newVals[i] = null; + } else { + newVals[i] = doString(storage.getItem(i), argString); + } + } + return new StringStorage(newVals, size); + } else { + throw new UnexpectedTypeException("a Text"); + } + } + + @Override + public Storage runZip(SpecializedStorage storage, Storage arg, + MapOperationProblemBuilder problemBuilder) { + if (arg instanceof StringStorage v) { + int size = storage.size(); + String[] newVals = new String[size]; + for (int i = 0; i < size; i++) { + if (storage.isNa(i) || v.isNa(i)) { + newVals[i] = null; + } else { + newVals[i] = doString(storage.getItem(i), v.getItem(i)); + } + } + return new StringStorage(newVals, size); + } else { + throw new UnexpectedTypeException("a Text column"); + } + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index 5282aba9233a..1e5410bab4c0 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -180,10 +180,10 @@ public boolean isNegated() { return negated; } - public Storage iif(Value when_true, Value when_false) { + public Storage iif(Value when_true, Value when_false, StorageType resultStorageType) { var on_true = makeRowProvider(when_true); var on_false = makeRowProvider(when_false); - InferredBuilder builder = new InferredBuilder(size); + Builder builder = Builder.getForType(resultStorageType, size); for (int i = 0; i < size; i++) { if (isMissing.get(i)) { builder.append(null); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index a2032453c5a5..abeb15dc7e69 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -40,7 +40,7 @@ public DoubleStorage(long[] data, int 
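For illustration, a minimal sketch of how the new StringStringOp base class above is meant to be extended (the class and operation name here are hypothetical, not part of this patch; the actual patch only registers a concatenation op in StringStorage further down). A concrete operation supplies just doString, while null handling, scalar-vs-column arguments, and result storage construction are inherited:

    import org.enso.table.data.column.operation.map.text.StringStringOp;

    // Hypothetical operation, shown only to illustrate the extension point:
    // joins two text values with a comma separator.
    final class ConcatWsOp extends StringStringOp {
      ConcatWsOp() {
        super("concat_ws"); // hypothetical operation name
      }

      @Override
      protected String doString(String a, String b) {
        // Only the per-element combination needs to be provided here.
        return a + ", " + b;
      }
    }
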
size, BitSet isMissing) { public static DoubleStorage makeEmpty(int size) { BitSet isMissing = new BitSet(size); isMissing.set(0, size); - return new DoubleStorage(new long[0], size, new BitSet(size)); + return new DoubleStorage(new long[0], size, isMissing); } /** @inheritDoc */ diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index 131c2b211a1f..b3c5bae93a33 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -1,5 +1,6 @@ package org.enso.table.data.column.storage; +import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.NumericBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; @@ -72,7 +73,7 @@ public long getItem(int idx) { @Override public double getItemDouble(int idx) { - return getItem(idx); + return (double) getItem(idx); } @Override @@ -139,7 +140,7 @@ private Storage fillMissingLong(long arg) { @Override public Storage fillMissing(Value arg) { if (arg.isNumber()) { - if (arg.fitsInLong()) { + if (NumericConverter.isCoercibleToLong(arg.as(Object.class))) { return fillMissingLong(arg.asLong()); } else { return fillMissingDouble(arg.asDouble()); @@ -338,7 +339,7 @@ protected boolean doLong(long a, long b) { @Override protected boolean doDouble(long a, double b) { - return a > b; + return a < b; } }) .add( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 9e8211f2bc89..99fbaf33b17b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -1,10 +1,5 @@ package org.enso.table.data.column.storage; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.function.BiFunction; -import java.util.function.Function; import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.InferredBuilder; @@ -15,6 +10,12 @@ import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Value; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.function.BiFunction; +import java.util.function.Function; + /** An abstract representation of a data column. */ public abstract class Storage { /** @return the number of elements in this column (including NAs) */ @@ -92,6 +93,9 @@ protected abstract Storage runVectorizedZip( * @param skipNulls specifies whether null values on the input should result in a null result * without passing them through the function, this is useful if the function does not support * the null-values, but it needs to be set to false if the function should handle them. + * @param expectedResultType the expected type for the result storage; it is ignored if the + * operation is vectorized + * @param problemBuilder a builder for reporting computation problems * @return the result of running the function on all non-missing elements. 
*/ public final Storage bimap( @@ -99,22 +103,31 @@ public final Storage bimap( BiFunction function, Object argument, boolean skipNulls, + StorageType expectedResultType, MapOperationProblemBuilder problemBuilder) { if (name != null && isOpVectorized(name)) { return runVectorizedMap(name, argument, problemBuilder); } - Builder builder = new InferredBuilder(size()); + + checkFallback(function, expectedResultType, name); + + Builder storageBuilder = Builder.getForType(expectedResultType, size()); + if (skipNulls && argument == null) { + storageBuilder.appendNulls(size()); + return storageBuilder.seal(); + } + for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (skipNulls && it == null) { - builder.appendNoGrow(null); + storageBuilder.appendNoGrow(null); } else { Object result = function.apply(it, argument); Object converted = Polyglot_Utils.convertPolyglotValue(result); - builder.appendNoGrow(converted); + storageBuilder.appendNoGrow(converted); } } - return builder.seal(); + return storageBuilder.seal(); } /** @@ -124,6 +137,8 @@ public final Storage bimap( * supported. If this argument is null, the vectorized operation will never be used. * @param function the function to run. * @param onMissing the value to place for missing cells, usually just null + * @param expectedResultType the expected type for the result storage; it is ignored if the + * operation is vectorized * @param problemBuilder a builder for reporting computation problems * @return the result of running the function on all non-missing elements. */ @@ -131,23 +146,28 @@ public final Storage map( String name, Function function, Value onMissing, + StorageType expectedResultType, MapOperationProblemBuilder problemBuilder) { if (name != null && isOpVectorized(name)) { return runVectorizedMap(name, null, problemBuilder); } + + checkFallback(function, expectedResultType, name); + Object missingValue = Polyglot_Utils.convertPolyglotValue(onMissing); - Builder builder = new InferredBuilder(size()); + + Builder storageBuilder = Builder.getForType(expectedResultType, size()); for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.appendNoGrow(missingValue); + storageBuilder.appendNoGrow(missingValue); } else { Value result = function.apply(it); Object converted = Polyglot_Utils.convertPolyglotValue(result); - builder.appendNoGrow(converted); + storageBuilder.appendNoGrow(converted); } } - return builder.seal(); + return storageBuilder.seal(); } /** @@ -157,6 +177,8 @@ public final Storage map( * supported. If this argument is null, the vectorized operation will never be used. * @param function the function to run. * @param skipNa whether rows containing missing values should be passed to the function. + * @param expectedResultType the expected type for the result storage; it is ignored if the + * operation is vectorized * @param problemBuilder the builder used for reporting computation problems * @return the result of running the function on all non-missing elements. 
*/ @@ -165,23 +187,47 @@ public final Storage zip( BiFunction function, Storage arg, boolean skipNa, + StorageType expectedResultType, MapOperationProblemBuilder problemBuilder) { if (name != null && isOpVectorized(name)) { return runVectorizedZip(name, arg, problemBuilder); } - Builder builder = new InferredBuilder(size()); + + checkFallback(function, expectedResultType, name); + + Builder storageBuilder = Builder.getForType(expectedResultType, size()); for (int i = 0; i < size(); i++) { Object it1 = getItemBoxed(i); Object it2 = i < arg.size() ? arg.getItemBoxed(i) : null; if (skipNa && (it1 == null || it2 == null)) { - builder.appendNoGrow(null); + storageBuilder.appendNoGrow(null); } else { Object result = function.apply(it1, it2); Object converted = Polyglot_Utils.convertPolyglotValue(result); - builder.appendNoGrow(converted); + storageBuilder.appendNoGrow(converted); } } - return builder.seal(); + return storageBuilder.seal(); + } + + private static void checkFallback(Object fallback, StorageType storageType, String operationName) + throws IllegalArgumentException { + if (fallback == null) { + if (operationName == null) { + throw new IllegalArgumentException( + "A function or name of vectorized operation must be specified. This is a bug in the Table library."); + } else { + throw new IllegalArgumentException( + "The operation " + + operationName + + " has no vectorized implementation for this storage type, but no fallback function was provided. This is a bug in the Table library."); + } + } + + if (storageType == null) { + throw new IllegalArgumentException( + "The expected result type must be specified if a fallback function is used. This is a bug in the Table library."); + } } /** @@ -198,10 +244,11 @@ public Storage fillMissing(Value arg) { * Fills missing values in this storage, by using corresponding values from {@code other}. 
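A rough sketch of what the contract enforced by checkFallback above looks like from a caller's side (assumptions: the lambda, the helper class, and the TextType.VARIABLE_LENGTH constant name are illustrative and not taken from this patch). When no vectorized implementation exists, the caller must now pass both a fallback function and the storage type the result should be built with:

    import java.util.function.Function;
    import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
    import org.enso.table.data.column.storage.Storage;
    import org.enso.table.data.column.storage.type.TextType;
    import org.graalvm.polyglot.Value;

    final class MapFallbackExample {
      // Assumed call site: upper-cases every non-missing element. There is no
      // vectorized op here, so a fallback function plus the expected result
      // type must be supplied; checkFallback rejects the call otherwise.
      static Storage<?> upperCase(Storage<?> storage, MapOperationProblemBuilder problems) {
        Function<Object, Value> fallback = v -> Value.asValue(v.toString().toUpperCase());
        return storage.map(
            null,                      // no vectorized operation name
            fallback,                  // per-element fallback
            Value.asValue(null),       // value used for missing cells
            TextType.VARIABLE_LENGTH,  // result storage type (assumed constant name)
            problems);
      }
    }
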
* * @param other the source of default values + * @param commonType a common type that should fit values from both storages * @return a new storage with missing values filled */ - public Storage fillMissingFrom(Storage other) { - var builder = new InferredBuilder(size()); + public Storage fillMissingFrom(Storage other, StorageType commonType) { + var builder = Builder.getForType(commonType, size()); for (int i = 0; i < size(); i++) { if (isNa(i)) { builder.appendNoGrow(other.getItemBoxed(i)); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java index 5d83818d01f7..7548ff2fda26 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java @@ -12,6 +12,7 @@ import org.enso.table.data.column.operation.map.text.LikeOp; import org.enso.table.data.column.operation.map.text.StringBooleanOp; import org.enso.table.data.column.operation.map.text.StringIsInOp; +import org.enso.table.data.column.operation.map.text.StringStringOp; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.column.storage.type.TextType; import org.graalvm.polyglot.Value; @@ -139,6 +140,12 @@ protected boolean doString(String a, String b) { }); t.add(new LikeOp()); t.add(new StringIsInOp<>()); + t.add(new StringStringOp(Maps.ADD) { + @Override + protected String doString(String a, String b) { + return a + b; + } + }); return t; } } diff --git a/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso index c33f840e4bc1..36c3759babe9 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso @@ -1,6 +1,7 @@ from Standard.Base import all from Standard.Table import Value_Type +import Standard.Table.Data.Type.Value_Type.Bits from Standard.Test import Test, Problems import Standard.Test.Extensions @@ -12,6 +13,7 @@ main = run_default_backend spec spec setup = prefix = setup.prefix table_builder = setup.table_builder + materialize = setup.materialize # TODO this spec will be expanded in #6112 Test.group prefix+"Column.cast" pending=(if setup.is_database.not then "Cast is not implemented in the in-memory backend yet.") <| Test.specify "should allow to cast an integer column to text" <| @@ -91,3 +93,13 @@ spec setup = t2.at "Y" . to_vector . should_equal ["4", "5", "6"] t2.at "Z" . to_vector . should_equal ["7", "8", "9"] t2.at "A" . to_vector . should_equal [True, False, True] + + if setup.test_selection.fixed_length_text_columns then + Test.specify "should preserve the overridden types when materialized" pending="TODO: #5159 needed" <| + t = table_builder [["X", [1, 2, 100]], ["Y", ["a", "abcdef", "abc"]]] + t2 = t . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=3 variable_length=False) + + t3 = materialize t2 + t3.at "X" . value_type . should_equal (t2.at "X" . value_type) + t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False) + t3.at "Y" . to_vector . 
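A small illustrative sketch of the changed fillMissingFrom contract (the helper class and the FloatType.FLOAT_64 constant name are assumptions, not part of this patch): the caller, which already knows the unified value type of the two columns, passes it in so the result is built with the right storage type instead of being re-inferred element by element:

    import org.enso.table.data.column.storage.Storage;
    import org.enso.table.data.column.storage.type.FloatType;

    final class FillMissingExample {
      // Assumed usage: fill gaps in an integer column from a float column.
      // The common type (here 64-bit float) is decided up front by the caller,
      // so the filled column is built directly as float storage.
      static Storage<?> fillFromFloats(Storage<?> ints, Storage<?> floats) {
        return ints.fillMissingFrom(floats, FloatType.FLOAT_64);
      }
    }
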
should_equal ["a ", "abc", "abc"] diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso index 4eecae25ed0c..3f9ed774ae32 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso @@ -2,8 +2,11 @@ from Standard.Base import all import Standard.Base.Errors.Common.Arithmetic_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +from Standard.Table import Value_Type from Standard.Table.Errors import all +from Standard.Database.Errors import SQL_Error + from Standard.Test import Test, Problems import Standard.Test.Extensions @@ -14,21 +17,109 @@ main = run_default_backend spec spec setup = prefix = setup.prefix table_builder = setup.table_builder - Test.group prefix+"Basic Column Operations" <| + Test.group prefix+"Boolean Column Operations" <| Test.specify "iif" <| t = table_builder [["X", [True, False, Nothing, True]]] - t.at "X" . iif 22 33 . to_vector . should_equal [22, 33, Nothing, 22] + c1 = t.at "X" . iif 22 33 + c1.to_vector . should_equal [22, 33, Nothing, 22] + c1.value_type . is_integer . should_be_true + + c2 = t.at "X" . iif 22 33.0 + c2.to_vector . should_equal [22, 33, Nothing, 22] + c2.value_type . is_floating_point . should_be_true + + c3 = t.at "X" . iif "A" "B" + c3.to_vector . should_equal ["A", "B", Nothing, "A"] + c3.value_type . is_text . should_be_true + + c4 = t.at "X" . iif Nothing "B" + c4.to_vector . should_equal [Nothing, "B", Nothing, Nothing] + c4.value_type . is_text . should_be_true + + c5 = t.at "X" . iif 42 Nothing + c5.to_vector . should_equal [42, Nothing, Nothing, 42] + c5.value_type . is_integer . should_be_true + + c6 = t.at "X" . iif Nothing Nothing + c6.to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] + + t.at "X" . iif 22.0 False . should_fail_with No_Common_Type + t.at "X" . iif 22 "0" . should_fail_with No_Common_Type Test.specify "iif on Columns" <| - t = table_builder [["X", [True, False, Nothing, False]], ["Y", [1, 2, 3, 4]], ["Z", [1.5, 2.5, 3.5, 4.5]]] - c = t.at "X" . iif (t.at "Y") (t.at "Z") - c.value_type . is_floating_point . should_be_true - c.to_vector . should_equal [1, 2.5, Nothing, 4.5] + t1 = table_builder [["X", [True, False, Nothing, False]], ["Y", [1, 2, 3, 4]], ["Z", [1.5, 2.0, 3.5, 4.0]]] + c1 = t1.at "X" . iif (t1.at "Y") (t1.at "Z") + c1.value_type . is_floating_point . should_be_true + c1.to_vector . should_equal [1, 2.0, Nothing, 4.0] + + t2 = table_builder [["X", [True, False]], ["Y", [1, 2]], ["Z", [1.5, 3.0]]] + c2 = t2.at "X" . iif (t2.at "Y") (t2.at "Z") + c2.to_vector . should_equal [1, 3] + c2.value_type . is_floating_point . should_be_true + + t3 = table_builder [["X", [True, False]], ["Y", [10, 20]], ["Z", [False, True]]] + t3.at "X" . iif (t3.at "Y") (t3.at "Z") . should_fail_with No_Common_Type + t3.at "X" . iif (t3.at "Y") "" . should_fail_with No_Common_Type + + Test.specify "iif should correctly unify text columns of various lengths" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <| + t0 = table_builder [["x", [False, True, False]], ["A", ["a", "b", "c"]], ["B", ["xyz", "abc", "def"]]] + t1 = t0 . cast "A" (Value_Type.Char size=1 variable_length=False) . 
cast "B" (Value_Type.Char size=3 variable_length=False) + + x = t1.at "x" + a = t1.at "A" + b = t1.at "B" + a.value_type.should_equal (Value_Type.Char size=1 variable_length=False) + b.value_type.should_equal (Value_Type.Char size=3 variable_length=False) + + c = x.iif a b + c.to_vector.should_equal ["xyz", "b", "def"] + Test.with_clue "c.value_type="+c.value_type.to_display_text+": " <| + c.value_type.variable_length.should_be_true + + d = b.cast (Value_Type.Char size=1 variable_length=False) + e = x.iif a d + e.to_vector.should_equal ["x", "b", "d"] + e.value_type.should_equal (Value_Type.Char size=1 variable_length=False) + + f = b.cast (Value_Type.Char size=1 variable_length=True) + g = x.iif a f + g.to_vector.should_equal ["x", "b", "d"] + Test.with_clue "g.value_type="+g.value_type.to_display_text+": " <| + g.value_type.variable_length.should_be_true + + Test.specify "should allow to compute &&, || and not" <| + t = table_builder [["X", [True, False, True]], ["Y", [True, False, False]]] + x = t.at "X" + y = t.at "Y" + (x || y.not).to_vector . should_equal [True, True, True] + (x || False).to_vector . should_equal [True, False, True] + (x || True).to_vector . should_equal [True, True, True] + (x && False).to_vector . should_equal [False, False, False] - t2 = table_builder [["x", [1, 4, 5, Nothing]], ["y", [2, 3, 5, Nothing]], ["b", [False, False, True, Nothing]]] + Test.specify "should return null if one of arguments is missing" pending="TODO null handling" <| + t = table_builder [["X", [True, False, True]]] + x = t.at "X" + nulls = [Nothing, Nothing, Nothing, Nothing] + (x && Nothing).to_vector . should_equal nulls + (x || Nothing).to_vector . should_equal nulls + + Test.specify "should check types" <| + t = table_builder [["X", [1, 2, 3]], ["Y", ['a', 'b', 'c']], ["Z", [True, False, Nothing]]] + + ((t.at "X") && (t.at "Z")) . should_fail_with Invalid_Value_Type + ((t.at "Z") && (t.at "X")) . should_fail_with Invalid_Value_Type + ((t.at "Y") && True) . should_fail_with Invalid_Value_Type + + ((t.at "X") || (t.at "Z")) . should_fail_with Invalid_Value_Type + ((t.at "Z") || (t.at "X")) . should_fail_with Invalid_Value_Type + ((t.at "Y") || True) . should_fail_with Invalid_Value_Type + + ((t.at "X") . not) . should_fail_with Invalid_Value_Type + ((t.at "Y") . iif 10 20) . should_fail_with Invalid_Value_Type + + t2 = table_builder [["x", [1, 4, 5, Nothing]], ["y", [2.0, 3.25, 5.0, Nothing]]] x = t2.at "x" y = t2.at "y" - b = t2.at "b" Test.group prefix+"Column Operations - Equality & Null Handling" <| Test.specify "should provide basic == and != comparisons" pending="TODO figure out proper null handling" <| (x == y).to_vector . should_equal [False, False, True, Nothing] @@ -39,6 +130,8 @@ spec setup = Test.specify "should allow to check which values are null" x.is_nothing.to_vector . should_equal [False, False, False, True] (x + Nothing).is_nothing.to_vector . should_equal [True, True, True, True] + x.is_present.to_vector . should_equal [True, True, True, False] + (x + Nothing).is_present.to_vector . should_equal [False, False, False, False] Test.specify "Column equality should handle nulls correctly" pending="TODO" <| a = [2, 3, Nothing, Nothing] @@ -47,10 +140,11 @@ spec setup = a.zip b (==) . should_equal r t = table_builder [["A", a], ["B", b]] - ((t.at "A") == (t.at "B")) . to_vector . should_equal r + c = (t.at "A") == (t.at "B") + c.to_vector . 
should_equal r + c.value_type.should_equal Value_Type.Boolean Test.specify "equals_ignore_case for ASCII strings" <| - # TODO test for nothing too x = ["a", "B", "c", "DEF"] y = ["aa", "b", "c", "dEf"] r = [False, True, True, True] @@ -58,7 +152,22 @@ spec setup = x.zip y (.equals_ignore_case) . should_equal r t = table_builder [["X", x], ["Y", y]] - (t.at "X") . equals_ignore_case (t.at "Y") . to_vector . should_equal r + c = (t.at "X") . equals_ignore_case (t.at "Y") + c.to_vector . should_equal r + c.value_type.should_equal Value_Type.Boolean + (t.at "X") . equals_ignore_case "Def" . to_vector . should_equal [False, False, False, True] + + Test.specify "equals_ignore_case should check types" <| + t = table_builder [["X", [1, 2, 3]], ["Y", ['a', 'b', 'c']]] + + r1 = (t.at "X") . equals_ignore_case (t.at "Y") . to_vector + r1.should_fail_with Invalid_Value_Type + + r2 = (t.at "Y") . equals_ignore_case (t.at "X") . to_vector + r2.should_fail_with Invalid_Value_Type + + r3 = (t.at "Y") . equals_ignore_case 42 . to_vector + r3.should_fail_with Invalid_Value_Type Test.specify "Text Column equality (including case-insensitive) should handle nulls correctly" pending="TODO" <| a = ["Z", "a", "b", Nothing, Nothing] @@ -93,26 +202,141 @@ spec setup = r2.to_vector . should_equal [False, True, False] Problems.expect_warning Floating_Point_Equality r2 - Test.group prefix+"Arithmetic and Boolean Column Operations" <| - Test.specify "should allow basic operations" <| - (x + y).to_vector . should_equal [3, 7, 10, Nothing] - (x - y).to_vector . should_equal [-1, 1, 0, Nothing] - (x * y).to_vector . should_equal [2, 12, 25, Nothing] + Test.group prefix+"Column Comparisons" <| + Test.specify "should allow to compare numbers" <| + x.value_type . is_integer . should_be_true + y.value_type . is_floating_point . should_be_true + (x < y).to_vector . should_equal [True, False, False, Nothing] (x <= y).to_vector . should_equal [True, False, True, Nothing] (x > y).to_vector . should_equal (x <= y).not.to_vector (x >= y).to_vector . should_equal (x < y).not.to_vector - #(((x < y) || (x == y)) == (x <= y)).to_vector . should_equal [True, True, True, Nothing] - (b || b.not).to_vector . should_equal [True, True, True, Nothing] + + (x < 1000).to_vector . should_equal [True, True, True, Nothing] + + [(<), (<=), (>), (>=)].each op-> + op x y . value_type . should_equal Value_Type.Boolean + op x y . to_vector . should_succeed + op x 23 . to_vector . should_succeed + op y 23 . to_vector . should_succeed + op x 1.5 . to_vector . should_succeed + + Test.specify "should allow to compare texts" <| + t0 = table_builder [["X", ["a", "b", "c"]], ["Y", ["a", "b", "d"]]] + t = t0.cast "X" (Value_Type.Char size=1 variable_length=False) + + [(<), (<=), (>), (>=)].each op-> + op (t.at "X") (t.at "Y") . value_type . should_equal Value_Type.Boolean + op (t.at "X") (t.at "Y") . to_vector . should_succeed + op (t.at "X") "abc" . to_vector . should_succeed + + Test.specify "should allow to compare booleans" <| + t = table_builder [["X", [True, False, True]], ["Y", [False, True, True]]] + + ((t.at "X") < (t.at "Y")).to_vector . should_equal [False, True, False] + ((t.at "X") >= (t.at "Y")).to_vector . should_equal [True, False, True] + ((t.at "X") <= (t.at "Y")).to_vector . should_equal [False, True, True] + ((t.at "X") > (t.at "Y")).to_vector . should_equal [True, False, False] + + ((t.at "X") < True).to_vector . should_equal [False, True, False] + ((t.at "X") >= True).to_vector . 
should_equal [True, False, True] + ((t.at "X") <= True).to_vector . should_equal [True, True, True] + ((t.at "X") > True).to_vector . should_equal [False, False, False] + + Test.specify "should report error if incomparable types are compared" <| + t = table_builder [["X", [1, 2]], ["Y", ["a", "b"]], ["Z", [True, False]]] + + [(<), (<=), (>), (>=)].each op-> + r1 = op (t.at "X") (t.at "Y") + r1.should_fail_with Invalid_Value_Type + r1.catch . should_be_a Invalid_Value_Type.Incomparable + + op (t.at "X") "FOO" . should_fail_with Invalid_Value_Type + op (t.at "Y") 42 . should_fail_with Invalid_Value_Type + op (t.at "Y") False . should_fail_with Invalid_Value_Type + op (t.at "Z") 32 . should_fail_with Invalid_Value_Type + op (t.at "Z") (t.at "X") . should_fail_with Invalid_Value_Type + + Test.specify "Between should return null if any of the values are null" pending="TODO" <| + a = [2, 3, Nothing, 7, 5, Nothing] + b = [0, 5, 7, Nothing, 7, Nothing] + c = [9, 8, 7, 7, Nothing, Nothing] + r = [True, False, Nothing, Nothing, Nothing, Nothing] + + t = table_builder [["A", a], ["B", b], ["C", c]] + ((t.at "A").between (t.at "B") (t.at "C")) . to_vector . should_equal r + + Test.group prefix+"Arithmetic Column Operations" <| + Test.specify "should allow basic operations" <| + (x + y).to_vector . should_equal [3, 7.25, 10, Nothing] + (x - y).to_vector . should_equal [-1.0, 0.75, 0.0, Nothing] + (x * y).to_vector . should_equal [2.0, 13.0, 25.0, Nothing] Test.specify "should allow combining a column with a scalar" pending="TODO null handling" <| (x + 100).to_vector . should_equal [101, 104, 105, Nothing] (x * 10).to_vector . should_equal [10, 40, 50, Nothing] (x - 10).to_vector . should_equal [-9, -6, -5, Nothing] - (x < 1000).to_vector . should_equal [True, True, True, Nothing] - (b || False).to_vector . should_equal [False, False, True, Nothing] - (b || True).to_vector . should_equal [True, True, True, True] - (b && False).to_vector . should_equal [False, False, False, False] + + Test.specify "should correctly infer the types" <| + (x + x).value_type . is_integer . should_be_true + (x + y).value_type . is_floating_point . should_be_true + (x + 2).value_type . is_integer . should_be_true + (x + 1.5).value_type . is_floating_point . should_be_true + + (x - x).value_type . is_integer . should_be_true + (x - y).value_type . is_floating_point . should_be_true + (x - 2).value_type . is_integer . should_be_true + (x - 1.5).value_type . is_floating_point . should_be_true + + (x * x).value_type . is_integer . should_be_true + (x * y).value_type . is_floating_point . should_be_true + (x * 2).value_type . is_integer . should_be_true + (x * 1.5).value_type . is_floating_point . should_be_true + + (x ^ x).value_type . is_numeric . should_be_true + + Test.specify "should check types" <| + t = table_builder [["X", [1, 2]], ["Y", ["a", "b"]], ["Z", [True, False]]] + x = t.at "X" + y = t.at "Y" + z = t.at "Z" + + (x + z) . should_fail_with Illegal_Argument + (x + False) . should_fail_with Illegal_Argument + + # Mixing text and integers should not be allowed + (x + y) . should_fail_with Illegal_Argument + (x + "foo") . 
should_fail_with Illegal_Argument + + (x - z).should_fail_with Invalid_Value_Type + (x - "a").should_fail_with Invalid_Value_Type + (y - "a").should_fail_with Invalid_Value_Type + (y - 42).should_fail_with Invalid_Value_Type + + (x * z).should_fail_with Invalid_Value_Type + (x * "a").should_fail_with Invalid_Value_Type + (y * "a").should_fail_with Invalid_Value_Type + (y * 42).should_fail_with Invalid_Value_Type + + (x / z).should_fail_with Invalid_Value_Type + (x / "a").should_fail_with Invalid_Value_Type + (y / "a").should_fail_with Invalid_Value_Type + (y / 42).should_fail_with Invalid_Value_Type + + (x ^ z).should_fail_with Invalid_Value_Type + (x ^ "a").should_fail_with Invalid_Value_Type + (y ^ "a").should_fail_with Invalid_Value_Type + (y ^ 42).should_fail_with Invalid_Value_Type + + if setup.test_selection.is_nan_and_nothing_distinct then + Test.specify "should support is_nan" <| + t = table_builder [["X", [1.5, 2, Number.nan]], ["Y", [1, 2, 3]]] + t.at "X" . is_nan . to_vector . should_equal [False, False, True] + t.at "Y" . is_nan . should_fail_with Invalid_Value_Type + Test.specify "should support is_blank" <| + t = table_builder [["X", [1.5, 2, Number.nan, Nothing]], ["Y", [1, Nothing, 3, 4]]] + t.at "X" . is_blank treat_nans_as_blank=True . to_vector . should_equal [False, False, True, True] + t.at "Y" . is_blank treat_nans_as_blank=True . to_vector . should_equal [False, True, False, False] Test.specify "division should be aligned with the Enso arithmetic" <| a = [1, 5, 10, 100] @@ -121,11 +345,33 @@ spec setup = a.zip b (/) . should_equal r t = table_builder [["A", a], ["B", b]] + t.at "A" . value_type . is_integer . should_be_true + t.at "B" . value_type . is_integer . should_be_true + r2 = (t.at "A") / (t.at "B") r2 . to_vector . should_equal r + r2.value_type . is_floating_point . should_be_true r3 = (t.at "A") / 2 r3 . to_vector . should_equal [0.5, 2.5, 5.0, 50.0] + r3.value_type . is_floating_point . should_be_true + + a2 = [1.2, 5, 10.2, 100] + b2 = [1.2, 2, 2, 5] + r4 = [1.0, 2.5, 5.1, 20.0] + a2.zip b2 (/) . should_equal r4 + + t2 = table_builder [["A", a2], ["B", b2]] + t2.at "A" . value_type . is_floating_point . should_be_true + t2.at "B" . value_type . is_floating_point . should_be_true + + r5 = (t2.at "A") / (t2.at "B") + r5 . to_vector . should_equal r4 + r5.value_type . is_floating_point . should_be_true + + r6 = (t2.at "A") / 2 + r6 . to_vector . should_equal [0.6, 2.5, 5.1, 50.0] + r6.value_type . is_floating_point . should_be_true db_pending = if setup.is_database then "Arithmetic error handling is currently not implemented for the Database backend." Test.specify "should allow division by 0 and report warnings" pending=db_pending <| @@ -183,30 +429,18 @@ spec setup = warning.should_be_a Arithmetic_Error warning.message . should_equal "Division by zero (at rows [0, 1, 2, 3, 4, 5, 6, 7, 8, ...])." - Test.specify "should return null if one of arguments is missing" pending="TODO null handling" <| + Test.specify "should return null if one of arguments is missing" <| nulls = [Nothing, Nothing, Nothing, Nothing] (x + Nothing).to_vector . should_equal nulls (x - Nothing).to_vector . should_equal nulls (x * Nothing).to_vector . should_equal nulls (x / Nothing).to_vector . should_equal nulls - (b && Nothing).to_vector . should_equal nulls - (b || Nothing).to_vector . 
should_equal nulls - - Test.specify "Between should return null if any of the values are null" pending="TODO" <| - a = [2, 3, Nothing, 7, 5, Nothing] - b = [0, 5, 7, Nothing, 7, Nothing] - c = [9, 8, 7, 7, Nothing, Nothing] - r = [True, False, Nothing, Nothing, Nothing, Nothing] - - t = table_builder [["A", a], ["B", b], ["C", c]] - ((t.at "A").between (t.at "B") (t.at "C")) . to_vector . should_equal r - Test.group prefix+"Column Operations - Text" <| + Test.group prefix+"Text Column Operations" <| t3 = table_builder [["s1", ["foobar", "bar", "baz", "BAB", Nothing]], ["s2", ["foo", "ar", "a", "b", Nothing]]] s1 = t3.at "s1" s2 = t3.at "s2" - - Test.specify "should handle basic Text operations" <| + Test.specify "should handle operations like starts_with, ends_with, contains" <| s1.starts_with s2 . to_vector . should_equal [True, False, False, False, Nothing] s1.starts_with s2 Case_Sensitivity.Insensitive . to_vector . should_equal [True, False, False, True, Nothing] s1.starts_with "foo" . to_vector . should_equal [True, False, False, False, Nothing] @@ -225,6 +459,131 @@ spec setup = s1.ends_with "a" . to_vector . should_equal [False, False, False, False, Nothing] s1.ends_with "b" Case_Sensitivity.Insensitive . to_vector . should_equal [False, False, False, True, Nothing] + s1.like s2 . to_vector . should_equal [False, False, False, False, Nothing] + s1.like (s2+"%r") . to_vector . should_equal [True, False, False, False, Nothing] + s1.like "%r%" . to_vector . should_equal [True, True, False, False, Nothing] + + Test.specify "should handle operations like is_empty, is_blank, fill_empty" <| + t = table_builder [["s", ["", " ", " ", Nothing, "foo"]], ["letters", ["a", "b", "c", "d", "e"]]] + s = t.at "s" + s.is_empty . to_vector . should_equal [True, False, False, True, False] + s.is_blank . to_vector . should_equal [True, False, False, True, False] + s.fill_empty "<>" . to_vector . should_equal ["<>", " ", " ", "<>", "foo"] + s.fill_empty (t.at "letters") . to_vector . should_equal ["a", " ", " ", "d", "foo"] + + Test.specify "should check types" <| + t4 = table_builder [["str", ['a', 'b']], ["int", [1, 2]]] + str = t4.at "str" + int = t4.at "int" + str.starts_with int . should_fail_with Invalid_Value_Type + str.ends_with int . should_fail_with Invalid_Value_Type + str.contains int . should_fail_with Invalid_Value_Type + str.like int . should_fail_with Invalid_Value_Type + int.starts_with str . should_fail_with Invalid_Value_Type + int.ends_with str . should_fail_with Invalid_Value_Type + int.contains str . should_fail_with Invalid_Value_Type + int.like str . should_fail_with Invalid_Value_Type + str.starts_with 42 . should_fail_with Invalid_Value_Type + str.ends_with 42 . should_fail_with Invalid_Value_Type + str.contains 42 . should_fail_with Invalid_Value_Type + str.like 42 . should_fail_with Invalid_Value_Type + + # Mixing text and integers should not be allowed + (str + int) . should_fail_with Illegal_Argument + + int.fill_empty "<>" . should_fail_with Invalid_Value_Type + str.fill_empty int . should_fail_with Invalid_Value_Type + str.fill_empty 42 . should_fail_with Invalid_Value_Type + + int.is_empty . should_fail_with Invalid_Value_Type + + Test.specify "should return right types" <| + [Case_Sensitivity.Default, Case_Sensitivity.Sensitive, Case_Sensitivity.Insensitive].each cs-> + s1.starts_with s2 case_sensitivity=cs . value_type . should_equal Value_Type.Boolean + s1.ends_with s2 case_sensitivity=cs . value_type . 
should_equal Value_Type.Boolean + s1.contains s2 case_sensitivity=cs . value_type . should_equal Value_Type.Boolean + + s1.starts_with "A" case_sensitivity=cs . value_type . should_equal Value_Type.Boolean + s1.ends_with "A" case_sensitivity=cs . value_type . should_equal Value_Type.Boolean + s1.contains "A" case_sensitivity=cs . value_type . should_equal Value_Type.Boolean + + s1.like s2 . value_type . should_equal Value_Type.Boolean + s1.like "%r%" . value_type . should_equal Value_Type.Boolean + + s1.is_empty . value_type . should_equal Value_Type.Boolean + s1.is_blank . value_type . should_equal Value_Type.Boolean + s1.fill_empty "<>" . value_type . is_text . should_be_true + s1.fill_empty s2 . value_type . is_text . should_be_true + + Test.specify "should support text concatenation with the + operator" <| + c1 = s1 + s2 + c1.to_vector . should_equal ["foobarfoo", "barar", "baza", "BABb", Nothing] + c1.value_type.is_text . should_be_true + + c2 = s1 + "_SUF" + c2.to_vector . should_equal ["foobar_SUF", "bar_SUF", "baz_SUF", "BAB_SUF", Nothing] + c2.value_type.is_text . should_be_true + + c3 = s1 + Nothing + c3.to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing] + c3.value_type.is_text . should_be_true + + Test.group prefix+"Min/Max Operations" <| + t = table_builder [["a", [1, 2, 3]], ["b", [4.5, 5.5, 6.5]], ["c", ['a', 'b', 'c']], ["d", [True, False, True]]] + a = t.at "a" + b = t.at "b" + c = t.at "c" + Test.specify "should allow one or more args and return the correct type" <| + c1 = a.min 2 + c1.to_vector . should_equal [1, 2, 2] + c1.value_type.is_integer . should_be_true + + c2 = a.max 2 + c2.to_vector . should_equal [2, 2, 3] + c2.value_type.is_integer . should_be_true + + c3 = a.min [2.5, 2] + c3.to_vector . should_equal [1, 2, 2] + Test.with_clue "c3.value_type="+c3.value_type.to_display_text <| + c3.value_type.is_floating_point . should_be_true + + c4 = a.max [2.5, 2] + c4.to_vector . should_equal [2.5, 2.5, 3] + c4.value_type.is_floating_point . should_be_true + + c5 = a.min b + c5.to_vector . should_equal [1, 2, 3] + Test.with_clue "c5.value_type="+c5.value_type.to_display_text+": " <| + c5.value_type.is_floating_point . should_be_true + + c6 = a.max b + c6.to_vector . should_equal [4.5, 5.5, 6.5] + c6.value_type.is_floating_point . should_be_true + + c7 = a.min [a, b, 1] + c7.to_vector . should_equal [1, 1, 1] + c7.value_type.is_floating_point . should_be_true + + c8 = a.max [a, b, 1] + c8.to_vector . should_equal [4.5, 5.5, 6.5] + c8.value_type.is_floating_point . should_be_true + + c9 = (t.at "d").min False + c9.to_vector . should_equal [False, False, False] + c9.value_type.is_boolean . should_be_true + + c10 = (t.at "d").max False + c10.to_vector . should_equal [True, False, True] + c10.value_type.is_boolean . should_be_true + + Test.specify "should check types" <| + [(.min), (.max)].each op-> + op a c . should_fail_with Invalid_Value_Type + op a [1, 2, c] . should_fail_with Invalid_Value_Type + op a [1, Nothing, c, Nothing] . should_fail_with Invalid_Value_Type + op c 1 . should_fail_with Invalid_Value_Type + op a True . should_fail_with Invalid_Value_Type + Test.group prefix+"Column Operations - Text Replace" <| if setup.is_database.not then t4 = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]], ["B", ["A","O","a","E","o","O"]], ["C", [1,2,3,4,5,6]], ["D", ['',Nothing,'',Nothing,'','']]] @@ -249,9 +608,9 @@ spec setup = Test.specify "should only allow replace on Text columns" <| c.replace "a" "#" . 
should_fail_with Invalid_Value_Type - a.replace 1 "#" . should_fail_with Illegal_Argument + a.replace 1 "#" . should_fail_with Invalid_Value_Type a.replace c "#" . should_fail_with Invalid_Value_Type - a.replace "a" 1 . should_fail_with Illegal_Argument + a.replace "a" 1 . should_fail_with Invalid_Value_Type a.replace "a" c . should_fail_with Invalid_Value_Type Test.specify "should not replace if Empty term" <| @@ -260,6 +619,27 @@ spec setup = a.replace d "#" . to_vector . should_equal ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"] a.replace d "#" use_regex=True . to_vector . should_equal ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"] + Test.specify "should infer correct return type" <| + c = table_builder [["texts", ["foo", "bar"]]] . at "texts" + + c1 = c.replace "a" "---" + c1.to_vector . should_equal ["foo", "b---r"] + vt1 = c1.value_type + Test.with_clue "c1.value_type="+vt1.to_display_text+": " <| + vt1.should_be_a (Value_Type.Char ...) + vt1.variable_length.should_be_true + + Test.specify "should infer correct return type (2)" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <| + c = table_builder [["texts", ["foo", "bar"]]] . at "texts" + c2 = c.cast (Value_Type.Char size=2 variable_length=False) + c3 = c2.replace "a" "---" + + c3.to_vector . should_equal ["fo", "b---"] + vt3 = c3.value_type + Test.with_clue "c3.value_type="+vt3.to_display_text+": " <| + vt3.should_be_a (Value_Type.Char ...) + vt3.variable_length.should_be_true + Test.group prefix+"Column Operations - Text Trim" <| t5 = table_builder [["A", [" A ", ' \t\n\rA\r\n\t ', "xxxAxx"]], ["B", [" ",' \t',"x"]], ["C", [1,2,3]]] a = t5.at "A" @@ -282,10 +662,24 @@ spec setup = Test.specify "should only allow trim on Text columns" <| c.trim what="a" . should_fail_with Invalid_Value_Type - a.trim what=1 . should_fail_with Illegal_Argument + a.trim what=1 . should_fail_with Invalid_Value_Type a.trim what=c . should_fail_with Invalid_Value_Type - Test.group prefix+"Column Operations - Names" <| + Test.group prefix+"Other Column Operations" <| + Test.specify "is_in" <| + t = table_builder [["X", [1, 2, 3, 4]], ["Y", [4, 3, 100, 200]]] + x = t.at "X" + y = t.at "Y" + + c1 = x.is_in [2, 100, 5] + c1.to_vector . should_equal [False, True, False, False] + c1.value_type.should_equal Value_Type.Boolean + + c2 = x.is_in y + c2.to_vector . should_equal [False, False, True, True] + c2.value_type.should_equal Value_Type.Boolean + + Test.group prefix+"Colum Operations - Names" <| t = table_builder [["a", [1, 2, 3]], ["b", ['x', 'y', 'z']], ["c", [1.0, 2.0, 3.0]], ["d", [True, False, True]]] Test.specify "arithmetic" <| ((t.at "a") + 42) . name . should_equal "[a] + 42" @@ -303,8 +697,8 @@ spec setup = ((t.at "a") < 0) . name . should_equal "[a] < 0" ((t.at "a") <= 0) . name . should_equal "[a] <= 0" ((t.at "a") > 0) . name . should_equal "[a] > 0" - ((t.at "a") >= 0) . name . should_equal "[a] >= 0" - ((t.at "b").between (t.at "c") 42) . name . should_equal "[b] between [c] and 42" + ((t.at "b") >= 'X') . name . should_equal "[b] >= 'X'" + ((t.at "a").between (t.at "c") 42) . name . should_equal "[a] between [c] and 42" Test.specify "logical" <| ((t.at "d") || False) . name . 
should_equal "[d] || False" diff --git a/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso index d060fe5d253f..dbf6efdd8bc3 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso @@ -1,4 +1,5 @@ from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument from Standard.Table import Value_Type from Standard.Table.Errors import Inexact_Type_Coercion, Invalid_Value_Type @@ -95,10 +96,41 @@ spec setup = t = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]], ["C", [True, False, True]]] r1 = t.at "A" . year r1.should_fail_with Invalid_Value_Type - r1.catch . to_display_text . should_start_with "Expected A column to have Date or Date_Time type, but got Integer" + r1.catch . to_display_text . should_start_with "Expected type Date or Date_Time, but got a column [A] of type Integer" t.at "B" . month . should_fail_with Invalid_Value_Type t.at "C" . day . should_fail_with Invalid_Value_Type + Test.specify "should allow to compare dates" <| + t = table_builder [["X", [Date.new 2021 12 3]], ["Y", [Date.new 2021 12 5]]] + + [(<), (<=), (>), (>=), (==), (!=)].each op-> + op (t.at "X") (t.at "Y") . value_type . should_equal Value_Type.Boolean + op (t.at "X") (t.at "Y") . to_vector . should_succeed + op (t.at "X") (Date.new 2021 12 4) . to_vector . should_succeed + + Test.specify "should allow to compare date-times" <| + t = table_builder [["X", [Date_Time.new 2021 12 3 12 30 0]], ["Y", [Date_Time.new 2021 12 5 12 30 0]]] + + [(<), (<=), (>), (>=), (==), (!=)].each op-> + op (t.at "X") (t.at "Y") . value_type . should_equal Value_Type.Boolean + op (t.at "X") (t.at "Y") . to_vector . should_succeed + op (t.at "X") (Date_Time.new 2021 12 4 12 30 0) . to_vector . should_succeed + + Test.specify "should allow to compare time-of-day" <| + t = table_builder [["X", [Time_Of_Day.new 12 30 0]], ["Y", [Time_Of_Day.new 12 30 1]]] + + [(<), (<=), (>), (>=), (==), (!=)].each op-> + op (t.at "X") (t.at "Y") . value_type . should_equal Value_Type.Boolean + op (t.at "X") (t.at "Y") . to_vector . should_succeed + op (t.at "X") (Time_Of_Day.new 12 30 0) . to_vector . should_succeed + + Test.specify "should not allow to mix" <| + t = table_builder [["X", [Date.new 2021 12 3]], ["Y", [Date_Time.new 2021 12 5 12 30 0]], ["Z", [Time_Of_Day.new 12 30 0]]] + + [(<), (<=), (>), (>=)].each op-> + op (t.at "X") (t.at "Y") . should_fail_with Invalid_Value_Type + op (t.at "X") (t.at "Z") . 
should_fail_with Invalid_Value_Type + if setup.test_selection.date_time.not then Test.group prefix+"partial Date-Time support" <| Test.specify "will fail to upload a Table containing Dates" <| diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index 46c4b296fe59..5f4047a68355 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -1,9 +1,10 @@ from Standard.Base import all import Standard.Base.Errors.Common.Arithmetic_Error import Standard.Base.Errors.Illegal_State.Illegal_State +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument -from Standard.Table import Table, Column, Sort_Column, Aggregate_Column -from Standard.Table.Errors import Floating_Point_Equality, Additional_Warnings +from Standard.Table import Table, Column, Sort_Column, Aggregate_Column, Value_Type +from Standard.Table.Errors import all import Standard.Table.Data.Expression.Expression_Error from Standard.Database.Errors import SQL_Error @@ -114,6 +115,37 @@ spec detailed setup = expression_test "Nothing" Nothing expression_test "NOTHING" Nothing + Test.specify "should allow to create a NULL column" <| + t = table_builder [["X", [1, 2, 3]]] + c = t.compute "null" + c.name . should_equal "null" + c.to_vector.should_equal [Nothing, Nothing, Nothing] + ## We do not specify the value type. + It usually will be `Mixed` in in-memory and + `Unsupported_Data_Type` in DB. + c.value_type.should_be_a Value_Type + + c.is_nothing.to_vector.should_equal [True, True, True] + + ## TODO that may not necessarily be good, I think we may need to + introduce a Value_Type.Null and make it accepted by all + `Value_Type.expect_*` checks. + See: https://github.com/enso-org/enso/issues/6281 + Test.specify "a null column may fail typechecks that expect a concrete type" <| + t = table_builder [["X", [1, 2, 3]]] + c = t.compute "null" + (c + c) . should_fail_with Illegal_Argument + (c - c) . should_fail_with Invalid_Value_Type + (c.starts_with "X") . should_fail_with Invalid_Value_Type + c.not . should_fail_with Invalid_Value_Type + + t.compute "not(Nothing)" . should_fail_with Invalid_Value_Type + t.compute "Nothing + Nothing" . should_fail_with Illegal_Argument + t.compute "Nothing * Nothing" . should_fail_with Invalid_Value_Type + + t.compute "[X] + Nothing" . to_vector . should_equal [Nothing, Nothing, Nothing] + t.compute "Nothing + [X]" . 
should_fail_with Illegal_Argument + Test.group prefix+"Expression Date and Time literals" <| specify_test "should be able to add a date or time column" pending=pending_datetime expression_test-> expression_test "#2020-12-23#" (Date.new 2020 12 23) @@ -254,10 +286,8 @@ spec detailed setup = specify_test "should be able to check empty" expression_test-> expression_test "'Hello World' IS EMPTY" False expression_test "'' IS EMPTY" True - expression_test "Nothing IS EMPTY" True expression_test "'Hello World' IS NOT EMPTY" True expression_test "'' IS NOT EMPTY" False - expression_test "Nothing IS NOT EMPTY" False Test.group prefix+"Expression Text Operators" <| specify_test "should be able to concatenate text" expression_test-> @@ -279,6 +309,7 @@ spec detailed setup = Test.group prefix+"Expression Boolean Operators" <| specify_test "should be able to AND booleans" expression_test-> expression_test "True && TRUE" True + expression_test "True && Nothing" Nothing expression_test "True AND False" False expression_test "True && [Bad]] Name]" [True, False, True, False, True] expression_test "False AND [Bad]] Name]" False diff --git a/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso index 6d12acd30122..c43ed9b5c526 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso @@ -1,7 +1,6 @@ from Standard.Base import all import Standard.Base.Errors.Common.Arithmetic_Error import Standard.Base.Errors.Common.Index_Out_Of_Bounds -import Standard.Base.Errors.Common.Type_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State @@ -44,6 +43,9 @@ spec setup = t.filter "X" (Filter_Condition.Equal to=123) . at "X" . to_vector . should_equal [] v = t.filter "X" (Filter_Condition.Equal to="SOME TEXT :)") . at "X" . to_vector + ## We do not do typechecking at Enso level here, as it is + DB-dependent if such mixing is allowed, so we will rely on an SQL + error. case test_selection.allows_mixed_type_comparisons of True -> v.should_equal [] False -> v.should_fail_with SQL_Error @@ -163,7 +165,7 @@ spec setup = t = table_builder [["ix", [1, 2, 3, 4]], ["X", [Nothing, "A", "", " "]]] check_problem result = result.should_fail_with Invalid_Value_Type - result.catch.expected . should_equal Value_Type.Char + result.catch.expected . should_equal "Char" check_problem (t.filter "X" (Filter_Condition.Starts_With (t.at "ix"))) check_problem (t.filter "X" (Filter_Condition.Ends_With (t.at "ix"))) @@ -182,16 +184,15 @@ spec setup = check_problem (t.filter "ix" Filter_Condition.Is_Empty) check_problem (t.filter "ix" Filter_Condition.Not_Empty) - check_scalar_type_error_handling name action = - action.should_fail_with Type_Error - action.catch . 
should_equal (Type_Error.Error Text Integer name) + check_scalar_type_error_handling action = + action.should_fail_with Invalid_Value_Type - check_scalar_type_error_handling "prefix" (t.filter "X" (Filter_Condition.Starts_With 42)) - check_scalar_type_error_handling "suffix" (t.filter "X" (Filter_Condition.Ends_With 42)) - check_scalar_type_error_handling "substring" (t.filter "X" (Filter_Condition.Contains 42)) - check_scalar_type_error_handling "pattern" (t.filter "X" (Filter_Condition.Like 42)) - check_scalar_type_error_handling "pattern" (t.filter "X" (Filter_Condition.Not_Like 42)) - check_scalar_type_error_handling "substring" (t.filter "X" (Filter_Condition.Not_Contains 42)) + check_scalar_type_error_handling (t.filter "X" (Filter_Condition.Starts_With 42)) + check_scalar_type_error_handling (t.filter "X" (Filter_Condition.Ends_With 42)) + check_scalar_type_error_handling (t.filter "X" (Filter_Condition.Contains 42)) + check_scalar_type_error_handling (t.filter "X" (Filter_Condition.Like 42)) + check_scalar_type_error_handling (t.filter "X" (Filter_Condition.Not_Like 42)) + check_scalar_type_error_handling (t.filter "X" (Filter_Condition.Not_Contains 42)) Test.specify "by nulls" <| t = table_builder [["ix", [1, 2, 3, 4]], ["X", [Nothing, 1, Nothing, 4]]] @@ -349,13 +350,25 @@ spec setup = t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]] t.filter_by_expression "[ix" . should_fail_with Expression_Error t.filter_by_expression "[ix" . catch . should_be_a Expression_Error.Syntax_Error - t.filter_by_expression "is_empty([b],False)" . should_fail_with Expression_Error - t.filter_by_expression "is_empty([b],False)" . catch . should_be_a Expression_Error.Argument_Mismatch t.filter_by_expression "Starts_With([b])" . should_fail_with Expression_Error t.filter_by_expression "Starts_With([b])" . catch . should_be_a Expression_Error.Argument_Mismatch t.filter_by_expression "[missing]" . should_fail_with No_Such_Column t.filter_by_expression "[ix]" . should_fail_with Invalid_Value_Type + ## This used to raise Expression_Error.Argument_Mismatch, but now we + cannot detect that. + + Argument_Mismatch is detected by applying all arguments to the + function, if there are too many it will fail with + `Type error: expected a function` which was being detected. + But it first runs the code of the function with as many arguments + as it needed, thus if the function fails, its error overrides the + arity error. + t.filter_by_expression "is_empty([b],False)" . should_fail_with Invalid_Value_Type + # If we provide good type for the first argument, then the error will be again as expected. + t.filter_by_expression "is_empty('', 42)" . should_fail_with Expression_Error + t.filter_by_expression "is_empty('', 42)" . catch . should_be_a Expression_Error.Argument_Mismatch + Test.specify "should report issues: floating point equality" <| t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", [10.0, 2.0001, 2.0, 4.5, -2.0]]] r1 = t.filter_by_expression "[X] * [X] == 4.0" on_problems=Problem_Behavior.Ignore diff --git a/test/Table_Tests/src/Common_Table_Operations/Integration_Tests.enso b/test/Table_Tests/src/Common_Table_Operations/Integration_Tests.enso index c1b7ebe8043a..c358faddf0e2 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Integration_Tests.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Integration_Tests.enso @@ -133,3 +133,46 @@ spec setup = r5 = t5 |> materialize r5.at "A" . to_vector . should_contain_the_same_elements_as ["b", "a"] r5.at "B" . 
to_vector . should_equal [5, 5] + + if setup.test_selection.fixed_length_text_columns then + Test.specify "types of unioned fixed-length columns should be correctly inferred after passing through other operations that infer types from Database, like aggregate Shortest" <| + t1 = table_builder [["X", ["a", "b", "c"]], ["Y", [1, 0, 2]]] . cast "X" (Value_Type.Char 1 False) + t2 = table_builder [["X", ["ddd", "eee", "fff"]], ["Y", [0, 1, 0]]] . cast "X" (Value_Type.Char 3 False) + t3 = t1.union t2 + + vt1 = t3.at "X" . value_type + Test.with_clue "t3[X].value_type="+vt1.to_display_text+": " <| + vt1.should_be_a (Value_Type.Char ...) + vt1.variable_length.should_be_true + + t4 = t3.aggregate [Aggregate_Column.Shortest "X", Aggregate_Column.Group_By "Y"] + vt2 = t4.at "Shortest X" . value_type + Test.with_clue "t4[X].value_type="+vt2.to_display_text+": " <| + vt2.should_be_a (Value_Type.Char ...) + vt2.variable_length.should_be_true + t5 = t4 |> materialize |> _.order_by "Y" + t5.at "Y" . to_vector . should_equal [0, 1, 2] + t5.at "Shortest X" . to_vector . should_equal ["b", "a", "c"] + + Test.specify "types should be correctly preserved after aggregation after iif" <| + t0 = table_builder [["x", [False, True, False]], ["A", ["a", "b", "c"]], ["B", ["xyz", "abc", "def"]]] + t1 = t0 . cast "A" (Value_Type.Char size=1 variable_length=False) . cast "B" (Value_Type.Char size=3 variable_length=False) + + x = t1.at "x" + a = t1.at "A" + b = t1.at "B" + a.value_type.should_equal (Value_Type.Char size=1 variable_length=False) + b.value_type.should_equal (Value_Type.Char size=3 variable_length=False) + + c = x.iif a b + c.to_vector.should_equal ["xyz", "b", "def"] + Test.with_clue "c.value_type="+c.value_type.to_display_text+": " <| + c.value_type.variable_length.should_be_true + + t2 = t1.set c "C" + t3 = t2.aggregate [Aggregate_Column.Shortest "C"] + t3.at "Shortest C" . to_vector . should_equal ["b"] + vt = t3.at "Shortest C" . value_type + Test.with_clue "t3[C].value_type="+vt.to_display_text+": " <| + vt.should_be_a (Value_Type.Char ...) + vt.variable_length.should_be_true diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso index 7288eb388836..f1180260a50a 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso @@ -304,34 +304,27 @@ spec setup = test result = result.should_fail_with Invalid_Value_Type - result.catch.expected.should_equal Value_Type.Char + result.catch.expected.should_equal "Char" test <| t1.join t2 on=(Join_Condition.Equals_Ignore_Case "X" "W") on_problems=Problem_Behavior.Ignore test <| t1.join t2 on=(Join_Condition.Equals_Ignore_Case "Y" "Z") on_problems=Problem_Behavior.Ignore - Test.specify "should report Invalid_Value_Type if incompatible types are correlated" pending="We need more advanced value type support to implement this in full-generality." <| - t1 = table_builder ["X", ["1", "2", "c"]] - t2 = table_builder ["Y", [1, 2, 3]] + Test.specify "should report Invalid_Value_Type if incompatible types are correlated" <| + t1 = table_builder [["X", ["1", "2", "c"]]] + t2 = table_builder [["Y", [1, 2, 3]]] r1 = t1.join t2 on_problems=Problem_Behavior.Ignore r1.should_fail_with Invalid_Value_Type - Test.specify "should report Invalid_Value_Type if incompatible columns types are correlated in Between" pending="We need more advanced value type support to implement this in full-generality." 
<| - t1 = table_builder ["X", ["1", "2", "c"], ["Y", [1, 2, 3]]] - t2 = table_builder ["Z", ["1", "2", "c"], ["W", [1, 2, 3]]] - - test expected actual err = - err.should_fail_with Invalid_Value_Type - err.catch . should_equal (Invalid_Value_Type.Error expected actual) + Test.specify "should report Invalid_Value_Type if incompatible columns types are correlated in Between" <| + t1 = table_builder [["X", ["1", "2", "c"]], ["Y", [1, 2, 3]]] + t2 = table_builder [["Z", ["1", "2", "c"]], ["W", [1, 2, 3]]] - test Value_Type.Char Value_Type.Integer <| - t1.join t2 on=(Join_Condition.Between "X" "W" "W") - test Value_Type.Integer Value_Type.Char <| - t1.join t2 on=(Join_Condition.Between "Y" "W" "Z") - test Value_Type.Integer Value_Type.Char <| - t1.join t2 on=(Join_Condition.Between "Y" "Z" "W") + t1.join t2 on=(Join_Condition.Between "X" "W" "W") . should_fail_with Invalid_Value_Type + t1.join t2 on=(Join_Condition.Between "Y" "W" "Z") . should_fail_with Invalid_Value_Type + t1.join t2 on=(Join_Condition.Between "Y" "Z" "W") . should_fail_with Invalid_Value_Type Test.specify "should warn when joining on equality of Decimal columns" <| t1 = table_builder [["X", [1.5, 2.0, 2.00000000001]], ["Y", [10, 20, 30]]] diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso index 8d421755b4f8..b9c3b5afd964 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso @@ -204,30 +204,21 @@ spec setup = expect_column_names ["A"] t3 t3.at "A" . to_vector . should_equal ["a", "b", "c", "xyz", "abc", "def"] t3.at "A" . value_type . is_text . should_be_true - t3.at "A" . value_type . variable_length . should_be_true + Test.with_clue "t3[A].value_type="+(t3.at "A").value_type.to_display_text+": " <| + t3.at "A" . value_type . variable_length . should_be_true Test.specify "should find a common type that will fit the merged columns" <| - t1 = table_builder [["int+bool", [1, 2, 3]], ["int+float", [0, 1, 2]]] - t2 = table_builder [["int+bool", [True, False, Nothing]], ["int+float", [1.0, 2.0, 2.5]]] + t1 = table_builder [["int+float", [0, 1, 2]]] + t2 = table_builder [["int+float", [1.0, 2.0, 2.5]]] - t1.at "int+bool" . value_type . is_integer . should_be_true t1.at "int+float" . value_type . is_integer . should_be_true - t2.at "int+bool" . value_type . is_boolean . should_be_true t2.at "int+float" . value_type . is_floating_point . should_be_true t3 = t1.union t2 - expect_column_names ["int+bool", "int+float"] t3 - t3.at "int+bool" . value_type . is_integer . should_be_true + expect_column_names ["int+float"] t3 t3.at "int+float" . value_type . is_floating_point . should_be_true - t3.at "int+bool" . to_vector . should_equal [1, 2, 3, 1, 0, Nothing] t3.at "int+float" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5] - t4 = table_builder [["float", [1.0, 2.0, 3.3]]] - t5 = t1.union [t2, t4] match_columns=Match_Columns.By_Position keep_unmatched_columns=False - expect_column_names ["int+bool"] t5 - t5.at "int+bool" . value_type . is_floating_point . should_be_true - t5.at "int+bool" . to_vector . should_equal [1, 2, 3, 1, 0, Nothing, 1.0, 2.0, 3.3] - # Database backends are not required to support Mixed types. 
if setup.is_database.not then Test.specify "should resort to Mixed value type only if at least one column is already Mixed" <| @@ -260,12 +251,28 @@ spec setup = t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]], ["C", [True, False, Nothing]]] t2 = table_builder [["C", ["x", "Y", "Z"]], ["A", [4, 5, 6]], ["B", [1, 2, 3]]] - action = t1.union t2 on_problems=_ - tester table = - expect_column_names ["A"] table - table.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6] - problems = [No_Common_Type.Error "B", No_Common_Type.Error "C"] - Problems.test_problem_handling action problems tester + r1 = t1.union t2 on_problems=Problem_Behavior.Report_Error + r1.should_fail_with No_Common_Type + + r2 = t1.union t2 on_problems=Problem_Behavior.Ignore + Problems.assume_no_problems r2 + + r3 = t1.union t2 on_problems=Problem_Behavior.Report_Warning + w3 = Problems.get_attached_warnings r3 + w3.each w-> w.should_be_a No_Common_Type + w3.map w-> + ## We look just at names of the Value_Type constructors, as + different database backends may choose integers of different + sizes and have differing settings for text types. + types = w.types.map value_type-> + Meta.meta value_type . constructor . name + (types == ["Char", "Integer"]) || (types == ["Boolean", "Char"]) . should_be_true + + # A boolean column cannot be merged with integers. + t3 = t1.select_columns ["C", "A"] reorder=True + t4 = t2.select_columns ["B", "A"] reorder=True + r4 = t3.union t4 match_columns=Match_Columns.By_Position on_problems=Problem_Behavior.Report_Error + r4.should_fail_with No_Common_Type Test.specify "if type widening is not allowed, should use the type from first table that contained the given column" <| t1 = table_builder [["A", [1, 2, 3]]] @@ -282,17 +289,13 @@ spec setup = t3.at "B" . value_type . is_floating_point . should_be_true Test.specify "if type widening is not allowed and types do not match, should report error and drop the problematic column" <| - t1 = table_builder [["A", [1, 2, 3]], ["B", [1, 2, 3]], ["C", [True, False, Nothing]], ["D", [10, 20, 30]], ["E", [1.1, 2.5, 3.2]]] - t2 = table_builder [["A", [4, 5, 6]], ["B", [1.5, 2.5, 3.5]], ["C", [1, 2, 3]], ["D", [True, True, True]], ["E", [1, 2, 3]]] + t1 = table_builder [["A", [1, 2, 3]], ["B", [1, 2, 3]], ["E", [1.1, 2.5, 3.2]]] + t2 = table_builder [["A", [4, 5, 6]], ["B", [1.5, 2.5, 3.5]], ["E", [1, 2, 3]]] t1.at "B" . value_type . is_integer . should_be_true - t1.at "C" . value_type . is_boolean . should_be_true - t1.at "D" . value_type . is_integer . should_be_true t1.at "E" . value_type . is_floating_point . should_be_true t2.at "B" . value_type . is_floating_point . should_be_true - t2.at "C" . value_type . is_integer . should_be_true - t2.at "D" . value_type . is_boolean . should_be_true t2.at "E" . value_type . is_integer . 
should_be_true action = t1.union t2 allow_type_widening=False on_problems=_ diff --git a/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso index 00e794b6bfd5..bd00c3e25322 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso @@ -1,6 +1,6 @@ from Standard.Base import all -from Standard.Table import Column_Selector +from Standard.Table import Column_Selector, Value_Type from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct from Standard.Table.Errors import all @@ -138,3 +138,103 @@ spec setup = no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"] no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_fail_with No_Output_Columns + + Test.group prefix+"Filling Missing Values" <| + Test.specify "should coerce long and double types to double" <| + table = table_builder [["X", [1, Nothing, 2, Nothing]], ["Y", [0.5, Nothing, Nothing, 0.25]]] + ints = table.at "X" + ints_filled = ints.fill_nothing 0.5 + ints_filled.to_vector . should_equal [1.0, 0.5, 2.0, 0.5] + ints_filled.value_type.is_floating_point.should_be_true + + decimals = table.at "Y" + decimals_filled = decimals.fill_nothing 42 + decimals_filled.to_vector . should_equal [0.5, 42.0, 42.0, 0.25] + decimals_filled.value_type.is_floating_point.should_be_true + + r1 = ints.fill_nothing decimals + r1.to_vector . should_equal [1.0, Nothing, 2.0, 0.25] + vt1 = r1.value_type + Test.with_clue "r1.value_type="+vt1.to_display_text+": " <| + vt1.is_floating_point.should_be_true + + r2 = ints.coalesce [decimals, 133] + r2.to_vector . should_equal [1.0, 133, 2.0, 0.25] + vt2 = r2.value_type + Test.with_clue "r2.value_type="+vt2.to_display_text+": " <| + vt2.is_floating_point.should_be_true + + t2 = table_builder [["X", [1, Nothing]], ["Y", [0.5, Nothing]]] + r3 = (t2.at "X").fill_nothing (t2.at "Y") + r3.to_vector . should_equal [1.0, Nothing] + vt3 = r3.value_type + Test.with_clue "r3.value_type="+vt3.to_display_text+": " <| + vt3.is_floating_point.should_be_true + + r4 = ints.fill_nothing 100.0 + r4.to_vector . should_equal [1, 100, 2, 100] + vt4 = r4.value_type + Test.with_clue "r4.value_type="+vt4.to_display_text+": " <| + vt4.is_floating_point.should_be_true + + Test.specify "should keep String, Boolean, Long and Double type" <| + table = table_builder [["X", ["a", Nothing, "b", Nothing]], ["Y", [True, False, Nothing, Nothing]], ["Z", [1, Nothing, 2, Nothing]], ["W", [0.5, Nothing, Nothing, 0.25]]] + strs = table.at "X" + strs_filled = strs.fill_nothing "X" + strs_filled.to_vector . should_equal ["a", "X", "b", "X"] + strs_filled.value_type.is_text.should_be_true + + bools = table.at "Y" + bools_filled = bools.fill_nothing False + bools_filled.to_vector . should_equal [True, False, False, False] + bools_filled.value_type . should_equal Value_Type.Boolean + + ints = table.at "Z" + ints_filled = ints.fill_nothing 42 + ints_filled.to_vector . should_equal [1, 42, 2, 42] + ints_filled.value_type.is_integer.should_be_true + + decimals = table.at "W" + decimals_filled = decimals.fill_nothing 1.0 + decimals_filled.to_vector . 
should_equal [0.5, 1.0, 1.0, 0.25] + decimals_filled.value_type.is_floating_point.should_be_true + + Test.specify "should not allow mixing types by default" <| + table = table_builder [["X", [1, Nothing, 2, Nothing]], ["Y", [True, False, Nothing, Nothing]], ["Z", [0.5, Nothing, Nothing, 0.25]]] + ints = table.at "X" + ints_filled = ints.fill_nothing False + ints_filled.should_fail_with No_Common_Type + + c = ints.coalesce [42.0, False] + c.should_fail_with No_Common_Type + + table.at "Y" . fill_nothing 42 . should_fail_with No_Common_Type + table.at "Z" . fill_nothing True . should_fail_with No_Common_Type + + if setup.is_database.not then + Test.specify "may allow mixed types if explicitly retyped" pending="TODO: cast #6112" <| + table = table_builder [["X", [1, Nothing, 2, Nothing]]] + ints = table.at "X" . cast Value_Type.Mixed + ints_filled = ints.fill_nothing False + ints_filled.to_vector . should_equal [1, False, 2, False] + ints_filled.value_type . should_equal Value_Type.Mixed + + Test.specify "should correctly unify text columns of various lengths" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <| + t0 = table_builder [["A", ["a", Nothing, "c"]], ["B", ["X", "Y", "Z"]], ["C", ["xyz", "abc", "def"]]] + t1 = t0 . cast "A" (Value_Type.Char size=1 variable_length=False) . cast "B" (Value_Type.Char size=1 variable_length=False) . cast "C" (Value_Type.Char size=3 variable_length=False) + + a = t1.at "A" + b = t1.at "B" + c = t1.at "C" + a.value_type.should_equal (Value_Type.Char size=1 variable_length=False) + b.value_type.should_equal (Value_Type.Char size=1 variable_length=False) + c.value_type.should_equal (Value_Type.Char size=3 variable_length=False) + + d = a.fill_nothing b + d.to_vector . should_equal ["a", "Y", "c"] + d.value_type . should_equal (Value_Type.Char size=1 variable_length=False) + + e = a.fill_nothing c + e.to_vector . should_equal ["a", "abc", "c"] + Test.with_clue "e.value_type="+e.value_type.to_display_text+": " <| + e.value_type.variable_length.should_be_true diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 5cf2e3ed5fa2..cdc4e6b8f51e 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -68,12 +68,6 @@ spec = t2 = t1.filter "A" (Filter_Condition.Between 10 20) t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN ? AND ?)', [10, 20]] - t3 = t1.filter "A" (Filter_Condition.Between (t1.at "B") (t1.at "C")) - t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND "T1"."C")', []] - - t4 = t1.filter "A" (Filter_Condition.Between (t1.at "B") 33) - t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND ?)', [33]] - Test.specify "should generate an IN expression" <| t2 = t1.filter "A" (Filter_Condition.Is_In [1, 2, 'foo']) t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?, ?, ?), FALSE)', [1, 2, "foo"]] @@ -89,8 +83,8 @@ spec = Test.group "[Codegen] Handling Missing Values" <| Test.specify "fill_nothing should allow to replace missing values in a column with a constant" <| - c = t1.at "A" . 
fill_nothing "not-applicable" - c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "fill_nothing([A], \'not-applicable\')" FROM "T1" AS "T1"', ["not-applicable"]] + c = t1.at "B" . fill_nothing "not-applicable" + c.to_sql.prepare . should_equal ['SELECT CAST(COALESCE("T1"."B", ?) AS TEXT) AS "fill_nothing([B], \'not-applicable\')" FROM "T1" AS "T1"', ["not-applicable"]] Test.specify "filter_blank_rows should drop rows that contain at least one missing column in a Table" <| t2 = t1.filter_blank_rows when_any=True diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 8f2dc61ac4fb..7fd67a017e28 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -147,7 +147,7 @@ spec prefix connection = col = t4.at 'a' col.length . should_equal 5 col.count . should_equal 3 - col.count_missing . should_equal 2 + col.count_nothing . should_equal 2 Test.group prefix+"Sorting" <| df = upload "clothes" <| diff --git a/test/Table_Tests/src/Database/Postgres_Spec.enso b/test/Table_Tests/src/Database/Postgres_Spec.enso index 7a17c709943b..dd5ab249609d 100644 --- a/test/Table_Tests/src/Database/Postgres_Spec.enso +++ b/test/Table_Tests/src/Database/Postgres_Spec.enso @@ -22,7 +22,7 @@ import project.Database.Types.Postgres_Type_Mapping_Spec import project.Common_Table_Operations from project.Database.Types.Postgres_Type_Mapping_Spec import default_text -postgres_specific_spec connection db_name = +postgres_specific_spec connection db_name setup = Test.group "[PostgreSQL] Schemas and Databases" <| Test.specify "should be able to get current database and list databases" <| connection.database . should_equal db_name @@ -158,6 +158,30 @@ postgres_specific_spec connection db_name = connection.execute_update 'DROP TABLE "'+name+'"' + table_builder = setup.table_builder + materialize = setup.materialize + Test.group "[PostgreSQL] Edge Cases" <| + Test.specify "materialize should respect the overridden type" <| + t0 = table_builder [["x", [False, True, False]], ["A", ["a", "b", "c"]], ["B", ["xyz", "abc", "def"]]] + t1 = t0 . cast "A" (Value_Type.Char size=1 variable_length=False) . 
cast "B" (Value_Type.Char size=3 variable_length=False) + + x = t1.at "x" + a = t1.at "A" + b = t1.at "B" + a.value_type.should_equal (Value_Type.Char size=1 variable_length=False) + b.value_type.should_equal (Value_Type.Char size=3 variable_length=False) + + c = x.iif a b + c.to_vector.should_equal ["xyz", "b", "def"] + Test.with_clue "c.value_type="+c.value_type.to_display_text+": " <| + c.value_type.variable_length.should_be_true + + d = materialize c + d.to_vector.should_equal ["xyz", "b", "def"] + Test.with_clue "d.value_type="+d.value_type.to_display_text+": " <| + d.value_type.variable_length.should_be_true + + run_tests connection db_name = prefix = "[PostgreSQL] " name_counter = Ref.new 0 @@ -179,7 +203,6 @@ run_tests connection db_name = materialize = .read Common_Spec.spec prefix connection - postgres_specific_spec connection db_name common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by_unicode_normalization_by_default=True take_drop=False allows_mixed_type_comparisons=False fixed_length_text_columns=True aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config first_last_row_order=False aggregation_problems=False @@ -190,6 +213,7 @@ run_tests connection db_name = tables.append empty_agg_table.name setup = Common_Table_Operations.Main.Test_Setup.Config prefix agg_table empty_agg_table table_builder materialize is_database=True test_selection=common_selection aggregate_test_selection=aggregate_selection + postgres_specific_spec connection db_name setup Common_Table_Operations.Main.spec setup clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso b/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso index 0e03fe4b6d9e..b5e9c8890d36 100644 --- a/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso +++ b/test/Table_Tests/src/Database/Types/Postgres_Type_Mapping_Spec.enso @@ -2,6 +2,7 @@ from Standard.Base import all from Standard.Table import Aggregate_Column, Value_Type import Standard.Table.Data.Type.Value_Type.Bits +from Standard.Table.Errors import Inexact_Type_Coercion from Standard.Database import SQL_Query @@ -123,13 +124,13 @@ spec connection db_name = t2.at "b" . value_type . should_be_a (Value_Type.Unsupported_Data_Type ...) t2.at "c" . value_type . should_be_a (Value_Type.Unsupported_Data_Type ...) - Test.specify "should approximate types to the closest supported one" pending="TODO: Table.cast" <| - # TODO this will be tested once the cast operator is implemented - # Binary 10 variable_length=False -> Binary max_int4 variable_length=True - # Byte -> Integer Bits.Bits_16 - Nothing + Test.specify "should approximate types to the closest supported one" <| + t = make_table "T" [["b", "INT"]] + t2 = t.cast "b" Value_Type.Byte + t2.at "b" . value_type . 
should_equal (Value_Type.Integer Bits.Bits_16) + Problems.expect_warning Inexact_Type_Coercion t2 main = Test_Suite.run_main (test_with_connection spec) max_int4 = 2147483647 -default_text = Value_Type.Char size=max_int4 variable_length=True +default_text = Value_Type.Char size=Nothing variable_length=True diff --git a/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso b/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso index ec43cb2af77f..28f6ec646ad5 100644 --- a/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso +++ b/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso @@ -2,7 +2,7 @@ from Standard.Base import all import Standard.Table.Data.Type.Value_Type.Bits from Standard.Table import Aggregate_Column, Value_Type -from Standard.Table.Errors import Invalid_Value_Type +from Standard.Table.Errors import Invalid_Value_Type, Inexact_Type_Coercion from Standard.Database import Database, SQLite, In_Memory, SQL_Query @@ -25,7 +25,7 @@ spec = t.at "int" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t.at "real" . value_type . should_equal (Value_Type.Float Bits.Bits_64) t.at "text" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True) - t.at "blob" . value_type . should_equal Value_Type.Mixed + t.at "blob" . value_type . should_equal Value_Type.Binary # We treat numeric as a float, since that is what really sits in SQLite under the hood. t.at "numeric" . value_type . should_equal (Value_Type.Float Bits.Bits_64) @@ -33,7 +33,7 @@ spec = t = make_table "complex" [["a", "VARCHAR(15)"], ["b", "CHAR(10)"], ["c", "BINARY(10)"], ["d", "BIGINT"], ["e", "SMALLINT"], ["f", "TINYINT"], ["g", "FLOAT"], ["h", "DOUBLE"]] t.at "a" . value_type . should_equal Value_Type.Char t.at "b" . value_type . should_equal Value_Type.Char - t.at "c" . value_type . should_equal Value_Type.Mixed + t.at "c" . value_type . should_equal Value_Type.Binary t.at "d" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t.at "e" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t.at "f" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) @@ -50,8 +50,8 @@ spec = c2 = t.compute "[a] && False" c2 . value_type . should_equal Value_Type.Boolean - #c3 = t.compute "[a] && 10" - #c3.should_fail_with Invalid_Value_Type + c3 = t.compute "[a] && 10" + c3.should_fail_with Invalid_Value_Type Test.specify "should correctly handle types through operations" <| t = make_table "foo" [["a", "int"], ["b", "text"], ["c", "boolean"], ["d", "double precision"]] @@ -80,10 +80,15 @@ spec = # First is not currently implemented in SQLite # t2.at "First c" . value_type . should_equal Value_Type.Boolean - Test.specify "should approximate types to the closest supported one" pending="TODO: Table.cast" <| - # TODO this will be tested once the cast operator is implemented - # Binary 10 variable_length=False -> Binary max_int4 variable_length=True - # Byte -> Integer Bits.Bits_16 - Nothing + Test.specify "should approximate types to the closest supported one" <| + t = make_table "T" [["a", "BINARY"], ["b", "INT"]] + + t1 = t.cast "a" (Value_Type.Binary 10 variable_length=True) + t1.at "a" . value_type . should_equal Value_Type.Binary + Problems.expect_warning Inexact_Type_Coercion t1 + + t2 = t.cast "b" Value_Type.Byte + t2.at "b" . value_type . 
should_equal Value_Type.Integer + Problems.expect_warning Inexact_Type_Coercion t2 main = Test_Suite.run_main spec diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index 937d7b4f7789..918b896affae 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -368,7 +368,7 @@ spec = r1 = t1.parse columns=["A", "B", "C"] r1.should_fail_with Invalid_Value_Type r1.catch.related_column . should_equal "A" - r1.catch.expected.is_text.should_be_true + r1.catch.expected.should_equal "Char" Test.specify "should error if no input columns selected, unless error_on_missing_columns=False" <| t1 = Table.new [["A", ["1", "2", "3"]]] @@ -619,7 +619,7 @@ spec = r1 = c1.parse r1.should_fail_with Invalid_Value_Type r1.catch.related_column . should_equal "A" - r1.catch.expected.is_text.should_be_true + r1.catch.expected . should_equal "Char" main = Test_Suite.run_main spec diff --git a/test/Table_Tests/src/Helpers/Value_Type_Spec.enso b/test/Table_Tests/src/Helpers/Value_Type_Spec.enso index e21101bba6e5..876018133e95 100644 --- a/test/Table_Tests/src/Helpers/Value_Type_Spec.enso +++ b/test/Table_Tests/src/Helpers/Value_Type_Spec.enso @@ -30,17 +30,14 @@ spec = Test.specify "should use correct in-memory logic to reconcile pairs of types for operations like union/iif" <| Value_Type_Helpers.reconcile_types Value_Type.Boolean Value_Type.Boolean . should_equal Value_Type.Boolean - Value_Type_Helpers.reconcile_types Value_Type.Boolean Value_Type.Integer . should_equal Value_Type.Integer Value_Type_Helpers.reconcile_types (Value_Type.Integer Bits.Bits_16) (Value_Type.Integer Bits.Bits_32) . should_equal (Value_Type.Integer Bits.Bits_32) Value_Type_Helpers.reconcile_types (Value_Type.Float Bits.Bits_32) (Value_Type.Float Bits.Bits_32) . should_equal (Value_Type.Float Bits.Bits_32) Value_Type_Helpers.reconcile_types (Value_Type.Float Bits.Bits_32) (Value_Type.Float Bits.Bits_64) . should_equal (Value_Type.Float Bits.Bits_64) - Value_Type_Helpers.reconcile_types Value_Type.Boolean Value_Type.Byte . should_equal Value_Type.Byte Value_Type_Helpers.reconcile_types (Value_Type.Integer Bits.Bits_16) Value_Type.Byte . should_equal (Value_Type.Integer Bits.Bits_16) # 64-bit floats are always used when unifying with integers Value_Type_Helpers.reconcile_types (Value_Type.Float Bits.Bits_32) Value_Type.Byte . should_equal Value_Type.Float - Value_Type_Helpers.reconcile_types (Value_Type.Float Bits.Bits_32) Value_Type.Boolean . should_equal Value_Type.Float Value_Type_Helpers.reconcile_types (Value_Type.Char 10 False) (Value_Type.Char 10 False) . should_equal (Value_Type.Char 10 False) Value_Type_Helpers.reconcile_types (Value_Type.Char 10 False) (Value_Type.Char 10 True) . should_equal (Value_Type.Char 10 True) @@ -61,5 +58,8 @@ spec = Value_Type_Helpers.reconcile_types Value_Type.Float Value_Type.Binary . should_equal Value_Type.Mixed Value_Type_Helpers.reconcile_types Value_Type.Char Value_Type.Binary . should_equal Value_Type.Mixed Value_Type_Helpers.reconcile_types Value_Type.Char Value_Type.Boolean . should_equal Value_Type.Mixed + Value_Type_Helpers.reconcile_types Value_Type.Boolean Value_Type.Integer . should_equal Value_Type.Mixed + Value_Type_Helpers.reconcile_types Value_Type.Boolean Value_Type.Byte . should_equal Value_Type.Mixed + Value_Type_Helpers.reconcile_types (Value_Type.Float Bits.Bits_32) Value_Type.Boolean . 
should_equal Value_Type.Mixed main = Test_Suite.run_main spec diff --git a/test/Table_Tests/src/In_Memory/Table_Spec.enso b/test/Table_Tests/src/In_Memory/Table_Spec.enso index 4412fb2760b3..fb5d3d2cb470 100644 --- a/test/Table_Tests/src/In_Memory/Table_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Table_Spec.enso @@ -85,6 +85,9 @@ spec = Test.specify "should handle error scenarios gracefully" <| Table.new [["X", [1,2,3]], ["Y", [4]]] . should_fail_with Illegal_Argument Table.new [["X", [1]], ["X", [2]]] . should_fail_with Illegal_Argument + Table.new [["X", [1]], ["Y", [2], "Z"]] . should_fail_with Illegal_Argument + Table.new [["X"]] . should_fail_with Illegal_Argument + Table.new ["X", "Y", "Z"] . should_fail_with Illegal_Argument Table.new [] . should_fail_with Illegal_Argument Table.from_rows ["X", "X"] [] . should_fail_with Illegal_Argument @@ -193,7 +196,7 @@ spec = x = Column.from_vector 'a' [1, Nothing, 2, Nothing] y = Column.from_vector 'b' [0, 0, 0, 0] - r = x.zip y skip_missing=False x-> y-> if x == 1 then x else y + r = x.zip y skip_nothing=False x-> y-> if x == 1 then x else y r.to_vector.should_equal [1, 0, 0, 0] r.value_type . should_equal Value_Type.Integer @@ -287,61 +290,12 @@ spec = r.at "b" . to_vector . should_equal [2] r.at "c" . to_vector . should_equal [Nothing] - Test.group "Filling Missing Values" <| - Test.specify "should coerce non-coercible types to Object" <| - strs = Column.from_vector 'x' ["a", Nothing, "b", Nothing] - strs_filled = strs.fill_nothing False - strs_filled.to_vector . should_equal ["a", False, "b", False] - strs_filled.value_type . should_equal Value_Type.Mixed - - ints = Column.from_vector 'x' [1, Nothing, 2, Nothing] - ints_filled = ints.fill_nothing "X" - ints_filled.to_vector . should_equal [1, "X", 2, "X"] - ints_filled.value_type . should_equal Value_Type.Mixed - - bools = Column.from_vector 'x' [True, False, Nothing] - bools_filled = bools.fill_nothing "X" - bools_filled.to_vector . should_equal [True, False, "X"] - bools_filled.value_type . should_equal Value_Type.Mixed - - Test.specify "should coerce long and double types to double" <| - ints = Column.from_vector 'x' [1, Nothing, 2, Nothing] - ints_filled = ints.fill_nothing 0.5 - ints_filled.to_vector . should_equal [1.0, 0.5, 2.0, 0.5] - ints_filled.value_type . should_equal Value_Type.Float - - decimals = Column.from_vector 'x' [0.5, Nothing, Nothing, 0.25] - decimals_filled = decimals.fill_nothing 42 - decimals_filled.to_vector . should_equal [0.5, 42.0, 42.0, 0.25] - decimals_filled.value_type . should_equal Value_Type.Float - - Test.specify "should keep String, Boolean, Long and Double type" <| - strs = Column.from_vector 'x' ["a", Nothing, "b", Nothing] - strs_filled = strs.fill_nothing "X" - strs_filled.to_vector . should_equal ["a", "X", "b", "X"] - strs_filled.value_type . should_equal Value_Type.Char - - bools = Column.from_vector 'x' [True, False, Nothing] - bools_filled = bools.fill_nothing False - bools_filled.to_vector . should_equal [True, False, False] - bools_filled.value_type . should_equal Value_Type.Boolean - - ints = Column.from_vector 'x' [1, Nothing, 2, Nothing] - ints_filled = ints.fill_nothing 42 - ints_filled.to_vector . should_equal [1, 42, 2, 42] - ints_filled.value_type . should_equal Value_Type.Integer - - decimals = Column.from_vector 'x' [0.5, Nothing, Nothing, 0.25] - decimals_filled = decimals.fill_nothing 1.0 - decimals_filled.to_vector . should_equal [0.5, 1.0, 1.0, 0.25] - decimals_filled.value_type . 
should_equal Value_Type.Float - Test.group "Counting Values" <| Test.specify "should count missing and non-missing values" <| col = Column.from_vector 'x' [1, Nothing, 2] col.length . should_equal 3 col.count . should_equal 2 - col.count_missing . should_equal 1 + col.count_nothing . should_equal 1 Test.group "Dropping Missing Values" <| Test.specify "should correctly handle NaNs with mixed type columns" <| @@ -350,6 +304,11 @@ spec = t1.at "X" . to_vector . should_equal [1, 4, 5] # Comparing text value because `Number.nan != Number.nan`. t1.at "Y" . to_vector . to_text . should_equal "[A, NaN, 0]" + + c = t.at "Y" . is_blank treat_nans_as_blank=True + c.to_vector . should_equal [False, True, True, True, False] + c.value_type . should_equal Value_Type.Boolean + t2 = t.filter_blank_rows when_any=True treat_nans_as_blank=True t2.at "X" . to_vector . should_equal [1, 5] t2.at "Y" . to_vector . should_equal ['A', 0] @@ -819,7 +778,10 @@ spec = False -> True Nothing -> Nothing negated_column_vector = column_vector.map not - t = Table.new [["X", column_vector]] + ## A workaround to ensure that X has Boolean type. + It can be removed with + t0 = Table.new [["X", [True]+column_vector]] + t = t0.drop 1 in_column = Column.from_vector "in" in_vector expected_vector = column_vector.filter (Filter_Condition.Is_In in_vector)