diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f92e317649..cde693bece0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -534,6 +534,7 @@ - [Improving date/time support in Table - added `date_diff`, `date_add`, `date_part` and some shorthands. Extended `Time_Period` with milli-, micro- and nanosecond periods.][7221] +- [Implemented `replace` on database columns.][7275] - [Retire `Column_Selector` and allow regex based selection of columns.][7295] - [`Text.parse_to_table` can take a `Regex`.][7297] @@ -765,6 +766,7 @@ [7223]: https://github.com/enso-org/enso/pull/7223 [7234]: https://github.com/enso-org/enso/pull/7234 [7221]: https://github.com/enso-org/enso/pull/7221 +[7275]: https://github.com/enso-org/enso/pull/7275 [7295]: https://github.com/enso-org/enso/pull/7295 [7297]: https://github.com/enso-org/enso/pull/7297 diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso index 5460af176689..48c4baac383c 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso @@ -1,3 +1,4 @@ +import project.Any.Any import project.Data.Locale.Locale import project.Data.Text.Regex.Regex import project.Data.Text.Text @@ -60,3 +61,12 @@ type Case_Sensitivity to_explicit_sensitivity_in_memory self = case self of Case_Sensitivity.Default -> Case_Sensitivity.Sensitive _ -> self + + ## PRIVATE + Throws an error if self is Insensitive with a non-default locale + disallow_non_default_locale : Any -> Any + disallow_non_default_locale self ~action = case self of + Case_Sensitivity.Insensitive locale -> if locale == Locale.default then action else + msg = "Custom locales are not supported for this operation." 
+ Error.throw (Illegal_Argument.Error msg) + _ -> action diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso index 8db25d7c4782..e6d27666ee4d 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso @@ -392,6 +392,12 @@ type Regex if should_recompile.not then self else Regex.compile self.internal_regex_object.pattern case_insensitive + ## PRIVATE + + Get the original pattern string as a `Text`. + pattern_string : Text + pattern_string self = self.internal_regex_object.pattern + ## PRIVATE Convert the polyglot map to a Map. polyglot_map_to_map : Any -> Map Any Any diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 77be994d5de6..7c722eb19106 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -23,6 +23,7 @@ import project.Internal.IR.Context.Context import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.Query.Query import project.Internal.IR.SQL_Expression.SQL_Expression +import project.Internal.Replace_Params.Replace_Params import project.Internal.SQL_Type_Reference.SQL_Type_Reference from project.Data.Table import freshen_columns, Table from project.Errors import Integrity_Error, Unsupported_Database_Operation, Unsupported_Name @@ -1125,6 +1126,40 @@ type Column sensitively. - only_first: If True, only replace the first match. + ! 
Backend Support + + Each database backend supports different combinations of options: + + Text: + +----------------+------------+----------+--------+ + | case_sensitive | only_first | postgres | sqlite | + +----------------+------------+----------+--------+ + | t | f | ✓ | ✓ | + | t | t | ✓ | ✓ | + | f | f | ✓ | ✗ | + | f | t | ✓ | ✓ | + +----------------+------------+----------+--------+ + + Regex: + +----------------+------------+----------+--------+ + | case_sensitive | only_first | postgres | sqlite | + +----------------+------------+----------+--------+ + | t | f | ✓ | ✗ | + | t | t | ✓ | ✗ | + | f | f | ✓ | ✗ | + | f | t | ✓ | ✗ | + +----------------+------------+----------+--------+ + + Text Column: + +----------------+------------+----------+--------+ + | case_sensitive | only_first | postgres | sqlite | + +----------------+------------+----------+--------+ + | t | f | ✓ | ✓ | + | t | t | ✗ | ✓ | + | f | f | ✗ | ✗ | + | f | t | ✗ | ✓ | + +----------------+------------+----------+--------+ + > Example Replace dashes with underscores. @@ -1140,11 +1175,17 @@ type Column column.replace '"(.*?)"'.to_regex '($1)' @term make_regex_text_widget - replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column - replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = - _ = [term, new_text, case_sensitivity, only_first] - msg = "`Column.replace` is not yet implemented." - Error.throw (Unsupported_Database_Operation.Error msg) + replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column ! 
Unsupported_Database_Operation + replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Default only_first=False = + Value_Type.expect_text self <| case_sensitivity.disallow_non_default_locale <| + input_type = Meta.type_of term + params = Replace_Params.Value input_type case_sensitivity only_first + self.connection.dialect.if_replace_params_supports params <| + raw_term = case term of + _ : Regex -> term.pattern_string + _ -> term + new_name = self.naming_helpers.function_name "replace" [self, raw_term, new_text] + self.make_op "REPLACE" [raw_term, new_text] new_name [term, params] ## Gets the year as a number from the date stored in the column. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index b2683b79f345..524731be16f0 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -19,6 +19,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor import project.Internal.IR.Query.Query import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.Postgres.Postgres_Dialect +import project.Internal.Replace_Params.Replace_Params import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping import project.Internal.SQL_Type_Reference.SQL_Type_Reference import project.Internal.SQLite.SQLite_Dialect @@ -231,6 +232,13 @@ type Dialect _ = [period, operation_input_type] Unimplemented.throw "This is an interface only." + ## PRIVATE + Runs `action` if the `replace` parameters are supported by this backend. + if_replace_params_supports : Replace_Params -> Any -> Any + if_replace_params_supports self replace_params ~action = + _ = [replace_params, action] + Unimplemented.throw "This is an interface only." + ## PRIVATE The dialect of SQLite databases. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index e810fd4699f6..3513d93e4673 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -6,11 +6,13 @@ import Standard.Base.Errors.Unimplemented.Unimplemented import Standard.Table.Data.Aggregate_Column.Aggregate_Column import Standard.Table.Internal.Naming_Helpers.Naming_Helpers import Standard.Table.Internal.Problem_Builder.Problem_Builder +import Standard.Table.Internal.Vector_Builder.Vector_Builder from Standard.Table import Value_Type from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all from Standard.Table.Errors import Inexact_Type_Coercion import project.Connection.Connection.Connection +import project.Data.Column.Column import project.Data.Dialect import project.Data.SQL.Builder import project.Data.SQL.SQL_Fragment @@ -33,6 +35,7 @@ import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind import project.Internal.Postgres.Postgres_Error_Mapper.Postgres_Error_Mapper import project.Internal.Postgres.Postgres_Type_Mapping.Postgres_Type_Mapping +import project.Internal.Replace_Params.Replace_Params import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping import project.Internal.SQL_Type_Reference.SQL_Type_Reference import project.Internal.Statement_Setter.Statement_Setter @@ -239,10 +242,16 @@ type Postgres_Dialect _ -> Date_Period_Metadata.Value period operation_input_type + ## PRIVATE + Runs `action` if the `replace` parameters are supported by this backend; otherwise returns an `Unsupported_Database_Operation` error. 
+ if_replace_params_supports : Replace_Params -> Any -> Any + if_replace_params_supports self replace_params ~action = + if supported_replace_params.contains replace_params then action else replace_params.throw_unsupported + ## PRIVATE make_internal_generator_dialect = cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]] - text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive]+concat_ops+cases+trim_ops + text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace]]+concat_ops+cases+trim_ops counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]] arith_extensions = [is_nan, is_inf, floating_point_div, mod_op, decimal_div, decimal_mod, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]] bool = [bool_or] @@ -492,6 +501,67 @@ decimal_div = Base_Generator.lift_binary_op "DECIMAL_DIV" x-> y-> decimal_mod = Base_Generator.lift_binary_op "DECIMAL_MOD" x-> y-> x ++ " - FLOOR(CAST(" ++ x ++ " AS decimal) / CAST(" ++ y ++ " AS decimal)) * " ++ y +## PRIVATE +supported_replace_params : Set Replace_Params +supported_replace_params = + e0 = [Replace_Params.Value Text Case_Sensitivity.Default False, Replace_Params.Value Text Case_Sensitivity.Default True, Replace_Params.Value Text Case_Sensitivity.Sensitive False] + e1 = [Replace_Params.Value Text Case_Sensitivity.Sensitive True, Replace_Params.Value Text Case_Sensitivity.Insensitive False, Replace_Params.Value Text Case_Sensitivity.Insensitive True] + e2 = [Replace_Params.Value Regex Case_Sensitivity.Default False, Replace_Params.Value Regex Case_Sensitivity.Default True, Replace_Params.Value Regex Case_Sensitivity.Sensitive False] + e3 = [Replace_Params.Value Regex Case_Sensitivity.Sensitive True, Replace_Params.Value Regex 
Case_Sensitivity.Insensitive False, Replace_Params.Value Regex Case_Sensitivity.Insensitive True] + e4 = [Replace_Params.Value Column Case_Sensitivity.Default False, Replace_Params.Value Column Case_Sensitivity.Sensitive False] + Set.from_vector <| e0 + e1 + e2 + e3 + e4 + +replace : Vector Builder -> Any -> Builder +replace args metadata = + input = args.at 0 + pattern = args.at 1 + replacement = args.at 2 + + ## `raw_pattern` is the original `Text`, `Regex` or `Column` term behind `pattern`, but not + embedded in IR. + raw_pattern = metadata.at 0 + replace_params = metadata.at 1 + + expression = case replace_params.input_type of + Text -> + ## To use REGEXP_REPLACE on a non-regex, we have to escape it. + escaped_pattern = Builder.interpolation (Regex.escape raw_pattern) + case replace_params.only_first of + False -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> + Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ escaped_pattern ++ ", " ++ replacement ++ ", 'ig')" + _ -> + Builder.code "REPLACE(" ++ input ++ ", " ++ pattern ++ ", " ++ replacement ++ ")" + True -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> + Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ escaped_pattern ++ ", " ++ replacement ++ ", 'i')" + _ -> + Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ escaped_pattern ++ ", " ++ replacement ++ ")" + Regex -> + pattern_string = Builder.interpolation raw_pattern.pattern_string + case replace_params.only_first of + False -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> + Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ pattern_string ++ ", " ++ replacement ++ ", 'ig')" + _ -> + Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ pattern_string ++ ", " ++ replacement ++ ", 'g')" + True -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> + Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ pattern_string ++ ", " ++ replacement ++ ", 'i')" + _ -> 
+ Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ pattern_string ++ ", " ++ replacement ++ ")" + Column -> + case replace_params.only_first of + False -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> + Nothing + _ -> + Builder.code "REPLACE(" ++ input ++ ", " ++ pattern ++ ", " ++ replacement ++ ")" + True -> Nothing + case expression of + Nothing -> replace_params.throw_unsupported + _ -> expression + ## PRIVATE make_extract_as_int enso_name sql_name=enso_name = Base_Generator.lift_unary_op enso_name arg-> diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Replace_Params.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Replace_Params.enso new file mode 100644 index 000000000000..1456f25bade8 --- /dev/null +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Replace_Params.enso @@ -0,0 +1,16 @@ +import Standard.Base.Data.Boolean.Boolean +import Standard.Base.Error.Error +import Standard.Base.Nothing.Nothing +import Standard.Base.Data.Text.Case_Sensitivity.Case_Sensitivity + +from project.Errors import Unsupported_Database_Operation + +# Specifies a set of parameters to Table.replace +type Replace_Params + Value input_type (case_sensitivity : Case_Sensitivity) (only_first : Boolean) + + ## Raise an exception for an unsupported combination of parameters + throw_unsupported : Nothing ! 
Unsupported_Database_Operation + throw_unsupported self = + msg = "The REPLACE operation is not supported on this backend for the following options: argument type " + self.input_type.to_text + ", case_sensitivity " + self.case_sensitivity.to_display_text + ", only_first " + self.only_first.to_text + Error.throw (Unsupported_Database_Operation.Error msg) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index f773cf204666..1ff1affbccec 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -10,6 +10,7 @@ from Standard.Table import Value_Type from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all import project.Connection.Connection.Connection +import project.Data.Column.Column import project.Data.Dialect import project.Data.SQL.Builder import project.Data.SQL_Statement.SQL_Statement @@ -28,6 +29,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor import project.Internal.IR.Query.Query import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind +import project.Internal.Replace_Params.Replace_Params import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping import project.Internal.SQL_Type_Reference.SQL_Type_Reference import project.Internal.SQLite.SQLite_Error_Mapper.SQLite_Error_Mapper @@ -265,9 +267,15 @@ type SQLite_Dialect _ = [period, operation_input_type] Error.throw (Unsupported_Database_Operation.Error "SQLite backend does not support date/time operations.") + ## PRIVATE + Runs `action` if the `replace` parameters are supported by this backend; otherwise returns an `Unsupported_Database_Operation` error. 
+ if_replace_params_supports : Replace_Params -> Any -> Any + if_replace_params_supports self replace_params ~action = + if supported_replace_params.contains replace_params then action else replace_params.throw_unsupported + ## PRIVATE make_internal_generator_dialect = - text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops+trim_ops + text = [starts_with, contains, ends_with, make_case_sensitive, ["REPLACE", replace]]+concat_ops+trim_ops counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]] stats = [agg_stddev_pop, agg_stddev_samp] arith_extensions = [is_inf, floating_point_div, mod_op] @@ -405,6 +413,47 @@ floating_point_div = Base_Generator.lift_binary_op "/" x-> y-> mod_op = Base_Generator.lift_binary_op "mod" x-> y-> x ++ " - FLOOR(CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)) * " ++ y +## PRIVATE +supported_replace_params : Set Replace_Params +supported_replace_params = + e = [Replace_Params.Value Text Case_Sensitivity.Default False, Replace_Params.Value Text Case_Sensitivity.Sensitive False, Replace_Params.Value Text Case_Sensitivity.Default True, Replace_Params.Value Text Case_Sensitivity.Sensitive True, Replace_Params.Value Text Case_Sensitivity.Insensitive True] + Set.from_vector e + +replace : Vector Builder -> Any -> Builder +replace args metadata = + input = args.at 0 + pattern = args.at 1 + replacement = args.at 2 + + replace_params = metadata.at 1 + + expression = case replace_params.input_type == Text || replace_params.input_type == Column of + True -> + ## Non-regex patterns use REPLACE or INSTR/SUBSTR here, so no regex escaping is needed. 
+ case replace_params.only_first of + False -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> Nothing + _ -> + Builder.code "REPLACE(" ++ input ++ ", " ++ pattern ++ ", " ++ replacement ++ ")" + True -> case replace_params.case_sensitivity of + Case_Sensitivity.Insensitive _ -> + replace_only_first False input pattern replacement + _ -> + replace_only_first True input pattern replacement + False -> Nothing + case expression of + Nothing -> replace_params.throw_unsupported + _ -> expression + +replace_only_first case_sensitive t p r = + search_string = if case_sensitive then t else + Builder.code "LOWER(" ++ t ++ ")" + instr = Builder.code "INSTR(" ++ search_string ++ ", " ++ (if case_sensitive then p else Builder.code "LOWER(" ++ p ++ ")") ++ ")" + prefix = Builder.code "SUBSTR(" ++ t ++ ", 1," ++ instr ++ "-1)" + suffix = Builder.code "SUBSTR(" ++ t ++ "," ++ instr ++ "+LENGTH(" ++ p ++ "))" + concatenation = prefix ++ " || " ++ r ++ " || " ++ suffix + Builder.code "CASE WHEN " ++ instr ++ "= 0 THEN " ++ t ++ " ELSE " ++ concatenation ++ "END" + ## PRIVATE It will return `Nothing` if the type does not require custom logic. 
make_custom_cast column target_value_type type_mapping = diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso index 7faff3c808e4..a87f5c199eef 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso @@ -181,7 +181,7 @@ operations_map = always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"] always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"] - always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM"] + always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE"] always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"] same_as_first = ["TRUNCATE", "CEIL", "FLOOR"] arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"] diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso index a584bc1b1a24..2a5094c6e95a 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso @@ -1,6 +1,10 @@ from Standard.Base import all + import Standard.Base.Errors.Common.Arithmetic_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import Standard.Base.Meta.Type +import Standard.Database.Data.Column.Column +import 
Standard.Database.Internal.Replace_Params.Replace_Params from Standard.Table import Value_Type from Standard.Table.Data.Type.Value_Type import Bits @@ -1038,6 +1042,103 @@ spec setup = op a True . should_fail_with Invalid_Value_Type Test.group prefix+"Column Operations - Text Replace" <| + do_replace column term new_text case_sensitivity=Case_Sensitivity.Default only_first=False expected = + case setup.is_database of + True -> + input_type = Meta.type_of term + params = Replace_Params.Value input_type case_sensitivity only_first + supported_replace_params = setup.test_selection.supported_replace_params + supported_replace_params . should_be_a Set + are_params_supported = supported_replace_params.contains params + case are_params_supported of + True -> column.replace term new_text case_sensitivity only_first . to_vector . should_equal expected + False -> column.replace term new_text case_sensitivity only_first . should_fail_with Unsupported_Database_Operation + False -> + column.replace term new_text case_sensitivity only_first . to_vector . should_equal expected + + Test.group prefix+"replace: literal text pattern and replacement" <| + col0 = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO']]] . at "x" + col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . 
at "x" + + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false" + do_replace col0 'hello' 'bye' expected=['bye Hello', 'bye bye', 'HELLO HELLO'] + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO'] + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO'] + do_replace col1 'a[bcd]' 'hey' expected=['hey A[bCd] hey', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true" + do_replace col0 'hello' 'bye' only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO'] + do_replace col1 'a[bcd]' 'hey' only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=false only_first=false" + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye'] + do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['hey hey hey', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=false only_first=true" + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO'] + do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + + Test.group prefix+"replace: literal regex pattern and replacement" <| + col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . 
at "x" + + Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=false" + do_replace col1 'a[bcd]'.to_regex 'hey' expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey Ab aCAd'] + + Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=true" + do_replace col1 'a[bcd]'.to_regex 'hey' only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=True only_first=false" + do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey hey heyhey'] + + Test.specify "case_sensitivity=insensitive use_regex=True only_first=true" + do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd'] + + Test.specify "can properly escape complex regexes" <| + regex = "^([^\(]+)|(?\w\d[a-z])+$" + col = table_builder [["x", [regex]]] . at "x" + do_replace col regex "asdf" expected=["asdf"] + + Test.group prefix+"replace: pattern and replacement columns" <| + table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]] + col = table.at "x" + patterns = table.at "patterns" + replacements = table.at "replacements" + + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false" + do_replace col patterns replacements expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] + + 
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true" + do_replace col patterns replacements only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=false only_first=false" + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye', 'hey hey hey', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=false only_first=true" + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + + if setup.is_database then + col = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]]] . at 'A' + Test.specify "should not allow Case_Sensitivity.Insensitive with a non-default locale" <| + locale = Locale.new "en" "GB" "UTF-8" + col.replace 'asdf' 'zxcv' case_sensitivity=(Case_Sensitivity.Insensitive locale) . should_fail_with Illegal_Argument + + Test.specify "column name" <| + table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]] + col = table.at "x" + patterns = table.at "patterns" + replacements = table.at "replacements" + + supported_replace_params = setup.test_selection.supported_replace_params + if supported_replace_params.contains (Replace_Params.Value Text Case_Sensitivity.Default False) then + col.replace 'hello' 'bye' . name . should_equal 'replace([x], \'hello\', \'bye\')' + if supported_replace_params.contains (Replace_Params.Value Regex Case_Sensitivity.Default False) then + col.replace 'a[bcd]'.to_regex 'hey' . name . 
should_equal 'replace([x], \'a[bcd]\', \'hey\')' + if supported_replace_params.contains (Replace_Params.Value Column Case_Sensitivity.Default False) then + col.replace patterns replacements . name . should_equal 'replace([x], [patterns], [replacements])' + + Test.group prefix+"Column Operations - Text Replace (in-memory only)" <| if setup.is_database.not then t4 = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]], ["B", ["A","O","a","E","o","O"]], ["C", [1,2,3,4,5,6]], ["D", ['',Nothing,'',Nothing,'','']]] a = t4.at "A" @@ -1059,6 +1160,22 @@ spec setup = a.replace "[aeiou]".to_regex "#" . to_vector . should_equal ["Alph#", "Br#v#", "Ch#rl##", "D#lt#", "Ech#", "F#xtr#t"] a.replace "([aeiou])(.*?)[aeiou]".to_regex "$1$2$1" . to_vector . should_equal ["Alpha", "Brava", "Charlae", "Delte", "Echo", "Foxtrot"] + Test.specify "should handle unicode" <| + table = table_builder [["x", ["śćxx", "ąąasdfąą", "affib"]], ["patterns", ["ć", "ąą", "ffi"]], ["replacements", ["abc", "def", "ghi"]]] + col = table.at "x" + patterns = table.at "patterns" + replacements = table.at "replacements" + + col.replace patterns replacements . to_vector . should_equal ["śabcxx", "defasdfdef", "aghib"] + + Test.specify "should take pattern and replacement string columns" <| + t = table_builder [["x", ["hello", "what", "yes"]], ["patterns", ["ell", "wh", "es"]], ["replacements", ["xyz", "qwer", "asdf"]]] + col = t.at "x" + patterns = t.at "patterns" + replacements = t.at "replacements" + + col.replace patterns replacements . to_vector . should_equal ["hxyzo", "qwerat", "yasdf"] + Test.specify "should only allow replace on Text columns" <| c.replace "a" "#" . should_fail_with Invalid_Value_Type a.replace 1 "#" . 
should_fail_with Invalid_Value_Type diff --git a/test/Table_Tests/src/Common_Table_Operations/Main.enso b/test/Table_Tests/src/Common_Table_Operations/Main.enso index c3ad6f7a7183..ccf74c82b8c9 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Main.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Main.enso @@ -1,5 +1,7 @@ from Standard.Base import all +import Standard.Database.Internal.Replace_Params.Replace_Params + import project.Common_Table_Operations.Add_Row_Number_Spec import project.Common_Table_Operations.Aggregate_Spec import project.Common_Table_Operations.Column_Operations_Spec @@ -97,7 +99,9 @@ type Test_Selection `Duration`/`Period` type. - supports_nanoseconds_in_time: Specifies if the backend supports nanosecond precision in time values. - Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True distinct_returns_first_row_from_group_if_ordered=True date_time=True fixed_length_text_columns=False supports_decimal_type=False supports_time_duration=False supports_nanoseconds_in_time=False + - supported_replace_params: Specifies the possible values of + Replace_Params that a backend supports. 
+ Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True distinct_returns_first_row_from_group_if_ordered=True date_time=True fixed_length_text_columns=False supports_decimal_type=False supports_time_duration=False supports_nanoseconds_in_time=False supported_replace_params=Nothing spec setup = Core_Spec.spec setup diff --git a/test/Table_Tests/src/Database/Postgres_Spec.enso b/test/Table_Tests/src/Database/Postgres_Spec.enso index 7fa74af3bfbb..ca2c2c40aa8d 100644 --- a/test/Table_Tests/src/Database/Postgres_Spec.enso +++ b/test/Table_Tests/src/Database/Postgres_Spec.enso @@ -6,10 +6,12 @@ import Standard.Table.Data.Type.Value_Type.Bits from Standard.Table import Table, Value_Type from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last +import Standard.Database.Data.Column.Column import Standard.Database.Data.SQL_Type.SQL_Type import Standard.Database.Internal.Postgres.Pgpass +import Standard.Database.Internal.Replace_Params.Replace_Params from Standard.Database import all -from Standard.Database.Errors import SQL_Error +from Standard.Database.Errors import SQL_Error, Unsupported_Database_Operation from Standard.Test import Test, Test_Suite import Standard.Test.Extensions @@ -276,7 +278,7 @@ run_tests connection db_name = Common_Spec.spec prefix connection - common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by_unicode_normalization_by_default=True take_drop=False allows_mixed_type_comparisons=False fixed_length_text_columns=True supports_decimal_type=True + common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by_unicode_normalization_by_default=True 
take_drop=False allows_mixed_type_comparisons=False fixed_length_text_columns=True supports_decimal_type=True supported_replace_params=supported_replace_params aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config first_last_row_order=False aggregation_problems=False agg_in_memory_table = (enso_project.data / "data.csv") . read agg_table = agg_in_memory_table.select_into_database_table connection (Name_Generator.random_name "Agg1") primary_key=Nothing temporary=True @@ -288,6 +290,16 @@ run_tests connection db_name = postgres_specific_spec connection db_name setup Common_Table_Operations.Main.spec setup +## PRIVATE +supported_replace_params : Set Replace_Params +supported_replace_params = + e0 = [Replace_Params.Value Text Case_Sensitivity.Default False, Replace_Params.Value Text Case_Sensitivity.Default True, Replace_Params.Value Text Case_Sensitivity.Sensitive False] + e1 = [Replace_Params.Value Text Case_Sensitivity.Sensitive True, Replace_Params.Value Text Case_Sensitivity.Insensitive False, Replace_Params.Value Text Case_Sensitivity.Insensitive True] + e2 = [Replace_Params.Value Regex Case_Sensitivity.Default False, Replace_Params.Value Regex Case_Sensitivity.Default True, Replace_Params.Value Regex Case_Sensitivity.Sensitive False] + e3 = [Replace_Params.Value Regex Case_Sensitivity.Sensitive True, Replace_Params.Value Regex Case_Sensitivity.Insensitive False, Replace_Params.Value Regex Case_Sensitivity.Insensitive True] + e4 = [Replace_Params.Value Column Case_Sensitivity.Default False, Replace_Params.Value Column Case_Sensitivity.Sensitive False] + Set.from_vector <| e0 + e1 + e2 + e3 + e4 + table_spec = db_name = Environment.get "ENSO_DATABASE_TEST_DB_NAME" db_host_port = (Environment.get "ENSO_DATABASE_TEST_HOST").if_nothing "localhost" . 
split ':' diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso index 2d9c322aee45..4ed125c16a12 100644 --- a/test/Table_Tests/src/Database/SQLite_Spec.enso +++ b/test/Table_Tests/src/Database/SQLite_Spec.enso @@ -5,8 +5,10 @@ import Standard.Base.Errors.File_Error.File_Error import Standard.Table.Data.Type.Value_Type.Bits from Standard.Table import Table, Value_Type +import Standard.Database.Data.Column.Column +import Standard.Database.Internal.Replace_Params.Replace_Params from Standard.Database import all -from Standard.Database.Errors import SQL_Error +from Standard.Database.Errors import SQL_Error, Unsupported_Database_Operation from Standard.Test import Test, Test_Suite import Standard.Test.Extensions @@ -203,7 +205,7 @@ sqlite_spec connection prefix = Common_Spec.spec prefix connection - common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=False order_by=True natural_ordering=False case_insensitive_ordering=True case_insensitive_ascii_only=True take_drop=False is_nan_and_nothing_distinct=False date_time=False + common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=False order_by=True natural_ordering=False case_insensitive_ordering=True case_insensitive_ascii_only=True take_drop=False is_nan_and_nothing_distinct=False date_time=False supported_replace_params=supported_replace_params ## For now `advanced_stats`, `first_last`, `text_shortest_longest` and `multi_distinct` remain disabled, because SQLite does not provide the @@ -224,6 +226,12 @@ sqlite_spec connection prefix = connection.close +## PRIVATE +supported_replace_params : Set Replace_Params +supported_replace_params = + e = [Replace_Params.Value Text Case_Sensitivity.Default False, Replace_Params.Value Text Case_Sensitivity.Sensitive False, Replace_Params.Value Text Case_Sensitivity.Default True, Replace_Params.Value Text Case_Sensitivity.Sensitive 
True, Replace_Params.Value Text Case_Sensitivity.Insensitive True] + Set.from_vector e + spec = enso_project.data.create_directory file = enso_project.data / "transient" / "sqlite_test.db"