Update starts_with, ends_with and contains to new API design (#4078)

- Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new simpler API.
- Added a `Case_Sensitivity.Default` and adjusted `Table.distinct` to use it by default.
- Fixed a bug with `Data.fetch` on an HTTP error.
- Improved SQLite Case Sensitivity control in distinct to use collations.
enso-org · Jan 25, 2023 · 60f0e96 · 60f0e96
1 parent 3dbceef
commit 60f0e96
Show file tree

Hide file tree

Showing 18 changed files with 153 additions and 291 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -287,6 +287,8 @@
 - [Implemented `Table.union` for the in-memory backend.][4052]
 - [Implemented `Table.cross_join` and `Table.zip` for the in-memory
   backend.][4063]
+- [Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new
+  simpler API.][4078]
 
 [debug-shortcuts]:
   https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -447,6 +449,7 @@
 [4044]: https://github.com/enso-org/enso/pull/4044
 [4052]: https://github.com/enso-org/enso/pull/4052
 [4063]: https://github.com/enso-org/enso/pull/4063
+[4078]: https://github.com/enso-org/enso/pull/4078
 
 #### Enso Compiler
 

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso
@@ -32,13 +32,12 @@ new custom_comparator=Nothing =
    - text_ordering:
      Specifies how to compare Text values within the Comparator.
 for_text_ordering : Text_Ordering -> ObjectComparator
-for_text_ordering text_ordering =
-    case_sensitivity = text_ordering.case_sensitivity.if_nothing Case_Sensitivity.Sensitive
-    case text_ordering.sort_digits_as_numbers of
-        True ->
-            txt_cmp a b = Natural_Order.compare a b case_sensitivity . to_sign
-            new.withCustomTextComparator txt_cmp
-        False -> case case_sensitivity of
-            Case_Sensitivity.Sensitive -> new
-            Case_Sensitivity.Insensitive locale ->
-                new.withCaseInsensitivity locale.java_locale
+for_text_ordering text_ordering = case text_ordering.sort_digits_as_numbers of
+    True ->
+        txt_cmp a b = Natural_Order.compare a b text_ordering.case_sensitivity . to_sign
+        new.withCustomTextComparator txt_cmp
+    False -> case text_ordering.case_sensitivity of
+        Case_Sensitivity.Default -> new
+        Case_Sensitivity.Sensitive -> new
+        Case_Sensitivity.Insensitive locale ->
+            new.withCaseInsensitivity locale.java_locale
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso
@@ -23,8 +23,9 @@ polyglot java import com.ibm.icu.text.BreakIterator
 compare : Text -> Text -> Case_Sensitivity -> Ordering
 compare text1 text2 case_sensitivity=Case_Sensitivity.Sensitive =
     compare_text = case case_sensitivity of
-        Case_Sensitivity.Insensitive locale -> a -> b -> a.compare_to_ignore_case b locale
+        Case_Sensitivity.Default -> _.compare_to _
         Case_Sensitivity.Sensitive -> _.compare_to _
+        Case_Sensitivity.Insensitive locale -> a -> b -> a.compare_to_ignore_case b locale
 
     iter1 = BreakIterator.getCharacterInstance
     iter1.setText text1

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso
@@ -3,6 +3,11 @@ import project.Data.Locale.Locale
 polyglot java import org.enso.base.text.TextFoldingStrategy
 
 type Case_Sensitivity
+    ## Use the default case-sensitivity for the environment.
+       For in-memory operations, it will be case sensitive.
+       For database operations, it will follow the collation default.
+    Default
+
     ## Represents a case-sensitive comparison mode.
     Sensitive
 
@@ -16,6 +21,7 @@ type Case_Sensitivity
        Creates a Java `TextFoldingStrategy` from the case sensitivity setting.
     folding_strategy : Case_Sensitivity -> TextFoldingStrategy
     folding_strategy case_sensitivity = case case_sensitivity of
+        Case_Sensitivity.Default -> TextFoldingStrategy.unicodeNormalizedFold
         Case_Sensitivity.Sensitive -> TextFoldingStrategy.unicodeNormalizedFold
         Case_Sensitivity.Insensitive locale ->
             TextFoldingStrategy.caseInsensitiveFold locale.java_locale
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
@@ -8,6 +8,7 @@ import project.Data.Numbers.Integer
 import project.Data.Range.Extensions
 import project.Data.Range.Range
 import project.Data.Text.Case.Case
+import project.Data.Text.Case_Sensitivity.Case_Sensitivity
 import project.Data.Text.Encoding.Encoding
 import project.Data.Text.Location
 import project.Data.Text.Matching_Mode
@@ -716,59 +717,41 @@ Text.from_codepoints codepoints = Text_Utils.from_codepoints codepoints.to_array
 
    Arguments:
    - prefix: The prefix to see if `self` starts with.
-   - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
-     rules specified in the matcher.
-     If a `Regex_Matcher`, the term is used as a regular expression and matched
-     using the associated options.
+   - case_sensitivity: Specifies if the text values should be compared case
+     sensitively.
 
    ! Unicode Equality
      The definition of equality includes Unicode canonicalization. I.e. two
      texts are equal if they are identical after canonical decomposition. This
      ensures that different ways of expressing the same character in the
      underlying binary representation are considered equal.
 
-     This however is not always well handled by the regex engine. The behaviour
-     is as follows:
-
          'ś' . starts_with 's' == False
          's\u{301}' . starts_with 's' == False
          's\u{301}' . starts_with 'ś' == True
          'ś' . starts_with 's\u{301}' == True
 
-         'ś' . starts_with 's' Regex_Matcher == True
-         's\u{301}' . starts_with 's' Regex_Matcher == True
-         's\u{301}' . starts_with 'ś' Regex_Matcher == True
-         'ś' . starts_with 's\u{301}' Regex_Matcher == True
-
    > Example
      See if the text "Hello!" starts with the specified prefix.
 
          "Hello!".starts_with "Hello" == True
          "Hello!".starts_with "hello" == False
-         "Hello!".starts_with "hello" (Text_Matcher Case_Insensitive) == True
-         "Hello!".starts_with "[a-z]" Regex_Matcher == False
-         "Hello!".starts_with "[A-Z]" Regex_Matcher == True
-Text.starts_with : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
-Text.starts_with self prefix matcher=Text_Matcher.Case_Sensitive = case matcher of
-    Text_Matcher.Case_Sensitive -> Text_Utils.starts_with self prefix
-    Text_Matcher.Case_Insensitive locale ->
+         "Hello!".starts_with "hello" Case_Sensitivity.Insensitive == True
+Text.starts_with : Text -> Case_Sensitivity -> Boolean
+Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of
+    Case_Sensitivity.Default -> self.starts_with prefix Case_Sensitivity.Sensitive
+    Case_Sensitivity.Sensitive -> Text_Utils.starts_with self prefix
+    Case_Sensitivity.Insensitive locale ->
             self.take (Index_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale
-    _ : Regex_Matcher ->
-        preprocessed_pattern = "\A(?:" + prefix + ")"
-        compiled_pattern = matcher.compile preprocessed_pattern
-        match = compiled_pattern.match self Matching_Mode.First
-        match.is_nothing.not
 
 ## ALIAS Check Suffix
 
    Checks whether `self` ends with `suffix`.
 
    Arguments:
    - suffix: The suffix to see if `self` ends with.
-   - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
-     rules specified in the matcher.
-     If a `Regex_Matcher`, the term is used as a regular expression and matched
-     using the associated options.
+   - case_sensitivity: Specifies if the text values should be compared case
+     sensitively.
 
    ! Unicode Equality
      The definition of equality includes Unicode canonicalization. I.e. two
@@ -781,51 +764,35 @@ Text.starts_with self prefix matcher=Text_Matcher.Case_Sensitive = case matcher
 
          "Hello World".ends_with "World" == True
          "Hello World".ends_with "world" == False
-         "Hello World".ends_with "world" (Text_Matcher Case_Insensitive) == True
-         "Hello World".ends_with "[A-Z][a-z]{4}" Regex_Matcher == True
-Text.ends_with : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
-Text.ends_with self suffix matcher=Text_Matcher.Case_Sensitive = case matcher of
-    Text_Matcher.Case_Sensitive -> Text_Utils.ends_with self suffix
-    Text_Matcher.Case_Insensitive locale ->
+         "Hello World".ends_with "world" Case_Sensitivity.Insensitive == True
+Text.ends_with : Text -> Case_Sensitivity -> Boolean
+Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of
+    Case_Sensitivity.Default -> self.ends_with suffix Case_Sensitivity.Sensitive
+    Case_Sensitivity.Sensitive -> Text_Utils.ends_with self suffix
+    Case_Sensitivity.Insensitive locale ->
             self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale
-    _ : Regex_Matcher ->
-        preprocessed_pattern = "(?:" + suffix + ")\z"
-        compiled_pattern = matcher.compile preprocessed_pattern
-        match = compiled_pattern.match self Matching_Mode.First
-        match.is_nothing.not
 
 ## ALIAS Contains
 
    Checks whether `self` contains `term` as its substring.
+   Returns: `True` if term is found within `self`. `False` otherwise.
 
    Arguments:
    - term: The term to find.
-   - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
-     rules specified in the matcher.
-     If a `Regex_Matcher`, the term is used as a regular expression and matched
-     using the associated options.
-
-   Returns: `True` if term is found within `self`. `False` otherwise.
+   - case_sensitivity: Specifies if the text values should be compared case
+     sensitively.
 
    ! Unicode Equality
      The definition of equality includes Unicode canonicalization. I.e. two
      texts are equal if they are identical after canonical decomposition. This
      ensures that different ways of expressing the same character in the
      underlying binary representation are considered equal.
 
-     This however is not always well handled by the regex engine. The behaviour
-     is as follows:
-
          'ś' . contains 's' == False
          's\u{301}' . contains 's' == False
          's\u{301}' . contains 'ś' == True
          'ś' . contains 's\u{301}' == True
 
-         'ś' . contains 's' Regex_Matcher == True
-         's\u{301}' . contains 's' Regex_Matcher == True
-         's\u{301}' . contains 'ś' Regex_Matcher == True
-         'ś' . contains 's\u{301}' Regex_Matcher == True
-
    > Example
      See if the text "Hello" contains the text "ell".
 
@@ -839,21 +806,13 @@ Text.ends_with self suffix matcher=Text_Matcher.Case_Sensitive = case matcher of
    > Example
      See if the text "Hello!" contains the text 'LO', ignoring case.
 
-         "Hello!".contains "LO" (Text_Matcher Case_Insensitive)
-
-   > Example
-     See if the text "Hello!" contains any lowercase letters, using a regex.
-
-         "Hello!".contains "[a-z]" Regex_Matcher
-Text.contains : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
-Text.contains self term="" matcher=Text_Matcher.Case_Sensitive = case matcher of
-    Text_Matcher.Case_Sensitive -> Text_Utils.contains self term
-    Text_Matcher.Case_Insensitive locale ->
+         "Hello!".contains "LO" Case_Sensitivity.Insensitive
+Text.contains : Text -> Case_Sensitivity -> Boolean
+Text.contains self term="" case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of
+    Case_Sensitivity.Default -> self.contains term Case_Sensitivity.Sensitive
+    Case_Sensitivity.Sensitive -> Text_Utils.contains self term
+    Case_Sensitivity.Insensitive locale ->
             Text_Utils.contains_case_insensitive self term locale.java_locale
-    _ : Regex_Matcher ->
-        compiled_pattern = matcher.compile term
-        match = compiled_pattern.match self Matching_Mode.First
-        match.is_nothing.not
 
 ## Takes an integer and returns a new text, consisting of `count` concatenated
    copies of `self`.

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso
@@ -44,6 +44,7 @@ type Regex_Matcher
     compile : Text -> Pattern
     compile self pattern =
         case_insensitive = case self.case_sensitivity of
+            Case_Sensitivity.Default -> False
             Case_Sensitivity.Sensitive -> False
             ## TODO [RW] Currently locale is not supported in case-insensitive
                Regex matching. There are plans to revisit it:

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso
@@ -40,11 +40,9 @@ type Text_Ordering
     Case_Insensitive (locale:Locale=Locale.default) (sort_digits_as_numbers:Boolean=False)
 
     ## PRIVATE
-       Returns this ordering's case sensitivity setting. It will return
-       `Nothing` for the `Default` ordering, meaning that the case sensitivity
-       is to be determined by the backend.
+       Returns this ordering's case sensitivity setting.
     case_sensitivity : Case_Sensitivity
     case_sensitivity self = case self of
-        Text_Ordering.Default _ -> Nothing
+        Text_Ordering.Default _ -> Case_Sensitivity.Default
         Text_Ordering.Case_Sensitive _ -> Case_Sensitivity.Sensitive
         Text_Ordering.Case_Insensitive locale _ -> Case_Sensitivity.Insensitive locale
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
@@ -59,7 +59,7 @@ type Dialect
 
     ## PRIVATE
        Prepares a distinct operation.
-    prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
+    prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table
     prepare_distinct self =
         Unimplemented.throw "This is an interface only."
 

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso
@@ -140,4 +140,4 @@ type SQL_Type
        match more possible types.
     is_likely_text : Boolean
     is_likely_text self =
-       self.is_definitely_text || self.name.contains "text" Text_Matcher.Case_Insensitive
+       self.is_definitely_text || self.name.contains "text" Case_Sensitivity.Insensitive
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -673,16 +673,11 @@ type Table
          - If floating points values are present in the distinct columns, a
            `Floating_Point_Grouping` warning.
     distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table
-    distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning =
+    distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Default on_problems=Report_Warning =
         key_columns = self.columns_helper.select_columns selector=columns reorder=True on_problems=Problem_Behavior.Report_Error . catch No_Output_Columns _->
             Error.throw No_Input_Columns_Selected
-        text_case_insensitive = case case_sensitivity of
-            Case_Sensitivity.Sensitive -> False
-            Case_Sensitivity.Insensitive locale ->
-                Helpers.assume_default_locale locale <|
-                    True
         problem_builder = Problem_Builder.new
-        new_table = self.connection.dialect.prepare_distinct self key_columns text_case_insensitive problem_builder
+        new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
         problem_builder.attach_problems_before on_problems new_table
 
     ## Joins two tables according to the specified join conditions.

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
@@ -178,7 +178,7 @@ base_dialect =
     functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
     agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
     counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
-    text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case]
+    text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case, make_case_sensitive]
     nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
     contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
     base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains)
@@ -293,11 +293,14 @@ generate_from_part dialect from_spec = case from_spec of
         sub = generate_query dialect (Query.Select columns context)
         sub.paren ++ alias dialect as_name
 
-
 ## PRIVATE
 fold_case = lift_unary_op "FOLD_CASE" arg->
     code "LOWER(UPPER(" ++ arg ++ "))"
 
+## PRIVATE
+make_case_sensitive = lift_unary_op "MAKE_CASE_SENSITIVE" _->
+    Error.throw <| Unsupported_Database_Operation.Error ("Case sensitive operations are not currently supported by this connection.")
+
 ## PRIVATE
 simple_equals_ignore_case = Base_Generator.lift_binary_op "equals_ignore_case" a-> b->
     code "LOWER(UPPER(" ++ a ++ ")) = LOWER(UPPER(" ++ b ++ "))"

diff --git a/...ibution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso b/...ibution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso
@@ -2,16 +2,19 @@ from Standard.Base import all
 
 from Standard.Table.Errors import Floating_Point_Grouping
 
+import project.Internal.Helpers
 import project.Internal.IR.SQL_Expression.SQL_Expression
 
 ## PRIVATE
-make_distinct_expression text_case_insensitive problem_builder key_column =
+make_distinct_expression text_case_sensitivity problem_builder key_column =
     if key_column.sql_type.is_definitely_double then
         problem_builder.report_other_warning (Floating_Point_Grouping.Error key_column.name)
 
     expr = key_column.expression
 
-    needs_case_fold = text_case_insensitive && key_column.sql_type.is_definitely_text
-    case needs_case_fold of
-        True -> SQL_Expression.Operation "FOLD_CASE" [expr]
-        False -> expr
+    if key_column.sql_type.is_definitely_text.not then expr else case text_case_sensitivity of
+        Case_Sensitivity.Insensitive locale ->
+            Helpers.assume_default_locale locale <|
+                SQL_Expression.Operation "FOLD_CASE" [expr]
+        Case_Sensitivity.Sensitive -> SQL_Expression.Operation "MAKE_CASE_SENSITIVE" [expr]
+        Case_Sensitivity.Default -> expr
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@@ -81,13 +81,13 @@ type Postgres_Dialect
 
     ## PRIVATE
        Prepares a distinct operation.
-    prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
-    prepare_distinct self table key_columns text_case_insensitive problem_builder =
+    prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table
+    prepare_distinct self table key_columns case_sensitivity problem_builder =
         setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
         new_columns = setup.new_columns.first
         column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
         new_key_columns = key_columns.map c-> column_mapping.at c.name
-        distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
+        distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression case_sensitivity problem_builder)
         new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions
         table.updated_context_and_columns new_context new_columns subquery=True
 
@@ -250,6 +250,10 @@ ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
     res = str ++ " LIKE CONCAT('%', " ++ sub ++ ")"
     res.paren
 
+## PRIVATE
+make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg->
+    code "((" ++ arg ++ ') COLLATE "C.utf8")'
+
 ## PRIVATE
 make_contains_expr expr substring =
     code "position(" ++ substring ++ " in " ++ expr ++ ") > 0"
@@ -267,7 +271,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
             ## In the future we can modify this error to suggest using a custom defined collation.
             if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation.Error "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
                 case text_ordering.case_sensitivity of
-                    Nothing ->
+                    Case_Sensitivity.Default ->
                         Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing
                     Case_Sensitivity.Sensitive ->
                         Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"