Improve generation of long operation in presence of column name lengt…

…h limit (#7556) I planned to do this as part of #7428, but I forgot. Making up for that now.
enso-org · Aug 14, 2023 · 8541a9e · 8541a9e
1 parent 060323e
commit 8541a9e
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 20 deletions.
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Column_Naming_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Column_Naming_Helper.enso
@@ -110,12 +110,59 @@ type Column_Naming_Helper
        to display every part.
     concat : Vector Text -> Boolean -> Text
     concat self texts add_spaces=True =
-        # TODO truncate parts smarter way
         separator = if add_spaces then " " else ""
         joined = texts.join separator
         case self.naming_properties.size_limit of
             Nothing  -> joined
-            max_size -> self.naming_properties.truncate joined max_size
+            max_size ->
+                base_size = self.naming_properties.encoded_size joined
+                # If it still fits the limit, we are ok.
+                if base_size <= max_size then joined else
+                    # If not, we need to truncate.
+                    separator_size = case add_spaces of
+                        False -> 0
+                        True -> self.naming_properties.encoded_size separator
+
+                    # Estimate the space left for each part to be concatenated.
+                    remaining_space = max_size - (separator_size * (texts.length - 1))
+                    min_bytes_per_part = 5
+
+                    ## If there are so many parts, each of them has very little space,
+                       we cannot fit all of them - so we just keep the first few and the last one.
+                    parts_to_include = case remaining_space <= min_bytes_per_part * texts.length of
+                        # If there is too little space, we will just pick a few parts:
+                        True ->
+                            mid = Math.min (texts.length-1) (remaining_space.div min_bytes_per_part)
+                            texts.take (First 1+mid) + texts.take (Last 1)
+                        False -> texts
+                    new_remaining_space = max_size - (separator_size * (parts_to_include.length - 1))
+                    initial_size_per_part = new_remaining_space.div parts_to_include.length
+
+                    # Now we compute how many parts will fit the initial_size_per_part without truncating - these shorter parts fit as a whole.
+                    part_sizes = parts_to_include.map self.naming_properties.encoded_size
+                    needs_truncating = part_sizes.map (size-> size > initial_size_per_part)
+                    non_truncated_parts_size = part_sizes.zip needs_truncating (size-> is_truncated-> if is_truncated then 0 else size) . fold 0 (+)
+                    truncated_parts_count = needs_truncating.fold 0 acc-> is_truncated->
+                        if is_truncated then acc+1 else acc
+
+                    ## Now having accounted for the parts that do not need truncation,
+                       we distribute the remaining space among the ones that do.
+                    truncated_part_size = (new_remaining_space - non_truncated_parts_size).div truncated_parts_count
+                    truncated_suffix = "..."
+                    truncated_suffix_size = self.naming_properties.encoded_size truncated_suffix
+                    transformed_parts = parts_to_include.zip needs_truncating part-> is_truncated->
+                        case is_truncated of
+                            False -> part
+                            True ->
+                                ## Caveat: we now have more space for each part than when computing `needs_truncating`,
+                                   so some parts may no longer need to be truncated if they were just slightly over the limit.
+                                still_needs_truncation = self.naming_properties.encoded_size part > truncated_part_size
+                                if still_needs_truncation.not then part else
+                                    self.naming_properties.truncate part (truncated_part_size - truncated_suffix_size) + truncated_suffix
+                    new_joined = transformed_parts.join separator
+
+                    # Just to be sure, we still truncate the end result.
+                    self.naming_properties.truncate new_joined max_size
 
     ## PRIVATE
        A `Column_Naming_Helper` for the in-memory backend - having no length limits.

diff --git a/test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso b/test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso
@@ -317,25 +317,34 @@ spec prefix connection =
                         Problems.assume_no_problems r4
 
             Test.specify "should truncate the column name if the resulting operation-generated name is too long, without warnings" <|
-                long_name = "a" * max_column_name_length
-                src = Table.new [[long_name, [1, 2, 3]]]
+                name_a = "a" * max_column_name_length
+                name_b = "b" * max_column_name_length
+                src = Table.new [[name_a, [1]], [name_b, [2]]]
                 db_table = src.select_into_database_table connection (Name_Generator.random_name "long-column-names") temporary=True
-                c0 = db_table.at long_name
-
-                c1 = c0 + 10
-                c1.to_vector . should_equal [11, 12, 13]
-                # truncated
-                c1.name . should_equal ("[" + long_name.drop 1)
-
-                c2 = c0.ceil
-                c2.to_vector . should_equal [1, 2, 3]
-                # truncated
-                c2.name . should_equal (("ceil([" + long_name).take max_column_name_length)
-
-                c3 = c1.cast Value_Type.Char . ends_with "1"
-                c3.to_vector . should_equal [True, False, False]
-                c3.name.length . should_equal max_column_name_length
-                c3.name . should_contain "ends_with"
+
+                a = db_table.at name_a
+                b = db_table.at name_b
+                c1 = a + b
+                Problems.assume_no_problems c1
+                # The column names should be truncated so that prefixes of both names and the operator all fit:
+                c1.name . should_contain "+"
+                c1.name . should_contain "..."
+                c1.name . should_contain "aaa"
+                c1.name . should_contain "bbb"
+
+                c2 = (a == b).iif b 0
+                Problems.assume_no_problems c2
+                c2.name . should_start_with "if [[aaa"
+                c2.name . should_contain " then [bbb"
+                c2.name . should_contain "bb... else 0"
+
+                # We repeat the argument maaany times.
+                args = Vector.new (max_column_name_length * 2) _-> b
+                c3 = a.max args
+                Problems.assume_no_problems c3
+                c3.name.should_start_with "max([aaa"
+                c3.name.should_contain "..., "
+                c3.name.should_contain ")"
 
             Test.specify "raise an error if name provided by the user in aggregate is too long" <|
                 src = Table.new [["X", [1, 2, 3]]]