diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Column_Naming_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Column_Naming_Helper.enso index 3a15eae74cf1..ed4453269480 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Column_Naming_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Column_Naming_Helper.enso @@ -110,12 +110,59 @@ type Column_Naming_Helper to display every part. concat : Vector Text -> Boolean -> Text concat self texts add_spaces=True = - # TODO truncate parts smarter way separator = if add_spaces then " " else "" joined = texts.join separator case self.naming_properties.size_limit of Nothing -> joined - max_size -> self.naming_properties.truncate joined max_size + max_size -> + base_size = self.naming_properties.encoded_size joined + # If it still fits the limit, we are ok. + if base_size <= max_size then joined else + # If not, we need to truncate. + separator_size = case add_spaces of + False -> 0 + True -> self.naming_properties.encoded_size separator + + # Estimate the space left for each part to be concatenated. + remaining_space = max_size - (separator_size * (texts.length - 1)) + min_bytes_per_part = 5 + + ## If there are so many parts, each of them has very little space, + we cannot fit all of them - so we just keep the first few and the last one. + parts_to_include = case remaining_space <= min_bytes_per_part * texts.length of + # If there is too little space, we will just pick a few parts: + True -> + mid = Math.min (texts.length-1) (remaining_space.div min_bytes_per_part) + texts.take (First 1+mid) + texts.take (Last 1) + False -> texts + new_remaining_space = max_size - (separator_size * (parts_to_include.length - 1)) + initial_size_per_part = new_remaining_space.div parts_to_include.length + + # Now we compute how many parts will fit the initial_size_per_part without truncating - these shorter parts fit as a whole. + part_sizes = parts_to_include.map self.naming_properties.encoded_size + needs_truncating = part_sizes.map (size-> size > initial_size_per_part) + non_truncated_parts_size = part_sizes.zip needs_truncating (size-> is_truncated-> if is_truncated then 0 else size) . fold 0 (+) + truncated_parts_count = needs_truncating.fold 0 acc-> is_truncated-> + if is_truncated then acc+1 else acc + + ## Now having accounted for the parts that do not need truncation, + we distribute the remaining space among the ones that do. + truncated_part_size = (new_remaining_space - non_truncated_parts_size).div truncated_parts_count + truncated_suffix = "..." + truncated_suffix_size = self.naming_properties.encoded_size truncated_suffix + transformed_parts = parts_to_include.zip needs_truncating part-> is_truncated-> + case is_truncated of + False -> part + True -> + ## Caveat: we now have more space for each part than when computing `needs_truncating`, + so some parts may no longer need to be truncated if they were just slightly over the limit. + still_needs_truncation = self.naming_properties.encoded_size part > truncated_part_size + if still_needs_truncation.not then part else + self.naming_properties.truncate part (truncated_part_size - truncated_suffix_size) + truncated_suffix + new_joined = transformed_parts.join separator + + # Just to be sure, we still truncate the end result. + self.naming_properties.truncate new_joined max_size ## PRIVATE A `Column_Naming_Helper` for the in-memory backend - having no length limits. diff --git a/test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso b/test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso index aa48d4121730..4bf09a398bcb 100644 --- a/test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso +++ b/test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso @@ -317,25 +317,34 @@ spec prefix connection = Problems.assume_no_problems r4 Test.specify "should truncate the column name if the resulting operation-generated name is too long, without warnings" <| - long_name = "a" * max_column_name_length - src = Table.new [[long_name, [1, 2, 3]]] + name_a = "a" * max_column_name_length + name_b = "b" * max_column_name_length + src = Table.new [[name_a, [1]], [name_b, [2]]] db_table = src.select_into_database_table connection (Name_Generator.random_name "long-column-names") temporary=True - c0 = db_table.at long_name - - c1 = c0 + 10 - c1.to_vector . should_equal [11, 12, 13] - # truncated - c1.name . should_equal ("[" + long_name.drop 1) - - c2 = c0.ceil - c2.to_vector . should_equal [1, 2, 3] - # truncated - c2.name . should_equal (("ceil([" + long_name).take max_column_name_length) - - c3 = c1.cast Value_Type.Char . ends_with "1" - c3.to_vector . should_equal [True, False, False] - c3.name.length . should_equal max_column_name_length - c3.name . should_contain "ends_with" + + a = db_table.at name_a + b = db_table.at name_b + c1 = a + b + Problems.assume_no_problems c1 + # The column names should be truncated so that prefixes of both names and the operator all fit: + c1.name . should_contain "+" + c1.name . should_contain "..." + c1.name . should_contain "aaa" + c1.name . should_contain "bbb" + + c2 = (a == b).iif b 0 + Problems.assume_no_problems c2 + c2.name . should_start_with "if [[aaa" + c2.name . should_contain " then [bbb" + c2.name . should_contain "bb... else 0" + + # We repeat the argument maaany times. + args = Vector.new (max_column_name_length * 2) _-> b + c3 = a.max args + Problems.assume_no_problems c3 + c3.name.should_start_with "max([aaa" + c3.name.should_contain "..., " + c3.name.should_contain ")" Test.specify "raise an error if name provided by the user in aggregate is too long" <| src = Table.new [["X", [1, 2, 3]]]