Skip to content

Commit

Permalink
Improve generation of long operation in presence of column name lengt…
Browse files Browse the repository at this point in the history
…h limit (#7556)

I planned to do this as part of #7428, but I forgot. Making up for that now.
  • Loading branch information
radeusgd authored Aug 14, 2023
1 parent 060323e commit 8541a9e
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,59 @@ type Column_Naming_Helper
to display every part.
concat : Vector Text -> Boolean -> Text
concat self texts add_spaces=True =
# TODO truncate parts smarter way
separator = if add_spaces then " " else ""
joined = texts.join separator
case self.naming_properties.size_limit of
Nothing -> joined
max_size -> self.naming_properties.truncate joined max_size
max_size ->
base_size = self.naming_properties.encoded_size joined
# If it still fits the limit, we are ok.
if base_size <= max_size then joined else
# If not, we need to truncate.
separator_size = case add_spaces of
False -> 0
True -> self.naming_properties.encoded_size separator

# Estimate the space left for each part to be concatenated.
remaining_space = max_size - (separator_size * (texts.length - 1))
min_bytes_per_part = 5

## If there are so many parts, each of them has very little space,
we cannot fit all of them - so we just keep the first few and the last one.
parts_to_include = case remaining_space <= min_bytes_per_part * texts.length of
# If there is too little space, we will just pick a few parts:
True ->
mid = Math.min (texts.length-1) (remaining_space.div min_bytes_per_part)
texts.take (First 1+mid) + texts.take (Last 1)
False -> texts
new_remaining_space = max_size - (separator_size * (parts_to_include.length - 1))
initial_size_per_part = new_remaining_space.div parts_to_include.length

# Now we compute how many parts will fit the initial_size_per_part without truncating - these shorter parts fit as a whole.
part_sizes = parts_to_include.map self.naming_properties.encoded_size
needs_truncating = part_sizes.map (size-> size > initial_size_per_part)
non_truncated_parts_size = part_sizes.zip needs_truncating (size-> is_truncated-> if is_truncated then 0 else size) . fold 0 (+)
truncated_parts_count = needs_truncating.fold 0 acc-> is_truncated->
if is_truncated then acc+1 else acc

## Now having accounted for the parts that do not need truncation,
we distribute the remaining space among the ones that do.
truncated_part_size = (new_remaining_space - non_truncated_parts_size).div truncated_parts_count
truncated_suffix = "..."
truncated_suffix_size = self.naming_properties.encoded_size truncated_suffix
transformed_parts = parts_to_include.zip needs_truncating part-> is_truncated->
case is_truncated of
False -> part
True ->
## Caveat: we now have more space for each part than when computing `needs_truncating`,
so some parts may no longer need to be truncated if they were just slightly over the limit.
still_needs_truncation = self.naming_properties.encoded_size part > truncated_part_size
if still_needs_truncation.not then part else
self.naming_properties.truncate part (truncated_part_size - truncated_suffix_size) + truncated_suffix
new_joined = transformed_parts.join separator

# Just to be sure, we still truncate the end result.
self.naming_properties.truncate new_joined max_size

## PRIVATE
A `Column_Naming_Helper` for the in-memory backend - having no length limits.
Expand Down
45 changes: 27 additions & 18 deletions test/Table_Tests/src/Database/Common/Names_Length_Limits_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -317,25 +317,34 @@ spec prefix connection =
Problems.assume_no_problems r4

Test.specify "should truncate the column name if the resulting operation-generated name is too long, without warnings" <|
long_name = "a" * max_column_name_length
src = Table.new [[long_name, [1, 2, 3]]]
name_a = "a" * max_column_name_length
name_b = "b" * max_column_name_length
src = Table.new [[name_a, [1]], [name_b, [2]]]
db_table = src.select_into_database_table connection (Name_Generator.random_name "long-column-names") temporary=True
c0 = db_table.at long_name

c1 = c0 + 10
c1.to_vector . should_equal [11, 12, 13]
# truncated
c1.name . should_equal ("[" + long_name.drop 1)

c2 = c0.ceil
c2.to_vector . should_equal [1, 2, 3]
# truncated
c2.name . should_equal (("ceil([" + long_name).take max_column_name_length)

c3 = c1.cast Value_Type.Char . ends_with "1"
c3.to_vector . should_equal [True, False, False]
c3.name.length . should_equal max_column_name_length
c3.name . should_contain "ends_with"

a = db_table.at name_a
b = db_table.at name_b
c1 = a + b
Problems.assume_no_problems c1
# The column names should be truncated so that prefixes of both names and the operator all fit:
c1.name . should_contain "+"
c1.name . should_contain "..."
c1.name . should_contain "aaa"
c1.name . should_contain "bbb"

c2 = (a == b).iif b 0
Problems.assume_no_problems c2
c2.name . should_start_with "if [[aaa"
c2.name . should_contain " then [bbb"
c2.name . should_contain "bb... else 0"

# We repeat the argument maaany times.
args = Vector.new (max_column_name_length * 2) _-> b
c3 = a.max args
Problems.assume_no_problems c3
c3.name.should_start_with "max([aaa"
c3.name.should_contain "..., "
c3.name.should_contain ")"

Test.specify "raise an error if name provided by the user in aggregate is too long" <|
src = Table.new [["X", [1, 2, 3]]]
Expand Down

0 comments on commit 8541a9e

Please sign in to comment.