Skip to content

Commit

Permalink
Split union to make the code cleaner
Browse files Browse the repository at this point in the history
  • Loading branch information
radeusgd committed Jan 16, 2023
1 parent a22d614 commit ac0772f
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ type Column_Set
## PRIVATE
Value (name : Text) (column_indices : Vector Integer)

## PRIVATE
   Pairs every entry of `column_indices` with the table at the same position
   in `all_tables` and yields the column found at that index in that table.
   Entries that are `Nothing` yield `Nothing` instead of a column.
resolve_columns self all_tables =
    self.column_indices.zip all_tables index-> table->
        case index of
            Nothing -> Nothing
            _ : Integer -> table.at index

## PRIVATE
Returns a map indicating in how many tables a column with a given name appeared.
find_column_counts tables =
Expand Down
53 changes: 18 additions & 35 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import project.Delimited.Delimited_Format.Delimited_Format

from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
from project.Internal.Rows_View import Rows_View
from project.Errors import Column_Count_Mismatch, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type, No_Common_Type, Column_Type_Mismatch
from project.Errors import Column_Count_Mismatch, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type

from project.Data.Column import get_item_string
from project.Internal.Filter_Condition_Helpers import make_filter_column
Expand Down Expand Up @@ -1184,42 +1184,12 @@ type Table
if is_everything_ok then
problem_builder = Problem_Builder.new
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
new_row_count = all_tables.fold 0 c-> t-> c + t.row_count
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
merged_columns = matched_column_sets.map column_set->
columns = column_set.column_indices.zip all_tables i-> parent_table->
case i of
Nothing -> Nothing
_ : Integer -> parent_table.at i
result_type = case allow_type_widening of
True ->
types = columns.filter Filter_Condition.Not_Nothing . map .value_type
common_type = Value_Type.find_common_type types strict=True
if common_type.is_nothing then
problem_builder.report_other_warning (No_Common_Type.Error column_set.name)
common_type
False ->
first_column = columns.find (c-> c.is_nothing.not)
first_type = first_column.value_type
if first_type == Value_Type.Mixed then Value_Type.Mixed else
first_wrong_column = columns.find if_missing=Nothing col->
col.is_nothing.not && col.value_type != first_type
if first_wrong_column.is_nothing then first_type else
got_type = first_wrong_column.value_type
problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name first_type got_type)
Nothing
case result_type of
case Table_Helpers.unify_result_type_for_union column_set all_tables allow_type_widening problem_builder of
Nothing -> Nothing
_ ->
storage_builder = Column_Module.make_storage_builder_for_type result_type initial_size=new_row_count
column_set.column_indices.zip all_tables i-> parent_table->
case i of
Nothing ->
null_row_count = parent_table.row_count
storage_builder.appendNulls null_row_count
_ : Integer ->
storage = parent_table.at i . java_column . getStorage
storage_builder.appendBulkStorage storage
Column.from_storage column_set.name storage_builder.seal
result_type : Value_Type ->
concat_columns column_set all_tables result_type result_row_count
good_columns = merged_columns.filter Filter_Condition.Not_Nothing
if good_columns.is_empty then Error.throw No_Output_Columns else
problem_builder.attach_problems_before on_problems <|
Expand Down Expand Up @@ -1595,3 +1565,16 @@ check_table arg_name table =
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
True -> True

## PRIVATE
   Builds one in-memory column for the given column set by appending, table
   by table, either the storage of the matched column or a run of nulls as
   long as that table's row count when the table has no matching column
   (its entry in `column_indices` is `Nothing`).

   The storage builder is created for `result_type` and pre-sized with
   `result_row_count`. The resulting column is named after the column set.
concat_columns column_set all_tables result_type result_row_count =
    builder = Column_Module.make_storage_builder_for_type result_type initial_size=result_row_count
    # The zip is run only for its side effects on `builder`.
    column_set.column_indices.zip all_tables index-> table->
        case index of
            Nothing -> builder.appendNulls table.row_count
            _ : Integer ->
                source_column = table.at index
                builder.appendBulkStorage source_column.java_column.getStorage
    sealed_storage = builder.seal
    Column.from_storage column_set.name sealed_storage
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ import project.Data.Position.Position
import project.Data.Sort_Column_Selector.Sort_Column_Selector
import project.Data.Sort_Column.Sort_Column
import project.Data.Table.Table
import project.Data.Value_Type.Value_Type
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy

from project.Data.Aggregate_Column.Aggregate_Column import Minimum, Maximum
from project.Errors import Missing_Input_Columns, No_Output_Columns, Too_Many_Column_Names_Provided, No_Input_Columns_Selected
from project.Errors import Missing_Input_Columns, No_Output_Columns, Too_Many_Column_Names_Provided, No_Input_Columns_Selected, No_Common_Type, Column_Type_Mismatch

polyglot java import java.util.HashSet

Expand Down Expand Up @@ -631,3 +632,26 @@ filter_blank_rows table when_any treat_nans_as_blank =
is_table obj =
known_types = ["Standard.Table.Data.Table.Table", "Standard.Database.Data.Table.Table"]
known_types.contains (Meta.get_qualified_type_name obj)

## PRIVATE
   Decides which value type the union should use for the given column set.

   With `allow_type_widening` set to `True`, the result is whatever
   `Value_Type.find_common_type` (in strict mode) returns for the types of
   the columns that are present; if that is `Nothing`, a `No_Common_Type`
   warning is reported to `problem_builder`.

   With `allow_type_widening` set to `False`, the type of the first present
   column is used. If that type is `Value_Type.Mixed` it is returned right
   away. Otherwise, if any other present column has a different type, a
   `Column_Type_Mismatch` warning is reported and `Nothing` is returned.
unify_result_type_for_union column_set all_tables allow_type_widening problem_builder =
    resolved = column_set.resolve_columns all_tables
    case allow_type_widening of
        True ->
            present = resolved.filter Filter_Condition.Not_Nothing
            present_types = present.map .value_type
            unified = Value_Type.find_common_type present_types strict=True
            if unified.is_nothing then
                problem_builder.report_other_warning (No_Common_Type.Error column_set.name)
            unified
        False ->
            reference_column = resolved.find (candidate-> candidate.is_nothing.not)
            expected_type = reference_column.value_type
            if expected_type == Value_Type.Mixed then Value_Type.Mixed else
                # Missing columns are skipped before their type is inspected.
                offending_column = resolved.find if_missing=Nothing candidate->
                    if candidate.is_nothing then False else candidate.value_type != expected_type
                if offending_column.is_nothing then expected_type else
                    actual_type = offending_column.value_type
                    problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name expected_type actual_type)
                    Nothing

0 comments on commit ac0772f

Please sign in to comment.