Skip to content

Commit

Permalink
Regex support
Browse files Browse the repository at this point in the history
Tidy of Unique Name Strategy
  • Loading branch information
jdunkerley committed Feb 9, 2022
1 parent 6ff5324 commit 1b17985
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 51 deletions.
44 changes: 26 additions & 18 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Matching.enso
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,31 @@ from Standard.Base.Error.Warnings import Warning_System

## Strategy for matching names.
type Matching_Strategy
## UNSTABLE
Exact name matching.
## UNSTABLE
Exact name matching.

A name is matched if its exact name is provided.
type Exact (case_sensitivity : (True | Case_Insensitive) = True)
A name is matched if its exact name is provided.
type Exact (case_sensitivity : (True | Case_Insensitive) = True)

## UNSTABLE
Regex-based name matching.
## UNSTABLE
Regex-based name matching.

A name is matched if its name matches the provided regular expression.
type Regex (case_sensitivity : (True | Case_Insensitive) = True)


## ADVANCED
   Builds a compiled regular expression from `criterion`, honouring the
   case sensitivity setting carried by this strategy.

   Arguments:
   - criterion: The regular expression text to compile.

   Only the Regex strategy can be compiled; for any other strategy the
   result of `Error.throw` is returned instead.
compile : Text -> Regex_Module.Pattern
compile criterion = case this of
    Regex sensitivity ->
        ignore_case = case sensitivity of
            True -> False
            Case_Insensitive -> True
        Regex_Module.compile criterion case_insensitive=ignore_case
    _ -> Error.throw "Invalid Matching_Strategy to compile"

A name is matched if its name matches the provided regular expression.
type Regex (case_sensitivity : (True | Case_Insensitive) = True)

## UNSTABLE
A temporary workaround to allow the Exact constructor to work with default arguments.
Expand All @@ -29,6 +43,7 @@ Exact.new (case_sensitivity = True) = Exact case_sensitivity
Regex.new : (True | Case_Insensitive) -> Regex
Regex.new (case_sensitivity = True) = Regex case_sensitivity


## UNSTABLE
Specifies that the operation should ignore case.

Expand Down Expand Up @@ -156,13 +171,6 @@ match_criteria objects criteria reorder=False name_mapper=(x->x) matching_strate
match_single_criterion : Text -> Text -> Matching_Strategy -> Boolean
match_single_criterion name criterion matching_strategy = case matching_strategy of
Exact case_sensitivity -> case case_sensitivity of
True ->
name == criterion
Case_Insensitive ->
name.equals_ignore_case criterion
Regex case_sensitivity ->
insensitive = case case_sensitivity of
True -> False
Case_Insensitive -> True
re = Regex_Module.compile criterion case_insensitive=insensitive
re.matches name
True -> name == criterion
Case_Insensitive -> name.equals_ignore_case criterion
Regex _ -> matching_strategy.compile . matches name
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column
from Standard.Table.Data.Sort_Method as Sort_Method_Module import Sort_Method
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
import Standard.Table.Data.Position
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names
import Standard.Table.Data.Column_Mapping
import Standard.Table.Internal.Unique_Name_Strategy
import Standard.Base.Data.Ordering.Natural_Order
Expand Down Expand Up @@ -139,34 +139,31 @@ reorder_columns internal_columns selector position on_problems warnings =
will be removed in the future.
rename_columns : Vector -> Column_Mapping -> Problem_Behavior -> Warnings.Warning_System -> Map
rename_columns internal_columns mapping on_problems warnings =
# ToDo: RegEx Baby
# ToDo: Invalid Name Errors ==> Column

unique = Unique_Name_Strategy.new
renames = Vector.new_builder

make_unique = target ->
new_target = unique.make_unique target
if target != new_target then renames.append target
new_target

col_count = internal_columns.length

# Computes the renamed column names for a vector of `[criterion, new_name]`
# pairs.
#
# Arguments:
# - vec: Vector of two-element vectors; element 0 is the criterion matched
#   against each column name, element 1 is the replacement name.
# - ms: The Matching_Strategy used to compare column names to criteria.
#
# Returns a Validation_Result with one entry per input column (Nothing when
# no criterion matched that column) and the problems reported by
# `validate_unique`.
name_mapper vec ms =
    validation = here.validate_unique vec v->[Duplicate_Column_Selectors v] x->(x.at 1)
    good_names = validation.valid

    mapper = name->
        # Index of the first validated pair whose criterion matches `name`.
        index = 0.up_to good_names.length . find i->(Matching.match_single_criterion name ((good_names.at i).at 0) ms)
        case index of
            Nothing -> Nothing
            _ ->
                pair = good_names.at index
                new_name = case ms of
                    Matching.Regex _ ->
                        pattern = ms.compile (pair.at 0)
                        # The replacement must be a single parenthesised
                        # argument; written as `(...).at 1` without the outer
                        # parentheses it is split into the unapplied `at`
                        # method and a separate `1` argument.
                        pattern.replace name (pair.at 1)
                    _ -> pair.at 1
                # Register the name so later duplicates get renamed.
                unique.make_unique new_name

    new_names = 0.up_to col_count . map i->(mapper (internal_columns.at i).name)
    Validation_Result new_names validation.problems

mapped = case mapping of
Column_Mapping.By_Column vec ->
map = Map.from_vector (vec.map r-> [r.at 0 . name, r.at 1])
output = here.rename_columns internal_columns (Column_Mapping.By_Name map (Matching.Exact case_sensitivity=True)) on_problems warnings
Validation_Result output []
Column_Mapping.By_Name map ms ->
keys = map.keys
mapper = name->
index = 0.up_to col_count . find k->(Matching.match_single_criterion name k ms)
case index of
Nothing -> Nothing
_ -> make_unique (map.get (keys.at index))

new_names = 0.up_to col_count . map i->(mapper (internal_columns.at i).name)
Validation_Result new_names []
Column_Mapping.By_Column vec -> name_mapper (vec.map r-> [r.at 0 . name, r.at 1]) (Matching.Exact case_sensitivity=True)
Column_Mapping.By_Name map ms -> name_mapper map.to_vector ms
Column_Mapping.By_Index map ->
validation = here.validate_indices col_count map.keys
good_indices = validation.valid
Expand All @@ -175,7 +172,7 @@ rename_columns internal_columns mapping on_problems warnings =

new_names = 0.up_to col_count . map i->
target = index_map.get_or_else i Nothing
if target.is_nothing then target else make_unique target
if target.is_nothing then target else unique.make_unique target

Validation_Result new_names validation.problems
Column_Mapping.By_Position vec ->
Expand All @@ -184,15 +181,18 @@ rename_columns internal_columns mapping on_problems warnings =
False -> Validation_Result vec []
good_names = validation.valid

new_names = 0.up_to col_count . map i->if i < good_names.length then good_names.at i else Nothing
new_names = 0.up_to col_count . map i->if i>=good_names.length then Nothing else
unique.make_unique (good_names.at i)
Validation_Result new_names validation.problems

processed = mapped.valid.map_with_index i->n->
if n.is_nothing then (make_unique (internal_columns.at i).name) else n
if n.is_nothing then (unique.make_unique (internal_columns.at i).name) else n

problems = mapped.problems + (if renames.length == 0 then [] else [Duplicate_Output_Column_Names renames.to_vector])
on_problems.attach_problems_before problems warnings processed
problems = mapped.problems +
(if unique.invalid_names.length == 0 then [] else [Invalid_Output_Column_Names unique.invalid_names.to_vector]) +
(if unique.renames.length == 0 then [] else [Duplicate_Output_Column_Names unique.renames.to_vector])

on_problems.attach_problems_before problems warnings processed


## PRIVATE
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,82 @@
from Standard.Base import all

## Creates a new Unique_Name_Strategy instance.

This is a mutable data structure, that allows for creating a collection
of column names and making them unique. It will track any duplicates or
invalid names that are passed to it.

> Example
Construct a set of unique names from a list containing a duplicate and an invalid (empty) name

unique_name_strategy = Unique_Name_Strategy.new
unique_names = ["A","B","A",""] . map unique_name_strategy.make_unique
duplicates = unique_name_strategy.renames
invalid = unique_name_strategy.invalid_names
new : Unique_Name_Strategy
new = Unique_Name_Strategy.new

type Unique_Name_Strategy
type Unique_Name_Strategy store
## PRIVATE
Creates a Unique_Name_Strategy

Arguments:
- store: Backing store for used names (must support get_or_else and insert)
- renames: Vector builder for any duplicates renamed (must support append)
- invalid_names: Vector builder for any invalid names (must support append)
type Unique_Name_Strategy store renames invalid_names

## Creates a new strategy object.

> Example
Make a new strategy

Unique_Name_Strategy.new
new : Unique_Name_Strategy
new = Unique_Name_Strategy Map.empty
new = Unique_Name_Strategy Map.empty Vector.new_builder Vector.new_builder

## Takes a value and converts to a valid (but not necessarily unique) name

Arguments:
- input: The value to convert into a valid column name.

An empty Text (or Nothing, which is treated as empty Text) is recorded in
`invalid_names` and replaced with "Column". Any other non-Text value is
converted with `to_text` and then validated the same way.

> Example
strategy = Unique_Name_Strategy.new
strategy.make_valid_name "" # returns "Column"
strategy.make_valid_name 1 # returns "1"
strategy.make_valid_name "Hello" # returns "Hello"
make_valid_name : Any -> Text
make_valid_name input =
case input of
Text ->
if input.is_empty.not then input else
this.invalid_names.append ""
"Column"
Nothing -> this.make_valid_name ""
_ -> this.make_valid_name input.to_text

## Takes a name and gets a unique version

Arguments:
- name: The column name to make unique.

> Example
strategy = Unique_Name_Strategy.new
strategy.make_unique "A" # returns "A"
strategy.make_unique "A" # returns "A_1"
make_unique : Text -> Text
make_unique name = this.internal_unique name 0
make_unique name =
valid_name = this.make_valid_name name
unique = this.internal_unique valid_name 0
if valid_name != unique then (this.renames.append name)
unique

## PRIVATE
Follows the strategy to find a valid unique name.

Arguments:
- name: The column name to make unique.
- shift: The current index added to the name.
internal_unique : Text -> Integer -> Text
internal_unique name shift =
inner_name = if shift == 0 then name else (name + "_"+ shift.to_text)
Expand Down

0 comments on commit 1b17985

Please sign in to comment.