Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restructuring the Faker type and creating tests for Group_By #3318

Merged
merged 20 commits into from
Mar 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@
`Vector.fold_with_index` and `Vector.take` methods.][3236]
- [Implemented new `Text.insert` method][3311]
- [Implemented `Bool.compare_to` method][3317]
- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
also compute mode, percentile, minimum, maximum.][3318]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -101,6 +103,7 @@
[3236]: https://github.com/enso-org/enso/pull/3236
[3311]: https://github.com/enso-org/enso/pull/3311
[3317]: https://github.com/enso-org/enso/pull/3317
[3318]: https://github.com/enso-org/enso/pull/3318

#### Enso Compiler

Expand Down
18 changes: 18 additions & 0 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,24 @@ type Map
to_vector_with_builder this
builder.to_vector

## Returns the entry with the lowest key in the map as a `Pair` of key and
   value.
   Returns `Nothing` when the map is empty.
first : Pair
first =
    # Walk down the left children only; `best` holds the last entry visited
    # and is still `Nothing` if the very first node is a `Tip`.
    leftmost best node = case node of
        Tip -> best
        Bin _ key value left _ -> @Tail_Call leftmost (Pair key value) left
    leftmost Nothing this

## Returns the entry with the highest key in the map as a `Pair` of key and
   value.
   Returns `Nothing` when the map is empty.
last : Pair
last =
    # Walk down the right children only; `best` holds the last entry visited
    # and is still `Nothing` if the very first node is a `Tip`.
    rightmost best node = case node of
        Tip -> best
        Bin _ key value _ right -> @Tail_Call rightmost (Pair key value) right
    rightmost Nothing this

## UNSTABLE

An error for getting a missing value from a map.
Expand Down

Large diffs are not rendered by default.

37 changes: 13 additions & 24 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ import Standard.Base.Data.Ordering.Vector_Lexicographic_Order
## Create a key structure for grouping operations
key : Vector -> Group_By_Key
key values =
mapper c = case c of
Boolean -> Comparable_Boolean c
Nothing -> Comparable_Nothing
_ -> c

mapper c = Comparable_Value c
Group_By_Key <| values.map mapper

## PRIVATE
Expand All @@ -25,30 +21,23 @@ type Group_By_Key
compare_to that =
Vector_Lexicographic_Order.compare this.values that.values

## PRIVATE
Temporary workaround until Boolean compare_to completed
type Comparable_Boolean
type Comparable_Boolean value

== : Comparable_Boolean->Boolean
== that = (this.compare_to that) == Ordering.Equal

compare_to : Any->Ordering
compare_to that =
if this.value == that.value then Ordering.Equal else
if this.value then Ordering.Greater else Ordering.Less

## PRIVATE
Temporary workaround allowing Nothing to be in a Group_By
type Comparable_Nothing
type Comparable_Nothing
type Comparable_Value
type Comparable_Value value

## Equality check implemented by delegating to `compare_to`: two values are
   equal exactly when `compare_to` yields `Ordering.Equal`.
== : Comparable_Value->Boolean
== that = (this.compare_to that) == Ordering.Equal

compare_to : Any->Ordering
compare_to that =
case that of
Comparable_Nothing -> Ordering.Equal
Nothing -> Ordering.Equal
_ -> Ordering.Less
value = case that of
Comparable_Value v -> v
_ -> that

case this.value of
Nothing -> if value.is_nothing then Ordering.Equal else Ordering.Less
_ -> if value.is_nothing then Ordering.Greater else this.value.compare_to value

is_nothing : Boolean
is_nothing = this.value.is_nothing
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import Standard.Base.Data.Time.Date
import Standard.Table.Io.Spreadsheet_Write_Mode
import Standard.Table.Io.Format
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Unique_Name_Strategy

from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
Expand Down Expand Up @@ -516,16 +517,17 @@ type Table
group_by selector columns (on_problems=Report_Warning) (warnings=Warnings.default) =
# Grouping Key
key_columns = if selector.is_nothing then [] else
Table_Helpers.select_columns internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings
Table_Helpers.select_columns_helper internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings
key_length = key_columns.length
make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i)))

# New Table Accumulator
new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[n, Vector.new_builder]
name_strategy = Unique_Name_Strategy.new
new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[name_strategy.make_unique n, Vector.new_builder]
add_row key =
idx = new_table.at 0 . at 1 . length
0.up_to key_length . each i->
new_table.at i . at 1 . append (key.values.at i)
new_table.at i . at 1 . append (key.values.at i).value
0.up_to (columns.length) . each i->
column = columns.at i
new_table.at (i + key_length) . at 1 . append (column.initial_value)
Expand Down
109 changes: 85 additions & 24 deletions distribution/lib/Standard/Test/0.0.0-dev/src/Faker.enso
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,94 @@ upper_case_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".utf_16
lower_case_letters = "abcdefghijklmnopqrstuvwxyz".utf_16
numbers = "0123456789".utf_16

## Creates a random number generator which can be used for creating test values.
## Creates a new Faker which can be used for creating test values.

Arguments:
- seed: Optional seed value to make the sequence deterministic
make_generator : Integer -> Random
make_generator (seed = 0) =
if seed == 0 then Random.new else Random.new seed
new : Integer -> Faker
new (seed = 0) =
generator = if seed == 0 then Random.new else Random.new seed
Faker generator

## Object to generate (deterministic) random values for testing
type Faker
type Faker generator

## Creates a random string based on a template and random number generator.
## Creates a random Text based on a template of character sets.

Arguments:
- template: Vector of vectors that represent the possible characters for each
letter, as UTF-16 code units.
- generator: Random number generator

> Examples:
Creates a fake UK National Insurance number:

l = "ABCEGHJKLMNOPRSTWXYZ".utf_16
n = "0123456789".utf_16
s = "ABCDFMP ".utf_16
template = [l, l, n, n, n, n, n, s]
ni_number = make_string template make_generator
make_string : Vector -> Any -> Text
make_string template generator =
characters = template.map possible_chars->
selected_char_ix = generator.nextInt possible_chars.length
possible_chars.at selected_char_ix
Text.from_utf_16 characters
Arguments:
- template: Vector of vectors that represent the possible characters for each
letter, as UTF-16 code units.

> Examples:
Creates a fake UK National Insurance number:

l = "ABCEGHJKLMNOPRSTWXYZ".utf_16
n = "0123456789".utf_16
s = "ABCDFMP ".utf_16
template = [l, l, n, n, n, n, n, s]
ni_number = Faker.new . string_value template
string_value : Vector -> Text
string_value template =
characters = template.map possible_chars->
selected_char_ix = this.generator.nextInt possible_chars.length
possible_chars.at selected_char_ix
Text.from_utf_16 characters

## Builds a random Text of the requested length from letters of a single
   case.

   Arguments:
   - length: number of characters to generate.
   - upper_case: when True the characters are drawn from the upper case
     letters, otherwise from the lower case letters.
alpha : Integer->Boolean->Text
alpha length=1 upper_case=False =
    letters = if upper_case then here.upper_case_letters else here.lower_case_letters
    # One copy of the character set per output position.
    template = 0.up_to length . map _->letters
    this.string_value template

## Builds a random Text of the requested length from letters of a single
   case plus the digits.

   Arguments:
   - length: number of characters to generate.
   - upper_case: when True the letters are upper case, otherwise lower case.
alpha_numeric : Integer->Boolean->Text
alpha_numeric length=1 upper_case=False =
    letters = if upper_case then here.upper_case_letters else here.lower_case_letters
    symbols = letters + here.numbers
    # One copy of the character set per output position.
    template = 0.up_to length . map _->symbols
    this.string_value template

## Builds a random Text made of hexadecimal digits (`0`-`9`, `A`-`F`).

   Arguments:
   - length: number of digits to generate.
hexadecimal : Integer->Text
hexadecimal length=1 =
    digits = "0123456789ABCDEF".utf_16
    # One copy of the digit set per output position.
    template = 0.up_to length . map _->digits
    this.string_value template

## Draws a random Boolean: True when the generator's next double is below
   0.5, False otherwise.
boolean : Boolean
boolean =
    this.generator.nextDouble < 0.5

## Draws a random Integer starting at `minimum`.

   Arguments:
   - minimum: lowest value that is added to the random offset.
   - maximum: upper limit; the offset is drawn with `nextInt` over the width
     `maximum - minimum`.
     NOTE(review): presumably `maximum` itself is never returned (exclusive
     bound, as with `java.util.Random.nextInt`) - confirm against the
     generator in use.
integer : Integer->Integer->Integer
integer minimum=0 maximum=100 =
    span = maximum - minimum
    offset = this.generator.nextInt span
    minimum + offset

## Draws a random Decimal by scaling the generator's next double to the
   width `maximum - minimum` and shifting it by `minimum`.

   Arguments:
   - minimum: value added to the scaled draw.
   - maximum: value used together with `minimum` to compute the width.
decimal : Decimal->Decimal->Decimal
decimal minimum=0.0 maximum=1.0 =
    span = maximum - minimum
    minimum + this.generator.nextDouble * span

## Picks one element of the given Vector at random.

   Arguments:
   - items: Vector of items to pick from.
vector_item : Vector->Any
vector_item items =
    index = this.generator.nextInt items.length
    items.at index

## Randomly replaces the given value with Nothing.

   Arguments:
   - value: the value to pass through or replace.
   - chance: threshold compared against the generator's next double; Nothing
     is returned when the draw is less than or equal to it.
make_some_nothing : Any->Decimal->Any
make_some_nothing value (chance=0.1) =
    roll = this.generator.nextDouble
    if roll <= chance then Nothing else value
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package org.enso.interpreter.node.expression.builtin.mutable;

import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.CachedContext;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.interop.InteropLibrary;
import com.oracle.truffle.api.interop.InvalidArrayIndexException;
import com.oracle.truffle.api.interop.UnsupportedMessageException;
import com.oracle.truffle.api.library.CachedLibrary;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.Language;
import org.enso.interpreter.dsl.BuiltinMethod;
import org.enso.interpreter.node.expression.builtin.interop.syntax.HostValueToEnsoNode;
import org.enso.interpreter.runtime.Context;
Expand All @@ -34,11 +32,10 @@ Object doArray(
long source_index,
Array dest,
long dest_index,
long count,
@CachedContext(Language.class) Context ctx) {
long count) {
System.arraycopy(
src.getItems(), (int) source_index, dest.getItems(), (int) dest_index, (int) count);
return ctx.getBuiltins().nothing().newInstance();
return Context.get(this).getBuiltins().nothing().newInstance();
}

@Specialization(guards = "arrays.hasArrayElements(src)")
Expand Down
6 changes: 4 additions & 2 deletions test/Benchmarks/src/Natural_Order_Sort.enso
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ main =

## No specific significance to this constant, just fixed to make generated set deterministic
fixed_random_seed = 1644575867
random_generator = Faker.make_generator fixed_random_seed
faker = Faker.new fixed_random_seed

unsorted = 0.up_to here.vector_size . map _->(Faker.make_string template random_generator)
IO.println <| "Creating unsorted vector"
unsorted = 0.up_to here.vector_size . map _->(faker.string_value template)

IO.println <| "Benchmarking..."
Bench.measure (unsorted.sort by=Natural_Order.compare) "Natural Order" here.iter_size here.num_iterations
30 changes: 8 additions & 22 deletions test/Benchmarks/src/Number_Parse.enso
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,6 @@ import Standard.Test.Faker

## Bench Utilities ============================================================

make_double_strings : Integer -> Any -> Integer -> Integer -> Vector
make_double_strings count generator (min = -1000000000) (max = 1000000000) =
range = max - min
output = Array.new count
0.up_to count . each i->
v = generator.nextDouble * range - min
output.set_at i v.to_text
Vector.Vector output

make_integer_strings : Integer -> Any -> Integer -> Integer -> Vector
make_integer_strings count generator (min = -1000000000) (max = 1000000000) =
range = max - min
output = Array.new count
0.up_to count . each i->
v = (generator.nextInt range - min)
output.set_at i v.to_text
Vector.Vector output

vector_size = 1000000
iter_size = 100
num_iterations = 10
Expand All @@ -32,10 +14,14 @@ num_iterations = 10
main =
## No specific significance to this constant, just fixed to make generated set deterministic
fixed_random_seed = 1644575867
random_generator = Faker.make_generator fixed_random_seed
faker = Faker.new fixed_random_seed

double_string = here.make_double_strings here.vector_size random_generator
Bench.measure (double_string.map Decimal.parse) "Decimal.parse" here.iter_size here.num_iterations
IO.println <| "Creating decimal strings"
decimal_strings = Vector.new here.vector_size _->(faker.decimal -1000000000 1000000000).to_text
IO.println <| "Benchmarking Decimal.parse"
Bench.measure (decimal_strings.map Decimal.parse) "Decimal.parse" here.iter_size here.num_iterations

int_strings = here.make_integer_strings here.vector_size random_generator
IO.println <| "Creating integer strings"
int_strings = Vector.new here.vector_size _->(faker.integer -1000000000 1000000000).to_text
IO.println <| "Benchmarking Integer.parse"
Bench.measure (int_strings.map Integer.parse) "Integer.parse" here.iter_size here.num_iterations
49 changes: 49 additions & 0 deletions test/Benchmarks/src/Table/Group_By.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from Standard.Base import all

import Standard.Test.Bench
import Standard.Test.Faker

import Standard.Table.Data.Table
import Standard.Table.Data.Column_Selector
from Standard.Table.Data.Aggregate_Column import all

## Bench Utilities ============================================================

vector_size = 2500
iter_size = 100
num_iterations = 10

## Builds the input table for the benchmarks from values produced by a Faker.

Arguments:
- rows: number of values generated for every column.
- seed: seed passed to `Faker.new`; defaults to a fixed constant.
create_table : Integer->Integer->Table
create_table rows (seed=1646322139) =
faker = Faker.new seed
# Columns used as grouping keys: 3-letter text, integer from `faker.integer 0 10`, boolean.
key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
# Numeric columns: the multiply / floor / divide sequence limits the number of decimal places.
value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
# Text columns in which `make_some_nothing` replaces some entries with Nothing.
text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
Table.new [key1, key2, key3, value1, value2, text1, text2]

# The Benchmarks ==============================================================
## Entry point: builds one table of `vector_size` rows and measures
`Table.group_by` with five aggregates for three different key selections.
main =
IO.println <| "Making table data..."
table = here.create_table here.vector_size

# Empty key selection (`By_Index []`): aggregates are computed with no key columns.
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count Nothing]) "Count table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Median "Value"]) "Median table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Mode "Index"]) "Mode table" here.iter_size here.num_iterations

# Single key column: grouped by "Index".
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count Nothing]) "Count grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Median "Value"]) "Median grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations

# Two key columns: grouped by "Index" and "Flag".
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations
Loading