Skip to content

Commit

Permalink
Statistics on a Vector (#3442)
Browse files Browse the repository at this point in the history
- Implements various statistics on Vector

# Important Notes
Some minor codebase improvements:
- Some tweaks to Any/Nothing to improve performance
- Fixed bug in ObjectComparator
- Added if_nothing
- Removed Group_By_Key
  • Loading branch information
jdunkerley authored May 11, 2022
1 parent d8a2832 commit 4f3a768
Show file tree
Hide file tree
Showing 29 changed files with 653 additions and 116 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
- [Implemented a basic reader for the `Delimited` file format.][3424]
- [Implemented a reader for the `Excel` file format.][3425]
- [Added custom encoding support to the `Delimited` file format reader.][3430]
- [Implemented `compute` method on `Vector` for statistics calculations.][3442]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -181,6 +182,7 @@
[3424]: https://github.com/enso-org/enso/pull/3424
[3425]: https://github.com/enso-org/enso/pull/3425
[3430]: https://github.com/enso-org/enso/pull/3430
[3442]: https://github.com/enso-org/enso/pull/3442

#### Enso Compiler

Expand Down
22 changes: 17 additions & 5 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Data/Any.enso
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ type Any
a = 6 * 21
a >= 147
>= : Any -> Boolean
>= that = (this > that) || (this == that)
>= that =
ordering = this.compare_to that
(ordering == Ordering.Greater) || (ordering == Ordering.Equal)

## ALIAS Less Than

Expand Down Expand Up @@ -218,7 +220,9 @@ type Any
a = 7 * 21
a <= 147
<= : Any -> Boolean
<= that = (this < that) || (this == that)
<= that =
ordering = this.compare_to that
(ordering == Ordering.Less) || (ordering == Ordering.Equal)

## Checks if the type is an instance of `Nothing`.

Expand All @@ -230,9 +234,17 @@ type Any

1.is_nothing
is_nothing : Boolean
is_nothing = case this of
Nothing -> True
_ -> False
is_nothing = False

## UNSTABLE
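If this is Nothing then returns the provided fallback argument; any
other value (the case handled here) is returned unchanged and the
fallback is ignored.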

> Example
If the value "Hello" is nothing return "".

"Hello".if_nothing ""
if_nothing : Any -> Any
if_nothing ~_ = this

## Executes the provided handler on an error, or returns a non-error value
unchanged.
Expand Down
4 changes: 2 additions & 2 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Data/Locale.enso
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ us = here.from_language_tag "en-US"
example_new = Locale.new "en" "GB" "UTF-8"
new : Text -> Text | Nothing -> Text | Nothing -> Locale
new language country=Nothing variant=Nothing =
country_text = if country.is_nothing then "" else country
variant_text = if variant.is_nothing then "" else variant
country_text = country.if_nothing ""
variant_text = variant.if_nothing ""
java_locale = JavaLocale.new language country_text variant_text
here.from_java java_locale

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from Standard.Base import all
from Standard.Base import Any, Ordering, Nothing, Vector

polyglot java import org.enso.base.ObjectComparator

## ADVANCED
Creates a Java Comparator object which can call En
Creates a Java Comparator object which can call Enso compare_to

Arguments:
- custom_comparator:
If `Nothing` will get a singleton instance for `.compare_to`.
Otherwise can support a custom fallback comparator.
new : Nothing | (Any->Any->Ordering)
new custom_comparator=Nothing =
comparator_to_java cmp x y = cmp x y . to_sign
comparator_to_java cmp x y = Vector.handle_incomparable_value (cmp x y . to_sign)

case custom_comparator of
Nothing -> ObjectComparator.getInstance (comparator_to_java .compare_to)
Expand Down
117 changes: 117 additions & 0 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from Standard.Base import Boolean, True, False, Nothing, Vector, Number, Any, Error, Array, Panic, Illegal_Argument_Error, Unsupported_Argument_Types
from Standard.Base.Data.Vector import Empty_Error

import Standard.Base.Data.Ordering.Comparator

polyglot java import org.enso.base.statistics.Moments
polyglot java import org.enso.base.statistics.CountMinMax

type Statistic
    ## PRIVATE
       Maps this statistic onto the matching constant of the Java `Moments`
       class. Any statistic that is not listed below (Count, Minimum and
       Maximum among the constructors of this type) maps to `Nothing`.
    to_java : SingleValue
    to_java = case this of
        Variance population ->
            if population then Moments.VARIANCE_POPULATION else Moments.VARIANCE
        Standard_Deviation population ->
            if population then Moments.STANDARD_DEVIATION_POPULATION else Moments.STANDARD_DEVIATION
        Skew population ->
            if population then Moments.SKEW_POPULATION else Moments.SKEW
        Sum -> Moments.SUM
        Mean -> Moments.MEAN
        Kurtosis -> Moments.KURTOSIS
        _ -> Nothing

    ## Count the number of non-Nothing and non-NaN values.
    type Count

    ## The minimum value.
    type Minimum

    ## The maximum value.
    type Maximum

    ## Sum the non-Nothing and non-NaN values.
    type Sum

    ## The sample mean of the values.
    type Mean

    ## The variance of the values.

       Arguments:
       - population: when `True` the population variant of the statistic is
         selected, otherwise the sample variant (the default).
    type Variance (population:Boolean=False)

    ## The standard deviation of the values.

       Arguments:
       - population: when `True` the population variant of the statistic is
         selected, otherwise the sample variant (the default).
    type Standard_Deviation (population:Boolean=False)

    ## The skewness of the values.

       Arguments:
       - population: when `True` the population variant of the statistic is
         selected, otherwise the sample variant (the default).
    type Skew (population:Boolean=False)

    ## The sample kurtosis of the values.
    type Kurtosis

## Compute a single statistic on a vector like object.

   Delegates to `compute_bulk` with a one-element list of statistics and
   returns the first (only) entry of its result.

   Arguments:
   - data: Vector like object which has a `to_array` method.
   - statistic: Statistic to calculate; defaults to `Count`.
compute : Vector -> Statistic -> Any
compute data statistic=Count =
    results = here.compute_bulk data [statistic]
    results.first


## Compute a set of statistics on a vector like object.

The result holds one entry per requested statistic, in the same order as
`statistics`.

Arguments:
- data: Vector like object which has a `to_array` method (its `length` is
also read to detect empty input).
- statistics: Set of statistics to calculate; defaults to `[Count, Sum]`.

Special cases visible in the implementation:
- Empty data: Count and Sum give 0, Minimum and Maximum give an
`Empty_Error`, every other statistic gives `Number.nan`.
- Minimum / Maximum give `Vector.Incomparable_Values_Error` when the
`CountMinMax` result reports `comparatorError`.
- If `Moments.compute` panics with `Unsupported_Argument_Types`, every
statistic backed by `Moments` resolves to an `Illegal_Argument_Error`.
compute_bulk : Vector -> [Statistic] -> [Any]
compute_bulk data statistics=[Count, Sum] =

# True when at least one requested statistic needs the CountMinMax pass.
count_min_max = statistics.any s->((s.is_a Count) || (s.is_a Minimum) || (s.is_a Maximum))

# Moments constant for each statistic; Nothing where `to_java` has no mapping.
# When every entry is Nothing the Moments computation is skipped entirely.
java_stats = statistics.map .to_java
skip_java_stats = java_stats.all s->s.is_nothing
# Fallback used in place of the Moments result array: same positions as
# `statistics`, with an error for each Moments-backed entry.
report_invalid _ =
statistics.map_with_index i->v->
if java_stats.at i . is_nothing then Nothing else
Error.throw (Illegal_Argument_Error ("Can only compute " + v.to_text + " on numerical data sets."))
handle_unsupported = Panic.catch Unsupported_Argument_Types handler=report_invalid

# Result for a single statistic when `data` has no elements.
empty_map s = if (s == Count) || (s == Sum) then 0 else
if (s == Minimum) || (s == Maximum) then Error.throw Empty_Error else
Number.nan

if data.length == 0 then statistics.map empty_map else
count_min_max_values = if count_min_max then CountMinMax.new (CountMinMax.toObjectStream data.to_array) Comparator.new else Nothing
stats_array = if skip_java_stats then Nothing else
handle_unsupported <| Moments.compute data.to_array java_stats.to_array

statistics.map_with_index i->s->case s of
Count -> count_min_max_values.count
Minimum ->
if count_min_max_values.comparatorError then (Error.throw Vector.Incomparable_Values_Error) else
count_min_max_values.minimum
Maximum ->
if count_min_max_values.comparatorError then (Error.throw Vector.Incomparable_Values_Error) else
count_min_max_values.maximum
_ -> stats_array.at i

## Compute a single statistic on the vector.

   Calls `compute_bulk` on this vector with a one-element list and returns
   the first (only) entry of the result.

   Arguments:
   - statistic: Statistic to calculate; defaults to `Count`.
Vector.Vector.compute : Statistic -> Any
Vector.Vector.compute statistic=Count =
    results = this.compute_bulk [statistic]
    results.first


## Compute statistics on the vector.

   Extension-method form of the module-level `compute_bulk`, called with
   this vector as the data.

   Arguments:
   - statistics: Set of statistics to calculate; defaults to `[Count, Sum]`.
Vector.Vector.compute_bulk : [Statistic] -> [Any]
Vector.Vector.compute_bulk statistics=[Count, Sum] = here.compute_bulk this statistics
15 changes: 8 additions & 7 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ type Vector

More details on the HashCode / HashMap ticket https://www.pivotaltracker.com/story/show/181027272.

recovered = Panic.recover Any
here.handle_incomparable_value <|
builder = here.new_builder
this.fold Map.empty existing->
item->
Expand All @@ -970,12 +970,6 @@ type Vector
existing.insert key True
builder.to_vector

recovered.map_error e-> case e of
No_Such_Method_Error _ _ -> Incomparable_Values_Error
Unsupported_Argument_Types _ -> Incomparable_Values_Error
Type_Error _ _ _ -> Incomparable_Values_Error
_ -> Panic.throw e


## UNSTABLE

Expand Down Expand Up @@ -1169,3 +1163,10 @@ type Partition_Accumulator true_builder false_builder ix
An error indicating that the vector contains incomparable types.

type Incomparable_Values_Error

## ADVANCED
Evaluates `function` and replaces the panics that can arise from comparing
values with `Error.throw Incomparable_Values_Error`.

Arguments:
- function: the computation to guard. It is declared with `~`, so it is
passed suspended rather than as an already computed value.

Only panics of type `No_Such_Method_Error`, `Type_Error` and
`Unsupported_Argument_Types` are caught here; no other panic type is
handled by this helper.
handle_incomparable_value ~function =
# Builds a catcher for one panic type `t`; the three catchers are nested below.
handle t = Panic.catch t handler=(Error.throw Incomparable_Values_Error)
handle No_Such_Method_Error <| handle Type_Error <| handle Unsupported_Argument_Types <| function
33 changes: 28 additions & 5 deletions distribution/lib/Standard/Base/0.0.0-dev/src/Nothing.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,30 @@
## The type that has only a singleton value.
from Standard.Base import Boolean, True

It is often used alongside a value of type a to provide a Maybe or
Option abstraction. The type a | Nothing is semantically equivalent to
Maybe a.
@Builtin_Type
type Nothing
## The type that has only a singleton value. Nothing in Enso is used as a
universal value to indicate the absence of a value.

It is often used alongside a value of type a to provide a Maybe or
Option abstraction. The type a | Nothing is semantically equivalent to
Maybe a.
@Builtin_Type
type Nothing

## Checks if the type is an instance of `Nothing`.

> Example
Checking if the value 1 is nothing.

1.is_nothing
is_nothing : Boolean
is_nothing = True

## UNSTABLE
If this is Nothing then returns `function`.

> Example
If the value "Hello" is nothing return "".

"Hello".if_nothing ""
if_nothing : Any -> Any
if_nothing ~function = function
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ type Column
defaults to the current type if not provided.
make_binary_op : Text -> Text -> (Column | Any) -> (Sql_Type | Nothing) -> (Sql_Type | Nothing) -> Column
make_binary_op op_kind operand new_type=Nothing operand_type=Nothing =
actual_new_type = if new_type.is_nothing then this.sql_type else new_type
actual_new_type = new_type.if_nothing this.sql_type
case operand of
Column _ _ _ other_expr _ ->
case Helpers.check_integrity this operand of
Expand All @@ -124,7 +124,7 @@ type Column
new_expr = IR.Operation op_kind [this.expression, other_expr]
Column this.name this.connection actual_new_type new_expr this.context
_ ->
actual_operand_type = if operand_type.is_nothing then this.sql_type else operand_type
actual_operand_type = operand_type.if_nothing this.sql_type
other = IR.make_constant actual_operand_type operand
new_expr = IR.Operation op_kind [this.expression, other]
Column this.name this.connection actual_new_type new_expr this.context
Expand All @@ -139,7 +139,7 @@ type Column
operator.
make_unary_op : Text -> Text -> (Sql_Type | Nothing) -> Column
make_unary_op op_kind new_type=Nothing =
actual_new_type = if new_type.is_nothing then this.sql_type else new_type
actual_new_type = new_type.if_nothing this.sql_type
new_expr = IR.Operation op_kind [this.expression]
Column this.name this.connection actual_new_type new_expr this.context

Expand Down Expand Up @@ -605,7 +605,7 @@ type Aggregate_Column_Builder
- new_type: The SQL type of the result column.
make_aggregate : Column -> Text -> Text -> Sql_Type -> Column
make_aggregate column operation name_suffix="_agg" new_type=Nothing =
actual_new_type = if new_type.is_nothing then column.sql_type else new_type
actual_new_type = new_type.if_nothing column.sql_type
expr = IR.Operation operation [column.expression]
case Helpers.ensure_name_is_sane name_suffix of
True ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,8 @@ type Table
Text -> wrap_elem elem
Column _ _ _ _ _ -> wrap_elem elem
Order_Rule elem Nothing my_order my_nulls ->
chosen_order = if my_order.is_nothing then order else my_order
chosen_nulls = if my_nulls.is_nothing then missing_last else my_nulls
chosen_order = my_order.if_nothing order
chosen_nulls = my_nulls.if_nothing missing_last
[this.resolve elem . expression, order_to_ir chosen_order, missing_to_ir chosen_nulls]
Order_Rule _ _ _ _ ->
Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database"
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
import Standard.Table.Data.Column_Mapping
import Standard.Table.Data.Position

import Standard.Table.Data.Group_By_Key
import Standard.Table.Data.Aggregate_Column

polyglot java import org.enso.table.data.table.Table as Java_Table
Expand Down
Loading

0 comments on commit 4f3a768

Please sign in to comment.