Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Table API: concatenation, index access, column aggregation, API unification #1489

Merged
merged 5 commits into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ polyglot java import java.util.Locale as JavaLocale
# top ten countries by total (nominal, not per-capita) GDP.

## The default locale.

The default locale is a locale that does not specify any language, country,
or variant and is used as the language/country-neutral setting for locale
sensitive operations.
default : Locale
default =
    root = JavaLocale.ROOT
    here.from_java root

Expand Down
36 changes: 36 additions & 0 deletions distribution/std-lib/Base/src/Data/Text/Extensions.enso
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ from Base import all
from Builtins import Text, Prim_Text_Helpers

import Base.Data.Text.Split_Kind
import Base.Data.Locale
import Base.Meta

from Builtins export Text

polyglot java import com.ibm.icu.text.BreakIterator
polyglot java import com.ibm.icu.lang.UCharacter
polyglot java import org.enso.base.Text_Utils

## Computes the number of characters in the text.
Expand Down Expand Up @@ -268,3 +270,37 @@ Text.take_last count =
iterator.last
boundary = iterator.next -count
if boundary == -1 then this else Text_Utils.drop_first this boundary

## Converts each character in `this` to lower case.

Arguments:
- locale: specifies the locale for character case mapping. Defaults to the
`Locale.default` locale.

> Example
Converting a text to lower case in the default locale:
"My TeXt!".to_lower_case == "my text!"

> Example
Converting a text to lower case in a specified locale:
"I".to_lower_case (Locale.new "tr") == "ı"
Text.to_lower_case : Locale.Locale -> Text
Text.to_lower_case locale=Locale.default =
    # Hand the underlying Java locale to ICU, which performs the case mapping.
    java_locale = locale.java_locale
    UCharacter.toLowerCase java_locale this

## Converts each character in `this` to upper case.

Arguments:
- locale: specifies the locale for character case mapping. Defaults to
`Locale.default`.

> Example
Converting a text to upper case in the default locale:
"My TeXt!".to_upper_case == "MY TEXT!"

> Example
Converting a text to upper case in a specified locale:
"i".to_upper_case (Locale.new "tr") == "İ"
Text.to_upper_case : Locale.Locale -> Text
Text.to_upper_case locale=Locale.default =
    # Hand the underlying Java locale to ICU, which performs the case mapping.
    java_locale = locale.java_locale
    UCharacter.toUpperCase java_locale this
2 changes: 1 addition & 1 deletion distribution/std-lib/Base/src/Data/Time/Time.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from Base import all
import Base.Data.Time.Date
import Base.Data.Time.Duration
import Base.Data.Time.Locale
import Base.Data.Locale
import Base.Data.Time.Time_Of_Day
import Base.Data.Time.Zone

Expand Down
2 changes: 1 addition & 1 deletion distribution/std-lib/Base/src/Data/Time/Time_Of_Day.enso
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from Base import all
import Base.Data.Time.Date
import Base.Data.Time.Duration
import Base.Data.Time.Locale
import Base.Data.Locale
import Base.Data.Time.Time
import Base.Data.Time.Zone

Expand Down
74 changes: 74 additions & 0 deletions distribution/std-lib/Table/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import Table.Data.Table
import Table.Data.Storage

polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.operations.OrderBuilder

type Column
type Column java_column
Expand Down Expand Up @@ -137,6 +138,11 @@ type Column
is_missing : Column
is_missing =
    # Per-item check handed to the vectorized helper alongside the op name.
    nothing_check = item-> item == Nothing
    here.run_vectorized_unary_op this "is_missing" nothing_check

## Returns a column of booleans, with `True` items at the positions where
this column does not contain a `Nothing`.
is_present : Column
is_present =
    # Presence is the element-wise negation of the missing-value mask.
    missing_mask = this.is_missing
    missing_mask.not

## Returns a new column where missing values have been replaced with the
provided default.
fill_missing : Any -> Column
Expand Down Expand Up @@ -214,6 +220,13 @@ type Column
count : Integer
count =
    total = this.length
    missing = this.count_missing
    total - missing

## Returns the index of this column, as a column (indexed by itself).
Returns `Nothing` if there is no index set.
index : Column | Nothing
index =
    java_index_column = this.java_column.getIndex.toColumn
    case java_index_column of
        Nothing -> Nothing
        col -> Column col

## Returns the item contained in this column at the given index.
at : Integer -> Any
at index =
Expand Down Expand Up @@ -302,6 +315,67 @@ type Column
data = ['data', this.to_vector.take_start max_data]
Json.from_pairs [size, name, data] . to_text

## Sums the values in this column.
sum : Any
sum =
    # Enso-side reduction passed to the Java aggregate call together with the op name.
    vec_sum items = Vector.Vector items . reduce (+)
    this.java_column.aggregate 'sum' vec_sum True

## Computes the maximum element of this column.
max : Any
max =
    # Enso-side reduction passed to the Java aggregate call together with the op name.
    vec_max items = Vector.Vector items . reduce Math.max
    this.java_column.aggregate 'max' vec_max True

## Computes the minimum element of this column.
min : Any
min =
    # Enso-side reduction passed to the Java aggregate call together with the op name.
    vec_min items = Vector.Vector items . reduce Math.min
    this.java_column.aggregate 'min' vec_min True

## Computes the mean of non-missing elements of this column.
mean : Any
mean =
    vec_mean items =
        item_count = items.length
        # An empty input has no mean; yield `Nothing` rather than dividing by zero.
        if item_count == 0 then Nothing else
            total = Vector.Vector items . reduce (+)
            total / item_count
    this.java_column.aggregate 'mean' vec_mean True

## UNSTABLE

Sorts the column according to the specified rules.

Arguments:
- order: specifies the default sort order for this operation.
- missing_last: specifies the default placement of missing values when
compared to non-missing ones. Note that this argument is independent
from `order`, i.e. missing values will always be sorted according to
this rule, ignoring the ascending / descending setting.
- comparator: function taking two items in this column and returning
an ordering. If specified, it is used instead of the natural
(`.compare_to`) ordering.

> Example
Sorting `column` in ascending order.
column.sort

> Example
Sorting `column` in descending order, placing missing values at the
top of the resulting column.
column.sort order=Sort_Order.Descending missing_last=False

> Example
Sorting `column` in ascending order, using a custom comparator
function.
manhattan_comparator a b = (a.x.abs + a.y.abs) . compare_to (b.x.abs + b.y.abs)
column.sort comparator=manhattan_comparator
sort : Sort_Order -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column
sort order=Sort_Order.Ascending missing_last=True comparator=Nothing =
    # Translate the Enso-level sort order into the boolean passed to the Java rule.
    ascending = case order of
        Sort_Order.Ascending -> True
        Sort_Order.Descending -> False
    # A user comparator, when given, is wrapped for Java; otherwise no custom one is set.
    custom_cmp = case comparator of
        Nothing -> Nothing
        cmp -> Table.comparator_to_java cmp
    # The natural `.compare_to` ordering is supplied separately as the fallback.
    natural_cmp = Table.comparator_to_java .compare_to
    rule = OrderBuilder.OrderRule.new this.java_column custom_cmp ascending missing_last
    mask = OrderBuilder.buildOrderMask [rule].to_array natural_cmp
    Column (this.java_column.applyMask mask)

## Creates a new column given a name and a vector of elements.
from_vector : Text -> Vector -> Column
Expand Down
46 changes: 43 additions & 3 deletions distribution/std-lib/Table/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,12 @@ type Table

## Sets the column value at the given name. If a column with the given name
already exists, it will be replaced. Otherwise a new column is added.
set : Text -> Column -> Table
set name column =
Table (this.java_table.addOrReplaceColumn (column.rename name . java_column))
set : Text -> Column.Column | Vector.Vector -> Table
set name column = case column of
    Vector.Vector _ ->
        # Wrap the raw vector in a column first, then go through the column path.
        as_column = Column.from_vector name column
        this.set name as_column
    Column.Column _ ->
        renamed = column.rename name
        Table (this.java_table.addOrReplaceColumn renamed.java_column)

## Returns the vector of columns contained in this table.
columns : Vector
Expand All @@ -101,6 +104,13 @@ type Table
set_index index =
    indexed = this.java_table.indexFromColumn index
    Table indexed

## Returns the index of this table, as a column (indexed by itself).
Returns `Nothing` if there is no index set.
index : Column.Column | Nothing
index =
    java_index_column = this.java_table.getIndex.toColumn
    case java_index_column of
        Nothing -> Nothing
        col -> Column.Column col

## Selects a subset of columns from this table by name.
select : Vector -> Table
select columns =
    # The Java side takes an array of names rather than an Enso vector.
    names = columns.to_array
    Table (this.java_table.selectColumns names)
Expand Down Expand Up @@ -285,6 +295,14 @@ type Table
c -> here.comparator_to_java c
OrderBuilder.OrderRule.new c java_cmp o nulls

## UNSTABLE

Concatenates `other` to `this`. Any column that is present in one table,
but missing in another, will be `Nothing`-padded in the positions
corresponding to the missing column.
concat : Table -> Table
concat other =
    # Delegates to the Java table's `concat` and wraps the result back into a `Table`.
    Table (this.java_table.concat other.java_table)

## PRIVATE
comparator_to_java cmp x y =
    # Run the Enso comparator, then convert its result with `to_sign`.
    ordering = cmp x y
    ordering.to_sign

Expand Down Expand Up @@ -321,6 +339,28 @@ type Aggregate_Table
## PRIVATE
from_columns cols =
    java_table = Java_Table.new cols.to_array
    Table java_table

## Creates a new table from a vector of column names and a vector of vectors
specifying row contents.

Arguments:
- header: a list of texts specifying the column names
- rows: a vector of vectors, specifying the contents of each table row.
The length of each element of `rows` must be equal to the length of `header`.

> Example
The code below creates a table with 3 columns, named `foo`, `bar`, and
`baz`, containing `[1, 2, 3]`, `[True, False, True]`, and `['a', 'b', 'c']`,
respectively.
header = [ 'foo' , 'bar' , 'baz' ]
row_1 = [ 1 , True , 'a' ]
row_2 = [ 2 , False , 'b' ]
row_3 = [ 3 , True , 'c' ]
Table.from_rows header [row_1, row_2, row_3]
from_rows : Vector.Vector -> Vector.Vector -> Table
from_rows header rows =
    # Column `i` is built by taking the `i`-th item of every row.
    make_column i name =
        items = rows.map row-> row.at i
        [name, items]
    here.new (header.map_with_index make_column)

## Creates a new table from a vector of `[name, items]` pairs.

> Example
Expand Down
3 changes: 1 addition & 2 deletions distribution/std-lib/Table/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import Table.Data.Order_Rule

from Table.Io.Csv export all hiding Parser
export Table.Data.Column
from Table.Data.Table export new, join, No_Such_Column_Error, Table
from Table.Data.Table export new, from_rows, join, No_Such_Column_Error, Table
from Table.Data.Order_Rule export Order_Rule

## Converts a JSON array into a dataframe, by looking up the requested keys
Expand All @@ -29,4 +29,3 @@ Json.Array.to_table fields = case this of
cols = fields.map_with_index i-> n->
[n, rows.map (_.at i)]
Table.new cols

Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ public void append(Object o) {
size++;
}

/**
 * Appends {@code count} missing values: sets the range {@code [size, size + count)} in
 * {@code isNa} and advances {@code size} past it.
 *
 * @param count the number of missing values to append.
 */
@Override
public void appendNulls(int count) {
  final int newSize = size + count;
  isNa.set(size, newSize);
  size = newSize;
}

@Override
public Storage seal() {
return new BoolStorage(vals, isNa, size, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ public abstract class Builder {
*/
public abstract void append(Object o);

/**
 * Appends the specified number of missing values to the builder.
 *
 * <p>This operation should be equivalent to calling {@link #append(Object)} with {@code null} as
 * the argument {@code count} times; however, a builder may implement it more efficiently.
 *
 * @param count the number of missing values to append.
 */
public abstract void appendNulls(int count);

/**
 * Reports how many elements have been appended to this builder so far.
 *
 * @return the number of appended elements
 */
public abstract int getCurrentSize();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ public void append(Object o) {
currentSize++;
}

/**
 * Appends {@code count} missing values, forwarding them to the current delegate builder when one
 * exists, and always advancing {@code currentSize}.
 *
 * <p>NOTE(review): when no delegate exists yet, only the size is recorded; presumably the
 * delegate is back-filled with these nulls once it is created. Confirm against the
 * builder-initialization code.
 *
 * @param count the number of missing values to append.
 */
@Override
public void appendNulls(int count) {
  final boolean hasDelegate = currentBuilder != null;
  if (hasDelegate) {
    currentBuilder.appendNulls(count);
  }
  currentSize = currentSize + count;
}

private void initBuilderFor(Object o) {
if (o instanceof Boolean) {
currentBuilder = new BoolBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ public void append(Object o) {
}
}

/**
 * Appends {@code count} missing values: sets the range {@code [currentSize, currentSize + count)}
 * in {@code isMissing} and advances {@code currentSize} past it.
 *
 * @param count the number of missing values to append.
 */
@Override
public void appendNulls(int count) {
  final int newSize = currentSize + count;
  isMissing.set(currentSize, newSize);
  currentSize = newSize;
}

/**
* Append a new item in raw form to this builder.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ public void append(Object o) {
data[currentSize++] = o;
}

/**
 * Appends {@code count} missing values by advancing {@code currentSize}; nothing is written to
 * {@code data}.
 *
 * <p>NOTE(review): this presumably relies on the skipped slots of {@code data} already holding
 * {@code null}, and on other code growing {@code data} when {@code currentSize} moves past its
 * length. Confirm against {@code append} and {@code seal}.
 *
 * @param count the number of missing values to append.
 */
@Override
public void appendNulls(int count) {
  currentSize = currentSize + count;
}

@Override
public int getCurrentSize() {
return currentSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ public void append(Object o) {
data[currentSize++] = o;
}

/**
 * Appends {@code count} missing values by advancing {@code currentSize}; nothing is written to
 * {@code data}.
 *
 * <p>NOTE(review): this presumably relies on the skipped slots of {@code data} already holding
 * {@code null}, and on other code growing {@code data} when {@code currentSize} moves past its
 * length. Confirm against {@code append} and {@code seal}.
 *
 * @param count the number of missing values to append.
 */
@Override
public void appendNulls(int count) {
  currentSize = currentSize + count;
}

@Override
public int getCurrentSize() {
return currentSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

import org.enso.table.data.column.storage.Storage;

import java.util.List;
import java.util.stream.IntStream;

/**
* Represents a fold-like operation on a storage. An aggregator is usually created for a given
* storage, then {@link #nextGroup(List)} is repeatedly called and the aggregator is responsible for
* storage, then {@link #nextGroup(IntStream)} is repeatedly called and the aggregator is responsible for
* collecting the results of such calls. After that, {@link #seal()} is called to obtain a storage
* containing all the results.
*/
Expand All @@ -17,10 +17,10 @@ public abstract class Aggregator {
*
* @param positions the positions to aggregate in this round.
*/
public abstract void nextGroup(List<Integer> positions);
public abstract void nextGroup(IntStream positions);

/**
* Returns the results of all previous {@link #nextGroup(List)} calls.
* Returns the results of all previous {@link #nextGroup(IntStream)} calls.
*
* @return the storage containing all aggregation results.
*/
Expand Down
Loading