Skip to content

Commit

Permalink
Stdlib Improvements (#1963)
Browse files Browse the repository at this point in the history
  • Loading branch information
kustosz authored Aug 19, 2021
1 parent 32261a1 commit 4536ed9
Show file tree
Hide file tree
Showing 16 changed files with 241 additions and 56 deletions.
3 changes: 3 additions & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
filter them using glob patterns
([#1961](https://github.com/enso-org/enso/pull/1961)).

- Implement a host of missing helpers in the Table library
([#1963](https://github.com/enso-org/enso/pull/1963)).

# Enso 0.2.25 (2021-08-18)

## Libraries
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ Text.not_empty = this.is_empty.not

"Hello".utf_8
Text.utf_8 : Vector.Vector Byte
Text.utf_8 = Vector.from_array (Text_Utils.get_bytes this)
Text.utf_8 = Vector.Vector (Text_Utils.get_bytes this)

## Takes a vector of bytes and returns Text resulting from decoding it as UTF-8.

Expand Down Expand Up @@ -280,7 +280,7 @@ Text.from_utf_8 bytes = Text_Utils.from_utf_8 bytes.to_array

"Hello".codepoints
Text.codepoints : Vector.Vector Integer
Text.codepoints = Vector.from_array (Text_Utils.get_codepoints this)
Text.codepoints = Vector.Vector (Text_Utils.get_codepoints this)

## Takes an array of numbers and returns the text resulting from interpreting it
as a sequence of Unicode codepoints.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import Standard.Base.Data.Time.Zone
polyglot java import java.time.format.DateTimeFormatter
polyglot java import java.time.Instant
polyglot java import java.time.LocalDate
polyglot java import java.time.temporal.WeekFields
polyglot java import org.enso.base.Time_Utils

## Obtains the current date from the system clock in the system timezone.
Expand Down Expand Up @@ -184,6 +185,24 @@ type Date
day : Integer
day = this . internal_local_date . getDayOfMonth

## Returns the number of the week of the year that this date falls into.

Arguments:
- locale: the locale used to define the notion of weeks of the year.

! Locale Dependency
Note that this operation is locale-specific. It varies both by the
local definition of the first day of the week and the definition of the
first week of the year. For example, in the US, the first day of the week
is Sunday and week 1 is the week containing January 1. In the UK on the
other hand, the first day of the week is Monday, and week 1 is the week
containing the first Thursday of the year. Therefore it is important to
properly specify the `locale` argument.
week_of_year : Locale.Locale -> Integer
week_of_year locale=Locale.default =
field = WeekFields.of locale.java_locale . weekOfYear
this.internal_local_date.get field

## ALIAS Date to Time

Combine this date with time of day to create a point in time.
Expand Down
2 changes: 2 additions & 0 deletions distribution/lib/Standard/Base/0.2.26-SNAPSHOT/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import Standard.Base.Data.Array.Extensions
import Standard.Base.Data.Interval
import Standard.Base.Data.Json
import Standard.Base.Data.List
import Standard.Base.Data.Locale
import Standard.Base.Data.Map
import Standard.Base.Data.Maybe
import Standard.Base.Data.Noise
Expand All @@ -25,6 +26,7 @@ from Standard.Builtins import Nothing, Number, Integer, Any, True, False, Cons,

export Standard.Base.Data.Interval
export Standard.Base.Data.Json
export Standard.Base.Data.Locale
export Standard.Base.Data.Map
export Standard.Base.Data.Maybe
export Standard.Base.Data.Ordering
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,20 +455,27 @@ type Column
provided default.

Arguments:
- default: The value to replace missing values with.
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.

> Example
Fill missing values in a column with the value 20.5.

import Standard.Examples

example_fill_missing = Examples.decimal_column.fill_missing 20.5
fill_missing : Any -> Column
fill_missing : Column | Any -> Column
fill_missing default =
storage = this.java_column.getStorage
index = this.java_column.getIndex
name = this.java_column.getName
new_st = storage.fillMissing default
new_st = case default of
Column java_col ->
other_storage = java_col.getStorage
storage.fillMissingFrom other_storage
_ ->
storage.fillMissing default
col = Java_Column.new name index new_st
Column col

Expand Down Expand Up @@ -595,6 +602,11 @@ type Column
- that: The column to zip with `this`.
- function: A binary function that is applied to corresponding pairs of
elements of `this` and `that` to produce a value.
- skip_missing: controls whether missing values should be passed to the
`function`. The default value of `True` will skip the rows for which
the value in either column is missing and automatically append
`Nothing` to the result column. If set to `False`, every pair of values
is passed to `function`.

> Example
Zip two columns together as pairs.
Expand All @@ -603,12 +615,12 @@ type Column

example_zip =
Examples.integer_column.zip Examples.text_column_1 [_, _]
zip : Column -> (Any -> Any -> Any) -> Column
zip that function =
zip : Column -> (Any -> Any -> Any) -> Boolean -> Column
zip that function skip_missing=True =
s1 = this.java_column.getStorage
ix = this.java_column.getIndex
s2 = that.java_column.getStorage
rs = s1.zip Nothing function s2
rs = s1.zip Nothing function s2 skip_missing
Column (Java_Column.new "Result" ix rs)

## ALIAS Rename Column
Expand Down Expand Up @@ -1047,6 +1059,20 @@ type Column
mask = OrderBuilder.buildReversedMask this.length
Column <| this.java_column.applyMask mask

## UNSTABLE

Returns a column of numbers, in which every entry denotes how many times
the value at the given position occurred before.

> Example
Count duplicate occurrences of values in a column.

import Standard.Examples

example_duplicate_count = Examples.integer_column.duplicate_count
duplicate_count : Column
duplicate_count = Column this.java_column.duplicateCount

## Wraps a column grouped by its index. Allows performing aggregation operations
on the contained values.
type Aggregate_Column
Expand Down Expand Up @@ -1245,7 +1271,7 @@ run_vectorized_binary_op column name fallback_fn operand = case operand of
s1 = column.java_column.getStorage
ix = column.java_column.getIndex
s2 = col2.getStorage
rs = s1.zip name fallback_fn s2
rs = s1.zip name fallback_fn s2 True
Column (Java_Column.new "Result" ix rs)
_ ->
s1 = column.java_column.getStorage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,30 @@ join : Vector -> Table
join tables =
tables.reduce .join

## UNSTABLE

Concatenates multiple tables, resulting in a table with the number of rows
being the sum of numbers of rows of `tables`. Any column that is present in
some tables, but missing in others, will be `Nothing`-padded in the positions
corresponding to the missing values.

Arguments:
- tables: the tables to concatenate.

> Example
Concatenate multiple tables together.

import Standard.Examples
import Standard.Table

example_concat =
table_1 = Examples.inventory_table
table_2 = Examples.popularity_table
Table.concat [table_1, table_2]
concat : Vector -> Table
concat tables =
    java_tables = tables.map .java_table
    Table (Java_Table.concat java_tables.to_array)

## Represents a column-oriented table data structure.
type Table

Expand Down Expand Up @@ -623,8 +647,7 @@ type Table
example_concat =
Examples.inventory_table.concat Examples.popularity_table
concat : Table -> Table
concat other =
Table (this.java_table.concat other.java_table)
concat other = Table (Java_Table.concat [this.java_table, other.java_table].to_array)

## ALIAS First N Rows
UNSTABLE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export Standard.Table.Io.Format
export Standard.Table.Io.Spreadsheet_Write_Mode
export Standard.Table.Data.Column

from Standard.Table.Data.Table export new, from_rows, join, No_Such_Column_Error, Table
from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table
from Standard.Table.Data.Order_Rule export Order_Rule

## ALIAS To Table
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.enso.interpreter.node.expression.builtin.error.displaytext;

import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.nodes.Node;
Expand All @@ -8,9 +9,13 @@
import org.enso.interpreter.node.expression.builtin.text.util.TypeToDisplayTextNode;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.data.Array;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.type.TypesGen;

import java.util.Arrays;
import java.util.stream.Collectors;

@BuiltinMethod(type = "Unsupported_Argument_Types", name = "to_display_text")
public abstract class UnsupportedArgumentTypesToDisplayTextNode extends Node {
static UnsupportedArgumentTypesToDisplayTextNode build() {
Expand All @@ -20,9 +25,20 @@ static UnsupportedArgumentTypesToDisplayTextNode build() {
abstract Text execute(Object _this);

@Specialization
@CompilerDirectives.TruffleBoundary
Text doAtom(Atom _this, @Cached TypeToDisplayTextNode displayTypeNode) {
return Text.create("Unsupported argument types: ")
.add(displayTypeNode.execute(_this.getFields()[0]));
Object args = _this.getFields()[0];
String argsRep;
if (args instanceof Array) {
Object[] arguments = ((Array) args).getItems();
argsRep =
Arrays.stream(arguments)
.map(displayTypeNode::execute)
.collect(Collectors.joining(", ", "[", "]"));
} else {
argsRep = displayTypeNode.execute(args);
}
return Text.create("Unsupported argument types: ").add(argsRep);
}

@Specialization
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,19 +176,21 @@ public final Storage map(String name, Function<Object, Object> function) {
* Runs a function on each pair of corresponding elements in this and arg. Pairs containing a
* missing element are skipped only when {@code skipNa} is set.
*
* @param name a name of potential vectorized variant of the function that should be used if
* supported. If this argument is null, the vectorized operation will never be used. *
* supported. If this argument is null, the vectorized operation will never be used.
* @param function the function to run.
* @param skipNa whether pairs containing a missing value should be skipped (producing a missing
*     result) instead of being passed to the function.
* @return the result of running the function on all non-missing elements.
*/
public final Storage zip(String name, BiFunction<Object, Object, Object> function, Storage arg) {
public final Storage zip(
String name, BiFunction<Object, Object, Object> function, Storage arg, boolean skipNa) {
if (name != null && isOpVectorized(name)) {
return runVectorizedZip(name, arg);
}
Builder builder = new InferredBuilder(size());
for (int i = 0; i < size(); i++) {
Object it1 = getItemBoxed(i);
Object it2 = i < arg.size() ? arg.getItemBoxed(i) : null;
if (it1 == null || it2 == null) {
if (skipNa && (it1 == null || it2 == null)) {
builder.appendNoGrow(null);
} else {
builder.appendNoGrow(function.apply(it1, it2));
Expand All @@ -207,6 +209,24 @@ public Storage fillMissing(Object arg) {
return fillMissingHelper(arg, new ObjectBuilder(size()));
}

/**
 * Fills missing values in this storage, by using corresponding values from {@code other}.
 *
 * <p>If {@code other} is shorter than this storage, positions beyond its end have no
 * replacement value and therefore stay missing, rather than triggering an out-of-range read.
 *
 * @param other the source of default values
 * @return a new storage with missing values filled
 */
public Storage fillMissingFrom(Storage other) {
  int length = size();
  int otherLength = other.size();
  var builder = new InferredBuilder(length);
  for (int i = 0; i < length; i++) {
    if (isNa(i)) {
      // Same bounds check as in zip: a shorter `other` has nothing to offer at this position.
      builder.appendNoGrow(i < otherLength ? other.getItemBoxed(i) : null);
    } else {
      builder.appendNoGrow(getItemBoxed(i));
    }
  }
  return builder.seal();
}

protected final Storage fillMissingHelper(Object arg, Builder builder) {
for (int i = 0; i < size(); i++) {
Object it = getItemBoxed(i);
Expand Down Expand Up @@ -261,6 +281,23 @@ public List<Object> toList() {
return new StorageListView(this);
}

/**
 * Computes, for every position, how many earlier positions in this storage hold an equal value.
 *
 * @return a storage whose i-th entry is the number of prior occurrences of the i-th value.
 */
public Storage duplicateCount() {
  int length = size();
  long[] priorOccurrences = new long[length];
  HashMap<Object, Integer> seenSoFar = new HashMap<>();
  for (int row = 0; row < length; row++) {
    Object key = getItemBoxed(row);
    // A key that has not been recorded yet has been seen zero times.
    Integer previous = seenSoFar.get(key);
    int seen = previous == null ? 0 : previous;
    priorOccurrences[row] = seen;
    seenSoFar.put(key, seen + 1);
  }
  return new LongStorage(priorOccurrences);
}

/**
* Gets an element at the specified index and converts it to a CSV representation.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.enso.table.data.index;

import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.table.Column;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.enso.table.data.index;

import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.table.Column;
Expand Down Expand Up @@ -79,9 +80,10 @@ public Index applyMask(OrderMask mask) {
public Index unique() {
HashMap<Object, List<Integer>> newLocs = new HashMap<>();
BitSet mask = new BitSet();
int uniqPos = 0;
for (int i = 0; i < items.size(); i++) {
if (!newLocs.containsKey(items.getItemBoxed(i))) {
newLocs.put(items.getItemBoxed(i), Collections.singletonList(i));
newLocs.put(items.getItemBoxed(i), Collections.singletonList(uniqPos++));
mask.set(i);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,9 @@ public Column applyMask(OrderMask mask) {
public Column slice(int offset, int limit) {
return new Column(name, index.slice(offset, limit), storage.slice(offset, limit));
}

/**
 * Builds a column holding, for each row, the number of earlier rows with the same value.
 *
 * @return a column counting value repetitions in this column.
 */
public Column duplicateCount() {
  String countedName = name + "_duplicate_count";
  return new Column(countedName, index, storage.duplicateCount());
}
}
Loading

0 comments on commit 4536ed9

Please sign in to comment.