-
Notifications
You must be signed in to change notification settings - Fork 323
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Break aggregation into classes for easier reading.
- Loading branch information
1 parent
e8b801a
commit 5c858c9
Showing
18 changed files
with
797 additions
and
492 deletions.
There are no files selected for viewing
80 changes: 80 additions & 0 deletions
80
std-bits/table/src/main/java/org/enso/table/aggregations/AggregateColumn.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
/**
 * Base class for aggregate columns.
 *
 * <p>An aggregate column has a name and a storage type code, and computes a single value from the
 * set of source-table row indexes that make up one group.
 */
public abstract class AggregateColumn {
  /** 2^63: the smallest positive double that no longer fits in a {@code long}. */
  private static final double LONG_RANGE_LIMIT = 0x1p63;

  private final String name;
  private final int type;

  /**
   * @param name name of the new column
   * @param type type code of the new column
   */
  protected AggregateColumn(String name, int type) {
    this.name = name;
    this.type = type;
  }

  /**
   * @return Name of the new column.
   */
  public final String getName() {
    return name;
  }

  /**
   * @return The type of the new column.
   */
  public int getType() {
    return type;
  }

  /**
   * Compute the value for a set of rows.
   *
   * <p>The default implementation boxes the indexes into a list and delegates to {@link
   * #aggregate(List)}.
   *
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return aggregated value
   */
  public Object aggregate(int[] rows) {
    return this.aggregate(Arrays.stream(rows).boxed().collect(Collectors.toList()));
  }

  /**
   * Compute the value for a set of rows.
   *
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return aggregated value
   */
  public abstract Object aggregate(List<Integer> rows);

  /**
   * Converts a boxed numeric value to a {@code Long} when that can be done without losing
   * information.
   *
   * @param value the value to convert; may be null
   * @return the value as a {@code Long} for a Long, Integer or Byte, or for a Float/Double that
   *     holds a whole number inside the long range; null for anything else (including null, NaN,
   *     infinities, fractional values and whole values too large for a long)
   */
  protected static Long CastToLong(Object value) {
    if (value instanceof Long) {
      return (Long) value;
    }
    if (value instanceof Integer || value instanceof Byte) {
      return ((Number) value).longValue();
    }
    if (value instanceof Float || value instanceof Double) {
      // Widening a float to a double is exact, so both types can share one check.
      double number = ((Number) value).doubleValue();
      boolean isWhole = number % 1 == 0; // false for NaN and the infinities
      // Without the range check a narrowing cast would silently clamp huge values to
      // Long.MIN_VALUE / Long.MAX_VALUE instead of reporting that they do not fit.
      boolean fitsInLong = number >= -LONG_RANGE_LIMIT && number < LONG_RANGE_LIMIT;
      if (isWhole && fitsInLong) {
        return (long) number;
      }
    }

    return null;
  }

  /**
   * Converts a boxed numeric value to a {@code Double}.
   *
   * @param value the value to convert; may be null
   * @return the value as a {@code Double} for a Long, Integer, Byte, Float or Double; null for
   *     anything else (including null)
   */
  protected static Double CastToDouble(Object value) {
    if (value instanceof Long) {
      return ((Long) value).doubleValue();
    } else if (value instanceof Integer) {
      return ((Integer) value).doubleValue();
    } else if (value instanceof Byte) {
      return ((Byte) value).doubleValue();
    } else if (value instanceof Float) {
      return ((Float) value).doubleValue();
    } else if (value instanceof Double) {
      return ((Double) value);
    }

    return null;
  }
}
67 changes: 67 additions & 0 deletions
67
std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import com.ibm.icu.text.BreakIterator; | ||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.table.Column; | ||
import org.enso.table.data.table.problems.InvalidAggregation; | ||
|
||
import java.util.List; | ||
|
||
public class Concatenate extends AggregateColumn { | ||
private final Storage storage; | ||
private final String join; | ||
private final String prefix; | ||
private final String suffix; | ||
private final String quote; | ||
|
||
public Concatenate(String name, Column column, String join, String prefix, String suffix, String quote) { | ||
super(name, Storage.Type.STRING); | ||
this.storage = column.getStorage(); | ||
|
||
this.join = join == null ? "" : join; | ||
this.prefix = prefix; | ||
this.suffix = suffix; | ||
this.quote = quote == null ? "" : quote; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
StringBuilder current = null; | ||
for (int row: rows) { | ||
Object value = storage.getItemBoxed(row); | ||
if (value == null || value instanceof String) { | ||
String textValue = ToQuotedString(value, quote, join); | ||
if (current == null) { | ||
current = new StringBuilder(); | ||
current.append(textValue); | ||
} else { | ||
current.append(join); | ||
current.append(textValue); | ||
} | ||
} else { | ||
return new InvalidAggregation(this.getName(), row, "Non-Text value - cannot Concatenate"); | ||
} | ||
} | ||
|
||
if (current == null) { | ||
return null; | ||
} | ||
|
||
if (prefix != null) { current.insert(0, prefix); } | ||
current.append(suffix); | ||
return current.toString(); | ||
} | ||
|
||
private static String ToQuotedString(Object value, final String quote, final String join) { | ||
if (value == null) { | ||
return ""; | ||
} | ||
|
||
String textValue = value.toString(); | ||
if (!quote.equals("") && (textValue.equals("") || textValue.contains(join))) { | ||
return quote + textValue.replace(quote, quote + quote) + quote; | ||
} | ||
|
||
return textValue; | ||
} | ||
} |
25 changes: 25 additions & 0 deletions
25
std-bits/table/src/main/java/org/enso/table/aggregations/Count.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
|
||
import java.util.List; | ||
|
||
/*** | ||
* Aggregate Column counting the number of entries in a group. | ||
*/ | ||
public class Count extends AggregateColumn { | ||
public Count(String name) { | ||
super(name, Storage.Type.LONG); | ||
} | ||
|
||
@Override | ||
public Object aggregate(int[] rows) { | ||
return rows.length; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
return rows.size(); | ||
} | ||
} | ||
|
36 changes: 36 additions & 0 deletions
36
std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.index.MultiValueKey; | ||
import org.enso.table.data.table.Column; | ||
|
||
import java.util.Arrays; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Set; | ||
|
||
/*** | ||
* Aggregate Column counting the number of distinct items in a group. | ||
*/ | ||
public class CountDistinct extends AggregateColumn { | ||
private final Storage[] storage; | ||
private final boolean ignoreEmpty; | ||
|
||
public CountDistinct(String name, Column[] columns, boolean ignoreEmpty) { | ||
super(name, Storage.Type.LONG); | ||
this.storage = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new); | ||
this.ignoreEmpty = ignoreEmpty; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
Set<MultiValueKey> set = new HashSet<>(); | ||
for (int row: rows) { | ||
MultiValueKey key = new MultiValueKey(Arrays.stream(storage).map(s->s.getItemBoxed(row)).toArray()); | ||
if (!ignoreEmpty || !key.areAllNull()) { | ||
set.add(key); | ||
} | ||
} | ||
return set.size(); | ||
} | ||
} |
35 changes: 35 additions & 0 deletions
35
std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.table.Column; | ||
import org.enso.table.data.table.problems.InvalidAggregation; | ||
|
||
import java.util.List; | ||
|
||
/*** | ||
* Aggregate Column counting the number of (not-)null entries in a group. | ||
*/ | ||
public class CountEmpty extends AggregateColumn { | ||
private final Storage storage; | ||
private final boolean isEmpty; | ||
|
||
public CountEmpty(String name, Column column, boolean isEmpty) { | ||
super(name, Storage.Type.LONG); | ||
this.storage = column.getStorage(); | ||
this.isEmpty = isEmpty; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
int count = 0; | ||
for (int row : rows) { | ||
Object value = storage.getItemBoxed(row); | ||
if (value != null && !(value instanceof String)) { | ||
return new InvalidAggregation(this.getName(), row, "Non-Text value - cannot Count " + (isEmpty ? "Empty" : "Non-Empty")); | ||
} | ||
|
||
count += ((value == null || ((String) value).length() == 0) == isEmpty ? 1 : 0); | ||
} | ||
return count; | ||
} | ||
} |
30 changes: 30 additions & 0 deletions
30
std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.table.Column; | ||
|
||
import java.util.List; | ||
|
||
/*** | ||
* Aggregate Column counting the number of (not-)null entries in a group. | ||
*/ | ||
public class CountNothing extends AggregateColumn { | ||
private final Storage storage; | ||
private final boolean isNothing; | ||
|
||
public CountNothing(String name, Column column, boolean isNothing) { | ||
super(name, Storage.Type.LONG); | ||
this.storage = column.getStorage(); | ||
this.isNothing = isNothing; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
long count = 0; | ||
for (int row: rows) { | ||
count += ((storage.getItemBoxed(row) == null) == isNothing ? 1 : 0); | ||
} | ||
return count; | ||
} | ||
} | ||
|
31 changes: 31 additions & 0 deletions
31
std-bits/table/src/main/java/org/enso/table/aggregations/First.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.table.Column; | ||
|
||
import java.util.List; | ||
|
||
/*** | ||
* Aggregate Column finding the first value in a group. | ||
*/ | ||
public class First extends AggregateColumn { | ||
private final Storage storage; | ||
private final boolean ignoreNothing; | ||
|
||
public First(String name, Column column, boolean ignoreNothing) { | ||
super(name, Storage.Type.OBJECT); | ||
this.storage = column.getStorage(); | ||
this.ignoreNothing = ignoreNothing; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
for (int row: rows) { | ||
Object value = storage.getItemBoxed(row); | ||
if (!ignoreNothing || value != null) { | ||
return value; | ||
} | ||
} | ||
return null; | ||
} | ||
} |
25 changes: 25 additions & 0 deletions
25
std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.table.Column; | ||
|
||
import java.util.List; | ||
import java.util.OptionalInt; | ||
import java.util.stream.IntStream; | ||
|
||
/*** | ||
* Aggregate Column getting the grouping key. | ||
*/ | ||
public class GroupBy extends AggregateColumn { | ||
private final Storage storage; | ||
|
||
public GroupBy(String name, Column column) { | ||
super(name, Storage.Type.OBJECT); | ||
storage = column.getStorage(); | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
return rows.isEmpty() ? null : storage.getItemBoxed(rows.get(0)); | ||
} | ||
} |
29 changes: 29 additions & 0 deletions
29
std-bits/table/src/main/java/org/enso/table/aggregations/Last.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.table.Column; | ||
|
||
import java.util.List; | ||
|
||
public class Last extends AggregateColumn { | ||
private final Storage storage; | ||
private final boolean ignoreNothing; | ||
|
||
public Last(String name, Column column, boolean ignoreNothing) { | ||
super(name, Storage.Type.OBJECT); | ||
this.storage = column.getStorage(); | ||
this.ignoreNothing = ignoreNothing; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> rows) { | ||
Object current = null; | ||
for (int row: rows) { | ||
Object value = storage.getItemBoxed(row); | ||
if (!ignoreNothing || value != null) { | ||
current = value; | ||
} | ||
} | ||
return current; | ||
} | ||
} |
Oops, something went wrong.