Skip to content

Commit

Permalink
Break aggregation into classes for easier reading.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley committed Mar 31, 2022
1 parent e8b801a commit 5c858c9
Show file tree
Hide file tree
Showing 18 changed files with 797 additions and 492 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package org.enso.table.aggregations;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/***
 * Abstract base class used to define aggregate columns.
 *
 * <p>An aggregate column has a name and an integer type code, and computes a single value from the
 * indexes of the rows that make up a group.
 */
public abstract class AggregateColumn {
  /** 2^63: the first double value that is too large to be represented as a {@code long}. */
  private static final double LONG_RANGE_LIMIT = 0x1p63;

  private final String name;
  private final int type;

  /***
   * @param name name of the new column
   * @param type integer type code of the new column
   */
  protected AggregateColumn(String name, int type) {
    this.name = name;
    this.type = type;
  }

  /***
   * @return Name of the new column.
   */
  public final String getName() {
    return name;
  }

  /***
   * @return The type of the new column.
   */
  public int getType() {
    return type;
  }

  /***
   * Compute the value for a set of rows.
   *
   * <p>This default implementation boxes the indexes into a list and delegates to
   * {@link #aggregate(List)}.
   *
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return aggregated value
   */
  public Object aggregate(int[] rows) {
    return this.aggregate(Arrays.stream(rows).boxed().collect(Collectors.toList()));
  }

  /***
   * Compute the value for a set of rows.
   *
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return aggregated value
   */
  public abstract Object aggregate(List<Integer> rows);

  /***
   * Converts a boxed numeric value to a {@code Long} when this can be done without losing
   * information.
   *
   * @param value the value to convert; may be null
   * @return the value as a Long for Long, Integer, Short and Byte inputs, and for Float and Double
   *     inputs that are whole numbers within the range of a long; null for anything else
   */
  protected static Long CastToLong(Object value) {
    if (value instanceof Long) {
      return (Long) value;
    }

    if (value instanceof Integer || value instanceof Short || value instanceof Byte) {
      return ((Number) value).longValue();
    }

    if (value instanceof Float || value instanceof Double) {
      // Widening a float to a double is exact, so both types can share one check.
      double number = ((Number) value).doubleValue();

      // NaN and the infinities fail this test because their remainder is NaN.
      boolean isWhole = number % 1 == 0;

      // A whole value outside this range would be silently clamped by the cast to long.
      boolean fitsInLong = number >= -LONG_RANGE_LIMIT && number < LONG_RANGE_LIMIT;

      if (isWhole && fitsInLong) {
        return (long) number;
      }
    }

    return null;
  }

  /***
   * Converts a boxed numeric value to a {@code Double}.
   *
   * @param value the value to convert; may be null
   * @return the value as a Double for Long, Integer, Short, Byte, Float and Double inputs; null
   *     for anything else
   */
  protected static Double CastToDouble(Object value) {
    if (value instanceof Double) {
      return (Double) value;
    }

    if (value instanceof Long
        || value instanceof Integer
        || value instanceof Short
        || value instanceof Byte
        || value instanceof Float) {
      return ((Number) value).doubleValue();
    }

    return null;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.enso.table.aggregations;

import com.ibm.icu.text.BreakIterator;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.problems.InvalidAggregation;

import java.util.List;

/***
 * Aggregate Column joining the text values in a group into a single string.
 *
 * <p>Values are separated by the join string, quoted where needed, and the result is wrapped in
 * the optional prefix and suffix.
 */
public class Concatenate extends AggregateColumn {
  private final Storage storage;
  private final String join;
  private final String prefix;
  private final String suffix;
  private final String quote;

  /***
   * @param name name of the new column
   * @param column the source column whose values are concatenated
   * @param join separator placed between values; null is treated as an empty string
   * @param prefix text placed before the joined values; null adds nothing
   * @param suffix text placed after the joined values; null adds nothing
   * @param quote quote text used to wrap values; null or empty disables quoting
   */
  public Concatenate(String name, Column column, String join, String prefix, String suffix, String quote) {
    super(name, Storage.Type.STRING);
    this.storage = column.getStorage();

    this.join = join == null ? "" : join;
    this.prefix = prefix;
    this.suffix = suffix;
    this.quote = quote == null ? "" : quote;
  }

  /***
   * Concatenates the values of the given rows.
   *
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the concatenated text, null when there are no rows, or an InvalidAggregation when a
   *     row holds a value that is neither null nor a String
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    StringBuilder current = null;
    for (int row : rows) {
      Object value = storage.getItemBoxed(row);
      if (value != null && !(value instanceof String)) {
        return new InvalidAggregation(this.getName(), row, "Non-Text value - cannot Concatenate");
      }

      String textValue = ToQuotedString(value, quote, join);
      if (current == null) {
        current = new StringBuilder();
      } else {
        current.append(join);
      }
      current.append(textValue);
    }

    if (current == null) {
      return null;
    }

    if (prefix != null) {
      current.insert(0, prefix);
    }

    // StringBuilder.append(null) would add the literal text "null", so guard it like the prefix.
    if (suffix != null) {
      current.append(suffix);
    }

    return current.toString();
  }

  /***
   * Converts a value to text, wrapping it in quotes when quoting is enabled and the text is empty
   * or contains the join string. Quote text inside the value is doubled.
   *
   * @param value the value to convert; null becomes an empty, unquoted string
   * @param quote the quote text; an empty string disables quoting
   * @param join the separator used between values
   * @return the text to append for this value
   */
  private static String ToQuotedString(Object value, final String quote, final String join) {
    if (value == null) {
      return "";
    }

    String textValue = value.toString();

    // NOTE(review): when join is empty, contains(join) is always true, so every value is quoted
    // whenever a quote is set - confirm this is intended.
    if (!quote.equals("") && (textValue.equals("") || textValue.contains(join))) {
      return quote + textValue.replace(quote, quote + quote) + quote;
    }

    return textValue;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;

import java.util.List;

/***
 * Aggregate Column reporting how many rows a group contains.
 */
public class Count extends AggregateColumn {
  /***
   * @param name name of the new column
   */
  public Count(String name) {
    super(name, Storage.Type.LONG);
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the length of the index array
   */
  @Override
  public Object aggregate(int[] rows) {
    final int groupSize = rows.length;
    return groupSize;
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the size of the index list
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    final int groupSize = rows.size();
    return groupSize;
  }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.index.MultiValueKey;
import org.enso.table.data.table.Column;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/***
 * Aggregate Column reporting how many distinct value combinations occur in a group.
 */
public class CountDistinct extends AggregateColumn {
  private final Storage[] storage;
  private final boolean ignoreEmpty;

  /***
   * @param name name of the new column
   * @param columns the source columns whose values together form each key
   * @param ignoreEmpty when true, rows whose key values are all null are not counted
   */
  public CountDistinct(String name, Column[] columns, boolean ignoreEmpty) {
    super(name, Storage.Type.LONG);

    Storage[] storages = new Storage[columns.length];
    for (int i = 0; i < columns.length; i++) {
      storages[i] = columns[i].getStorage();
    }

    this.storage = storages;
    this.ignoreEmpty = ignoreEmpty;
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the number of distinct keys found in the given rows
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    Set<MultiValueKey> seen = new HashSet<>();
    for (int row : rows) {
      // Collect this row's value from every source column to build its key.
      Object[] values = new Object[storage.length];
      for (int i = 0; i < storage.length; i++) {
        values[i] = storage[i].getItemBoxed(row);
      }

      MultiValueKey key = new MultiValueKey(values);
      if (ignoreEmpty && key.areAllNull()) {
        continue;
      }
      seen.add(key);
    }
    return seen.size();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.problems.InvalidAggregation;

import java.util.List;

/***
 * Aggregate Column counting the number of empty (null or zero-length text) or non-empty entries
 * in a group.
 */
public class CountEmpty extends AggregateColumn {
  private final Storage storage;
  private final boolean isEmpty;

  /***
   * @param name name of the new column
   * @param column the source column to inspect
   * @param isEmpty true to count empty entries, false to count non-empty entries
   */
  public CountEmpty(String name, Column column, boolean isEmpty) {
    super(name, Storage.Type.LONG);
    this.storage = column.getStorage();
    this.isEmpty = isEmpty;
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the number of matching entries, or an InvalidAggregation when a row holds a value
   *     that is neither null nor a String
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    int matches = 0;
    for (int row : rows) {
      Object value = storage.getItemBoxed(row);

      boolean entryIsEmpty;
      if (value == null) {
        entryIsEmpty = true;
      } else if (value instanceof String) {
        entryIsEmpty = ((String) value).length() == 0;
      } else {
        return new InvalidAggregation(this.getName(), row, "Non-Text value - cannot Count " + (isEmpty ? "Empty" : "Non-Empty"));
      }

      if (entryIsEmpty == isEmpty) {
        matches++;
      }
    }
    return matches;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;

import java.util.List;

/***
 * Aggregate Column counting the null, or the not-null, entries in a group.
 */
public class CountNothing extends AggregateColumn {
  private final Storage storage;
  private final boolean isNothing;

  /***
   * @param name name of the new column
   * @param column the source column to inspect
   * @param isNothing true to count null entries, false to count not-null entries
   */
  public CountNothing(String name, Column column, boolean isNothing) {
    super(name, Storage.Type.LONG);
    this.storage = column.getStorage();
    this.isNothing = isNothing;
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the number of matching entries
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    long matches = 0;
    for (int row : rows) {
      boolean entryIsNothing = storage.getItemBoxed(row) == null;
      if (entryIsNothing == isNothing) {
        matches++;
      }
    }
    return matches;
  }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;

import java.util.List;

/***
 * Aggregate Column returning the first value of a group.
 */
public class First extends AggregateColumn {
  private final Storage storage;
  private final boolean ignoreNothing;

  /***
   * @param name name of the new column
   * @param column the source column to read values from
   * @param ignoreNothing when true, null values are skipped
   */
  public First(String name, Column column, boolean ignoreNothing) {
    super(name, Storage.Type.OBJECT);
    this.storage = column.getStorage();
    this.ignoreNothing = ignoreNothing;
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the first accepted value, or null when there is none
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    for (int row : rows) {
      Object candidate = storage.getItemBoxed(row);
      if (ignoreNothing && candidate == null) {
        continue;
      }
      return candidate;
    }
    return null;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;

import java.util.List;
import java.util.OptionalInt;
import java.util.stream.IntStream;

/***
 * Aggregate Column returning the grouping key of a group.
 */
public class GroupBy extends AggregateColumn {
  private final Storage storage;

  /***
   * @param name name of the new column
   * @param column the source column holding the key values
   */
  public GroupBy(String name, Column column) {
    super(name, Storage.Type.OBJECT);
    storage = column.getStorage();
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the value stored at the first row of the group, or null for an empty group
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    if (rows.isEmpty()) {
      return null;
    }
    return storage.getItemBoxed(rows.get(0));
  }
}
29 changes: 29 additions & 0 deletions std-bits/table/src/main/java/org/enso/table/aggregations/Last.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.enso.table.aggregations;

import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;

import java.util.List;
import java.util.ListIterator;

/***
 * Aggregate Column finding the last value in a group.
 */
public class Last extends AggregateColumn {
  private final Storage storage;
  private final boolean ignoreNothing;

  /***
   * @param name name of the new column
   * @param column the source column to read values from
   * @param ignoreNothing when true, null values are skipped
   */
  public Last(String name, Column column, boolean ignoreNothing) {
    super(name, Storage.Type.OBJECT);
    this.storage = column.getStorage();
    this.ignoreNothing = ignoreNothing;
  }

  /***
   * @param rows - indexes to the rows in the source table to aggregate on
   * @return the last accepted value, or null when there is none
   */
  @Override
  public Object aggregate(List<Integer> rows) {
    // Walk backwards and stop at the first accepted value instead of reading every row.
    ListIterator<Integer> cursor = rows.listIterator(rows.size());
    while (cursor.hasPrevious()) {
      int row = cursor.previous();
      Object value = storage.getItemBoxed(row);
      if (!ignoreNothing || value != null) {
        return value;
      }
    }
    return null;
  }
}
Loading

0 comments on commit 5c858c9

Please sign in to comment.