From 222725f280a35eca3ba7fae03bbc7e80e558a672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 5 Mar 2021 18:11:16 +0100 Subject: [PATCH] Make builders growable --- .../column/builder/object/BoolBuilder.java | 24 +++++- .../data/column/builder/object/Builder.java | 18 ++++- .../builder/object/InferredBuilder.java | 80 ++++++++++++++++--- .../column/builder/object/NumericBuilder.java | 63 +++++++++++++-- .../column/builder/object/ObjectBuilder.java | 36 +++++++-- .../column/builder/object/StringBuilder.java | 34 ++++++-- .../aggregate/FunctionAggregator.java | 2 +- .../data/column/storage/DoubleStorage.java | 4 +- .../data/column/storage/LongStorage.java | 8 +- .../table/data/column/storage/Storage.java | 16 ++-- .../org/enso/table/data/table/Column.java | 2 +- .../java/org/enso/table/data/table/Table.java | 11 ++- 12 files changed, 243 insertions(+), 55 deletions(-) diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java index baf09952972ee..7197246089469 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java @@ -11,7 +11,7 @@ public class BoolBuilder extends TypedBuilder { int size = 0; @Override - public void append(Object o) { + public void appendNoGrow(Object o) { if (o == null) { isNa.set(size); } else { @@ -22,6 +22,23 @@ public void append(Object o) { size++; } + @Override + public void append(Object o) { + appendNoGrow(o); + } + + /** + * Append a new boolean to this builder. + * + * @param data the boolean to append + */ + public void appendBoolean(boolean data) { + if (data) { + vals.set(size); + } + size++; + } + @Override public void appendNulls(int count) { isNa.set(size, size + count); @@ -38,6 +55,11 @@ public int getCurrentSize() { return size; } + @Override + public int getCurrentCapacity() { + return vals.size(); + } + @Override public void writeTo(Object[] items) { for (int i = 0; i < size; i++) { diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java b/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java index 90a7ac45d59a5..1b7f086ad6ee6 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java @@ -5,7 +5,17 @@ /** A builder for creating columns dynamically. */ public abstract class Builder { /** - * Append a new item to this builder. + * Append a new item to this builder, assuming that it has enough allocated space. + * + *

This function should only be used when it is guaranteed that the builder has enough + * capacity, for example if it was initialized with an initial capacity known up-front. + * + * @param o the item to append + */ + public abstract void appendNoGrow(Object o); + + /** + * Append a new item to this builder, increasing the capacity if necessary. * * @param o the item to append */ @@ -25,6 +35,12 @@ public abstract class Builder { /** @return the number of appended elements */ public abstract int getCurrentSize(); + /** + * @return how many elements this builder can hold without growing (including already existing + * elements) + */ + public abstract int getCurrentCapacity(); + /** @return a storage containing all the items appended so far */ public abstract Storage seal(); } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java index e5946e4d9fb61..26f091d749852 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java @@ -8,15 +8,67 @@ public class InferredBuilder extends Builder { private TypedBuilder currentBuilder = null; private int currentSize = 0; - private final int size; + private final int initialSize; /** * Creates a new instance of this builder, with the given known result size. * - * @param size the result size + * @param initialSize the result size */ - public InferredBuilder(int size) { - this.size = size; + public InferredBuilder(int initialSize) { + this.initialSize = initialSize; + } + + @Override + public void appendNoGrow(Object o) { + if (currentBuilder == null) { + if (o == null) { + currentSize++; + return; + } else { + initBuilderFor(o); + } + } + if (o == null) { + currentBuilder.appendNoGrow(o); + } else { + switch (currentBuilder.getType()) { + case Storage.Type.BOOL: + if (o instanceof Boolean) { + currentBuilder.appendNoGrow(o); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.LONG: + if (o instanceof Long) { + currentBuilder.appendNoGrow(o); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.DOUBLE: + if (o instanceof Double) { + currentBuilder.appendNoGrow(o); + } else if (o instanceof Long) { + currentBuilder.appendNoGrow(((Long) o).doubleValue()); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.STRING: + if (o instanceof String) { + currentBuilder.appendNoGrow(o); + } else { + retypeAndAppend(o); + } + break; + case Storage.Type.OBJECT: + currentBuilder.appendNoGrow(o); + break; + } + } + currentSize++; } @Override @@ -80,20 +132,19 @@ public void appendNulls(int count) { } private void initBuilderFor(Object o) { + int initialCapacity = Math.max(initialSize, currentSize); if (o instanceof Boolean) { currentBuilder = new BoolBuilder(); } else if (o instanceof Double) { - currentBuilder = NumericBuilder.createDoubleBuilder(size); + currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity); } else if (o instanceof Long) { - currentBuilder = NumericBuilder.createLongBuilder(size); + currentBuilder = NumericBuilder.createLongBuilder(initialCapacity); } else if (o instanceof String) { - currentBuilder = new StringBuilder(size); + currentBuilder = new StringBuilder(initialCapacity); } else { - currentBuilder = new ObjectBuilder(size); - } - for (int i = 0; i < currentSize; i++) { - currentBuilder.append(null); + currentBuilder = new ObjectBuilder(initialCapacity); } + currentBuilder.appendNulls(currentSize); } private void retypeAndAppend(Object o) { @@ -114,7 +165,7 @@ private void retypeAndAppend(Object o) { } private void retypeToObject() { - ObjectBuilder objectBuilder = new ObjectBuilder(size); + ObjectBuilder objectBuilder = new ObjectBuilder(initialSize); currentBuilder.writeTo(objectBuilder.getData()); objectBuilder.setCurrentSize(currentBuilder.getCurrentSize()); currentBuilder = objectBuilder; @@ -125,6 +176,11 @@ public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return 0; + } + @Override public Storage seal() { if (currentBuilder == null) { diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java index 9e1b7e90cafb1..df9fae1dc8e2b 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java @@ -1,5 +1,6 @@ package org.enso.table.data.column.builder.object; +import java.util.Arrays; import java.util.BitSet; import org.enso.table.data.column.storage.DoubleStorage; import org.enso.table.data.column.storage.LongStorage; @@ -7,14 +8,12 @@ /** A builder for numeric columns. */ public class NumericBuilder extends TypedBuilder { - private final int size; private final BitSet isMissing = new BitSet(); - private final long[] data; + private long[] data; private boolean isDouble; private int currentSize; private NumericBuilder(boolean isDouble, int size) { - this.size = size; this.data = new long[size]; this.isDouble = isDouble; } @@ -64,7 +63,7 @@ public int getType() { } @Override - public void append(Object o) { + public void appendNoGrow(Object o) { if (o == null) { isMissing.set(currentSize++); } else if (isDouble && o instanceof Double) { @@ -76,6 +75,14 @@ public void append(Object o) { } } + @Override + public void append(Object o) { + if (currentSize + 1 > data.length) { + grow(); + } + appendNoGrow(o); + } + @Override public void appendNulls(int count) { isMissing.set(currentSize, currentSize + count); @@ -83,26 +90,66 @@ public void appendNulls(int count) { } /** - * Append a new item in raw form to this builder. + * Append a new item in raw form to this builder, assuming that it has enough allocated space. + * + *

This function should only be used when it is guaranteed that the builder has enough + * capacity, for example if it was initialized with an initial capacity known up-front. * * @param rawData the raw encoding of the item, for long numbers just the number and for doubles, * its long bytes */ - public void appendRaw(long rawData) { + public void appendRawNoGrow(long rawData) { data[currentSize++] = rawData; } + /** + * Append a new integer to this builder. + * + * @param data the integer to append + */ + public void appendLong(long data) { + if (currentSize + 1 > this.data.length) { + grow(); + } + appendRawNoGrow(data); + } + + /** + * Append a new double to this builder. + * + * @param data the double to append + */ + public void appendDouble(double data) { + if (currentSize + 1 > this.data.length) { + grow(); + } + appendRawNoGrow(Double.doubleToRawLongBits(data)); + } + @Override public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return data.length; + } + @Override public Storage seal() { if (isDouble) { - return new DoubleStorage(data, size, isMissing); + return new DoubleStorage(data, currentSize, isMissing); } else { - return new LongStorage(data, size, isMissing); + return new LongStorage(data, currentSize, isMissing); + } + } + + private void grow() { + int desiredCapacity = 3; + if (data.length > 1) { + desiredCapacity = (data.length * 3 / 2); } + this.data = Arrays.copyOf(data, desiredCapacity); } } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java index 7b01989c719ff..f35b781faa083 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java @@ -1,22 +1,20 @@ package org.enso.table.data.column.builder.object; +import java.util.Arrays; import org.enso.table.data.column.storage.ObjectStorage; import org.enso.table.data.column.storage.Storage; /** A builder for boxed object columns. */ public class ObjectBuilder extends TypedBuilder { - private final Object[] data; - private final int size; + private Object[] data; private int currentSize = 0; public ObjectBuilder(int size) { - this.size = size; this.data = new Object[size]; } - public ObjectBuilder(Object[] data, int size) { + public ObjectBuilder(Object[] data) { this.data = data; - this.size = size; } @Override @@ -39,8 +37,16 @@ public int getType() { return Storage.Type.OBJECT; } + @Override + public void appendNoGrow(Object o) { + data[currentSize++] = o; + } + @Override public void append(Object o) { + if (currentSize + 1 > data.length) { + grow(); + } data[currentSize++] = o; } @@ -54,9 +60,14 @@ public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return data.length; + } + @Override public Storage seal() { - return new ObjectStorage(data, size); + return new ObjectStorage(data, currentSize); } public Object[] getData() { @@ -64,6 +75,19 @@ public Object[] getData() { } public void setCurrentSize(int currentSize) { + if (currentSize > data.length) grow(currentSize); this.currentSize = currentSize; } + + private void grow() { + if (data.length > 1) { + grow(data.length * 3 / 2); + } else { + grow(3); + } + } + + private void grow(int desiredCapacity) { + this.data = Arrays.copyOf(data, desiredCapacity); + } } diff --git a/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java b/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java index 6e0c42987fc24..24f53ba2850ef 100644 --- a/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java +++ b/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java @@ -1,17 +1,16 @@ package org.enso.table.data.column.builder.object; +import java.util.Arrays; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.StringStorage; /** A builder for string columns. */ public class StringBuilder extends TypedBuilder { - private final Object[] data; - private final int size; + private Object[] data; private int currentSize = 0; public StringBuilder(int size) { this.data = new Object[size]; - this.size = size; } @Override @@ -29,7 +28,7 @@ public boolean canRetypeTo(long type) { @Override public TypedBuilder retypeTo(long type) { if (type == Storage.Type.OBJECT) { - ObjectBuilder res = new ObjectBuilder(data, size); + ObjectBuilder res = new ObjectBuilder(data); res.setCurrentSize(currentSize); return res; } else { @@ -42,8 +41,16 @@ public int getType() { return Storage.Type.STRING; } + @Override + public void appendNoGrow(Object o) { + data[currentSize++] = o; + } + @Override public void append(Object o) { + if (currentSize + 1 > data.length) { + grow(); + } data[currentSize++] = o; } @@ -57,8 +64,25 @@ public int getCurrentSize() { return currentSize; } + @Override + public int getCurrentCapacity() { + return 0; + } + @Override public Storage seal() { - return new StringStorage(data, size); + return new StringStorage(data, currentSize); + } + + private void grow() { + if (data.length > 1) { + grow(data.length * 3 / 2); + } else { + grow(3); + } + } + + private void grow(int desiredCapacity) { + this.data = Arrays.copyOf(data, desiredCapacity); } } diff --git a/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java b/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java index bca43b4e91796..eb0f69df92154 100644 --- a/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java +++ b/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java @@ -38,7 +38,7 @@ public FunctionAggregator( public void nextGroup(IntStream positions) { List items = getItems(positions); Object result = aggregateFunction.apply(items); - builder.append(result); + builder.appendNoGrow(result); } private List getItems(IntStream positions) { diff --git a/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index 4cb290fcc0898..ccaf20546b952 100644 --- a/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -92,9 +92,9 @@ private Storage fillMissingDouble(double arg) { long rawArg = Double.doubleToRawLongBits(arg); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { - builder.appendRaw(rawArg); + builder.appendRawNoGrow(rawArg); } else { - builder.appendRaw(data[i]); + builder.appendRawNoGrow(data[i]); } } return builder.seal(); diff --git a/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index 792d47d4a9689..05cc967cdd2c4 100644 --- a/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ b/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -142,10 +142,10 @@ private Storage fillMissingDouble(double arg) { long rawArg = Double.doubleToRawLongBits(arg); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { - builder.appendRaw(rawArg); + builder.appendRawNoGrow(rawArg); } else { double coerced = data[i]; - builder.appendRaw(Double.doubleToRawLongBits(coerced)); + builder.appendRawNoGrow(Double.doubleToRawLongBits(coerced)); } } return builder.seal(); @@ -155,9 +155,9 @@ private Storage fillMissingLong(long arg) { final var builder = NumericBuilder.createLongBuilder(size()); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { - builder.appendRaw(arg); + builder.appendRawNoGrow(arg); } else { - builder.appendRaw(data[i]); + builder.appendRawNoGrow(data[i]); } } return builder.seal(); diff --git a/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 2f4d9406e3f78..e30967b9b8826 100644 --- a/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -108,9 +108,9 @@ public final Storage bimap( for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.append(null); + builder.appendNoGrow(null); } else { - builder.append(function.apply(it, argument)); + builder.appendNoGrow(function.apply(it, argument)); } } return builder.seal(); @@ -162,9 +162,9 @@ public final Storage map(String name, Function function) { for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.append(null); + builder.appendNoGrow(null); } else { - builder.append(function.apply(it)); + builder.appendNoGrow(function.apply(it)); } } return builder.seal(); @@ -187,9 +187,9 @@ public final Storage zip(String name, BiFunction functio Object it1 = getItemBoxed(i); Object it2 = i < arg.size() ? arg.getItemBoxed(i) : null; if (it1 == null || it2 == null) { - builder.append(null); + builder.appendNoGrow(null); } else { - builder.append(function.apply(it1, it2)); + builder.appendNoGrow(function.apply(it1, it2)); } } return builder.seal(); @@ -209,9 +209,9 @@ protected final Storage fillMissingHelper(Object arg, Builder builder) { for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); if (it == null) { - builder.append(arg); + builder.appendNoGrow(arg); } else { - builder.append(it); + builder.appendNoGrow(it); } } return builder.seal(); diff --git a/table/src/main/java/org/enso/table/data/table/Column.java b/table/src/main/java/org/enso/table/data/table/Column.java index bd787295f0b6f..453a42987dcb1 100644 --- a/table/src/main/java/org/enso/table/data/table/Column.java +++ b/table/src/main/java/org/enso/table/data/table/Column.java @@ -109,7 +109,7 @@ public Column rename(String name) { public static Column fromItems(String name, List items) { InferredBuilder builder = new InferredBuilder(items.size()); for (Object item : items) { - builder.append(item); + builder.appendNoGrow(item); } return new Column(name, new DefaultIndex(items.size()), builder.seal()); } diff --git a/table/src/main/java/org/enso/table/data/table/Table.java b/table/src/main/java/org/enso/table/data/table/Table.java index 8803efae16d47..41946a87041e0 100644 --- a/table/src/main/java/org/enso/table/data/table/Table.java +++ b/table/src/main/java/org/enso/table/data/table/Table.java @@ -4,7 +4,6 @@ import java.util.stream.Collectors; import org.enso.table.data.column.builder.object.InferredBuilder; -import org.enso.table.data.column.builder.string.StorageBuilder; import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.index.DefaultIndex; @@ -314,10 +313,10 @@ public Table concat(Table other) { private Storage concatStorages(Storage left, Storage right) { InferredBuilder builder = new InferredBuilder(left.size() + right.size()); for (int i = 0; i < left.size(); i++) { - builder.append(left.getItemBoxed(i)); + builder.appendNoGrow(left.getItemBoxed(i)); } for (int j = 0; j < right.size(); j++) { - builder.append(right.getItemBoxed(j)); + builder.appendNoGrow(right.getItemBoxed(j)); } return builder.seal(); } @@ -328,7 +327,7 @@ private Storage nullPad(int nullCount, Storage storage, boolean start) { builder.appendNulls(nullCount); } for (int i = 0; i < storage.size(); i++) { - builder.append(storage.getItemBoxed(i)); + builder.appendNoGrow(storage.getItemBoxed(i)); } if (!start) { builder.appendNulls(nullCount); @@ -342,10 +341,10 @@ private Index concatIndexes(Index left, Index right) { } else { InferredBuilder builder = new InferredBuilder(left.size() + right.size()); for (int i = 0; i < left.size(); i++) { - builder.append(left.iloc(i)); + builder.appendNoGrow(left.iloc(i)); } for (int j = 0; j < right.size(); j++) { - builder.append(right.iloc(j)); + builder.appendNoGrow(right.iloc(j)); } Storage storage = builder.seal(); return HashIndex.fromStorage(left.getName(), storage);