Skip to content

Commit

Permalink
[SPARK-23284][SQL] Document the behavior of several ColumnVector's ge…
Browse files Browse the repository at this point in the history
…t APIs when accessing null slot

## What changes were proposed in this pull request?

For some ColumnVector get APIs such as getDecimal, getBinary, getStruct, getArray, getInterval, getUTF8String, we should clearly document their behaviors when accessing null slot. They should return null in this case. Then we can remove null checks from the places using above APIs.

For the APIs of primitive values like getInt, getInts, etc., this also documents their behaviors when accessing null slots. Their returning values are undefined and can be anything.

## How was this patch tested?

Added tests into `ColumnarBatchSuite`.

Author: Liang-Chi Hsieh <[email protected]>

Closes #20455 from viirya/SPARK-23272-followup.
  • Loading branch information
viirya authored and cloud-fan committed Feb 2, 2018
1 parent 032c11b commit 90848d5
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,22 @@ public double getDouble(int rowId) {

@Override
public Decimal getDecimal(int rowId, int precision, int scale) {
if (isNullAt(rowId)) return null;
BigDecimal data = decimalData.vector[getRowIndex(rowId)].getHiveDecimal().bigDecimalValue();
return Decimal.apply(data, precision, scale);
}

@Override
public UTF8String getUTF8String(int rowId) {
if (isNullAt(rowId)) return null;
int index = getRowIndex(rowId);
BytesColumnVector col = bytesData;
return UTF8String.fromBytes(col.vector[index], col.start[index], col.length[index]);
}

@Override
public byte[] getBinary(int rowId) {
if (isNullAt(rowId)) return null;
int index = getRowIndex(rowId);
byte[] binary = new byte[bytesData.length[index]];
System.arraycopy(bytesData.vector[index], bytesData.start[index], binary, 0, binary.length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,43 +127,36 @@ public boolean anyNull() {

@Override
public Decimal getDecimal(int ordinal, int precision, int scale) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getDecimal(rowId, precision, scale);
}

@Override
public UTF8String getUTF8String(int ordinal) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getUTF8String(rowId);
}

@Override
public byte[] getBinary(int ordinal) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getBinary(rowId);
}

@Override
public CalendarInterval getInterval(int ordinal) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getInterval(rowId);
}

@Override
public ColumnarRow getStruct(int ordinal, int numFields) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getStruct(rowId);
}

@Override
public ColumnarArray getArray(int ordinal) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getArray(rowId);
}

@Override
public ColumnarMap getMap(int ordinal) {
if (columns[ordinal].isNullAt(rowId)) return null;
return columns[ordinal].getMap(rowId);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ public final int putByteArray(int rowId, byte[] value) {

@Override
public Decimal getDecimal(int rowId, int precision, int scale) {
if (isNullAt(rowId)) return null;
if (precision <= Decimal.MAX_INT_DIGITS()) {
return Decimal.createUnsafe(getInt(rowId), precision, scale);
} else if (precision <= Decimal.MAX_LONG_DIGITS()) {
Expand All @@ -367,6 +368,7 @@ public void putDecimal(int rowId, Decimal value, int precision) {

@Override
public UTF8String getUTF8String(int rowId) {
if (isNullAt(rowId)) return null;
if (dictionary == null) {
return arrayData().getBytesAsUTF8String(getArrayOffset(rowId), getArrayLength(rowId));
} else {
Expand All @@ -384,6 +386,7 @@ public UTF8String getUTF8String(int rowId) {

@Override
public byte[] getBinary(int rowId) {
if (isNullAt(rowId)) return null;
if (dictionary == null) {
return arrayData().getBytes(getArrayOffset(rowId), getArrayLength(rowId));
} else {
Expand Down Expand Up @@ -613,13 +616,15 @@ public final int appendStruct(boolean isNull) {
// array offsets and lengths in the current column vector.
@Override
public final ColumnarArray getArray(int rowId) {
if (isNullAt(rowId)) return null;
return new ColumnarArray(arrayData(), getArrayOffset(rowId), getArrayLength(rowId));
}

// `WritableColumnVector` puts the key array in the first child column vector, value array in the
// second child column vector, and puts the offsets and lengths in the current column vector.
@Override
public final ColumnarMap getMap(int rowId) {
if (isNullAt(rowId)) return null;
return new ColumnarMap(getChild(0), getChild(1), getArrayOffset(rowId), getArrayLength(rowId));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,25 @@ public double getDouble(int rowId) {

@Override
public Decimal getDecimal(int rowId, int precision, int scale) {
if (isNullAt(rowId)) return null;
return accessor.getDecimal(rowId, precision, scale);
}

@Override
public UTF8String getUTF8String(int rowId) {
if (isNullAt(rowId)) return null;
return accessor.getUTF8String(rowId);
}

@Override
public byte[] getBinary(int rowId) {
if (isNullAt(rowId)) return null;
return accessor.getBinary(rowId);
}

@Override
public ColumnarArray getArray(int rowId) {
if (isNullAt(rowId)) return null;
return accessor.getArray(rowId);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,14 @@ public abstract class ColumnVector implements AutoCloseable {
public abstract boolean isNullAt(int rowId);

/**
* Returns the boolean type value for rowId.
* Returns the boolean type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract boolean getBoolean(int rowId);

/**
* Gets boolean type values from [rowId, rowId + count)
* Gets boolean type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public boolean[] getBooleans(int rowId, int count) {
boolean[] res = new boolean[count];
Expand All @@ -96,12 +98,14 @@ public boolean[] getBooleans(int rowId, int count) {
}

/**
* Returns the byte type value for rowId.
* Returns the byte type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract byte getByte(int rowId);

/**
* Gets byte type values from [rowId, rowId + count)
* Gets byte type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public byte[] getBytes(int rowId, int count) {
byte[] res = new byte[count];
Expand All @@ -112,12 +116,14 @@ public byte[] getBytes(int rowId, int count) {
}

/**
* Returns the short type value for rowId.
* Returns the short type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract short getShort(int rowId);

/**
* Gets short type values from [rowId, rowId + count)
* Gets short type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public short[] getShorts(int rowId, int count) {
short[] res = new short[count];
Expand All @@ -128,12 +134,14 @@ public short[] getShorts(int rowId, int count) {
}

/**
* Returns the int type value for rowId.
* Returns the int type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract int getInt(int rowId);

/**
* Gets int type values from [rowId, rowId + count)
* Gets int type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public int[] getInts(int rowId, int count) {
int[] res = new int[count];
Expand All @@ -144,12 +152,14 @@ public int[] getInts(int rowId, int count) {
}

/**
* Returns the long type value for rowId.
* Returns the long type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract long getLong(int rowId);

/**
* Gets long type values from [rowId, rowId + count)
* Gets long type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public long[] getLongs(int rowId, int count) {
long[] res = new long[count];
Expand All @@ -160,12 +170,14 @@ public long[] getLongs(int rowId, int count) {
}

/**
* Returns the float type value for rowId.
* Returns the float type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract float getFloat(int rowId);

/**
* Gets float type values from [rowId, rowId + count)
* Gets float type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public float[] getFloats(int rowId, int count) {
float[] res = new float[count];
Expand All @@ -176,12 +188,14 @@ public float[] getFloats(int rowId, int count) {
}

/**
* Returns the double type value for rowId.
* Returns the double type value for rowId. The return value is undefined and can be anything,
* if the slot for rowId is null.
*/
public abstract double getDouble(int rowId);

/**
* Gets double type values from [rowId, rowId + count)
* Gets double type values from [rowId, rowId + count). The return values for the null slots
* are undefined and can be anything.
*/
public double[] getDoubles(int rowId, int count) {
double[] res = new double[count];
Expand All @@ -192,7 +206,7 @@ public double[] getDoubles(int rowId, int count) {
}

/**
* Returns the struct type value for rowId.
* Returns the struct type value for rowId. If the slot for rowId is null, it should return null.
*
* To support struct type, implementations must implement {@link #getChild(int)} and make this
* vector a tree structure. The number of child vectors must be same as the number of fields of
Expand All @@ -205,7 +219,7 @@ public final ColumnarRow getStruct(int rowId) {
}

/**
* Returns the array type value for rowId.
* Returns the array type value for rowId. If the slot for rowId is null, it should return null.
*
* To support array type, implementations must construct an {@link ColumnarArray} and return it in
* this method. {@link ColumnarArray} requires a {@link ColumnVector} that stores the data of all
Expand All @@ -218,13 +232,13 @@ public final ColumnarRow getStruct(int rowId) {
public abstract ColumnarArray getArray(int rowId);

/**
* Returns the map type value for rowId.
* Returns the map type value for rowId. If the slot for rowId is null, it should return null.
*
* In Spark, map type value is basically a key data array and a value data array. A key from the
* key array with a index and a value from the value array with the same index contribute to
* an entry of this map type value.
*
* To support map type, implementations must construct an {@link ColumnarMap} and return it in
* To support map type, implementations must construct a {@link ColumnarMap} and return it in
* this method. {@link ColumnarMap} requires a {@link ColumnVector} that stores the data of all
* the keys of all the maps in this vector, and another {@link ColumnVector} that stores the data
* of all the values of all the maps in this vector, and a pair of offset and length which
Expand All @@ -233,24 +247,25 @@ public final ColumnarRow getStruct(int rowId) {
public abstract ColumnarMap getMap(int ordinal);

/**
* Returns the decimal type value for rowId.
* Returns the decimal type value for rowId. If the slot for rowId is null, it should return null.
*/
public abstract Decimal getDecimal(int rowId, int precision, int scale);

/**
* Returns the string type value for rowId. Note that the returned UTF8String may point to the
* data of this column vector, please copy it if you want to keep it after this column vector is
* freed.
* Returns the string type value for rowId. If the slot for rowId is null, it should return null.
* Note that the returned UTF8String may point to the data of this column vector, please copy it
* if you want to keep it after this column vector is freed.
*/
public abstract UTF8String getUTF8String(int rowId);

/**
* Returns the binary type value for rowId.
* Returns the binary type value for rowId. If the slot for rowId is null, it should return null.
*/
public abstract byte[] getBinary(int rowId);

/**
* Returns the calendar interval type value for rowId.
* Returns the calendar interval type value for rowId. If the slot for rowId is null, it should
* return null.
*
* In Spark, calendar interval type value is basically an integer value representing the number of
* months in this interval, and a long value representing the number of microseconds in this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,43 +119,36 @@ public boolean anyNull() {

@Override
public Decimal getDecimal(int ordinal, int precision, int scale) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getDecimal(rowId, precision, scale);
}

@Override
public UTF8String getUTF8String(int ordinal) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getUTF8String(rowId);
}

@Override
public byte[] getBinary(int ordinal) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getBinary(rowId);
}

@Override
public CalendarInterval getInterval(int ordinal) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getInterval(rowId);
}

@Override
public ColumnarRow getStruct(int ordinal, int numFields) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getStruct(rowId);
}

@Override
public ColumnarArray getArray(int ordinal) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getArray(rowId);
}

@Override
public ColumnarMap getMap(int ordinal) {
if (data.getChild(ordinal).isNullAt(rowId)) return null;
return data.getChild(ordinal).getMap(rowId);
}

Expand Down
Loading

0 comments on commit 90848d5

Please sign in to comment.