Skip to content

Commit

Permalink
Java: Support creating a scalar from utf8 string (#8294)
Browse files Browse the repository at this point in the history
This is a small PR to support creating a scalar from an array of utf8 bytes.

PR #8257 added this support for ColumnVector creation, so this change adds it for scalar creation as well, avoiding conversions between UTF-8 strings and Java strings when used in Spark.

Signed-off-by: Firestarman <[email protected]>

Authors:
  - Liangcai Li (https://github.com/firestarman)

Approvers:
  - Bobby Wang (https://github.com/wbo4958)

URL: #8294
  • Loading branch information
firestarman authored May 20, 2021
1 parent 2da8473 commit 48647aa
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
11 changes: 10 additions & 1 deletion java/src/main/java/ai/rapids/cudf/Scalar.java
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,19 @@ public static Scalar timestampFromLong(DType type, Long value) {
}

/**
 * Creates a String scalar from a Java string.
 * The string is encoded as UTF-8 and handed to {@link #fromUTF8String(byte[])};
 * a null string is forwarded as a null byte array.
 * @param value the Java string, may be null
 * @return the scalar produced by {@link #fromUTF8String(byte[])}
 */
public static Scalar fromString(String value) {
  byte[] utf8 = null;
  if (value != null) {
    utf8 = value.getBytes(StandardCharsets.UTF_8);
  }
  return fromUTF8String(utf8);
}

/**
 * Creates a String scalar from an array of UTF8 bytes.
 * @param value the array of UTF8 bytes, may be null
 * @return a String scalar; when {@code value} is null this is the result of
 *         {@code fromNull(DType.STRING)}
 */
public static Scalar fromUTF8String(byte[] value) {
  if (value == null) {
    return fromNull(DType.STRING);
  }
  // The bytes are already UTF-8 encoded, so they are passed through unchanged.
  // NOTE(review): the trailing 'true' is presumably the validity flag - confirm against makeStringScalar.
  return new Scalar(DType.STRING, makeStringScalar(value, true));
}

/**
Expand Down
17 changes: 17 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ScalarTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.junit.jupiter.api.Test;

import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import static ai.rapids.cudf.TableTest.assertColumnsAreEqual;
Expand Down Expand Up @@ -244,6 +245,22 @@ public void testString() {
}
}

/**
 * Verifies that Scalar.fromUTF8String builds a valid STRING scalar from raw
 * UTF-8 bytes, for both a non-empty and an empty input.
 */
@Test
public void testUTF8String() {
  // Non-empty input: the scalar must round-trip both as a Java string and as bytes.
  byte[] testBytes = "TEST".getBytes(StandardCharsets.UTF_8);
  try (Scalar scalar = Scalar.fromUTF8String(testBytes)) {
    assertEquals(DType.STRING, scalar.getType());
    assertTrue(scalar.isValid());
    assertEquals("TEST", scalar.getJavaString());
    assertArrayEquals(new byte[]{'T', 'E', 'S', 'T'}, scalar.getUTF8());
  }

  // Empty input: still a valid scalar, holding an empty string.
  byte[] emptyBytes = "".getBytes(StandardCharsets.UTF_8);
  try (Scalar scalar = Scalar.fromUTF8String(emptyBytes)) {
    assertEquals(DType.STRING, scalar.getType());
    assertTrue(scalar.isValid());
    assertEquals("", scalar.getJavaString());
    assertArrayEquals(new byte[0], scalar.getUTF8());
  }
}

@Test
public void testList() {
// list of int
Expand Down

0 comments on commit 48647aa

Please sign in to comment.