From b17909e932bdd6fe88efef1c1e3f6d5216c5400c Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sat, 13 Jun 2015 15:33:41 -0700 Subject: [PATCH 01/11] [SPARK-8301] removed unnecessary copying of UTF8String. Added a private function startsWith(prefix, offset) to implement the check for startsWith, endsWith and contains. --- .../apache/spark/unsafe/types/UTF8String.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index a35168019549e..bc46ca52aa284 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -131,24 +131,31 @@ public boolean contains(final UTF8String substring) { } for (int i = 0; i <= bytes.length - b.length; i++) { - // TODO: Avoid copying. - if (bytes[i] == b[0] && Arrays.equals(Arrays.copyOfRange(bytes, i, i + b.length), b)) { + if (bytes[i] == b[0] && startsWith(substring, i)) { return true; } } return false; } + private boolean startsWith(final UTF8String prefix, int offset) { + byte[] b = prefix.getBytes(); + if (b.length + offset > bytes.length || offset < 0) { + return false; + } + int i = 0; + while (i < b.length && b[i] == bytes[i + offset]) { + i++; + } + return i == b.length; + } + public boolean startsWith(final UTF8String prefix) { - final byte[] b = prefix.getBytes(); - // TODO: Avoid copying. - return b.length <= bytes.length && Arrays.equals(Arrays.copyOfRange(bytes, 0, b.length), b); + return startsWith(prefix, 0); } public boolean endsWith(final UTF8String suffix) { - final byte[] b = suffix.getBytes(); - return b.length <= bytes.length && - Arrays.equals(Arrays.copyOfRange(bytes, bytes.length - b.length, bytes.length), b); + return startsWith(suffix, bytes.length - suffix.getBytes().length); } public UTF8String toUpperCase() { From 79cb55b496cd2f1cd71e61959fc548fc6a9dd766 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sun, 14 Jun 2015 12:16:42 -0700 Subject: [PATCH 02/11] [SPARK-8301] null check. Added test cases for null check. --- .../apache/spark/unsafe/types/UTF8String.java | 16 ++++++++-------- .../spark/unsafe/types/UTF8StringSuite.java | 3 +++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index bc46ca52aa284..474ea58c8d465 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -125,37 +125,37 @@ public UTF8String substring(final int start, final int until) { } public boolean contains(final UTF8String substring) { + if (substring == null) return false; final byte[] b = substring.getBytes(); if (b.length == 0) { return true; } for (int i = 0; i <= bytes.length - b.length; i++) { - if (bytes[i] == b[0] && startsWith(substring, i)) { + if (bytes[i] == b[0] && startsWith(b, i)) { return true; } } return false; } - private boolean startsWith(final UTF8String prefix, int offset) { - byte[] b = prefix.getBytes(); - if (b.length + offset > bytes.length || offset < 0) { + private boolean startsWith(final byte[] prefix, int offset) { + if (prefix.length + offset > bytes.length || offset < 0) { return false; } int i = 0; - while (i < b.length && b[i] == bytes[i + offset]) { + while (i < prefix.length && prefix[i] == bytes[i + offset]) { i++; } - return i == b.length; + return i == prefix.length; } public boolean startsWith(final UTF8String prefix) { - return startsWith(prefix, 0); + return prefix != null && startsWith(prefix.getBytes(), 0); } public boolean endsWith(final UTF8String suffix) { - return startsWith(suffix, bytes.length - suffix.getBytes().length); + return suffix != null && startsWith(suffix.getBytes(), bytes.length - suffix.getBytes().length); } public UTF8String toUpperCase() { diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index 80c179a1b5e75..f0f530418b08f 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -46,6 +46,7 @@ public void basicTest() throws UnsupportedEncodingException { @Test public void contains() { + Assert.assertFalse(UTF8String.fromString("hello").contains(null)); Assert.assertTrue(UTF8String.fromString("hello").contains(UTF8String.fromString("ello"))); Assert.assertFalse(UTF8String.fromString("hello").contains(UTF8String.fromString("vello"))); Assert.assertFalse(UTF8String.fromString("hello").contains(UTF8String.fromString("hellooo"))); @@ -57,6 +58,7 @@ public void contains() { @Test public void startsWith() { + Assert.assertFalse(UTF8String.fromString("hello").startsWith(null)); Assert.assertTrue(UTF8String.fromString("hello").startsWith(UTF8String.fromString("hell"))); Assert.assertFalse(UTF8String.fromString("hello").startsWith(UTF8String.fromString("ell"))); Assert.assertFalse(UTF8String.fromString("hello").startsWith(UTF8String.fromString("hellooo"))); @@ -68,6 +70,7 @@ public void startsWith() { @Test public void endsWith() { + Assert.assertFalse(UTF8String.fromString("hello").endsWith(null)); Assert.assertTrue(UTF8String.fromString("hello").endsWith(UTF8String.fromString("ello"))); Assert.assertFalse(UTF8String.fromString("hello").endsWith(UTF8String.fromString("ellov"))); Assert.assertFalse(UTF8String.fromString("hello").endsWith(UTF8String.fromString("hhhello"))); From d2fb05f87c687c6f6da09cd923ac7500a36522ff Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sun, 14 Jun 2015 18:31:49 -0700 Subject: [PATCH 03/11] [SPARK-8301] added additional null checks --- .../apache/spark/unsafe/types/UTF8String.java | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 474ea58c8d465..ee8f7a394b9cf 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -56,12 +56,16 @@ public static UTF8String fromString(String str) { * Updates the UTF8String with String. */ public UTF8String set(final String str) { - try { - bytes = str.getBytes("utf-8"); - } catch (UnsupportedEncodingException e) { - // Turn the exception into unchecked so we can find out about it at runtime, but - // don't need to add lots of boilerplate code everywhere. - PlatformDependent.throwException(e); + if (str == null) { + bytes = new byte[0]; + } else { + try { + bytes = str.getBytes("utf-8"); + } catch (UnsupportedEncodingException e) { + // Turn the exception into unchecked so we can find out about it at runtime, but + // don't need to add lots of boilerplate code everywhere. + PlatformDependent.throwException(e); + } } return this; } @@ -70,7 +74,7 @@ public UTF8String set(final String str) { * Updates the UTF8String with byte[], which should be encoded in UTF-8. */ public UTF8String set(final byte[] bytes) { - this.bytes = bytes; + this.bytes = (bytes != null) ? bytes : new byte[0]; return this; } @@ -185,6 +189,7 @@ public UTF8String clone() { @Override public int compareTo(final UTF8String other) { + if (other == null) return 1; final byte[] b = other.getBytes(); for (int i = 0; i < bytes.length && i < b.length; i++) { int res = bytes[i] - b[i]; From a5f853aea8c467af5e780ebd3e9704eaf852e455 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sun, 14 Jun 2015 20:43:46 -0700 Subject: [PATCH 04/11] [SPARK-8301] changed visibility of set to protected. Changed annotation of bytes from Nullable to Nonnull --- .../java/org/apache/spark/unsafe/types/UTF8String.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index ee8f7a394b9cf..d27505c9f482a 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -20,7 +20,7 @@ import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.util.Arrays; -import javax.annotation.Nullable; +import javax.annotation.Nonnull; import org.apache.spark.unsafe.PlatformDependent; @@ -34,7 +34,7 @@ */ public final class UTF8String implements Comparable, Serializable { - @Nullable + @Nonnull private byte[] bytes; private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -55,7 +55,7 @@ public static UTF8String fromString(String str) { /** * Updates the UTF8String with String. */ - public UTF8String set(final String str) { + protected UTF8String set(final String str) { if (str == null) { bytes = new byte[0]; } else { @@ -73,7 +73,7 @@ public UTF8String set(final String str) { /** * Updates the UTF8String with byte[], which should be encoded in UTF-8. */ - public UTF8String set(final byte[] bytes) { + protected UTF8String set(final byte[] bytes) { this.bytes = (bytes != null) ? bytes : new byte[0]; return this; } From e4530d2ec089766d8b03a97403b5fc767e0cb567 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sun, 14 Jun 2015 21:12:10 -0700 Subject: [PATCH 05/11] [SPARK-8301] changed call of UTF8String.set to UTF8String.from --- .../org/apache/spark/sql/catalyst/expressions/UnsafeRow.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index aec88c9241d92..d7b08a495eb8f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -310,7 +310,6 @@ public double getDouble(int i) { public UTF8String getUTF8String(int i) { assertIndexIsValid(i); - final UTF8String str = new UTF8String(); final long offsetToStringSize = getLong(i); final int stringSizeInBytes = (int) PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + offsetToStringSize); @@ -322,8 +321,7 @@ public UTF8String getUTF8String(int i) { PlatformDependent.BYTE_ARRAY_OFFSET, stringSizeInBytes ); - str.set(strBytes); - return str; + return UTF8String.fromBytes(strBytes); } @Override From 3a0040f58596b10b34428136621bd13524dcf2c2 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Sun, 14 Jun 2015 22:15:56 -0700 Subject: [PATCH 06/11] [SPARK-8301] changed call of UTF8String.set to UTF8String.from --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index afbf30af332d8..1c653c95b6894 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -437,17 +437,17 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w case (BinaryType, StringType) => defineCodeGen (ctx, ev, c => - s"new ${ctx.stringType}().set($c)") + s"new ${ctx.stringType}().fromString($c)") case (DateType, StringType) => defineCodeGen(ctx, ev, c => - s"""new ${ctx.stringType}().set( + s"""new ${ctx.stringType}().fromString( org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""") // Special handling required for timestamps in hive test cases since the toString function // does not match the expected output. case (TimestampType, StringType) => super.genCode(ctx, ev) case (_, StringType) => - defineCodeGen(ctx, ev, c => s"new ${ctx.stringType}().set(String.valueOf($c))") + defineCodeGen(ctx, ev, c => s"new ${ctx.stringType}().fromString(String.valueOf($c))") // fallback for DecimalType, this must be before other numeric types case (_, dt: DecimalType) => From 9f17cc80370c612b1cc40801a0d15c1b8f7e283b Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Mon, 15 Jun 2015 15:25:18 -0700 Subject: [PATCH 07/11] [SPARK-8301] fixed conversion byte to string in codegen --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1c653c95b6894..3cad57dcb8fc1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -437,7 +437,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w case (BinaryType, StringType) => defineCodeGen (ctx, ev, c => - s"new ${ctx.stringType}().fromString($c)") + s"new ${ctx.stringType}().fromBytes($c)") case (DateType, StringType) => defineCodeGen(ctx, ev, c => s"""new ${ctx.stringType}().fromString( From 1c327ebdbd33839decdcc5b7ab77ab85eb8d8c0d Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Mon, 15 Jun 2015 21:19:53 -0700 Subject: [PATCH 08/11] [SPARK-8301] removed new --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 3cad57dcb8fc1..978c94132ac17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -437,17 +437,17 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w case (BinaryType, StringType) => defineCodeGen (ctx, ev, c => - s"new ${ctx.stringType}().fromBytes($c)") + s"${ctx.stringType}().fromBytes($c)") case (DateType, StringType) => defineCodeGen(ctx, ev, c => - s"""new ${ctx.stringType}().fromString( + s"""${ctx.stringType}().fromString( org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""") // Special handling required for timestamps in hive test cases since the toString function // does not match the expected output. case (TimestampType, StringType) => super.genCode(ctx, ev) case (_, StringType) => - defineCodeGen(ctx, ev, c => s"new ${ctx.stringType}().fromString(String.valueOf($c))") + defineCodeGen(ctx, ev, c => s"${ctx.stringType}().fromString(String.valueOf($c))") // fallback for DecimalType, this must be before other numeric types case (_, dt: DecimalType) => From 9ca0473ce350f3cd8f57a37c8cfb24a012e34627 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Wed, 17 Jun 2015 21:26:38 -0700 Subject: [PATCH 09/11] [SPARK-8301] removed null checks --- .../apache/spark/unsafe/types/UTF8String.java | 33 ++++++++----------- .../spark/unsafe/types/UTF8StringSuite.java | 3 -- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index d27505c9f482a..294504bd27bee 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -20,7 +20,7 @@ import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.util.Arrays; -import javax.annotation.Nonnull; +import javax.annotation.Nullable; import org.apache.spark.unsafe.PlatformDependent; @@ -34,7 +34,7 @@ */ public final class UTF8String implements Comparable, Serializable { - @Nonnull + @Nullable private byte[] bytes; private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -56,16 +56,12 @@ public static UTF8String fromString(String str) { * Updates the UTF8String with String. */ protected UTF8String set(final String str) { - if (str == null) { - bytes = new byte[0]; - } else { - try { - bytes = str.getBytes("utf-8"); - } catch (UnsupportedEncodingException e) { - // Turn the exception into unchecked so we can find out about it at runtime, but - // don't need to add lots of boilerplate code everywhere. - PlatformDependent.throwException(e); - } + try { + bytes = str.getBytes("utf-8"); + } catch (UnsupportedEncodingException e) { + // Turn the exception into unchecked so we can find out about it at runtime, but + // don't need to add lots of boilerplate code everywhere. + PlatformDependent.throwException(e); } return this; } @@ -74,7 +70,7 @@ protected UTF8String set(final String str) { * Updates the UTF8String with byte[], which should be encoded in UTF-8. */ protected UTF8String set(final byte[] bytes) { - this.bytes = (bytes != null) ? bytes : new byte[0]; + this.bytes = bytes; return this; } @@ -129,7 +125,6 @@ public UTF8String substring(final int start, final int until) { } public boolean contains(final UTF8String substring) { - if (substring == null) return false; final byte[] b = substring.getBytes(); if (b.length == 0) { return true; @@ -143,23 +138,23 @@ public boolean contains(final UTF8String substring) { return false; } - private boolean startsWith(final byte[] prefix, int offset) { - if (prefix.length + offset > bytes.length || offset < 0) { + private boolean startsWith(final byte[] prefix, int offsetInBytes) { + if (prefix.length + offsetInBytes > bytes.length || offsetInBytes < 0) { return false; } int i = 0; - while (i < prefix.length && prefix[i] == bytes[i + offset]) { + while (i < prefix.length && prefix[i] == bytes[i + offsetInBytes]) { i++; } return i == prefix.length; } public boolean startsWith(final UTF8String prefix) { - return prefix != null && startsWith(prefix.getBytes(), 0); + return startsWith(prefix.getBytes(), 0); } public boolean endsWith(final UTF8String suffix) { - return suffix != null && startsWith(suffix.getBytes(), bytes.length - suffix.getBytes().length); + return startsWith(suffix.getBytes(), bytes.length - suffix.getBytes().length); } public UTF8String toUpperCase() { diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index f0f530418b08f..80c179a1b5e75 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -46,7 +46,6 @@ public void basicTest() throws UnsupportedEncodingException { @Test public void contains() { - Assert.assertFalse(UTF8String.fromString("hello").contains(null)); Assert.assertTrue(UTF8String.fromString("hello").contains(UTF8String.fromString("ello"))); Assert.assertFalse(UTF8String.fromString("hello").contains(UTF8String.fromString("vello"))); Assert.assertFalse(UTF8String.fromString("hello").contains(UTF8String.fromString("hellooo"))); @@ -58,7 +57,6 @@ public void contains() { @Test public void startsWith() { - Assert.assertFalse(UTF8String.fromString("hello").startsWith(null)); Assert.assertTrue(UTF8String.fromString("hello").startsWith(UTF8String.fromString("hell"))); Assert.assertFalse(UTF8String.fromString("hello").startsWith(UTF8String.fromString("ell"))); Assert.assertFalse(UTF8String.fromString("hello").startsWith(UTF8String.fromString("hellooo"))); @@ -70,7 +68,6 @@ public void startsWith() { @Test public void endsWith() { - Assert.assertFalse(UTF8String.fromString("hello").endsWith(null)); Assert.assertTrue(UTF8String.fromString("hello").endsWith(UTF8String.fromString("ello"))); Assert.assertFalse(UTF8String.fromString("hello").endsWith(UTF8String.fromString("ellov"))); Assert.assertFalse(UTF8String.fromString("hello").endsWith(UTF8String.fromString("hhhello"))); From 6d7b068a183a9cc1bbaf86fab4a5a4c518a95c89 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Wed, 17 Jun 2015 21:27:32 -0700 Subject: [PATCH 10/11] [SPARK-8301] removed null checks --- .../src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 1 - 1 file changed, 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 294504bd27bee..f9a378c50130b 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -184,7 +184,6 @@ public UTF8String clone() { @Override public int compareTo(final UTF8String other) { - if (other == null) return 1; final byte[] b = other.getBytes(); for (int i = 0; i < bytes.length && i < b.length; i++) { int res = bytes[i] - b[i]; From f5d6b9af8c0578cdaef8bc2f4d0b37cae29383b5 Mon Sep 17 00:00:00 2001 From: Tarek Auel Date: Thu, 18 Jun 2015 11:40:39 -0700 Subject: [PATCH 11/11] fixed parentheses and annotation --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 6 +++--- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 978c94132ac17..a8681d57f208e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -437,17 +437,17 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w case (BinaryType, StringType) => defineCodeGen (ctx, ev, c => - s"${ctx.stringType}().fromBytes($c)") + s"${ctx.stringType}.fromBytes($c)") case (DateType, StringType) => defineCodeGen(ctx, ev, c => - s"""${ctx.stringType}().fromString( + s"""${ctx.stringType}.fromString( org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""") // Special handling required for timestamps in hive test cases since the toString function // does not match the expected output. case (TimestampType, StringType) => super.genCode(ctx, ev) case (_, StringType) => - defineCodeGen(ctx, ev, c => s"${ctx.stringType}().fromString(String.valueOf($c))") + defineCodeGen(ctx, ev, c => s"${ctx.stringType}.fromString(String.valueOf($c))") // fallback for DecimalType, this must be before other numeric types case (_, dt: DecimalType) => diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index f9a378c50130b..9871a70a40e69 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -20,7 +20,7 @@ import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.util.Arrays; -import javax.annotation.Nullable; +import javax.annotation.Nonnull; import org.apache.spark.unsafe.PlatformDependent; @@ -34,7 +34,7 @@ */ public final class UTF8String implements Comparable, Serializable { - @Nullable + @Nonnull private byte[] bytes; private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,