Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-20549] 'java.io.CharConversionException: Invalid UTF-32' in JsonToStructs #17826

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ case class GetJsonObject(json: Expression, path: Expression)
try {
/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson
detect character encoding which could fail for some malformed strings */
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
new ByteArrayInputStream(jsonStr.getBytes), "UTF-8"))) { parser =>
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, jsonStr)) { parser =>
val output = new ByteArrayOutputStream()
val matched = Utils.tryWithResource(
jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator =>
Expand Down Expand Up @@ -398,9 +397,8 @@ case class JsonTuple(children: Seq[Expression])
try {
/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson
detect character encoding which could fail for some malformed strings */
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
new ByteArrayInputStream(json.getBytes), "UTF-8"))) {
parser => parseRow(parser, input)
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser =>
parseRow(parser, input)
}
} catch {
case _: JsonProcessingException =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.json

import java.io.InputStream
import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
Expand All @@ -33,7 +33,10 @@ private[sql] object CreateJacksonParser extends Serializable {
val bb = record.getByteBuffer
assert(bb.hasArray)

jsonFactory.createParser(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())
val bain = new ByteArrayInputStream(
bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
}

def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
)
}

test("SPARK-20549: from_json bad UTF-8") {
val schema = StructType(StructField("a", IntegerType) :: Nil)
checkEvaluation(
JsonToStructs(schema, Map.empty, Literal(badJson), gmtId),
null)
}

test("from_json with timestamp") {
val schema = StructType(StructField("t", TimestampType) :: Nil)

Expand Down