Skip to content

Commit

Permalink
[VL] Skip UTF-8 validation in JSON parsing (#6661)
Browse files (browse the repository at this point in the history)
  • Loading branch information
PHILO-HE authored Aug 7, 2024
1 parent b8929a8 commit fa12819
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -263,20 +263,28 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}

test("Test get_json_object datatab function") {
test("get_json_object") {
runQueryAndCompare(
"SELECT get_json_object(string_field1, '$.a') " +
"from datatab limit 1;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
}

test("Test get_json_object lineitem function") {
runQueryAndCompare(
"SELECT l_orderkey, get_json_object('{\"a\":\"b\"}', '$.a') " +
"from lineitem limit 1;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}

// Invalid UTF-8 encoding.
spark.sql(
"CREATE TABLE t USING parquet SELECT concat('{\"a\": 2, \"'," +
" string(X'80'), '\": 3, \"c\": 100}') AS c1")
withTable("t") {
runQueryAndCompare("SELECT get_json_object(c1, '$.c') FROM t;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
}
}

ignore("json_array_length") {
Expand Down
11 changes: 11 additions & 0 deletions ep/build-velox/src/modify_velox.patch
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,14 @@ index 97266c253..11d88dcc4 100644

add_library(
velox_dwio_arrow_parquet_writer_test_lib
diff --git a/CMake/resolve_dependency_modules/simdjson.cmake b/CMake/resolve_dependency_modules/simdjson.cmake
index 69e7f2044..777eb5ec1 100644
--- a/CMake/resolve_dependency_modules/simdjson.cmake
+++ b/CMake/resolve_dependency_modules/simdjson.cmake
@@ -29,4 +29,6 @@ FetchContent_Declare(
URL ${VELOX_SIMDJSON_SOURCE_URL}
URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM})

+set(SIMDJSON_SKIPUTF8VALIDATION ON)
+
FetchContent_MakeAvailable(simdjson)

0 comments on commit fa12819

Please sign in to comment.