Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-583] impl get_json_object in wscg (#619)
Browse files Browse the repository at this point in the history
* impl get_json_object in wscg

Signed-off-by: Yuan Zhou <[email protected]>

* fix header

Signed-off-by: Yuan Zhou <[email protected]>

* fix

Signed-off-by: Yuan Zhou <[email protected]>

* fix

Signed-off-by: Yuan Zhou <[email protected]>
  • Loading branch information
zhouyuan authored Dec 13, 2021
1 parent 604d17b commit 7ad8194
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,21 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
<< ".rfind(" << child_visitor_list[1]->GetResult()
<< ") != std::string::npos;";
prepare_str_ += prepare_ss.str();
} else if (func_name.compare("get_json_object") == 0) {
for (int i = 0; i < 2; i++) {
prepare_str_ += child_visitor_list[i]->GetPrepare();
}
codes_str_ = "get_json_object_" + std::to_string(cur_func_id);
check_str_ = GetValidityName(codes_str_);
real_codes_str_ = codes_str_;
real_validity_str_ = check_str_;
std::stringstream prepare_ss;
prepare_ss << "bool " << check_str_ << " = true;" << std::endl;
prepare_ss << "std::string " << codes_str_ << " = get_json_object("
<< child_visitor_list[0]->GetResult() << ", "
<< child_visitor_list[1]->GetResult() << ");\n";
prepare_str_ += prepare_ss.str();
header_list_.push_back(R"(#include "precompile/gandiva.h")");
} else if (func_name.compare("substr") == 0) {
ss << child_visitor_list[0]->GetResult() << ".substr("
<< "((" << child_visitor_list[1]->GetResult() << " - 1) < 0 ? 0 : ("
Expand Down
37 changes: 37 additions & 0 deletions native-sql-engine/cpp/src/precompile/gandiva.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
*/
#pragma once

#include <arrow/array.h>
#include <arrow/json/api.h>
#include <arrow/json/parser.h>
#include <arrow/util/decimal.h>
#include <math.h>

Expand Down Expand Up @@ -226,3 +229,37 @@ arrow::Decimal128 round(arrow::Decimal128 in, int32_t original_precision,
}
return arrow::Decimal128(out);
}

std::string get_json_object(const std::string& json_str, const std::string& json_path) {
std::unique_ptr<arrow::json::BlockParser> parser;
(arrow::json::BlockParser::Make(arrow::json::ParseOptions::Defaults(), &parser));
(parser->Parse(std::make_shared<arrow::Buffer>(json_str)));
std::shared_ptr<arrow::Array> parsed;
(parser->Finish(&parsed));
auto struct_parsed = std::dynamic_pointer_cast<arrow::StructArray>(parsed);
// json_path example: $.col_14, will extract col_14 here
if (json_path.length() < 3) {
return nullptr;
}
auto col_name = json_path.substr(2);
// illegal json string.
if (struct_parsed == nullptr) {
return nullptr;
}
auto dict_parsed = std::dynamic_pointer_cast<arrow::DictionaryArray>(
struct_parsed->GetFieldByName(col_name));
// no data contained for given field.
if (dict_parsed == nullptr) {
return nullptr;
}

auto dict_array = dict_parsed->dictionary();
// needs to see whether there is a case that has more than one indices.
auto res_index = dict_parsed->GetValueIndex(0);
// TODO(): check null results
auto utf8_array = std::dynamic_pointer_cast<arrow::BinaryArray>(dict_array);

auto res = utf8_array->GetString(res_index);

return res;
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,5 +111,10 @@ TEST(TestArrowCompute, ArithmeticComparisonTest) {
ASSERT_EQ(res, true);
}

// Checks that get_json_object returns the text of a top-level string field
// addressed with a "$.<name>" path.
TEST(TestArrowCompute, JsonTest) {
  const std::string json_doc = R"({"hello": "3.5"})";
  const std::string field_path = "$.hello";

  const std::string extracted = get_json_object(json_doc, field_path);

  EXPECT_EQ(extracted, "3.5");
}

} // namespace codegen
} // namespace sparkcolumnarplugin

0 comments on commit 7ad8194

Please sign in to comment.