Merge remote-tracking branch 'upstream/main' into basic-sql
gibber9809 committed Nov 13, 2024
2 parents d687fe4 + 53c4f52 commit 8f00de2
Showing 25 changed files with 214 additions and 83 deletions.
8 changes: 5 additions & 3 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -1,7 +1,9 @@
<!--
Set the PR title to a meaningful commit message in imperative form. E.g.:
clp-s: Don't add implicit wildcards ('*') at the beginning and the end of a query (fixes #390).
Set the PR title to a meaningful commit message that:
- follows the Conventional Commits specification (https://www.conventionalcommits.org).
- is in imperative form.
Example:
fix: Don't add implicit wildcards ('*') at the beginning and the end of a query (fixes #390).
-->

# Description
3 changes: 3 additions & 0 deletions .github/workflows/clp-core-build-macos.yaml
@@ -27,6 +27,9 @@ on:
- "deps-tasks.yml"
- "Taskfile.yml"
- "tools/scripts/deps-download/**"
schedule:
# Run daily at 00:15 UTC (the 15 is to avoid periods of high load)
- cron: "15 0 * * *"
workflow_dispatch:

concurrency:
3 changes: 3 additions & 0 deletions .github/workflows/clp-core-build.yaml
@@ -23,6 +23,9 @@ on:
- "Taskfile.yml"
- "tools/scripts/deps-download/**"
- "!components/core/tools/scripts/lib_install/macos/**"
schedule:
# Run daily at 00:15 UTC (the 15 is to avoid periods of high load)
- cron: "15 0 * * *"
workflow_dispatch:

env:
3 changes: 3 additions & 0 deletions .github/workflows/clp-docs.yaml
@@ -3,6 +3,9 @@ name: "clp-docs"
on:
pull_request:
push:
schedule:
# Run daily at 00:15 UTC (the 15 is to avoid periods of high load)
- cron: "15 0 * * *"
workflow_dispatch:

concurrency:
3 changes: 3 additions & 0 deletions .github/workflows/clp-execution-image-build.yaml
@@ -11,6 +11,9 @@ on:
- ".github/actions/clp-execution-image-build/action.yaml"
- ".github/workflows/clp-execution-image-build.yaml"
- "tools/docker-images/**/*"
schedule:
# Run daily at 00:15 UTC (the 15 is to avoid periods of high load)
- cron: "15 0 * * *"
workflow_dispatch:

concurrency:
2 changes: 1 addition & 1 deletion .github/workflows/clp-lint.yaml
@@ -4,7 +4,7 @@ on:
pull_request:
push:
schedule:
# Run at midnight UTC every day with 15 minutes delay added to avoid high load periods
# Run daily at 00:15 UTC (the 15 is to avoid periods of high load)
- cron: "15 0 * * *"
workflow_dispatch:

23 changes: 23 additions & 0 deletions .github/workflows/clp-pr-title-checks.yaml
@@ -0,0 +1,23 @@
name: "clp-pr-title-checks"

on:
pull_request_target:
types: ["edited", "opened", "reopened"]
branches: ["main"]

concurrency:
group: "${{github.workflow}}-${{github.ref}}"

# Cancel in-progress jobs for efficiency
cancel-in-progress: true

jobs:
conventional-commits:
permissions:
# For amannn/action-semantic-pull-request
pull-requests: "read"
runs-on: "ubuntu-latest"
steps:
- uses: "amannn/action-semantic-pull-request@v5"
env:
GITHUB_TOKEN: "${{secrets.GITHUB_TOKEN}}"
6 changes: 3 additions & 3 deletions Taskfile.yml
@@ -228,10 +228,10 @@ tasks:
- "client/src/**/*.css"
- "client/src/**/*.jsx"
- "client/src/webpack.config.js"
- "yscope-log-viewer/.babelrc"
- "yscope-log-viewer/customized-packages/**/*"
- "yscope-log-viewer/package.json"
- "yscope-log-viewer/public/**/*"
- "yscope-log-viewer/src/**/*"
- "yscope-log-viewer/tsconfig.json"
- "yscope-log-viewer/webpack.common.js"
- "yscope-log-viewer/webpack.prod.js"
dir: "components/log-viewer-webui"
@@ -348,7 +348,7 @@ tasks:
- "mkdir -p '{{.OUTPUT_TMP_DIR}}'"
- >-
curl --fail --location --show-error
"{{.URL_PREFIX}}/{{.TAR_NAME}}"
"{{trimSuffix "/" .URL_PREFIX}}/{{.TAR_NAME}}"
--output "{{.TAR_PATH}}"
- "tar xf '{{.TAR_PATH}}' --directory '{{.OUTPUT_TMP_DIR}}'"
- "mv '{{.EXTRACTED_DIR}}' '{{.OUTPUT_DIR}}'"
22 changes: 22 additions & 0 deletions components/core/cmake/Modules/FindMariaDBClient.cmake
@@ -20,6 +20,28 @@ include(cmake/Modules/FindLibraryDependencies.cmake)
find_package(PkgConfig)
pkg_check_modules(mariadbclient_PKGCONF QUIET "lib${mariadbclient_LIBNAME}")

if(NOT mariadbclient_PKGCONF_FOUND AND APPLE)
execute_process(
COMMAND brew --prefix mariadb-connector-c
RESULT_VARIABLE mariadbclient_BREW_RESULT
OUTPUT_VARIABLE mariadbclient_MACOS_PREFIX
)
if(NOT mariadbclient_BREW_RESULT EQUAL 0)
message(
FATAL_ERROR
"pkg-config cannot find ${mariadbclient_LIBNAME} and mariadb-connector-c isn't"
" installed via Homebrew"
)
endif()
string(STRIP "${mariadbclient_MACOS_PREFIX}" mariadbclient_MACOS_PREFIX)
list(PREPEND CMAKE_PREFIX_PATH ${mariadbclient_MACOS_PREFIX})
pkg_check_modules(mariadbclient_PKGCONF QUIET "lib${mariadbclient_LIBNAME}")
endif()

if(NOT mariadbclient_PKGCONF_FOUND)
message(FATAL_ERROR "pkg-config cannot find ${mariadbclient_LIBNAME}")
endif()

# Set include directory
find_path(MariaDBClient_INCLUDE_DIR mysql.h
HINTS ${mariadbclient_PKGCONF_INCLUDEDIR}
80 changes: 31 additions & 49 deletions components/core/src/clp/ffi/KeyValuePairLogEvent.cpp
@@ -153,20 +153,6 @@ node_type_matches_value_type(SchemaTree::Node::Type type, Value const& value) ->
KeyValuePairLogEvent::NodeIdValuePairs const& node_id_value_pairs
) -> bool;

/**
* @param node_id_value_pairs
* @param schema_tree
* @return A result containing a bitmap where every bit corresponds to the ID of a node in the
* schema tree, and the set bits correspond to the nodes in the subtree defined by all paths from
* the root node to the nodes in `node_id_value_pairs`; or an error code indicating a failure:
* - std::errc::result_out_of_range if a node ID in `node_id_value_pairs` doesn't exist in the
* schema tree.
*/
[[nodiscard]] auto get_schema_subtree_bitmap(
KeyValuePairLogEvent::NodeIdValuePairs const& node_id_value_pairs,
SchemaTree const& schema_tree
) -> OUTCOME_V2_NAMESPACE::std_result<vector<bool>>;

/**
* Inserts the given key-value pair into the JSON object (map).
* @param node The schema tree node of the key to insert.
@@ -283,38 +269,6 @@ auto is_leaf_node(
return true;
}

auto get_schema_subtree_bitmap(
KeyValuePairLogEvent::NodeIdValuePairs const& node_id_value_pairs,
SchemaTree const& schema_tree
) -> OUTCOME_V2_NAMESPACE::std_result<vector<bool>> {
auto schema_subtree_bitmap{vector<bool>(schema_tree.get_size(), false)};
for (auto const& [node_id, val] : node_id_value_pairs) {
if (node_id >= schema_subtree_bitmap.size()) {
return std::errc::result_out_of_range;
}
schema_subtree_bitmap[node_id] = true;

// Iteratively mark the parents as true
auto optional_parent_id{schema_tree.get_node(node_id).get_parent_id()};
while (true) {
// Ideally, we'd use this if statement as the loop condition, but clang-tidy will
// complain about an unchecked `optional` access.
if (false == optional_parent_id.has_value()) {
// Reached the root
break;
}
auto const parent_id{optional_parent_id.value()};
if (schema_subtree_bitmap[parent_id]) {
// Parent already set by other child
break;
}
schema_subtree_bitmap[parent_id] = true;
optional_parent_id = schema_tree.get_node(parent_id).get_parent_id();
}
}
return schema_subtree_bitmap;
}

auto insert_kv_pair_into_json_obj(
SchemaTree::Node const& node,
std::optional<Value> const& optional_val,
@@ -393,6 +347,36 @@ auto KeyValuePairLogEvent::create(
return KeyValuePairLogEvent{std::move(schema_tree), std::move(node_id_value_pairs), utc_offset};
}

auto KeyValuePairLogEvent::get_schema_subtree_bitmap(
) const -> OUTCOME_V2_NAMESPACE::std_result<vector<bool>> {
auto schema_subtree_bitmap{vector<bool>(m_schema_tree->get_size(), false)};
for (auto const& [node_id, val] : m_node_id_value_pairs) {
if (node_id >= schema_subtree_bitmap.size()) {
return std::errc::result_out_of_range;
}
schema_subtree_bitmap[node_id] = true;

// Iteratively mark the parents as true
auto optional_parent_id{m_schema_tree->get_node(node_id).get_parent_id()};
while (true) {
// Ideally, we'd use this if statement as the loop condition, but clang-tidy will
// complain about an unchecked `optional` access.
if (false == optional_parent_id.has_value()) {
// Reached the root
break;
}
auto const parent_id{optional_parent_id.value()};
if (schema_subtree_bitmap[parent_id]) {
// Parent already set by other child
break;
}
schema_subtree_bitmap[parent_id] = true;
optional_parent_id = m_schema_tree->get_node(parent_id).get_parent_id();
}
}
return schema_subtree_bitmap;
}

auto KeyValuePairLogEvent::serialize_to_json(
) const -> OUTCOME_V2_NAMESPACE::std_result<nlohmann::json> {
if (m_node_id_value_pairs.empty()) {
@@ -409,9 +393,7 @@ auto KeyValuePairLogEvent::serialize_to_json(
// vector grows).
std::stack<DfsIterator> dfs_stack;

auto const schema_subtree_bitmap_ret{
get_schema_subtree_bitmap(m_node_id_value_pairs, *m_schema_tree)
};
auto const schema_subtree_bitmap_ret{get_schema_subtree_bitmap()};
if (schema_subtree_bitmap_ret.has_error()) {
return schema_subtree_bitmap_ret.error();
}
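
A hypothetical usage sketch (not part of this commit) of the new `get_schema_subtree_bitmap` member implemented above; the `clp::ffi` namespace and the include paths are assumptions based on the file's location:

#include <cstddef>

#include <outcome/single-header/outcome.hpp>

#include "KeyValuePairLogEvent.hpp"

// Hypothetical example: counts how many schema-tree nodes participate in the subtree
// covered by the event's key-value pairs.
auto count_subtree_nodes(clp::ffi::KeyValuePairLogEvent const& event)
        -> OUTCOME_V2_NAMESPACE::std_result<std::size_t> {
    auto const bitmap_ret{event.get_schema_subtree_bitmap()};
    if (bitmap_ret.has_error()) {
        // Forwards std::errc::result_out_of_range if a stored node ID isn't in the tree.
        return bitmap_ret.error();
    }
    std::size_t count{0};
    for (auto const is_in_subtree : bitmap_ret.value()) {
        if (is_in_subtree) {
            ++count;
        }
    }
    return count;
}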
12 changes: 12 additions & 0 deletions components/core/src/clp/ffi/KeyValuePairLogEvent.hpp
@@ -5,6 +5,7 @@
#include <optional>
#include <unordered_map>
#include <utility>
#include <vector>

#include <json/single_include/nlohmann/json.hpp>
#include <outcome/single-header/outcome.hpp>
@@ -60,6 +61,17 @@ class KeyValuePairLogEvent {

[[nodiscard]] auto get_utc_offset() const -> UtcOffset { return m_utc_offset; }

/**
* @return A result containing a bitmap where every bit corresponds to the ID of a node in the
* schema tree, and the set bits correspond to the nodes in the subtree defined by all paths
* from the root node to the nodes in `node_id_value_pairs`; or an error code indicating a
* failure:
* - std::errc::result_out_of_range if a node ID in `node_id_value_pairs` doesn't exist in the
* schema tree.
*/
[[nodiscard]] auto get_schema_subtree_bitmap(
) const -> OUTCOME_V2_NAMESPACE::std_result<std::vector<bool>>;

/**
* Serializes the log event into a `nlohmann::json` object.
* @return A result containing the serialized JSON object or an error code indicating the
9 changes: 4 additions & 5 deletions components/core/src/clp/ffi/ir_stream/Deserializer.hpp
@@ -14,7 +14,6 @@

#include "../../ReaderInterface.hpp"
#include "../../time_types.hpp"
#include "../KeyValuePairLogEvent.hpp"
#include "../SchemaTree.hpp"
#include "decoding_methods.hpp"
#include "ir_unit_deserialization_methods.hpp"
@@ -66,8 +65,8 @@ class Deserializer {
/**
* Deserializes the stream from the given reader up to and including the next log event IR unit.
* @param reader
* @return std::errc::no_message_available if no tag bytes can be read to determine the next IR
* unit type.
* @return Forwards `deserialize_tag`s return values if no tag bytes can be read to determine
* the next IR unit type.
* @return std::errc::protocol_not_supported if the IR unit type is not supported.
* @return std::errc::operation_not_permitted if the deserializer already reached the end of
* stream by deserializing an end-of-stream IR unit in the previous calls.
@@ -172,8 +171,8 @@ auto Deserializer<IrUnitHandler>::deserialize_next_ir_unit(ReaderInterface& read
}

encoded_tag_t tag{};
if (IRErrorCode::IRErrorCode_Success != deserialize_tag(reader, tag)) {
return std::errc::no_message_available;
if (auto const err{deserialize_tag(reader, tag)}; IRErrorCode::IRErrorCode_Success != err) {
return ir_error_code_to_errc(err);
}

auto const optional_ir_unit_type{get_ir_unit_type_from_tag(tag)};
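
A minimal caller-side sketch (not part of this commit) of what the forwarded error enables: a `deserialize_tag` failure now reaches the caller as its specific mapped error code instead of a blanket `std::errc::no_message_available`. The namespaces, include paths, and the free-standing helper below are assumptions:

#include <system_error>

#include "../../ReaderInterface.hpp"
#include "Deserializer.hpp"

// Hypothetical helper: attempts to deserialize one IR unit and returns the error code,
// or a default-constructed (success) code.
template <typename IrUnitHandler>
auto try_deserialize_one_ir_unit(
        clp::ffi::ir_stream::Deserializer<IrUnitHandler>& deserializer,
        clp::ReaderInterface& reader
) -> std::error_code {
    auto const result{deserializer.deserialize_next_ir_unit(reader)};
    if (result.has_error()) {
        // With the change above, a tag-level failure surfaces the mapped IRErrorCode
        // rather than std::errc::no_message_available.
        return result.error();
    }
    return {};
}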
2 changes: 1 addition & 1 deletion components/core/src/clp/ffi/ir_stream/Serializer.hpp
@@ -14,7 +14,7 @@

namespace clp::ffi::ir_stream {
/**
* A work-in-progress class for serializing log events into the kv-pair IR format.
* Class for serializing log events into the kv-pair IR format.
*
* This class:
* - maintains all necessary internal data structures to track serialization state;
7 changes: 6 additions & 1 deletion components/core/src/clp_s/JsonParser.cpp
@@ -21,7 +21,12 @@ JsonParser::JsonParser(JsonParserOption const& option)
}

if (false == m_timestamp_key.empty()) {
clp_s::StringUtils::tokenize_column_descriptor(m_timestamp_key, m_timestamp_column);
if (false
== clp_s::StringUtils::tokenize_column_descriptor(m_timestamp_key, m_timestamp_column))
{
SPDLOG_ERROR("Can not parse invalid timestamp key: \"{}\"", m_timestamp_key);
throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
}
}

for (auto& file_path : option.file_paths) {
4 changes: 3 additions & 1 deletion components/core/src/clp_s/TimestampDictionaryReader.cpp
@@ -44,7 +44,9 @@ void TimestampDictionaryReader::read_new_entries() {
TimestampEntry entry;
std::vector<std::string> tokens;
entry.try_read_from_file(m_dictionary_decompressor);
StringUtils::tokenize_column_descriptor(entry.get_key_name(), tokens);
if (false == StringUtils::tokenize_column_descriptor(entry.get_key_name(), tokens)) {
throw OperationFailed(ErrorCodeCorrupt, __FILENAME__, __LINE__);
}
m_entries.emplace_back(std::move(entry));

// TODO: Currently, we only allow a single authoritative timestamp column at ingestion time,
34 changes: 25 additions & 9 deletions components/core/src/clp_s/Utils.cpp
@@ -427,18 +427,34 @@ bool StringUtils::convert_string_to_double(std::string const& raw, double& conve
return true;
}

void StringUtils::tokenize_column_descriptor(
bool StringUtils::tokenize_column_descriptor(
std::string const& descriptor,
std::vector<std::string>& tokens
) {
// TODO: handle escaped . correctly
auto start = 0U;
auto end = descriptor.find('.');
while (end != std::string::npos) {
tokens.push_back(descriptor.substr(start, end - start));
start = end + 1;
end = descriptor.find('.', start);
// TODO: add support for unicode sequences e.g. \u263A
std::string cur_tok;
for (size_t cur = 0; cur < descriptor.size(); ++cur) {
if ('\\' == descriptor[cur]) {
++cur;
if (cur >= descriptor.size()) {
return false;
}
} else if ('.' == descriptor[cur]) {
if (cur_tok.empty()) {
return false;
}
tokens.push_back(cur_tok);
cur_tok.clear();
continue;
}
cur_tok.push_back(descriptor[cur]);
}
tokens.push_back(descriptor.substr(start));

if (cur_tok.empty()) {
return false;
}

tokens.push_back(cur_tok);
return true;
}
} // namespace clp_s
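
A hypothetical usage sketch (not part of this commit) of the reworked tokenizer above: an escaped dot (`\.`) stays inside its token instead of acting as a separator, while empty tokens and a trailing lone backslash now cause the call to fail. The include path assumes the example sits next to Utils.hpp; per the TODO, unicode escape sequences remain unsupported:

#include <cassert>
#include <string>
#include <vector>

#include "Utils.hpp"

// Hypothetical example exercising the new escape handling.
void tokenize_column_descriptor_examples() {
    std::vector<std::string> tokens;

    // "a.b\.c" tokenizes to {"a", "b.c"}: the escaped '.' is kept inside the token.
    assert(clp_s::StringUtils::tokenize_column_descriptor("a.b\\.c", tokens));
    assert((std::vector<std::string>{"a", "b.c"} == tokens));

    // Empty tokens (e.g. "a..b") and a trailing lone '\' are rejected.
    tokens.clear();
    assert(false == clp_s::StringUtils::tokenize_column_descriptor("a..b", tokens));
    tokens.clear();
    assert(false == clp_s::StringUtils::tokenize_column_descriptor("a.b\\", tokens));
}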
4 changes: 2 additions & 2 deletions components/core/src/clp_s/Utils.hpp
@@ -211,9 +211,9 @@ class StringUtils {
* Converts a string column descriptor delimited by '.' into a list of tokens
* @param descriptor
* @param tokens
* @return the list of tokens pushed into the 'tokens' parameter
* @return true if the descriptor was tokenized successfully, false otherwise
*/
static void
[[nodiscard]] static bool
tokenize_column_descriptor(std::string const& descriptor, std::vector<std::string>& tokens);

private: