Merge branch 'master' into grouping_fix_same_col

feiniaofeiafei · Mar 14, 2024 · 96ec08c · 96ec08c
2 parents b5da6f1 + a18845b
commit 96ec08c
Show file tree

Hide file tree

Showing 201 changed files with 2,136 additions and 979 deletions.
diff --git a/be/src/agent/cgroup_cpu_ctl.cpp b/be/src/agent/cgroup_cpu_ctl.cpp
@@ -132,7 +132,7 @@ Status CgroupV1CpuCtl::init() {
 
     if (_tg_id == -1) {
         // means current cgroup cpu ctl is just used to clear dir,
-        // it does not contains task group.
+        // it does not contain a workload group.
         // todo(wb) rethinking whether need to refactor cgroup_cpu_ctl
         _init_succ = true;
         LOG(INFO) << "init cgroup cpu query path succ, path=" << _cgroup_v1_cpu_query_path;

diff --git a/be/src/agent/workload_group_listener.cpp b/be/src/agent/workload_group_listener.cpp
@@ -17,8 +17,8 @@
 
 #include "agent/workload_group_listener.h"
 
-#include "runtime/task_group/task_group.h"
-#include "runtime/task_group/task_group_manager.h"
+#include "runtime/workload_group/workload_group.h"
+#include "runtime/workload_group/workload_group_manager.h"
 #include "util/mem_info.h"
 #include "util/parse_util.h"
 
@@ -32,36 +32,37 @@ void WorkloadGroupListener::handle_topic_info(const std::vector<TopicInfo>& topi
         }
 
         // 1 parse topicinfo to group info
-        taskgroup::TaskGroupInfo task_group_info;
-        Status ret = taskgroup::TaskGroupInfo::parse_topic_info(topic_info.workload_group_info,
-                                                                &task_group_info);
+        WorkloadGroupInfo workload_group_info;
+        Status ret = WorkloadGroupInfo::parse_topic_info(topic_info.workload_group_info,
+                                                         &workload_group_info);
         if (!ret.ok()) {
-            LOG(INFO) << "parse topic info failed, tg_id=" << task_group_info.id
+            LOG(INFO) << "parse topic info failed, tg_id=" << workload_group_info.id
                       << ", reason:" << ret.to_string();
             continue;
         }
-        current_wg_ids.insert(task_group_info.id);
+        current_wg_ids.insert(workload_group_info.id);
 
-        // 2 update task group
-        auto tg = _exec_env->task_group_manager()->get_or_create_task_group(task_group_info);
+        // 2 update workload group
+        auto tg =
+                _exec_env->workload_group_mgr()->get_or_create_workload_group(workload_group_info);
 
         // 3 set cpu soft hard limit switch
-        _exec_env->task_group_manager()->_enable_cpu_hard_limit.store(
-                task_group_info.enable_cpu_hard_limit);
+        _exec_env->workload_group_mgr()->_enable_cpu_hard_limit.store(
+                workload_group_info.enable_cpu_hard_limit);
 
         // 4 create and update task scheduler
-        tg->upsert_task_scheduler(&task_group_info, _exec_env);
+        tg->upsert_task_scheduler(&workload_group_info, _exec_env);
 
-        LOG(INFO) << "update task group finish, tg info=" << tg->debug_string()
+        LOG(INFO) << "update workload group finish, tg info=" << tg->debug_string()
                   << ", enable_cpu_hard_limit="
-                  << (_exec_env->task_group_manager()->enable_cpu_hard_limit() ? "true" : "false")
-                  << ", cgroup cpu_shares=" << task_group_info.cgroup_cpu_shares
-                  << ", cgroup cpu_hard_limit=" << task_group_info.cgroup_cpu_hard_limit
+                  << (_exec_env->workload_group_mgr()->enable_cpu_hard_limit() ? "true" : "false")
+                  << ", cgroup cpu_shares=" << workload_group_info.cgroup_cpu_shares
+                  << ", cgroup cpu_hard_limit=" << workload_group_info.cgroup_cpu_hard_limit
                   << ", enable_cgroup_cpu_soft_limit="
                   << (config::enable_cgroup_cpu_soft_limit ? "true" : "false")
                   << ", cgroup home path=" << config::doris_cgroup_cpu_path;
     }
 
-    _exec_env->task_group_manager()->delete_task_group_by_ids(current_wg_ids);
+    _exec_env->workload_group_mgr()->delete_workload_group_by_ids(current_wg_ids);
 }
 } // namespace doris
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
@@ -51,7 +51,7 @@
 #include "runtime/memory/mem_tracker.h"
 #include "runtime/memory/mem_tracker_limiter.h"
 #include "runtime/runtime_query_statistics_mgr.h"
-#include "runtime/task_group/task_group_manager.h"
+#include "runtime/workload_group/workload_group_manager.h"
 #include "util/cpu_info.h"
 #include "util/debug_util.h"
 #include "util/disk_info.h"

diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp
@@ -44,6 +44,7 @@
 #include "exec/schema_scanner/schema_user_privileges_scanner.h"
 #include "exec/schema_scanner/schema_variables_scanner.h"
 #include "exec/schema_scanner/schema_views_scanner.h"
+#include "exec/schema_scanner/schema_workload_groups_scanner.h"
 #include "olap/hll.h"
 #include "runtime/define_primitive_type.h"
 #include "util/string_util.h"
@@ -155,6 +156,8 @@ std::unique_ptr<SchemaScanner> SchemaScanner::create(TSchemaTableType::type type
         return SchemaBackendActiveTasksScanner::create_unique();
     case TSchemaTableType::SCH_ACTIVE_QUERIES:
         return SchemaActiveQueriesScanner::create_unique();
+    case TSchemaTableType::SCH_WORKLOAD_GROUPS:
+        return SchemaWorkloadGroupsScanner::create_unique();
     default:
         return SchemaDummyScanner::create_unique();
         break;

diff --git a/be/src/exec/schema_scanner/schema_active_queries_scanner.cpp b/be/src/exec/schema_scanner/schema_active_queries_scanner.cpp
@@ -50,20 +50,17 @@ Status SchemaActiveQueriesScanner::start(RuntimeState* state) {
 Status SchemaActiveQueriesScanner::_get_active_queries_block_from_fe() {
     TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address;
 
-    TQueriesMetadataParams tqueries_meta_params;
-    tqueries_meta_params.__set_relay_to_other_fe(true);
-
-    TMetadataTableRequestParams metadata_table_params;
-    metadata_table_params.__set_metadata_type(TMetadataType::QUERIES);
-    metadata_table_params.__set_queries_metadata_params(tqueries_meta_params);
+    TSchemaTableRequestParams schema_table_params;
     for (int i = 0; i < _s_tbls_columns.size(); i++) {
-        metadata_table_params.__isset.columns_name = true;
-        metadata_table_params.columns_name.emplace_back(_s_tbls_columns[i].name);
+        schema_table_params.__isset.columns_name = true;
+        schema_table_params.columns_name.emplace_back(_s_tbls_columns[i].name);
     }
+    schema_table_params.replay_to_other_fe = true;
+    schema_table_params.__isset.replay_to_other_fe = true;
 
     TFetchSchemaTableDataRequest request;
-    request.__set_schema_table_name(TSchemaTableName::SCHEMA_TABLE);
-    request.__set_metada_table_params(metadata_table_params);
+    request.__set_schema_table_name(TSchemaTableName::ACTIVE_QUERIES);
+    request.__set_schema_table_params(schema_table_params);
 
     TFetchSchemaTableDataResult result;
 

diff --git a/be/src/exec/schema_scanner/schema_workload_groups_scanner.cpp b/be/src/exec/schema_scanner/schema_workload_groups_scanner.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/schema_scanner/schema_workload_groups_scanner.h"
+
+#include "runtime/client_cache.h"
+#include "runtime/exec_env.h"
+#include "runtime/runtime_state.h"
+#include "util/thrift_rpc_helper.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/block.h"
+#include "vec/data_types/data_type_factory.hpp"
+
+namespace doris {
+// Column layout of the workload groups schema table.
+//
+// The names listed here are sent to FE as the requested column list, and the position of each
+// entry is used to index the values of every returned row, so the order has to match what FE
+// sends back. TYPE_BIGINT columns are filled from a row's longVal; every other column is
+// filled from its stringVal.
+//
+// NOTE(review): CPU_HARD_LIMIT is the only column declared as TYPE_STRING while the other
+// string columns use TYPE_VARCHAR -- confirm this difference is intentional.
+std::vector<SchemaScanner::ColumnDesc> SchemaWorkloadGroupsScanner::_s_tbls_columns = {
+        {"ID", TYPE_BIGINT, sizeof(int64_t), true},
+        {"NAME", TYPE_VARCHAR, sizeof(StringRef), true},
+        {"CPU_SHARE", TYPE_BIGINT, sizeof(int64_t), true},
+        {"MEMORY_LIMIT", TYPE_VARCHAR, sizeof(StringRef), true},
+        {"ENABLE_MEMORY_OVERCOMMIT", TYPE_VARCHAR, sizeof(StringRef), true},
+        {"MAX_CONCURRENCY", TYPE_BIGINT, sizeof(int64_t), true},
+        {"MAX_QUEUE_SIZE", TYPE_BIGINT, sizeof(int64_t), true},
+        {"QUEUE_TIMEOUT", TYPE_BIGINT, sizeof(int64_t), true},
+        {"CPU_HARD_LIMIT", TYPE_STRING, sizeof(StringRef), true},
+        {"SCAN_THREAD_NUM", TYPE_BIGINT, sizeof(int64_t), true},
+        {"MAX_REMOTE_SCAN_THREAD_NUM", TYPE_BIGINT, sizeof(int64_t), true},
+        {"MIN_REMOTE_SCAN_THREAD_NUM", TYPE_BIGINT, sizeof(int64_t), true},
+};
+
+// Hands the static column layout to the base scanner and tags this scanner as the
+// SCH_WORKLOAD_GROUPS schema table.
+SchemaWorkloadGroupsScanner::SchemaWorkloadGroupsScanner()
+        : SchemaScanner(_s_tbls_columns, TSchemaTableType::SCH_WORKLOAD_GROUPS) {}
+
+// No manual cleanup is performed here, so the compiler-generated destructor is sufficient.
+SchemaWorkloadGroupsScanner::~SchemaWorkloadGroupsScanner() = default;
+
+// Captures the per-query settings this scanner needs from |state|.
+//
+// The batch size becomes the maximum number of rows handed out per get_next_block() call, and
+// the execution timeout multiplied by 1000 becomes the timeout passed to the FE RPC
+// (presumably a seconds -> milliseconds conversion; confirm against RuntimeState).
+//
+// Always returns Status::OK().
+Status SchemaWorkloadGroupsScanner::start(RuntimeState* state) {
+    _rpc_timeout = state->execution_timeout() * 1000;
+    _block_rows_limit = state->batch_size();
+    return Status::OK();
+}
+
+// Fetches every workload group row from the master FE and materializes the rows into
+// _workload_groups_block.
+//
+// Each name in _s_tbls_columns is sent to FE as a requested column. The reply must carry
+// exactly one value per column, in the same order: TYPE_BIGINT columns are read from longVal
+// and all other columns from stringVal. Every column is created nullable and every inserted
+// value is marked as non-null.
+//
+// Returns the RPC error, the non-OK status reported by FE, or InternalError when any row does
+// not have exactly _s_tbls_columns.size() values. _workload_groups_block is only assigned on
+// success, so a failed fetch never leaves a partially filled block behind.
+Status SchemaWorkloadGroupsScanner::_get_workload_groups_block_from_fe() {
+    TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address;
+
+    TSchemaTableRequestParams schema_table_request_params;
+    schema_table_request_params.__isset.columns_name = true;
+    for (const auto& column : _s_tbls_columns) {
+        schema_table_request_params.columns_name.emplace_back(column.name);
+    }
+    // Only forward the user identity when one is available; dereferencing an unset identity
+    // would crash the scanner.
+    if (_param->common_param->current_user_ident) {
+        schema_table_request_params.__set_current_user_ident(
+                *_param->common_param->current_user_ident);
+    }
+
+    TFetchSchemaTableDataRequest request;
+    request.__set_schema_table_name(TSchemaTableName::WORKLOAD_GROUPS);
+    request.__set_schema_table_params(schema_table_request_params);
+
+    TFetchSchemaTableDataResult result;
+
+    RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>(
+            master_addr.hostname, master_addr.port,
+            [&request, &result](FrontendServiceConnection& client) {
+                client->fetchSchemaTableData(result, request);
+            },
+            _rpc_timeout));
+
+    Status status(Status::create(result.status));
+    if (!status.ok()) {
+        LOG(WARNING) << "fetch workload groups from FE failed, errmsg=" << status;
+        return status;
+    }
+
+    // Bind by reference: the reply can be large and is only read from here on.
+    const std::vector<TRow>& result_data = result.data_batch;
+    const size_t column_count = _s_tbls_columns.size();
+
+    // Validate every row up front (not just the first one) so that the indexed access to
+    // column_value[j] below can never read out of range.
+    for (const TRow& row : result_data) {
+        if (row.column_value.size() != column_count) {
+            return Status::InternalError<false>(
+                    "workload groups schema is not match for FE and BE");
+        }
+    }
+
+    // Build into a local block and publish it to the member only once it is complete.
+    auto block = vectorized::Block::create_unique();
+    for (const auto& column : _s_tbls_columns) {
+        TypeDescriptor descriptor(column.type);
+        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(descriptor, true);
+        block->insert(vectorized::ColumnWithTypeAndName(data_type->create_column(), data_type,
+                                                        column.name));
+    }
+
+    block->reserve(_block_rows_limit);
+
+    // todo(wb) reuse this callback function
+    // Appends |str_val| as a non-null value to the nullable string column at |col_index|.
+    auto insert_string_value = [&block](size_t col_index, const std::string& str_val) {
+        vectorized::MutableColumnPtr mutable_col_ptr;
+        mutable_col_ptr = std::move(*block->get_by_position(col_index).column).assume_mutable();
+        auto* nullable_column =
+                reinterpret_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
+        vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
+        reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(str_val.data(),
+                                                                          str_val.size());
+        nullable_column->get_null_map_data().emplace_back(0);
+    };
+    // Appends |int_val| as a non-null value to the nullable Int64 column at |col_index|.
+    auto insert_int_value = [&block](size_t col_index, int64_t int_val) {
+        vectorized::MutableColumnPtr mutable_col_ptr;
+        mutable_col_ptr = std::move(*block->get_by_position(col_index).column).assume_mutable();
+        auto* nullable_column =
+                reinterpret_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
+        vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(
+                int_val);
+        nullable_column->get_null_map_data().emplace_back(0);
+    };
+
+    for (const TRow& row : result_data) {
+        for (size_t j = 0; j < column_count; ++j) {
+            const auto& cell = row.column_value[j];
+            if (_s_tbls_columns[j].type == TYPE_BIGINT) {
+                insert_int_value(j, cell.longVal);
+            } else {
+                insert_string_value(j, cell.stringVal);
+            }
+        }
+    }
+
+    _workload_groups_block = std::move(block);
+    return Status::OK();
+}
+
+// Appends the next slice of workload group rows to |block|.
+//
+// The full result set is fetched from FE on the first call and cached; each call then copies
+// at most _block_rows_limit of the not-yet-returned rows into |block| and advances the cursor.
+// |*eos| is set to true once every cached row has been handed out (including the case where
+// there were no rows at all).
+//
+// Returns InternalError if the scanner has not been initialized or if |block| or |eos| is
+// null, and propagates any error from the FE fetch.
+Status SchemaWorkloadGroupsScanner::get_next_block(vectorized::Block* block, bool* eos) {
+    if (!_is_init) {
+        return Status::InternalError("Used before initialized.");
+    }
+    if (block == nullptr || eos == nullptr) {
+        return Status::InternalError("input pointer is nullptr.");
+    }
+
+    // First call: pull the complete result set and remember how many rows it holds.
+    if (!_workload_groups_block) {
+        RETURN_IF_ERROR(_get_workload_groups_block_from_fe());
+        _total_rows = _workload_groups_block->rows();
+    }
+
+    const int remaining_rows = _total_rows - _row_idx;
+    if (remaining_rows != 0) {
+        const int rows_to_copy = std::min(_block_rows_limit, remaining_rows);
+        auto mutable_block = vectorized::MutableBlock::build_mutable_block(block);
+        mutable_block.add_rows(_workload_groups_block.get(), _row_idx, rows_to_copy);
+        _row_idx += rows_to_copy;
+    }
+
+    *eos = (_row_idx == _total_rows);
+    return Status::OK();
+}
+
+} // namespace doris
diff --git a/be/src/exec/schema_scanner/schema_workload_groups_scanner.h b/be/src/exec/schema_scanner/schema_workload_groups_scanner.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "common/status.h"
+#include "exec/schema_scanner.h"
+
+namespace doris {
+class RuntimeState;
+namespace vectorized {
+class Block;
+} // namespace vectorized
+
+// Schema scanner backing the SCH_WORKLOAD_GROUPS schema table.
+//
+// The complete result set is requested from the master FE on the first get_next_block() call,
+// cached in _workload_groups_block, and then handed out in slices of at most
+// _block_rows_limit rows.
+class SchemaWorkloadGroupsScanner : public SchemaScanner {
+    ENABLE_FACTORY_CREATOR(SchemaWorkloadGroupsScanner);
+
+public:
+    SchemaWorkloadGroupsScanner();
+    ~SchemaWorkloadGroupsScanner() override;
+
+    // Reads the batch size and the execution timeout from |state|.
+    Status start(RuntimeState* state) override;
+    // Appends the next slice of cached rows to |block|; sets |*eos| once all rows were returned.
+    Status get_next_block(vectorized::Block* block, bool* eos) override;
+
+    // Column layout of the table; also used as the column list requested from FE.
+    static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;
+
+private:
+    // Performs the FE RPC and fills _workload_groups_block with the returned rows.
+    Status _get_workload_groups_block_from_fe();
+
+    // Maximum number of rows copied out per get_next_block() call; overwritten in start().
+    int _block_rows_limit = 4096;
+    // Index of the next cached row to hand out.
+    int _row_idx = 0;
+    // Number of rows in _workload_groups_block, set right after the fetch.
+    int _total_rows = 0;
+    // Cached FE result; stays null until the first get_next_block() call fetches it.
+    std::unique_ptr<vectorized::Block> _workload_groups_block = nullptr;
+    // Timeout passed to the FE RPC; overwritten in start() (presumably milliseconds -- confirm).
+    int _rpc_timeout = 3000;
+};
+} // namespace doris
diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
@@ -185,10 +185,7 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase {
         return _bloom_filter->contain_null();
     }
 
-    void set_contain_null() {
-        DCHECK(_bloom_filter);
-        _bloom_filter->set_contain_null();
-    }
+    void set_contain_null() { _bloom_filter->set_contain_null(); }
 
     size_t get_size() const { return _bloom_filter ? _bloom_filter->size() : 0; }