From 89c977cd53fdfe64ce3394e7489c10d540fdd561 Mon Sep 17 00:00:00 2001
From: xxhZs <1060434431@qq.com>
Date: Fri, 1 Nov 2024 12:06:08 +0800
Subject: [PATCH 01/17] save

---
 src/frontend/src/handler/util.rs              | 50 ++++++++++----
 .../optimizer/plan_node/generic/log_scan.rs   | 29 +-------
 src/frontend/src/session/cursor_manager.rs    | 67 ++++++++++++-------
 3 files changed, 83 insertions(+), 63 deletions(-)

diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs
index 9ff2cc92b5525..2253405a091b5 100644
--- a/src/frontend/src/handler/util.rs
+++ b/src/frontend/src/handler/util.rs
@@ -234,20 +234,44 @@ pub fn gen_query_from_table_name(from_name: ObjectName) -> Query {
     }
 }
 
+// Plan like 'select * , pk in table order by pk'
 pub fn gen_query_from_table_name_order_by(from_name: ObjectName, pk_names: Vec<String>) -> Query {
-    let mut query = gen_query_from_table_name(from_name);
-    query.order_by = pk_names
-        .into_iter()
-        .map(|pk| {
-            let expr = Expr::Identifier(Ident::with_quote_unchecked('"', pk));
-            OrderByExpr {
-                expr,
-                asc: None,
-                nulls_first: None,
-            }
-        })
-        .collect();
-    query
+    let table_factor = TableFactor::Table {
+        name: from_name,
+        alias: None,
+        as_of: None,
+    };
+    let from = vec![TableWithJoins {
+        relation: table_factor,
+        joins: vec![],
+    }];
+    let mut projection = vec![SelectItem::Wildcard(None)];
+    projection.extend(pk_names.iter().map(|name| SelectItem::UnnamedExpr(Expr::Identifier(Ident::new_unchecked(name.clone())))));
+    let select = Select {
+        from,
+        projection,
+        ..Default::default()
+    };
+    let body = SetExpr::Select(Box::new(select));
+    let order_by = pk_names
+        .into_iter()
+        .map(|pk| {
+            let expr = Expr::Identifier(Ident::with_quote_unchecked('"', pk));
+            OrderByExpr {
+                expr,
+                asc: None,
+                nulls_first: None,
+            }
+        })
+        .collect();
+    Query {
+        with: None,
+        body,
+        order_by,
+        limit: None,
+        offset: None,
+        fetch: None,
+    }
 }
 
 pub fn convert_unix_millis_to_logstore_u64(unix_millis: u64) -> u64 {
diff --git a/src/frontend/src/optimizer/plan_node/generic/log_scan.rs b/src/frontend/src/optimizer/plan_node/generic/log_scan.rs
index a57ba79242d1a..d38b5b3e72d59 100644
--- a/src/frontend/src/optimizer/plan_node/generic/log_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/generic/log_scan.rs
@@ -17,7 +17,6 @@ use std::rc::Rc;
 
 use educe::Educe;
 use fixedbitset::FixedBitSet;
-use itertools::Itertools;
 use pretty_xmlish::Pretty;
 use risingwave_common::catalog::{Field, Schema, TableDesc};
 use risingwave_common::types::DataType;
@@ -34,9 +33,7 @@ const OP_TYPE: DataType = DataType::Varchar;
 #[educe(PartialEq, Eq, Hash)]
 pub struct LogScan {
     pub table_name: String,
-    /// Include `output_col_idx_with_out_hidden` and `op_column`
-    pub output_col_idx_with_out_hidden: Vec<usize>,
-    /// Include `output_col_idx_with_out_hidden` and `op_column` and hidden pk
+    /// Include `output_col_idx` and `op_column`
     pub output_col_idx: Vec<usize>,
     /// Descriptor of the table
     pub table_desc: Rc<TableDesc>,
@@ -85,16 +82,6 @@ impl LogScan {
         out_column_names
     }
 
-    pub(crate) fn column_names_without_hidden(&self) -> Vec<String> {
-        let mut out_column_names: Vec<_> = self
-            .output_col_idx_with_out_hidden
-            .iter()
-            .map(|&i| self.table_desc.columns[i].name.clone())
-            .collect();
-        out_column_names.push(OP_NAME.to_string());
-        out_column_names
-    }
-
     pub fn distribution_key(&self) -> Option<Vec<usize>> {
         let tb_idx_to_op_idx = self
             .output_col_idx
@@ -112,7 +99,6 @@ impl LogScan {
     /// Create a logical scan node for log table scan
     pub(crate) fn new(
         table_name: String,
-        output_col_idx_with_out_hidden: Vec<usize>,
         output_col_idx: Vec<usize>,
table_desc: Rc, ctx: OptimizerContextRef, @@ -122,7 +108,6 @@ impl LogScan { ) -> Self { Self { table_name, - output_col_idx_with_out_hidden, output_col_idx, table_desc, chunk_size: None, @@ -163,17 +148,7 @@ impl LogScan { pub(crate) fn out_fields(&self) -> FixedBitSet { let mut out_fields_vec = self - .output_col_idx - .iter() - .enumerate() - .filter_map(|(index, idx)| { - if self.output_col_idx_with_out_hidden.contains(idx) { - Some(index) - } else { - None - } - }) - .collect_vec(); + .output_col_idx.clone(); // add op column out_fields_vec.push(self.output_col_idx.len()); FixedBitSet::from_iter(out_fields_vec) diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index f8464120a531d..02bf9918716cc 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -23,6 +23,7 @@ use std::time::Instant; use anyhow::anyhow; use bytes::Bytes; use futures::StreamExt; +use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::StatementType; use pgwire::types::{Format, Row}; @@ -30,6 +31,7 @@ use risingwave_common::catalog::Field; use risingwave_common::error::BoxedError; use risingwave_common::session_config::QueryMode; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_hummock_sdk::HummockVersionId; use risingwave_sqlparser::ast::{Ident, ObjectName, Statement}; @@ -37,7 +39,7 @@ use risingwave_sqlparser::ast::{Ident, ObjectName, Statement}; use super::SessionImpl; use crate::catalog::subscription_catalog::SubscriptionCatalog; use crate::catalog::TableId; -use crate::error::{ErrorCode, Result, RwError}; +use crate::error::{ErrorCode, Result}; use crate::handler::declare_cursor::create_chunk_stream_for_cursor; use crate::handler::query::{ gen_batch_plan_by_statement, gen_batch_plan_fragmenter, BatchQueryPlanResult, @@ -267,6 +269,7 @@ pub struct SubscriptionCursor { fields: Vec, cursor_metrics: Arc, last_fetch: Instant, + pk_column_names: HashMap, } impl SubscriptionCursor { @@ -278,7 +281,7 @@ impl SubscriptionCursor { handler_args: &HandlerArgs, cursor_metrics: Arc, ) -> Result { - let (state, fields) = if let Some(start_timestamp) = start_timestamp { + let (state, fields,pk_column_names) = if let Some(start_timestamp) = start_timestamp { let table_catalog = handler_args.session.get_table_by_id(&dependent_table_id)?; let fields = table_catalog .columns @@ -286,6 +289,7 @@ impl SubscriptionCursor { .filter(|c| !c.is_hidden) .map(|c| Field::with_name(c.data_type().clone(), c.name())) .collect(); + let pk_column_names = get_pk_names(table_catalog.pk(), &table_catalog); let fields = Self::build_desc(fields, true); ( State::InitLogStoreQuery { @@ -293,13 +297,14 @@ impl SubscriptionCursor { expected_timestamp: None, }, fields, + pk_column_names, ) } else { // The query stream needs to initiated on cursor creation to make sure // future fetch on the cursor starts from the snapshot when the cursor is declared. // // TODO: is this the right behavior? Should we delay the query stream initiation till the first fetch? 
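        // Note: opening the stream eagerly pins the snapshot at DECLARE time, so
        // the first FETCH sees exactly the table state the cursor was declared
        // against; deferring initiation to the first FETCH would instead observe
        // whatever epoch happens to be current at that later point.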
- let (chunk_stream, fields, init_query_timer) = + let (chunk_stream, fields, init_query_timer,pk_column_names) = Self::initiate_query(None, &dependent_table_id, handler_args.clone()).await?; let pinned_epoch = handler_args .session @@ -324,6 +329,7 @@ impl SubscriptionCursor { init_query_timer, }, fields, + pk_column_names, ) }; @@ -338,6 +344,7 @@ impl SubscriptionCursor { fields, cursor_metrics, last_fetch: Instant::now(), + pk_column_names }) } @@ -363,7 +370,7 @@ impl SubscriptionCursor { &self.subscription, ) { Ok((Some(rw_timestamp), expected_timestamp)) => { - let (mut chunk_stream, fields, init_query_timer) = + let (mut chunk_stream, fields, init_query_timer, pk_column_names) = Self::initiate_query( Some(rw_timestamp), &self.dependent_table_id, @@ -392,8 +399,9 @@ impl SubscriptionCursor { expected_timestamp, init_query_timer, }; - if self.fields.ne(&fields) { + if self.fields.ne(&fields) || self.pk_column_names.ne(&pk_column_names) { self.fields = fields; + self.pk_column_names = pk_column_names; return Ok(None); } } @@ -546,6 +554,7 @@ impl SubscriptionCursor { } } self.last_fetch = Instant::now(); + Self::process_output_desc_row(descs, row, pk_column_names) let desc = self.fields.iter().map(to_pg_field).collect(); Ok((ans, desc)) @@ -653,21 +662,14 @@ impl SubscriptionCursor { let pk_names = pks .iter() .map(|f| { - Ok::( table_catalog .columns .get(f.column_index) - .ok_or_else(|| { - anyhow!( - "columns not find in table schema, index is {:?}", - f.column_index - ) - })? + .unwrap() .name() - .to_string(), - ) + .to_string() }) - .collect::>>()?; + .collect_vec(); let query_stmt = Statement::Query(Box::new(gen_query_from_table_name_order_by( subscription_from_table_name, pk_names, @@ -680,8 +682,12 @@ impl SubscriptionCursor { rw_timestamp: Option, dependent_table_id: &TableId, handler_args: HandlerArgs, - ) -> Result<(CursorDataChunkStream, Vec, Instant)> { + ) -> Result<(CursorDataChunkStream, Vec, Instant, HashMap)> { let init_query_timer = Instant::now(); + let session = handler_args.clone().session; + let table_catalog = session.get_table_by_id(dependent_table_id)?; + let pks = table_catalog.pk(); + let pk_column_names = get_pk_names(pks, &table_catalog); let plan_result = Self::init_batch_plan_for_subscription_cursor( rw_timestamp, dependent_table_id, @@ -694,6 +700,7 @@ impl SubscriptionCursor { chunk_stream, Self::build_desc(fields, rw_timestamp.is_none()), init_query_timer, + pk_column_names, )) } @@ -740,6 +747,12 @@ impl SubscriptionCursor { Ok(row) } + pub fn process_output_desc_row(descs: Vec, row: Vec,pk_column_names: &HashSet) -> (Vec,Vec) { + descs.into_iter().enumerate().filter_map(|(index, field)| { + + }) + } + pub fn build_desc(mut descs: Vec, from_snapshot: bool) -> Vec { if from_snapshot { descs.push(Field::with_name(DataType::Varchar, "op")); @@ -770,14 +783,8 @@ impl SubscriptionCursor { } }) .collect::>(); - let output_col_idx_with_out_hidden = output_col_idx - .iter() - .filter(|index| !table_catalog.columns[**index].is_hidden) - .cloned() - .collect::>(); let core = generic::LogScan::new( table_catalog.name.clone(), - output_col_idx_with_out_hidden, output_col_idx, Rc::new(table_catalog.table_desc()), context, @@ -789,7 +796,7 @@ impl SubscriptionCursor { let batch_log_seq_scan = BatchLogSeqScan::new(core); let out_fields = batch_log_seq_scan.core().out_fields(); - let out_names = batch_log_seq_scan.core().column_names_without_hidden(); + let out_names = batch_log_seq_scan.core().column_names(); // order by pk, so don't need to sort let order = 
Order::new(pks.to_vec());
@@ -1043,3 +1050,17 @@ impl CursorManager {
         }
     }
 }
+
+fn get_pk_names(pks: &[ColumnOrder], table_catalog: &TableCatalog) -> HashMap<String, bool> {
+    pks
+        .iter()
+        .map(|f| {
+            let column =
+                table_catalog
+                    .columns
+                    .get(f.column_index)
+                    .unwrap();
+            (column.name().to_string(),column.is_hidden)
+        })
+        .collect()
+}
\ No newline at end of file

From c071111fb20e0da10b08576e5c38d65123cd693b Mon Sep 17 00:00:00 2001
From: xxhZs <1060434431@qq.com>
Date: Fri, 1 Nov 2024 14:05:31 +0800
Subject: [PATCH 02/17] fix all

---
 src/frontend/src/session/cursor_manager.rs | 18 ++++++++++++++----
 src/utils/pgwire/src/types.rs              |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs
index 02bf9918716cc..4203cb02f561d 100644
--- a/src/frontend/src/session/cursor_manager.rs
+++ b/src/frontend/src/session/cursor_manager.rs
@@ -747,10 +747,20 @@ impl SubscriptionCursor {
         Ok(row)
     }
 
-    pub fn process_output_desc_row(descs: Vec<Field>, row: Vec<Row>,pk_column_names: &HashSet<String>) -> (Vec<Field>,Vec<Row>) {
-        descs.into_iter().enumerate().filter_map(|(index, field)| {
-
-        })
+    pub fn process_output_desc_row(descs: Vec<Field>, mut row: Vec<Row>,pk_column_names: &HashMap<String, bool>) -> (Vec<Field>,Vec<Row>) {
+        let iter= descs.iter().map(|field| {
+            if let Some(is_hidden) = pk_column_names.get(&field.name) && *is_hidden{
+                (false,field)
+            } else {
+                (true,field)
+            }
+        });
+        let pk_fields = iter.filter(|(is_hidden,_)| *is_hidden).map(|(_,field)| field).cloned().collect();
+        let mut pk_keep = iter.map(|(is_hidden,_)| is_hidden);
+        row.iter_mut().for_each(|row| {
+            row.0.retain(|x| pk_keep.next().unwrap());
+        });
+        (pk_fields,row)
     }
 
     pub fn build_desc(mut descs: Vec<Field>, from_snapshot: bool) -> Vec<Field> {
diff --git a/src/utils/pgwire/src/types.rs b/src/utils/pgwire/src/types.rs
index c76aa20aac4cd..95638d4f6c021 100644
--- a/src/utils/pgwire/src/types.rs
+++ b/src/utils/pgwire/src/types.rs
@@ -23,7 +23,7 @@ use crate::error::{PsqlError, PsqlResult};
 /// A row of data returned from the database by a query.
 #[derive(Debug, Clone)]
 // NOTE: Since we only support simple query protocol, the values are represented as strings.
-pub struct Row(Vec<Option<Bytes>>);
+pub struct Row(pub Vec<Option<Bytes>>);
 
 impl Row {
     /// Create a row from values.
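A note between these two revisions: patch 02's `process_output_desc_row` cannot compile as written, since `iter` is consumed by `filter` and then reused to build `pk_keep`; patch 03 below resolves that by collecting the mask into a `Vec` first. Replaying one mask iterator across every row has a second pitfall that survives into patch 03: `pk_keep` is exhausted after the first row, so a multi-row batch would panic on `unwrap`. The mask-and-retain idea itself is simple; here is a minimal self-contained sketch, where `retain_by_mask` and the plain `Vec<Option<Vec<u8>>>` value type are illustrative stand-ins for pgwire's `Row(Vec<Option<Bytes>>)`, not code from this series:

    /// Sketch only: drop the values of hidden pk columns from one row,
    /// given a keep-mask computed once over the output fields and replayed
    /// per row rather than shared across rows.
    fn retain_by_mask(values: Vec<Option<Vec<u8>>>, keep: &[bool]) -> Vec<Option<Vec<u8>>> {
        assert_eq!(values.len(), keep.len());
        values
            .into_iter()
            .zip(keep.iter().copied())
            .filter_map(|(value, keep_it)| keep_it.then_some(value))
            .collect()
    }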
From 0ab16687347119e1c41b6896d272e6187838669d Mon Sep 17 00:00:00 2001
From: xxhZs <1060434431@qq.com>
Date: Fri, 1 Nov 2024 15:47:44 +0800
Subject: [PATCH 03/17] save

---
 src/frontend/src/session/cursor_manager.rs | 36 +++++++++++++++-------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs
index 4203cb02f561d..df83909e58f61 100644
--- a/src/frontend/src/session/cursor_manager.rs
+++ b/src/frontend/src/session/cursor_manager.rs
@@ -270,6 +270,7 @@ pub struct SubscriptionCursor {
     cursor_metrics: Arc<CursorMetrics>,
     last_fetch: Instant,
     pk_column_names: HashMap<String, bool>,
+    seek_pk_row: Option<Vec<Option<Bytes>>>,
 }
 
 impl SubscriptionCursor {
@@ -344,7 +345,8 @@ impl SubscriptionCursor {
            fields,
            cursor_metrics,
            last_fetch: Instant::now(),
-           pk_column_names
+           pk_column_names,
+           seek_pk_row: None,
        })
    }
 
@@ -554,10 +556,13 @@ impl SubscriptionCursor {
            }
        }
        self.last_fetch = Instant::now();
-       Self::process_output_desc_row(descs, row, pk_column_names)
-       let desc = self.fields.iter().map(to_pg_field).collect();
+       let (fields,rows,seek_pk_row) = Self::process_output_desc_row(&self.fields, ans, &self.pk_column_names);
+       if let Some(seek_pk_row) = seek_pk_row{
+           self.seek_pk_row = Some(seek_pk_row);
+       }
+       let desc = fields.iter().map(to_pg_field).collect();
 
-       Ok((ans, desc))
+       Ok((rows, desc))
    }
 
    fn get_next_rw_timestamp(
@@ -752,20 +752,29 @@ impl SubscriptionCursor {
        Ok(row)
    }
 
-   pub fn process_output_desc_row(descs: Vec<Field>, mut row: Vec<Row>,pk_column_names: &HashMap<String, bool>) -> (Vec<Field>,Vec<Row>) {
-       let iter= descs.iter().map(|field| {
-           if let Some(is_hidden) = pk_column_names.get(&field.name) && *is_hidden{
-               (false,field)
-           } else {
-               (true,field)
-           }
-       });
-       let pk_fields = iter.filter(|(is_hidden,_)| *is_hidden).map(|(_,field)| field).cloned().collect();
-       let mut pk_keep = iter.map(|(is_hidden,_)| is_hidden);
-       row.iter_mut().for_each(|row| {
-           row.0.retain(|x| pk_keep.next().unwrap());
-       });
-       (pk_fields,row)
+   pub fn process_output_desc_row(descs: &Vec<Field>, mut rows: Vec<Row>,pk_column_names: &HashMap<String, bool>) -> (Vec<Field>,Vec<Row>,Option<Vec<Option<Bytes>>>) {
+       let last_row = rows.last_mut().map(|row|{
+           row.0.iter().zip_eq_fast(descs.iter()).filter_map(|(data,field)|{
+               if pk_column_names.contains_key(&field.name){
+                   Some(data.clone())
+               } else {
+                   None
+               }
+           }).collect_vec()
+       });
+       let iter= descs.iter().map(|field| {
+           if let Some(is_hidden) = pk_column_names.get(&field.name) && *is_hidden{
+               (false,field)
+           } else {
+               (true,field)
+           }
+       }).collect_vec();
+       let pk_fields = iter.iter().filter(|(is_hidden,_)| *is_hidden).map(|(_,field)| (*field).clone()).collect();
+       let mut pk_keep = iter.iter().map(|(is_hidden,_)| *is_hidden);
+       rows.iter_mut().for_each(|row| {
+           row.0.retain(|_| pk_keep.next().unwrap());
+       });
+       (pk_fields,rows,last_row)
    }
 
    pub fn build_desc(mut descs: Vec<Field>, from_snapshot: bool) -> Vec<Field> {

From aa47c6e454347dc7399f61ee0fdf8792a5f2446e Mon Sep 17 00:00:00 2001
From: xxhZs <1060434431@qq.com>
Date: Fri, 1 Nov 2024 17:15:24 +0800
Subject: [PATCH 04/17] save save support all data support

---
 proto/batch_plan.proto                        |   1 +
 src/batch/src/executor/log_row_seq_scan.rs    |  71 +++--
 src/batch/src/executor/row_seq_scan.rs        | 114 +-------
 src/batch/src/executor/utils.rs               | 125 +++++++++
 src/frontend/src/expr/function_call.rs        |   2 +-
 src/frontend/src/handler/util.rs              |  91 +++++--
 .../optimizer/plan_node/batch_log_seq_scan.rs |  37 ++-
 .../src/optimizer/plan_node/batch_seq_scan.rs |  70 +----
 .../optimizer/plan_node/batch_sys_seq_scan.rs |  66 +----
 .../optimizer/plan_node/generic/log_scan.rs   |  25 +-
 src/frontend/src/optimizer/plan_node/utils.rs |  58 +++++
 src/frontend/src/session/cursor_manager.rs    | 246 ++++++++++++++----
 .../src/table/batch_table/storage_table.rs    |   7 +-
.../executor/backfill/snapshot_backfill.rs | 3 + 14 files changed, 582 insertions(+), 334 deletions(-) diff --git a/proto/batch_plan.proto b/proto/batch_plan.proto index f881f6546fae5..dd531ed5773a7 100644 --- a/proto/batch_plan.proto +++ b/proto/batch_plan.proto @@ -123,6 +123,7 @@ message LogRowSeqScanNode { common.BatchQueryEpoch old_epoch = 4; common.BatchQueryEpoch new_epoch = 5; bool ordered = 6; + repeated ScanRange scan_ranges = 7; } message InsertNode { diff --git a/src/batch/src/executor/log_row_seq_scan.rs b/src/batch/src/executor/log_row_seq_scan.rs index 6f40f42fbba8c..cde3ae5ee8636 100644 --- a/src/batch/src/executor/log_row_seq_scan.rs +++ b/src/batch/src/executor/log_row_seq_scan.rs @@ -18,13 +18,15 @@ use std::sync::Arc; use futures::prelude::stream::StreamExt; use futures_async_stream::try_stream; use futures_util::pin_mut; +use iceberg::scan; use prometheus::Histogram; use risingwave_common::array::{DataChunk, Op}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnId, Field, Schema}; use risingwave_common::hash::VnodeCountCompat; use risingwave_common::row::{Row, RowExt}; -use risingwave_common::types::ScalarImpl; +use risingwave_common::types::{DataType, ScalarImpl}; +use risingwave_common::util::scan_range; use risingwave_hummock_sdk::{HummockReadEpoch, HummockVersionId}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::{batch_query_epoch, BatchQueryEpoch}; @@ -33,7 +35,9 @@ use risingwave_storage::table::batch_table::storage_table::StorageTable; use risingwave_storage::table::collect_data_chunk; use risingwave_storage::{dispatch_state_store, StateStore}; -use super::{BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder}; +use super::{ + BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder, ScanRange, +}; use crate::error::{BatchError, Result}; use crate::monitor::BatchMetrics; use crate::task::BatchTaskContext; @@ -53,6 +57,7 @@ pub struct LogRowSeqScanExecutor { new_epoch: u64, version_id: HummockVersionId, ordered: bool, + scan_ranges: Vec, } impl LogRowSeqScanExecutor { @@ -65,6 +70,7 @@ impl LogRowSeqScanExecutor { identity: String, metrics: Option, ordered: bool, + scan_ranges: Vec, ) -> Self { let mut schema = table.schema().clone(); schema.fields.push(Field::with_name( @@ -81,6 +87,7 @@ impl LogRowSeqScanExecutor { new_epoch, version_id, ordered, + scan_ranges, } } } @@ -139,6 +146,28 @@ impl BoxedExecutorBuilder for LogStoreRowSeqScanExecutorBuilder { let old_epoch = old_epoch.epoch; let new_epoch = new_epoch.epoch; + let scan_ranges = { + let scan_ranges = &log_store_seq_scan_node.scan_ranges; + if scan_ranges.is_empty() { + vec![ScanRange::full()] + } else { + scan_ranges + .iter() + .map(|scan_range| { + let pk_types = table_desc.pk.iter().map(|order| { + DataType::from( + table_desc.columns[order.column_index as usize] + .column_type + .as_ref() + .unwrap(), + ) + }); + ScanRange::new(scan_range.clone(), pk_types) + }) + .try_collect()? + } + }; + dispatch_state_store!(source.context().state_store(), state_store, { let table = StorageTable::new_partial(state_store, column_ids, vnodes, table_desc); Ok(Box::new(LogRowSeqScanExecutor::new( @@ -150,6 +179,7 @@ impl BoxedExecutorBuilder for LogStoreRowSeqScanExecutorBuilder { source.plan_node().get_identity().clone(), metrics, log_store_seq_scan_node.ordered, + scan_ranges, ))) }) } @@ -180,6 +210,7 @@ impl LogRowSeqScanExecutor { version_id, schema, ordered, + scan_ranges, .. 
} = *self; let table = std::sync::Arc::new(table); @@ -191,20 +222,23 @@ impl LogRowSeqScanExecutor { // Range Scan // WARN: DO NOT use `select` to execute range scans concurrently // it can consume too much memory if there're too many ranges. - let stream = Self::execute_range( - table.clone(), - old_epoch, - new_epoch, - version_id, - chunk_size, - histogram, - Arc::new(schema.clone()), - ordered, - ); - #[for_await] - for chunk in stream { - let chunk = chunk?; - yield chunk; + for range in scan_ranges { + let stream = Self::execute_range( + table.clone(), + old_epoch, + new_epoch, + version_id, + chunk_size, + histogram, + Arc::new(schema.clone()), + ordered, + range, + ); + #[for_await] + for chunk in stream { + let chunk = chunk?; + yield chunk; + } } } @@ -218,13 +252,18 @@ impl LogRowSeqScanExecutor { histogram: Option>, schema: Arc, ordered: bool, + scan_range: ScanRange, ) { + let pk_prefix = scan_range.pk_prefix.clone(); + let range_bounds = scan_range.convert_to_range_bounds(table.clone()); // Range Scan. let iter = table .batch_iter_log_with_pk_bounds( old_epoch, HummockReadEpoch::BatchQueryCommitted(new_epoch, version_id), ordered, + range_bounds, + pk_prefix, ) .await? .flat_map(|r| { diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index b65f4bf8939b4..303ddf4270aa6 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -35,6 +35,7 @@ use risingwave_storage::store::PrefetchOptions; use risingwave_storage::table::batch_table::storage_table::StorageTable; use risingwave_storage::{dispatch_state_store, StateStore}; +use super::ScanRange; use crate::error::{BatchError, Result}; use crate::executor::{ BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder, @@ -59,15 +60,6 @@ pub struct RowSeqScanExecutor { as_of: Option, } -/// Range for batch scan. -pub struct ScanRange { - /// The prefix of the primary key. - pub pk_prefix: OwnedRow, - - /// The range bounds of the next column. - pub next_col_bounds: (Bound, Bound), -} - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct AsOf { pub timestamp: i64, @@ -98,64 +90,6 @@ impl From<&AsOf> for PbAsOf { } } -impl ScanRange { - /// Create a scan range from the prost representation. - pub fn new( - scan_range: PbScanRange, - mut pk_types: impl Iterator, - ) -> Result { - let pk_prefix = OwnedRow::new( - scan_range - .eq_conds - .iter() - .map(|v| { - let ty = pk_types.next().unwrap(); - deserialize_datum(v.as_slice(), &ty) - }) - .try_collect()?, - ); - if scan_range.lower_bound.is_none() && scan_range.upper_bound.is_none() { - return Ok(Self { - pk_prefix, - ..Self::full() - }); - } - - let bound_ty = pk_types.next().unwrap(); - let build_bound = |bound: &scan_range::Bound| -> Bound { - let datum = deserialize_datum(bound.value.as_slice(), &bound_ty).unwrap(); - if bound.inclusive { - Bound::Included(datum) - } else { - Bound::Excluded(datum) - } - }; - - let next_col_bounds: (Bound, Bound) = match ( - scan_range.lower_bound.as_ref(), - scan_range.upper_bound.as_ref(), - ) { - (Some(lb), Some(ub)) => (build_bound(lb), build_bound(ub)), - (None, Some(ub)) => (Bound::Unbounded, build_bound(ub)), - (Some(lb), None) => (build_bound(lb), Bound::Unbounded), - (None, None) => unreachable!(), - }; - - Ok(Self { - pk_prefix, - next_col_bounds, - }) - } - - /// Create a scan range for full table scan. 
- pub fn full() -> Self { - Self { - pk_prefix: OwnedRow::default(), - next_col_bounds: (Bound::Unbounded, Bound::Unbounded), - } - } -} - impl RowSeqScanExecutor { pub fn new( table: StorageTable, @@ -419,55 +353,15 @@ impl RowSeqScanExecutor { limit: Option, histogram: Option>, ) { - let ScanRange { - pk_prefix, - next_col_bounds, - } = scan_range; - - let order_type = table.pk_serializer().get_order_types()[pk_prefix.len()]; - let (start_bound, end_bound) = if order_type.is_ascending() { - (next_col_bounds.0, next_col_bounds.1) - } else { - (next_col_bounds.1, next_col_bounds.0) - }; - - let start_bound_is_bounded = !matches!(start_bound, Bound::Unbounded); - let end_bound_is_bounded = !matches!(end_bound, Bound::Unbounded); - + let pk_prefix = scan_range.pk_prefix.clone(); + let range_bounds = scan_range.convert_to_range_bounds(table.clone()); // Range Scan. assert!(pk_prefix.len() < table.pk_indices().len()); let iter = table .batch_chunk_iter_with_pk_bounds( epoch.into(), &pk_prefix, - ( - match start_bound { - Bound::Unbounded => { - if end_bound_is_bounded && order_type.nulls_are_first() { - // `NULL`s are at the start bound side, we should exclude them to meet SQL semantics. - Bound::Excluded(OwnedRow::new(vec![None])) - } else { - // Both start and end are unbounded, so we need to select all rows. - Bound::Unbounded - } - } - Bound::Included(x) => Bound::Included(OwnedRow::new(vec![x])), - Bound::Excluded(x) => Bound::Excluded(OwnedRow::new(vec![x])), - }, - match end_bound { - Bound::Unbounded => { - if start_bound_is_bounded && order_type.nulls_are_last() { - // `NULL`s are at the end bound side, we should exclude them to meet SQL semantics. - Bound::Excluded(OwnedRow::new(vec![None])) - } else { - // Both start and end are unbounded, so we need to select all rows. - Bound::Unbounded - } - } - Bound::Included(x) => Bound::Included(OwnedRow::new(vec![x])), - Bound::Excluded(x) => Bound::Excluded(OwnedRow::new(vec![x])), - }, - ), + range_bounds, ordered, chunk_size, PrefetchOptions::new(limit.is_none(), true), diff --git a/src/batch/src/executor/utils.rs b/src/batch/src/executor/utils.rs index 4f724ec5416c8..acbd45159a32f 100644 --- a/src/batch/src/executor/utils.rs +++ b/src/batch/src/executor/utils.rs @@ -12,11 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. +use core::ops::{Bound, RangeBounds}; +use std::sync::Arc; + use futures::stream::BoxStream; use futures::StreamExt; use futures_async_stream::try_stream; use risingwave_common::array::DataChunk; use risingwave_common::catalog::Schema; +use risingwave_common::row::{OwnedRow, Row}; +use risingwave_common::types::{DataType, Datum}; +use risingwave_common::util::value_encoding::deserialize_datum; +use risingwave_pb::batch_plan::{scan_range, PbScanRange}; +use risingwave_storage::table::batch_table::storage_table::StorageTable; +use risingwave_storage::StateStore; use crate::error::{BatchError, Result}; use crate::executor::{BoxedDataChunkStream, Executor}; @@ -124,3 +133,119 @@ impl Executor for WrapStreamExecutor { self.stream } } + +/// Range for batch scan. +pub struct ScanRange { + /// The prefix of the primary key. + pub pk_prefix: OwnedRow, + + /// The range bounds of the next column. + pub next_col_bounds: (Bound, Bound), +} + +impl ScanRange { + /// Create a scan range from the prost representation. 
+ pub fn new( + scan_range: PbScanRange, + mut pk_types: impl Iterator, + ) -> Result { + let pk_prefix = OwnedRow::new( + scan_range + .eq_conds + .iter() + .map(|v| { + let ty = pk_types.next().unwrap(); + deserialize_datum(v.as_slice(), &ty) + }) + .try_collect()?, + ); + if scan_range.lower_bound.is_none() && scan_range.upper_bound.is_none() { + return Ok(Self { + pk_prefix, + ..Self::full() + }); + } + + let bound_ty = pk_types.next().unwrap(); + let build_bound = |bound: &scan_range::Bound| -> Bound { + let datum = deserialize_datum(bound.value.as_slice(), &bound_ty).unwrap(); + if bound.inclusive { + Bound::Included(datum) + } else { + Bound::Excluded(datum) + } + }; + + let next_col_bounds: (Bound, Bound) = match ( + scan_range.lower_bound.as_ref(), + scan_range.upper_bound.as_ref(), + ) { + (Some(lb), Some(ub)) => (build_bound(lb), build_bound(ub)), + (None, Some(ub)) => (Bound::Unbounded, build_bound(ub)), + (Some(lb), None) => (build_bound(lb), Bound::Unbounded), + (None, None) => unreachable!(), + }; + + Ok(Self { + pk_prefix, + next_col_bounds, + }) + } + + /// Create a scan range for full table scan. + pub fn full() -> Self { + Self { + pk_prefix: OwnedRow::default(), + next_col_bounds: (Bound::Unbounded, Bound::Unbounded), + } + } + + pub fn convert_to_range_bounds( + self, + table: Arc>, + ) -> impl RangeBounds { + let ScanRange { + pk_prefix, + next_col_bounds, + } = self; + + let order_type = table.pk_serializer().get_order_types()[pk_prefix.len()]; + let (start_bound, end_bound) = if order_type.is_ascending() { + (next_col_bounds.0, next_col_bounds.1) + } else { + (next_col_bounds.1, next_col_bounds.0) + }; + + let start_bound_is_bounded = !matches!(start_bound, Bound::Unbounded); + let end_bound_is_bounded = !matches!(end_bound, Bound::Unbounded); + + ( + match start_bound { + Bound::Unbounded => { + if end_bound_is_bounded && order_type.nulls_are_first() { + // `NULL`s are at the start bound side, we should exclude them to meet SQL semantics. + Bound::Excluded(OwnedRow::new(vec![None])) + } else { + // Both start and end are unbounded, so we need to select all rows. + Bound::Unbounded + } + } + Bound::Included(x) => Bound::Included(OwnedRow::new(vec![x])), + Bound::Excluded(x) => Bound::Excluded(OwnedRow::new(vec![x])), + }, + match end_bound { + Bound::Unbounded => { + if start_bound_is_bounded && order_type.nulls_are_last() { + // `NULL`s are at the end bound side, we should exclude them to meet SQL semantics. + Bound::Excluded(OwnedRow::new(vec![None])) + } else { + // Both start and end are unbounded, so we need to select all rows. 
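                        // e.g. an ASC NULLS FIRST key with `WHERE k < 5`: only the
                        // end bound is set, so the branch above starts the scan at
                        // Excluded((NULL,)) and NULL keys are skipped, matching SQL
                        // comparison semantics; when neither side is bounded, NULL
                        // keys must be returned, hence the plain Unbounded below.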
+ Bound::Unbounded + } + } + Bound::Included(x) => Bound::Included(OwnedRow::new(vec![x])), + Bound::Excluded(x) => Bound::Excluded(OwnedRow::new(vec![x])), + }, + ) + } +} diff --git a/src/frontend/src/expr/function_call.rs b/src/frontend/src/expr/function_call.rs index af1f84b321eb5..4d9e730049748 100644 --- a/src/frontend/src/expr/function_call.rs +++ b/src/frontend/src/expr/function_call.rs @@ -25,7 +25,7 @@ use crate::expr::{ExprDisplay, ExprType, ExprVisitor, ImpureAnalyzer}; #[derive(Clone, Eq, PartialEq, Hash)] pub struct FunctionCall { - pub(super) func_type: ExprType, + pub func_type: ExprType, pub(super) return_type: DataType, pub(super) inputs: Vec, } diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 2253405a091b5..4fd38efb7b997 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -35,8 +35,8 @@ use risingwave_common::types::{ use risingwave_common::util::epoch::Epoch; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::{ - CompatibleSourceSchema, ConnectorSchema, Expr, Ident, ObjectName, OrderByExpr, Query, Select, - SelectItem, SetExpr, TableFactor, TableWithJoins, + BinaryOperator, CompatibleSourceSchema, ConnectorSchema, Expr, Ident, ObjectName, OrderByExpr, + Query, Select, SelectItem, SetExpr, TableFactor, TableWithJoins, Value, }; use thiserror_ext::AsReport; @@ -235,7 +235,25 @@ pub fn gen_query_from_table_name(from_name: ObjectName) -> Query { } // Plan like 'select * , pk in table order by pk' -pub fn gen_query_from_table_name_order_by(from_name: ObjectName, pk_names: Vec) -> Query { +pub fn gen_query_from_table_name_order_by( + from_name: ObjectName, + pks: Vec<(String, bool)>, + seek_pk_rows: Option>>, +) -> Query { + let select_pks = pks + .iter() + .filter_map( + |(name, is_hidden)| { + if *is_hidden { + Some(name.clone()) + } else { + None + } + }, + ) + .collect_vec(); + let order_pks = pks.iter().map(|(name, _)| name).collect_vec(); + let table_factor = TableFactor::Table { name: from_name, alias: None, @@ -246,24 +264,67 @@ pub fn gen_query_from_table_name_order_by(from_name: ObjectName, pk_names: Vec, core: generic::LogScan, + scan_ranges: Vec, } impl BatchLogSeqScan { - fn new_inner(core: generic::LogScan, dist: Distribution) -> Self { - let order = Order::new(core.table_desc.pk.clone()); + fn new_inner(core: generic::LogScan, dist: Distribution, scan_ranges: Vec) -> Self { + let order = if scan_ranges.len() > 1 { + Order::any() + } else { + Order::new(core.table_desc.pk.clone()) + }; let base = PlanBase::new_batch(core.ctx(), core.schema(), dist, order); - Self { base, core } + Self { + base, + core, + scan_ranges, + } } - pub fn new(core: generic::LogScan) -> Self { + pub fn new(core: generic::LogScan, scan_ranges: Vec) -> Self { // Use `Single` by default, will be updated later with `clone_with_dist`. 
- Self::new_inner(core, Distribution::Single) + Self::new_inner(core, Distribution::Single, scan_ranges) } fn clone_with_dist(&self) -> Self { @@ -62,6 +72,7 @@ impl BatchLogSeqScan { } } }, + self.scan_ranges.clone(), ) } @@ -91,6 +102,17 @@ impl Distill for BatchLogSeqScan { vec.push(("old_epoch", Pretty::from(self.core.old_epoch.to_string()))); vec.push(("new_epoch", Pretty::from(self.core.new_epoch.to_string()))); vec.push(("version_id", Pretty::from(self.core.version_id.to_string()))); + if !self.scan_ranges.is_empty() { + let order_names = match verbose { + true => self.core.order_names_with_table_prefix(), + false => self.core.order_names(), + }; + let range_strs = scan_ranges_as_strs(order_names, &self.scan_ranges); + vec.push(( + "scan_ranges", + Pretty::Array(range_strs.into_iter().map(Pretty::from).collect()), + )); + } childless_record("BatchLogSeqScan", vec) } @@ -131,6 +153,7 @@ impl TryToBatchPb for BatchLogSeqScan { }), // It's currently true. ordered: !self.order().is_any(), + scan_ranges: self.scan_ranges.iter().map(|r| r.to_protobuf()).collect(), })) } } @@ -144,7 +167,7 @@ impl ToLocalBatch for BatchLogSeqScan { } else { Distribution::SomeShard }; - Ok(Self::new_inner(self.core.clone(), dist).into()) + Ok(Self::new_inner(self.core.clone(), dist, self.scan_ranges.clone()).into()) } } diff --git a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs index 576793f4dd450..addd830873221 100644 --- a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs +++ b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs @@ -12,18 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::ops::Bound; - -use itertools::Itertools; use pretty_xmlish::{Pretty, XmlNode}; -use risingwave_common::types::ScalarImpl; use risingwave_common::util::scan_range::{is_full_range, ScanRange}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::RowSeqScanNode; use risingwave_sqlparser::ast::AsOf; use super::batch::prelude::*; -use super::utils::{childless_record, to_pb_time_travel_as_of, Distill}; +use super::utils::{childless_record, scan_ranges_as_strs, to_pb_time_travel_as_of, Distill}; use super::{generic, ExprRewritable, PlanBase, PlanRef, ToDistributedBatch}; use crate::catalog::ColumnId; use crate::error::Result; @@ -135,37 +131,6 @@ impl BatchSeqScan { &self.scan_ranges } - fn scan_ranges_as_strs(&self, verbose: bool) -> Vec { - let order_names = match verbose { - true => self.core.order_names_with_table_prefix(), - false => self.core.order_names(), - }; - let mut range_strs = vec![]; - - let explain_max_range = 20; - for scan_range in self.scan_ranges.iter().take(explain_max_range) { - #[expect(clippy::disallowed_methods)] - let mut range_str = scan_range - .eq_conds - .iter() - .zip(order_names.iter()) - .map(|(v, name)| match v { - Some(v) => format!("{} = {:?}", name, v), - None => format!("{} IS NULL", name), - }) - .collect_vec(); - if !is_full_range(&scan_range.range) { - let i = scan_range.eq_conds.len(); - range_str.push(range_to_string(&order_names[i], &scan_range.range)) - } - range_strs.push(range_str.join(" AND ")); - } - if self.scan_ranges.len() > explain_max_range { - range_strs.push("...".to_string()); - } - range_strs - } - pub fn limit(&self) -> &Option { &self.limit } @@ -173,33 +138,6 @@ impl BatchSeqScan { impl_plan_tree_node_for_leaf! 
{ BatchSeqScan } -fn lb_to_string(name: &str, lb: &Bound) -> String { - let (op, v) = match lb { - Bound::Included(v) => (">=", v), - Bound::Excluded(v) => (">", v), - Bound::Unbounded => unreachable!(), - }; - format!("{} {} {:?}", name, op, v) -} -fn ub_to_string(name: &str, ub: &Bound) -> String { - let (op, v) = match ub { - Bound::Included(v) => ("<=", v), - Bound::Excluded(v) => ("<", v), - Bound::Unbounded => unreachable!(), - }; - format!("{} {} {:?}", name, op, v) -} -fn range_to_string(name: &str, range: &(Bound, Bound)) -> String { - match (&range.0, &range.1) { - (Bound::Unbounded, Bound::Unbounded) => unreachable!(), - (Bound::Unbounded, ub) => ub_to_string(name, ub), - (lb, Bound::Unbounded) => lb_to_string(name, lb), - (lb, ub) => { - format!("{} AND {}", lb_to_string(name, lb), ub_to_string(name, ub)) - } - } -} - impl Distill for BatchSeqScan { fn distill<'a>(&self) -> XmlNode<'a> { let verbose = self.base.ctx().is_explain_verbose(); @@ -208,7 +146,11 @@ impl Distill for BatchSeqScan { vec.push(("columns", self.core.columns_pretty(verbose))); if !self.scan_ranges.is_empty() { - let range_strs = self.scan_ranges_as_strs(verbose); + let order_names = match verbose { + true => self.core.order_names_with_table_prefix(), + false => self.core.order_names(), + }; + let range_strs = scan_ranges_as_strs(order_names, &self.scan_ranges); vec.push(( "scan_ranges", Pretty::Array(range_strs.into_iter().map(Pretty::from).collect()), diff --git a/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs index 6068c1131626b..007e983e67f3d 100644 --- a/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs +++ b/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs @@ -23,7 +23,7 @@ use risingwave_pb::batch_plan::SysRowSeqScanNode; use risingwave_pb::plan_common::PbColumnDesc; use super::batch::prelude::*; -use super::utils::{childless_record, Distill}; +use super::utils::{childless_record, range_to_string, scan_ranges_as_strs, Distill}; use super::{generic, ExprRewritable, PlanBase, PlanRef, ToBatchPb, ToDistributedBatch}; use crate::error::Result; use crate::expr::{ExprRewriter, ExprVisitor}; @@ -91,68 +91,10 @@ impl BatchSysSeqScan { pub fn scan_ranges(&self) -> &[ScanRange] { &self.scan_ranges } - - fn scan_ranges_as_strs(&self, verbose: bool) -> Vec { - let order_names = match verbose { - true => self.core.order_names_with_table_prefix(), - false => self.core.order_names(), - }; - let mut range_strs = vec![]; - - let explain_max_range = 20; - for scan_range in self.scan_ranges.iter().take(explain_max_range) { - #[expect(clippy::disallowed_methods)] - let mut range_str = scan_range - .eq_conds - .iter() - .zip(order_names.iter()) - .map(|(v, name)| match v { - Some(v) => format!("{} = {:?}", name, v), - None => format!("{} IS NULL", name), - }) - .collect_vec(); - if !is_full_range(&scan_range.range) { - let i = scan_range.eq_conds.len(); - range_str.push(range_to_string(&order_names[i], &scan_range.range)) - } - range_strs.push(range_str.join(" AND ")); - } - if self.scan_ranges.len() > explain_max_range { - range_strs.push("...".to_string()); - } - range_strs - } } impl_plan_tree_node_for_leaf! 
{ BatchSysSeqScan } -fn lb_to_string(name: &str, lb: &Bound) -> String { - let (op, v) = match lb { - Bound::Included(v) => (">=", v), - Bound::Excluded(v) => (">", v), - Bound::Unbounded => unreachable!(), - }; - format!("{} {} {:?}", name, op, v) -} -fn ub_to_string(name: &str, ub: &Bound) -> String { - let (op, v) = match ub { - Bound::Included(v) => ("<=", v), - Bound::Excluded(v) => ("<", v), - Bound::Unbounded => unreachable!(), - }; - format!("{} {} {:?}", name, op, v) -} -fn range_to_string(name: &str, range: &(Bound, Bound)) -> String { - match (&range.0, &range.1) { - (Bound::Unbounded, Bound::Unbounded) => unreachable!(), - (Bound::Unbounded, ub) => ub_to_string(name, ub), - (lb, Bound::Unbounded) => lb_to_string(name, lb), - (lb, ub) => { - format!("{} AND {}", lb_to_string(name, lb), ub_to_string(name, ub)) - } - } -} - impl Distill for BatchSysSeqScan { fn distill<'a>(&self) -> XmlNode<'a> { let verbose = self.base.ctx().is_explain_verbose(); @@ -161,7 +103,11 @@ impl Distill for BatchSysSeqScan { vec.push(("columns", self.core.columns_pretty(verbose))); if !self.scan_ranges.is_empty() { - let range_strs = self.scan_ranges_as_strs(verbose); + let order_names = match verbose { + true => self.core.order_names_with_table_prefix(), + false => self.core.order_names(), + }; + let range_strs = scan_ranges_as_strs(order_names, &self.scan_ranges); vec.push(( "scan_ranges", Pretty::Array(range_strs.into_iter().map(Pretty::from).collect()), diff --git a/src/frontend/src/optimizer/plan_node/generic/log_scan.rs b/src/frontend/src/optimizer/plan_node/generic/log_scan.rs index d38b5b3e72d59..5e384dd3dc842 100644 --- a/src/frontend/src/optimizer/plan_node/generic/log_scan.rs +++ b/src/frontend/src/optimizer/plan_node/generic/log_scan.rs @@ -18,7 +18,7 @@ use std::rc::Rc; use educe::Educe; use fixedbitset::FixedBitSet; use pretty_xmlish::Pretty; -use risingwave_common::catalog::{Field, Schema, TableDesc}; +use risingwave_common::catalog::{ColumnDesc, Field, Schema, TableDesc}; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_hummock_sdk::HummockVersionId; @@ -147,8 +147,7 @@ impl LogScan { } pub(crate) fn out_fields(&self) -> FixedBitSet { - let mut out_fields_vec = self - .output_col_idx.clone(); + let mut out_fields_vec = self.output_col_idx.clone(); // add op column out_fields_vec.push(self.output_col_idx.len()); FixedBitSet::from_iter(out_fields_vec) @@ -157,4 +156,24 @@ impl LogScan { pub(crate) fn ctx(&self) -> OptimizerContextRef { self.ctx.clone() } + + pub fn get_table_columns(&self) -> &[ColumnDesc] { + &self.table_desc.columns + } + + pub(crate) fn order_names(&self) -> Vec { + self.table_desc + .order_column_indices() + .iter() + .map(|&i| self.get_table_columns()[i].name.clone()) + .collect() + } + + pub(crate) fn order_names_with_table_prefix(&self) -> Vec { + self.table_desc + .order_column_indices() + .iter() + .map(|&i| format!("{}.{}", self.table_name, self.get_table_columns()[i].name)) + .collect() + } } diff --git a/src/frontend/src/optimizer/plan_node/utils.rs b/src/frontend/src/optimizer/plan_node/utils.rs index 2433a659bad0a..22f28cb0d9733 100644 --- a/src/frontend/src/optimizer/plan_node/utils.rs +++ b/src/frontend/src/optimizer/plan_node/utils.rs @@ -14,6 +14,7 @@ use std::collections::HashMap; use std::default::Default; +use std::ops::Bound; use std::vec; use anyhow::anyhow; @@ -28,6 +29,8 @@ use risingwave_common::constants::log_store::v2::{ KV_LOG_STORE_PREDEFINED_COLUMNS, PK_ORDERING, VNODE_COLUMN_INDEX, 
}; use risingwave_common::hash::VnodeCount; +use risingwave_common::types::ScalarImpl; +use risingwave_common::util::scan_range::{is_full_range, ScanRange}; use risingwave_common::util::sort_util::{ColumnOrder, OrderType}; use crate::catalog::table_catalog::TableType; @@ -462,3 +465,58 @@ pub fn to_pb_time_travel_as_of(a: &Option) -> Result> { as_of_type: Some(as_of_type), })) } + +pub fn scan_ranges_as_strs(order_names: Vec, scan_ranges: &Vec) -> Vec { + let mut range_strs = vec![]; + + let explain_max_range = 20; + for scan_range in scan_ranges.iter().take(explain_max_range) { + #[expect(clippy::disallowed_methods)] + let mut range_str = scan_range + .eq_conds + .iter() + .zip(order_names.iter()) + .map(|(v, name)| match v { + Some(v) => format!("{} = {:?}", name, v), + None => format!("{} IS NULL", name), + }) + .collect_vec(); + if !is_full_range(&scan_range.range) { + let i = scan_range.eq_conds.len(); + range_str.push(range_to_string(&order_names[i], &scan_range.range)) + } + range_strs.push(range_str.join(" AND ")); + } + if scan_ranges.len() > explain_max_range { + range_strs.push("...".to_string()); + } + range_strs +} + +pub fn range_to_string(name: &str, range: &(Bound, Bound)) -> String { + match (&range.0, &range.1) { + (Bound::Unbounded, Bound::Unbounded) => unreachable!(), + (Bound::Unbounded, ub) => ub_to_string(name, ub), + (lb, Bound::Unbounded) => lb_to_string(name, lb), + (lb, ub) => { + format!("{} AND {}", lb_to_string(name, lb), ub_to_string(name, ub)) + } + } +} + +fn lb_to_string(name: &str, lb: &Bound) -> String { + let (op, v) = match lb { + Bound::Included(v) => (">=", v), + Bound::Excluded(v) => (">", v), + Bound::Unbounded => unreachable!(), + }; + format!("{} {} {:?}", name, op, v) +} +fn ub_to_string(name: &str, ub: &Bound) -> String { + let (op, v) = match ub { + Bound::Included(v) => ("<=", v), + Bound::Excluded(v) => ("<", v), + Bound::Unbounded => unreachable!(), + }; + format!("{} {} {:?}", name, op, v) +} diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index df83909e58f61..638acfeb872fd 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -27,19 +27,23 @@ use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::StatementType; use pgwire::types::{Format, Row}; +use prost::Message; use risingwave_common::catalog::Field; use risingwave_common::error::BoxedError; use risingwave_common::session_config::QueryMode; -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_common::util::scan_range; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_hummock_sdk::HummockVersionId; +use risingwave_pb::expr::expr_node::Type; use risingwave_sqlparser::ast::{Ident, ObjectName, Statement}; use super::SessionImpl; use crate::catalog::subscription_catalog::SubscriptionCatalog; use crate::catalog::TableId; use crate::error::{ErrorCode, Result}; +use crate::expr::{ExprImpl, ExprType, FunctionCall, InputRef, Literal}; use crate::handler::declare_cursor::create_chunk_stream_for_cursor; use crate::handler::query::{ gen_batch_plan_by_statement, gen_batch_plan_fragmenter, BatchQueryPlanResult, @@ -50,10 +54,11 @@ use crate::handler::util::{ }; use crate::handler::HandlerArgs; use crate::monitor::{CursorMetrics, PeriodicCursorMetrics}; -use crate::optimizer::plan_node::{generic, BatchLogSeqScan}; +use 
crate::optimizer::plan_node::{generic, BatchFilter, BatchLogSeqScan}; use crate::optimizer::property::{Order, RequiredDist}; use crate::optimizer::PlanRoot; use crate::scheduler::{DistributedQueryStream, LocalQueryStream}; +use crate::utils::Condition; use crate::{ Binder, OptimizerContext, OptimizerContextRef, PgResponseStream, PlanRef, TableCatalog, }; @@ -269,7 +274,7 @@ pub struct SubscriptionCursor { fields: Vec, cursor_metrics: Arc, last_fetch: Instant, - pk_column_names: HashMap, + pk_column_names: HashMap, seek_pk_row: Option>>, } @@ -282,7 +287,7 @@ impl SubscriptionCursor { handler_args: &HandlerArgs, cursor_metrics: Arc, ) -> Result { - let (state, fields,pk_column_names) = if let Some(start_timestamp) = start_timestamp { + let (state, fields, pk_column_names) = if let Some(start_timestamp) = start_timestamp { let table_catalog = handler_args.session.get_table_by_id(&dependent_table_id)?; let fields = table_catalog .columns @@ -305,8 +310,8 @@ impl SubscriptionCursor { // future fetch on the cursor starts from the snapshot when the cursor is declared. // // TODO: is this the right behavior? Should we delay the query stream initiation till the first fetch? - let (chunk_stream, fields, init_query_timer,pk_column_names) = - Self::initiate_query(None, &dependent_table_id, handler_args.clone()).await?; + let (chunk_stream, fields, init_query_timer, pk_column_names) = + Self::initiate_query(None, &dependent_table_id, handler_args.clone(), None).await?; let pinned_epoch = handler_args .session .env @@ -377,6 +382,7 @@ impl SubscriptionCursor { Some(rw_timestamp), &self.dependent_table_id, handler_args.clone(), + None, ) .await?; Self::init_row_stream( @@ -386,6 +392,26 @@ impl SubscriptionCursor { &fields, handler_args.session.clone(), ); + { + let (mut chunk_stream, fields, init_query_timer, pk_column_names) = + Self::initiate_query( + Some(rw_timestamp), + &self.dependent_table_id, + handler_args.clone(), + self.seek_pk_row.clone(), + ) + .await?; + Self::init_row_stream( + &mut chunk_stream, + formats, + &from_snapshot, + &fields, + handler_args.session.clone(), + ); + while let Some(a) = chunk_stream.next().await? 
{ + println!("testtest {:?}", a); + } + } self.cursor_need_drop_time = Instant::now() + Duration::from_secs(self.subscription.retention_seconds); @@ -401,7 +427,8 @@ impl SubscriptionCursor { expected_timestamp, init_query_timer, }; - if self.fields.ne(&fields) || self.pk_column_names.ne(&pk_column_names) { + if self.fields.ne(&fields) || self.pk_column_names.ne(&pk_column_names) + { self.fields = fields; self.pk_column_names = pk_column_names; return Ok(None); @@ -556,8 +583,9 @@ impl SubscriptionCursor { } } self.last_fetch = Instant::now(); - let (fields,rows,seek_pk_row) = Self::process_output_desc_row(&self.fields, ans, &self.pk_column_names); - if let Some(seek_pk_row) = seek_pk_row{ + let (fields, rows, seek_pk_row) = + Self::process_output_desc_row(&self.fields, ans, &self.pk_column_names); + if let Some(seek_pk_row) = seek_pk_row { self.seek_pk_row = Some(seek_pk_row); } let desc = fields.iter().map(to_pg_field).collect(); @@ -603,6 +631,7 @@ impl SubscriptionCursor { Some(0), &self.dependent_table_id, handler_args, + self.seek_pk_row.clone(), ), State::Fetch { from_snapshot, @@ -614,12 +643,14 @@ impl SubscriptionCursor { None, &self.dependent_table_id, handler_args, + self.seek_pk_row.clone(), ) } else { Self::init_batch_plan_for_subscription_cursor( Some(rw_timestamp), &self.dependent_table_id, handler_args, + self.seek_pk_row.clone(), ) } } @@ -634,10 +665,10 @@ impl SubscriptionCursor { rw_timestamp: Option, dependent_table_id: &TableId, handler_args: HandlerArgs, + seek_pk_row: Option>>, ) -> Result { let session = handler_args.clone().session; let table_catalog = session.get_table_by_id(dependent_table_id)?; - let pks = table_catalog.pk(); let context = OptimizerContext::from_handler_args(handler_args.clone()); if let Some(rw_timestamp) = rw_timestamp { let version_id = { @@ -659,25 +690,23 @@ impl SubscriptionCursor { rw_timestamp, rw_timestamp, version_id, - pks, + seek_pk_row, ) } else { - let subscription_from_table_name = - ObjectName(vec![Ident::from(table_catalog.name.as_ref())]); - let pk_names = pks + let pks = table_catalog.pk(); + let pks = pks .iter() .map(|f| { - table_catalog - .columns - .get(f.column_index) - .unwrap() - .name() - .to_string() + let pk = table_catalog.columns.get(f.column_index).unwrap(); + (pk.name().to_string(), pk.is_hidden) }) .collect_vec(); + let subscription_from_table_name = + ObjectName(vec![Ident::from(table_catalog.name.as_ref())]); let query_stmt = Statement::Query(Box::new(gen_query_from_table_name_order_by( subscription_from_table_name, - pk_names, + pks, + seek_pk_row, ))); gen_batch_plan_by_statement(&session, context.into(), query_stmt) } @@ -687,7 +716,13 @@ impl SubscriptionCursor { rw_timestamp: Option, dependent_table_id: &TableId, handler_args: HandlerArgs, - ) -> Result<(CursorDataChunkStream, Vec, Instant, HashMap)> { + seek_pk_row: Option>>, + ) -> Result<( + CursorDataChunkStream, + Vec, + Instant, + HashMap, + )> { let init_query_timer = Instant::now(); let session = handler_args.clone().session; let table_catalog = session.get_table_by_id(dependent_table_id)?; @@ -697,6 +732,7 @@ impl SubscriptionCursor { rw_timestamp, dependent_table_id, handler_args.clone(), + seek_pk_row, )?; let plan_fragmenter_result = gen_batch_plan_fragmenter(&handler_args.session, plan_result)?; let (chunk_stream, fields) = @@ -752,29 +788,46 @@ impl SubscriptionCursor { Ok(row) } - pub fn process_output_desc_row(descs: &Vec, mut rows: Vec,pk_column_names: &HashMap) -> (Vec,Vec,Option>>) { - let last_row = rows.last_mut().map(|row|{ - 
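            // Capture the pk values of the last row in this batch: the caller
            // stores them as `seek_pk_row`, and later fetches feed them back into
            // `initiate_query` so the next batch seeks past rows that were already
            // returned (keyset pagination on the pk) instead of rescanning.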
row.0.iter().zip_eq_fast(descs.iter()).filter_map(|(data,field)|{ - if pk_column_names.contains_key(&field.name){ - Some(data.clone()) + pub fn process_output_desc_row( + descs: &Vec, + mut rows: Vec, + pk_column_names: &HashMap, + ) -> (Vec, Vec, Option>>) { + let last_row = rows.last_mut().map(|row| { + row.0 + .iter() + .zip_eq_fast(descs.iter()) + .filter_map(|(data, field)| { + if pk_column_names.contains_key(&field.name) { + Some(data.clone()) + } else { + None + } + }) + .collect_vec() + }); + let iter = descs + .iter() + .map(|field| { + if let Some(is_hidden) = pk_column_names.get(&field.name) + && *is_hidden + { + (false, field) } else { - None + (true, field) } - }).collect_vec() - }); - let iter= descs.iter().map(|field| { - if let Some(is_hidden) = pk_column_names.get(&field.name) && *is_hidden{ - (false,field) - } else { - (true,field) - } - }).collect_vec(); - let pk_fields = iter.iter().filter(|(is_hidden,_)| *is_hidden).map(|(_,field)| (*field).clone()).collect(); - let mut pk_keep = iter.iter().map(|(is_hidden,_)| *is_hidden); + }) + .collect_vec(); + let pk_fields = iter + .iter() + .filter(|(is_hidden, _)| *is_hidden) + .map(|(_, field)| (*field).clone()) + .collect(); + let mut pk_keep = iter.iter().map(|(is_hidden, _)| *is_hidden); rows.iter_mut().for_each(|row| { row.0.retain(|_| pk_keep.next().unwrap()); }); - (pk_fields,rows,last_row) + (pk_fields, rows, last_row) } pub fn build_desc(mut descs: Vec, from_snapshot: bool) -> Vec { @@ -792,7 +845,7 @@ impl SubscriptionCursor { old_epoch: u64, new_epoch: u64, version_id: HummockVersionId, - pks: &[ColumnOrder], + seek_pk_row: Option>>, ) -> Result { // pk + all column without hidden let output_col_idx = table_catalog @@ -807,6 +860,7 @@ impl SubscriptionCursor { } }) .collect::>(); + let max_split_range_gap = context.session_ctx().config().max_split_range_gap() as u64; let core = generic::LogScan::new( table_catalog.name.clone(), output_col_idx, @@ -816,18 +870,105 @@ impl SubscriptionCursor { new_epoch, version_id, ); + let pks = table_catalog.pk(); + let pks = pks + .iter() + .map(|f| { + let pk = table_catalog.columns.get(f.column_index).unwrap(); + (pk.name().to_string(), pk.data_type(), f.column_index) + }) + .collect_vec(); + // let selection = if let Some(seek_pk_rows) = seek_pk_rows { + // let mut pk_rows = vec![]; + // let mut values = vec![]; + // for ((name, _), seek_pk) in pks.iter().zip_eq_fast(seek_pk_rows.iter()) { + // if let Some(seek_pk) = seek_pk { + // pk_rows.push( + // Expr::Identifier(Ident::with_quote_unchecked( + // '"', + // name.clone(), + // )) + // ); + // values.push(String::from_utf8(seek_pk.clone().into()).unwrap()); + // } + // } + // if pk_rows.is_empty() { + // None + // } else if pk_rows.len() == 1 { + // let left = pk_rows.pop().unwrap(); + // let right = Expr::Value(Value::SingleQuotedString(values.pop().unwrap())); + // Some(Expr::BinaryOp { + // left: Box::new(left), + // op: BinaryOperator::Eq, + // right: Box::new(right), + // }) + // }else{ + // let left = Expr::Row(pk_rows); + // let values = values.join(","); + // let right = Expr::Value(Value::SingleQuotedString(format!("({})", values))); + // Some(Expr::BinaryOp { + // left: Box::new(left), + // op: BinaryOperator::Gt, + // right: Box::new(right), + // }) + // } + // } else{ + // None + // }; + let (scan, predicate) = match seek_pk_row { + Some(seek_pk_row) => { + let seek_pk_row = seek_pk_row + .into_iter() + .zip_eq_fast(pks.into_iter()) + .filter_map(|(pk, (name, data_type, column_index))| { + if let Some(seek_pk) = pk 
{ + let column = InputRef { + index: column_index, + data_type: data_type.clone(), + }; + let value_string = String::from_utf8(seek_pk.clone().into()).unwrap(); + let value_data = + ScalarImpl::from_text(&value_string, data_type).unwrap(); + let value = Literal::new(Some(value_data), data_type.clone()); + Some( + FunctionCall::new( + ExprType::LessThan, + vec![column.into(), value.into()], + ) + .unwrap() + .into(), + ) + } else { + None + } + }) + .collect_vec(); + let (scan, predicate) = Condition { + conjunctions: seek_pk_row, + } + .split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap) + .unwrap(); + (scan, Some(predicate)) + } + None => (vec![], None), + }; - let batch_log_seq_scan = BatchLogSeqScan::new(core); - + let batch_log_seq_scan = BatchLogSeqScan::new(core, scan); let out_fields = batch_log_seq_scan.core().out_fields(); let out_names = batch_log_seq_scan.core().column_names(); + let plan = if let Some(predicate) = predicate { + BatchFilter::new(generic::Filter::new(predicate, batch_log_seq_scan.into())).into() + } else { + batch_log_seq_scan.into() + }; + // order by pk, so don't need to sort - let order = Order::new(pks.to_vec()); + let order = Order::new(table_catalog.pk().to_vec()); // Here we just need a plan_root to call the method, only out_fields and out_names will be used let plan_root = PlanRoot::new_with_batch_plan( - PlanRef::from(batch_log_seq_scan.clone()), + plan, RequiredDist::single(), order, out_fields, @@ -1075,16 +1216,11 @@ impl CursorManager { } } -fn get_pk_names(pks: &[ColumnOrder], table_catalog: &TableCatalog) -> HashMap { - pks - .iter() +fn get_pk_names(pks: &[ColumnOrder], table_catalog: &TableCatalog) -> HashMap { + pks.iter() .map(|f| { - let column = - table_catalog - .columns - .get(f.column_index) - .unwrap(); - (column.name().to_string(),column.is_hidden) + let column = table_catalog.columns.get(f.column_index).unwrap(); + (column.name().to_string(), column.is_hidden) }) .collect() -} \ No newline at end of file +} diff --git a/src/storage/src/table/batch_table/storage_table.rs b/src/storage/src/table/batch_table/storage_table.rs index a665a37be68e0..e00a76286932f 100644 --- a/src/storage/src/table/batch_table/storage_table.rs +++ b/src/storage/src/table/batch_table/storage_table.rs @@ -756,10 +756,11 @@ impl StorageTableInner { start_epoch: u64, end_epoch: HummockReadEpoch, ordered: bool, + range_bounds: impl RangeBounds, + pk_prefix: impl Row, ) -> StorageResult> + Send + 'static> { - let pk_prefix = OwnedRow::default(); - let start_key = self.serialize_pk_bound(&pk_prefix, Unbounded, true); - let end_key = self.serialize_pk_bound(&pk_prefix, Unbounded, false); + let start_key = self.serialize_pk_bound(&pk_prefix, range_bounds.start_bound(), true); + let end_key = self.serialize_pk_bound(&pk_prefix, range_bounds.end_bound(), false); assert!(pk_prefix.len() <= self.pk_indices.len()); let table_key_ranges = { diff --git a/src/stream/src/executor/backfill/snapshot_backfill.rs b/src/stream/src/executor/backfill/snapshot_backfill.rs index 7234a56deca26..3e5a644fadb09 100644 --- a/src/stream/src/executor/backfill/snapshot_backfill.rs +++ b/src/stream/src/executor/backfill/snapshot_backfill.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
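// The call site below adapts to the widened `batch_iter_log_with_pk_bounds`
// signature by passing a full (Unbounded, Unbounded) range and an empty
// `OwnedRow` prefix, so the snapshot-backfill scan behaves exactly as before.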
+use core::ops::Bound; use std::cmp::min; use std::collections::VecDeque; use std::future::{pending, Future}; @@ -199,6 +200,8 @@ impl SnapshotBackfillExecutor { barrier_epoch.prev, HummockReadEpoch::Committed(barrier_epoch.prev), false, + (Bound::::Unbounded, Bound::::Unbounded), + OwnedRow::default(), )) .await?; let data_types = self.upstream_table.schema().data_types(); From dae04dbcf873db49038356ae9d9b3b59853487f3 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Wed, 6 Nov 2024 19:45:21 +0800 Subject: [PATCH 05/17] save --- src/expr/impl/src/scalar/cast.rs | 8 ++ src/frontend/src/handler/mod.rs | 2 +- src/frontend/src/handler/util.rs | 2 +- src/frontend/src/session/cursor_manager.rs | 122 +++++++++------------ 4 files changed, 60 insertions(+), 74 deletions(-) diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index 41c51d95445ec..1b065efed985c 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -47,6 +47,14 @@ where }) } +#[function("cast(varchar) -> struct", type_infer = "unreachable")] +pub fn str_parse_struct(elem: &str, ctx: &Context) -> Result{ + match &ctx.return_type { + risingwave_common::types::DataType::Struct(s) => Ok(StructValue::from_str(elem, s).map_err(|e| ExprError::Parse(format!("error: {:?}",e.as_report()).into()))?), + _ => return Err(ExprError::Parse("unsupported type".into())), + } +} + // TODO: introduce `FromBinary` and support all types #[function("pgwire_recv(bytea) -> int8")] pub fn pgwire_recv(elem: &[u8]) -> Result { diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index e0bd5a5efae2e..c9ae3749cb347 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -540,7 +540,7 @@ pub async fn handle( Statement::Query(_) | Statement::Insert { .. } | Statement::Delete { .. } - | Statement::Update { .. } => query::handle_query(handler_args, stmt, formats).await, + | Statement::Update { .. 
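        // The `Ok(...unwrap())` wrapping added below is a temporary debugging
        // aid while the cursor predicate work is in flight; PATCH 07/17
        // ("fix cursor panic") reverts it to plain error propagation.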
} => Ok(query::handle_query(handler_args, stmt, formats).await.unwrap()), Statement::CreateView { materialized, if_not_exists, diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 4fd38efb7b997..4d412a09c6281 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -290,7 +290,7 @@ pub fn gen_query_from_table_name_order_by( let right = Expr::Value(Value::SingleQuotedString(values.pop().unwrap())); Some(Expr::BinaryOp { left: Box::new(left), - op: BinaryOperator::Eq, + op: BinaryOperator::Gt, right: Box::new(right), }) }else{ diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index 638acfeb872fd..bccafc7507901 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -31,13 +31,13 @@ use prost::Message; use risingwave_common::catalog::Field; use risingwave_common::error::BoxedError; use risingwave_common::session_config::QueryMode; -use risingwave_common::types::{DataType, ScalarImpl}; +use risingwave_common::types::{DataType, ScalarImpl, StructType, StructValue}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::scan_range; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::expr::expr_node::Type; -use risingwave_sqlparser::ast::{Ident, ObjectName, Statement}; +use risingwave_sqlparser::ast::{BinaryOperator, Expr, Ident, ObjectName, Statement, Value}; use super::SessionImpl; use crate::catalog::subscription_catalog::SubscriptionCatalog; @@ -845,7 +845,7 @@ impl SubscriptionCursor { old_epoch: u64, new_epoch: u64, version_id: HummockVersionId, - seek_pk_row: Option>>, + seek_pk_rows: Option>>, ) -> Result { // pk + all column without hidden let output_col_idx = table_catalog @@ -878,79 +878,57 @@ impl SubscriptionCursor { (pk.name().to_string(), pk.data_type(), f.column_index) }) .collect_vec(); - // let selection = if let Some(seek_pk_rows) = seek_pk_rows { - // let mut pk_rows = vec![]; - // let mut values = vec![]; - // for ((name, _), seek_pk) in pks.iter().zip_eq_fast(seek_pk_rows.iter()) { - // if let Some(seek_pk) = seek_pk { - // pk_rows.push( - // Expr::Identifier(Ident::with_quote_unchecked( - // '"', - // name.clone(), - // )) - // ); - // values.push(String::from_utf8(seek_pk.clone().into()).unwrap()); - // } - // } - // if pk_rows.is_empty() { - // None - // } else if pk_rows.len() == 1 { - // let left = pk_rows.pop().unwrap(); - // let right = Expr::Value(Value::SingleQuotedString(values.pop().unwrap())); - // Some(Expr::BinaryOp { - // left: Box::new(left), - // op: BinaryOperator::Eq, - // right: Box::new(right), - // }) - // }else{ - // let left = Expr::Row(pk_rows); - // let values = values.join(","); - // let right = Expr::Value(Value::SingleQuotedString(format!("({})", values))); - // Some(Expr::BinaryOp { - // left: Box::new(left), - // op: BinaryOperator::Gt, - // right: Box::new(right), - // }) - // } - // } else{ - // None - // }; - let (scan, predicate) = match seek_pk_row { - Some(seek_pk_row) => { - let seek_pk_row = seek_pk_row - .into_iter() - .zip_eq_fast(pks.into_iter()) - .filter_map(|(pk, (name, data_type, column_index))| { - if let Some(seek_pk) = pk { - let column = InputRef { - index: column_index, - data_type: data_type.clone(), - }; - let value_string = String::from_utf8(seek_pk.clone().into()).unwrap(); - let value_data = - ScalarImpl::from_text(&value_string, data_type).unwrap(); - let value = 
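                            // Decode the UTF-8 seek bytes, then re-type them with
                            // `ScalarImpl::from_text` so the literal matches the pk column type.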
Literal::new(Some(value_data), data_type.clone()); - Some( - FunctionCall::new( - ExprType::LessThan, - vec![column.into(), value.into()], - ) - .unwrap() - .into(), - ) - } else { - None + let (scan, predicate) = if let Some(seek_pk_rows) = seek_pk_rows { + let mut pk_rows = vec![]; + let mut values = vec![]; + for (seek_pk, (name, data_type, column_index)) in seek_pk_rows + .into_iter() + .zip_eq_fast(pks.into_iter()) { + if let Some(seek_pk) = seek_pk { + pk_rows.push( + InputRef { + index: column_index, + data_type: data_type.clone(), } - }) - .collect_vec(); - let (scan, predicate) = Condition { - conjunctions: seek_pk_row, + ); + let value_string = String::from_utf8(seek_pk.clone().into()).unwrap(); + let value_data = + ScalarImpl::from_text(&value_string, data_type).unwrap(); + values.push((Some(value_data),data_type.clone())); } - .split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap) - .unwrap(); + } + if pk_rows.is_empty() { + (vec![], None) + } else if pk_rows.len() == 1 { + let left = pk_rows.pop().unwrap(); + let (right_data,right_type) = values.pop().unwrap(); + let (scan, predicate) = Condition { + conjunctions: vec![FunctionCall::new( + ExprType::GreaterThan, + vec![left.into(), Literal::new(right_data, right_type).into()], + )?.into()], + }.split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?; + (scan, Some(predicate)) + }else{ + let (right_datas,right_types):(Vec<_>,Vec<_>) = values.into_iter().unzip(); + let right_data = ScalarImpl::Struct(StructValue::new(right_datas)); + let right_type = DataType::Struct(StructType::unnamed(right_types)); + let left = FunctionCall::new_unchecked( + ExprType::Row, + pk_rows.into_iter().map(|pk| pk.into()).collect(), + right_type.clone(), + ); + let right = Literal::new(Some(right_data), right_type); + let (scan, predicate) = Condition { + conjunctions: vec![FunctionCall::new( + ExprType::GreaterThan, + vec![left.into(), right.into()], + )?.into()], + }.split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?; (scan, Some(predicate)) } - None => (vec![], None), + } else{ + (vec![], None) }; let batch_log_seq_scan = BatchLogSeqScan::new(core, scan); From e4770afd7554ab157d1217ff5ab64c6384c6857d Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Thu, 7 Nov 2024 14:47:16 +0800 Subject: [PATCH 06/17] support --- src/batch/src/executor/log_row_seq_scan.rs | 2 - src/batch/src/executor/row_seq_scan.rs | 6 +- src/common/src/array/struct_array.rs | 91 ++++++++++++++++++- src/expr/impl/src/scalar/cast.rs | 7 +- src/frontend/src/handler/mod.rs | 4 +- src/frontend/src/handler/util.rs | 16 ++-- .../optimizer/plan_node/batch_sys_seq_scan.rs | 6 +- src/frontend/src/session/cursor_manager.rs | 80 ++++++---------- 8 files changed, 132 insertions(+), 80 deletions(-) diff --git a/src/batch/src/executor/log_row_seq_scan.rs b/src/batch/src/executor/log_row_seq_scan.rs index cde3ae5ee8636..7ca89871c026e 100644 --- a/src/batch/src/executor/log_row_seq_scan.rs +++ b/src/batch/src/executor/log_row_seq_scan.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use futures::prelude::stream::StreamExt; use futures_async_stream::try_stream; use futures_util::pin_mut; -use iceberg::scan; use prometheus::Histogram; use risingwave_common::array::{DataChunk, Op}; use risingwave_common::bitmap::Bitmap; @@ -26,7 +25,6 @@ use risingwave_common::catalog::{ColumnId, Field, Schema}; use risingwave_common::hash::VnodeCountCompat; use risingwave_common::row::{Row, RowExt}; use 
risingwave_common::types::{DataType, ScalarImpl}; -use risingwave_common::util::scan_range; use risingwave_hummock_sdk::{HummockReadEpoch, HummockVersionId}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::{batch_query_epoch, BatchQueryEpoch}; diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index 303ddf4270aa6..df0a060d6b328 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -use std::ops::{Bound, Deref}; +use std::ops::Deref; use std::sync::Arc; use futures::{pin_mut, StreamExt}; @@ -23,11 +23,9 @@ use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnId, Schema}; use risingwave_common::hash::VnodeCountCompat; use risingwave_common::row::{OwnedRow, Row}; -use risingwave_common::types::{DataType, Datum}; +use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; -use risingwave_common::util::value_encoding::deserialize_datum; use risingwave_pb::batch_plan::plan_node::NodeBody; -use risingwave_pb::batch_plan::{scan_range, PbScanRange}; use risingwave_pb::common::BatchQueryEpoch; use risingwave_pb::plan_common::as_of::AsOfType; use risingwave_pb::plan_common::{as_of, PbAsOf, StorageTableDesc}; diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index 10ded3a64d66c..37841d978b978 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -361,11 +361,27 @@ impl StructValue { if !s.ends_with(')') { return Err("Missing right parenthesis".into()); } - let mut fields = Vec::with_capacity(s.len()); - for (s, ty) in s[1..s.len() - 1].split(',').zip_eq_debug(ty.types()) { - let datum = match s.trim() { + let s = &s[1..s.len() - 1]; + let mut split_str = Vec::with_capacity(ty.len()); + let mut left_parenthesis_num = 0; + let mut start = 0; + for (i, c) in s.char_indices() { + match c { + '(' => left_parenthesis_num += 1, + ')' => left_parenthesis_num -= 1, + ',' if left_parenthesis_num == 0 => { + split_str.push(&s[start..i]); + start = i + 1; + } + _ => {} + } + } + split_str.push(&s[start..=(s.len() - 1)]); + let mut fields = Vec::with_capacity(ty.len()); + for (str, ty) in split_str.iter().zip_eq_debug(ty.types()) { + let datum = match str.trim() { "" => None, - s => Some(ScalarImpl::from_text(s, ty)?), + s => Some(ScalarImpl::from_text(s, ty).unwrap()), }; fields.push(datum); } @@ -832,4 +848,71 @@ mod tests { test("{1,2}", r#""{1,2}""#); test(r#"{"f": 1}"#, r#""{""f"": 1}""#); } + + #[test] + fn test_from_str_nested_struct() { + let struct_str = "(1,sad ,(3, 4.0),(1,( 2,(3,(4,(5, 6))) )) )"; + let struct_type = StructType::unnamed(vec![ + DataType::Int32, + DataType::Varchar, + DataType::new_unnamed_struct(vec![DataType::Int32, DataType::Float64]), + DataType::new_unnamed_struct(vec![ + DataType::Int32, + DataType::new_unnamed_struct(vec![ + DataType::Int32, + DataType::new_unnamed_struct(vec![ + DataType::Int32, + DataType::new_unnamed_struct(vec![ + DataType::Int32, + DataType::new_unnamed_struct(vec![DataType::Int32, DataType::Int32]), + ]), + ]), + ]), + ]), + ]); + let struct_value = StructValue::from_str(struct_str, &struct_type).unwrap(); + let expected = StructValue::new(vec![ + Some(1.to_scalar_value()), + Some("sad".into()), + 
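            // The remaining fields mirror the nested `(3, 4.0)` and
            // `(1,( 2,(3,(4,(5, 6))) ))` parts of the input string, decoded
            // into nested `StructValue`s.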
Some( + StructValue::new(vec![ + Some(3.to_scalar_value()), + Some(ScalarImpl::Float64(4.0.into())), + ]) + .to_scalar_value(), + ), + Some( + StructValue::new(vec![ + Some(1.to_scalar_value()), + Some( + StructValue::new(vec![ + Some(2.to_scalar_value()), + Some( + StructValue::new(vec![ + Some(3.to_scalar_value()), + Some( + StructValue::new(vec![ + Some(4.to_scalar_value()), + Some( + StructValue::new(vec![ + Some(5.to_scalar_value()), + Some(6.to_scalar_value()), + ]) + .to_scalar_value(), + ), + ]) + .to_scalar_value(), + ), + ]) + .to_scalar_value(), + ), + ]) + .to_scalar_value(), + ), + ]) + .to_scalar_value(), + ), + ]); + assert_eq!(struct_value, expected); + } } diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index 1b065efed985c..63348df481e28 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -48,10 +48,11 @@ where } #[function("cast(varchar) -> struct", type_infer = "unreachable")] -pub fn str_parse_struct(elem: &str, ctx: &Context) -> Result{ +pub fn str_parse_struct(elem: &str, ctx: &Context) -> Result { match &ctx.return_type { - risingwave_common::types::DataType::Struct(s) => Ok(StructValue::from_str(elem, s).map_err(|e| ExprError::Parse(format!("error: {:?}",e.as_report()).into()))?), - _ => return Err(ExprError::Parse("unsupported type".into())), + risingwave_common::types::DataType::Struct(s) => Ok(StructValue::from_str(elem, s) + .map_err(|e| ExprError::Parse(format!("error: {:?}", e.as_report()).into()))?), + _ => Err(ExprError::Parse("unsupported type".into())), } } diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index c9ae3749cb347..cb0d19bab3953 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -540,7 +540,9 @@ pub async fn handle( Statement::Query(_) | Statement::Insert { .. } | Statement::Delete { .. } - | Statement::Update { .. } => Ok(query::handle_query(handler_args, stmt, formats).await.unwrap()), + | Statement::Update { .. 
} => Ok(query::handle_query(handler_args, stmt, formats) + .await + .unwrap()), Statement::CreateView { materialized, if_not_exists, diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 4d412a09c6281..e82bcd51900fd 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -274,12 +274,10 @@ pub fn gen_query_from_table_name_order_by( let mut values = vec![]; for ((name, _), seek_pk) in pks.iter().zip_eq_fast(seek_pk_rows.iter()) { if let Some(seek_pk) = seek_pk { - pk_rows.push( - Expr::Identifier(Ident::with_quote_unchecked( - '"', - name.clone(), - )) - ); + pk_rows.push(Expr::Identifier(Ident::with_quote_unchecked( + '"', + name.clone(), + ))); values.push(String::from_utf8(seek_pk.clone().into()).unwrap()); } } @@ -293,7 +291,7 @@ pub fn gen_query_from_table_name_order_by( op: BinaryOperator::Gt, right: Box::new(right), }) - }else{ + } else { let left = Expr::Row(pk_rows); let values = values.join(","); let right = Expr::Value(Value::SingleQuotedString(format!("({})", values))); @@ -303,13 +301,13 @@ pub fn gen_query_from_table_name_order_by( right: Box::new(right), }) } - } else{ + } else { None }; let select = Select { - from, projection, + from, selection, ..Default::default() }; diff --git a/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs index 007e983e67f3d..a7133e47a2186 100644 --- a/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs +++ b/src/frontend/src/optimizer/plan_node/batch_sys_seq_scan.rs @@ -12,18 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::ops::Bound; - -use itertools::Itertools; use pretty_xmlish::{Pretty, XmlNode}; -use risingwave_common::types::ScalarImpl; use risingwave_common::util::scan_range::{is_full_range, ScanRange}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::SysRowSeqScanNode; use risingwave_pb::plan_common::PbColumnDesc; use super::batch::prelude::*; -use super::utils::{childless_record, range_to_string, scan_ranges_as_strs, Distill}; +use super::utils::{childless_record, scan_ranges_as_strs, Distill}; use super::{generic, ExprRewritable, PlanBase, PlanRef, ToBatchPb, ToDistributedBatch}; use crate::error::Result; use crate::expr::{ExprRewriter, ExprVisitor}; diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index bccafc7507901..c0669378197be 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -27,23 +27,20 @@ use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::StatementType; use pgwire::types::{Format, Row}; -use prost::Message; use risingwave_common::catalog::Field; use risingwave_common::error::BoxedError; use risingwave_common::session_config::QueryMode; use risingwave_common::types::{DataType, ScalarImpl, StructType, StructValue}; use risingwave_common::util::iter_util::ZipEqFast; -use risingwave_common::util::scan_range; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_hummock_sdk::HummockVersionId; -use risingwave_pb::expr::expr_node::Type; -use risingwave_sqlparser::ast::{BinaryOperator, Expr, Ident, ObjectName, Statement, Value}; +use risingwave_sqlparser::ast::{Ident, ObjectName, Statement}; use super::SessionImpl; use crate::catalog::subscription_catalog::SubscriptionCatalog; use crate::catalog::TableId; use 
crate::error::{ErrorCode, Result}; -use crate::expr::{ExprImpl, ExprType, FunctionCall, InputRef, Literal}; +use crate::expr::{ExprType, FunctionCall, InputRef, Literal}; use crate::handler::declare_cursor::create_chunk_stream_for_cursor; use crate::handler::query::{ gen_batch_plan_by_statement, gen_batch_plan_fragmenter, BatchQueryPlanResult, @@ -59,9 +56,7 @@ use crate::optimizer::property::{Order, RequiredDist}; use crate::optimizer::PlanRoot; use crate::scheduler::{DistributedQueryStream, LocalQueryStream}; use crate::utils::Condition; -use crate::{ - Binder, OptimizerContext, OptimizerContextRef, PgResponseStream, PlanRef, TableCatalog, -}; +use crate::{Binder, OptimizerContext, OptimizerContextRef, PgResponseStream, TableCatalog}; pub enum CursorDataChunkStream { LocalDataChunk(Option), @@ -392,26 +387,6 @@ impl SubscriptionCursor { &fields, handler_args.session.clone(), ); - { - let (mut chunk_stream, fields, init_query_timer, pk_column_names) = - Self::initiate_query( - Some(rw_timestamp), - &self.dependent_table_id, - handler_args.clone(), - self.seek_pk_row.clone(), - ) - .await?; - Self::init_row_stream( - &mut chunk_stream, - formats, - &from_snapshot, - &fields, - handler_args.session.clone(), - ); - while let Some(a) = chunk_stream.next().await? { - println!("testtest {:?}", a); - } - } self.cursor_need_drop_time = Instant::now() + Duration::from_secs(self.subscription.retention_seconds); @@ -875,43 +850,42 @@ impl SubscriptionCursor { .iter() .map(|f| { let pk = table_catalog.columns.get(f.column_index).unwrap(); - (pk.name().to_string(), pk.data_type(), f.column_index) + (pk.data_type(), f.column_index) }) .collect_vec(); let (scan, predicate) = if let Some(seek_pk_rows) = seek_pk_rows { let mut pk_rows = vec![]; let mut values = vec![]; - for (seek_pk, (name, data_type, column_index)) in seek_pk_rows - .into_iter() - .zip_eq_fast(pks.into_iter()) { + for (seek_pk, (data_type, column_index)) in + seek_pk_rows.into_iter().zip_eq_fast(pks.into_iter()) + { if let Some(seek_pk) = seek_pk { - pk_rows.push( - InputRef { - index: column_index, - data_type: data_type.clone(), - } - ); + pk_rows.push(InputRef { + index: column_index, + data_type: data_type.clone(), + }); let value_string = String::from_utf8(seek_pk.clone().into()).unwrap(); - let value_data = - ScalarImpl::from_text(&value_string, data_type).unwrap(); - values.push((Some(value_data),data_type.clone())); + let value_data = ScalarImpl::from_text(&value_string, data_type).unwrap(); + values.push((Some(value_data), data_type.clone())); } } if pk_rows.is_empty() { (vec![], None) } else if pk_rows.len() == 1 { let left = pk_rows.pop().unwrap(); - let (right_data,right_type) = values.pop().unwrap(); + let (right_data, right_type) = values.pop().unwrap(); let (scan, predicate) = Condition { conjunctions: vec![FunctionCall::new( ExprType::GreaterThan, vec![left.into(), Literal::new(right_data, right_type).into()], - )?.into()], - }.split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?; + )? 
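                // `split_to_scan_ranges` converts the pk comparison into storage
                // scan ranges where it can, returning whatever it cannot push down
                // as a residual condition that later wraps the scan in a
                // `BatchFilter`.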
+ .into()], + } + .split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?; (scan, Some(predicate)) - }else{ - let (right_datas,right_types):(Vec<_>,Vec<_>) = values.into_iter().unzip(); - let right_data = ScalarImpl::Struct(StructValue::new(right_datas)); + } else { + let (right_data, right_types): (Vec<_>, Vec<_>) = values.into_iter().unzip(); + let right_data = ScalarImpl::Struct(StructValue::new(right_data)); let right_type = DataType::Struct(StructType::unnamed(right_types)); let left = FunctionCall::new_unchecked( ExprType::Row, @@ -921,13 +895,15 @@ impl SubscriptionCursor { let right = Literal::new(Some(right_data), right_type); let (scan, predicate) = Condition { conjunctions: vec![FunctionCall::new( - ExprType::GreaterThan, - vec![left.into(), right.into()], - )?.into()], - }.split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?; + ExprType::GreaterThan, + vec![left.into(), right.into()], + )? + .into()], + } + .split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?; (scan, Some(predicate)) } - } else{ + } else { (vec![], None) }; From 7f4e0cd2fcfbb6ad4117e28e75ac21c1c25bb82d Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Fri, 8 Nov 2024 16:42:20 +0800 Subject: [PATCH 07/17] fix cursor panic --- src/common/src/array/struct_array.rs | 2 +- src/frontend/src/handler/mod.rs | 4 +--- src/frontend/src/handler/util.rs | 13 +++++++++---- src/frontend/src/session/cursor_manager.rs | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index 37841d978b978..91cd28345f380 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -381,7 +381,7 @@ impl StructValue { for (str, ty) in split_str.iter().zip_eq_debug(ty.types()) { let datum = match str.trim() { "" => None, - s => Some(ScalarImpl::from_text(s, ty).unwrap()), + s => Some(ScalarImpl::from_text(s, ty)?), }; fields.push(datum); } diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 9d5a235a658cf..9cf94a37c65b0 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -541,9 +541,7 @@ pub async fn handle( Statement::Query(_) | Statement::Insert { .. } | Statement::Delete { .. } - | Statement::Update { .. } => Ok(query::handle_query(handler_args, stmt, formats) - .await - .unwrap()), + | Statement::Update { .. 
} => query::handle_query(handler_args, stmt, formats).await, Statement::CreateView { materialized, if_not_exists, diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 7509d7dd11998..6e043c4f0ac19 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -239,7 +239,7 @@ pub fn gen_query_from_table_name_order_by( from_name: ObjectName, pks: Vec<(String, bool)>, seek_pk_rows: Option>>, -) -> Query { +) -> RwResult { let select_pks = pks .iter() .filter_map( @@ -278,7 +278,12 @@ pub fn gen_query_from_table_name_order_by( '"', name.clone(), ))); - values.push(String::from_utf8(seek_pk.clone().into()).unwrap()); + values.push(String::from_utf8(seek_pk.clone().into()).map_err(|e| { + ErrorCode::InternalError(format!( + "Convert cursor seek_pk to string error: {:?}", + e.as_report() + )) + })?); } } if pk_rows.is_empty() { @@ -323,14 +328,14 @@ pub fn gen_query_from_table_name_order_by( } }) .collect(); - Query { + Ok(Query { with: None, body, order_by, limit: None, offset: None, fetch: None, - } + }) } pub fn convert_unix_millis_to_logstore_u64(unix_millis: u64) -> u64 { diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index c0669378197be..92252d064b943 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -682,7 +682,7 @@ impl SubscriptionCursor { subscription_from_table_name, pks, seek_pk_row, - ))); + )?)); gen_batch_plan_by_statement(&session, context.into(), query_stmt) } } From 2a49efa5ffe9d77ed5045ff25b268fd811229778 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Mon, 11 Nov 2024 12:53:28 +0800 Subject: [PATCH 08/17] fix ci --- src/expr/impl/tests/sig.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expr/impl/tests/sig.rs b/src/expr/impl/tests/sig.rs index 2dc8aacdb203f..cdf8ad1be7854 100644 --- a/src/expr/impl/tests/sig.rs +++ b/src/expr/impl/tests/sig.rs @@ -62,7 +62,7 @@ fn test_func_sig_map() { "cast(anyarray) -> character varying/anyarray", "cast(bigint) -> rw_int256/integer/smallint/numeric/double precision/real/character varying", "cast(boolean) -> integer/character varying", - "cast(character varying) -> jsonb/interval/timestamp without time zone/time without time zone/date/rw_int256/real/double precision/numeric/smallint/integer/bigint/character varying/boolean/bytea/anyarray", + "cast(character varying) -> jsonb/interval/timestamp without time zone/time without time zone/date/rw_int256/real/double precision/numeric/smallint/integer/bigint/anystruct/character varying/boolean/bytea/anyarray", "cast(date) -> timestamp without time zone/character varying", "cast(double precision) -> numeric/real/bigint/integer/smallint/character varying", "cast(integer) -> rw_int256/smallint/numeric/double precision/real/bigint/boolean/character varying", From 394d78cb2d58216e80e8a7faf2b5dd9ea1976910 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Thu, 21 Nov 2024 13:54:51 +0800 Subject: [PATCH 09/17] add ci --- e2e_test/subscription/main.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/e2e_test/subscription/main.py b/e2e_test/subscription/main.py index e8549ae85025b..a170b1d911df7 100644 --- a/e2e_test/subscription/main.py +++ b/e2e_test/subscription/main.py @@ -543,6 +543,41 @@ def test_order_multi_pk(): check_rows_data([17,17,17,17],row[4],"Insert") drop_table_subscription() +def test_explain_cursor(): + print(f"test_order_mutil_pk") + 
create_table_subscription() + conn = psycopg2.connect( + host="localhost", + port="4566", + user="root", + database="dev" + ) + execute_insert("insert into t5 values(1,1,1,1)",conn) + execute_insert("flush",conn) + execute_insert("insert into t5 values(2,2,2,2)",conn) + execute_insert("flush",conn) + execute_insert("declare cur subscription cursor for sub5 full",conn) + execute_insert("insert into t5 values(3,3,3,3)",conn) + execute_insert("flush",conn) + execute_insert("insert into t5 values(4,4,4,4)",conn) + execute_insert("flush",conn) + plan = execute_query("explain fetch next from cur",conn) + assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }" + assert plan[1][0] == "└─BatchScan { table: t5, columns: [v1, v2, v3, v4] }" + execute_query("fetch next from cur",conn) + plan = execute_query("explain fetch next from cur",conn) + assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }" + assert plan[1][0] == "└─BatchFilter { predicate: (Row(t5.v1, t5.v2) > '(1,1)':Struct(StructType { field_names: [], field_types: [Int32, Int32] })) }" + assert plan[2][0] == " └─BatchScan { table: t5, columns: [v1, v2, v3, v4] }" + execute_query("fetch next from cur",conn) + execute_query("fetch next from cur",conn) + plan = execute_query("explain fetch next from cur",conn) + assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }" + assert plan[1][0] == "└─BatchFilter { predicate: (Row(t5.v1, t5.v2) > '(3,3)':Struct(StructType { field_names: [], field_types: [Int32, Int32] })) }" + assert " └─BatchLogSeqScan { table: t5, columns: [v1, v2, v3, v4, op]" in plan[2][0] + execute_query("fetch next from cur",conn) + drop_table_subscription() + if __name__ == "__main__": test_cursor_snapshot() test_cursor_op() @@ -559,3 +594,4 @@ def test_order_multi_pk(): test_order_mv() test_order_multi_pk() test_block_cursor() + test_explain_cursor() From 75630b8983faae392bc633628c80168c29748a75 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Fri, 6 Dec 2024 16:37:09 +0800 Subject: [PATCH 10/17] fix comm --- src/batch/src/executor/log_row_seq_scan.rs | 33 ++++------------------ src/batch/src/executor/row_seq_scan.rs | 33 +++------------------- src/batch/src/executor/utils.rs | 33 ++++++++++++++++++++-- src/frontend/src/handler/util.rs | 5 +++- 4 files changed, 44 insertions(+), 60 deletions(-) diff --git a/src/batch/src/executor/log_row_seq_scan.rs b/src/batch/src/executor/log_row_seq_scan.rs index be13cd89f6b90..95392de09b5a0 100644 --- a/src/batch/src/executor/log_row_seq_scan.rs +++ b/src/batch/src/executor/log_row_seq_scan.rs @@ -18,14 +18,13 @@ use std::sync::Arc; use futures::prelude::stream::StreamExt; use futures_async_stream::try_stream; use futures_util::pin_mut; -use itertools::Itertools; use prometheus::Histogram; use risingwave_common::array::{DataChunk, Op}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnId, Field, Schema}; use risingwave_common::hash::VnodeCountCompat; use risingwave_common::row::{Row, RowExt}; -use risingwave_common::types::{DataType, ScalarImpl}; +use risingwave_common::types::ScalarImpl; use risingwave_hummock_sdk::{HummockReadEpoch, HummockVersionId}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::{batch_query_epoch, BatchQueryEpoch}; @@ -38,6 +37,7 @@ use super::{ BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder, ScanRange, }; use crate::error::{BatchError, Result}; +use 
crate::executor::build_scan_ranges_from_pb; use crate::monitor::BatchMetrics; use crate::task::BatchTaskContext; @@ -145,31 +145,8 @@ impl BoxedExecutorBuilder for LogStoreRowSeqScanExecutorBuilder { let old_epoch = old_epoch.epoch; let new_epoch = new_epoch.epoch; - let scan_ranges = { - let scan_ranges = &log_store_seq_scan_node.scan_ranges; - if scan_ranges.is_empty() { - vec![ScanRange::full()] - } else { - scan_ranges - .iter() - .map(|scan_range| { - let pk_types = table_desc - .pk - .iter() - .map(|order| { - DataType::from( - table_desc.columns[order.column_index as usize] - .column_type - .as_ref() - .unwrap(), - ) - }) - .collect_vec(); - ScanRange::new(scan_range.clone(), pk_types) - }) - .try_collect()? - } - }; + let scan_ranges = + build_scan_ranges_from_pb(&log_store_seq_scan_node.scan_ranges, table_desc)?; dispatch_state_store!(source.context().state_store(), state_store, { let table = StorageTable::new_partial(state_store, column_ids, vnodes, table_desc); @@ -258,7 +235,7 @@ impl LogRowSeqScanExecutor { scan_range: ScanRange, ) { let pk_prefix = scan_range.pk_prefix.clone(); - let range_bounds = scan_range.convert_to_range_bounds(table.clone()); + let range_bounds = scan_range.convert_to_range_bounds(&table); // Range Scan. let iter = table .batch_iter_log_with_pk_bounds( diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index f879e975b688b..b13e0041473ee 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -16,14 +16,12 @@ use std::sync::Arc; use futures::{pin_mut, StreamExt}; use futures_async_stream::try_stream; -use itertools::Itertools; use prometheus::Histogram; use risingwave_common::array::DataChunk; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnId, Schema}; use risingwave_common::hash::VnodeCountCompat; use risingwave_common::row::{OwnedRow, Row}; -use risingwave_common::types::DataType; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::BatchQueryEpoch; @@ -36,7 +34,8 @@ use risingwave_storage::{dispatch_state_store, StateStore}; use super::ScanRange; use crate::error::{BatchError, Result}; use crate::executor::{ - BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder, + build_scan_ranges_from_pb, BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, + ExecutorBuilder, }; use crate::monitor::BatchMetrics; use crate::task::BatchTaskContext; @@ -144,31 +143,7 @@ impl BoxedExecutorBuilder for RowSeqScanExecutorBuilder { None => Some(Bitmap::ones(table_desc.vnode_count()).into()), }; - let scan_ranges = { - let scan_ranges = &seq_scan_node.scan_ranges; - if scan_ranges.is_empty() { - vec![ScanRange::full()] - } else { - scan_ranges - .iter() - .map(|scan_range| { - let pk_types = table_desc - .pk - .iter() - .map(|order| { - DataType::from( - table_desc.columns[order.column_index as usize] - .column_type - .as_ref() - .unwrap(), - ) - }) - .collect_vec(); - ScanRange::new(scan_range.clone(), pk_types) - }) - .try_collect()? 
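            // This pk-type reconstruction, previously duplicated between the row
            // scan and log-store scan executors, is exactly what the new
            // `build_scan_ranges_from_pb` helper centralizes.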
- } - }; + let scan_ranges = build_scan_ranges_from_pb(&seq_scan_node.scan_ranges, table_desc)?; let ordered = seq_scan_node.ordered; @@ -355,7 +330,7 @@ impl RowSeqScanExecutor { histogram: Option>, ) { let pk_prefix = scan_range.pk_prefix.clone(); - let range_bounds = scan_range.convert_to_range_bounds(table.clone()); + let range_bounds = scan_range.convert_to_range_bounds(&table); // Range Scan. assert!(pk_prefix.len() < table.pk_indices().len()); let iter = table diff --git a/src/batch/src/executor/utils.rs b/src/batch/src/executor/utils.rs index 2ca36301c0e3f..fb94deabeaa7e 100644 --- a/src/batch/src/executor/utils.rs +++ b/src/batch/src/executor/utils.rs @@ -13,17 +13,18 @@ // limitations under the License. use core::ops::{Bound, RangeBounds}; -use std::sync::Arc; use futures::stream::BoxStream; use futures::StreamExt; use futures_async_stream::try_stream; +use itertools::Itertools; use risingwave_common::array::DataChunk; use risingwave_common::catalog::Schema; use risingwave_common::row::{OwnedRow, Row}; use risingwave_common::types::DataType; use risingwave_common::util::value_encoding::deserialize_datum; use risingwave_pb::batch_plan::{scan_range, PbScanRange}; +use risingwave_pb::plan_common::StorageTableDesc; use risingwave_storage::table::batch_table::storage_table::StorageTable; use risingwave_storage::StateStore; @@ -208,7 +209,7 @@ impl ScanRange { pub fn convert_to_range_bounds( self, - table: Arc>, + table: &StorageTable, ) -> impl RangeBounds { let ScanRange { pk_prefix, @@ -253,3 +254,31 @@ impl ScanRange { (start_bound, end_bound) } } + +pub fn build_scan_ranges_from_pb( + scan_ranges: &Vec, + table_desc: &StorageTableDesc, +) -> Result> { + if scan_ranges.is_empty() { + Ok(vec![ScanRange::full()]) + } else { + Ok(scan_ranges + .iter() + .map(|scan_range| { + let pk_types = table_desc + .pk + .iter() + .map(|order| { + DataType::from( + table_desc.columns[order.column_index as usize] + .column_type + .as_ref() + .unwrap(), + ) + }) + .collect_vec(); + ScanRange::new(scan_range.clone(), pk_types) + }) + .try_collect()?) + } +} diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index c2fed6ce9acd0..aab30bd2375b1 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -240,7 +240,10 @@ pub fn gen_query_from_table_name(from_name: ObjectName) -> Query { } } -// Plan like 'select * , pk in table order by pk' +/// `from_name` is the table name, +/// `pks` is the primary key columns’ `name` and `is_hidden` +/// `seek_pk_rows` is the seek pk values for the cursor. +/// So the query like `SELECT *, except(hidden pk) FROM table_name WHERE (pk1,pk2,pk3..) > (seek_pk1,seek_pk2,seek_pk3...) 
order by pk1,pk2,pk3..` pub fn gen_query_from_table_name_order_by( from_name: ObjectName, pks: Vec<(String, bool)>, From 4f07eebd7b7ac1bfa81ea6e7792347eede66990d Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Fri, 6 Dec 2024 17:50:36 +0800 Subject: [PATCH 11/17] fix cci --- src/common/src/array/struct_array.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index b127b08cd75b0..cfdf858381151 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -853,20 +853,24 @@ mod tests { let struct_type = StructType::unnamed(vec![ DataType::Int32, DataType::Varchar, - DataType::new_unnamed_struct(vec![DataType::Int32, DataType::Float64]), - DataType::new_unnamed_struct(vec![ + StructType::unnamed(vec![DataType::Int32, DataType::Float64]).into(), + StructType::unnamed(vec![ DataType::Int32, - DataType::new_unnamed_struct(vec![ + StructType::unnamed(vec![ DataType::Int32, - DataType::new_unnamed_struct(vec![ + StructType::unnamed(vec![ DataType::Int32, - DataType::new_unnamed_struct(vec![ + StructType::unnamed(vec![ DataType::Int32, - DataType::new_unnamed_struct(vec![DataType::Int32, DataType::Int32]), - ]), - ]), - ]), - ]), + StructType::unnamed(vec![DataType::Int32, DataType::Int32]).into(), + ]) + .into(), + ]) + .into(), + ]) + .into(), + ]) + .into(), ]); let struct_value = StructValue::from_str(struct_str, &struct_type).unwrap(); let expected = StructValue::new(vec![ From 083001bcf7928024c4a1154c81c8677bf57d6172 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Thu, 12 Dec 2024 17:13:16 +0800 Subject: [PATCH 12/17] fix ci --- e2e_test/subscription/main.py | 15 +++++++++++---- src/frontend/src/session/cursor_manager.rs | 12 +++++++++--- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/e2e_test/subscription/main.py b/e2e_test/subscription/main.py index a170b1d911df7..c41a293777899 100644 --- a/e2e_test/subscription/main.py +++ b/e2e_test/subscription/main.py @@ -109,6 +109,7 @@ def test_cursor_since_begin(): execute_insert("insert into t1 values(6,6)",conn) execute_insert("flush",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) check_rows_data([4,4],row[0],"Insert") row = execute_query("fetch next from cur",conn) check_rows_data([5,5],row[0],"Insert") @@ -138,6 +139,7 @@ def test_cursor_since_now(): execute_insert("insert into t1 values(6,6)",conn) execute_insert("flush",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) check_rows_data([6,6],row[0],"Insert") row = execute_query("fetch next from cur",conn) assert row == [] @@ -163,6 +165,7 @@ def test_cursor_without_since(): execute_insert("insert into t1 values(6,6)",conn) execute_insert("flush",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) check_rows_data([6,6],row[0],"Insert") row = execute_query("fetch next from cur",conn) assert row == [] @@ -187,6 +190,7 @@ def test_cursor_since_rw_timestamp(): execute_insert("insert into t1 values(6,6)",conn) execute_insert("flush",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) valuelen = len(row[0]) rw_timestamp_1 = row[0][valuelen - 1] check_rows_data([4,4],row[0],"Insert") @@ -204,16 +208,19 @@ def test_cursor_since_rw_timestamp(): execute_insert(f"declare cur subscription cursor for 
sub since {rw_timestamp_1}",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) check_rows_data([4,4],row[0],"Insert") execute_insert("close cur",conn) execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_2}",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) check_rows_data([5,5],row[0],"Insert") execute_insert("close cur",conn) execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_3}",conn) row = execute_query("fetch next from cur",conn) + row = execute_query("fetch next from cur",conn) assert row == [] execute_insert("close cur",conn) @@ -567,14 +574,14 @@ def test_explain_cursor(): execute_query("fetch next from cur",conn) plan = execute_query("explain fetch next from cur",conn) assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }" - assert plan[1][0] == "└─BatchFilter { predicate: (Row(t5.v1, t5.v2) > '(1,1)':Struct(StructType { field_names: [], field_types: [Int32, Int32] })) }" - assert plan[2][0] == " └─BatchScan { table: t5, columns: [v1, v2, v3, v4] }" + assert plan[1][0] == "└─BatchScan { table: t5, columns: [v1, v2, v3, v4], scan_ranges: [(v1, v2) > (Int32(1), Int32(1))] }" execute_query("fetch next from cur",conn) execute_query("fetch next from cur",conn) plan = execute_query("explain fetch next from cur",conn) + print(plan) assert plan[0][0] == "BatchExchange { order: [t5.v1 ASC, t5.v2 ASC], dist: Single }" - assert plan[1][0] == "└─BatchFilter { predicate: (Row(t5.v1, t5.v2) > '(3,3)':Struct(StructType { field_names: [], field_types: [Int32, Int32] })) }" - assert " └─BatchLogSeqScan { table: t5, columns: [v1, v2, v3, v4, op]" in plan[2][0] + assert "└─BatchLogSeqScan { table: t5, columns: [v1, v2, v3, v4, op]" in plan[1][0] + assert "scan_ranges: [(v1, v2) > (Int32(3), Int32(3))] }" in plan[1][0] execute_query("fetch next from cur",conn) drop_table_subscription() diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index 92252d064b943..b174ea4252952 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -798,9 +798,13 @@ impl SubscriptionCursor { .filter(|(is_hidden, _)| *is_hidden) .map(|(_, field)| (*field).clone()) .collect(); - let mut pk_keep = iter.iter().map(|(is_hidden, _)| *is_hidden); + let pk_keep = iter + .iter() + .map(|(is_hidden, _)| *is_hidden) + .collect::>(); rows.iter_mut().for_each(|row| { - row.0.retain(|_| pk_keep.next().unwrap()); + let mut pk_keep_iter = pk_keep.iter(); + row.0.retain(|_| *pk_keep_iter.next().unwrap()) }); (pk_fields, rows, last_row) } @@ -911,7 +915,9 @@ impl SubscriptionCursor { let out_fields = batch_log_seq_scan.core().out_fields(); let out_names = batch_log_seq_scan.core().column_names(); - let plan = if let Some(predicate) = predicate { + let plan = if let Some(predicate) = predicate + && !predicate.always_true() + { BatchFilter::new(generic::Filter::new(predicate, batch_log_seq_scan.into())).into() } else { batch_log_seq_scan.into() From 1408d712c569f426a0f3313e6e30fc5c24fa1148 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Fri, 13 Dec 2024 13:11:24 +0800 Subject: [PATCH 13/17] fix check --- src/frontend/src/session/cursor_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index 65f41912a6c6f..1d2784363d118 100644 --- 
a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -1180,7 +1180,7 @@ fn get_pk_names(pks: &[ColumnOrder], table_catalog: &TableCatalog) -> HashMap Date: Mon, 16 Dec 2024 15:46:16 +0800 Subject: [PATCH 14/17] fix bugs --- src/frontend/src/handler/util.rs | 111 +-------------- .../optimizer/plan_node/generic/table_scan.rs | 5 + src/frontend/src/session/cursor_manager.rs | 129 +++++++++--------- 3 files changed, 71 insertions(+), 174 deletions(-) diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index d05ae1ceef259..344b33047b590 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -38,8 +38,8 @@ use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::KAFKA_CONNECTOR; use risingwave_pb::catalog::connection_params::PbConnectionType; use risingwave_sqlparser::ast::{ - BinaryOperator, CompatibleFormatEncode, Expr, FormatEncodeOptions, Ident, ObjectName, - OrderByExpr, Query, Select, SelectItem, SetExpr, TableFactor, TableWithJoins, Value, + CompatibleFormatEncode, FormatEncodeOptions, ObjectName, Query, Select, SelectItem, SetExpr, + TableFactor, TableWithJoins, }; use thiserror_ext::AsReport; @@ -240,113 +240,6 @@ pub fn gen_query_from_table_name(from_name: ObjectName) -> Query { } } -/// `from_name` is the table name, -/// `pks` is the primary key columns’ `name` and `is_hidden` -/// `seek_pk_rows` is the seek pk values for the cursor. -/// So the query like `SELECT *, except(hidden pk) FROM table_name WHERE (pk1,pk2,pk3..) > (seek_pk1,seek_pk2,seek_pk3...) order by pk1,pk2,pk3..` -pub fn gen_query_from_table_name_order_by( - from_name: ObjectName, - pks: Vec<(String, bool)>, - seek_pk_rows: Option>>, -) -> RwResult { - let select_pks = pks - .iter() - .filter_map( - |(name, is_hidden)| { - if *is_hidden { - Some(name.clone()) - } else { - None - } - }, - ) - .collect_vec(); - let order_pks = pks.iter().map(|(name, _)| name).collect_vec(); - - let table_factor = TableFactor::Table { - name: from_name, - alias: None, - as_of: None, - }; - let from = vec![TableWithJoins { - relation: table_factor, - joins: vec![], - }]; - let mut projection = vec![SelectItem::Wildcard(None)]; - projection.extend( - select_pks.iter().map(|name| { - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new_unchecked(name.clone()))) - }), - ); - let selection = if let Some(seek_pk_rows) = seek_pk_rows { - let mut pk_rows = vec![]; - let mut values = vec![]; - for ((name, _), seek_pk) in pks.iter().zip_eq_fast(seek_pk_rows.iter()) { - if let Some(seek_pk) = seek_pk { - pk_rows.push(Expr::Identifier(Ident::with_quote_unchecked( - '"', - name.clone(), - ))); - values.push(String::from_utf8(seek_pk.clone().into()).map_err(|e| { - ErrorCode::InternalError(format!( - "Convert cursor seek_pk to string error: {:?}", - e.as_report() - )) - })?); - } - } - if pk_rows.is_empty() { - None - } else if pk_rows.len() == 1 { - let left = pk_rows.pop().unwrap(); - let right = Expr::Value(Value::SingleQuotedString(values.pop().unwrap())); - Some(Expr::BinaryOp { - left: Box::new(left), - op: BinaryOperator::Gt, - right: Box::new(right), - }) - } else { - let left = Expr::Row(pk_rows); - let values = values.join(","); - let right = Expr::Value(Value::SingleQuotedString(format!("({})", values))); - Some(Expr::BinaryOp { - left: Box::new(left), - op: BinaryOperator::Gt, - right: Box::new(right), - }) - } - } else { - None - }; - - let select = Select { - projection, - from, - selection, - 
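        // The whole AST-building path is going away: from PATCH 14/17 on, the
        // cursor constructs its batch plan directly from the table catalog
        // instead of generating a SQL query and re-binding it.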
..Default::default() - }; - let body = SetExpr::Select(Box::new(select)); - let order_by = order_pks - .into_iter() - .map(|pk| { - let expr = Expr::Identifier(Ident::with_quote_unchecked('"', pk)); - OrderByExpr { - expr, - asc: None, - nulls_first: None, - } - }) - .collect(); - Ok(Query { - with: None, - body, - order_by, - limit: None, - offset: None, - fetch: None, - }) -} - pub fn convert_unix_millis_to_logstore_u64(unix_millis: u64) -> u64 { Epoch::from_unix_millis(unix_millis).0 } diff --git a/src/frontend/src/optimizer/plan_node/generic/table_scan.rs b/src/frontend/src/optimizer/plan_node/generic/table_scan.rs index 7ef5fd8ca61e6..0abebffb844d3 100644 --- a/src/frontend/src/optimizer/plan_node/generic/table_scan.rs +++ b/src/frontend/src/optimizer/plan_node/generic/table_scan.rs @@ -126,6 +126,11 @@ impl TableScan { .collect() } + pub(crate) fn out_fields(&self) -> FixedBitSet { + let out_fields_vec = self.output_col_idx.clone(); + FixedBitSet::from_iter(out_fields_vec) + } + pub(crate) fn order_names(&self) -> Vec { self.table_desc .order_column_indices() diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs index 1d2784363d118..6a4ab12a85276 100644 --- a/src/frontend/src/session/cursor_manager.rs +++ b/src/frontend/src/session/cursor_manager.rs @@ -34,7 +34,7 @@ use risingwave_common::types::{DataType, ScalarImpl, StructType, StructValue}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::ColumnOrder; use risingwave_hummock_sdk::HummockVersionId; -use risingwave_sqlparser::ast::{Ident, ObjectName, Statement}; +use risingwave_sqlparser::ast::ObjectName; use super::SessionImpl; use crate::catalog::subscription_catalog::SubscriptionCatalog; @@ -42,17 +42,15 @@ use crate::catalog::TableId; use crate::error::{ErrorCode, Result}; use crate::expr::{ExprType, FunctionCall, InputRef, Literal}; use crate::handler::declare_cursor::create_chunk_stream_for_cursor; -use crate::handler::query::{ - gen_batch_plan_by_statement, gen_batch_plan_fragmenter, BatchQueryPlanResult, -}; +use crate::handler::query::{gen_batch_plan_fragmenter, BatchQueryPlanResult}; use crate::handler::util::{ - convert_logstore_u64_to_unix_millis, gen_query_from_table_name_order_by, pg_value_format, - to_pg_field, DataChunkToRowSetAdapter, StaticSessionData, + convert_logstore_u64_to_unix_millis, pg_value_format, to_pg_field, DataChunkToRowSetAdapter, + StaticSessionData, }; use crate::handler::HandlerArgs; use crate::monitor::{CursorMetrics, PeriodicCursorMetrics}; -use crate::optimizer::plan_node::{generic, BatchFilter, BatchLogSeqScan}; -use crate::optimizer::property::{Order, RequiredDist}; +use crate::optimizer::plan_node::{generic, BatchFilter, BatchLogSeqScan, BatchSeqScan}; +use crate::optimizer::property::{Cardinality, Order, RequiredDist}; use crate::optimizer::PlanRoot; use crate::scheduler::{DistributedQueryStream, LocalQueryStream}; use crate::utils::Condition; @@ -645,46 +643,27 @@ impl SubscriptionCursor { let session = handler_args.clone().session; let table_catalog = session.get_table_by_id(dependent_table_id)?; let context = OptimizerContext::from_handler_args(handler_args.clone()); - if let Some(rw_timestamp) = rw_timestamp { - let version_id = { - let version = session.env.hummock_snapshot_manager.acquire(); - let version = version.version(); - if !version - .state_table_info - .info() - .contains_key(dependent_table_id) - { - return Err(anyhow!("table id {dependent_table_id} has been dropped").into()); - } - 
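            // A sketch (hypothetical `CursorScanKind`, not part of the patch) of
            // the dispatch the reworked `create_batch_plan_for_cursor` performs
            // below: both epochs `Some` means "replay the subscription log
            // between the two epochs" (`BatchLogSeqScan`), anything else means
            // "read a plain snapshot of the table" (`BatchSeqScan`).
            enum CursorScanKind {
                Log { old_epoch: u64, new_epoch: u64 },
                Snapshot,
            }
            fn classify_scan(old: Option<u64>, new: Option<u64>) -> CursorScanKind {
                match (old, new) {
                    // Mirrors `old_epoch.is_some() && new_epoch.is_some()` below.
                    (Some(old_epoch), Some(new_epoch)) => {
                        CursorScanKind::Log { old_epoch, new_epoch }
                    }
                    _ => CursorScanKind::Snapshot,
                }
            }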
version.id - }; - Self::create_batch_plan_for_cursor( - &table_catalog, - &session, - context.into(), - rw_timestamp, - rw_timestamp, - version_id, - seek_pk_row, - ) - } else { - let pks = table_catalog.pk(); - let pks = pks - .iter() - .map(|f| { - let pk = table_catalog.columns.get(f.column_index).unwrap(); - (pk.name().to_owned(), pk.is_hidden) - }) - .collect_vec(); - let subscription_from_table_name = - ObjectName(vec![Ident::from(table_catalog.name.as_ref())]); - let query_stmt = Statement::Query(Box::new(gen_query_from_table_name_order_by( - subscription_from_table_name, - pks, - seek_pk_row, - )?)); - gen_batch_plan_by_statement(&session, context.into(), query_stmt) - } + let version_id = { + let version = session.env.hummock_snapshot_manager.acquire(); + let version = version.version(); + if !version + .state_table_info + .info() + .contains_key(dependent_table_id) + { + return Err(anyhow!("table id {dependent_table_id} has been dropped").into()); + } + version.id + }; + Self::create_batch_plan_for_cursor( + table_catalog, + &session, + context.into(), + rw_timestamp, + rw_timestamp, + version_id, + seek_pk_row, + ) } async fn initiate_query( @@ -818,11 +797,11 @@ impl SubscriptionCursor { } pub fn create_batch_plan_for_cursor( - table_catalog: &TableCatalog, + table_catalog: Arc, session: &SessionImpl, context: OptimizerContextRef, - old_epoch: u64, - new_epoch: u64, + old_epoch: Option, + new_epoch: Option, version_id: HummockVersionId, seek_pk_rows: Option>>, ) -> Result { @@ -840,15 +819,6 @@ impl SubscriptionCursor { }) .collect::>(); let max_split_range_gap = context.session_ctx().config().max_split_range_gap() as u64; - let core = generic::LogScan::new( - table_catalog.name.clone(), - output_col_idx, - Rc::new(table_catalog.table_desc()), - context, - old_epoch, - new_epoch, - version_id, - ); let pks = table_catalog.pk(); let pks = pks .iter() @@ -911,16 +881,45 @@ impl SubscriptionCursor { (vec![], None) }; - let batch_log_seq_scan = BatchLogSeqScan::new(core, scan); - let out_fields = batch_log_seq_scan.core().out_fields(); - let out_names = batch_log_seq_scan.core().column_names(); + let (seq_scan, out_fields, out_names) = if old_epoch.is_some() && new_epoch.is_some() { + let core = generic::LogScan::new( + table_catalog.name.clone(), + output_col_idx, + Rc::new(table_catalog.table_desc()), + context, + old_epoch.unwrap(), + new_epoch.unwrap(), + version_id, + ); + let batch_log_seq_scan = BatchLogSeqScan::new(core, scan); + let out_fields = batch_log_seq_scan.core().out_fields(); + let out_names = batch_log_seq_scan.core().column_names(); + (batch_log_seq_scan.into(), out_fields, out_names) + } else { + let core = generic::TableScan::new( + table_catalog.name.clone(), + output_col_idx, + table_catalog.clone(), + vec![], + context, + Condition { + conjunctions: vec![], + }, + None, + Cardinality::default(), + ); + let table_scan = BatchSeqScan::new(core, scan, None); + let out_fields = table_scan.core().out_fields(); + let out_names = table_scan.core().column_names(); + (table_scan.into(), out_fields, out_names) + }; let plan = if let Some(predicate) = predicate && !predicate.always_true() { - BatchFilter::new(generic::Filter::new(predicate, batch_log_seq_scan.into())).into() + BatchFilter::new(generic::Filter::new(predicate, seq_scan)).into() } else { - batch_log_seq_scan.into() + seq_scan }; // order by pk, so don't need to sort From 05f9e9e7cfdfe52d958aaa6c4a917a067de56c6f Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Thu, 26 Dec 2024 14:20:45 +0800 
Subject: [PATCH 15/17] fix comm --- e2e_test/subscription/main.py | 2 +- src/common/src/array/struct_array.rs | 93 +-------- src/expr/impl/src/scalar/cast.rs | 9 - src/expr/impl/tests/sig.rs | 3 +- src/frontend/src/session/cursor_manager.rs | 212 ++++++++++++--------- 5 files changed, 128 insertions(+), 191 deletions(-) diff --git a/e2e_test/subscription/main.py b/e2e_test/subscription/main.py index c41a293777899..ad7511b57b148 100644 --- a/e2e_test/subscription/main.py +++ b/e2e_test/subscription/main.py @@ -551,7 +551,7 @@ def test_order_multi_pk(): drop_table_subscription() def test_explain_cursor(): - print(f"test_order_mutil_pk") + print(f"test_explain_cursor") create_table_subscription() conn = psycopg2.connect( host="localhost", diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index cfdf858381151..e0b254bfcad75 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -361,25 +361,9 @@ impl StructValue { if !s.ends_with(')') { return Err("Missing right parenthesis".into()); } - let s = &s[1..s.len() - 1]; - let mut split_str = Vec::with_capacity(ty.len()); - let mut left_parenthesis_num = 0; - let mut start = 0; - for (i, c) in s.char_indices() { - match c { - '(' => left_parenthesis_num += 1, - ')' => left_parenthesis_num -= 1, - ',' if left_parenthesis_num == 0 => { - split_str.push(&s[start..i]); - start = i + 1; - } - _ => {} - } - } - split_str.push(&s[start..=(s.len() - 1)]); - let mut fields = Vec::with_capacity(ty.len()); - for (str, ty) in split_str.iter().zip_eq_debug(ty.types()) { - let datum = match str.trim() { + let mut fields = Vec::with_capacity(s.len()); + for (s, ty) in s[1..s.len() - 1].split(',').zip_eq_debug(ty.types()) { + let datum = match s.trim() { "" => None, s => Some(ScalarImpl::from_text(s, ty)?), }; @@ -846,75 +830,4 @@ mod tests { test("{1,2}", r#""{1,2}""#); test(r#"{"f": 1}"#, r#""{""f"": 1}""#); } - - #[test] - fn test_from_str_nested_struct() { - let struct_str = "(1,sad ,(3, 4.0),(1,( 2,(3,(4,(5, 6))) )) )"; - let struct_type = StructType::unnamed(vec![ - DataType::Int32, - DataType::Varchar, - StructType::unnamed(vec![DataType::Int32, DataType::Float64]).into(), - StructType::unnamed(vec![ - DataType::Int32, - StructType::unnamed(vec![ - DataType::Int32, - StructType::unnamed(vec![ - DataType::Int32, - StructType::unnamed(vec![ - DataType::Int32, - StructType::unnamed(vec![DataType::Int32, DataType::Int32]).into(), - ]) - .into(), - ]) - .into(), - ]) - .into(), - ]) - .into(), - ]); - let struct_value = StructValue::from_str(struct_str, &struct_type).unwrap(); - let expected = StructValue::new(vec![ - Some(1.to_scalar_value()), - Some("sad".into()), - Some( - StructValue::new(vec![ - Some(3.to_scalar_value()), - Some(ScalarImpl::Float64(4.0.into())), - ]) - .to_scalar_value(), - ), - Some( - StructValue::new(vec![ - Some(1.to_scalar_value()), - Some( - StructValue::new(vec![ - Some(2.to_scalar_value()), - Some( - StructValue::new(vec![ - Some(3.to_scalar_value()), - Some( - StructValue::new(vec![ - Some(4.to_scalar_value()), - Some( - StructValue::new(vec![ - Some(5.to_scalar_value()), - Some(6.to_scalar_value()), - ]) - .to_scalar_value(), - ), - ]) - .to_scalar_value(), - ), - ]) - .to_scalar_value(), - ), - ]) - .to_scalar_value(), - ), - ]) - .to_scalar_value(), - ), - ]); - assert_eq!(struct_value, expected); - } } diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index 386dfa45a93ce..e25ca07e3398f 100644 --- 
a/src/expr/impl/src/scalar/cast.rs
+++ b/src/expr/impl/src/scalar/cast.rs
@@ -47,15 +47,6 @@ where
     })
 }
 
-#[function("cast(varchar) -> struct", type_infer = "unreachable")]
-pub fn str_parse_struct(elem: &str, ctx: &Context) -> Result<StructValue> {
-    match &ctx.return_type {
-        risingwave_common::types::DataType::Struct(s) => Ok(StructValue::from_str(elem, s)
-            .map_err(|e| ExprError::Parse(format!("error: {:?}", e.as_report()).into()))?),
-        _ => Err(ExprError::Parse("unsupported type".into())),
-    }
-}
-
 // TODO: introduce `FromBinary` and support all types
 #[function("pgwire_recv(bytea) -> int8")]
 pub fn pgwire_recv(elem: &[u8]) -> Result<i64> {
diff --git a/src/expr/impl/tests/sig.rs b/src/expr/impl/tests/sig.rs
index 9182a8792eb27..7583438252829 100644
--- a/src/expr/impl/tests/sig.rs
+++ b/src/expr/impl/tests/sig.rs
@@ -62,8 +62,7 @@ fn test_func_sig_map() {
         "cast(anyarray) -> character varying/anyarray",
         "cast(bigint) -> rw_int256/serial/integer/smallint/numeric/double precision/real/character varying",
         "cast(boolean) -> integer/character varying",
-        "cast(character varying) -> jsonb/interval/timestamp without time zone/time without time zone/date/rw_int256/real/double precision/numeric/smallint/integer/bigint/anystruct/character varying/boolean/bytea/anyarray",
-        "cast(date) -> timestamp without time zone/character varying",
+        "cast(character varying) -> jsonb/interval/timestamp without time zone/time without time zone/date/rw_int256/real/double precision/numeric/smallint/integer/bigint/character varying/boolean/bytea/anyarray", "cast(date) -> timestamp without time zone/character varying",
         "cast(double precision) -> numeric/real/bigint/integer/smallint/character varying",
         "cast(integer) -> rw_int256/smallint/numeric/double precision/real/bigint/boolean/character varying",
         "cast(interval) -> time without time zone/character varying",
diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs
index 6a4ab12a85276..3046610d86af5 100644
--- a/src/frontend/src/session/cursor_manager.rs
+++ b/src/frontend/src/session/cursor_manager.rs
@@ -132,7 +132,7 @@ impl Cursor {
 
     pub fn get_fields(&mut self) -> Vec<Field> {
         match self {
-            Cursor::Subscription(cursor) => cursor.fields.clone(),
+            Cursor::Subscription(cursor) => cursor.fields_manager.get_output_fields().clone(),
             Cursor::Query(cursor) => cursor.fields.clone(),
         }
     }
@@ -256,6 +256,106 @@ impl Display for State {
     }
 }
 
+struct FieldsManager {
+    all_fields: Vec<Field>,
+    output_fields: Vec<Field>,
+    pk_columns_flags: Vec<bool>,
+    hidden_columns_flags: Vec<bool>,
+    pk_column_names: HashMap<String, bool>,
+}
+impl FieldsManager {
+    pub fn new(all_fields: Vec<Field>, pk_column_names: HashMap<String, bool>) -> Self {
+        let mut pk_columns_flags = Vec::new();
+        let mut hidden_columns_flags = Vec::new();
+        for field in &all_fields {
+            if let Some(is_hidden) = pk_column_names.get(&field.name) {
+                pk_columns_flags.push(true);
+                if *is_hidden {
+                    hidden_columns_flags.push(false);
+                } else {
+                    hidden_columns_flags.push(true);
+                }
+            } else {
+                hidden_columns_flags.push(true);
+                pk_columns_flags.push(false);
+            }
+        }
+        let mut output_fields = all_fields.clone();
+        let mut hidden_columns_flags_iter = hidden_columns_flags.iter();
+        output_fields.retain(|_| *hidden_columns_flags_iter.next().unwrap());
+        Self {
+            all_fields,
+            output_fields,
+            pk_columns_flags,
+            hidden_columns_flags,
+            pk_column_names,
+        }
+    }
+
+    pub fn try_refill_fields(
+        &mut self,
+        all_fields: Vec<Field>,
+        pk_column_names: HashMap<String, bool>,
+    ) -> bool {
+        if self.all_fields.ne(&all_fields) || self.pk_column_names.ne(&pk_column_names) {
+            *self = Self::new(all_fields, pk_column_names);
+            true
+        } else {
+            false
+        }
+    }
+
+    // At the beginning (`DECLARE` cursor), we are given an empty `formats`, which is not the real one.
+    // When we fetch, we fill it in with the formats returned from the pg client.
+    pub fn get_row_stream_fields_and_formats(
+        &self,
+        formats: &Vec<Format>,
+        from_snapshot: bool,
+    ) -> (Vec<Field>, Vec<Format>) {
+        let mut fields = self.all_fields.clone();
+        fields.pop();
+        if from_snapshot {
+            fields.pop();
+        }
+        if formats.is_empty() || formats.len() == 1 {
+            (fields, formats.clone())
+        } else {
+            let mut formats = formats.clone();
+            for (index, value) in self.hidden_columns_flags.iter().enumerate() {
+                if *value {
+                    formats.insert(index, Format::Text);
+                }
+            }
+            formats.pop();
+            if from_snapshot {
+                formats.pop();
+            }
+            (fields, formats)
+        }
+    }
+
+    pub fn process_output_desc_row(
+        &self,
+        mut rows: Vec<Row>,
+    ) -> (Vec<Row>, Option<Vec<Option<Bytes>>>) {
+        let last_row = rows.last_mut().map(|row| {
+            let mut row = row.0.clone();
+            let mut pk_columns_flags_iter = self.pk_columns_flags.iter();
+            row.retain(|_| *pk_columns_flags_iter.next().unwrap());
+            row
+        });
+        rows.iter_mut().for_each(|row| {
+            let mut hidden_columns_flags_iter = self.hidden_columns_flags.iter();
+            row.0.retain(|_| *hidden_columns_flags_iter.next().unwrap());
+        });
+        (rows, last_row)
+    }
+
+    pub fn get_output_fields(&self) -> &Vec<Field> {
+        &self.output_fields
+    }
+}
+
 pub struct SubscriptionCursor {
     cursor_name: String,
     subscription: Arc<SubscriptionCatalog>,
@@ -264,10 +364,9 @@ pub struct SubscriptionCursor {
     state: State,
     // The fields are read from the table's catalog when the cursor is created,
    // and are reset each time the chunk_stream is recreated, to tolerate catalog changes caused by ALTER.
-    fields: Vec<Field>,
+    fields_manager: FieldsManager,
     cursor_metrics: Arc<CursorMetrics>,
     last_fetch: Instant,
-    pk_column_names: HashMap<String, bool>,
     seek_pk_row: Option<Vec<Option<Bytes>>>,
 }
 
 impl SubscriptionCursor {
@@ -340,10 +439,9 @@ impl SubscriptionCursor {
             dependent_table_id,
             cursor_need_drop_time,
             state,
-            fields,
+            fields_manager: FieldsManager::new(fields, pk_column_names),
             cursor_metrics,
             last_fetch: Instant::now(),
-            pk_column_names,
             seek_pk_row: None,
         })
     }
@@ -378,11 +476,15 @@ impl SubscriptionCursor {
                             None,
                         )
                         .await?;
-                    Self::init_row_stream(
-                        &mut chunk_stream,
-                        formats,
-                        &from_snapshot,
+                    let table_schema_changed = self
+                        .fields_manager
+                        .try_refill_fields(fields, pk_column_names);
+                    let (fields, formats) = self
+                        .fields_manager
+                        .get_row_stream_fields_and_formats(formats, from_snapshot);
+                    chunk_stream.init_row_stream(
                         &fields,
+                        &formats,
                         handler_args.session.clone(),
                     );
 
@@ -400,10 +502,7 @@ impl SubscriptionCursor {
                         expected_timestamp,
                         init_query_timer,
                     };
-                    if self.fields.ne(&fields) || self.pk_column_names.ne(&pk_column_names)
-                    {
-                        self.fields = fields;
-                        self.pk_column_names = pk_column_names;
+                    if table_schema_changed {
                         return Ok(None);
                     }
                 }
@@ -504,13 +603,10 @@ impl SubscriptionCursor {
             ..
} = &mut self.state
         {
-            Self::init_row_stream(
-                chunk_stream,
-                formats,
-                from_snapshot,
-                &self.fields,
-                session.clone(),
-            );
+            let (fields, formats) = self
+                .fields_manager
+                .get_row_stream_fields_and_formats(formats, *from_snapshot);
+            chunk_stream.init_row_stream(&fields, &formats, session.clone());
         }
         while cur < count {
             let fetch_cursor_timer = Instant::now();
@@ -556,12 +652,16 @@ impl SubscriptionCursor {
             }
         }
         self.last_fetch = Instant::now();
-        let (fields, rows, seek_pk_row) =
-            Self::process_output_desc_row(&self.fields, ans, &self.pk_column_names);
+        let (rows, seek_pk_row) = self.fields_manager.process_output_desc_row(ans);
         if let Some(seek_pk_row) = seek_pk_row {
             self.seek_pk_row = Some(seek_pk_row);
         }
-        let desc = fields.iter().map(to_pg_field).collect();
+        let desc = self
+            .fields_manager
+            .get_output_fields()
+            .iter()
+            .map(to_pg_field)
+            .collect();
 
         Ok((rows, desc))
     }
@@ -742,52 +842,6 @@ impl SubscriptionCursor {
         Ok(row)
     }
 
-    pub fn process_output_desc_row(
-        descs: &Vec<Field>,
-        mut rows: Vec<Row>,
-        pk_column_names: &HashMap<String, bool>,
-    ) -> (Vec<Field>, Vec<Row>, Option<Vec<Option<Bytes>>>) {
-        let last_row = rows.last_mut().map(|row| {
-            row.0
-                .iter()
-                .zip_eq_fast(descs.iter())
-                .filter_map(|(data, field)| {
-                    if pk_column_names.contains_key(&field.name) {
-                        Some(data.clone())
-                    } else {
-                        None
-                    }
-                })
-                .collect_vec()
-        });
-        let iter = descs
-            .iter()
-            .map(|field| {
-                if let Some(is_hidden) = pk_column_names.get(&field.name)
-                    && *is_hidden
-                {
-                    (false, field)
-                } else {
-                    (true, field)
-                }
-            })
-            .collect_vec();
-        let pk_fields = iter
-            .iter()
-            .filter(|(is_hidden, _)| *is_hidden)
-            .map(|(_, field)| (*field).clone())
-            .collect();
-        let pk_keep = iter
-            .iter()
-            .map(|(is_hidden, _)| *is_hidden)
-            .collect::<Vec<bool>>();
-        rows.iter_mut().for_each(|row| {
-            let mut pk_keep_iter = pk_keep.iter();
-            row.0.retain(|_| *pk_keep_iter.next().unwrap())
-        });
-        (pk_fields, rows, last_row)
-    }
-
     pub fn build_desc(mut descs: Vec<Field>, from_snapshot: bool) -> Vec<Field> {
         if from_snapshot {
             descs.push(Field::with_name(DataType::Varchar, "op"));
@@ -952,26 +1006,6 @@ impl SubscriptionCursor {
         })
     }
 
-    // In the beginning (declare cur), we will give it an empty formats,
-    // this formats is not a real, when we fetch, We fill it with the formats returned from the pg client.
-    pub fn init_row_stream(
-        chunk_stream: &mut CursorDataChunkStream,
-        formats: &Vec<Format>,
-        from_snapshot: &bool,
-        fields: &Vec<Field>,
-        session: Arc<SessionImpl>,
-    ) {
-        let mut formats = formats.clone();
-        let mut fields = fields.clone();
-        formats.pop();
-        fields.pop();
-        if *from_snapshot {
-            formats.pop();
-            fields.pop();
-        }
-        chunk_stream.init_row_stream(&fields, &formats, session);
-    }
-
     pub fn idle_duration(&self) -> Duration {
         self.last_fetch.elapsed()
     }

From 5779465715a40d58bb4404d5493b76c2d8c0fb7d Mon Sep 17 00:00:00 2001
From: xxhZs <1060434431@qq.com>
Date: Thu, 9 Jan 2025 15:09:07 +0800
Subject: [PATCH 16/17] fix comm

---
 e2e_test/subscription/main.py                 |   7 -
 src/expr/impl/tests/sig.rs                    |   3 +-
 src/frontend/src/expr/function_call.rs        |   2 +-
 .../optimizer/plan_node/batch_log_seq_scan.rs |   2 +-
 .../optimizer/plan_node/generic/log_scan.rs   |  30 ++
 src/frontend/src/session/cursor_manager.rs    | 293 ++++++++----------
 src/utils/pgwire/src/types.rs                 |   8 +
 7 files changed, 178 insertions(+), 167 deletions(-)

diff --git a/e2e_test/subscription/main.py b/e2e_test/subscription/main.py
index ad7511b57b148..9b4b90314b049 100644
--- a/e2e_test/subscription/main.py
+++ b/e2e_test/subscription/main.py
@@ -109,7 +109,6 @@ def test_cursor_since_begin():
     execute_insert("insert into t1 values(6,6)",conn)
     execute_insert("flush",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     check_rows_data([4,4],row[0],"Insert")
     row = execute_query("fetch next from cur",conn)
     check_rows_data([5,5],row[0],"Insert")
@@ -139,7 +138,6 @@ def test_cursor_since_now():
     execute_insert("insert into t1 values(6,6)",conn)
     execute_insert("flush",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     check_rows_data([6,6],row[0],"Insert")
     row = execute_query("fetch next from cur",conn)
     assert row == []
@@ -165,7 +163,6 @@ def test_cursor_without_since():
     execute_insert("insert into t1 values(6,6)",conn)
     execute_insert("flush",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     check_rows_data([6,6],row[0],"Insert")
     row = execute_query("fetch next from cur",conn)
     assert row == []
@@ -190,7 +187,6 @@ def test_cursor_since_rw_timestamp():
     execute_insert("insert into t1 values(6,6)",conn)
     execute_insert("flush",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     valuelen = len(row[0])
     rw_timestamp_1 = row[0][valuelen - 1]
     check_rows_data([4,4],row[0],"Insert")
@@ -208,19 +204,16 @@ def test_cursor_since_rw_timestamp():
 
     execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_1}",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     check_rows_data([4,4],row[0],"Insert")
     execute_insert("close cur",conn)
 
     execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_2}",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     check_rows_data([5,5],row[0],"Insert")
     execute_insert("close cur",conn)
 
     execute_insert(f"declare cur subscription cursor for sub since {rw_timestamp_3}",conn)
     row = execute_query("fetch next from cur",conn)
-    row = execute_query("fetch next from cur",conn)
     assert row == []
     execute_insert("close cur",conn)
 
diff --git a/src/expr/impl/tests/sig.rs b/src/expr/impl/tests/sig.rs
index 7583438252829..8dc0bb29ebdf5 100644
--- a/src/expr/impl/tests/sig.rs
+++ b/src/expr/impl/tests/sig.rs
@@ -62,7 +62,8 @@ fn test_func_sig_map() {
         "cast(anyarray) -> character varying/anyarray",
         "cast(bigint) -> rw_int256/serial/integer/smallint/numeric/double precision/real/character varying",
         "cast(boolean) -> integer/character varying",
-        "cast(character varying) -> jsonb/interval/timestamp without time zone/time without time zone/date/rw_int256/real/double precision/numeric/smallint/integer/bigint/character varying/boolean/bytea/anyarray", "cast(date) -> timestamp without time zone/character varying",
+        "cast(character varying) -> jsonb/interval/timestamp without time zone/time without time zone/date/rw_int256/real/double precision/numeric/smallint/integer/bigint/character varying/boolean/bytea/anyarray",
+        "cast(date) -> timestamp without time zone/character varying",
         "cast(double precision) -> numeric/real/bigint/integer/smallint/character varying",
         "cast(integer) -> rw_int256/smallint/numeric/double precision/real/bigint/boolean/character varying",
         "cast(interval) -> time without time zone/character varying",
diff --git a/src/frontend/src/expr/function_call.rs b/src/frontend/src/expr/function_call.rs
index a3657cd51bda2..c5ae4ca178bf8 100644
--- a/src/frontend/src/expr/function_call.rs
+++ b/src/frontend/src/expr/function_call.rs
@@ -24,7 +24,7 @@ use crate::expr::{bail_cast_error, ExprDisplay, ExprType, ExprVisitor, ImpureAna
 
 #[derive(Clone, Eq, PartialEq, Hash)]
 pub struct FunctionCall {
-    pub func_type: ExprType,
+    pub(super) func_type: ExprType,
     pub(super) return_type: DataType,
     pub(super) inputs: Vec<ExprImpl>,
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_log_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_log_seq_scan.rs
index a7b6065db1e8a..e42247a9ca585 100644
--- a/src/frontend/src/optimizer/plan_node/batch_log_seq_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_log_seq_scan.rs
@@ -40,7 +40,7 @@ impl BatchLogSeqScan {
         let order = if scan_ranges.len() > 1 {
             Order::any()
         } else {
-            Order::new(core.table_desc.pk.clone())
+            core.get_out_column_index_order()
         };
         let base = PlanBase::new_batch(core.ctx(), core.schema(), dist, order);
diff --git a/src/frontend/src/optimizer/plan_node/generic/log_scan.rs b/src/frontend/src/optimizer/plan_node/generic/log_scan.rs
index 75d3b73618d30..136cc8908f02c 100644
--- a/src/frontend/src/optimizer/plan_node/generic/log_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/generic/log_scan.rs
@@ -20,11 +20,14 @@ use fixedbitset::FixedBitSet;
 use pretty_xmlish::Pretty;
 use risingwave_common::catalog::{ColumnDesc, Field, Schema, TableDesc};
 use risingwave_common::types::DataType;
+use risingwave_common::util::column_index_mapping::ColIndexMapping;
 use risingwave_common::util::sort_util::ColumnOrder;
 use risingwave_hummock_sdk::HummockVersionId;
 
 use crate::catalog::ColumnId;
 use crate::optimizer::optimizer_context::OptimizerContextRef;
+use crate::optimizer::property::Order;
+use crate::utils::ColIndexMappingRewriteExt;
 
 const OP_NAME: &str = "op";
 const OP_TYPE: DataType = DataType::Varchar;
@@ -176,4 +179,31 @@ impl LogScan {
             .map(|&i| format!("{}.{}", self.table_name, self.get_table_columns()[i].name))
             .collect()
     }
+
+    /// Return the indices of the fields the output is ordered by, together with the
+    /// corresponding direction.
+    pub fn get_out_column_index_order(&self) -> Order {
+        let id_to_tb_idx = self.table_desc.get_id_to_op_idx_mapping();
+        let order = Order::new(
+            self.table_desc
+                .pk
+                .iter()
+                .map(|order| {
+                    let idx = id_to_tb_idx
+                        .get(&self.table_desc.columns[order.column_index].column_id)
+                        .unwrap();
+                    ColumnOrder::new(*idx, order.order_type)
+                })
+                .collect(),
+        );
+        self.i2o_col_mapping().rewrite_provided_order(&order)
+    }
+
+    /// Get the mapping from internal column index to output column index.
+    pub fn i2o_col_mapping(&self) -> ColIndexMapping {
+        ColIndexMapping::with_remaining_columns(
+            &self.output_col_idx,
+            self.get_table_columns().len(),
+        )
+    }
 }
diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs
index 767d35f91cc9c..6cf2072009c01 100644
--- a/src/frontend/src/session/cursor_manager.rs
+++ b/src/frontend/src/session/cursor_manager.rs
@@ -27,12 +27,11 @@ use itertools::Itertools;
 use pgwire::pg_field_descriptor::PgFieldDescriptor;
 use pgwire::pg_response::StatementType;
 use pgwire::types::{Format, Row};
-use risingwave_common::catalog::Field;
+use risingwave_common::catalog::{ColumnCatalog, Field};
 use risingwave_common::error::BoxedError;
 use risingwave_common::session_config::QueryMode;
 use risingwave_common::types::{DataType, ScalarImpl, StructType, StructValue};
 use risingwave_common::util::iter_util::ZipEqFast;
-use risingwave_common::util::sort_util::ColumnOrder;
 use risingwave_hummock_sdk::HummockVersionId;
 use risingwave_sqlparser::ast::ObjectName;
 
@@ -52,7 +51,7 @@ use crate::monitor::{CursorMetrics, PeriodicCursorMetrics};
 use crate::optimizer::plan_node::{generic, BatchFilter, BatchLogSeqScan, BatchSeqScan};
 use crate::optimizer::property::{Cardinality, Order, RequiredDist};
 use crate::optimizer::PlanRoot;
-use crate::scheduler::{DistributedQueryStream, LocalQueryStream};
+use crate::scheduler::{DistributedQueryStream, LocalQueryStream, ReadSnapshot};
 use crate::utils::Condition;
 use crate::{Binder, OptimizerContext, OptimizerContextRef, PgResponseStream, TableCatalog};
 
@@ -257,54 +256,94 @@ impl Display for State {
 }
 
 struct FieldsManager {
-    all_fields: Vec<Field>,
-    output_fields: Vec<Field>,
-    pk_columns_flags: Vec<bool>,
-    hidden_columns_flags: Vec<bool>,
-    pk_column_names: HashMap<String, bool>,
+    columns_catalog: Vec<ColumnCatalog>,
+    // All row fields
+    row_fields: Vec<Field>,
+    // Row output column indices based on the scan output columns.
+    row_output_col_indices: Vec<usize>,
+    // Row pk indices based on the scan output columns.
+    row_pk_indices: Vec<usize>,
+    stream_chunk_row_indices: Vec<usize>,
+    op_index: usize,
 }
 
+impl FieldsManager {
+    // pub const OP_FIELD: Field = Field::with_name(DataType::Varchar, "op".to_owned());
+    // pub const RW_TIMESTAMP_FIELD: Field = Field::with_name(DataType::Int64, "rw_timestamp".to_owned());
+
+    pub fn new(catalog: &TableCatalog) -> Self {
+        let mut row_fields = Vec::new();
+        let mut row_output_col_indices = Vec::new();
+        let mut row_pk_indices = Vec::new();
+        let mut stream_chunk_row_indices = Vec::new();
+        let mut output_idx = 0_usize;
+        let pk_set: HashSet<usize> = catalog
+            .pk
+            .iter()
+            .map(|col_order| col_order.column_index)
+            .collect();
+
+        for (index, v) in catalog.columns.iter().enumerate() {
+            if pk_set.contains(&index) {
+                row_pk_indices.push(output_idx);
+                stream_chunk_row_indices.push(output_idx);
+                row_fields.push(Field::with_name(v.data_type().clone(), v.name()));
+                if !v.is_hidden {
+                    row_output_col_indices.push(output_idx);
+                }
+                output_idx += 1;
+            } else if !v.is_hidden {
+                row_output_col_indices.push(output_idx);
+                stream_chunk_row_indices.push(output_idx);
+                row_fields.push(Field::with_name(v.data_type().clone(), v.name()));
+                output_idx += 1;
+            }
+        }
+
+        row_fields.push(Field::with_name(DataType::Varchar, "op".to_owned()));
+        row_output_col_indices.push(output_idx);
+        let op_index = output_idx;
+        output_idx += 1;
+        row_fields.push(Field::with_name(DataType::Int64, "rw_timestamp".to_owned()));
+        row_output_col_indices.push(output_idx);
         Self {
+            columns_catalog: catalog.columns.clone(),
+            row_fields,
+            row_output_col_indices,
+            row_pk_indices,
+            stream_chunk_row_indices,
+            op_index,
         }
     }
 
-    pub fn try_refill_fields(
-        &mut self,
-        all_fields: Vec<Field>,
-        pk_column_names: HashMap<String, bool>,
-    ) -> bool {
-        if self.all_fields.ne(&all_fields) || self.pk_column_names.ne(&pk_column_names) {
-            *self = Self::new(all_fields, pk_column_names);
+    pub fn try_refill_fields(&mut self, catalog: &TableCatalog) -> bool {
+        if self.columns_catalog.ne(&catalog.columns) {
+            *self = Self::new(catalog);
             true
         } else {
             false
         }
     }
 
+    pub fn process_output_desc_row(&self, mut rows: Vec<Row>) -> (Vec<Row>, Option<Row>) {
+        let last_row = rows.last_mut().map(|row| {
+            let mut row = row.clone();
+            row.project(&self.row_pk_indices)
+        });
+        let rows = rows
+            .iter_mut()
+            .map(|row| row.project(&self.row_output_col_indices))
+            .collect();
+        (rows, last_row)
+    }
+
+    pub fn get_output_fields(&self) -> Vec<Field> {
+        self.row_output_col_indices
+            .iter()
+            .map(|&idx| self.row_fields[idx].clone())
+            .collect()
+    }
+
     // At the beginning (`DECLARE` cursor), we are given an empty `formats`, which is not the real one.
     // When we fetch, we fill it in with the formats returned from the pg client.
     pub fn get_row_stream_fields_and_formats(
@@ -312,47 +351,23 @@ impl FieldsManager {
         formats: &Vec<Format>,
         from_snapshot: bool,
     ) -> (Vec<Field>, Vec<Format>) {
-        let mut fields = self.all_fields.clone();
-        fields.pop();
-        if from_snapshot {
-            fields.pop();
-        }
-        if formats.is_empty() || formats.len() == 1 {
-            (fields, formats.clone())
+        let mut fields = Vec::new();
+        let need_format = !(formats.is_empty() || formats.len() == 1);
+        let mut new_formats = formats.clone();
+        let stream_chunk_row_indices_iter = if from_snapshot {
+            self.stream_chunk_row_indices.iter().chain(None)
         } else {
-            let mut formats = formats.clone();
-            for (index, value) in self.hidden_columns_flags.iter().enumerate() {
-                if *value {
-                    formats.insert(index, Format::Text);
-                }
-            }
-            formats.pop();
-            if from_snapshot {
-                formats.pop();
+            self.stream_chunk_row_indices
+                .iter()
+                .chain(Some(&self.op_index))
+        };
+        for index in stream_chunk_row_indices_iter {
+            fields.push(self.row_fields[*index].clone());
+            if need_format && !self.row_output_col_indices.contains(index) {
+                new_formats.insert(*index, Format::Text);
             }
-            (fields, formats)
         }
-    }
-
-    pub fn process_output_desc_row(
-        &self,
-        mut rows: Vec<Row>,
-    ) -> (Vec<Row>, Option<Vec<Option<Bytes>>>) {
-        let last_row = rows.last_mut().map(|row| {
-            let mut row = row.0.clone();
-            let mut pk_columns_flags_iter = self.pk_columns_flags.iter();
-            row.retain(|_| *pk_columns_flags_iter.next().unwrap());
-            row
-        });
-        rows.iter_mut().for_each(|row| {
-            let mut hidden_columns_flags_iter = self.hidden_columns_flags.iter();
-            row.0.retain(|_| *hidden_columns_flags_iter.next().unwrap());
-        });
-        (rows, last_row)
-    }
-
-    pub fn get_output_fields(&self) -> &Vec<Field> {
-        &self.output_fields
+        (fields, new_formats)
     }
 }
 
@@ -367,7 +382,7 @@ pub struct SubscriptionCursor {
     fields_manager: FieldsManager,
     cursor_metrics: Arc<CursorMetrics>,
     last_fetch: Instant,
-    seek_pk_row: Option<Vec<Option<Bytes>>>,
+    seek_pk_row: Option<Row>,
 }
 
 impl SubscriptionCursor {
@@ -379,42 +394,46 @@ impl SubscriptionCursor {
         handler_args: &HandlerArgs,
         cursor_metrics: Arc<CursorMetrics>,
     ) -> Result<Self> {
-        let (state, fields, pk_column_names) = if let Some(start_timestamp) = start_timestamp {
+        let (state, fields_manager) = if let Some(start_timestamp) = start_timestamp {
             let table_catalog = handler_args.session.get_table_by_id(&dependent_table_id)?;
-            let fields = table_catalog
-                .columns
-                .iter()
-                .filter(|c| !c.is_hidden)
-                .map(|c| Field::with_name(c.data_type().clone(), c.name()))
-                .collect();
-            let pk_column_names = get_pk_names(table_catalog.pk(), &table_catalog);
-            let fields = Self::build_desc(fields, true);
             (
                 State::InitLogStoreQuery {
                     seek_timestamp: start_timestamp,
                     expected_timestamp: None,
                 },
-                fields,
-                pk_column_names,
+                FieldsManager::new(&table_catalog),
             )
         } else {
             // The query stream needs to be initiated on cursor creation to make sure
            // future fetches on the cursor start from the snapshot when the cursor is declared.
             //
             // TODO: is this the right behavior? Should we delay the query stream initiation till the first fetch?
-            let (chunk_stream, fields, init_query_timer, pk_column_names) =
+            let (chunk_stream, init_query_timer, table_catalog) =
                 Self::initiate_query(None, &dependent_table_id, handler_args.clone(), None).await?;
-            let pinned_epoch = handler_args
-                .session
-                .env
-                .hummock_snapshot_manager
-                .acquire()
-                .version()
-                .state_table_info
-                .info()
-                .get(&dependent_table_id)
-                .ok_or_else(|| anyhow!("dependent_table_id {dependent_table_id} not exists"))?
-                .committed_epoch;
- .committed_epoch; + let pinned_epoch = match handler_args.session.get_pinned_snapshot().ok_or_else( + || ErrorCode::InternalError("Fetch Cursor can't find snapshot epoch".to_owned()), + )? { + ReadSnapshot::FrontendPinned { snapshot, .. } => { + snapshot + .version() + .state_table_info + .info() + .get(&dependent_table_id) + .ok_or_else(|| { + anyhow!("dependent_table_id {dependent_table_id} not exists") + })? + .committed_epoch + } + ReadSnapshot::Other(_) => { + return Err(ErrorCode::InternalError("Fetch Cursor can't start from specified query epoch. May run `set query_epoch = 0;`".to_owned()).into()); + } + ReadSnapshot::ReadUncommitted => { + return Err(ErrorCode::InternalError( + "Fetch Cursor don't support read uncommitted".to_owned(), + ) + .into()); + } + }; let start_timestamp = pinned_epoch; ( @@ -426,8 +445,7 @@ impl SubscriptionCursor { expected_timestamp: None, init_query_timer, }, - fields, - pk_column_names, + FieldsManager::new(&table_catalog), ) }; @@ -439,7 +457,7 @@ impl SubscriptionCursor { dependent_table_id, cursor_need_drop_time, state, - fields_manager: FieldsManager::new(fields, pk_column_names), + fields_manager, cursor_metrics, last_fetch: Instant::now(), seek_pk_row: None, @@ -468,7 +486,7 @@ impl SubscriptionCursor { &self.subscription, ) { Ok((Some(rw_timestamp), expected_timestamp)) => { - let (mut chunk_stream, fields, init_query_timer, pk_column_names) = + let (mut chunk_stream, init_query_timer, catalog) = Self::initiate_query( Some(rw_timestamp), &self.dependent_table_id, @@ -476,9 +494,8 @@ impl SubscriptionCursor { None, ) .await?; - let table_schema_changed = self - .fields_manager - .try_refill_fields(fields, pk_column_names); + let table_schema_changed = + self.fields_manager.try_refill_fields(&catalog); let (fields, formats) = self .fields_manager .get_row_stream_fields_and_formats(formats, from_snapshot); @@ -532,21 +549,15 @@ impl SubscriptionCursor { if let Some(row) = remaining_rows.pop_front() { // 1. 
-                    let new_row = row.take();
                     if from_snapshot {
-                        return Ok(Some(Row::new(Self::build_row(
-                            new_row,
-                            None,
-                            formats,
-                            &session_data,
-                        )?)));
+                        return Ok(Some(Self::build_row(row.0, None, formats, &session_data)?));
                     } else {
-                        return Ok(Some(Row::new(Self::build_row(
-                            new_row,
+                        return Ok(Some(Self::build_row(
+                            row.0,
                             Some(rw_timestamp),
                             formats,
                             &session_data,
-                        )?)));
+                        )?));
                     }
                 } else {
                     self.cursor_metrics
@@ -738,7 +749,7 @@ impl SubscriptionCursor {
         rw_timestamp: Option<u64>,
         dependent_table_id: &TableId,
         handler_args: HandlerArgs,
-        seek_pk_row: Option<Vec<Option<Bytes>>>,
+        seek_pk_row: Option<Row>,
     ) -> Result<BatchQueryPlanResult> {
         let session = handler_args.clone().session;
         let table_catalog = session.get_table_by_id(dependent_table_id)?;
@@ -770,18 +781,11 @@ impl SubscriptionCursor {
         rw_timestamp: Option<u64>,
         dependent_table_id: &TableId,
         handler_args: HandlerArgs,
-        seek_pk_row: Option<Vec<Option<Bytes>>>,
-    ) -> Result<(
-        CursorDataChunkStream,
-        Vec<Field>,
-        Instant,
-        HashMap<String, bool>,
-    )> {
+        seek_pk_row: Option<Row>,
+    ) -> Result<(CursorDataChunkStream, Instant, Arc<TableCatalog>)> {
         let init_query_timer = Instant::now();
         let session = handler_args.clone().session;
         let table_catalog = session.get_table_by_id(dependent_table_id)?;
-        let pks = table_catalog.pk();
-        let pk_column_names = get_pk_names(pks, &table_catalog);
         let plan_result = Self::init_batch_plan_for_subscription_cursor(
             rw_timestamp,
             dependent_table_id,
             handler_args.clone(),
             seek_pk_row,
         )?;
         let plan_fragmenter_result = gen_batch_plan_fragmenter(&handler_args.session, plan_result)?;
-        let (chunk_stream, fields) =
+        let (chunk_stream, _) =
             create_chunk_stream_for_cursor(handler_args.session, plan_fragmenter_result).await?;
-        Ok((
-            chunk_stream,
-            Self::build_desc(fields, rw_timestamp.is_none()),
-            init_query_timer,
-            pk_column_names,
-        ))
+        Ok((chunk_stream, init_query_timer, table_catalog))
     }
 
     async fn try_refill_remaining_rows(
@@ -816,7 +815,7 @@ impl SubscriptionCursor {
         rw_timestamp: Option<u64>,
         formats: &Vec<Format>,
         session_data: &StaticSessionData,
-    ) -> Result<Vec<Option<Bytes>>> {
+    ) -> Result<Row> {
         let row_len = row.len();
         let new_row = if let Some(rw_timestamp) = rw_timestamp {
             let rw_timestamp_formats = formats.get(row_len).unwrap_or(&Format::Text);
@@ -839,7 +838,7 @@ impl SubscriptionCursor {
             vec![Some(op), None]
         };
         row.extend(new_row);
-        Ok(row)
+        Ok(Row(row))
     }
 
     pub fn build_desc(mut descs: Vec<Field>, from_snapshot: bool) -> Vec<Field> {
@@ -857,7 +856,7 @@ impl SubscriptionCursor {
         old_epoch: Option<u64>,
         new_epoch: Option<u64>,
         version_id: HummockVersionId,
-        seek_pk_rows: Option<Vec<Option<Bytes>>>,
+        seek_pk_rows: Option<Row>,
     ) -> Result<BatchQueryPlanResult> {
         // pk + all non-hidden columns
         let output_col_idx = table_catalog
@@ -885,7 +884,7 @@ impl SubscriptionCursor {
             let mut pk_rows = vec![];
             let mut values = vec![];
             for (seek_pk, (data_type, column_index)) in
-                seek_pk_rows.into_iter().zip_eq_fast(pks.into_iter())
+                seek_pk_rows.0.into_iter().zip_eq_fast(pks.into_iter())
             {
                 if let Some(seek_pk) = seek_pk {
                     pk_rows.push(InputRef {
@@ -899,18 +898,6 @@ impl SubscriptionCursor {
                 }
             }
             if pk_rows.is_empty() {
                 (vec![], None)
-            } else if pk_rows.len() == 1 {
-                let left = pk_rows.pop().unwrap();
-                let (right_data, right_type) = values.pop().unwrap();
-                let (scan, predicate) = Condition {
-                    conjunctions: vec![FunctionCall::new(
-                        ExprType::GreaterThan,
-                        vec![left.into(), Literal::new(right_data, right_type).into()],
-                    )?
-                    .into()],
-                }
-                .split_to_scan_ranges(table_catalog.table_desc().into(), max_split_range_gap)?;
-                (scan, Some(predicate))
             } else {
                 let (right_data, right_types): (Vec<_>, Vec<_>) = values.into_iter().unzip();
                 let right_data = ScalarImpl::Struct(StructValue::new(right_data));
@@ -950,6 +937,7 @@ impl SubscriptionCursor {
             let out_names = batch_log_seq_scan.core().column_names();
             (batch_log_seq_scan.into(), out_fields, out_names)
         } else {
+            assert!(old_epoch.is_none() && new_epoch.is_none());
             let core = generic::TableScan::new(
                 table_catalog.name.clone(),
                 output_col_idx,
@@ -1208,12 +1196,3 @@ impl CursorManager {
         }
     }
 }
-
-fn get_pk_names(pks: &[ColumnOrder], table_catalog: &TableCatalog) -> HashMap<String, bool> {
-    pks.iter()
-        .map(|f| {
-            let column = table_catalog.columns.get(f.column_index).unwrap();
-            (column.name().to_owned(), column.is_hidden)
-        })
-        .collect()
-}
diff --git a/src/utils/pgwire/src/types.rs b/src/utils/pgwire/src/types.rs
index b5d8e0ff102b6..e4df4699fda27 100644
--- a/src/utils/pgwire/src/types.rs
+++ b/src/utils/pgwire/src/types.rs
@@ -49,6 +49,14 @@ impl Row {
     pub fn take(self) -> Vec<Option<Bytes>> {
         self.0
     }
+
+    pub fn project(&mut self, indices: &[usize]) -> Row {
+        let mut new_row = Vec::with_capacity(indices.len());
+        for i in indices {
+            new_row.push(self.0[*i].take());
+        }
+        Row(new_row)
+    }
 }
 
 impl Index<usize> for Row {

From 7dd0b09b6faf5132a37ebbd6bba678e8bacf171a Mon Sep 17 00:00:00 2001
From: xxhZs <1060434431@qq.com>
Date: Thu, 9 Jan 2025 15:12:32 +0800
Subject: [PATCH 17/17] add doc

---
 src/frontend/src/session/cursor_manager.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/frontend/src/session/cursor_manager.rs b/src/frontend/src/session/cursor_manager.rs
index 6cf2072009c01..e095d476e5ba3 100644
--- a/src/frontend/src/session/cursor_manager.rs
+++ b/src/frontend/src/session/cursor_manager.rs
@@ -257,13 +257,15 @@ impl Display for State {
 
 struct FieldsManager {
     columns_catalog: Vec<ColumnCatalog>,
-    // All row fields
+    // All row fields, including hidden pk, op, rw_timestamp and all non-hidden columns in the upstream table.
     row_fields: Vec<Field>,
-    // Row output column indices based on the scan output columns.
+    // Row output column indices based on `row_fields`.
     row_output_col_indices: Vec<usize>,
-    // Row pk indices based on the scan output columns.
+    // Row pk indices based on `row_fields`.
     row_pk_indices: Vec<usize>,
+    // Stream chunk row indices based on `row_fields`.
     stream_chunk_row_indices: Vec<usize>,
+    // The op index based on `row_fields`.
     op_index: usize,
 }
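To make the index bookkeeping in the final FieldsManager concrete: the three index vectors are all positions into `row_fields`, and the per-row work reduces to two `project` calls (one for the pk values remembered for seeking, one for the client-visible output), which is why patch 16 can drop the per-field flag vectors of patch 15. Below is a minimal, self-contained sketch of this layout; `MiniRow` and the three-column table are illustrative stand-ins, not RisingWave types.

// Standalone sketch of the FieldsManager index layout. `MiniRow` and the
// column list are illustrative stand-ins, not RisingWave types.
#[derive(Debug, Clone)]
struct MiniRow(Vec<Option<String>>);

impl MiniRow {
    // Mirrors the `Row::project` helper added in pgwire above: move the
    // values at `indices` out of this row into a new row, leaving `None`.
    fn project(&mut self, indices: &[usize]) -> MiniRow {
        MiniRow(indices.iter().map(|&i| self.0[i].take()).collect())
    }
}

fn main() {
    // Hypothetical table: `_row_id` is a hidden pk column; `v1` and `v2`
    // are user-visible. The cursor appends `op` and `rw_timestamp`.
    let columns = [("_row_id", true, true), ("v1", false, false), ("v2", false, false)];

    let mut row_fields = Vec::new();
    let mut row_output_col_indices = Vec::new();
    let mut row_pk_indices = Vec::new();
    for (idx, &(name, is_hidden, is_pk)) in columns.iter().enumerate() {
        row_fields.push(name.to_owned());
        if is_pk {
            row_pk_indices.push(idx);
        }
        if !is_hidden {
            row_output_col_indices.push(idx);
        }
    }
    let op_index = row_fields.len();
    row_fields.push("op".to_owned());
    row_output_col_indices.push(op_index);
    row_fields.push("rw_timestamp".to_owned());
    row_output_col_indices.push(op_index + 1);

    // row_fields             = ["_row_id", "v1", "v2", "op", "rw_timestamp"]
    // row_output_col_indices = [1, 2, 3, 4]  (hidden pk dropped from the output)
    // row_pk_indices         = [0]           (kept to remember the seek position)
    assert_eq!(row_output_col_indices, vec![1, 2, 3, 4]);
    assert_eq!(row_pk_indices, vec![0]);

    let mut row = MiniRow(vec![
        Some("42".into()),     // _row_id (hidden pk)
        Some("a".into()),      // v1
        Some("b".into()),      // v2
        Some("Insert".into()), // op
        Some("1736".into()),   // rw_timestamp
    ]);
    // As in `process_output_desc_row`: take the pk of the last row first,
    // then project every row down to the client-visible columns.
    let mut last = row.clone();
    let seek_pk = last.project(&row_pk_indices);
    let output = row.project(&row_output_col_indices);
    println!("seek_pk = {seek_pk:?}");
    println!("output  = {output:?}");
}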