Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(planner): Implement cost-based optimization #7187

Merged
merged 4 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/query/catalog/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ pub enum NavigationPoint {
TimePoint(DateTime<Utc>),
}

#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
pub struct TableStatistics {
pub num_rows: Option<u64>,
pub data_size: Option<u64>,
Expand Down
19 changes: 19 additions & 0 deletions src/query/datavalues/src/data_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use std::cmp::Ordering;
use std::fmt;
use std::hash::Hash;
use std::sync::Arc;

use common_exception::ErrorCode;
Expand Down Expand Up @@ -367,6 +368,24 @@ impl Ord for DataValue {
}
}

#[allow(clippy::derive_hash_xor_eq)]
impl Hash for DataValue {
leiysky marked this conversation as resolved.
Show resolved Hide resolved
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
leiysky marked this conversation as resolved.
Show resolved Hide resolved
std::mem::discriminant(self).hash(state);
match self {
DataValue::Null => {}
DataValue::Boolean(v) => v.hash(state),
DataValue::UInt64(v) => v.hash(state),
DataValue::Int64(v) => v.hash(state),
DataValue::Float64(v) => v.to_bits().hash(state),
DataValue::String(v) => v.hash(state),
DataValue::Array(v) => v.hash(state),
DataValue::Struct(v) => v.hash(state),
DataValue::Variant(v) => v.hash(state),
}
}
}

// We do not use `std::convert::TryFrom` here
// because we do not need a custom error type.
pub trait DFTryFrom<T>: Sized {
Expand Down
3 changes: 2 additions & 1 deletion src/query/datavalues/src/types/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ use crate::serializations::ConstSerializer;
pub const ARROW_EXTENSION_NAME: &str = "ARROW:extension:databend_name";
pub const ARROW_EXTENSION_META: &str = "ARROW:extension:databend_metadata";

#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Debug, Hash, serde::Deserialize, serde::Serialize)]
#[allow(clippy::derive_hash_xor_eq)]
#[serde(tag = "type")]
#[enum_dispatch(DataType)]
pub enum DataTypeImpl {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::prelude::*;
use crate::serializations::ArraySerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Clone, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct ArrayType {
inner: Box<DataTypeImpl>,
}
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub use crate::prelude::*;
use crate::serializations::BooleanSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct BooleanType {}

impl BooleanType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub fn check_date(days: i32) -> Result<()> {
))
}

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct DateType {}

impl DateType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::DateSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Clone, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct IntervalType {
kind: IntervalKind,
}
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::prelude::*;
use crate::serializations::NullSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct NullType {}

impl NullType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_nullable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::prelude::*;
use crate::serializations::NullableSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Clone, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct NullableType {
inner: Box<DataTypeImpl>,
}
Expand Down
6 changes: 6 additions & 0 deletions src/query/datavalues/src/types/type_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ macro_rules! impl_numeric {
write!(f, "{}", self.name())
}
}

/// Hashes a primitive data type by the identifier returned from `data_type_id()`;
/// no other state of the type contributes to the hash.
impl std::hash::Hash for PrimitiveDataType<$ty> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let type_id = self.data_type_id();
        std::hash::Hash::hash(&type_id, state);
    }
}
};
}
//
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::prelude::*;
use crate::serializations::StringSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct StringType {}

impl StringType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::prelude::*;
use crate::serializations::StructSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct StructType {
names: Option<Vec<String>>,
types: Vec<DataTypeImpl>,
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn check_timestamp(micros: i64) -> Result<()> {
}

/// Timestamp type only stores UTC time in microseconds
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct TimestampType {
/// Typical values are 0 (seconds), 3 (milliseconds), and 6 (microseconds).
precision: usize,
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::TypeSerializerImpl;
use crate::serializations::VariantSerializer;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct VariantType {}

impl VariantType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_variant_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::TypeSerializerImpl;
use crate::serializations::VariantSerializer;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct VariantArrayType {}

impl VariantArrayType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_variant_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::TypeSerializerImpl;
use crate::serializations::VariantSerializer;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct VariantObjectType {}

impl VariantObjectType {
Expand Down
10 changes: 10 additions & 0 deletions src/query/datavalues/src/variant_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use core::str::FromStr;
use std::cmp::Ordering;
use std::fmt::Display;
use std::fmt::Formatter;
use std::hash::Hash;
use std::ops::Deref;

use common_exception::ErrorCode;
Expand Down Expand Up @@ -219,6 +220,15 @@ impl PartialOrd for VariantValue {
}
}

#[allow(clippy::derive_hash_xor_eq)]
impl Hash for VariantValue {
    /// Hashes the variant through its textual form.
    ///
    /// The inner value obtained via `as_ref()` is rendered with `to_string()` and the
    /// resulting bytes are hashed as a byte slice. Each call allocates a temporary
    /// `String` for the rendering.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let rendered = self.as_ref().to_string();
        // Hash as `[u8]` (not `str`) to keep the exact hasher input of the byte-slice impl.
        rendered.as_bytes().hash(state);
    }
}

impl Display for VariantValue {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.as_ref())
Expand Down
4 changes: 2 additions & 2 deletions src/query/functions/src/scalars/hashes/hash_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,13 @@ impl DFHash for DataValue {
}
DataValue::Array(vals) => {
for v in vals {
v.hash(state);
DFHash::hash(v, state);
Hash::hash(&',', state);
}
}
DataValue::Struct(vals) => {
for v in vals {
v.hash(state);
DFHash::hash(v, state);
Hash::hash(&',', state);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/query/service/src/interpreters/interpreter_copy_v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl CopyInterpreterV2 {
let select_interpreter = SelectInterpreterV2::try_create(
self.ctx.clone(),
*(bind_context.clone()),
s_expr.clone(),
*s_expr.clone(),
metadata.clone(),
)?;

Expand Down
4 changes: 2 additions & 2 deletions src/query/service/src/interpreters/interpreter_explain_v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl Interpreter for ExplainInterpreterV2 {
Plan::Query {
s_expr, metadata, ..
} => {
self.explain_pipeline(s_expr.clone(), metadata.clone())
self.explain_pipeline(*s_expr.clone(), metadata.clone())
.await?
}
_ => {
Expand All @@ -67,7 +67,7 @@ impl Interpreter for ExplainInterpreterV2 {
Plan::Query {
s_expr, metadata, ..
} => {
self.explain_fragments(s_expr.clone(), metadata.clone())
self.explain_fragments(*s_expr.clone(), metadata.clone())
.await?
}
_ => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl InterpreterFactoryV2 {
} => Ok(Arc::new(SelectInterpreterV2::try_create(
ctx,
*bind_context.clone(),
s_expr.clone(),
*s_expr.clone(),
metadata.clone(),
)?)),
Plan::Explain { kind, plan } => Ok(Arc::new(ExplainInterpreterV2::try_create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ impl InsertInterpreterV2 {
} => SelectInterpreterV2::try_create(
self.ctx.clone(),
*bind_context.clone(),
s_expr.clone(),
*s_expr.clone(),
metadata.clone(),
),
_ => unreachable!(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::sql::optimizer::RuleID;
use crate::sql::optimizer::RuleSet;

pub fn get_explore_rule_set() -> RuleSet {
RuleSet::create_with_ids(vec![]).unwrap()
RuleSet::create_with_ids(vec![RuleID::CommuteJoin]).unwrap()
}

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::sql::optimizer::RuleID;
use crate::sql::optimizer::RuleSet;

pub fn get_implement_rule_set() -> RuleSet {
RuleSet::create_with_ids(vec![RuleID::ImplementGet]).unwrap()
RuleSet::create_with_ids(vec![RuleID::ImplementGet, RuleID::ImplementHashJoin]).unwrap()
}

#[cfg(test)]
Expand Down
Loading