Skip to content

Commit

Permalink
implement cost-based optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
leiysky committed Aug 24, 2022
1 parent fd4c6f9 commit c77bc7d
Show file tree
Hide file tree
Showing 87 changed files with 1,252 additions and 1,418 deletions.
2 changes: 1 addition & 1 deletion src/query/catalog/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ pub enum NavigationPoint {
TimePoint(DateTime<Utc>),
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct TableStatistics {
pub num_rows: Option<u64>,
pub data_size: Option<u64>,
Expand Down
18 changes: 18 additions & 0 deletions src/query/datavalues/src/data_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use std::cmp::Ordering;
use std::fmt;
use std::hash::Hash;
use std::sync::Arc;

use common_exception::ErrorCode;
Expand Down Expand Up @@ -367,6 +368,23 @@ impl Ord for DataValue {
}
}

#[allow(clippy::derive_hash_xor_eq)]
impl Hash for DataValue {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self {
DataValue::Null => std::mem::discriminant(self).hash(state),
DataValue::Boolean(v) => v.hash(state),
DataValue::UInt64(v) => v.hash(state),
DataValue::Int64(v) => v.hash(state),
DataValue::Float64(v) => v.to_bits().hash(state),
DataValue::String(v) => v.hash(state),
DataValue::Array(v) => v.hash(state),
DataValue::Struct(v) => v.hash(state),
DataValue::Variant(v) => v.hash(state),
}
}
}

// We do not use `std::convert::TryFrom` here
// because we do not need a custom error type.
pub trait DFTryFrom<T>: Sized {
Expand Down
3 changes: 2 additions & 1 deletion src/query/datavalues/src/types/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ use crate::serializations::ConstSerializer;
pub const ARROW_EXTENSION_NAME: &str = "ARROW:extension:databend_name";
pub const ARROW_EXTENSION_META: &str = "ARROW:extension:databend_metadata";

#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Debug, Hash, serde::Deserialize, serde::Serialize)]
#[allow(clippy::derive_hash_xor_eq)]
#[serde(tag = "type")]
#[enum_dispatch(DataType)]
pub enum DataTypeImpl {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::prelude::*;
use crate::serializations::ArraySerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Clone, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct ArrayType {
inner: Box<DataTypeImpl>,
}
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub use crate::prelude::*;
use crate::serializations::BooleanSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct BooleanType {}

impl BooleanType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub fn check_date(days: i32) -> Result<()> {
))
}

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct DateType {}

impl DateType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::DateSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Clone, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct IntervalType {
kind: IntervalKind,
}
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::prelude::*;
use crate::serializations::NullSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct NullType {}

impl NullType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_nullable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::prelude::*;
use crate::serializations::NullableSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Clone, serde::Deserialize, serde::Serialize)]
#[derive(Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct NullableType {
inner: Box<DataTypeImpl>,
}
Expand Down
6 changes: 6 additions & 0 deletions src/query/datavalues/src/types/type_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ macro_rules! impl_numeric {
write!(f, "{}", self.name())
}
}

// Hashes a primitive data type using only the value returned by
// `data_type_id()`; nothing else is fed into the hasher.
impl std::hash::Hash for PrimitiveDataType<$ty> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let type_id = self.data_type_id();
        std::hash::Hash::hash(&type_id, state);
    }
}
};
}
//
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::prelude::*;
use crate::serializations::StringSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct StringType {}

impl StringType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::prelude::*;
use crate::serializations::StructSerializer;
use crate::serializations::TypeSerializerImpl;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct StructType {
names: Option<Vec<String>>,
types: Vec<DataTypeImpl>,
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn check_timestamp(micros: i64) -> Result<()> {
}

/// Timestamp type only stores UTC time in microseconds
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct TimestampType {
/// Typical values are 0 (seconds), 3 (milliseconds), and 6 (microseconds).
precision: usize,
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::TypeSerializerImpl;
use crate::serializations::VariantSerializer;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct VariantType {}

impl VariantType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_variant_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::TypeSerializerImpl;
use crate::serializations::VariantSerializer;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct VariantArrayType {}

impl VariantArrayType {
Expand Down
2 changes: 1 addition & 1 deletion src/query/datavalues/src/types/type_variant_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prelude::*;
use crate::serializations::TypeSerializerImpl;
use crate::serializations::VariantSerializer;

#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[derive(Default, Clone, Hash, serde::Deserialize, serde::Serialize)]
pub struct VariantObjectType {}

impl VariantObjectType {
Expand Down
10 changes: 10 additions & 0 deletions src/query/datavalues/src/variant_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use core::str::FromStr;
use std::cmp::Ordering;
use std::fmt::Display;
use std::fmt::Formatter;
use std::hash::Hash;
use std::ops::Deref;

use common_exception::ErrorCode;
Expand Down Expand Up @@ -219,6 +220,15 @@ impl PartialOrd for VariantValue {
}
}

/// Hashes a `VariantValue` through its textual form.
///
/// The wrapped value is rendered with `to_string()` and the bytes of that
/// string are fed to the hasher as a byte slice, so two values hash alike
/// whenever their rendered strings are identical.
// This `Hash` impl is written by hand, so the clippy Hash/PartialEq pairing
// lint is silenced here.
#[allow(clippy::derive_hash_xor_eq)]
impl Hash for VariantValue {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Allocates a temporary string; the hash is taken over its bytes.
        let rendered = self.as_ref().to_string();
        Hash::hash(rendered.as_bytes(), state);
    }
}

impl Display for VariantValue {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.as_ref())
Expand Down
4 changes: 2 additions & 2 deletions src/query/functions/src/scalars/hashes/hash_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,13 @@ impl DFHash for DataValue {
}
DataValue::Array(vals) => {
for v in vals {
v.hash(state);
DFHash::hash(v, state);
Hash::hash(&',', state);
}
}
DataValue::Struct(vals) => {
for v in vals {
v.hash(state);
DFHash::hash(v, state);
Hash::hash(&',', state);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/query/service/src/interpreters/interpreter_copy_v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl CopyInterpreterV2 {
let select_interpreter = SelectInterpreterV2::try_create(
self.ctx.clone(),
*(bind_context.clone()),
s_expr.clone(),
*s_expr.clone(),
metadata.clone(),
)?;

Expand Down
4 changes: 2 additions & 2 deletions src/query/service/src/interpreters/interpreter_explain_v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl Interpreter for ExplainInterpreterV2 {
Plan::Query {
s_expr, metadata, ..
} => {
self.explain_pipeline(s_expr.clone(), metadata.clone())
self.explain_pipeline(*s_expr.clone(), metadata.clone())
.await?
}
_ => {
Expand All @@ -67,7 +67,7 @@ impl Interpreter for ExplainInterpreterV2 {
Plan::Query {
s_expr, metadata, ..
} => {
self.explain_fragments(s_expr.clone(), metadata.clone())
self.explain_fragments(*s_expr.clone(), metadata.clone())
.await?
}
_ => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl InterpreterFactoryV2 {
} => Ok(Arc::new(SelectInterpreterV2::try_create(
ctx,
*bind_context.clone(),
s_expr.clone(),
*s_expr.clone(),
metadata.clone(),
)?)),
Plan::Explain { kind, plan } => Ok(Arc::new(ExplainInterpreterV2::try_create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ impl InsertInterpreterV2 {
} => SelectInterpreterV2::try_create(
self.ctx.clone(),
*bind_context.clone(),
s_expr.clone(),
*s_expr.clone(),
metadata.clone(),
),
_ => unreachable!(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::sql::optimizer::RuleID;
use crate::sql::optimizer::RuleSet;

pub fn get_explore_rule_set() -> RuleSet {
RuleSet::create_with_ids(vec![]).unwrap()
RuleSet::create_with_ids(vec![RuleID::CommuteJoin]).unwrap()
}

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::sql::optimizer::RuleID;
use crate::sql::optimizer::RuleSet;

pub fn get_implement_rule_set() -> RuleSet {
RuleSet::create_with_ids(vec![RuleID::ImplementGet]).unwrap()
RuleSet::create_with_ids(vec![RuleID::ImplementGet, RuleID::ImplementHashJoin]).unwrap()
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit c77bc7d

Please sign in to comment.