From 8d1cd87912ab4a0eb294449b0c334d18758d4f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 30 Sep 2022 16:56:47 +0200 Subject: [PATCH] Prototype Value_Types and add basic type checks to filters --- .../Database/0.0.0-dev/src/Data/Column.enso | 16 ++++ .../Table/0.0.0-dev/src/Data/Column.enso | 13 +++ .../0.0.0-dev/src/Data/Filter_Condition.enso | 25 ++++-- .../Table/0.0.0-dev/src/Data/Value_Type.enso | 79 +++++++++++++++++++ 4 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Data/Value_Type.enso diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index ab1770a4ad700..7bb54d9cfce51 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -7,6 +7,7 @@ from Standard.Table import Filter_Condition import Standard.Table.Data.Column as Materialized_Column import Standard.Table.Data.Sort_Column_Selector import Standard.Table.Data.Sort_Column +from Standard.Table.Data.Value_Type import Value_Type from Standard.Database.Data.SQL import SQL_Type, Statement from Standard.Database.Data.Table import Integrity_Error @@ -93,6 +94,21 @@ type Column without_ix = self.to_table.set_index [] without_ix . read . at self.name . to_vector + ## UNSTABLE TODO this is a very early prototype that will be revisited later. This implementation is really just so that we can use the types in `filter`; it does not provide even a decent approximation of the true type in many cases. It will be improved when the types work is implemented. For now it reports `Value_Type.Boolean` or `Value_Type.Char` only when the SQL type is definitely boolean or definitely text, and `Value_Type.Unsupported_Data_Type` carrying the SQL type name otherwise. 
+ value_type : Value_Type + value_type self = + if self.sql_type.is_definitely_boolean then Value_Type.Boolean else + if self.sql_type.is_definitely_text then Value_Type.Char else + ## TODO we could return integers here too, but then we should check how many bits there are - and this is out of scope for this prototype, especially as the method for checking the type is likely to change, so this code would likely be thrown away. So we just fall back to unsupported (abusing it slightly). + Value_Type.Unsupported_Data_Type self.sql_type.name ## UNSTABLE Returns an SQL statement that will be used for materializing this column. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 8249356cb82e0..20607d276c725 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -4,6 +4,7 @@ import Standard.Base.Data.Index_Sub_Range import Standard.Table.Data.Table import Standard.Table.Data.Storage +from Standard.Table.Data.Value_Type import Value_Type # TODO Dubious constructor export from Standard.Table.Data.Column.Column import all @@ -753,6 +754,18 @@ type Column storage_types.at tp . catch Index_Out_Of_Bounds_Error _-> Panic.throw (Illegal_State_Error "Unknown storage type: "+tp.to_text) + ## UNSTABLE TODO this is a prototype that will be revisited later on. Maps the column's `storage_type` to the corresponding `Value_Type`; `Storage.Any` maps to `Value_Type.Mixed`. + value_type : Value_Type + value_type self = case self.storage_type of + Storage.Text -> Value_Type.Char + Storage.Integer -> Value_Type.Integer + Storage.Decimal -> Value_Type.Float + Storage.Boolean -> Value_Type.Boolean + Storage.Date -> Value_Type.Date + Storage.Time_Of_Day -> Value_Type.Time + Storage.Date_Time -> Value_Type.Date_Time + Storage.Any -> Value_Type.Mixed + ## UNSTABLE Converts this column to JSON. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Filter_Condition.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Filter_Condition.enso index ac85ea29d5421..1a7a14425b9af 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Filter_Condition.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Filter_Condition.enso @@ -1,6 +1,7 @@ from Standard.Base import all from Standard.Table.Data.Table import Table from Standard.Table.Data.Column import Column +from Standard.Table.Data.Value_Type import Value_Type from Standard.Table.Data.Filter_Condition.Filter_Condition import all @@ -57,8 +58,10 @@ type Filter_Condition A helper function gathering the common logic that generates a boolean mask from a given source column and a filter condition. It contains logic common for all backends. + + It also performs validation and will throw errors if unexpected column types + are encountered. So far only the text conditions (`Starts_With`, `Ends_With`, `Contains`) and the Boolean conditions (`Is_True`, `Is_False`) are validated; a mismatch is reported as an `Illegal_Argument_Error`. make_filter_column source_column filter_condition = case filter_condition of - # TODO check types Less value -> (source_column < value) Equal_Or_Less value -> (source_column <= value) Equal value -> (source_column == value) @@ -66,11 +69,21 @@ make_filter_column source_column filter_condition = case filter_condition of Greater value -> (source_column > value) Not_Equal value -> (source_column != value) Between lower upper -> ((source_column >= lower) && (source_column <= upper)) - Starts_With prefix -> source_column.starts_with prefix - Ends_With suffix -> source_column.ends_with suffix - Contains substring -> source_column.contains substring + Starts_With prefix -> case source_column.value_type of + Value_Type.Char _ _ -> source_column.starts_with prefix + _ -> Error.throw (Illegal_Argument_Error "`Starts_With` expected a text column.") + Ends_With suffix -> case source_column.value_type of + Value_Type.Char _ _ -> source_column.ends_with suffix + _ -> Error.throw (Illegal_Argument_Error "`Ends_With` expected a text column.") + Contains substring -> 
case source_column.value_type of + Value_Type.Char _ _ -> source_column.contains substring + _ -> Error.throw (Illegal_Argument_Error "`Contains` expected a text column.") Is_Nothing -> source_column.is_missing Not_Nothing -> source_column.is_missing.not - Is_True -> source_column - Is_False -> source_column.not + Is_True -> case source_column.value_type of + Value_Type.Boolean -> source_column + _ -> Error.throw (Illegal_Argument_Error "`Is_True` expected a Boolean column.") + Is_False -> case source_column.value_type of + Value_Type.Boolean -> source_column.not + _ -> Error.throw (Illegal_Argument_Error "`Is_False` expected a Boolean column.") _ -> source_column.map filter_condition diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Value_Type.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Value_Type.enso new file mode 100644 index 0000000000000..8616ad9e0e232 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Value_Type.enso @@ -0,0 +1,79 @@ +from Standard.Base import all + +## TODO This is a prototype based on the current pending design, used to proceed + with handling of types in the `filter` component and others. It will be + revisited when proper type support is implemented. + +## Type to represent the different sizes of integer or float possible within a database. +type Bits + ## 16-bit (2 byte) value + Bits_16 + ## 32-bit (4 byte) value + Bits_32 + ## 64-bit (8 byte) value + Bits_64 + +## Represents the different possible types of values within RDBMS columns. +type Value_Type + ## Boolean or Bit value: 0 or 1. + + ANSI SQL: BIT / BOOLEAN + Boolean + + ## Integer value: 0 to 255 + + ANSI SQL: TINYINT + Byte + + ## Integer value: + + 16-bit: -32,768 to 32,767 + 32-bit: -2,147,483,648 to 2,147,483,647 + 64-bit: -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + ANSI SQL: SMALLINT (16-bit), INT (32-bit), BIGINT (64-bit) + Integer size:Bits=Bits.Bits_64 + + ## Floating point value. 
+ + ANSI SQL: REAL, FLOAT, DOUBLE + Float size:(Bits.Bits_32 | Bits.Bits_64)=Bits.Bits_64 + + ## Arbitrary precision numerical value with a scale and precision. + + ANSI SQL: NUMERIC, DECIMAL + Decimal precision:(Integer|Nothing)=Nothing scale:(Integer|Nothing)=Nothing + + ## Character string. + + ANSI SQL: CHAR, VARCHAR, TEXT, LONGVARCHAR, NCHAR, NVARCHAR, CLOB, NCLOB + Char size:(Integer|Nothing)=Nothing variable:Boolean=True + + ## Date + + ANSI SQL: DATE + Date + + ## Date and Time + + ANSI SQL: TIMESTAMP / DateTime + Date_Time with_timezone:Boolean=True + + ## Time of day + + ANSI SQL: TIME, TIME WITH TIME ZONE, TIME WITHOUT TIME ZONE + Time with_timezone:Boolean=False + + ## Binary stream. + + ANSI SQL: BINARY, VARBINARY, LONGVARBINARY, BLOB, BIT(n) + Binary size:(Integer|Nothing)=Nothing variable:Boolean=False + + ## Unsupported SQL type. + + Fallback provided to allow describing types that are not supported by Enso at this time. + Unsupported_Data_Type type_name:Text="" + + ## A mix of values can be stored in the Column. + + In-Memory and SQLite tables support this. + Mixed