Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Infer SQLite types locally #6381

Merged
merged 5 commits into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -121,21 +121,24 @@ type Column
is always passed as the first argument).
- new_name: The name of the resulting column.
make_op self op_kind operands new_name =
type_mapping = self.connection.dialect.get_type_mapping
prepare_operand operand = case operand of
other_column : Column ->
if Helpers.check_integrity self other_column then other_column.expression else
Error.throw <| Unsupported_Database_Operation.Error "Cannot use columns coming from different contexts in one expression without a join."
constant ->
SQL_Expression.Constant constant

expressions = operands.map prepare_operand
new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions)

infer_from_database_callback expression =
SQL_Type_Reference.new self.connection self.context expression
new_type_ref = type_mapping.infer_return_type infer_from_database_callback op_kind [self]+operands new_expr
Column.Value new_name self.connection new_type_ref new_expr self.context
checked_support = if self.connection.dialect.is_supported op_kind then True else
Error.throw (Unsupported_Database_Operation.Error "The operation "+op_kind+" is not supported by this backend.")
checked_support.if_not_error <|
type_mapping = self.connection.dialect.get_type_mapping
prepare_operand operand = case operand of
other_column : Column ->
if Helpers.check_integrity self other_column then other_column.expression else
Error.throw <| Unsupported_Database_Operation.Error "Cannot use columns coming from different contexts in one expression without a join."
constant ->
SQL_Expression.Constant constant

expressions = operands.map prepare_operand
new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions)

infer_from_database_callback expression =
SQL_Type_Reference.new self.connection self.context expression
new_type_ref = type_mapping.infer_return_type infer_from_database_callback op_kind [self]+operands new_expr
Column.Value new_name self.connection new_type_ref new_expr self.context

## PRIVATE

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ type Dialect
_ = aggregate
Unimplemented.throw "This is an interface only."

## PRIVATE
Checks if an operation is supported by the dialect.
is_supported : Text -> Boolean
is_supported self operation =
# Interface stub: the argument is bound to `_` and not used further
# (presumably to avoid an unused-argument warning), and the method always
# throws `Unimplemented`.
# NOTE(review): concrete dialects are expected to provide the real check -
# `Column.make_op` calls `self.connection.dialect.is_supported op_kind`.
_ = operation
Unimplemented.throw "This is an interface only."

## PRIVATE

The dialect of SQLite databases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,7 @@ type SQLite_Dialect

## PRIVATE
make_cast : Internal_Column -> SQL_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
make_cast self column target_type infer_result_type_from_database_callback =
_ = infer_result_type_from_database_callback
make_cast self column target_type _ =
mapping = self.get_type_mapping
sql_type_text = mapping.sql_type_to_text target_type
new_expression = SQL_Expression.Operation "CAST" [column.expression, SQL_Expression.Literal sql_type_text]
Expand Down Expand Up @@ -172,8 +171,8 @@ type SQLite_Dialect
## PRIVATE
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
prepare_fetch_types_query self expression context =
minimized_context = context.set_limit 1
self.generate_sql (Query.Select [["typed_column", expression]] minimized_context)
_ = [expression, context]
Panic.throw (Illegal_State.Error "Type inference by asking the Database for the expected types is not supported in SQLite since it tended to give wrong results. This should have never been called - if it was - that is a bug in the Database library.")

## PRIVATE
check_aggregate_support : Aggregate_Column -> Boolean ! Unsupported_Database_Operation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,59 +85,15 @@ type SQLite_Type_Mapping
sql_type_to_text sql_type = SQL_Type_Mapping.default_sql_type_to_text sql_type

## PRIVATE
The SQLite type mapping takes special measures to keep boolean columns
boolean even if the Database will say that they are numeric.

To do so, any operation that returns booleans will override its return
type to boolean, and operations that return the same type as inputs will
also ensure to override to the boolean type if the input was boolean. In
particular, if the operations accept multiple arguments, they will
override the return type to boolean if all the input arguments had
boolean type.
The SQLite type mapping uses the same logic as the in-memory backend, just
simplified to only the types that it supports. It does not rely on the
Database to report the expected types, because that has been found to be
unreliable for more complex expressions.
infer_return_type : (SQL_Expression -> SQL_Type_Reference) -> Text -> Vector -> SQL_Expression -> SQL_Type_Reference
infer_return_type infer_from_database_callback op_name arguments expression =
return value_type =
sql_type = SQLite_Type_Mapping.value_type_to_sql value_type Problem_Behavior.Ignore
SQL_Type_Reference.from_constant sql_type
infer_default_type =
infer_from_database_callback expression

find_type arg = case arg of
column : Column -> column.value_type
internal_column : Internal_Column ->
SQLite_Type_Mapping.sql_type_to_value_type internal_column.sql_type_reference.get
enso_value -> Enso_Types.most_specific_value_type enso_value use_smallest=True

reconcile_types types =
result = Value_Type_Helpers.find_common_type types strict=False
# We remap Mixed to Char, to be consistent with our main mapping.
if result == Value_Type.Mixed then default_text else result

## We actually re-use the logic from the in-memory backend, since the
SQLite types are a small subset of that and the logic for SQLite
essentially forms a proper sub-algebra (in the universal algebra
terms).
find_a_common_type _ =
inputs_types = arguments.map find_type
return (reconcile_types inputs_types)

handle_iif _ =
if arguments.length != 3 then
Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.")
inputs_types = arguments.drop 1 . map find_type
return (reconcile_types inputs_types)

always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_NAN", "IS_EMPTY", "LIKE", "IS_IN", "starts_with", "ends_with", "contains"]
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED"]
merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FIRST", "LAST", "FIRST_NOT_NULL", "LAST_NOT_NULL", "FILL_NULL"]
others = [["IIF", handle_iif]]
mapping = Map.from_vector <|
v1 = always_boolean_ops.map [_, const (return Value_Type.Boolean)]
v2 = merge_input_types_ops.map [_, find_a_common_type]
v3 = always_text_ops.map [_, const (return default_text)]
v1 + v2 + v3 + others
handler = mapping.get op_name (_ -> infer_default_type)
handler Nothing
infer_return_type _ op_name arguments _ =
handler = operations_map.get op_name (_ -> Error.throw (Illegal_State.Error "Impossible: Unknown operation "+op_name+". This is a bug in the Database library."))
sql_type = handler arguments
SQL_Type_Reference.from_constant sql_type

## PRIVATE
SQLite `ResultSet` metadata may differ row-by-row, so we cannot rely on
Expand All @@ -159,15 +115,58 @@ type SQLite_Type_Mapping
For types like dates - we map them to unsupported type, because date
operations in SQLite are currently not supported due to their weird storage.
simple_types_map = Map.from_vector <|
ints = [Types.TINYINT, Types.SMALLINT, Types.BIGINT, Types.INTEGER] . map x-> [x, Value_Type.Integer Bits.Bits_64]
floats = [Types.DOUBLE, Types.REAL, Types.FLOAT] . map x-> [x, Value_Type.Float Bits.Bits_64]
ints = [Types.TINYINT, Types.SMALLINT, Types.BIGINT, Types.INTEGER] . map x-> [x, default_integer]
floats = [Types.DOUBLE, Types.REAL, Types.FLOAT] . map x-> [x, default_float]
# We treat numeric as a float, since that is what really sits in SQLite under the hood.
numerics = [Types.DECIMAL, Types.NUMERIC] . map x-> [x, Value_Type.Float Bits.Bits_64]
numerics = [Types.DECIMAL, Types.NUMERIC] . map x-> [x, default_float]
strings = [Types.CHAR, Types.VARCHAR] . map x-> [x, default_text]
blobs = [Types.BINARY, Types.BLOB, Types.CLOB] . map x-> [x, Value_Type.Binary]
special_types = [[Types.BOOLEAN, Value_Type.Boolean]]
ints + floats + numerics + strings + blobs + special_types

## PRIVATE
Maps operation names to functions that infer their result types.
operations_map : Map Text (Vector -> SQL_Type)
operations_map =
# Determines the value type of a single operation argument:
# - a `Column` reports its own `value_type`,
# - an `Internal_Column` has its SQL type reference resolved and converted
#   to a value type,
# - anything else is treated as a plain Enso value and gets the most
#   specific value type that fits it.
find_type arg = case arg of
column : Column -> column.value_type
internal_column : Internal_Column ->
SQLite_Type_Mapping.sql_type_to_value_type internal_column.sql_type_reference.get
enso_value -> Enso_Types.most_specific_value_type enso_value use_smallest=True

## We actually re-use the logic from the in-memory backend, since the
SQLite types essentially implement a very simple subset of our types.
find_a_common_type arguments =
types = arguments.map find_type
unified = Value_Type_Helpers.find_common_type types strict=False
# We remap Mixed to Char, to be consistent with our main mapping.
result = if unified == Value_Type.Mixed then default_text else unified
# The handler result is an SQL type, so the unified value type is converted
# back using the SQLite mapping.
SQLite_Type_Mapping.value_type_to_sql result Problem_Behavior.Ignore

# IIF must receive exactly 3 arguments; the result type is the common type
# of the arguments remaining after `drop 1` (presumably the two branch
# values - the dropped argument does not influence the result type).
handle_iif arguments =
if arguments.length != 3 then
Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.")
find_a_common_type (arguments.drop 1)

# CAST is registered in the map, but its handler always panics: reaching it
# is treated as a bug in the library.
handle_cast _ =
Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. This is a bug in the Database library.")

# Operation names grouped by how their result type is determined: the
# `always_*` groups map to a fixed SQL type, while `arithmetic_ops` and
# `merge_input_types_ops` both derive the type from their arguments.
always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR"]
always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP"]
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM"]
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS"]
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
others = [["IIF", handle_iif], ["CAST", handle_cast]]
# Every group is turned into [operation name, handler] pairs and the pairs
# are concatenated into a single map. `const` presumably wraps the fixed SQL
# type into a handler that ignores its arguments - TODO confirm.
Map.from_vector <|
v1 = always_boolean_ops.map [_, const SQLite_Types.boolean]
v2 = always_floating_ops.map [_, const SQLite_Types.real]
v3 = always_integer_ops.map [_, const SQLite_Types.integer]
v4 = always_text_ops.map [_, const SQLite_Types.text]
v5 = arithmetic_ops.map [_, find_a_common_type]
v6 = merge_input_types_ops.map [_, find_a_common_type]
v1 + v2 + v3 + v4 + v5 + v6 + others

## PRIVATE
type SQLite_Types
## PRIVATE
Expand All @@ -191,3 +190,9 @@ type SQLite_Types

## PRIVATE
default_text = Value_Type.Char size=Nothing variable_length=True

## PRIVATE
default_float = Value_Type.Float Bits.Bits_64

## PRIVATE
default_integer = Value_Type.Integer Bits.Bits_64
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ spec setup =
c4.value_type.is_integer . should_be_true
c4.to_vector . should_equal [1001, 1000, 1001]

pending_sqlite_types = if prefix.contains "SQLite" then "TODO: perform SQLite type inference locally - #6208"
Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" pending=pending_sqlite_types <|
Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" <|
t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c2 = t.at "Y" . cast Value_Type.Integer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import Value_Type
from Standard.Table.Errors import all

from Standard.Database.Errors import SQL_Error
from Standard.Database.Errors import all

from Standard.Test import Test, Problems
import Standard.Test.Extensions
Expand Down Expand Up @@ -328,11 +328,15 @@ spec setup =
(y ^ "a").should_fail_with Invalid_Value_Type
(y ^ 42).should_fail_with Invalid_Value_Type

if setup.test_selection.is_nan_and_nothing_distinct then
Test.specify "should support is_nan" <|
case setup.test_selection.is_nan_and_nothing_distinct of
True -> Test.specify "should support is_nan" <|
t = table_builder [["X", [1.5, 2, Number.nan]], ["Y", [1, 2, 3]]]
t.at "X" . is_nan . to_vector . should_equal [False, False, True]
t.at "Y" . is_nan . should_fail_with Invalid_Value_Type
False -> Test.specify "should report that is_nan is not supported" <|
t = table_builder [["X", [1.5]]]
t.at "X" . is_nan . should_fail_with Unsupported_Database_Operation

Test.specify "should support is_blank" <|
t = table_builder [["X", [1.5, 2, Number.nan, Nothing]], ["Y", [1, Nothing, 3, 4]]]
t.at "X" . is_blank treat_nans_as_blank=True . to_vector . should_equal [False, False, True, True]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import Standard.Table.Data.Type.Value_Type.Bits
from Standard.Table import Aggregate_Column, Value_Type
from Standard.Table.Errors import Invalid_Value_Type, Inexact_Type_Coercion

import Standard.Database.Data.Dialect
import Standard.Database.Internal.SQLite.SQLite_Type_Mapping
from Standard.Database import Database, SQLite, In_Memory, SQL_Query

from Standard.Test import Problems, Test, Test_Suite
Expand Down Expand Up @@ -57,26 +59,25 @@ spec =
t = make_table "foo" [["a", "int"], ["b", "text"], ["c", "boolean"], ["d", "double precision"]]

t.compute 'starts_with([b], "1")' . value_type . should_equal Value_Type.Boolean
t.compute '[a] * [d]' . value_type . should_equal (Value_Type.Float Bits.Bits_64)
t.compute '[a] + 100' . value_type . should_equal (Value_Type.Float Bits.Bits_64)
t.compute '[a] + 100.0' . value_type . should_equal (Value_Type.Float Bits.Bits_64)
t.compute '[a] * [d]' . value_type . should_equal Value_Type.Float
t.compute '[a] + 100' . value_type . should_equal Value_Type.Integer
t.compute '[a] + 100.0' . value_type . should_equal Value_Type.Float
t.compute '[c] || not [c]' . value_type . should_equal Value_Type.Boolean
t.compute '[b] + "_suf"' . value_type . should_equal Value_Type.Char
t.compute 'fill_nothing([c], false)' . value_type . should_equal Value_Type.Boolean
t.compute 'fill_empty([b], "<NA>")' . value_type . should_equal Value_Type.Char
t.compute 'is_blank([b])' . value_type . should_equal Value_Type.Boolean
t.compute 'is_empty([b])' . value_type . should_equal Value_Type.Boolean
t.compute 'is_nan([d])' . value_type . should_equal Value_Type.Boolean
t.compute 'is_nothing([a])' . value_type . should_equal Value_Type.Boolean

t2 = t.aggregate [Aggregate_Column.Group_By "b", Aggregate_Column.Sum "a", Aggregate_Column.Maximum "a", Aggregate_Column.Minimum "d", Aggregate_Column.Count_Not_Nothing "c", Aggregate_Column.Concatenate "b", Aggregate_Column.Count]
t2.at "b" . value_type . should_equal Value_Type.Char
t2.at "Sum a" . value_type . should_equal (Value_Type.Float Bits.Bits_64)
t2.at "Maximum a" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
t2.at "Minimum d" . value_type . should_equal (Value_Type.Float Bits.Bits_64)
t2.at "Count Not Nothing c" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
t2.at "Sum a" . value_type . should_equal Value_Type.Integer
t2.at "Maximum a" . value_type . should_equal Value_Type.Integer
t2.at "Minimum d" . value_type . should_equal Value_Type.Float
t2.at "Count Not Nothing c" . value_type . should_equal Value_Type.Integer
t2.at "Concatenate b" . value_type . should_equal Value_Type.Char
t2.at "Count" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
t2.at "Count" . value_type . should_equal Value_Type.Integer
# First is not currently implemented in SQLite
# t2.at "First c" . value_type . should_equal Value_Type.Boolean

Expand All @@ -91,4 +92,11 @@ spec =
t2.at "b" . value_type . should_equal Value_Type.Integer
Problems.expect_warning Inexact_Type_Coercion t2

Test.specify "should be able to infer types for all supported operations" <|
# Consistency check between two maps keyed by operation name: the SQLite
# dialect's internal generator `operation_map` and the local type inference
# map `SQLite_Type_Mapping.operations_map`.
dialect = Dialect.sqlite
internal_mapping = dialect.internal_generator_dialect.operation_map
operation_type_mapping = SQLite_Type_Mapping.operations_map

# Both key sets are sorted so the comparison does not depend on ordering;
# the test fails if either map contains an operation the other lacks.
operation_type_mapping.keys.sort . should_equal internal_mapping.keys.sort

main = Test_Suite.run_main spec