diff --git a/CHANGELOG.md b/CHANGELOG.md index 6709a1ddfe87..1fd70668e350 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -603,6 +603,8 @@ - [Added text_length to Column][8606] - [Added none delimiter option for Data.Read][8627] - [Added text_left and text_right to Column][8691] +- [Implement relational `NULL` semantics for `Nothing` for in-memory Column + operations.][5156] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -770,6 +772,7 @@ [4120]: https://github.com/enso-org/enso/pull/4120 [4050]: https://github.com/enso-org/enso/pull/4050 [4072]: https://github.com/enso-org/enso/pull/4072 +[5156]: https://github.com/enso-org/enso/pull/5156 [5582]: https://github.com/enso-org/enso/pull/5582 [5645]: https://github.com/enso-org/enso/pull/5645 [5646]: https://github.com/enso-org/enso/pull/5646 diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso index 20ae8472ea74..e789c605611f 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso @@ -66,6 +66,27 @@ type Set contains : Any -> Boolean contains self value = self.underlying_map.contains_key value + ## GROUP Logical + Checks if this set contains a given value, treating Nothing as a + relational NULL. + + If the argument is non-Nothing and exists in this set, return true. + + If the argument is non-Nothing and does not exist in this set, return + false if this set does not contain a Nothing, or Nothing if this set does + contain a Nothing. + + If the argument is Nothing, return Nothing if this set is non-empty, or + false if this set is empty.
+ contains_relational : Any -> Boolean | Nothing + contains_relational self value = + case value of + _ : Nothing -> if self.is_empty then False else Nothing + _ -> + if self.contains value then True else + has_nulls = self.contains Nothing + if has_nulls then Nothing else False + ## ALIAS add GROUP Calculations Adds a value to this set. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 2a5f687856b9..37a9c3463fdc 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -1536,34 +1536,12 @@ type Column fix it later) and setting up the query - but at the set up this only applies to adding nulls - setting any other object does not check the type at this level anyway. - partitioned = vector.partition .is_nothing - nulls = partitioned.first - non_nulls = partitioned.second - ## Since SQL `NULL IN (NULL)` yields `NULL`, we need to handle this case - separately. So we handle all non-null values using `IS_IN` and then - `OR` that with a null check (if the vector contained any nulls to - begin with). The implementation also ensures that even - `NULL IN (...)` is coalesced to False, so that negation works as - expected. - is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_name=new_name - result = case nulls.not_empty of - True -> is_in_not_null || self.is_nothing - False -> is_in_not_null + result = self.make_op "IS_IN" operands=vector new_name=new_name result.rename new_name _ : Array -> self.is_in (Vector.from_polyglot_array vector) column : Column -> if Helpers.check_connection self column . not then (Error.throw (Integrity_Error.Error "Column "+column.name)) else - ## We slightly abuse the expression syntax putting a Query as one of - the sub-expressions. 
Once type-checking is added, we may need to - amend the signature of `SQL_Expression.Operation` to account for - this. Also, unfortunately as `NULL IN (...)` is `NULL` in SQL, we - need to do separate handling of nulls - we check if the target - column has any nulls and if so, we will do `IS NULL` checks for - our columns too. That is because, we want the containment check - for `NULL` to work the same way as for any other value. in_subquery = Query.Select [Pair.new column.name column.expression] column.context - has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_nothing.expression] - has_nulls_subquery = Query.Select [Pair.new "has_nulls" has_nulls_expression] column.context - new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery] + new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery] # This mapping should never be imprecise, if there are errors we need to amend the implementation. sql_type = self.connection.dialect.get_type_mapping.value_type_to_sql Value_Type.Boolean Problem_Behavior.Report_Error new_type_ref = SQL_Type_Reference.from_constant sql_type . catch Inexact_Type_Coercion _-> diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index bb16dc733587..33015e7c7405 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -255,22 +255,16 @@ make_is_in arguments = case arguments.length of _ -> expr = arguments.first list = arguments.drop 1 - is_in = expr ++ " IN (" ++ (Builder.join ", " list) ++ ")" - ## We ensure that even `NULL IN (...)` is coalesced to False, so that - negation will work as expected. 
- Builder.code "COALESCE(" ++ is_in ++ ", FALSE)" + expr ++ " IN (" ++ (Builder.join ", " list) ++ ")" ## PRIVATE make_is_in_column : Vector Builder -> Builder make_is_in_column arguments = case arguments.length of - 3 -> + 2 -> expr = arguments.at 0 in_query = arguments.at 1 - has_nulls_query = arguments.at 2 - is_in = Builder.code "COALESCE(" ++ expr ++ " IN (" ++ in_query ++ "), FALSE)" - has_nulls = has_nulls_query.paren ++ " = TRUE" - Builder.code "CASE WHEN " ++ expr ++ " IS NULL THEN " ++ has_nulls ++ " ELSE " ++ is_in ++ " END" - _ -> Error.throw <| Illegal_State.Error ("The operation IS_IN_COLUMN requires at exactly 3 arguments: the expression, the IN subquery, the subquery checking for nulls.") + Builder.code "(" ++ expr ++ " IN (" ++ in_query ++ "))" + _ -> Error.throw <| Illegal_State.Error ("The operation IS_IN_COLUMN requires exactly 2 arguments: the expression and the IN subquery.") ## PRIVATE make_row_number : Vector Builder -> Row_Number_Metadata -> Builder diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index dda85a7f71da..717e3a9414c5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -1600,7 +1600,7 @@ type Column run_vectorized_binary_op self op_name as_vector expected_result_type=Value_Type.Boolean skip_nulls=False new_name=result_name False -> set = Set.from_vector as_vector error_on_duplicates=False - run_unary_op self set.contains new_name=result_name skip_nulls=False expected_result_type=Value_Type.Boolean + run_unary_op self set.contains_relational new_name=result_name skip_nulls=False expected_result_type=Value_Type.Boolean ## GROUP Standard.Base.Conversions ICON convert diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java
b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java index 2218bd3e3725..9409ca1d7862 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java @@ -55,16 +55,23 @@ public Storage runMap(S storage, List arg) { Context context = Context.getCurrent(); CompactRepresentation compactRepresentation = prepareList(arg); BitSet newVals = new BitSet(); - for (int i = 0; i < storage.size(); i++) { - if (storage.isNa(i) && compactRepresentation.hasNulls) { - newVals.set(i); - } else if (compactRepresentation.coercedValues.contains(storage.getItemBoxed(i))) { - newVals.set(i); - } + BitSet missing = new BitSet(); + if (arg.size() > 0) { + for (int i = 0; i < storage.size(); i++) { + if (storage.isNa(i)) { + missing.set(i); + } else if (compactRepresentation.coercedValues.contains(storage.getItemBoxed(i))) { + newVals.set(i); + } else if (compactRepresentation.hasNulls) { + missing.set(i); + } else { + // Leave as default=false + } - context.safepoint(); + context.safepoint(); + } } - return new BoolStorage(newVals, new BitSet(), storage.size(), false); + return new BoolStorage(newVals, missing, storage.size(), false); } @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java index 95aa7867c378..b19f687e2c8a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java @@ -1,17 +1,17 @@ package org.enso.table.data.column.operation.map.bool; -import java.util.BitSet; import java.util.List; import org.enso.table.data.column.operation.map.BinaryMapOperation; import 
org.enso.table.data.column.operation.map.MapOperationProblemAggregator; import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; +import org.enso.table.util.ImmutableBitSet; import org.graalvm.polyglot.Context; /** * A specialized implementation for the IS_IN operation on booleans - since booleans have just three * possible values we can have a highly efficient implementation that does not even rely on hashmap - * and after processing the input vector, performs the checks in constant time. + * and after processing the input vector, performs the checks using only BitSet builtins. */ public class BooleanIsInOp extends BinaryMapOperation { public BooleanIsInOp() { @@ -60,43 +60,29 @@ public Storage runZip( } private BoolStorage run(BoolStorage storage, boolean hadNull, boolean hadTrue, boolean hadFalse) { - BitSet newVals; - boolean negated = false; + int size = storage.size(); + ImmutableBitSet values = new ImmutableBitSet(storage.getValues(), size); + ImmutableBitSet missing = new ImmutableBitSet(storage.getIsMissing(), size); - if (hadNull && hadTrue && hadFalse) { - // We use empty newVals which has everything set to false and negate it to make all of that - // set to true with zero cost. - newVals = new BitSet(); - negated = true; - } else if (!hadNull && !hadTrue && !hadFalse) { - // No values are present, so the result is to be false everywhere. - newVals = new BitSet(); - } else if (hadNull && !hadTrue && !hadFalse) { - // Only missing values are in the set, so we just return the missing indicator. - newVals = storage.getIsMissing(); - } else if (hadTrue && hadFalse) { // && !hadNull - // All non-missing values are in the set - so we just return the negated missing indicator.
- newVals = storage.getIsMissing(); - negated = true; - } else { - // hadTrue != hadFalse - newVals = storage.getValues().get(0, storage.size()); - if (hadTrue) { - if (storage.isNegated()) { - newVals.flip(0, storage.size()); - } - } else { // hadFalse - if (!storage.isNegated()) { - newVals.flip(0, storage.size()); - } - } - newVals.andNot(storage.getIsMissing()); + ImmutableBitSet newValues; + ImmutableBitSet newMissing; - if (hadNull) { - newVals.or(storage.getIsMissing()); - } + if (hadTrue && !hadFalse) { + newValues = storage.isNegated() ? missing.notAndNot(values) : missing.notAnd(values); + newMissing = + hadNull ? (storage.isNegated() ? missing.or(values) : missing.orNot(values)) : missing; + } else if (!hadTrue && hadFalse) { + newValues = storage.isNegated() ? missing.notAnd(values) : missing.notAndNot(values); + newMissing = + hadNull ? (storage.isNegated() ? missing.orNot(values) : missing.or(values)) : missing; + } else if (hadTrue && hadFalse) { + newValues = missing.not(); + newMissing = missing; + } else { + newValues = ImmutableBitSet.allFalse(size); + newMissing = hadNull ? 
ImmutableBitSet.allTrue(size) : ImmutableBitSet.allFalse(size); } - return new BoolStorage(newVals, new BitSet(), storage.size(), negated); + return new BoolStorage(newValues.toBitSet(), newMissing.toBitSet(), size, false); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java index 7fabfada9395..5baefb27e782 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java @@ -59,7 +59,7 @@ public BoolStorage runBinaryMap( BitSet missing = new BitSet(); Context context = Context.getCurrent(); for (int i = 0; i < storage.size(); i++) { - if (storage.getItem(i) == null) { + if (storage.getItem(i) == null || arg == null) { missing.set(i); } else if (arg instanceof String s && Text_Utils.equals(storage.getItem(i), s)) { r.set(i); diff --git a/std-bits/table/src/main/java/org/enso/table/util/ImmutableBitSet.java b/std-bits/table/src/main/java/org/enso/table/util/ImmutableBitSet.java new file mode 100644 index 000000000000..ce46540f1d1c --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/util/ImmutableBitSet.java @@ -0,0 +1,83 @@ +package org.enso.table.util; + +import java.util.BitSet; + +/** + * A wrapper around BitSet that implements boolean operations conveniently. Unlike BitSet, + * ImmutableBitSet takes a size parameter, which allows .not to be implemented. 
+ */ +public class ImmutableBitSet { + private final BitSet bitSet; + private final int size; + + public ImmutableBitSet(BitSet bitSet, int size) { + this.bitSet = bitSet; + this.size = size; + } + + public BitSet toBitSet() { + return bitSet; + } + + public ImmutableBitSet and(ImmutableBitSet other) { + assert size == other.size; + BitSet result = (BitSet) bitSet.clone(); + result.and(other.bitSet); + return new ImmutableBitSet(result, size); + } + + public ImmutableBitSet or(ImmutableBitSet other) { + assert size == other.size; + BitSet result = (BitSet) bitSet.clone(); + result.or(other.bitSet); + return new ImmutableBitSet(result, size); + } + + public ImmutableBitSet andNot(ImmutableBitSet other) { + assert size == other.size; + BitSet result = (BitSet) bitSet.clone(); + result.andNot(other.bitSet); + return new ImmutableBitSet(result, size); + } + + public ImmutableBitSet not() { + BitSet result = (BitSet) bitSet.clone(); + result.flip(0, size); + return new ImmutableBitSet(result, size); + } + + public ImmutableBitSet notAnd(ImmutableBitSet other) { + assert size == other.size; + BitSet result = (BitSet) bitSet.clone(); + result.flip(0, size); + result.and(other.bitSet); + return new ImmutableBitSet(result, size); + } + + public ImmutableBitSet notAndNot(ImmutableBitSet other) { + assert size == other.size; + BitSet result = (BitSet) bitSet.clone(); + result.flip(0, size); + result.andNot(other.bitSet); + return new ImmutableBitSet(result, size); + } + + public ImmutableBitSet orNot(ImmutableBitSet other) { + // Doing an extra operation to avoid doing an extra allocation.
+ // a || !b => !(!a && b) + assert size == other.size; + BitSet result = (BitSet) bitSet.clone(); + result.flip(0, size); + result.and(other.bitSet); + result.flip(0, size); + return new ImmutableBitSet(result, size); + } + + public static ImmutableBitSet allFalse(int size) { + return new ImmutableBitSet(new BitSet(), size); + } + + public static ImmutableBitSet allTrue(int size) { + return new ImmutableBitSet(new BitSet(), size).not(); + } +} diff --git a/test/Base_Tests/src/Data/Set_Spec.enso b/test/Base_Tests/src/Data/Set_Spec.enso index 13deef702dad..9f47fb97491b 100644 --- a/test/Base_Tests/src/Data/Set_Spec.enso +++ b/test/Base_Tests/src/Data/Set_Spec.enso @@ -30,6 +30,16 @@ spec = s1.contains 3 . should_be_true s1.contains 4 . should_be_false + Test.specify "should allow checking contains with relational NULL logic" <| + Set.from_vector [1, 2] . contains_relational 1 . should_be_true + Set.from_vector [1, 2] . contains_relational 3 . should_be_false + Set.from_vector [1, 2, Nothing] . contains_relational 1 . should_be_true + Set.from_vector [1, 2, Nothing] . contains_relational 3 . should_equal Nothing + Set.from_vector [1, 2, Nothing] . contains_relational Nothing . should_equal Nothing + Set.from_vector [1, 2] . contains_relational Nothing . should_equal Nothing + Set.from_vector [Nothing] . contains_relational Nothing . should_equal Nothing + Set.from_vector [] . contains_relational Nothing . 
should_be_false + Test.specify "should allow to compute a union, intersection and difference" <| s1 = Set.from_vector [1, 2] s2 = Set.from_vector [2, 3] diff --git a/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso index b208edc20f3f..f121d942d7f1 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Filter_Spec.enso @@ -268,11 +268,14 @@ spec setup = Test.specify "by an Is_In check" <| t = table_builder [["ix", [1, 2, 3, Nothing, 5, 6]], ["X", ["a", "b", "ccc", "X", "f", "2"]]] t1 = table_builder [["txt", ["X", "a", "c", Nothing]], ["int", [Nothing, 2, 5, 4]], ["bool", [True, Nothing, Nothing, True]]] + t2 = table_builder [["txt", ["X", "a", "c", "q"]], ["int", [123, 2, 5, 4]], ["bool", [True, True, True, True]]] t.filter "X" (Filter_Condition.Is_In (t1.at "txt")) . at "X" . to_vector . should_equal ["a", "X"] t.filter "X" (Filter_Condition.Is_In (t1.at "txt" . to_vector)) . at "X" . to_vector . should_equal ["a", "X"] - t.filter "X" (Filter_Condition.Not_In (t1.at "txt")) . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"] - t.filter "X" (Filter_Condition.Not_In (t1.at "txt" . to_vector)) . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"] + t.filter "X" (Filter_Condition.Not_In (t1.at "txt")) . at "X" . to_vector . should_equal [] + t.filter "X" (Filter_Condition.Not_In (t2.at "txt")) . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"] + t.filter "X" (Filter_Condition.Not_In (t1.at "txt" . to_vector)) . at "X" . to_vector . should_equal [] + t.filter "X" (Filter_Condition.Not_In (t2.at "txt" . to_vector)) . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"] t.filter "X" (Filter_Condition.Is_In ["ccc"]) . at "X" . to_vector . should_equal ["ccc"] t.filter "X" (Filter_Condition.Is_In []) . at "X" . to_vector . should_equal [] t.filter "X" (Filter_Condition.Not_In []) . at "X" . to_vector . 
should_equal ["a", "b", "ccc", "X", "f", "2"] @@ -282,12 +285,12 @@ spec setup = # Some backends (e.g. SQLite) allow to coerce integer and text types when doing mixed type comparisons. ((mixed == []) || (mixed == ["2"])).should_be_true - t.filter "ix" (Filter_Condition.Is_In (t1.at "int")) . at "ix" . to_vector . should_equal [2, Nothing, 5] - t.filter "ix" (Filter_Condition.Is_In (t1.at "int" . to_vector)) . at "ix" . to_vector . should_equal [2, Nothing, 5] + t.filter "ix" (Filter_Condition.Is_In (t1.at "int")) . at "ix" . to_vector . should_equal [2, 5] + t.filter "ix" (Filter_Condition.Is_In (t1.at "int" . to_vector)) . at "ix" . to_vector . should_equal [2, 5] t.filter "ix" (Filter_Condition.Is_In [2, 5, 4]) . at "ix" . to_vector . should_equal [2, 5] - t.filter "ix" (Filter_Condition.Is_In [Nothing]) . at "ix" . to_vector . should_equal [Nothing] - t.filter "ix" (Filter_Condition.Not_In [Nothing]) . at "ix" . to_vector . should_equal [1, 2, 3, 5, 6] - t.filter "ix" (Filter_Condition.Not_In [1, 3]) . at "ix" . to_vector . should_equal [2, Nothing, 5, 6] + t.filter "ix" (Filter_Condition.Is_In [Nothing]) . at "ix" . to_vector . should_equal [] + t.filter "ix" (Filter_Condition.Not_In [Nothing]) . at "ix" . to_vector . should_equal [] + t.filter "ix" (Filter_Condition.Not_In [1, 3]) . at "ix" . to_vector . should_equal [2, 5, 6] v1 = t.filter "X" (Filter_Condition.Is_In ["c", "f", "b", "b", "b", 15, Nothing]) . at "X" . to_vector case test_selection.allows_mixed_type_comparisons of @@ -298,14 +301,14 @@ spec setup = True -> v2.should_equal [2, 3] False -> v2.should_fail_with SQL_Error - t2 = table_builder [["A", [True, False, True]], ["B", [False, False, False]], ["C", [True, False, Nothing]]] - t2.filter "A" (Filter_Condition.Is_In (t1.at "bool")) . at "A" . to_vector . should_equal [True, True] - t2.filter "A" (Filter_Condition.Is_In (t1.at "bool" . to_vector)) . at "A" . to_vector . 
should_equal [True, True] - t2.filter "B" (Filter_Condition.Is_In [True, Nothing]) . at "B" . to_vector . should_equal [] - t2.filter "C" (Filter_Condition.Is_In [True, Nothing]) . at "C" . to_vector . should_equal [True, Nothing] - t2.filter "A" (Filter_Condition.Is_In [False]) . at "A" . to_vector . should_equal [False] - t2.filter "B" (Filter_Condition.Is_In [False]) . at "B" . to_vector . should_equal [False, False, False] - t2.filter "C" (Filter_Condition.Is_In [False, False]) . at "C" . to_vector . should_equal [False] + t3 = table_builder [["A", [True, False, True]], ["B", [False, False, False]], ["C", [True, False, Nothing]]] + t3.filter "A" (Filter_Condition.Is_In (t1.at "bool")) . at "A" . to_vector . should_equal [True, True] + t3.filter "A" (Filter_Condition.Is_In (t1.at "bool" . to_vector)) . at "A" . to_vector . should_equal [True, True] + t3.filter "B" (Filter_Condition.Is_In [True, Nothing]) . at "B" . to_vector . should_equal [] + t3.filter "C" (Filter_Condition.Is_In [True, Nothing]) . at "C" . to_vector . should_equal [True] + t3.filter "A" (Filter_Condition.Is_In [False]) . at "A" . to_vector . should_equal [False] + t3.filter "B" (Filter_Condition.Is_In [False]) . at "B" . to_vector . should_equal [False, False, False] + t3.filter "C" (Filter_Condition.Is_In [False, False]) . at "C" . to_vector . should_equal [False] Test.specify "does not allow Column_Ref in Is_In/Not_In because that would be confusing" <| ## Is In and Not In check if a value is contained anywhere in a provided collection (e.g. 
column), diff --git a/test/Table_Tests/src/Common_Table_Operations/Main.enso b/test/Table_Tests/src/Common_Table_Operations/Main.enso index 183b38fca99f..efcbae8fca93 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Main.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Main.enso @@ -22,6 +22,7 @@ import project.Common_Table_Operations.Join.Union_Spec import project.Common_Table_Operations.Join.Lookup_Spec import project.Common_Table_Operations.Map_Spec import project.Common_Table_Operations.Missing_Values_Spec +import project.Common_Table_Operations.Nothing_Spec import project.Common_Table_Operations.Order_By_Spec import project.Common_Table_Operations.Select_Columns_Spec import project.Common_Table_Operations.Take_Drop_Spec @@ -144,5 +145,6 @@ spec setup = Add_Row_Number_Spec.spec setup Integration_Tests.spec setup Temp_Column_Spec.spec setup + Nothing_Spec.spec setup main = run_default_backend spec diff --git a/test/Table_Tests/src/Common_Table_Operations/Nothing_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Nothing_Spec.enso new file mode 100644 index 000000000000..57ac35f56e19 --- /dev/null +++ b/test/Table_Tests/src/Common_Table_Operations/Nothing_Spec.enso @@ -0,0 +1,240 @@ +from Standard.Base import all + +from Standard.Table import all + +import Standard.Database.Data.Column.Column as Database_Column + +from Standard.Test import Test +import Standard.Test.Extensions + +from project.Common_Table_Operations.Util import run_default_backend + +main = run_default_backend spec + +type My_Type + Value x:Text + + +spec setup = + prefix = setup.prefix + table_builder = setup.table_builder + table_builder_typed columns value_type = + cast_columns = columns.map c-> + Column.from_vector (c.at 0) (c.at 1) . cast value_type + setup.table_builder cast_columns + + # We cannot create a column of Nothing/NULL in the database without casting it to a non-mixed type. 
+ type_for_nothing_column = if setup.is_database then Value_Type.Char else Value_Type.Mixed + + zone = Time_Zone.parse "Z" + + mixed_values = if setup.is_database then [] else [[My_Type.Value "1", My_Type.Value "2", Value_Type.Mixed]] + + [[[1], [2], Value_Type.Mixed]] + date_time_values = if setup.test_selection.date_time.not then [] else [[Date.new 2024 1 3, Date.new 2024 1 4, Value_Type.Date]] + + [[Date_Time.new 2024 1 3 2 30 10 zone=zone, Date_Time.new 2024 1 3 2 30 11 zone=zone, Value_Type.Date_Time]] + + [[Time_Of_Day.new 2 30 10, Time_Of_Day.new 2 30 11, Value_Type.Time]] + values_without_nothing = [[False, True, Value_Type.Boolean]] + + [["abcd", "efgh", Value_Type.Char]] + + [[12, 13, Value_Type.Integer]] + + [[9223372036854775806, 9223372036854775807, Value_Type.Integer]] + + [[12.3, 23.4, Value_Type.Float]] + + date_time_values + + mixed_values + values_with_nothing = values_without_nothing + [[Nothing, Nothing, type_for_nothing_column]] + + Test.group prefix+"Comparisons" <| + comparisons = [["==", ==], ["!=", !=], ["<", <], ["<=", <=], [">", >], [">=", >=]] + + comparisons.map pair-> + comparison_name = pair.at 0 + comparison = pair.at 1 + + values_with_nothing.map triple-> + value = triple.at 0 + value_type = triple.at 2 + + table = table_builder_typed [["x", [value]], ["n", [Nothing]]] value_type + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+" "+comparison_name+" Nothing value" <| + co = comparison (table.at "x") Nothing + co.to_vector . should_equal [Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+" "+comparison_name+" Nothing column" <| + co = comparison (table.at "x") (table.at "n") + co.to_vector . should_equal [Nothing] + + Test.specify "Correctly handle Nothing in: Nothing column "+comparison_name+" "+value_type.to_text <| + co = comparison (table.at "n") (table.at "x") + co.to_vector . 
should_equal [Nothing] + + Test.group prefix+"between" <| + values_with_nothing.map triple-> + value = triple.at 0 + value_type = triple.at 2 + + table = table_builder_typed [["x", [value]], ["y", [value]], ["n", [Nothing]]] value_type + + Test.specify "Correctly handle Nothing in: Nothing column between "+value_type.to_text+" and "+value_type.to_text <| + co = table.at "n" . between (table.at "x") (table.at "y") + co.to_vector . should_equal [Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+" between Nothing column and "+value_type.to_text <| + co = table.at "x" . between (table.at "n") (table.at "y") + co.to_vector . should_equal [Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+" between "+value_type.to_text+" and Nothing column" <| + co = table.at "x" . between (table.at "y") (table.at "n") + co.to_vector . should_equal [Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+" between Nothing value and "+value_type.to_text <| + co = table.at "x" . between Nothing (table.at "y") + co.to_vector . should_equal [Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+" between "+value_type.to_text+" and Nothing value" <| + co = table.at "x" . between (table.at "y") Nothing + co.to_vector . should_equal [Nothing] + + Test.group prefix+"is_nothing" <| + values_with_nothing.map triple-> + value = triple.at 0 + value_type = triple.at 2 + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_nothing" <| + table = table_builder_typed [["x", [value]]] value_type + co = table.at "x" . is_nothing + co.to_vector . should_equal [value == Nothing] + + Test.group prefix+"not" <| + Test.specify "Correctly handle Nothing in .not" <| + table = table_builder [["x", [True, False, Nothing]]] + table.at "x" . not . to_vector . 
should_equal [False, True, Nothing] + + Test.group prefix+"is_in" <| + values_with_nothing.map triple-> + value = triple.at 0 + other_value = triple.at 1 + value_type = triple.at 2 + table = table_builder_typed [["x", [value, Nothing]], ["y", [other_value, Nothing]], ["z", [value, other_value]], ["n", [Nothing, Nothing]]] value_type + + true_if_not_nothing = if value == Nothing then Nothing else True + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in Column (returning True)" <| + table.at "x" . is_in (table.at "z") . to_vector . should_equal [true_if_not_nothing, Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in Column with Nothings (returning True)" <| + table.at "x" . is_in (table.at "x") . to_vector . should_equal [true_if_not_nothing, Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in Column with Nothings (returning Nothing)" <| + table.at "x" . is_in (table.at "y") . to_vector . should_equal [Nothing, Nothing] + + Test.specify "Correctly handle Nothing in: Nothing.is_in Column with Nothings (returning Nothing)" <| + table.at "n" . is_in (table.at "x") . to_vector . should_equal [Nothing, Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in Vector (returning True)" <| + table.at "x" . is_in (table.at "z" . to_vector) . to_vector . should_equal [true_if_not_nothing, Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in Vector with Nothings (returning True)" <| + table.at "x" . is_in (table.at "x" . to_vector) . to_vector . should_equal [true_if_not_nothing, Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in Vector with Nothings (returning Nothing)" <| + table.at "x" . is_in (table.at "y" . to_vector) . to_vector . should_equal [Nothing, Nothing] + + Test.specify "Correctly handle Nothing in: Nothing.is_in Vector with Nothings (returning Nothing)" <| + table.at "n" . 
is_in (table.at "x" . to_vector) . to_vector . should_equal [Nothing, Nothing] + + Test.specify "Correctly handle Nothing in: "+value_type.to_text+".is_in empty Vector (returning False)" <| + table.at "x" . is_in [] . to_vector . should_equal [False, False] + + Test.group prefix+"Boolean is_in" <| + make_containing_values had_null had_true had_false = + null_maybe = if had_null then [Nothing] else [] + true_maybe = if had_true then [True] else [] + false_maybe = if had_false then [False] else [] + null_maybe + true_maybe + false_maybe + + bools = [True, False] + bools_and_nothing = [True, False, Nothing] + # Format: [input, had_null, had_true, had_false, output] + cases = bools_and_nothing.flat_map input-> bools.flat_map had_null-> bools.flat_map had_true-> bools.map had_false-> + output = case input of + True -> if had_true then True else (if had_null then Nothing else False) + False -> if had_false then True else (if had_null then Nothing else False) + Nothing -> if had_null || had_true || had_false then Nothing else False + [input, had_null, had_true, had_false, output] + + # To test negated columns, we invert the values before putting them into the Storage, and set the negated bit. 
+ negate : Vector | Column -> Vector | Column + negate vorc = case vorc of + _ : Vector -> vorc.map (x-> x.if_not_nothing x.not) # Invert values first + _ : Column -> vorc.not # Set negated bit + _ : Database_Column -> vorc.not # Set negated bit + negation_cases = [["normal input, normal argument", identity, identity]] + + [["negated input, normal argument", negate, identity]] + + [["normal input, negated argument", identity, negate]] + + [["negated input, negated argument", negate, negate]] + + negation_cases.map negation_case-> + negation_desc = negation_case.at 0 + transform_input = negation_case.at 1 + transform_argument = negation_case.at 2 + + cases.map cs-> + input = cs.at 0 + output = cs.at 4 + containing_values = make_containing_values (cs.at 1) (cs.at 2) (cs.at 3) + + Test.specify "Boolean is_in: (Vector), "+negation_desc+" "+cs.to_text <| + input_column = transform_input [input] + t = table_builder_typed [["input", input_column]] Value_Type.Boolean + + c = (transform_input (t.at "input")) . is_in containing_values + + c.to_vector . should_equal [output] + + Test.specify "Boolean is_in: (Column), "+negation_desc+" "+cs.to_text <| + input_column = transform_input (Vector.fill containing_values.length input) + t = table_builder_typed [["input", input_column], ["containing", transform_argument containing_values]] Value_Type.Boolean + expected_output = if input_column.is_empty then [] else [output] + + c = (transform_input (t.at "input")) . is_in (transform_argument (t.at "containing")) + + c.to_vector . length . should_equal input_column.length + c.to_vector.distinct . should_equal expected_output + + Test.group prefix+"distinct" <| + values_without_nothing.map triple-> + value = triple.at 0 + other_value = triple.at 1 + value_type = triple.at 2 + + Test.specify "Correctly handle Nothing in .distinct for "+value_type.to_text <| + table = table_builder [["x", [value, other_value, other_value, Nothing, value, Nothing]]] + t1 = table . 
distinct ["x"] + v = t1.at "x" . to_vector + v . length . should_equal 3 + v . should_contain_the_same_elements_as [value, other_value, Nothing] + + Test.specify "Correctly handle Nothing in .distinct for Nothing" <| + table = table_builder_typed [["x", [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]]] Value_Type.Char + t1 = table . distinct ["x"] + v = t1.at "x" . to_vector + v . should_equal [Nothing] + + Test.group prefix+"order_by" <| + values_with_nothing.map triple-> + value = triple.at 0 + other_value = triple.at 1 + value_type = triple.at 2 + + is_comparable = Default_Comparator.compare value other_value != Nothing + + if is_comparable then + table = table_builder_typed [["x", [value, Nothing, other_value, other_value, Nothing, value, Nothing]]] value_type + + Test.specify "Correctly handle Nothing in .order_by (asc) for "+value_type.to_text <| + t1 = table . order_by [Sort_Column.Name "x" Sort_Direction.Ascending] + t1.at "x" . to_vector . should_equal [Nothing, Nothing, Nothing, value, value, other_value, other_value] + + Test.specify "Correctly handle Nothing in .order_by (desc) for "+value_type.to_text <| + t1 = table . order_by [Sort_Column.Name "x" Sort_Direction.Descending] + t1.at "x" . to_vector . should_equal [other_value, other_value, value, value, Nothing, Nothing, Nothing] diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 4c9123d8fe87..15d77d450fee 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -64,16 +64,16 @@ spec = Test.specify "should generate an IN expression" <| t2 = t1.filter "A" (Filter_Condition.Is_In [1, 2, 'foo']) - t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?, ?, ?), FALSE)', [1, 2, "foo"]] + t2.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE "T1"."A" IN (?, ?, ?)', [1, 2, "foo"]] t3 = t1.filter "A" (Filter_Condition.Is_In [1]) - t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?), FALSE)', [1]] + t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE "T1"."A" IN (?)', [1]] t4 = t1.filter "A" (Filter_Condition.Is_In []) t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (FALSE)', []] t5 = t1.filter "A" (Filter_Condition.Is_In [Nothing]) - t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ((FALSE) OR ("T1"."A" IS NULL))', []] + t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE "T1"."A" IN (?)', [Nothing]] Test.group "[Codegen] Handling Missing Values" <| Test.specify "fill_nothing should allow to replace missing values in a column with a constant" <| diff --git a/test/Table_Tests/src/In_Memory/Integer_Overflow_Spec.enso b/test/Table_Tests/src/In_Memory/Integer_Overflow_Spec.enso index 66bc81de4802..b55491eae70b 100644 --- a/test/Table_Tests/src/In_Memory/Integer_Overflow_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Integer_Overflow_Spec.enso @@ -322,7 +322,7 @@ spec = x.is_nothing . to_vector . should_equal [False, False, True, False] x.is_nan . to_vector . should_equal [False, False, Nothing, False] x.is_infinite . to_vector . should_equal [False, False, Nothing, False] - x.is_in [3, 2^70] . to_vector . should_equal [False, True, False, True] + x.is_in [3, 2^70] . to_vector . 
should_equal [False, True, Nothing, True] Test.specify "allows arithmetic on Decimal columns and other numeric columns" <| t = Table.new [["X", [10^30, 2^70, Nothing, 3]], ["Y", [1, 2, 3, 4]], ["Z", [1.5, 2.5, 3.5, 4.5]]] diff --git a/test/Table_Tests/src/In_Memory/Table_Spec.enso b/test/Table_Tests/src/In_Memory/Table_Spec.enso index c9c4f7ba08d8..4ea6fa2498de 100644 --- a/test/Table_Tests/src/In_Memory/Table_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Table_Spec.enso @@ -809,29 +809,29 @@ spec = nulls = ["nulls", [Nothing, Nothing, Nothing, 0]] custom = ["custom", [2, My.Data 2 1, Nothing, Nothing]] [str, int, int2, dbl, dates, dts, tod, mix, nulls, custom] - varied_type_table.filter "strs" (Filter_Condition.Is_In (ins.at "str")) . at "strs" . to_vector . should_equal ["b", "c", Nothing] - varied_type_table.filter "strs" (Filter_Condition.Is_In (ins.at "str" . to_vector)) . at "strs" . to_vector . should_equal ["b", "c", Nothing] + varied_type_table.filter "strs" (Filter_Condition.Is_In (ins.at "str")) . at "strs" . to_vector . should_equal ["b", "c"] + varied_type_table.filter "strs" (Filter_Condition.Is_In (ins.at "str" . to_vector)) . at "strs" . to_vector . should_equal ["b", "c"] varied_type_table.filter "ints" (Filter_Condition.Is_In (ins.at "int")) . at "ints" . to_vector . should_equal [1, 2] varied_type_table.filter "ints" (Filter_Condition.Is_In (ins.at "int" . to_vector)) . at "ints" . to_vector . should_equal [1, 2] - varied_type_table.filter "ints" (Filter_Condition.Is_In (ins.at "int2")) . at "ints" . to_vector . should_equal [Nothing, 1] - varied_type_table.filter "ints" (Filter_Condition.Is_In (ins.at "int2" . to_vector)) . at "ints" . to_vector . should_equal [Nothing, 1] - varied_type_table.filter "doubles" (Filter_Condition.Is_In (ins.at "dbl")) . at "doubles" . to_vector . should_equal [0.0, Nothing] - varied_type_table.filter "doubles" (Filter_Condition.Is_In (ins.at "dbl" . to_vector)) . at "doubles" . to_vector . 
should_equal [0.0, Nothing] + varied_type_table.filter "ints" (Filter_Condition.Is_In (ins.at "int2")) . at "ints" . to_vector . should_equal [1] + varied_type_table.filter "ints" (Filter_Condition.Is_In (ins.at "int2" . to_vector)) . at "ints" . to_vector . should_equal [1] + varied_type_table.filter "doubles" (Filter_Condition.Is_In (ins.at "dbl")) . at "doubles" . to_vector . should_equal [0.0] + varied_type_table.filter "doubles" (Filter_Condition.Is_In (ins.at "dbl" . to_vector)) . at "doubles" . to_vector . should_equal [0.0] varied_type_table.filter "dates" (Filter_Condition.Is_In (ins.at "dates")) . at "dates" . to_vector . should_equal [Date.new 2000, Date.new 1999 1 1] varied_type_table.filter "dates" (Filter_Condition.Is_In (ins.at "dates" . to_vector)) . at "dates" . to_vector . should_equal [Date.new 2000, Date.new 1999 1 1] - varied_type_table.filter "datetimes" (Filter_Condition.Is_In (ins.at "dts")) . at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2022 8 27 11 22 25] - varied_type_table.filter "datetimes" (Filter_Condition.Is_In (ins.at "dts" . to_vector)) . at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2022 8 27 11 22 25] + varied_type_table.filter "datetimes" (Filter_Condition.Is_In (ins.at "dts")) . at "datetimes" . to_vector . should_equal [Date_Time.new 2022 8 27 11 22 25] + varied_type_table.filter "datetimes" (Filter_Condition.Is_In (ins.at "dts" . to_vector)) . at "datetimes" . to_vector . should_equal [Date_Time.new 2022 8 27 11 22 25] varied_type_table.filter "times" (Filter_Condition.Is_In (ins.at "tod")) . at "times" . to_vector . should_equal [Time_Of_Day.new 18 00] varied_type_table.filter "times" (Filter_Condition.Is_In (ins.at "tod" . to_vector)) . at "times" . to_vector . should_equal [Time_Of_Day.new 18 00] - varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, "a", 1, Nothing, Date.new 2022 8 27, Date_Time.new 2022 8 27]) . at "mixed" . to_vector . 
should_equal [1, "a", Nothing, Date.new 2022 8 27] + varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, "a", 1, Nothing, Date.new 2022 8 27, Date_Time.new 2022 8 27]) . at "mixed" . to_vector . should_equal [1, "a", Date.new 2022 8 27] varied_type_table.filter "mixed" (Filter_Condition.Is_In (ins.at "mix")) . at "mixed" . to_vector . should_equal [1] varied_type_table.filter "mixed" (Filter_Condition.Is_In (ins.at "mix" . to_vector)) . at "mixed" . to_vector . should_equal [1] varied_type_table.filter "just_nulls" (Filter_Condition.Is_In []) . at "just_nulls" . to_vector . should_equal [] - varied_type_table.filter "just_nulls" (Filter_Condition.Is_In (ins.at "nulls")) . at "just_nulls" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] - varied_type_table.filter "just_nulls" (Filter_Condition.Is_In (ins.at "nulls" . to_vector)) . at "just_nulls" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] + varied_type_table.filter "just_nulls" (Filter_Condition.Is_In (ins.at "nulls")) . at "just_nulls" . to_vector . should_equal [] + varied_type_table.filter "just_nulls" (Filter_Condition.Is_In (ins.at "nulls" . to_vector)) . at "just_nulls" . to_vector . should_equal [] varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [0]) . at "just_nulls" . to_vector . should_equal [] - varied_type_table.filter "custom_objects" (Filter_Condition.Is_In (ins.at "custom")) . at "custom_objects" . to_vector . should_equal [My.Data 1 2, Nothing, Nothing] - varied_type_table.filter "custom_objects" (Filter_Condition.Is_In (ins.at "custom" . to_vector)) . at "custom_objects" . to_vector . should_equal [My.Data 1 2, Nothing, Nothing] + varied_type_table.filter "custom_objects" (Filter_Condition.Is_In (ins.at "custom")) . at "custom_objects" . to_vector . should_equal [My.Data 1 2] + varied_type_table.filter "custom_objects" (Filter_Condition.Is_In (ins.at "custom" . to_vector)) . at "custom_objects" . to_vector . 
should_equal [My.Data 1 2] t2 = Table.new [["ints", [1, 2, 3]], ["doubles", [1.2, 0.0, 1.0]]] t2.filter "ints" (Filter_Condition.Is_In [2.0, 1.5, 3, 4]) . at "ints" . to_vector . should_equal [2, 3] @@ -849,6 +849,7 @@ spec = if has_true then vec_builder.append True if has_false then vec_builder.append False in_vector = vec_builder.to_vector + in_vector_set = Set.from_vector in_vector vectors = [[True, False, Nothing], [Nothing, Nothing, Nothing], [False, False, True], [True, True, True], [False, False, False], [Nothing, True, True], [False, Nothing, False]] vectors.each column_vector-> @@ -863,10 +864,10 @@ spec = t = t0.drop 1 in_column = Column.from_vector "in" in_vector - expected_vector = column_vector.filter (Filter_Condition.Is_In in_vector) - expected_neg_vector = negated_column_vector.filter (Filter_Condition.Is_In in_vector) + expected_vector = column_vector.filter (x-> in_vector_set.contains_relational x == True) + expected_neg_vector = negated_column_vector.filter (x-> in_vector_set.contains_relational x == True) - Test.with_clue "(Is_In "+in_vector.to_text+"): " <| + Test.with_clue "("+column_vector.to_text+" Is_In "+in_vector.to_text+"): " <| t.filter "X" (Filter_Condition.Is_In in_vector) . at "X" . to_vector . should_equal expected_vector t.filter "X" (Filter_Condition.Is_In in_column) . at "X" . to_vector . should_equal expected_vector t2 = t.set (t.at "X" . not) new_name="Y"