Update stuff so that existing tests pass again after the API change
radeusgd committed Sep 29, 2022
1 parent 08ffdb4 commit f3c7c24
Showing 11 changed files with 62 additions and 88 deletions.
@@ -3,6 +3,7 @@ from Standard.Base import all
import Standard.Database.Internal.Helpers
import Standard.Database.Internal.IR
import Standard.Database.Data.Table
from Standard.Table import Filter_Condition
import Standard.Table.Data.Column as Materialized_Column
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
22 changes: 8 additions & 14 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -7,7 +7,7 @@ import Standard.Database.Internal.IR
from Standard.Database.Data.SQL_Query import Raw_SQL
from Standard.Database.Data.SQL import Statement, SQL_Type

from Standard.Table.Data.Filter_Condition import make_filter_column
from Standard.Table.Data.Filter_Condition import make_filter_column, Filter_Condition
import Standard.Table.Data.Column as Materialized_Column
import Standard.Table.Data.Table as Materialized_Table
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Sort_Column_Selector, Sort_Column, Match_Columns
@@ -361,16 +361,16 @@ type Table

people.filter "age" (age -> (age%10 == 0))
filter : (Column | Text | Integer) -> (Filter_Condition|(Any->Boolean)) -> Table
filter self column filter=(Filter_Condition.Equal True) = case column of
filter self column filter=Filter_Condition.Is_True = case Meta.type_of column of
Text -> self.filter (self.at column) filter
Integer -> self.filter (self.at column) filter
Column _ ->
Column ->
filter_column = make_filter_column column filter
case Helpers.check_integrity self filter_column of
False ->
Error.throw (Integrity_Error_Data "Column "+filter.name)
Error.throw (Integrity_Error_Data "Column "+filter_column.name)
True ->
new_filters = self.context.where_filters + [filter.expression]
new_filters = self.context.where_filters + [filter_column.expression]
new_ctx = self.context.set_where_filters new_filters
self.updated_context new_ctx

@@ -427,7 +427,7 @@ type Table
numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty
result as one could expect if the limit was applied before the filters.
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5
t2 = t1.where (t1.at 'A' > 5)
t2 = t1.filter 'A' (Greater than=5)
t2.read
limit : Integer -> Table
limit self max_rows =
@@ -751,20 +751,14 @@ type Table
msg = "Parsing values is not supported in database tables, the table has to be materialized first with `read`."
Error.throw (Unsupported_Database_Operation_Error_Data msg)

## UNSTABLE

Returns a new Table without rows that contained missing values in any of
the columns.
## DEPRECATED Will be replaced with `filter_incomplete_rows`.
drop_missing_rows : Table
drop_missing_rows self =
filters = self.columns.map (c -> c.is_missing.not.expression)
new_ctx = self.context.set_where_filters (self.context.where_filters + filters)
self.updated_context new_ctx

## Returns a new Table without columns that contained any missing values.

This operation needs to actually materialize the underlying query in
order to know which columns to drop.
## DEPRECATED Will be replaced with `Incomplete_Columns` selector (to be used with `remove_columns`).
drop_missing_columns : Table
drop_missing_columns self =
rows_expr = IR.Operation "COUNT_ROWS" []
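For orientation, the sketch below shows how the reworked database `filter` is intended to be called after this change. It is an illustrative example only, not part of the commit: `t1` stands for any database table handle, and the "age" and "active" column names are placeholders.

from Standard.Base import all
from Standard.Table import Filter_Condition

example_database_filter t1 =
    # Keep rows whose numeric column "age" is greater than 18.
    adults = t1.filter "age" (Filter_Condition.Greater than=18)
    # With the new default condition (Filter_Condition.Is_True), a boolean
    # column can be selected by name alone.
    adults.filter "active"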
@@ -42,7 +42,7 @@
example_where =
table = Examples.inventory_table
mask = (table.at "sold_stock" > (table.at "total_stock" / 2))
table.where mask
table.filter mask

> Example
Sort the shop inventory based on the total stock, using the number sold to
@@ -14,7 +14,7 @@
example_where =
table = Examples.inventory_table
mask = (table.at "sold_stock" > (table.at "total_stock" / 2))
table.where mask
table.filter mask

> Example
Multiply each element of the column by itself.
@@ -5,6 +5,10 @@ import Standard.Base.Data.Index_Sub_Range
import Standard.Table.Data.Table
import Standard.Table.Data.Storage

# TODO Dubious constructor export
from Standard.Table.Data.Column.Column import all
from Standard.Table.Data.Column.Column export all

polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.operations.OrderBuilder

@@ -24,10 +28,6 @@ polyglot java import org.enso.table.operations.OrderBuilder
from_vector : Text -> Vector -> Column
from_vector name items = Column_Data (Java_Column.fromItems name items.to_array)

# TODO Dubious constructor export
from project.Data.Column.Column import all
from project.Data.Column.Column export all

type Column

## PRIVATE
@@ -2,6 +2,8 @@ from Standard.Base import all
from Standard.Table.Data.Table import Table
from Standard.Table.Data.Column import Column

from Standard.Table.Data.Filter_Condition.Filter_Condition import all

type Filter_Condition
## Is less than a value (or another column)?
Less than:(Column|Any)
@@ -55,20 +57,20 @@ type Filter_Condition
A helper function gathering the common logic that generates a boolean mask
from a given source column and a filter condition. It contains logic common
for all backends.
make_filter_column source_column filter_condition = case filter of
make_filter_column source_column filter_condition = case filter_condition of
# TODO check types
Less value -> (source_column < value)
Less_Or_Equal value -> (source_column <= value)
Equal_Or_Less value -> (source_column <= value)
Equal value -> (source_column == value)
Greater_Or_Equal value -> (source_column >= value)
Equal_Or_Greater value -> (source_column >= value)
Greater value -> (source_column > value)
Not_Equal value -> (source_column != value)
Between lower upper -> ((source_column >= lower) && (source_column <= upper))
Starts_With prefix -> source_column.starts_with prefix
Ends_With suffix -> source_column.ends_with suffix
Contains substring -> source_column.contains prefix
Is_Nothing -> (source_column == Nothing)
Not_Nothing -> (source_column != Nothing)
Is_True -> (source_column == True)
Is_False -> (source_column == False)
_ -> column.map filter
Contains substring -> source_column.contains substring
Is_Nothing -> source_column.is_missing
Not_Nothing -> source_column.is_missing.not
Is_True -> source_column
Is_False -> source_column.not
_ -> source_column.map filter_condition
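
To make the shared helper above concrete, here is a hedged sketch of how a condition is turned into a boolean mask; the table value and the "age" column are assumptions made for this example, not code from the commit.

from Standard.Base import all
from Standard.Table.Data.Filter_Condition import make_filter_column, Filter_Condition

example_mask table =
    ages = table.at "age"
    # Build a boolean mask column that is True where 18 <= age <= 65.
    mask = make_filter_column ages (Filter_Condition.Between 18 65)
    # `Table.filter` applies such a mask; with the default Is_True condition,
    # passing the mask directly keeps exactly the matching rows.
    table.filter mask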
55 changes: 28 additions & 27 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@@ -7,7 +7,7 @@ import Standard.Base.Data.Text.Case
import Standard.Base.System.Platform

import Standard.Table.Data.Column
from Standard.Table.Data.Filter_Condition import make_filter_column
from Standard.Table.Data.Filter_Condition import make_filter_column, Filter_Condition
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Aggregate_Column_Helper
import Standard.Table.Internal.Parse_Values_Helper
@@ -30,6 +30,10 @@ import Standard.Table.Data.Position
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column

# TODO Dubious constructor export
from Standard.Table.Data.Table.Table import all
from Standard.Table.Data.Table.Table export all

import Standard.Table.Data.Aggregate_Column
import Standard.Visualization

@@ -135,10 +139,6 @@ concat : Vector -> Table
concat tables =
Table_Data (Java_Table.concat (tables.map .java_table).to_array)

# TODO Dubious constructor export
from project.Data.Table.Table import all
from project.Data.Table.Table export all

## Represents a column-oriented table data structure.
type Table

@@ -807,30 +807,13 @@ type Table

people.filter "age" (age -> (age%10 == 0))
filter : (Column | Text | Integer) -> (Filter_Condition|(Any->Boolean)) -> Table
filter self column filter=(Filter_Condition.Is_True) = case column of
filter self column filter=(Filter_Condition.Is_True) = case Meta.type_of column of
Text -> self.filter (self.at column) filter
Integer -> self.filter (self.at column) filter
Column _ ->
Column.Column ->
filter_column = make_filter_column column filter
Table_Data (self.java_table.mask filter_column.java_column)

## Filter rows not containing any values.

Arguments:
- keep_nans: If `True`, then NaN values are considered as valid.
- keep_empty: If `True`, then `""` is considered as valid.
filter_blank_rows : Boolean -> Boolean -> Table
filter_blank_rows self keep_nans=True keep_empty=True = ...

## Filter rows containing any missing values.

Arguments:
- keep_nans: If `True`, then NaN values are considered as valid.
- keep_empty: If `True`, then `""` is considered as valid.
filter_incomplete_rows : Boolean -> Boolean -> Table
filter_incomplete_rows self keep_nans=True keep_empty=True = ...


## Creates a new Table with the specified range of rows from the input
Table.

@@ -872,7 +855,7 @@ type Table
table = Examples.inventory_table
double_inventory = table.at "total_stock" * 2
table.set "total_stock" double_inventory
set : Text -> Column.Column | Vector.Vector -> Table
set : Text -> Column | Vector.Vector -> Table
set self name column = case column of
Vector.Vector ->
self.set name (Column.from_vector name column)
@@ -918,7 +901,7 @@ type Table
import Standard.Examples

example_index = Examples.inventory_table.index
index : Column.Column ! No_Index_Set_Error
index : Column ! No_Index_Set_Error
index self = case self.java_table.getIndex.toColumn of
Nothing -> Error.throw No_Index_Set_Error
i -> Column.Column_Data i
@@ -954,13 +937,31 @@ type Table
Examples.inventory_table.join Examples.popularity_table

Icon: join
join : Table | Column.Column -> Text | Nothing -> Boolean -> Text -> Text -> Table
join : Table | Column -> Text | Nothing -> Boolean -> Text -> Text -> Table
join self other on=Nothing drop_unmatched=False left_suffix='_left' right_suffix='_right' =
case other of
Column.Column_Data _ -> self.join other.to_table on drop_unmatched left_suffix right_suffix
Table_Data t ->
Table_Data (self.java_table.join t drop_unmatched on left_suffix right_suffix)

## DEPRECATED Will be replaced with `filter_incomplete_rows`.
drop_missing_rows : Table
drop_missing_rows self =
cols = self.columns
case cols.not_empty of
True ->
any_missing_mask = cols.map .is_missing . reduce (||)
non_missing_mask = any_missing_mask.not
self.filter non_missing_mask
False -> self

## DEPRECATED Will be replaced with `Incomplete_Columns` selector (to be used with `remove_columns`).
drop_missing_columns : Table
drop_missing_columns self =
non_missing = self.columns . filter (col -> col.count_missing == 0)
index = self.java_table.getIndex
Table_Data (Java_Table.new (non_missing.map .java_column . to_array) index)

## Returns the number of rows in this table.

> Example
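A hedged before-and-after sketch of the masking rename that the test updates below rely on; the tiny table is invented for illustration and the imports are assumed to follow the test files.

from Standard.Base import all
import Standard.Table.Data.Table

example_migration =
    t = Table.new [["a", [1, Nothing, 3]], ["b", [4, 5, Nothing]]]
    mask = (t.at "a") == 1
    # Old API: t.where mask; after this change the same mask goes to `filter`.
    masked = t.filter mask
    # The deprecated helper still works, pending `filter_incomplete_rows`.
    complete = t.drop_missing_rows
    [masked, complete]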
10 changes: 3 additions & 7 deletions test/Table_Tests/src/Database/Codegen_Spec.enso
@@ -29,7 +29,7 @@ spec =
t1 = test_connection.query (Table_Name "T1")
Test.group "[Codegen] JSON serialization" <|
Test.specify "should serialize Tables and Columns to their SQL representation" <|
q1 = t1.where (t1.at "A" == 42) . to_json
q1 = t1.filter (t1.at "A" == 42) . to_json
part1 = Json.from_pairs [["sql_code", 'SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" = ']]
interp = Json.from_pairs [["value", 42], ["expected_sql_type", "INTEGER"]]
part2 = Json.from_pairs [["sql_interpolation", interp]]
@@ -92,11 +92,11 @@ spec =

Test.group "[Codegen] Masking Tables and Columns" <|
Test.specify "should allow filtering table rows based on a boolean expression" <|
t2 = t1.where (t1.at "A" == 42)
t2 = t1.filter (t1.at "A" == 42)
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" = ?)', [[42, int]]]

Test.specify "should allow selecting column rows based on a boolean expression" <|
c2 = (t1.at "B").where (t1.at "A" == t1.at "C")
c2 = t1.filter (t1.at "A" == t1.at "C") . at "B"
c2.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" WHERE ("T1"."A" = "T1"."C")', []]

Test.group "[Codegen] Joining Tables" <|
@@ -141,10 +141,6 @@ spec =
c = t1.at "A" . fill_missing "not-applicable"
c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "A" FROM "T1" AS "T1"', [["not-applicable", int]]]

Test.specify "drop_missing should drop missing rows in a Column" <|
col = t1.at "A" . drop_missing
col.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" WHERE (NOT ("T1"."A" IS NULL))', []]

Test.specify "drop_missing_rows should drop rows that contain at least one missing column in a Table" <|
t2 = t1.drop_missing_rows
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (NOT ("T1"."A" IS NULL)) AND (NOT ("T1"."B" IS NULL)) AND (NOT ("T1"."C" IS NULL))', []]
6 changes: 1 addition & 5 deletions test/Table_Tests/src/Database/Common_Spec.enso
@@ -115,7 +115,7 @@ spec prefix connection pending=Nothing =

Test.group prefix+"Masking Tables" pending=pending <|
Test.specify "should allow to select rows from a table or column based on an expression" <|
t2 = t1.where (t1.at "a" == 1)
t2 = t1.filter (t1.at "a" == 1)
df = t2.read
df.at "a" . to_vector . should_equal [1]
df.at "b" . to_vector . should_equal [2]
@@ -187,10 +187,6 @@ spec prefix connection pending=Nothing =
col.count . should_equal 3
col.count_missing . should_equal 2

Test.specify "drop_missing should drop missing rows in a Column" <|
col = t4.at 'a'
col.drop_missing.to_vector . should_equal [0, 1, 42]

Test.specify "drop_missing_rows should drop rows that contain at least one missing column in a Table" <|
d = t4.drop_missing_rows.read
d.at 'a' . to_vector . should_equal [0]
22 changes: 3 additions & 19 deletions test/Table_Tests/src/Table_Spec.enso
@@ -271,35 +271,23 @@ spec =
Test.group "Masking Tables" <|
Test.specify "should allow selecting table rows based on a boolean column" <|
df = (enso_project.data / "simple_empty.csv").read
r = df.where (Column.from_vector 'x' [True, False, False, True])
r = df.filter (Column.from_vector 'x' [True, False, False, True])
r.at "a" . to_vector . should_equal ["1", "10"]
r.at "b" . to_vector . should_equal [2, 11]
r.at "c" . to_vector . should_equal [Nothing, 12]
Test.specify "should treat NA values in the mask as false and extend the mask with NAs" <|
df = (enso_project.data / "simple_empty.csv").read
r = df.where (Column.from_vector 'x' [Nothing, True, False])
r = df.filter (Column.from_vector 'x' [Nothing, True, False])
r.at "a" . to_vector . should_equal ["4"]
r.at "b" . to_vector . should_equal [Nothing]
r.at "c" . to_vector . should_equal [6]
Test.specify "should work correctly if a mask is bigger than the table itself" <|
df = (enso_project.data / "simple_empty.csv").read
r = df.where (Column.from_vector 'x' [True, False, False, False, True])
r = df.filter (Column.from_vector 'x' [True, False, False, False, True])
r.at "a" . to_vector . should_equal ["1"]
r.at "b" . to_vector . should_equal [2]
r.at "c" . to_vector . should_equal [Nothing]

Test.group "Masking Columns" <|
Test.specify "should allow selecting column rows based on a boolean column" <|
df = (enso_project.data / "simple_empty.csv").read
mask = Column.from_vector 'x' [True, False, False, True]
df.at "a" . where mask . to_vector . should_equal ["1", "10"]
df.at "c" . where mask . to_vector . should_equal [Nothing, 12]

Test.specify "should work correctly if a mask is bigger than the column itself" <|
col = Column.from_vector "col" [1, 2]
mask = Column.from_vector 'x' [True, False, False, True]
col.where mask . to_vector . should_equal [1]

Test.group "Joining Tables" <|
a_0 = ['x', [0, 1, 7, 3, 6]]
a_1 = ['y', ["foo", "bar", "baz", "spam", "eggs"]]
@@ -401,10 +389,6 @@ spec =
col.count_missing . should_equal 1

Test.group "Dropping Missing Values" <|
Test.specify "should drop missing rows in a Column" <|
col = Column.from_vector 'x' [1, Nothing, 2, Nothing]
col.drop_missing.to_vector . should_equal [1, 2]

Test.specify "should drop rows that contain at least one missing column in a Table" <|
t = Table.new [["a", [1, Nothing, 3, 4]], ["b", [1, 2, Nothing, "x"]]]
d = t.drop_missing_rows
2 changes: 1 addition & 1 deletion test/Visualization_Tests/src/SQL_Spec.enso
@@ -12,7 +12,7 @@ visualization_spec connection =
t = connection.query (Table_Name "T")
Test.group "SQL Visualization" <|
Test.specify "should provide type metadata for interpolations" <|
q = t.where ((t.at "B" == 2) && (t.at "A" == True)) . at "C"
q = t.filter ((t.at "B" == 2) && (t.at "A" == True)) . at "C"
vis = Visualization.prepare_visualization q
int_param = Json.from_pairs [["value", 2], ["actual_type", "Standard.Base.Data.Numbers.Integer"], ["expected_sql_type", "INTEGER"], ["expected_enso_type", "Standard.Base.Data.Numbers.Integer"]]
str_param = Json.from_pairs [["value", True], ["actual_type", "Standard.Base.Data.Boolean.Boolean"], ["expected_sql_type", "VARCHAR"], ["expected_enso_type", "Standard.Base.Data.Text.Text"]]
