From 891f064a6a8e8bfa1aa3b713c00456dfb213a014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 12 Apr 2022 13:02:01 +0200 Subject: [PATCH] Extend Aggregate_Spec test suite with tests for missed edge-cases to ensure the feature is well-tested on all backends (#3383) Implements https://www.pivotaltracker.com/story/show/181805693 and finishes the basic set of features of the Aggregate component. Still not all aggregations are supported everywhere, because for example SQLite has quite limited support for aggregations. Currently the workaround is to bring the table into memory (if possible) and perform the computation locally. Later on, we may add more complex generator features to emulate the missing aggregations with complex sub-queries. --- CHANGELOG.md | 7 +- .../0.0.0-dev/src/Data/Number/Extensions.enso | 23 +- .../0.0.0-dev/src/Connection/Connection.enso | 46 +- .../Database/0.0.0-dev/src/Data/Dialect.enso | 2 +- .../0.0.0-dev/src/Data/Dialect/Helpers.enso | 3 +- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 112 ++-- .../0.0.0-dev/src/Data/Dialect/Redshift.enso | 2 +- .../0.0.0-dev/src/Data/Dialect/Sqlite.enso | 99 ++-- .../src/Data/Internal/Aggregate_Helper.enso | 9 + .../src/Data/Internal/Base_Generator.enso | 13 +- .../Database/0.0.0-dev/src/Data/Sql.enso | 9 + .../Database/0.0.0-dev/src/Data/Table.enso | 20 +- .../src/Internal/Aggregate_Column_Helper.enso | 2 +- .../lib/Standard/Test/0.0.0-dev/src/Main.enso | 16 +- .../builtin/text/AnyToTextNode.java | 7 +- .../text/util/TypeToDisplayTextNode.java | 10 +- .../enso/table/aggregations/Concatenate.java | 33 +- .../enso/table/aggregations/Percentile.java | 41 +- .../table/aggregations/StandardDeviation.java | 19 +- test/Table_Tests/src/Aggregate_Spec.enso | 502 ++++++++++++++++-- test/Table_Tests/src/Common_Table_Spec.enso | 60 +-- .../src/Database/Codegen_Spec.enso | 2 +- .../Table_Tests/src/Database/Common_Spec.enso | 24 - .../src/Database/Postgresql_Spec.enso | 59 +- 
.../src/Database/Redshift_Spec.enso | 11 +- .../Table_Tests/src/Database/Sqlite_Spec.enso | 17 +- test/Table_Tests/src/Table_Spec.enso | 5 +- test/Tests/src/Data/Numbers_Spec.enso | 28 + 28 files changed, 874 insertions(+), 307 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70f077a5f5d3..27dea42380d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,9 +94,11 @@ - [Implemented `Panic.catch` and helper functions for handling errors. Added a type parameter to `Panic.recover` to recover specific types of errors.][3344] - [Added warning handling to `Table.aggregate`][3349] -- [Improved performance of `Table.aggregate` and full warnings implementation] - [3364] +- [Improved performance of `Table.aggregate` and full warnings + implementation][3364] - [Implemented `Text.reverse`][3377] +- [Implemented support for most Table aggregations in the Database + backend.][3383] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -149,6 +151,7 @@ [3366]: https://github.com/enso-org/enso/pull/3366 [3379]: https://github.com/enso-org/enso/pull/3379 [3381]: https://github.com/enso-org/enso/pull/3381 +[3383]: https://github.com/enso-org/enso/pull/3383 #### Enso Compiler diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso index 5faefeb9deb1..dc3280f2d26a 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso @@ -209,7 +209,8 @@ Integer.up_to n = Range this n 1.equals 1.0000001 epsilon=0.001 Number.equals : Number -> Number -> Boolean -Number.equals that epsilon=0.0 = (this - that).abs <= epsilon +Number.equals that epsilon=0.0 = + (this == that) || ((this - that).abs <= epsilon) ## Returns the smaller value of `this` and `that`. 
@@ -301,3 +302,23 @@ Parse_Error.to_display_text : Text Parse_Error.to_display_text = "Could not parse " + this.text.to_text + " as a double." +## A constant holding the floating-point positive infinity. +Number.positive_infinity : Decimal +Number.positive_infinity = Double.POSITIVE_INFINITY + +## A constant holding the floating-point negative infinity. +Number.negative_infinity : Decimal +Number.negative_infinity = Double.NEGATIVE_INFINITY + +## A constant holding the floating-point Not-a-Number value. +Number.nan : Decimal +Number.nan = Double.NaN + +## Checks if the given number is the floating-point Not-a-Number value. + + This is needed, because the NaN value will return `False` even when being + compared with itself, so `x == Number.nan` would not work. +Number.is_nan : Boolean +Number.is_nan = case this of + Decimal -> Double.isNaN this + _ -> False diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso index 02d2df9cd8df..9b299b585f46 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso @@ -65,7 +65,7 @@ type Connection meant only for internal use. execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table = execute_query query expected_types=Nothing = here.handle_sql_errors <| - Resource.bracket (this.prepare_statement query) .close stmt-> + this.with_prepared_statement query stmt-> rs = stmt.executeQuery metadata = rs.getMetaData ncols = metadata.getColumnCount @@ -97,31 +97,27 @@ type Connection representing the query to execute. execute_update : Text | Sql.Statement -> Integer execute_update query = here.handle_sql_errors <| - Resource.bracket (this.prepare_statement query) .close stmt-> - ## FIXME USE CATCH HERE! 
- result = Panic.recover Any stmt.executeLargeUpdate - result.catch err-> case err of - Polyglot_Error exc -> - case Java.is_instance exc UnsupportedOperationException of - True -> - stmt.executeUpdate - False -> Error.throw err - _ -> Error.throw err + this.with_prepared_statement query stmt-> + Panic.catch UnsupportedOperationException stmt.executeLargeUpdate _-> + stmt.executeUpdate ## PRIVATE - Prepares the statement by ensuring that it is sanitised. - - Arguments: - - query: The query to prepare the SQL statement in. - prepare_statement : Text | Sql.Statement -> PreparedStatement - prepare_statement query = - go template holes=[] = Managed_Resource.with this.connection_resource java_connection-> + Runs the provided action with a prepared statement, adding contextual + information to any thrown SQL errors. + with_prepared_statement : Text | Sql.Statement -> (PreparedStatement -> Any) -> Any + with_prepared_statement query action = + prepare template holes = Managed_Resource.with this.connection_resource java_connection-> stmt = java_connection.prepareStatement template Panic.catch Any (here.set_statement_values stmt holes) caught_panic-> stmt.close Panic.throw caught_panic stmt + + go template holes = + here.wrap_sql_errors related_query=template <| + Resource.bracket (prepare template holes) .close action + case query of Text -> go query [] Sql.Statement _ -> @@ -140,7 +136,7 @@ type Connection fetch_columns table_name = query = IR.Select_All (IR.make_ctx_from table_name) compiled = this.dialect.generate_sql query - Resource.bracket (this.prepare_statement compiled) .close stmt-> + this.with_prepared_statement compiled stmt-> rs = stmt.executeQuery metadata = rs.getMetaData ncols = metadata.getColumnCount @@ -363,7 +359,7 @@ type Sql_Error Convert the SQL error to a textual representation. 
to_text : Text to_text = - query = if this.related_query.is_nothing.not then " [Query was: " + query + "]" else "" + query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else "" "There was an SQL error: " + this.java_exception.getMessage.to_text + "." + query ## UNSTABLE @@ -406,10 +402,10 @@ type Sql_Timeout_Error Arguments: - action: The computation to execute. This computation may throw SQL errors. -handle_sql_errors : Any -> Any ! (Sql_Error | Sql_Timeout_Error) -handle_sql_errors ~action = +handle_sql_errors : Any -> (Text | Nothing) -> Any ! (Sql_Error | Sql_Timeout_Error) +handle_sql_errors ~action related_query=Nothing = Panic.recover [Sql_Error, Sql_Timeout_Error] <| - here.wrap_sql_errors action + here.wrap_sql_errors action related_query ## PRIVATE @@ -437,7 +433,9 @@ default_storage_type storage_type = case storage_type of Storage.Integer -> Sql_Type.integer Storage.Decimal -> Sql_Type.double Storage.Boolean -> Sql_Type.boolean - Storage.Any -> Sql_Type.blob + ## Support for mixed type columns in Table upload is currently very limited, + falling back to treating everything as text. + Storage.Any -> Sql_Type.text ## PRIVATE Sets values inside of a prepared statement. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index 042056d74acb..48cf0d582033 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -35,7 +35,7 @@ type Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type = Errors.unimplemented "This is an interface only." 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso index 13815a974a45..8f1477374735 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso @@ -38,7 +38,8 @@ make_concat make_raw_concat_expr make_contains_expr has_quote args = includes_separator = separator ++ Sql.code " != '' AND " ++ make_contains_expr expr separator ## We use the assumption that `has_quote` is True iff `quote` is not empty. includes_quote = make_contains_expr expr quote - needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren + is_empty = expr ++ Sql.code " = ''" + needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren ++ Sql.code " OR " ++ is_empty.paren escaped = Sql.code "replace(" ++ expr ++ Sql.code ", " ++ quote ++ Sql.code ", " ++ quote ++ append ++ quote ++ Sql.code ")" quoted = quote ++ append ++ escaped ++ append ++ quote Sql.code "CASE WHEN " ++ needs_quoting ++ Sql.code " THEN " ++ quoted ++ Sql.code " ELSE " ++ expr ++ Sql.code " END" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index 444b812a3a53..58ed3795a1ab 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -39,41 +39,14 @@ type Postgresql_Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. 
resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate ## PRIVATE make_internal_generator_dialect = - starts_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation starts_with") - ends_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation ends_with") - contains arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation contains") - text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with], here.agg_shortest, here.agg_longest]+here.concat_ops - counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty] + text = [here.starts_with, here.contains, here.ends_with, here.agg_shortest, here.agg_longest]+here.concat_ops + counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]] stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"] stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"] @@ -83,7 +56,7 @@ make_internal_generator_dialect = ## PRIVATE The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. 
resolve_target_sql_type aggregate = case aggregate of Group_By c _ -> c.sql_type Count _ -> Sql_Type.bigint @@ -102,10 +75,15 @@ resolve_target_sql_type aggregate = case aggregate of Longest c _ -> c.sql_type Standard_Deviation _ _ _ -> Sql_Type.double Concatenate _ _ _ _ _ _ -> Sql_Type.text - ## TODO [RW] revise these - Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double - Average _ _ -> Sql_Type.numeric # TODO can be double sometimes - Median _ _ -> Sql_Type.numeric # TODO can be double sometimes + Sum c _ -> + if (c.sql_type == Sql_Type.integer) || (c.sql_type == Sql_Type.smallint) then Sql_Type.bigint else + if c.sql_type == Sql_Type.bigint then Sql_Type.numeric else + c.sql_type + Average c _ -> + if c.sql_type.is_definitely_integer then Sql_Type.numeric else + if c.sql_type.is_definitely_double then Sql_Type.double else + c.sql_type + Median _ _ -> Sql_Type.double ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> @@ -121,14 +99,30 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> ## PRIVATE agg_median = Base_Generator.lift_unary_op "MEDIAN" arg-> - Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" + median = Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" + ## TODO Technically, this check may not be necessary if the input column has + type INTEGER, because it is impossible to represent a NaN in that type. + However, currently the column type inference is not tested well-enough to + rely on this, so leaving a uniform approach regardless of type. This + could be revisited when further work on column types takes place. 
+ See issue: https://www.pivotaltracker.com/story/show/180854759 + has_nan = Sql.code "bool_or(" ++ arg ++ Sql.code " = double precision 'NaN')" + Sql.code "CASE WHEN " ++ has_nan ++ Sql.code " THEN 'NaN' ELSE " ++ median ++ Sql.code " END" ## PRIVATE agg_mode = Base_Generator.lift_unary_op "MODE" arg-> Sql.code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr-> - Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")" + percentile = Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")" + ## TODO Technically, this check may not be necessary if the input column has + type INTEGER, because it is impossible to represent a NaN in that type. + However, currently the column type inference is not tested well-enough to + rely on this, so leaving a uniform approach regardless of type. This + could be revisited when further work on column types takes place. 
+ See issue: https://www.pivotaltracker.com/story/show/180854759 + has_nan = Sql.code "bool_or(" ++ expr ++ Sql.code " = double precision 'NaN')" + Sql.code "CASE WHEN " ++ has_nan ++ Sql.code " THEN 'NaN' ELSE " ++ percentile ++ Sql.code " END" ## PRIVATE These are written in a not most-efficient way, but a way that makes them @@ -172,8 +166,44 @@ agg_longest = Base_Generator.lift_unary_op "LONGEST" arg-> ## PRIVATE concat_ops = make_raw_concat_expr expr separator = - Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")" - make_contains_expr expr substring = - Sql.code "position(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" - concat = Helpers.make_concat make_raw_concat_expr make_contains_expr + Sql.code "string_agg(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")" + concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] + + +## PRIVATE +agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_Error "COUNT_DISTINCT requires at least one argument.")) else + case args.length == 1 of + True -> + ## A single null value will be skipped. + Sql.code "COUNT(DISTINCT " ++ args.first ++ Sql.code ")" + False -> + ## A tuple of nulls is not a null, so it will not be skipped - but + we want to ignore all-null columns. So we manually filter them + out. + count = Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))" + are_nulls = args.map arg-> arg.paren ++ Sql.code " IS NULL" + all_nulls_filter = Sql.code " FILTER (WHERE NOT (" ++ Sql.join " AND " are_nulls ++ Sql.code "))" + (count ++ all_nulls_filter).paren + +## PRIVATE +agg_count_distinct_include_null args = + ## If we always count as tuples, then even null fields are counted. 
+ Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code ", 0))" + +## PRIVATE +starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub-> + res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')") + res.paren + +## PRIVATE +ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> + res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")") + res.paren + +## PRIVATE +make_contains_expr expr substring = + Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0" + +## PRIVATE +contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso index a693413e6fdc..624fc74174c1 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso @@ -38,7 +38,7 @@ type Redshift_Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. 
resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type aggregate = Postgres.resolve_target_sql_type aggregate diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso index b94d821f7c8e..a684a6b65e48 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso @@ -5,6 +5,7 @@ from Standard.Database.Data.Sql import Sql_Type import Standard.Database.Data.Dialect import Standard.Database.Data.Dialect.Helpers import Standard.Database.Data.Internal.Base_Generator +from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error ## PRIVATE @@ -38,70 +39,48 @@ type Sqlite_Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. 
resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate ## PRIVATE make_internal_generator_dialect = - starts_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation starts_with") - ends_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation ends_with") - contains arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code " || '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation contains") - text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]+here.concat_ops - counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty] + text = [here.starts_with, here.contains, here.ends_with]+here.concat_ops + counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]] stats = [here.agg_stddev_pop, here.agg_stddev_samp] my_mappings = text + counts + stats Base_Generator.base_dialect . extend_with my_mappings ## PRIVATE The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. 
resolve_target_sql_type aggregate = case aggregate of Group_By c _ -> c.sql_type Count _ -> Sql_Type.integer - Count_Distinct _ _ _ -> Sql_Type.integer + Count_Distinct columns _ _ -> + if columns.length == 1 then Sql_Type.integer else + here.unsupported "Count_Distinct on multiple columns" Count_Not_Nothing _ _ -> Sql_Type.integer Count_Nothing _ _ -> Sql_Type.integer Count_Not_Empty _ _ -> Sql_Type.integer Count_Empty _ _ -> Sql_Type.integer - Percentile _ _ _ -> Sql_Type.real - Mode c _ -> c.sql_type - First c _ _ _ -> c.sql_type - Last c _ _ _ -> c.sql_type + Percentile _ _ _ -> here.unsupported "Percentile" + Mode _ _ -> here.unsupported "Mode" + First _ _ _ _ -> here.unsupported "First" + Last _ _ _ _ -> here.unsupported "Last" Maximum c _ -> c.sql_type Minimum c _ -> c.sql_type - Shortest c _ -> c.sql_type - Longest c _ -> c.sql_type + Shortest _ _ -> here.unsupported "Shortest" + Longest _ _ -> here.unsupported "Longest" Standard_Deviation _ _ _ -> Sql_Type.real Concatenate _ _ _ _ _ _ -> Sql_Type.text - ## TODO revise these Sum c _ -> c.sql_type Average _ _ -> Sql_Type.real - Median _ _ -> Sql_Type.real + Median _ _ -> here.unsupported "Median" + +## PRIVATE +unsupported name = + Error.throw (Unsupported_Database_Operation_Error name+" is not supported by SQLite backend. 
You may need to materialize the table and perform the operation in-memory.") ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> @@ -119,7 +98,7 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" arg-> sum_of_squares = Sql.code "SUM(" ++ arg.paren ++ Sql.code "*" ++ arg.paren ++ Sql.code ")" square_of_sums = Sql.code "SUM(" ++ arg ++ Sql.code ") * SUM(" ++ arg ++ Sql.code ")" - n = Sql.code "COUNT(" ++ arg ++ Sql.code ")" + n = Sql.code "CAST(COUNT(" ++ arg ++ Sql.code ") AS REAL)" var = Sql.code "(" ++ sum_of_squares ++ Sql.code " - (" ++ square_of_sums ++ Sql.code " / " ++ n ++ Sql.code ")) / " ++ n Sql.code "SQRT(" ++ var ++ Sql.code ")" @@ -127,7 +106,7 @@ agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" arg-> agg_stddev_samp = Base_Generator.lift_unary_op "STDDEV_SAMP" arg-> sum_of_squares = Sql.code "SUM(" ++ arg.paren ++ Sql.code "*" ++ arg.paren ++ Sql.code ")" square_of_sums = Sql.code "SUM(" ++ arg ++ Sql.code ") * SUM(" ++ arg ++ Sql.code ")" - n = Sql.code "COUNT(" ++ arg ++ Sql.code ")" + n = Sql.code "CAST(COUNT(" ++ arg ++ Sql.code ") AS REAL)" var = Sql.code "(" ++ sum_of_squares ++ Sql.code " - (" ++ square_of_sums ++ Sql.code " / " ++ n ++ Sql.code ")) / (" ++ n ++ Sql.code " - 1)" Sql.code "SQRT(" ++ var ++ Sql.code ")" @@ -158,7 +137,37 @@ window_aggregate window_type ignore_null args = concat_ops = make_raw_concat_expr expr separator = Sql.code "group_concat(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")" - make_contains_expr expr substring = - Sql.code "instr(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" - concat = Helpers.make_concat make_raw_concat_expr make_contains_expr + concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] + + +## PRIVATE +agg_count_distinct args = case args.length 
== 1 of + True -> Sql.code "COUNT(DISTINCT (" ++ args.first ++ Sql.code "))" + False -> Error.throw (Illegal_Argument_Error "COUNT_DISTINCT supports only single arguments in SQLite.") + +## PRIVATE +agg_count_distinct_include_null args = case args.length == 1 of + True -> + arg = args.first + count = Sql.code "COUNT(DISTINCT " ++ arg ++ Sql.code ")" + all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ Sql.code " IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END" + count ++ Sql.code " + " ++ all_nulls_case + False -> Error.throw (Illegal_Argument_Error "COUNT_DISTINCT supports only single arguments in SQLite.") + +## PRIVATE +starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub-> + res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')") + res.paren + +## PRIVATE +ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> + res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")") + res.paren + +## PRIVATE +make_contains_expr expr substring = + Sql.code "instr(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" + +## PRIVATE +contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso index 048322413ef6..df63005f2e71 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso @@ -5,12 +5,21 @@ import Standard.Database.Data.Internal.IR from Standard.Database.Data.Sql import Sql_Type from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error +## PRIVATE + Creates an `Internal_Column` that computes the specified statistic. + It returns a dataflow error if the given operation is not supported. 
+ + The provided `aggregate` is assumed to contain only already resolved columns. + You may need to transform it with `resolve_aggregate` first. make_aggregate_column : Table -> Aggregate_Column -> Text -> IR.Internal_Column make_aggregate_column table aggregate new_name = sql_type = table.connection.dialect.resolve_target_sql_type aggregate expression = here.make_expression aggregate IR.Internal_Column new_name sql_type expression +## PRIVATE + Creates an Internal Representation of the expression that computes a + requested statistic. make_expression : Aggregate_Column -> IR.Expression make_expression aggregate = is_non_empty_vector v = if v.is_nothing then False else v.not_empty diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso index f7b31fccc90d..edfb4f7761fa 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso @@ -168,22 +168,11 @@ base_dialect = logic = [bin "AND", bin "OR", unary "NOT"] compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="] agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"] - counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"], ["COUNT_DISTINCT", here.count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.count_distinct_include_null]] + counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"]] nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", here.make_function "COALESCE"]] base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts) Internal_Dialect base_map here.wrap_in_quotes -## PRIVATE -count_distinct args = - Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))" - -## PRIVATE -count_distinct_include_null args = - count = here.count_distinct args - are_nulls = args.map arg-> arg.paren ++ 
Sql.code " IS NULL" - all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ Sql.join " AND " are_nulls ++ Sql.code " THEN 1 END) > 0 THEN 1 ELSE 0 END" - count ++ Sql.code " + " ++ all_nulls_case - ## PRIVATE Builds code for an expression. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso index 36c5ca344869..3d56a1e5d090 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso @@ -59,6 +59,11 @@ type Sql_Type - name: a database-specific type name, used for pretty printing. type Sql_Type typeid name + == that = case that of + Sql_Type that_id _ -> + this.typeid == that_id + _ -> False + ## The SQL representation of `Boolean` type. boolean : Sql_Type boolean = Sql_Type Types.BOOLEAN "BOOLEAN" @@ -71,6 +76,10 @@ type Sql_Type bigint : Sql_Type bigint = Sql_Type Types.BIGINT "BIGINT" + ## The SQL representation of the `SMALLINT` type. + smallint : Sql_Type + smallint = Sql_Type Types.SMALLINT "SMALLINT" + ## The SQL type representing decimal numbers. decimal : Sql_Type decimal = Sql_Type Types.DECIMAL "DECIMAL" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 51847ea1ca5e..89ae00d443cc 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -657,6 +657,10 @@ type Table new_name = p.first Aggregate_Helper.make_aggregate_column this agg new_name . catch partitioned = results.partition (_.is_an Internal_Column) + ## When working on join we may encounter further issues with having + aggregate columns exposed directly, it may be useful to re-use + the `lift_aggregate` method to push the aggregates into a + subquery. 
new_columns = partitioned.first problems = partitioned.second on_problems.attach_problems_before problems <| @@ -761,9 +765,19 @@ type Table info : Table info = cols = this.internal_columns - count_columns = cols.map c-> IR.Internal_Column c.name Sql.Sql_Type.integer (IR.Operation "COUNT" [c.expression]) - count_table = this.updated_columns count_columns . to_dataframe - counts = count_table.columns.map c-> c.at 0 + count_query = + ## Performing a subquery is the most robust way to handle both + regular columns and aggregates. + Naively wrapping each column in a `COUNT(...)` will not + always work as aggregates cannot be nested. + setup = this.context.as_subquery this.name [this.internal_columns] + new_ctx = IR.subquery_as_ctx setup.first + new_columns = setup.second.first.map column-> + [column.name, IR.Operation "COUNT" [column.expression]] + query = IR.Select new_columns new_ctx + this.connection.dialect.generate_sql query + count_table = this.connection.execute_query count_query + counts = if cols.is_empty then [] else count_table.columns.map c-> c.at 0 types = cols.map c-> c.sql_type.name Materialized_Table.new [["Column", cols.map .name], ["Items Count", counts], ["SQL Type", types]] . set_index "Column" diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index 74438d2b5f03..2f8f7fd58347 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -86,7 +86,7 @@ prepare_aggregate_columns aggregates table = To be used when `new_name` is `Nothing`. Assumes that the `Aggregate_Column` is resolved. You may need to transform it - with `resolve_columns` first. + with `resolve_aggregate` first. 
default_aggregate_column_name aggregate_column = case aggregate_column of Group_By c _ -> c.name diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso index abb63e94953d..ae7d5b8137ae 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso @@ -272,12 +272,16 @@ Error.should_equal _ frames_to_skip=0 = here.fail_match_on_unexpected_error this example_should_equal = 1.00000001 . should_equal 1.00000002 epsilon=0.0001 Decimal.should_equal : Decimal -> Decimal -> Integer -> Assertion -Decimal.should_equal that (epsilon = 0) (frames_to_skip=0) = case this.equals that epsilon of - True -> Success - False -> - loc = Meta.get_source_location 2+frames_to_skip - msg = this.to_text + " did not equal " + that.to_text + " (at " + loc + ")." - Panic.throw (Failure msg) +Decimal.should_equal that epsilon=0 frames_to_skip=0 = + matches = case that of + Number -> this.equals that epsilon + _ -> False + case matches of + True -> Success + False -> + loc = Meta.get_source_location 2+frames_to_skip + msg = this.to_text + " did not equal " + that.to_text + " (at " + loc + ")." 
+ Panic.throw (Failure msg) ## Asserts that the given `Boolean` is `True` diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java index 7d091d4a7399..de37bfe7edec 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java @@ -64,7 +64,12 @@ private Text doComplexAtom(Atom atom) { @CompilerDirectives.TruffleBoundary private String showObject(Object child) throws UnsupportedMessageException { - if (child instanceof Boolean) { + if (child == null) { + // TODO [RW] This is a temporary workaround to make it possible to display errors related to + // https://www.pivotaltracker.com/story/show/181652974 + // Most likely it should be removed once that is implemented. + return "null"; + } else if (child instanceof Boolean) { return (boolean) child ? 
"True" : "False"; } else { return strings.asString(displays.toDisplayString(child)); diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java index 3fa7fb269dc0..34eff81d4b06 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java @@ -29,7 +29,12 @@ String doDisplay( @CachedLibrary(limit = "5") InteropLibrary objects, @CachedLibrary(limit = "5") InteropLibrary displays, @CachedLibrary(limit = "5") InteropLibrary strings) { - if (TypesGen.isLong(value)) { + if (value == null) { + // TODO [RW] This is a temporary workaround to make it possible to display errors related to + // https://www.pivotaltracker.com/story/show/181652974 + // Most likely it should be removed once that is implemented. 
+ return "null"; + } else if (TypesGen.isLong(value)) { return value + " (Integer)"; } else if (TypesGen.isEnsoBigInteger(value)) { return "Integer"; @@ -59,8 +64,7 @@ String doDisplay( try { return strings.asString(displays.toDisplayString(objects.getMetaObject(value))); } catch (UnsupportedMessageException e) { - throw new IllegalStateException( - "Receiver declares a meta object, but does not it return it."); + throw new IllegalStateException("Receiver declares a meta object, but does not return it."); } } else { return "a polyglot object"; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java index 25ec7b378cc3..8f0578503a92 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java @@ -1,24 +1,24 @@ package org.enso.table.aggregations; +import java.util.List; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; import org.enso.table.data.table.problems.UnquotedDelimiter; -import java.util.List; - public class Concatenate extends Aggregator { private final Storage storage; - private final String join; + private final String separator; private final String prefix; private final String suffix; private final String quote; - public Concatenate(String name, Column column, String join, String prefix, String suffix, String quote) { + public Concatenate( + String name, Column column, String separator, String prefix, String suffix, String quote) { super(name, Storage.Type.STRING); this.storage = column.getStorage(); - this.join = join == null ? "" : join; + this.separator = separator == null ? "" : separator; this.prefix = prefix; this.suffix = suffix; this.quote = quote == null ? 
"" : quote; @@ -27,12 +27,12 @@ public Concatenate(String name, Column column, String join, String prefix, Strin @Override public Object aggregate(List indexes) { StringBuilder current = null; - for (int row: indexes) { + for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value == null || value instanceof String) { - String textValue = toQuotedString(value, quote, join); + String textValue = toQuotedString(value, quote, separator); - if (quote.equals("") && textValue.contains(join)) { + if (quote.equals("") && textValue.contains(separator)) { this.addProblem(new UnquotedDelimiter(this.getName(), row, "Unquoted delimiter.")); } @@ -40,7 +40,7 @@ public Object aggregate(List indexes) { current = new StringBuilder(); current.append(textValue); } else { - current.append(join); + current.append(separator); current.append(textValue); } } else { @@ -53,19 +53,26 @@ public Object aggregate(List indexes) { return null; } - if (prefix != null) { current.insert(0, prefix); } + if (prefix != null) { + current.insert(0, prefix); + } current.append(suffix); return current.toString(); } - private static String toQuotedString(Object value, final String quote, final String join) { + private static String toQuotedString(Object value, final String quote, final String separator) { if (value == null) { return ""; } String textValue = value.toString(); - if (!quote.equals("") && (textValue.equals("") || textValue.contains(join))) { - return quote + textValue.replace(quote, quote + quote) + quote; + if (!quote.isEmpty()) { + boolean includes_separator = !separator.isEmpty() && textValue.contains(separator); + boolean includes_quote = textValue.contains(quote); + boolean needs_quoting = textValue.isEmpty() || includes_separator || includes_quote; + if (needs_quoting) { + return quote + textValue.replace(quote, quote + quote) + quote; + } } return textValue; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java 
b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java index 82ebf90de8d7..d90311bd34c9 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java @@ -1,11 +1,13 @@ package org.enso.table.aggregations; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; -import java.util.*; - /*** * Aggregate Column computing a percentile value in a group. */ @@ -22,19 +24,20 @@ public Percentile(String name, Column column, double percentile) { @Override public Object aggregate(List indexes) { int count = 0; - SortedMap currentMap = null; - for (int row: indexes) { + SortedMap currentMap = new TreeMap<>(); + for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value != null) { Double dValue = CastToDouble(value); if (dValue == null) { - this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + this.addProblem( + new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); return null; - } else if (count == 0) { - count = 1; - currentMap = new TreeMap<>(); - currentMap.put(dValue, 1); + } else if (dValue.isNaN()) { + // If any of the input values is a NaN, we do not know where in the ordering it should be + // and so we return NaN. 
+ return Double.NaN; } else { count++; currentMap.put(dValue, currentMap.getOrDefault(dValue, 0) + 1); @@ -42,7 +45,7 @@ public Object aggregate(List indexes) { } } - if (count == 0) { + if (count == 0) { return null; } @@ -66,13 +69,27 @@ public Object aggregate(List indexes) { if (current <= mid && nextCurrent > mid) { double second = entry.getKey(); - return first + (second - first) * (mid_value - mid); + return interpolate(first, second, mid_value - mid); } current = nextCurrent; } - this.addProblem(new InvalidAggregation(this.getName(), -1, "Failed calculating the percentile.")); + this.addProblem( + new InvalidAggregation(this.getName(), -1, "Failed calculating the percentile.")); return null; } + + double interpolate(double first, double second, double alpha) { + if (Double.isInfinite(first) && Double.isInfinite(second)) { + if (first == second) return first; + else return Double.NaN; + } + + // If both are not infinite, then if one of them is infinite, the other must be finite. + if (Double.isInfinite(first)) return first; + if (Double.isInfinite(second)) return second; + + return first + (second - first) * alpha; + } } diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java index a738c392da6d..ac0b99dc35de 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java @@ -1,11 +1,10 @@ package org.enso.table.aggregations; +import java.util.List; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; -import java.util.List; - /*** * Aggregate Column computing the standard deviation of a group. 
*/ @@ -25,7 +24,7 @@ public Calculation(double value) { private final Storage storage; private final boolean population; - public StandardDeviation(String name, Column column,boolean population) { + public StandardDeviation(String name, Column column, boolean population) { super(name, Storage.Type.DOUBLE); this.storage = column.getStorage(); this.population = population; @@ -34,12 +33,13 @@ public StandardDeviation(String name, Column column,boolean population) { @Override public Object aggregate(List indexes) { Calculation current = null; - for (int row: indexes) { + for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value != null) { Double dValue = CastToDouble(value); if (dValue == null) { - this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + this.addProblem( + new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); return null; } @@ -48,12 +48,13 @@ public Object aggregate(List indexes) { } else { current.count++; current.total += dValue; - current.total_sqr += dValue*dValue; + current.total_sqr += dValue * dValue; } } } - return current == null ? null : - (population ? 1 : Math.sqrt(current.count / (current.count - 1.0))) * - Math.sqrt(current.total_sqr / current.count - Math.pow(current.total / current.count, 2)); + + if (current == null || (!population && current.count <= 1)) return null; + return (population ? 
1 : Math.sqrt(current.count / (current.count - 1.0))) + * Math.sqrt(current.total_sqr / current.count - Math.pow(current.total / current.count, 2)); } } diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 05cd7d625630..19c64a13bfa1 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -4,21 +4,24 @@ import Standard.Table from Standard.Table.Data.Column_Selector import By_Name, By_Index from Standard.Table.Data.Aggregate_Column import all from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings +from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error import Standard.Test import Standard.Test.Problems import Standard.Base.Error.Problem_Behavior -type Test_Selection problem_handling=True advanced_stats=True text_concat=True text_shortest_longest=True first_last=True first_last_row_order=True std_dev=True multi_distinct=True aggregation_problems=True +polyglot java import java.lang.Double -all_tests = Test_Selection True True True True True True True True True +type Test_Selection problem_handling=True advanced_stats=True text_concat=True text_shortest_longest=True first_last=True first_last_row_order=True std_dev=True multi_distinct=True aggregation_problems=True nan=True + +all_tests = Test_Selection True True True True True True True True True True spec = file_contents = (Enso_Project.data / "data.csv") . read table = Table.from_csv file_contents empty_table = Table.new <| table.columns.map c->[c.name, []] materialize = x->x - here.aggregate_spec "[In-Memory] " table empty_table materialize + here.aggregate_spec "[In-Memory] " table empty_table Table.new materialize is_database=False ## Runs the common aggregate tests. 
@@ -27,6 +30,8 @@ spec = - table: A table using the tested backend containing data from `data/data.csv`. - empty_table: An empty table using the tested backend. + - table_builder: A function used to build a table using the tested backend + from a vector of columns represented as pairs of name and vector of values. - materialize: A helper function which materializes a table from the tested backend as an in-memory table. Used to easily inspect results of a particular query/operation. @@ -34,7 +39,7 @@ spec = skip checks for backends which do not support particular features. - pending: An optional mark to disable all test groups. Can be used to indicate that some tests are disabled due to missing test setup. -aggregate_spec prefix table empty_table materialize test_selection=here.all_tests pending=Nothing = +aggregate_spec prefix table empty_table table_builder materialize is_database test_selection=here.all_tests pending=Nothing = expect_column_names names table = table.columns . map .name . should_equal names frames_to_skip=2 @@ -43,11 +48,15 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test 0.up_to table.row_count . find i-> 0.up_to key.length . all j-> (table_columns.at j . at i)==(key.at j) + resolve_pending enabled_flag pending=Nothing = + case pending of + Nothing -> if enabled_flag.not then "Not supported." + _ -> pending + Test.group prefix+"Table.aggregate should summarize whole table" pending=pending <| Test.specify "should be able to count" <| grouped = table.aggregate [Count Nothing] materialized = materialize grouped - ## TODO check row count of not materialized one grouped.row_count . should_equal 1 materialized.columns.length . should_equal 1 materialized.columns.at 0 . name . should_equal "Count" @@ -79,8 +88,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "Count Distinct Flag" materialized.columns.at 2 . at 0 . 
should_equal 2 - Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported.") <| - ## TODO [RW] add Count_Distinct with overridden ignore_nothing! also need to modify data.csv to include some nulls on index and flag + Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| grouped = table.aggregate [Count_Distinct (By_Name ["Index", "Flag"])] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -102,7 +110,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . name . should_equal "Average ValueWithNothing" materialized.columns.at 3 . at 0 . should_equal 1.228650 epsilon=0.000001 - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = table.aggregate [Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -116,7 +124,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing_1" materialized.columns.at 3 . at 0 . 
should_equal 58.575554 epsilon=0.000001 - Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median, mode and percentile values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -134,7 +142,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 5 . name . should_equal "40%-ile ValueWithNothing" materialized.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001 - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = table.aggregate [First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -144,7 +152,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Last ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal -89.78 epsilon=0.000001 - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = table.aggregate [First "Index", Last "Value"] materialized = materialize grouped grouped.row_count . 
should_equal 1 @@ -168,7 +176,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001 - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -178,7 +186,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Longest TextWithNothing" materialized.columns.at 1 . at 0 . should_equal "setp295gjvbanana" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = table.aggregate [Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -210,12 +218,14 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . at 0 . should_equal 0 Test.specify "should be able to count distinct values" <| - grouped = empty_table.aggregate [Count_Distinct "Code"] + grouped = empty_table.aggregate [Count_Distinct "Code" (ignore_nothing=False), Count_Distinct "Code" (ignore_nothing=True)] materialized = materialize grouped grouped.row_count . should_equal 1 - materialized.columns.length . should_equal 1 + materialized.columns.length . should_equal 2 materialized.columns.at 0 . name . should_equal "Count Distinct Code" materialized.columns.at 0 . at 0 . 
should_equal 0 + materialized.columns.at 1 . name . should_equal "Count Distinct Code_1" + materialized.columns.at 1 . at 0 . should_equal 0 Test.specify "should be able to compute sum and average of values" <| grouped = empty_table.aggregate [Sum "Value", Average "ValueWithNothing"] @@ -227,7 +237,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Average ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = empty_table.aggregate [Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -237,7 +247,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Standard Deviation ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median, mode and percentile values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -249,7 +259,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "25%-ile Value" materialized.columns.at 2 . at 0 . 
should_equal Nothing - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = empty_table.aggregate [First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -259,7 +269,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Last ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = empty_table.aggregate [First "Index", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -279,7 +289,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -289,7 +299,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . 
should_equal "Longest TextWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = empty_table.aggregate [Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -334,7 +344,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Sum Value" materialized.columns.at 2 . name . should_equal "Average ValueWithNothing" - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = empty_table.aggregate [Group_By 0, Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -343,7 +353,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Standard Deviation Value" materialized.columns.at 2 . name . should_equal "Standard Deviation ValueWithNothing" - Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -353,7 +363,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . 
should_equal "Mode Index" materialized.columns.at 3 . name . should_equal "25%-ile Value" - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = empty_table.aggregate [Group_By 0, First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -362,7 +372,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "First Index" materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -380,7 +390,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Minimum Value" materialized.columns.at 2 . name . should_equal "Maximum ValueWithNothing" - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . 
should_equal 0 @@ -389,7 +399,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 1 . name . should_equal "Shortest TextWithNothing" materialized.columns.at 2 . name . should_equal "Longest TextWithNothing" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = empty_table.aggregate [Group_By 0, Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -441,8 +451,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . name . should_equal "Count Distinct Flag" materialized.columns.at 3 . at idx . should_equal 2 - Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported.") <| - ## TODO probably should use different cols for multi-distinct and also should check ignore_nothing + Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| grouped = table.aggregate [Group_By "Index", Count_Distinct (By_Name ["Index", "Flag"])] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -470,7 +479,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 4 . name . should_equal "Average ValueWithNothing" materialized.columns.at 4 . at idx . 
should_equal 0.646213 epsilon=0.000001 - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = table.aggregate [Group_By "Index", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -487,7 +496,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing_1" materialized.columns.at 4 . at idx . should_equal 56.677714 epsilon=0.000001 - Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -508,7 +517,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 6 . name . should_equal "40%-ile ValueWithNothing" materialized.columns.at 6 . at idx . 
should_equal -18.802000 epsilon=0.000001 - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -521,7 +530,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" materialized.columns.at 2 . at idx . should_equal 19.77 epsilon=0.000001 - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -551,7 +560,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 4 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001 - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . 
should_equal 10 @@ -564,7 +573,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "Longest TextWithNothing" materialized.columns.at 2 . at idx . should_equal "byo6kn5l3sz" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = table.aggregate [Group_By "Index", Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -622,8 +631,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . name . should_equal "Count Distinct Flag" materialized.columns.at 3 . at idx . should_equal 1 - Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported.") <| - ## TODO probably should use different cols for multi-distinct and also should check ignore_nothing + Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| grouped = table.aggregate [Group_By "Index", Count_Distinct (By_Name ["Index", "Flag"]), Group_By "Flag"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -653,7 +661,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 4 . name . should_equal "Average ValueWithNothing" materialized.columns.at 4 . at idx . 
should_equal 4.721858 epsilon=0.000001 - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -671,7 +679,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing_1" materialized.columns.at 5 . at idx . should_equal 57.306492 epsilon=0.000001 - Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -693,7 +701,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 7 . name . should_equal "40%-ile ValueWithNothing" materialized.columns.at 7 . at idx . 
should_equal -17.174000 epsilon=0.000001 - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"]), Group_By "Index"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -707,7 +715,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" materialized.columns.at 2 . at idx . should_equal 42.17 epsilon=0.000001 - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -739,7 +747,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 5 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 5 . at idx . should_equal 97.17 epsilon=0.000001 - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . 
should_equal 20 @@ -753,7 +761,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 3 . name . should_equal "Longest TextWithNothing" materialized.columns.at 3 . at idx . should_equal "byo6kn5l3sz" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = table.aggregate [Group_By "Index", Group_By "Flag", Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -765,14 +773,351 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "Concatenate Code" materialized.columns.at 2 . at idx . length . should_equal 381 - problem_pending = case pending.is_nothing of - False -> pending - True -> if test_selection.problem_handling.not then "Not supported." - Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=problem_pending <| + Test.group prefix+"Table.aggregate Concat" (pending = resolve_pending test_selection.text_concat pending) <| + Test.specify "should insert the separator, add prefix and suffix" <| + table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]] + result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")] + result.row_count . should_equal 2 + materialized = materialize result . sort "A" + materialized.columns.length . should_equal 2 + materialized.columns.at 0 . name . should_equal "A" + materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"] + materialized.columns.at 1 . name . should_equal "Concatenate B" + materialized.columns.at 1 . to_vector . 
should_equal ["[[b]]", "[[a; c; d]]"] + + Test.specify "should correctly escape separator and quote characters but only if necessary" <| + table = table_builder [["A", ["1,0", "b", "'c", "''", ","]]] + result = table.aggregate [(Concatenate "A" prefix="[[" suffix="]]" separator="," quote_char="'")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["[['1,0',b,'''c','''''',',']]"] + + Test.specify "should correctly handle missing values and empty values with quote character" <| + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A" prefix="[[" suffix="]]" separator="," quote_char="'")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["[['1,0',A,'','',B,,,C]]"] + + Test.specify "will not be able to distinguish missing values from empty values without quote character" <| + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A" prefix="[[" suffix="]]" separator=",")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["[[1,0,A,,,B,,,C]]"] + + Test.specify "should work with empty separator" <| + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . 
name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["1,0ABC"] + + Test.specify "should work with empty separator but non-empty quote" <| + table = table_builder [["A", ["1'0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A" quote_char="'")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["'1''0'A''''BC"] + + Test.group prefix+"Table.aggregate Count_Distinct" pending=pending <| + Test.specify "should correctly count missing values" <| + get_value t = + columns = materialize t . columns + columns.length . should_equal 1 frames_to_skip=1 + columns.first.length . should_equal 1 frames_to_skip=1 + columns.first . at 0 + + t1 = table_builder [["A", []]] + get_value (t1.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 0 + get_value (t1.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 0 + + t2 = table_builder [["A", [Nothing, Nothing]]] + get_value (t2.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 0 + get_value (t2.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 1 + + t3 = table_builder [["A", [1, 2]]] + get_value (t3.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 2 + get_value (t3.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 2 + + t4 = table_builder [["A", [1, 2, Nothing, Nothing]]] + get_value (t4.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 2 + get_value (t4.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 3 + + t5 = table_builder [["G", ["foo", "foo", "bar", "foo"]], ["A", [Nothing, 0, Nothing, Nothing]]] + + r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)] + r1.row_count . 
should_equal 2 + m1 = materialize r1 . sort "G" + m1.columns.length . should_equal 2 + m1.columns.first.to_vector . should_equal ["bar", "foo"] + m1.columns.second.to_vector . should_equal [0, 1] + + r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)] + r2.row_count . should_equal 2 + m2 = materialize r2 . sort "G" + m2.columns.length . should_equal 2 + m2.columns.first.to_vector . should_equal ["bar", "foo"] + m2.columns.second.to_vector . should_equal [1, 2] + + Test.specify "should correctly count all-null keys in multi-column mode" (pending = resolve_pending test_selection.multi_distinct) <| + table = table_builder [["A", ["foo", "foo", Nothing, Nothing, Nothing]], ["B", ["baz", Nothing, Nothing, Nothing, "baz"]], ["C", [1, 2, 3, Nothing, 5]]] + + r2 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=False)] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length.should_equal 1 + m2.columns.first.name . should_equal "Count Distinct A B" + m2.columns.first.to_vector . should_equal [4] + + r1 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=True)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length.should_equal 1 + m1.columns.first.name . should_equal "Count Distinct A B" + m1.columns.first.to_vector . should_equal [3] + + Test.group prefix+"Table.aggregate Standard_Deviation" pending=(resolve_pending test_selection.std_dev pending) <| + Test.specify "should correctly handle single elements" <| + r1 = table_builder [["X", [1]]] . aggregate [Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + m1.columns.first.at 0 . should_equal Nothing + m1.columns.second.at 0 . 
should_equal 0 + + Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| + Test.specify "widening to decimals on Average" <| + table = table_builder [["G", ["a", "a", "b", "b"]], ["X", [0, 1, 1, Nothing]]] + r1 = table.aggregate [Average "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal (2/3) epsilon=0.00001 + + r2 = table.aggregate [Group_By "G", Average "X"] + r2.row_count.should_equal 2 + m2 = materialize r2 . sort "G" + m2.columns.length . should_equal 2 + m2.columns.first.to_vector . should_equal ["a", "b"] + m2.columns.second.to_vector . should_equal [0.5, 1] + + Test.specify "widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| + table = table_builder [["X", [-1000, 0, 1, 100000, Nothing]]] + r1 = table.aggregate [Median "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.to_vector . should_equal [0.5] + + Test.specify "widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + table = table_builder [["X", [1, 2, 3, 4, 5, 6, Nothing]]] + r1 = table.aggregate [Percentile 0.3 "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.to_vector . should_equal [2.5] + + Test.specify "widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + table = table_builder [["X", [1, 2, 3, 4, Nothing]]] + r1 = table.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + m1.columns.first.at 0 . should_equal 1.1180339887499 epsilon=0.000001 + m1.columns.second.at 0 . 
should_equal 1.2909944487358 epsilon=0.000001 + + expect_null_or_nan value = + matches = case value of + Nothing -> True + Decimal -> Double.isNaN value + _ -> False + if matches.not then + loc = Meta.get_source_location 2 + Test.fail "Expected a Nothing or NaN but got: "+value.to_text+" (at "+loc+")." + + Test.group prefix+"Table.aggregate should correctly handle infinities" pending=pending <| + pos_inf = 1/0 + neg_inf = -1/0 + Test.specify "on Average" <| + t1 = table_builder [["X", [Nothing, pos_inf, pos_inf, 0]]] + r1 = t1.aggregate [Average "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal pos_inf + + t2 = table_builder [["X", [Nothing, pos_inf, neg_inf, 0]]] + r2 = t2.aggregate [Average "X"] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length . should_equal 1 + expect_null_or_nan <| m2.columns.first.at 0 + + Test.specify "on Median" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, neg_inf, pos_inf, 0, pos_inf, pos_inf]]] + r1 = t1.aggregate [Median "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal pos_inf + + t2 = table_builder [["X", [pos_inf, pos_inf, neg_inf, neg_inf]]] + r2 = t2.aggregate [Median "X"] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length . should_equal 1 + expect_null_or_nan <| m2.columns.first.at 0 + + t3 = table_builder [["X", [pos_inf, pos_inf, Nothing, 0, 10, 20, neg_inf, neg_inf]]] + r3 = t3.aggregate [Median "X"] + r3.row_count.should_equal 1 + m3 = materialize r3 + m3.columns.length . should_equal 1 + m3.columns.first.at 0 . should_equal 10 + + t4 = table_builder [["X", [Nothing, pos_inf, pos_inf, 10, 12]]] + r4 = t4.aggregate [Median "X"] + r4.row_count.should_equal 1 + m4 = materialize r4 + m4.columns.length . should_equal 1 + m4.columns.first.at 0 . 
should_equal pos_inf + + Test.specify "on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, neg_inf, 2, 3, 4, pos_inf]]] + r1 = t1.aggregate [Percentile 0.3 "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal 2.2 + + t2 = table_builder [["X", [Nothing, neg_inf, neg_inf, 3, 4, pos_inf]]] + r2 = t2.aggregate [Percentile 0.25 "X"] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length . should_equal 1 + m2.columns.first.at 0 . should_equal neg_inf + + t3 = table_builder [["X", [Nothing, neg_inf, neg_inf, pos_inf, pos_inf, pos_inf]]] + r3 = t3.aggregate [Percentile 0.3 "X"] + r3.row_count.should_equal 1 + m3 = materialize r3 + m3.columns.length . should_equal 1 + expect_null_or_nan <| m3.columns.first.at 0 + + Test.specify "on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + t1 = table_builder [["X", [neg_inf, 1]]] + r1 = t1.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + expect_null_or_nan <| m1.columns.first.at 0 + expect_null_or_nan <| m1.columns.second.at 0 + + Test.group prefix+"Table.aggregate should correctly handle NaN" pending=(resolve_pending test_selection.nan pending) <| + nan = 0.log 0 + Test.specify "on Average" <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] + r1 = t1.aggregate [Average "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Median" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] + r1 = t1.aggregate [Median "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . 
should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2, 4, 5]]] + r1 = t1.aggregate [Percentile 0.3 "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Mode" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, nan, nan, nan, nan, 4, 5]]] + r1 = t1.aggregate [Mode "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] + r1 = t1.aggregate [Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + Double.isNaN (m1.columns.first.at 0) . should_be_true + Double.isNaN (m1.columns.second.at 0) . should_be_true + + Test.group prefix+"Table.aggregate Mode" (pending = resolve_pending test_selection.advanced_stats pending) <| + Test.specify "should ignore missing values" <| + t1 = table_builder [["X", [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 2, 2, 1]]] + r1 = t1.aggregate [Mode "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . 
should_equal 2 + + Test.group prefix+"Table.aggregate First and Last" pending=pending <| + Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending = resolve_pending test_selection.first_last) <| + t1 = table_builder [["G", ["a", "a"]], ["X", [1, 2]]] + order = By_Name ["G"] + r1 = t1.aggregate [First "X" (order_by=order), Last "X" (order_by=order)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + first = m1.columns.first.at 0 + last = m1.columns.second.at 0 + (first != last).should_be_true + + Test.group prefix+"Table.aggregate" pending=pending <| + Test.specify "should work even if no aggregations apart from groupings are specified" <| + table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]] + grouped = table.aggregate [Group_By "B", Group_By "A"] + grouped.row_count . should_equal 3 + materialized = materialize grouped . sort ["A", "B"] + materialized.columns.length . should_equal 2 + materialized.columns.at 1 . name . should_equal "A" + materialized.columns.at 1 . to_vector . should_equal [1, 1, 2] + materialized.columns.at 0 . name . should_equal "B" + materialized.columns.at 0 . to_vector . should_equal [2, 3, 2] + + if test_selection.first_last && test_selection.first_last_row_order.not then + Test.specify "should report a warning and ignore problematic columns if a feature is not supported" <| + table = table_builder [["A", [1,2,Nothing,3]]] + action = table.aggregate [Sum "A", First "A", Last "A"] on_problems=_ + tester result = + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.first.name . should_equal "Sum A" + materialized.columns.first.to_vector . 
should_equal [6] + problems = [Unsupported_Database_Operation_Error "`First` aggregation requires at least one `order_by` column.", Unsupported_Database_Operation_Error "`Last` aggregation requires at least one `order_by` column."] + Problems.test_problem_handling action problems tester + + Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=(resolve_pending test_selection.problem_handling pending) <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3]] - Table.new [col1, col2] + table_builder [col1, col2] Test.specify "should raise a warning when there are no output columns" <| action = table.aggregate [] on_problems=_ @@ -828,10 +1173,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test tester = expect_column_names [] Problems.test_problem_handling action problems tester - aggregate_pending = case pending.is_nothing of - False -> pending - True -> if test_selection.aggregation_problems.not then "Not supported." 
- Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=aggregate_pending <| + Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems pending) <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3.1]] @@ -900,7 +1242,7 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test tester = expect_column_names ["Maximum Mixed"] Problems.test_problem_handling action problems tester - Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=aggregate_pending <| + Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems pending) <| table = col1 = ["Key", ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"]] col2 = ["Value", [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]] @@ -921,4 +1263,58 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test problems.at 0 . is_an Floating_Point_Grouping . should_be_true problems.at 0 . rows . length . should_equal 9 + if is_database then + Test.group prefix+"Table.aggregate should report unsupported operations but not block other aggregations in warning mode" pending=pending <| + expect_sum_and_unsupported_errors error_count result = + result.columns.length . should_equal 1 + result.row_count . should_equal 1 + result.columns.first.to_vector . should_equal [6] + warnings = Warning.get_all result . map .value + warnings.length . 
should_equal error_count + warnings.each warning-> + warning.should_be_an Unsupported_Database_Operation_Error + + if test_selection.first_last_row_order.not then + Test.specify "with First and Last in row order" <| + table = table_builder [["X", [1,2,3]]] + expect_sum_and_unsupported_errors 2 <| + table.aggregate [Sum "X", First "X", Last "X"] + + if test_selection.first_last.not then + Test.specify "with First and Last with ordering" <| + table = table_builder [["A", [3,2,1]], ["X", [1,2,3]]] + order = By_Name ["A"] + expect_sum_and_unsupported_errors 2 <| + table.aggregate [Sum "X", First "X" (order_by=order), Last "X" (order_by=order)] + + if test_selection.advanced_stats.not then + Test.specify "with Median, Mode and Percentile" <| + table = table_builder [["X", [1,2,3]]] + expect_sum_and_unsupported_errors 3 <| + table.aggregate [Sum "X", Median "X", Mode "X", Percentile 0.3 "X"] + + if test_selection.std_dev.not then + Test.specify "with Standard_Deviation" <| + table = table_builder [["X", [1,2,3]]] + expect_sum_and_unsupported_errors 1 <| + table.aggregate [Sum "X", Standard_Deviation "X"] + + if test_selection.text_shortest_longest.not then + Test.specify "with Shortest and Longest" <| + table = table_builder [["X", [1,2,3]], ["Y", ["a", "bb", "ccc"]]] + expect_sum_and_unsupported_errors 2 <| + table.aggregate [Sum "X", Shortest "Y", Longest "Y"] + + if test_selection.text_concat.not then + Test.specify "with Concatenate" <| + table = table_builder [["X", [1,2,3]], ["Y", ["a", "bb", "ccc"]]] + expect_sum_and_unsupported_errors 1 <| + table.aggregate [Sum "X", Concatenate "Y"] + + if test_selection.multi_distinct.not then + Test.specify "with Count_Distinct on multiple fields" <| + table = table_builder [["X", [1,2,3]], ["Y", ["a", "bb", "ccc"]]] + expect_sum_and_unsupported_errors 1 <| + table.aggregate [Sum "X", Count_Distinct (By_Name ["X", "Y"])] + main = Test.Suite.run_main here.spec diff --git a/test/Table_Tests/src/Common_Table_Spec.enso 
b/test/Table_Tests/src/Common_Table_Spec.enso index f0865f3fa79d..017e4287b517 100644 --- a/test/Table_Tests/src/Common_Table_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Spec.enso @@ -26,13 +26,13 @@ from Standard.Table.Data.Position as Position_Module import all spec : Text -> (Vector -> Any) -> Boolean -> Text -> Nothing spec prefix table_builder supports_case_sensitive_columns pending=Nothing = table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Baz", Integer, [7,8,9]] - col4 = ["foo_1", Integer, [10,11,12]] - col5 = ["foo_2", Integer, [13,14,15]] - col6 = ["ab.+123", Integer, [16,17,18]] - col7 = ["abcd123", Integer, [19,20,21]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Baz", [7,8,9]] + col4 = ["foo_1", [10,11,12]] + col5 = ["foo_2", [13,14,15]] + col6 = ["ab.+123", [16,17,18]] + col7 = ["abcd123", [19,20,21]] table_builder [col1, col2, col3, col4, col5, col6, col7] expect_column_names names table = @@ -67,9 +67,9 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = if supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Bar", Integer, [7,8,9]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] expect_column_names ["bar", "Bar"] <| table.select_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new)) @@ -122,7 +122,7 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: unmatched columns" <| - table_2 = table_builder [["foo", Integer, [0,0,0]], ["weird_column", Integer, [0,0,0]]] + table_2 = table_builder [["foo", [0,0,0]], ["weird_column", [0,0,0]]] foo = table_2.at "foo" weird_column = table_2.at 
"weird_column" bar = table.at "bar" @@ -177,9 +177,9 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = if supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Bar", Integer, [7,8,9]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] expect_column_names ["foo"] <| table.remove_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new)) @@ -231,7 +231,7 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: unmatched columns" <| - table_2 = table_builder [["foo", Integer, [0,0,0]], ["weird_column", Integer, [0,0,0]]] + table_2 = table_builder [["foo", [0,0,0]], ["weird_column", [0,0,0]]] foo = table_2.at "foo" weird_column = table_2.at "weird_column" bar = table.at "bar" @@ -286,9 +286,9 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = if supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Bar", Integer, [7,8,9]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new)) @@ -340,7 +340,7 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: unmatched columns" <| - table_2 = table_builder [["foo", Integer, [0,0,0]], ["weird_column", Integer, [0,0,0]]] + table_2 = table_builder 
[["foo", [0,0,0]], ["weird_column", [0,0,0]]] foo = table_2.at "foo" weird_column = table_2.at "weird_column" bar = table.at "bar" @@ -359,13 +359,13 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Test.group prefix+"Table.sort_columns" pending=pending <| table = - col1 = ["foo_21", Integer, [1,2,3]] - col2 = ["foo_100", Integer, [4,5,6]] - col3 = ["foo_1", Integer, [7,8,9]] - col4 = ["Foo_2", Integer, [10,11,12]] - col5 = ["foo_3", Integer, [13,14,15]] - col6 = ["foo_001", Integer, [16,17,18]] - col7 = ["bar", Integer, [19,20,21]] + col1 = ["foo_21", [1,2,3]] + col2 = ["foo_100", [4,5,6]] + col3 = ["foo_1", [7,8,9]] + col4 = ["Foo_2", [10,11,12]] + col5 = ["foo_3", [13,14,15]] + col6 = ["foo_001", [16,17,18]] + col7 = ["bar", [19,20,21]] table_builder [col1, col2, col3, col4, col5, col6, col7] Test.specify "should work as shown in the doc examples" <| @@ -387,10 +387,10 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Test.group prefix+"Table.rename_columns" pending=pending <| table = - col1 = ["alpha", Integer, [1,2,3]] - col2 = ["beta", Integer, [4,5,6]] - col3 = ["gamma", Integer, [16,17,18]] - col4 = ["delta", Integer, [19,20,21]] + col1 = ["alpha", [1,2,3]] + col2 = ["beta", [4,5,6]] + col3 = ["gamma", [16,17,18]] + col4 = ["delta", [19,20,21]] table_builder [col1, col2, col3, col4] Test.specify "should work as shown in the doc examples" <| diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index e15047481c5d..a4525aa1703b 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -83,7 +83,7 @@ spec = contains = b.contains "inf" ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]] starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (? 
|| \'%\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]] - contains.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ? || \'%\')) AS "B" FROM "T1" AS "T1"', [["inf", str]]] + contains.to_sql.prepare . should_equal ['SELECT instr("T1"."B", ?) > 0 AS "B" FROM "T1" AS "T1"', [["inf", str]]] Test.group "[Codegen] Masking Tables and Columns" <| Test.specify "should allow filtering table rows based on a boolean expression" <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index f52c54fb9d27..228c03471661 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -376,30 +376,6 @@ spec prefix connection pending=Nothing = t2.at "Count Not Nothing price" . to_vector . should_equal [11] t2.at "Count Nothing price" . to_vector . should_equal [5] - Test.specify "should allow to count distinct values" <| - aggregates = [Count_Distinct "quantity", Count_Distinct "price" (ignore_nothing=True), Count_Distinct "price" (ignore_nothing=False)] - - t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe) - t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"] - # t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0] - # TODO - - t2 = t.aggregate aggregates . to_dataframe - t2 . at "Count Distinct quantity" . to_vector . should_equal [10] - t2 . at "Count Distinct price" . to_vector . should_equal [7] - #t2 . at "Count Distinct price 2" . to_vector . should_equal [8] - - Test.specify "should allow to count distinct values over multiple fields" pending="TODO" <| - aggregates = [Count_Distinct ["price", "quantity"]] - - t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe) - t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"] - # t1.at "Count Distinct quantity" . to_vector . 
should_equal [2, 1, 3, 0] - # TODO - - t2 = t.aggregate aggregates . to_dataframe - t2 . at "Count Distinct price quantity" . to_vector . should_equal [13] - Test.specify "should allow simple arithmetic aggregations" <| aggregates = [Sum "price" Nothing, Sum "quantity" Nothing, Average "price" Nothing] ## TODO can check the datatypes diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index 0b353e56a777..133222a9c038 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -9,6 +9,8 @@ import project.Database.Common_Spec import project.Database.Helpers.Name_Generator import project.Common_Table_Spec import project.Aggregate_Spec +from Standard.Table.Data.Aggregate_Column import all +from Standard.Database.Data.Sql import Sql_Type postgres_specific_spec connection pending = Test.group "[PostgreSQL] Info" pending=pending <| @@ -18,11 +20,19 @@ postgres_specific_spec connection pending = t.insert ["a", Nothing, False, 1.2, 0.000000000001] t.insert ["abc", Nothing, Nothing, 1.3, Nothing] t.insert ["def", 42, True, 1.4, 10] + Test.specify "should return Table information" <| i = t.info i.index . to_vector . should_equal ["strs", "ints", "bools", "reals", "doubles"] i.at "Items Count" . to_vector . should_equal [3, 1, 2, 3, 2] i.at "SQL Type" . to_vector . should_equal ["varchar", "int4", "bool", "float4", "float8"] + + Test.specify "should return Table information, also for aggregated results" <| + i = t.aggregate [Concatenate "strs", Sum "ints", Count_Distinct "bools"] . info + i.index . to_vector . should_equal ["Concatenate strs", "Sum ints", "Count Distinct bools"] + i.at "Items Count" . to_vector . should_equal [1, 1, 1] + i.at "SQL Type" . to_vector . should_equal ["VARCHAR", "BIGINT", "BIGINT"] + Test.specify "should infer standard types correctly" <| t.at "strs" . sql_type . is_definitely_text . should_be_true t.at "ints" . 
sql_type . is_definitely_integer . should_be_true @@ -30,6 +40,42 @@ postgres_specific_spec connection pending = t.at "reals" . sql_type . is_definitely_double . should_be_true connection.execute_update 'DROP TABLE "'+tinfo+'"' + Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" pending=pending <| + name = Name_Generator.random_name "Ttypes" + connection.execute_update 'CREATE TEMPORARY TABLE "'+name+'" ("txt" VARCHAR, "i1" SMALLINT, "i2" INT, "i3" BIGINT, "i4" NUMERIC, "r1" REAL, "r2" DOUBLE PRECISION, "bools" BOOLEAN)' + t = connection.access_table name + Test.specify "Concatenate, Shortest and Longest" <| + r = t.aggregate [Concatenate "txt", Shortest "txt", Longest "txt"] + r.columns.at 0 . sql_type . should_equal Sql_Type.text + r.columns.at 1 . sql_type . should_equal Sql_Type.text + r.columns.at 2 . sql_type . should_equal Sql_Type.text + + Test.specify "Counts" <| + r = t.aggregate [Count, Count_Empty "txt", Count_Not_Empty "txt", Count_Distinct "i1", Count_Not_Nothing "i2", Count_Nothing "i3"] + r.columns.length . should_equal 6 + r.columns.each column-> + column.sql_type . should_equal Sql_Type.bigint + + Test.specify "Sum" <| + r = t.aggregate [Sum "i1", Sum "i2", Sum "i3", Sum "i4", Sum "r1", Sum "r2"] + r.columns.at 0 . sql_type . should_equal Sql_Type.bigint + r.columns.at 1 . sql_type . should_equal Sql_Type.bigint + r.columns.at 2 . sql_type . should_equal Sql_Type.numeric + r.columns.at 3 . sql_type . should_equal Sql_Type.numeric + r.columns.at 4 . sql_type . should_equal Sql_Type.real + r.columns.at 5 . sql_type . should_equal Sql_Type.double + + Test.specify "Average" <| + r = t.aggregate [Average "i1", Average "i2", Average "i3", Average "i4", Average "r1", Average "r2"] + r.columns.at 0 . sql_type . should_equal Sql_Type.numeric + r.columns.at 1 . sql_type . should_equal Sql_Type.numeric + r.columns.at 2 . sql_type . should_equal Sql_Type.numeric + r.columns.at 3 . sql_type . 
should_equal Sql_Type.numeric + r.columns.at 4 . sql_type . should_equal Sql_Type.double + r.columns.at 5 . sql_type . should_equal Sql_Type.double + + connection.execute_update 'DROP TABLE "'+name+'"' + run_tests connection pending=Nothing = prefix = "[PostgreSQL] " name_counter = Ref.new 0 @@ -39,10 +85,11 @@ run_tests connection pending=Nothing = Ref.put name_counter ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2] - table = connection.upload_table name in_mem_table - tables.append name - table + in_mem_table = Materialized_Table.new columns + case connection.upload_table name in_mem_table of + table -> + tables.append name + table clean_tables table_names = table_names.each name-> sql = 'DROP TABLE "' + name + '"' @@ -52,14 +99,14 @@ run_tests connection pending=Nothing = here.postgres_specific_spec connection pending=pending Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending - selection = Aggregate_Spec.Test_Selection text_shortest_longest=True first_last_row_order=False aggregation_problems=False + selection = Aggregate_Spec.Test_Selection first_last_row_order=False aggregation_problems=False agg_in_memory_table = (Enso_Project.data / "data.csv") . 
read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table tables.append agg_table.name empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index 0a74df6ff48b..39bb0ab41ab9 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -39,10 +39,11 @@ run_tests connection pending=Nothing = Ref.put name_counter ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2] - table = connection.upload_table name in_mem_table - tables.append name - table + in_mem_table = Materialized_Table.new columns + case connection.upload_table name in_mem_table of + table -> + tables.append name + table clean_tables table_names = table_names.each name-> sql = 'DROP TABLE "' + name + '"' @@ -59,7 +60,7 @@ run_tests connection pending=Nothing = empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso 
b/test/Table_Tests/src/Database/Sqlite_Spec.enso index 3231e4609b7a..94408132a7fe 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -17,7 +17,7 @@ sqlite_specific_spec connection = action = connection.execute_query "SELECT A FROM undefined_table" action . should_fail_with Sql_Error - action.catch.to_text . should_equal "There was an SQL error: '[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)'." + action.catch.to_text . should_equal "There was an SQL error: '[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)'. [Query was: SELECT A FROM undefined_table]" Test.group "[SQLite] Metadata" <| tinfo = Name_Generator.random_name "Tinfo" @@ -55,26 +55,27 @@ spec = Ref.put name_counter ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2] + in_mem_table = Materialized_Table.new columns connection.upload_table name in_mem_table Common_Spec.spec prefix connection here.sqlite_specific_spec connection Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=False - ## For now `advanced_stats` remain disabled, because SQLite does not provide - aggregate functions for median, mode and percentile and emulating them is - highly problematic. We can rethink in the future how these could be - emulated. Two of the possible solutions are: + ## For now `advanced_stats`, `first_last`, `text_shortest_longest` and + `multi_distinct` remain disabled, because SQLite does not provide the + needed aggregate functions and emulating them is highly problematic. + We can rethink in the future how these could be emulated. Two of the + possible solutions are: - creating complex nested queries using NTILE to compute the stats, - compiling SQLite library on our own and adding native extensions for the missing statistics. 
- selection = Aggregate_Spec.Test_Selection advanced_stats=False text_shortest_longest=False first_last=False first_last_row_order=False multi_distinct=False aggregation_problems=False + selection = Aggregate_Spec.Test_Selection advanced_stats=False text_shortest_longest=False first_last=False first_last_row_order=False multi_distinct=False aggregation_problems=False nan=False agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection connection.close file.delete diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 449f86f5a17c..4ad55c4f8944 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -635,10 +635,7 @@ spec = t_3 = Table.new [c_3_1, c_3_2, c_3_3] t_3.default_visualization.should_equal Visualization.Id.table - table_builder columns = - Table.new <| columns.map description-> [description.at 0, description.at 2] - - Common_Table_Spec.spec "[In-Memory] " table_builder supports_case_sensitive_columns=True + Common_Table_Spec.spec "[In-Memory] " Table.new supports_case_sensitive_columns=True Test.group "Use First Row As Names" <| expect_column_names names table = diff --git a/test/Tests/src/Data/Numbers_Spec.enso b/test/Tests/src/Data/Numbers_Spec.enso index 3d939b819bfb..51323d20b6ea 100644 --- a/test/Tests/src/Data/Numbers_Spec.enso +++ b/test/Tests/src/Data/Numbers_Spec.enso @@ -262,4 +262,32 @@ spec = almost_max_long_times_three_decimal.ceil.to_decimal . 
should_equal almost_max_long_times_three_plus_1.to_decimal almost_max_long_times_three_plus_1.ceil . should_equal almost_max_long_times_three_plus_1 + Test.specify "should expose a NaN value" <| + Number.nan.is_nan . should_be_true + 0.is_nan . should_be_false + Number.positive_infinity.is_nan . should_be_false + Number.negative_infinity.is_nan . should_be_false + + Number.nan==Number.nan . should_be_false + Number.nan==0 . should_be_false + Number.nan!=Number.nan . should_be_true + + Test.specify "should support inexact equality comparisons" <| + 1.0001 . equals 1.0002 epsilon=0.01 . should_be_true + 1.0001 . equals 1.0002 epsilon=0.0000001 . should_be_false + + 1 . equals 2 . should_be_false + 1 . equals (0+1) . should_be_true + + Number.positive_infinity . equals Number.positive_infinity . should_be_true + + Number.negative_infinity . equals Number.negative_infinity . should_be_true + Number.negative_infinity . equals Number.positive_infinity . should_be_false + + Number.negative_infinity . should_equal (-Number.positive_infinity) + Number.negative_infinity . equals (-Number.positive_infinity) . should_be_true + + Number.nan . equals Number.nan . should_be_false + Number.nan . equals 0 . should_be_false + main = Test.Suite.run_main here.spec