diff --git a/datafusion/core/tests/data/partitioned_csv/partition-0.csv b/datafusion/core/tests/data/partitioned_csv/partition-0.csv new file mode 100644 index 000000000000..f54554228d67 --- /dev/null +++ b/datafusion/core/tests/data/partitioned_csv/partition-0.csv @@ -0,0 +1,11 @@ +0,0,true +0,1,false +0,2,true +0,3,false +0,4,true +0,5,false +0,6,true +0,7,false +0,8,true +0,9,false +0,10,true diff --git a/datafusion/core/tests/data/partitioned_csv/partition-1.csv b/datafusion/core/tests/data/partitioned_csv/partition-1.csv new file mode 100644 index 000000000000..b8977f1c58d4 --- /dev/null +++ b/datafusion/core/tests/data/partitioned_csv/partition-1.csv @@ -0,0 +1,11 @@ +1,0,true +1,1,false +1,2,true +1,3,false +1,4,true +1,5,false +1,6,true +1,7,false +1,8,true +1,9,false +1,10,true diff --git a/datafusion/core/tests/data/partitioned_csv/partition-2.csv b/datafusion/core/tests/data/partitioned_csv/partition-2.csv new file mode 100644 index 000000000000..7e42564aba59 --- /dev/null +++ b/datafusion/core/tests/data/partitioned_csv/partition-2.csv @@ -0,0 +1,11 @@ +2,0,true +2,1,false +2,2,true +2,3,false +2,4,true +2,5,false +2,6,true +2,7,false +2,8,true +2,9,false +2,10,true diff --git a/datafusion/core/tests/data/partitioned_csv/partition-3.csv b/datafusion/core/tests/data/partitioned_csv/partition-3.csv new file mode 100644 index 000000000000..29aabba4acbd --- /dev/null +++ b/datafusion/core/tests/data/partitioned_csv/partition-3.csv @@ -0,0 +1,11 @@ +3,0,true +3,1,false +3,2,true +3,3,false +3,4,true +3,5,false +3,6,true +3,7,false +3,8,true +3,9,false +3,10,true diff --git a/datafusion/core/tests/sql/order.rs b/datafusion/core/tests/sql/order.rs index 8f08612459c3..2388eebef931 100644 --- a/datafusion/core/tests/sql/order.rs +++ b/datafusion/core/tests/sql/order.rs @@ -18,199 +18,6 @@ use super::*; use test_utils::{batches_to_vec, partitions_to_sorted_vec}; -#[tokio::test] -async fn test_sort_unprojected_col() -> Result<()> { - let ctx = SessionContext::new(); - register_alltypes_parquet(&ctx).await; - // execute the query - let sql = "SELECT id FROM alltypes_plain ORDER BY int_col, double_col"; - let actual = execute_to_batches(&ctx, sql).await; - #[rustfmt::skip] - let expected = vec![ - "+----+", - "| id |", - "+----+", - "| 4 |", - "| 6 |", - "| 2 |", - "| 0 |", - "| 5 |", - "| 7 |", - "| 3 |", - "| 1 |", - "+----+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn test_order_by_agg_expr() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - let sql = "SELECT MIN(c12) FROM aggregate_test_100 ORDER BY MIN(c12)"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-----------------------------+", - "| MIN(aggregate_test_100.c12) |", - "+-----------------------------+", - "| 0.01479305307777301 |", - "+-----------------------------+", - ]; - assert_batches_eq!(expected, &actual); - - let sql = "SELECT MIN(c12) FROM aggregate_test_100 ORDER BY MIN(c12) + 0.1"; - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn test_nulls_first_asc() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-----+--------+", - "| num | letter |", - "+-----+--------+", - "| 1 | one |", - "| 2 | two |", - "| | three |", - "+-----+--------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn test_nulls_first_desc() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-----+--------+", - "| num | letter |", - "+-----+--------+", - "| | three |", - "| 2 | two |", - "| 1 | one |", - "+-----+--------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn test_specific_nulls_last_desc() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC NULLS LAST"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-----+--------+", - "| num | letter |", - "+-----+--------+", - "| 2 | two |", - "| 1 | one |", - "| | three |", - "+-----+--------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn test_specific_nulls_first_asc() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num ASC NULLS FIRST"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-----+--------+", - "| num | letter |", - "+-----+--------+", - "| | three |", - "| 1 | one |", - "| 2 | two |", - "+-----+--------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn sort() -> Result<()> { - let results = - partitioned_csv::execute("SELECT c1, c2 FROM test ORDER BY c1 DESC, c2 ASC", 4) - .await?; - assert_eq!(results.len(), 1); - - let expected: Vec<&str> = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 3 | 1 |", - "| 3 | 2 |", - "| 3 | 3 |", - "| 3 | 4 |", - "| 3 | 5 |", - "| 3 | 6 |", - "| 3 | 7 |", - "| 3 | 8 |", - "| 3 | 9 |", - "| 3 | 10 |", - "| 2 | 1 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "| 2 | 10 |", - "| 1 | 1 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 1 | 10 |", - "| 0 | 1 |", - "| 0 | 2 |", - "| 0 | 3 |", - "| 0 | 4 |", - "| 0 | 5 |", - "| 0 | 6 |", - "| 0 | 7 |", - "| 0 | 8 |", - "| 0 | 9 |", - "| 0 | 10 |", - "+----+----+", - ]; - - // Note it is important to NOT use assert_batches_sorted_eq - // here as we are testing the sortedness of the output - assert_batches_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn sort_empty() -> Result<()> { - // The predicate on this query purposely generates no results - let results = partitioned_csv::execute( - "SELECT c1, c2 FROM test WHERE c1 > 100000 ORDER BY c1 DESC, c2 ASC", - 4, - ) - .await - .unwrap(); - assert_eq!(results.len(), 0); - Ok(()) -} - #[tokio::test] async fn sort_with_lots_of_repetition_values() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sqllogictests/test_files/order.slt b/datafusion/core/tests/sqllogictests/test_files/order.slt new file mode 100644 index 000000000000..5afdd013125f --- /dev/null +++ b/datafusion/core/tests/sqllogictests/test_files/order.slt @@ -0,0 +1,165 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +statement ok +CREATE EXTERNAL TABLE alltypes_plain STORED AS PARQUET LOCATION '../../parquet-testing/data/alltypes_plain.parquet'; + +statement ok +CREATE EXTERNAL TABLE aggregate_test_100 ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT, + c5 INT, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 BIGINT UNSIGNED NOT NULL, + c10 VARCHAR NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL +) +STORED AS CSV +WITH HEADER ROW +LOCATION '../../testing/data/csv/aggregate_test_100.csv' + +# test_sort_unprojected_col +query I +SELECT id FROM alltypes_plain ORDER BY int_col, double_col +---- +4 +6 +2 +0 +5 +7 +3 +1 + + +# test_order_by_agg_expr +query R +SELECT MIN(c12) FROM aggregate_test_100 ORDER BY MIN(c12) +---- +0.014793053078 + +# test_nulls_first_asc + +query RC +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +1 one +2 two +NULL three + +# test_nulls_first_desc() + +query RC +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +NULL three +2 two +1 one + +# test_specific_nulls_last_desc + +query RC +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC NULLS LAST +---- +2 two +1 one +NULL three + +# test_specific_nulls_first_asc +query RT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num ASC NULLS FIRST +---- +NULL three +1 one +2 two + +# sort + +statement ok +CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean) +STORED AS CSV LOCATION 'tests/data/partitioned_csv'; + +# Demonstrate types +query RC +SELECT arrow_typeof(c1), arrow_typeof(c2), arrow_typeof(c3) FROM test LIMIT 1; +---- +Int32 Int64 Boolean + + +query RC +SELECT c1, c2 FROM test ORDER BY c1 DESC, c2 ASC +---- +3 0 +3 1 +3 2 +3 3 +3 4 +3 5 +3 6 +3 7 +3 8 +3 9 +3 10 +2 0 +2 1 +2 2 +2 3 +2 4 +2 5 +2 6 +2 7 +2 8 +2 9 +2 10 +1 0 +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +1 10 +0 0 +0 1 +0 2 +0 3 +0 4 +0 5 +0 6 +0 7 +0 8 +0 9 +0 10 + + + +# sort_empty +# The predicate on this query purposely generates no results + +query RC +SELECT c1, c2 FROM test WHERE c1 > 100000 ORDER BY c1 DESC, c2 ASC +----