Skip to content

Commit

Permalink
Merge branch 'main' into segtree-finalize
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkfish committed Jul 17, 2024
2 parents 5f7c30c + 3de0fd5 commit 630600f
Show file tree
Hide file tree
Showing 267 changed files with 4,198 additions and 1,466 deletions.
13 changes: 13 additions & 0 deletions .github/patches/extensions/substrait/private_column_ids.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp
index ece0f5d..468fa36 100644
--- a/src/to_substrait.cpp
+++ b/src/to_substrait.cpp
@@ -1188,7 +1188,7 @@ substrait::Rel *DuckDBToSubstrait::TransformGet(LogicalOperator &dop) {
auto select = new substrait::Expression_MaskExpression_StructSelect();
for (auto col_idx : dget.projection_ids) {
auto struct_item = select->add_struct_items();
- struct_item->set_field((int32_t)dget.column_ids[col_idx]);
+ struct_item->set_field((int32_t)dget.GetColumnIds()[col_idx]);
// FIXME do we need to set the child? if yes, to what?
}
projection->set_allocated_select(select);
15 changes: 15 additions & 0 deletions .github/regression/large/ingestion.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
benchmark/large/ingestion/tpcds/csv/ingest_inventory.benchmark
benchmark/large/ingestion/tpcds/parquet/ingest_inventory.benchmark
benchmark/large/ingestion/tpcds/native/ingest_inventory.benchmark

benchmark/large/ingestion/tpcds/csv/ingest_store_sales.benchmark
benchmark/large/ingestion/tpcds/parquet/ingest_store_sales.benchmark
benchmark/large/ingestion/tpcds/native/ingest_store_sales.benchmark

benchmark/large/ingestion/tpch/csv/ingest_lineitem.benchmark
benchmark/large/ingestion/tpch/parquet/ingest_lineitem.benchmark
benchmark/large/ingestion/tpch/native/ingest_lineitem.benchmark

benchmark/large/ingestion/tpch/csv/ingest_orders.benchmark
benchmark/large/ingestion/tpch/parquet/ingest_orders.benchmark
benchmark/large/ingestion/tpch/native/ingest_orders.benchmark
File renamed without changes.
2 changes: 1 addition & 1 deletion .sanitizer-thread-suppressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ race:NextRightSemiOrAntiJoin
race:duckdb_moodycamel
race:duckdb_jemalloc
race:AddToEvictionQueue
race:ValidityAppend
race:ValidityAppend
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# name: benchmark/large/ingestion/tpcds/csv/ingest_inventory.benchmark
# description: benchmark ingestion of inventory
# group: [csv]

template benchmark/large/ingestion/tpcds/tpcds_ingestion_persistent.benchmark.in
table_name=inventory
format=csv

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# name: benchmark/large/ingestion/tpcds/csv/ingest_store_sales.benchmark
# description: benchmark ingestion of store_sales
# group: [csv]

template benchmark/large/ingestion/tpcds/tpcds_ingestion_persistent.benchmark.in
table_name=store_sales
format=csv

11 changes: 11 additions & 0 deletions benchmark/large/ingestion/tpcds/load_tpcds_sf10.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
CALL dsdgen(sf=10);
EXPORT DATABASE 'duckdb_benchmark_data/tpcds_parquet_sf10' (FORMAT PARQUET);
EXPORT DATABASE 'duckdb_benchmark_data/tpcds_csv_sf10' (FORMAT CSV);

create or replace view store_sales_csv as select * from read_csv('duckdb_benchmark_data/tpcds_csv_sf10/store_sales.csv');
create or replace view store_sales_parquet as select * from read_parquet('duckdb_benchmark_data/tpcds_parquet_sf10/store_sales.parquet');
create or replace view store_sales_native as select * from store_sales;

create or replace view inventory_csv as select * from read_csv('duckdb_benchmark_data/tpcds_csv_sf10/inventory.csv');
create or replace view inventory_parquet as select * from read_parquet('duckdb_benchmark_data/tpcds_parquet_sf10/inventory.parquet');
create or replace view inventory_native as select * from inventory;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# name: benchmark/large/ingestion/tpcds/native/ingest_inventory.benchmark
# description: benchmark ingestion of inventory
# group: [native]

template benchmark/large/ingestion/tpcds/tpcds_ingestion_persistent.benchmark.in
table_name=inventory
format=native

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# name: benchmark/large/ingestion/tpcds/native/ingest_store_sales.benchmark
# description: benchmark ingestion of store_sales
# group: [native]

template benchmark/large/ingestion/tpcds/tpcds_ingestion_persistent.benchmark.in
table_name=store_sales
format=native

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# name: benchmark/large/ingestion/tpcds/parquet/ingest_inventory.benchmark
# description: benchmark ingestion of inventory
# group: [parquet]

template benchmark/large/ingestion/tpcds/tpcds_ingestion_persistent.benchmark.in
table_name=inventory
format=parquet

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# name: benchmark/large/ingestion/tpcds/parquet/ingest_store_sales.benchmark
# description: benchmark ingestion of store_sales
# group: [parquet]

template benchmark/large/ingestion/tpcds/tpcds_ingestion_persistent.benchmark.in
table_name=store_sales
format=parquet

Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# name: ${FILE_PATH}
# description: ${DESCRIPTION}
# group: [ingestion]

name Q${table_name}
group ingestion
subgroup tpcds

require tpcds

require parquet

cache tpcds_sf10_ingest.duckdb

load benchmark/large/ingestion/tpcds/load_tpcds_sf10.sql

# reads table from view created in load script
run
create or replace table ${table_name}_ingested as select * from ${table_name}_${format};

cleanup
drop table ${table_name}_ingested;
7 changes: 7 additions & 0 deletions benchmark/large/ingestion/tpch/csv/ingest_lineitem.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/large/ingestion/tpch/csv/ingest_lineitem.benchmark
# description: benchmark ingestion of lineitem
# group: [csv]

template benchmark/large/ingestion/tpch/tpch_ingestion_persistent.benchmark.in
table_name=lineitem
format=csv
7 changes: 7 additions & 0 deletions benchmark/large/ingestion/tpch/csv/ingest_orders.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/large/ingestion/tpch/csv/ingest_orders.benchmark
# description: benchmark ingestion of orders
# group: [csv]

template benchmark/large/ingestion/tpch/tpch_ingestion_persistent.benchmark.in
table_name=orders
format=csv
11 changes: 11 additions & 0 deletions benchmark/large/ingestion/tpch/load_tpch_sf10.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
CALL dbgen(sf=10);
EXPORT DATABASE 'duckdb_benchmark_data/tpch_parquet_sf10' (FORMAT PARQUET);
EXPORT DATABASE 'duckdb_benchmark_data/tpch_csv_sf10' (FORMAT CSV);

create view lineitem_csv as select * from read_csv('duckdb_benchmark_data/tpch_csv_sf10/lineitem.csv');
create view lineitem_parquet as select * from read_parquet('duckdb_benchmark_data/tpch_parquet_sf10/lineitem.parquet');
create view lineitem_native as select * from lineitem;

create view orders_csv as select * from read_csv('duckdb_benchmark_data/tpch_csv_sf10/orders.csv');
create view orders_parquet as select * from read_parquet('duckdb_benchmark_data/tpch_parquet_sf10/orders.parquet');
create view orders_native as select * from orders;
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/large/ingestion/tpch/native/ingest_lineitem.benchmark
# description: benchmark ingestion of lineitem
# group: [native]

template benchmark/large/ingestion/tpch/tpch_ingestion_persistent.benchmark.in
table_name=lineitem
format=native
7 changes: 7 additions & 0 deletions benchmark/large/ingestion/tpch/native/ingest_orders.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/large/ingestion/tpch/native/ingest_orders.benchmark
# description: benchmark ingestion of orders
# group: [native]

template benchmark/large/ingestion/tpch/tpch_ingestion_persistent.benchmark.in
table_name=orders
format=native
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/large/ingestion/tpch/parquet/ingest_lineitem.benchmark
# description: benchmark ingestion of lineitem
# group: [parquet]

template benchmark/large/ingestion/tpch/tpch_ingestion_persistent.benchmark.in
table_name=lineitem
format=parquet
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# name: benchmark/large/ingestion/tpch/parquet/ingest_orders.benchmark
# description: benchmark ingestion of orders
# group: [parquet]

template benchmark/large/ingestion/tpch/tpch_ingestion_persistent.benchmark.in
table_name=orders
format=parquet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# name: ${FILE_PATH}
# description: ${DESCRIPTION}
# group: [ingestion]

name Q${table_name}
group ingestion
subgroup tpch

require parquet

require tpch

cache tpch_sf10_ingest.duckdb

load benchmark/large/ingestion/tpch/load_tpch_sf10.sql

run
create or replace table ${table_name}_ingested as select * from ${table_name}_${format};

cleanup
DROP TABLE ${table_name}_ingested;
25 changes: 25 additions & 0 deletions benchmark/tpch/parquet/parquet_load_encrypted.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# name: benchmark/tpch/parquet/parquet_load_encrypted.benchmark
# description: Import data from an encrypted Parquet file
# group: [parquet]

name Parquet Data Import (In-Memory)
group parquet
subgroup tpch

require parquet
require tpch
require httpfs

load
CALL dbgen(sf=1);
PRAGMA add_parquet_key('key128', '0123456789112345');
COPY lineitem TO '${BENCHMARK_DIR}/lineitem_encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'key128'});
DROP TABLE lineitem;
CREATE TABLE lineitem(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL);

run
COPY lineitem FROM '${BENCHMARK_DIR}/lineitem_encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'key128'});

cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL);
23 changes: 23 additions & 0 deletions benchmark/tpch/parquet/parquet_q1_encrypted.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# name: benchmark/tpch/parquet/parquet_q1_encrypted.benchmark
# description: Execute Q1 over lineitem stored in an encrypted parquet file
# group: [parquet]

name Q1 (Parquet)
group parquet
subgroup tpch

# requiring httpfs makes en/decryption about 2x as fast
require httpfs
require parquet
require tpch

load
CALL dbgen(sf=1, suffix='_normal');
PRAGMA add_parquet_key('key256', '01234567891123450123456789112345');
COPY lineitem_normal TO '${BENCHMARK_DIR}/lineitem_encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'key256'});
CREATE VIEW lineitem AS SELECT * FROM read_parquet('${BENCHMARK_DIR}/lineitem_encrypted.parquet', encryption_config={footer_key: 'key256'});

run
PRAGMA tpch(1)

result extension/tpch/dbgen/answers/sf1/q01.csv
3 changes: 3 additions & 0 deletions data/csv/headers/empty_1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a,b,c, ,e
1,2,3,4,5
6,7,8,9,10
3 changes: 3 additions & 0 deletions data/csv/headers/empty_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a,b,c,,e
1,2,3,4,5
6,7,8,9,10
3 changes: 3 additions & 0 deletions data/csv/headers/empty_3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a,b,c, ,e
1,2,3,4,5
6,7,8,9,10
3 changes: 3 additions & 0 deletions data/csv/headers/empty_4.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a,b,c, ,e
1,2,3,4,5
6,7,8,9,10
4 changes: 4 additions & 0 deletions data/csv/timestamp_tz.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1/1/2020
1/1/2020
1/1/2020
5/7/1995
Loading

0 comments on commit 630600f

Please sign in to comment.