Skip to content

Commit

Permalink
Merge branch 'main' into fix_polars
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Nov 26, 2024
2 parents bd16ac3 + 9a151d0 commit 28fbc0b
Show file tree
Hide file tree
Showing 13 changed files with 159 additions and 339 deletions.
1 change: 1 addition & 0 deletions csvq/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This tool loads the whole file into memory, which means it cannot run even a single query from ClickBench.
7 changes: 7 additions & 0 deletions dsq/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
The tool looks non-functional.

Even the simplest query:
```
dsq hits.parquet "SELECT count(*) FROM {}"
```
leads to an out-of-memory (OOM) failure.
40 changes: 20 additions & 20 deletions index.html

Large diffs are not rendered by default.

42 changes: 0 additions & 42 deletions questdb-partitioned/benchmark.sh

This file was deleted.

108 changes: 0 additions & 108 deletions questdb-partitioned/create.sql

This file was deleted.

43 changes: 0 additions & 43 deletions questdb-partitioned/queries.sql

This file was deleted.

55 changes: 0 additions & 55 deletions questdb-partitioned/results/c6a.metal.json

This file was deleted.

20 changes: 0 additions & 20 deletions questdb-partitioned/run.sh

This file was deleted.

31 changes: 28 additions & 3 deletions questdb/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Install

wget https://github.com/questdb/questdb/releases/download/8.1.2/questdb-8.1.2-rt-linux-x86-64.tar.gz
wget https://github.com/questdb/questdb/releases/download/8.2.0/questdb-8.2.0-rt-linux-x86-64.tar.gz
tar xf questdb*.tar.gz --one-top-level=questdb --strip-components 1
questdb/bin/questdb.sh start

Expand All @@ -11,6 +11,7 @@ while ! nc -z localhost 9000; do
done

sed -i 's/query.timeout.sec=60/query.timeout.sec=500/' ~/.questdb/conf/server.conf
sed -i "s|cairo.sql.copy.root=import|cairo.sql.copy.root=$PWD|" ~/.questdb/conf/server.conf
questdb/bin/questdb.sh stop
questdb/bin/questdb.sh start

Expand All @@ -19,8 +20,32 @@ questdb/bin/questdb.sh start
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
gzip -d hits.csv.gz

curl -G --data-urlencode "query=$(cat create.sql)" 'http://localhost:9000/exec?timings=true'
time curl -F data=@hits.csv 'http://localhost:9000/imp?name=hits'
curl -G --data-urlencode "query=$(cat create.sql)" 'http://localhost:9000/exec'

# SQL COPY works best on metal instances:
curl -G --data-urlencode "query=copy hits from 'hits.csv' with timestamp 'EventTime' format 'yyyy-MM-dd HH:mm:ss';" 'http://localhost:9000/exec'

echo 'waiting for import to finish...'
until [ "$(curl -s -G --data-urlencode "query=select * from sys.text_import_log where phase is null and status='finished';" 'http://localhost:9000/exec' | grep -c '"count":1')" -ge 1 ]; do
echo '.'
sleep 5
done

curl -s -G --data-urlencode "query=select datediff('s', start, finish) took_secs from (select min(ts) start, max(ts) finish from sys.text_import_log where phase is null);" 'http://localhost:9000/exec'

# On smaller instances use this:
# start=$(date +%s)

# curl -F data=@hits.csv 'http://localhost:9000/imp?name=hits'

# echo 'waiting for rows to become readable...'
# until [ "$(curl -s -G --data-urlencode "query=select 1 from (select count() c from hits) where c = 99997497;" 'http://localhost:9000/exec' | grep -c '"count":1')" -ge 1 ]; do
# echo '.'
# sleep 1
# done

# end=$(date +%s)
# echo "import took: $(($end-$start)) secs"

# Run queries

Expand Down
2 changes: 1 addition & 1 deletion questdb/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ CREATE TABLE hits
RefererHash long,
URLHash long,
CLID int
);
) TIMESTAMP(EventTime) PARTITION BY DAY;
2 changes: 1 addition & 1 deletion questdb/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000, 1010;
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100, 110;
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000, 10010;
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-14T00:00:00Z' AND EventTime <= '2013-07-15T23:59:59Z' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY M LIMIT 1000, 1010;
SELECT EventTime AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-14T00:00:00Z' AND EventTime <= '2013-07-15T23:59:59Z' AND IsRefresh = 0 AND DontCountHits = 0 SAMPLE BY 1m ALIGN TO CALENDAR ORDER BY M LIMIT 1000, 1010;
Loading

0 comments on commit 28fbc0b

Please sign in to comment.