From 7ecac9dcf27520165e1174f2a0c36646be0737ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Feb 2024 01:41:20 +0100 Subject: [PATCH 1/2] Add motherduck --- motherduck/benchmark.sh | 34 ++++++ motherduck/create.sql | 108 +++++++++++++++++ motherduck/log.txt | 215 +++++++++++++++++++++++++++++++++ motherduck/queries.sql | 43 +++++++ motherduck/query.py | 18 +++ motherduck/results/result.json | 57 +++++++++ motherduck/run.sh | 5 + 7 files changed, 480 insertions(+) create mode 100755 motherduck/benchmark.sh create mode 100644 motherduck/create.sql create mode 100644 motherduck/log.txt create mode 100644 motherduck/queries.sql create mode 100755 motherduck/query.py create mode 100644 motherduck/results/result.json create mode 100755 motherduck/run.sh diff --git a/motherduck/benchmark.sh b/motherduck/benchmark.sh new file mode 100755 index 000000000..55c2dd659 --- /dev/null +++ b/motherduck/benchmark.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Install + +sudo apt-get update +sudo apt-get install -y python3-pip +pip install duckdb psutil + +# Load the data + +# Open https://app.motherduck.com/ and paste the contents of create.sql + +# Then run: +# COPY hits FROM 'https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.csv.gz' + +# 4122 seconds + +# Install the command line tool + +wget https://github.com/duckdb/duckdb/releases/download/v0.9.2/duckdb_cli-linux-amd64.zip +unzip duckdb_cli-linux-amd64.zip + +./duckdb + +# .open md: +# Authenticate and obtain a token. +# export motherduck_token=... + +# Run the queries + +./run.sh 2>&1 | tee log.txt + +cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/' | + awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/motherduck/create.sql b/motherduck/create.sql new file mode 100644 index 000000000..4d23eaac6 --- /dev/null +++ b/motherduck/create.sql @@ -0,0 +1,108 @@ +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT, + Referer TEXT, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT, + Params TEXT, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT, + BrowserCountry TEXT, + SocialNetwork TEXT, + SocialAction TEXT, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT, + ParamCurrency TEXT, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT, + OpenstatCampaignID TEXT, + OpenstatAdID TEXT, + OpenstatSourceID TEXT, + UTMSource TEXT, + UTMMedium TEXT, + UTMCampaign TEXT, + UTMContent TEXT, + UTMTerm TEXT, + FromTag TEXT, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +); diff --git a/motherduck/log.txt b/motherduck/log.txt new file mode 100644 index 000000000..151969f9f --- /dev/null +++ b/motherduck/log.txt @@ -0,0 +1,215 @@ +SELECT COUNT(*) FROM hits; + +0.21968004433438182 +0.3162029208615422 +0.21604336006566882 +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; + +0.33489555725827813 +0.29951094510033727 +0.21411490021273494 +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; + +0.4648742200806737 +0.3040045131929219 +0.30952421203255653 +SELECT AVG(UserID) FROM hits; + +0.6499226209707558 +0.22191201290115714 +0.2888053017668426 +SELECT COUNT(DISTINCT UserID) FROM hits; + +0.7704195859842002 +0.921060211956501 +0.819766622968018 +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; + +1.9447067943401635 +1.2225632020272315 +1.3308632392436266 +SELECT MIN(EventDate), MAX(EventDate) FROM hits; + +0.2486483482643962 +0.22854855004698038 +0.20795917278155684 +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; + +0.22081567207351327 +0.2213178831152618 +0.2327750907279551 +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; + +1.3735258071683347 +1.2279518628492951 +1.0233631990849972 +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; + +1.277890115045011 +1.3313516019843519 +1.1260047918185592 +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; + +0.6073150201700628 +0.2593280617147684 +0.3234165138565004 +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; + +0.34701970824971795 +0.32168390695005655 +0.3142702719196677 +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +1.0892055933363736 +1.1165755479596555 +0.8717481149360538 +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; + +1.5503880502656102 +1.6257044719532132 +1.464306827634573 +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; + +0.9123282572254539 +0.7499142647720873 +0.8189653856679797 +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; + +0.9769321829080582 +0.8805147060193121 +0.8598887426778674 +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; + +1.8943038629367948 +2.4523583548143506 +2.457844952121377 +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; + +2.4018522929400206 +2.1315763420425355 +2.188034536782652 +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; + +7.007666043005884 +5.863597965799272 +5.461284582037479 +SELECT UserID FROM hits WHERE UserID = 435090932899640449; + +0.24364612391218543 +0.325953362043947 +0.3064295807853341 +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; + +15.391365340910852 +0.8924880139529705 +0.8800020259805024 +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +1.0099374479614198 +0.7060354948043823 +0.7063306840136647 +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; + +8.954986380413175 +4.095173366833478 +3.279917892999947 +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; + +23.613256133161485 +19.25096853263676 +23.278139277826995 +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; + +1.5025351103395224 +0.7977945390157402 +0.6170151601545513 +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; + +0.46431142510846257 +0.4349544760771096 +0.36817489471286535 +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; + +0.5523030371405184 +0.5110767548903823 +0.3649540198966861 +SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; + +4.98280710587278 +2.0472186598926783 +2.6902666091918945 +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; + +6.719612340908498 +4.538661384955049 +4.369533614721149 +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; + +3.358773229178041 +3.1345412940718234 +3.100874589756131 +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; + +2.000370934139937 +1.3284917967393994 +1.2289274740032852 +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; + +3.7089375513605773 +2.681138473097235 +1.91789041319862 +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; + +7.777549963910133 +7.958697326015681 +7.435122138820589 +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; + +9.89957385417074 +6.938831991981715 +5.757995248306543 +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; + +5.393568014726043 +5.632281670346856 +5.551704770885408 +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; + +1.4525640471838415 +0.8954971083439887 +0.9103033309802413 +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; + +0.22383009875193238 +0.2152935261838138 +0.21353887394070625 +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; + +0.2124440912157297 +0.20737810991704464 +0.20923770079389215 +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; + +0.2175677828490734 +0.23641109932214022 +0.22010038886219263 +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; + +0.24250942608341575 +0.21723396237939596 +0.2134688631631434 +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; + +0.21473077218979597 +0.2085639163851738 +0.251109620090574 +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; + +0.21749612782150507 +0.24898552894592285 +0.20936914114281535 +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; + +0.2193519319407642 +0.2155164610594511 +0.20863541215658188 diff --git a/motherduck/queries.sql b/motherduck/queries.sql new file mode 100644 index 000000000..b4115ee3a --- /dev/null +++ b/motherduck/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/motherduck/query.py b/motherduck/query.py new file mode 100755 index 000000000..2802d8e54 --- /dev/null +++ b/motherduck/query.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 + +import duckdb +import timeit +import psutil +import sys + +query = sys.stdin.read() +print(query) + +con = duckdb.connect('md:') +con = duckdb.connect(database="md:my_db", read_only=False) +for try_num in range(3): + start = timeit.default_timer() + results = con.sql(query).fetchall() + end = timeit.default_timer() + print(end - start) + del results diff --git a/motherduck/results/result.json b/motherduck/results/result.json new file mode 100644 index 000000000..760bc2f56 --- /dev/null +++ b/motherduck/results/result.json @@ -0,0 +1,57 @@ +{ + "system": "Motherduck", + "date": "2024-01-27", + "machine": "cloud", + "cluster_size": 1, + + "tags": ["C++", "column-oriented", "serverless"], + + "load_time": 4122, + "data_size": 25332035584, + + "result": [ +[0.21968004433438182,0.3162029208615422,0.21604336006566882], +[0.33489555725827813,0.29951094510033727,0.21411490021273494], +[0.4648742200806737,0.3040045131929219,0.30952421203255653], +[0.6499226209707558,0.22191201290115714,0.2888053017668426], +[0.7704195859842002,0.921060211956501,0.819766622968018], +[1.9447067943401635,1.2225632020272315,1.3308632392436266], +[0.2486483482643962,0.22854855004698038,0.20795917278155684], +[0.22081567207351327,0.2213178831152618,0.2327750907279551], +[1.3735258071683347,1.2279518628492951,1.0233631990849972], +[1.277890115045011,1.3313516019843519,1.1260047918185592], +[0.6073150201700628,0.2593280617147684,0.3234165138565004], +[0.34701970824971795,0.32168390695005655,0.3142702719196677], +[1.0892055933363736,1.1165755479596555,0.8717481149360538], +[1.5503880502656102,1.6257044719532132,1.464306827634573], +[0.9123282572254539,0.7499142647720873,0.8189653856679797], +[0.9769321829080582,0.8805147060193121,0.8598887426778674], +[1.8943038629367948,2.4523583548143506,2.457844952121377], +[2.4018522929400206,2.1315763420425355,2.188034536782652], +[7.007666043005884,5.863597965799272,5.461284582037479], +[0.24364612391218543,0.325953362043947,0.3064295807853341], +[15.391365340910852,0.8924880139529705,0.8800020259805024], +[1.0099374479614198,0.7060354948043823,0.7063306840136647], +[8.954986380413175,4.095173366833478,3.279917892999947], +[23.613256133161485,19.25096853263676,23.278139277826995], +[1.5025351103395224,0.7977945390157402,0.6170151601545513], +[0.46431142510846257,0.4349544760771096,0.36817489471286535], +[0.5523030371405184,0.5110767548903823,0.3649540198966861], +[4.98280710587278,2.0472186598926783,2.6902666091918945], +[6.719612340908498,4.538661384955049,4.369533614721149], +[3.358773229178041,3.1345412940718234,3.100874589756131], +[2.000370934139937,1.3284917967393994,1.2289274740032852], +[3.7089375513605773,2.681138473097235,1.91789041319862], +[7.777549963910133,7.958697326015681,7.435122138820589], +[9.89957385417074,6.938831991981715,5.757995248306543], +[5.393568014726043,5.632281670346856,5.551704770885408], +[1.4525640471838415,0.8954971083439887,0.9103033309802413], +[0.22383009875193238,0.2152935261838138,0.21353887394070625], +[0.2124440912157297,0.20737810991704464,0.20923770079389215], +[0.2175677828490734,0.23641109932214022,0.22010038886219263], +[0.24250942608341575,0.21723396237939596,0.2134688631631434], +[0.21473077218979597,0.2085639163851738,0.251109620090574], +[0.21749612782150507,0.24898552894592285,0.20936914114281535], +[0.2193519319407642,0.2155164610594511,0.20863541215658188] + ] +} diff --git a/motherduck/run.sh b/motherduck/run.sh new file mode 100755 index 000000000..d71b63120 --- /dev/null +++ b/motherduck/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +cat queries.sql | while read query; do + ./query.py <<< "${query}" +done From 9d31d601beb865afb483e5902c40086ed355f1fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Feb 2024 01:43:33 +0100 Subject: [PATCH 2/2] Add a comment --- motherduck/benchmark.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/motherduck/benchmark.sh b/motherduck/benchmark.sh index 55c2dd659..eface3593 100755 --- a/motherduck/benchmark.sh +++ b/motherduck/benchmark.sh @@ -15,6 +15,9 @@ pip install duckdb psutil # 4122 seconds +# Note: it does not support transactions, +# and an attempt to query the table during leaded results in waiting on a global lock. + # Install the command line tool wget https://github.com/duckdb/duckdb/releases/download/v0.9.2/duckdb_cli-linux-amd64.zip