-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
all docstrings run. had to silence slice_sample for formatting reasons
- Loading branch information
Showing
21 changed files
with
275,478 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,15 @@ uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b" | |
authors = ["drizk1 <[email protected]> and contributors"] | ||
version = "1.0.0-DEV" | ||
|
||
[deps] | ||
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" | ||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | ||
DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1" | ||
LibPQ = "194296ae-ab2e-5f79-8cd4-7183a0a5a0d1" | ||
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" | ||
Reexport = "189a3867-3050-52da-a836-e630ba90ab69" | ||
SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9" | ||
|
||
[compat] | ||
julia = "1.9" | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,200 @@ | ||
# TidierDB | ||
# TidierDB.jl | ||
|
||
[![Build Status](https://github.com/drizk1/TidierDB.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/drizk1/TidierDB.jl/actions/workflows/CI.yml?query=branch%3Amain) | ||
This package is an early draft. It currently supports the following macros: | ||
- `@arrange` | ||
- `@group_by` | ||
- `@filter` | ||
- `@select` | ||
- `@mutate` | ||
- `@summarize` / `@summarise` supports `across` with tidy selection | ||
- `@distinct` | ||
- `@left_join`, `@right_join`, `@inner_join` (slight syntax differences) | ||
- `@count` | ||
- `@slice_min`, `@slice_max`, `@slice_sample` | ||
- `@window_order` | ||
- `@show_query` | ||
- `@collect` | ||
|
||
Supported helper functions include | ||
- `if_else` and `case_when` (`case_when` has a slight syntax difference: it uses `,` instead of `=>`) | ||
- `replace_missing` and `missing_if` | ||
- `starts_with`, `ends_with`, `contains` | ||
- `as_integer`, `as_float`, `as_string` | ||
- `!` negation | ||
- `across` | ||
|
||
Switch to Postgres using | ||
`set_sql_mode(:postgres)` | ||
|
||
Postgres support includes | ||
- Postgres specific aggregate functions: `corr`, `cov`, `std`, `var` | ||
- From TidierStrings.jl `str_detect`, `str_replace`, `str_replace_all`, `str_remove_all`, `str_remove` | ||
- From TidierDates.jl `year`, `month`, `day`, `hour`, `min`, `second`, `floor_date` | ||
|
||
|
||
Tidy selection for columns is supported in `@select`, `@group_by` and `across` in `@summarize`. | ||
|
||
Bang-bang `!!` interpolation for columns and values is supported (with some edge cases still to be fixed) in: `@select`, `@group_by`, `@filter`, `@summarize`, `@mutate`, `@count`, `@rename`. Support in the other macros is coming soon. | ||
|
||
Sequential steps in a chain are captured as CTEs rather than as nested subqueries (this design choice can be revisited later). | ||
|
||
This links to [examples](https://github.com/drizk1/TidierDB.jl/blob/main/olympics_examples_fromweb.jl) which achieve the same result as the SQL queries. | ||
|
||
A few examples are shown below, including examples that use `across` and interpolation. | ||
|
||
``` | ||
@chain start_query_meta(db, :mtcars2) begin | ||
@filter(Column1 != starts_with("M")) | ||
@group_by(cyl) | ||
@summarize(mpg = mean(mpg)) | ||
@mutate(sqaured = mpg^2, | ||
rounded = round(mpg), | ||
efficiency = case_when( | ||
mpg >= cyl^2 , 12, | ||
mpg < 15.2 , 14, | ||
44)) | ||
@filter(efficiency>12) | ||
@arrange(rounded) | ||
@show_query | ||
#@collect | ||
end | ||
``` | ||
``` | ||
WITH cte_1 AS ( | ||
SELECT * | ||
FROM mtcars2 | ||
WHERE NOT (Column1 LIKE 'M%')), | ||
cte_2 AS ( | ||
SELECT cyl, AVG(mpg) AS mpg | ||
FROM cte_1 | ||
GROUP BY cyl), | ||
cte_3 AS ( | ||
SELECT *, POWER(mpg, 2) AS sqaured, ROUND(mpg) AS rounded, CASE WHEN mpg >= POWER(cyl, 2) THEN 12 WHEN mpg < 15.2 THEN 14 ELSE 44 END AS efficiency | ||
FROM cte_2 | ||
GROUP BY cyl) | ||
SELECT * | ||
FROM cte_3 | ||
GROUP BY cyl | ||
HAVING efficiency > 12 | ||
ORDER BY rounded ASC | ||
``` | ||
``` | ||
@chain start_query_meta(db, :mtcars2) begin | ||
@filter(Column1 != starts_with("M")) | ||
@group_by(cyl) | ||
@summarize(mpg = mean(mpg)) | ||
@mutate(sqaured = mpg^2, | ||
rounded = round(mpg), | ||
efficiency = case_when( | ||
mpg >= cyl^2 , 12, | ||
mpg < 15.2 , 14, | ||
44)) | ||
@filter(efficiency>12) | ||
@arrange(rounded) | ||
@collect | ||
end | ||
``` | ||
``` | ||
2×5 DataFrame | ||
Row │ cyl mpg sqaured rounded efficiency | ||
│ Int64 Float64 Float64 Float64 Int64 | ||
─────┼────────────────────────────────────────────── | ||
1 │ 8 14.75 217.562 15.0 14 | ||
2 │ 6 19.7333 389.404 20.0 44 | ||
``` | ||
`across` in `summarize` | ||
``` | ||
@chain start_query_meta(db, :mtcars2) begin | ||
@group_by(cyl) | ||
@summarize(across((starts_with("a"), ends_with("s")), (mean, sum))) | ||
#@show_query | ||
@collect | ||
end | ||
``` | ||
``` | ||
3×5 DataFrame | ||
Row │ cyl mean_am mean_vs sum_am sum_vs | ||
│ Int64 Float64 Float64 Int64 Int64 | ||
─────┼─────────────────────────────────────────── | ||
1 │ 4 0.727273 0.909091 8 10 | ||
2 │ 6 0.428571 0.571429 3 4 | ||
3 │ 8 0.142857 0.0 2 0 | ||
``` | ||
|
||
``` | ||
@chain start_query_meta(db, :mtcars2) begin | ||
@filter(Column1 == starts_with("M")) | ||
@left_join(:join_test3, ID, Column1) ## autodetects the table/cte to apply to ID and, more importantly, column | ||
@select(mpg, vs:ID) ## also supports `starts_with`, `ends_with`, `contains` | ||
@collect | ||
end | ||
``` | ||
``` | ||
WITH cte_1 AS ( | ||
SELECT * | ||
FROM mtcars2 | ||
WHERE Column1 LIKE 'M%') | ||
SELECT mpg, vs, am, gear, carb, ID | ||
FROM cte_1 | ||
LEFT | ||
JOIN join_test3 ON join_test3.ID = cte_1.Column1 | ||
``` | ||
``` | ||
10×6 DataFrame | ||
Row │ mpg vs am gear carb ID | ||
│ Float64 Int64 Int64 Int64 Int64? String | ||
─────┼────────────────────────────────────────────────────── | ||
1 │ 21.0 0 1 4 missing Mazda RX4 | ||
2 │ 21.0 0 1 4 missing Mazda RX4 Wag | ||
3 │ 24.4 1 0 4 2 Merc 240D | ||
4 │ 22.8 1 0 4 2 Merc 230 | ||
5 │ 19.2 1 0 4 4 Merc 280 | ||
6 │ 17.8 1 0 4 4 Merc 280C | ||
7 │ 16.4 0 0 3 3 Merc 450SE | ||
8 │ 17.3 0 0 3 3 Merc 450SL | ||
9 │ 15.2 0 0 3 3 Merc 450SLC | ||
10 │ 15.0 0 1 5 8 Maserati Bora | ||
``` | ||
Interpolation support | ||
``` | ||
other_var = "Column1" | ||
my_var = :gear | ||
my_val = 3.7 | ||
third_var= "mpg" | ||
@chain start_query_meta(db, :mtcars2) begin | ||
@filter(!!other_var != starts_with("M")) | ||
@group_by(cyl) | ||
@summarize(mpg = mean(!!third_var)) | ||
@mutate(test = !!my_var * !!my_val, | ||
sqaured = (!!my_var)^2, | ||
rounded = round(!!my_var), | ||
efficiency = case_when( | ||
mpg >= (!!my_var)^2 , 12, | ||
mpg < !!my_val , 14, | ||
44)) | ||
@filter(efficiency>12) | ||
@arrange(rounded) | ||
@show_query | ||
#@collect | ||
end | ||
``` | ||
``` | ||
WITH cte_1 AS ( | ||
SELECT * | ||
FROM mtcars2 | ||
WHERE NOT (Column1 LIKE 'M%')), | ||
cte_2 AS ( | ||
SELECT cyl, AVG(mpg) AS mpg | ||
FROM cte_1 | ||
GROUP BY cyl), | ||
cte_3 AS ( | ||
SELECT cyl, mpg, gear * 3.7 AS test, POWER(gear, 2) AS sqaured, ROUND(gear) AS rounded, CASE WHEN mpg >= POWER(gear, 2) THEN 12 WHEN mpg < 3.7 THEN 14 ELSE 44 END AS efficiency | ||
FROM cte_2 ), | ||
cte_4 AS ( | ||
SELECT * | ||
FROM cte_3 | ||
WHERE efficiency > 12) | ||
SELECT * | ||
FROM cte_4 | ||
ORDER BY rounded ASC | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Example script: running TidierDB pipelines against an in-memory DuckDB database.
#
# NOTE(review): this script assumes that TidierDB (for `set_sql_mode`,
# `start_query_meta` and the `@...` macros), `@chain`, and the DataFrames
# `athlete_events` and `noc_regions` are already loaded in the session —
# none of them are defined here; confirm against how the script is run.
using DuckDB

# Open the database and a connection to it. The connection is bound to `con`
# because every pipeline below passes `con` to `start_query_meta`; previously
# it was assigned back to `db`, leaving `con` undefined.
db = DuckDB.open(":memory:")
con = DuckDB.connect(db)

# Expose the in-memory DataFrames to DuckDB under the table names used below.
DuckDB.register_data_frame(con, athlete_events, "athlete_events")
DuckDB.register_data_frame(con, noc_regions, "noc_regions")
set_sql_mode(:duckdb)

# Medal totals per region/sport/games, restricted to Indian hockey and
# sorted from the largest total downwards.
@chain start_query_meta(con, :athlete_events) begin
    @rename(tf = NOC)
    @mutate(Gold = if_else(Medal == "Gold", 1, 0),
            Silver = if_else(Medal == "Silver", 1, 0),
            Bronze = if_else(Medal == "Bronze", 1, 0))
    @mutate(total_medals = Gold + Silver + Bronze)
    @left_join(:noc_regions, NOC, tf)
    @group_by(region, Sport, Games)
    @summarize(total_sum = sum(total_medals))
    @filter(region == "India" && Sport == "Hockey")
    @arrange(desc(total_sum))
    @collect
    #@show_query
end

# Grouped mutate: attach the minimum Year within each Games group to every
# row, then keep only that column.
@chain start_query_meta(con, :athlete_events) begin
    @group_by(Games)
    @mutate(test = minimum(Year))
    @select(test)
    @collect
    #@show_query
end

# Grouped summarise: print the SQL generated for the maximum Year per Games
# group instead of executing it.
@chain start_query_meta(con, :athlete_events) begin
    @group_by(Games)
    @summarise(test = maximum(Year))
    @show_query
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
ID,wt2 | ||
Mazda RX4,57.62 | ||
Mazda RX4 Wag,57.875 | ||
Datsun 710,57.32 | ||
Hornet 4 Drive,58.215 | ||
Hornet Sportabout,58.44 | ||
Valiant,58.46 | ||
Duster 360,58.57 | ||
Merc 240D,58.19 | ||
Merc 230,58.15 | ||
Merc 280,58.44 | ||
Merc 280C,58.44 | ||
Merc 450SE,59.07 | ||
Merc 450SL,58.73 | ||
Merc 450SLC,58.78 | ||
Cadillac Fleetwood,60.25 | ||
Lincoln Continental,60.424 | ||
Chrysler Imperial,60.345 | ||
Fiat 128,57.2 | ||
Honda Civic,56.615 | ||
Toyota Corolla,56.835 | ||
Toyota Corona,57.465 | ||
Dodge Challenger,58.52 | ||
AMC Javelin,58.435 | ||
Camaro Z28,58.84 | ||
Pontiac Firebird,58.845 | ||
Fiat X1-9,56.935 | ||
Porsche 914-2,57.14 | ||
Lotus Europa,56.513 | ||
Ford Pantera L,58.17 | ||
Ferrari Dino,57.77 | ||
Maserati Bora,58.57 | ||
Volvo 142E,57.78 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" | ||
"Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4, | ||
"Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4, | ||
"Datsun 710",22.8,4,108,93,3.85,2.32,18.61,1,1,4, | ||
"Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 | ||
"Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 | ||
"Valiant",18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 | ||
"Duster 360",14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 | ||
"Merc 240D",24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 | ||
"Merc 230",22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 | ||
"Merc 280",19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 | ||
"Merc 280C",17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 | ||
"Merc 450SE",16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 | ||
"Merc 450SL",17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 | ||
"Merc 450SLC",15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 | ||
"Cadillac Fleetwood",10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 | ||
"Lincoln Continental",10.4,8,460,215,3,5.424,17.82,0,0,3,4 | ||
"Chrysler Imperial",14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 | ||
"Fiat 128",32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 | ||
"Honda Civic",30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 | ||
"Toyota Corolla",33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 | ||
"Toyota Corona",21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 | ||
"Dodge Challenger",15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 | ||
"AMC Javelin",15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 | ||
"Camaro Z28",13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 | ||
"Pontiac Firebird",19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 | ||
"Fiat X1-9",27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 | ||
"Porsche 914-2",26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 | ||
"Lotus Europa",30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 | ||
"Ford Pantera L",15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 | ||
"Ferrari Dino",19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 | ||
"Maserati Bora",15,8,301,335,3.54,3.57,14.6,0,1,5,8 | ||
"Volvo 142E",21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 |
Oops, something went wrong.