Skip to content

Commit

Permalink
Created a separate DuckDB parser; changed DuckDB to the default parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
drizk1 committed Apr 1, 2024
1 parent 083299b commit bbfe34b
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 23 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ Supported helper functions include
- `!` negation
- `across`

DuckDB is the default SQL backend.

Switch to Postgres using
`set_sql_mode(:postgres)`

Switch to MySQL using
`set_sql_mode(:mysql)`

Switch to DuckDB using
`set_sql_mode(:duckdb)`
Switch to SQLite using
`set_sql_mode(:lite)`

DuckDB support enables:
- directly reading in .parquet, .json, and .csv file paths.
Expand Down
11 changes: 6 additions & 5 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ include("docstrings.jl")
include("structs.jl")
include("db_parsing.jl")
include("TBD_macros.jl")
include("postgresparsing.jl")
include("sqlite_parsing.jl")
include("mysql_parsing.jl")
include("parsing_sqlite.jl")
include("parsing_duckdb.jl")
include("parsing_postgres.jl")
include("parsing_mysql.jl")
include("joins_sq.jl")
include("slices_sq.jl")


current_sql_mode = Ref(:lite)
current_sql_mode = Ref(:duckdb)

# Function to switch modes
function set_sql_mode(mode::Symbol)
Expand All @@ -44,7 +45,7 @@ function expr_to_sql(expr, sq; from_summarize::Bool = false)
elseif current_sql_mode[] == :postgres
return expr_to_sql_postgres(expr, sq; from_summarize=from_summarize)
elseif current_sql_mode[] == :duckdb
return expr_to_sql_postgres(expr, sq; from_summarize=from_summarize)
return expr_to_sql_duckdb(expr, sq; from_summarize=from_summarize)
elseif current_sql_mode[] == :mysql
return expr_to_sql_mysql(expr, sq; from_summarize=from_summarize)
else
Expand Down
6 changes: 3 additions & 3 deletions src/db_parsing.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
names_to_modify = ["str_replace", "str_replace_all", "str_remove", "str_remove_all", "replace_missing", "missing_if",
"floor_date", "is_missing"]
"floor_date", "is_missing", "starts_with", "ends_with"]
# This function lets @capture match function names that contain an underscore, e.g. str_replace
function exc_capture_bug(expr, names_to_modify::Vector{String})
names_set = Set(names_to_modify)
Expand Down Expand Up @@ -176,9 +176,9 @@ function parse_char_matching(expr)
func_name = pattern_func.args[1]
pattern = pattern_func.args[2]

like_expr = if func_name == :starts_with
like_expr = if func_name == :starts_with || func_name == :startswith
"$(column) LIKE '$(pattern)%'"
elseif func_name == :ends_with
elseif func_name == :ends_with || func_name == :endswith
"$(column) LIKE '%$(pattern)'"
elseif func_name == :contains
"$(column) LIKE '%$(pattern)%'"
Expand Down
38 changes: 25 additions & 13 deletions src/joins_sq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,25 +97,37 @@ $docstring_inner_join
"""
macro inner_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column) # Assuming lhs_column could be a symbol or expression
rhs_col_str = string(rhs_column) # Assuming rhs_column could be a symbol or expression
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)

return quote
sq = $(esc(sqlquery))
if isa(sq, SQLQuery)
# Dynamically determine the most recent CTE or base table for the rhs_column
# This is a simplified placeholder logic
most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count) : sq.from

# Construct the join condition, automatically prepending the most recent source to the rhs_column
# Note: Adjust the logic here to ensure it matches your actual requirements for column qualification
on_conditions_str = string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)

join_clause = " INNER JOIN " * string($(esc(join_table))) * " ON " * on_conditions_str
sq.from *= join_clause

needs_new_cte = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)

if needs_new_cte
sq.cte_count += 1
cte_name = "cte_" * string(sq.cte_count)

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" INNER JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
push!(sq.ctes, new_cte)

# Update the FROM clause
sq.from = cte_name
else
join_clause = " INNER JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
sq.from *= join_clause
end

new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
sq.metadata = vcat(sq.metadata, new_metadata)
sq.metadata = vcat(sq.metadata, new_metadata)
else
error("Expected sqlquery to be an instance of SQLQuery")
end
Expand Down
148 changes: 148 additions & 0 deletions src/parsing_duckdb.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Build the SQL for an aggregate call. Inside a summarize the result is a call
# expression such as `AVG(a)`; otherwise it is a string with the window clause
# produced by `construct_window_clause(sq)` appended.
function _duckdb_aggregate(sql_name::Symbol, args, sq, from_summarize::Bool)
    if from_summarize
        return Expr(:call, sql_name, args...)
    end
    window_clause = construct_window_clause(sq)
    return "$(sql_name)($(join(string.(args), ", "))) $(window_clause)"
end

"""
    expr_to_sql_duckdb(expr, sq; from_summarize::Bool)

Rewrite the Julia expression `expr` into its DuckDB SQL counterpart.

The expression is first passed through `exc_capture_bug` (so that helper names
containing underscores can be matched by `@capture`) and then walked bottom-up
with `MacroTools.postwalk`. Each recognised call is replaced either by a new
call expression (e.g. `mean(a)` becomes `AVG(a)`) or by a SQL string; anything
that is not recognised is returned unchanged.

# Arguments
- `expr`: the expression to translate.
- `sq`: the query object handed to `construct_window_clause` when an aggregate
  is used outside of a summarize.

# Keywords
- `from_summarize::Bool`: when `true`, aggregates are emitted as plain
  aggregate calls; when `false`, they are emitted as strings followed by the
  window clause built from `sq`.

# Throws
- An error when `cumsum` is used with `from_summarize = true`.
"""
function expr_to_sql_duckdb(expr, sq; from_summarize::Bool)
    expr = exc_capture_bug(expr, names_to_modify)
    MacroTools.postwalk(expr) do x
        # Handle basic arithmetic and functions
        if @capture(x, a_ + b_)
            return :($a + $b)
        elseif @capture(x, a_ - b_)
            return :($a - $b)
        elseif @capture(x, a_ * b_)
            return :($a * $b)
        elseif @capture(x, a_ / b_)
            return :($a / $b)
        elseif @capture(x, a_ ^ b_)
            return :(POWER($a, $b))
        elseif @capture(x, round(a_))
            return :(ROUND($a))
        elseif @capture(x, round(a_, b_))
            return :(ROUND($a, $b))
        # Aggregates: plain call inside a summarize, windowed string otherwise
        elseif @capture(x, mean(a_))
            return _duckdb_aggregate(:AVG, (a,), sq, from_summarize)
        elseif @capture(x, minimum(a_))
            return _duckdb_aggregate(:MIN, (a,), sq, from_summarize)
        elseif @capture(x, maximum(a_))
            return _duckdb_aggregate(:MAX, (a,), sq, from_summarize)
        elseif @capture(x, sum(a_))
            return _duckdb_aggregate(:SUM, (a,), sq, from_summarize)
        elseif @capture(x, cumsum(a_))
            if from_summarize
                error("cumsum is only available through a windowed @mutate")
            else
                window_clause = construct_window_clause(sq, from_cumsum = true)
                return "SUM($(string(a))) $(window_clause)"
            end
        # Statistical aggregates
        elseif @capture(x, std(a_))
            return _duckdb_aggregate(:STDDEV_SAMP, (a,), sq, from_summarize)
        elseif @capture(x, cor(a_, b_))
            # Both columns must be forwarded; CORR is a two-argument aggregate.
            return _duckdb_aggregate(:CORR, (a, b), sq, from_summarize)
        elseif @capture(x, cov(a_, b_))
            # Both columns must be forwarded; COVAR_SAMP is a two-argument aggregate.
            return _duckdb_aggregate(:COVAR_SAMP, (a, b), sq, from_summarize)
        elseif @capture(x, var(a_))
            return _duckdb_aggregate(:VAR_SAMP, (a,), sq, from_summarize)
        # stringr-style functions; names arrive with underscores removed by
        # exc_capture_bug so that @capture can match them
        elseif @capture(x, strreplaceall(str_, pattern_, replace_))
            return :(REGEXP_REPLACE($str, $pattern, $replace, 'g'))
        elseif @capture(x, strreplace(str_, pattern_, replace_))
            return :(REGEXP_REPLACE($str, $pattern, $replace))
        elseif @capture(x, strremoveall(str_, pattern_))
            # Replace every match with the empty string; 'g' is the global
            # option, not the replacement text.
            return :(REGEXP_REPLACE($str, $pattern, "", 'g'))
        elseif @capture(x, strremove(str_, pattern_))
            return :(REGEXP_REPLACE($str, $pattern, ""))
        elseif @capture(x, startswith(str_, pattern_))
            return :(starts_with($str, $pattern))
        elseif @capture(x, endswith(str_, pattern_))
            return :(ends_with($str, $pattern))
        elseif @capture(x, contains(str_, pattern_))
            return :(contains($str, $pattern))
        elseif @capture(x, ismissing(a_))
            return "($(string(a)) IS NULL)"
        # Date extraction functions
        elseif @capture(x, year(a_))
            return "EXTRACT(YEAR FROM " * string(a) * ")"
        elseif @capture(x, month(a_))
            return "EXTRACT(MONTH FROM " * string(a) * ")"
        elseif @capture(x, day(a_))
            return "EXTRACT(DAY FROM " * string(a) * ")"
        elseif @capture(x, hour(a_))
            return "EXTRACT(HOUR FROM " * string(a) * ")"
        elseif @capture(x, minute(a_))
            return "EXTRACT(MINUTE FROM " * string(a) * ")"
        elseif @capture(x, second(a_))
            return "EXTRACT(SECOND FROM " * string(a) * ")"
        elseif @capture(x, floordate(time_column_, unit_))
            return :(DATE_TRUNC($unit, $time_column))
        elseif @capture(x, difftime(endtime_, starttime_, unit_))
            return :(date_diff($unit, $starttime, $endtime))
        elseif @capture(x, replacemissing(column_, replacement_value_))
            return :(COALESCE($column, $replacement_value))
        elseif @capture(x, missingif(column_, value_to_replace_))
            return :(NULLIF($column, $value_to_replace))
        elseif isa(x, Expr) && x.head == :call
            fname = x.args[1]
            nargs = length(x.args)
            if fname == :if_else && nargs == 4
                return parse_if_else(x)
            elseif fname == :as_float && nargs == 2
                return "CAST(" * string(x.args[2]) * " AS DECIMAL)"
            elseif fname == :as_integer && nargs == 2
                return "CAST(" * string(x.args[2]) * " AS INT)"
            elseif fname == :as_string && nargs == 2
                return "CAST(" * string(x.args[2]) * " AS STRING)"
            elseif fname == :case_when
                return parse_case_when(x)
            elseif fname == :! && nargs == 2
                # Unary negation: translate the operand, then wrap it in NOT (...)
                inner_expr = expr_to_sql_duckdb(x.args[2], sq, from_summarize = false)
                return string("NOT (", inner_expr, ")")
            elseif fname == :str_detect && nargs == 3
                column, pattern = x.args[2], x.args[3]
                return string(column, " LIKE \'%", pattern, "%'")
            elseif fname == :n && nargs == 1
                return "COUNT(*)"
            end
        end
        return x
    end
end
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit bbfe34b

Please sign in to comment.