Skip to content

Commit

Permalink
adds .parquet and .csv file path copy_to support
Browse files Browse the repository at this point in the history
  • Loading branch information
drizk1 committed Mar 28, 2024
1 parent 240d2b2 commit e606605
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 6 deletions.
Binary file modified .DS_Store
Binary file not shown.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ Switch to Postgres using
Switch to DuckDB using
`set_sql_mode(:duckdb)`

DuckDB support enables:
- reading in .parquet or .csv files directly, without converting to a DataFrame first.
```
path = "file_path.parquet"
copy_to(conn, path, "table_name")
```

Postgres and DuckDB support includes
- Postgres specific aggregate functions: `corr`, `cov`, `std`, `var`
- From TidierStrings.jl `str_detect`, `str_replace`, `str_replace_all`, `str_remove_all`, `str_remove`
Expand Down
35 changes: 29 additions & 6 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,36 @@ end
"""
$docstring_copy_to
"""
function copy_to(conn, df_or_path::Union{DataFrame, AbstractString}, name::String)
    # Entry point: copy either an in-memory DataFrame or a .csv/.parquet file
    # (given by path) into the database behind `conn` as table `name`.
    # The actual work is selected by dispatch on the type of `df_or_path`.
    return _copy_to(conn, df_or_path, name)
end

# Load a DataFrame into the database using the backend selected by
# `current_sql_mode[]`. Throws an `ErrorException` for any other mode.
function _copy_to(conn, df::DataFrame, name::String)
    mode = current_sql_mode[]
    if mode == :duckdb
        return DuckDB.register_data_frame(conn, df, name)
    elseif mode == :lite
        return SQLite.load!(df, conn, name)
    end
    error("Unsupported SQL mode: $(mode)")
end

# Create table `name` from a .csv or .parquet file at `path` (DuckDB only).
# Throws an `ErrorException` when the SQL mode is not :duckdb or when the file
# extension is neither .csv nor .parquet.
function _copy_to(conn, path::AbstractString, name::String)
    if current_sql_mode[] != :duckdb
        error("Direct file loading is only supported for DuckDB in this implementation.")
    end

    # Match the extension case-insensitively so `data.CSV` / `data.PARQUET` work too.
    lowered = lowercase(path)
    reader = if endswith(lowered, ".csv")
        "read_csv_auto"
    elseif endswith(lowered, ".parquet")
        "read_parquet"
    else
        error("Unsupported file type for: $path")
    end

    # The path is embedded as a SQL string literal, so embedded single quotes
    # must be doubled; otherwise a path such as "o'brien/data.csv" produces
    # malformed (or injectable) SQL.
    # NOTE(review): `name` is interpolated verbatim, as before, so that qualified
    # names keep working; it must not come from untrusted input.
    sql_command = "CREATE TABLE $name AS SELECT * FROM $reader($(_sql_string_literal(path)));"
    return DuckDB.execute(conn, sql_command)
end

# Render `s` as a single-quoted SQL string literal, doubling embedded quotes.
_sql_string_literal(s::AbstractString) = "'" * replace(s, "'" => "''") * "'"

end

0 comments on commit e606605

Please sign in to comment.