Skip to content

Commit

Permalink
Merge #231
Browse files Browse the repository at this point in the history
231: enable S3Paths to be (de)serialized to/from Arrow r=omus a=jrevels

closes #184 

This adds ArrowTypes as a lightweight dependency, which is hopefully okay. 

Co-authored-by: Jarrett Revels <[email protected]>
Co-authored-by: Curtis Vogt <[email protected]>
  • Loading branch information
3 people authored Feb 7, 2022
2 parents 844efdd + 28b1414 commit a2e58b3
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 1 deletion.
5 changes: 4 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ version = "0.9.4"

[deps]
AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand All @@ -20,6 +21,7 @@ XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"

[compat]
AWS = "1.63.1"
ArrowTypes = "1.2"
Compat = "3.29.0"
EzXML = "0.9, 1"
FilePathsBase = "=0.9.11, =0.9.12, =0.9.13, =0.9.14, =0.9.15"
Expand All @@ -35,9 +37,10 @@ XMLDict = "0.3, 0.4"
julia = "1.3"

[extras]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "JSON3", "Minio"]
test = ["Test", "Arrow", "JSON3", "Minio"]
1 change: 1 addition & 0 deletions src/AWSS3.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export S3Path,

using AWS
using AWS.AWSServices: s3
using ArrowTypes
using FilePathsBase
using FilePathsBase: /, join
using HTTP
Expand Down
13 changes: 13 additions & 0 deletions src/s3path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -640,3 +640,16 @@ function FilePathsBase.mktmpdir(parent::S3Path)
fp = parent / string(uuid4(), "/")
return mkdir(fp)
end

# ArrowTypes integration for `S3Path`: the methods below declare a name for the type,
# the type used to store it (`String`), and how to get an `S3Path` back from a stored value.
const S3PATH_ARROW_NAME = Symbol("JuliaLang.AWSS3.S3Path")

# Name reported to ArrowTypes for any `S3Path` subtype.
function ArrowTypes.arrowname(::Type{<:S3Path})
    return S3PATH_ARROW_NAME
end

# `S3Path` values are represented as `String` on the Arrow side.
function ArrowTypes.ArrowType(::Type{<:S3Path})
    return String
end

# Resolve the registered name back to `S3Path`; the second argument is ignored.
function ArrowTypes.JuliaType(::Val{S3PATH_ARROW_NAME}, ::Any)
    return S3Path
end

# Rebuild an `S3Path` by passing the stored value to the `S3Path` constructor.
function ArrowTypes.fromarrow(::Type{<:S3Path}, uri_string)
    return S3Path(uri_string)
end

"""
    ArrowTypes.toarrow(path::S3Path)

Return `string(path)` as the value to store in Arrow for `path`.

# Throws
- `ArgumentError`: if `path.config` is not `nothing`.
"""
function ArrowTypes.toarrow(path::S3Path)
    # Only paths without a config are accepted; anything else is rejected below.
    path.config === nothing && return string(path)
    throw(ArgumentError("`S3Path` config must be `nothing` to serialize to Arrow"))
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ using AWS
using AWS.AWSExceptions: AWSException
using AWS.AWSServices: s3
using AWSS3
using Arrow
using Test
using Dates
using Retry
Expand Down
13 changes: 13 additions & 0 deletions test/s3path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,19 @@ function s3path_tests(config)
rm(json_path)
end

# Round-trip `S3Path` values through an Arrow buffer, then check that serializing a
# path constructed with an explicit `config` raises `ArgumentError`.
@testset "Arrow <-> S3Path (de)serialization" begin
    ver = String('A':'Z') * String('0':'5')
    paths = [
        S3Path("s3://$(bucket_name)/a"),
        S3Path("s3://$(bucket_name)/b?versionId=$ver"),
        # format trick: using this comment to force use of multiple lines
    ]
    tbl = Arrow.Table(Arrow.tobuffer((; paths=paths)))
    @test all(tbl.paths .== paths)
    # Add a path that carries a config; `toarrow` rejects those.
    push!(paths, S3Path("s3://$(bucket_name)/c"; config=config))
    # `@test_throws` expects the exception type first and the expression second.
    @test_throws ArgumentError Arrow.tobuffer((; paths=paths))
end

@testset "tryparse" begin
cfg = global_aws_config()
ver = String('A':'Z') * String('0':'5')
Expand Down

0 comments on commit a2e58b3

Please sign in to comment.