From c2959bb5cd82b33acaf3cbe17657d1a0e97ca68a Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Tue, 1 Feb 2022 13:32:53 -0800
Subject: [PATCH 1/7] enable S3Paths to be (de)serialized to/from Arrow

---
Review note: `@test_throws` takes the expected exception type first and the
expression under test second; the Arrow round-trip testset below uses that
order so it asserts that serializing an `S3Path` with a config throws
`ArgumentError`.

 Project.toml     |  5 ++++-
 src/AWSS3.jl     |  1 +
 src/s3path.jl    | 11 +++++++++++
 test/runtests.jl |  1 +
 test/s3path.jl   |  9 +++++++++
 5 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 026c32ed..a09c2fe9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ version = "0.9.3"
 
 [deps]
 AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
+ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -20,6 +21,7 @@ XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"
 
 [compat]
 AWS = "1.63.1"
+ArrowTypes = "1.2"
 Compat = "3.29.0"
 EzXML = "0.9, 1"
 FilePathsBase = "0.9.11"
@@ -35,9 +37,10 @@ XMLDict = "0.3, 0.4"
 julia = "1.3"
 
 [extras]
+Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
 Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "JSON3", "Minio"]
+test = ["Test", "Arrow", "JSON3", "Minio"]
diff --git a/src/AWSS3.jl b/src/AWSS3.jl
index 159cde53..24f028ea 100644
--- a/src/AWSS3.jl
+++ b/src/AWSS3.jl
@@ -38,6 +38,7 @@ export S3Path,
 
 using AWS
 using AWS.AWSServices: s3
+using ArrowTypes
 using FilePathsBase
 using FilePathsBase: /, join
 using HTTP
diff --git a/src/s3path.jl b/src/s3path.jl
index b0bee27c..7fc6d3ed 100644
--- a/src/s3path.jl
+++ b/src/s3path.jl
@@ -638,3 +638,14 @@ function FilePathsBase.mktmpdir(parent::S3Path)
     fp = parent / string(uuid4(), "/")
     return mkdir(fp)
 end
+
+const S3PATH_ARROW_NAME = Symbol("JuliaLang.AWSS3.S3Path")
+ArrowTypes.arrowname(::Type{<:S3Path}) = S3PATH_ARROW_NAME
+ArrowTypes.ArrowType(::Type{<:S3Path}) = String
+ArrowTypes.JuliaType(::Val{S3PATH_ARROW_NAME}, ::Any) = S3Path
+ArrowTypes.fromarrow(::Type{<:S3Path}, uri_string) = S3Path(uri_string)
+
+function ArrowTypes.toarrow(path::S3Path)
+    isnothing(path.config) || throw(ArgumentError("`path.config` must be `nothing` to serialize `path::S3Path` to Arrow"))
+    return string(path)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 82a9a5ef..32e70421 100755
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,6 +2,7 @@ using AWS
 using AWS.AWSExceptions: AWSException
 using AWS.AWSServices: s3
 using AWSS3
+using Arrow
 using Test
 using Dates
 using Retry
diff --git a/test/s3path.jl b/test/s3path.jl
index c3f7fa4c..f98558f7 100644
--- a/test/s3path.jl
+++ b/test/s3path.jl
@@ -432,6 +432,15 @@ function s3path_tests(config)
         rm(json_path)
     end
 
+    @testset "Arrow <-> S3Path (de)serialization" begin
+        ver = String('A':'Z') * String('0':'5')
+        paths = [S3Path("s3://$(bucket_name)/a"), S3Path("s3://$(bucket_name)/b?versionId=$ver")]
+        tbl = Arrow.Table(Arrow.tobuffer((; paths=paths)))
+        @test all(tbl.paths .== paths)
+        push!(paths, S3Path("s3://$(bucket_name)/c"; config=config))
+        @test_throws ArgumentError Arrow.tobuffer((; paths=paths))
+    end
+
     @testset "tryparse" begin
         cfg = global_aws_config()
         ver = String('A':'Z') * String('0':'5')

From 9d79b3ca9d3ee065a19d6b601799f85e679c7947 Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Tue, 1 Feb 2022 14:40:06 -0800
Subject: [PATCH 2/7] Update src/s3path.jl

---
 src/s3path.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/s3path.jl b/src/s3path.jl
index 7fc6d3ed..8f7f89b2 100644
--- a/src/s3path.jl
+++ b/src/s3path.jl
@@ -646,6 +646,10 @@ ArrowTypes.JuliaType(::Val{S3PATH_ARROW_NAME}, ::Any) = S3Path
 ArrowTypes.fromarrow(::Type{<:S3Path}, uri_string) = S3Path(uri_string)
 
 function ArrowTypes.toarrow(path::S3Path)
-    isnothing(path.config) || throw(ArgumentError("`path.config` must be `nothing` to serialize `path::S3Path` to Arrow"))
+    isnothing(path.config) || throw(
+        ArgumentError(
+            "`path.config` must be `nothing` to serialize `path::S3Path` to Arrow"
+        ),
+    )
     return string(path)
 end

From 5b3ab745c0bdf1c29a136713cf127bdc61f7bd3f Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Tue, 1 Feb 2022 14:40:35 -0800
Subject: [PATCH 3/7] Update test/s3path.jl

---
 test/s3path.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/s3path.jl b/test/s3path.jl
index f98558f7..0675f019 100644
--- a/test/s3path.jl
+++ b/test/s3path.jl
@@ -434,7 +434,9 @@ function s3path_tests(config)
 
     @testset "Arrow <-> S3Path (de)serialization" begin
         ver = String('A':'Z') * String('0':'5')
-        paths = [S3Path("s3://$(bucket_name)/a"), S3Path("s3://$(bucket_name)/b?versionId=$ver")]
+        paths = [
+            S3Path("s3://$(bucket_name)/a"), S3Path("s3://$(bucket_name)/b?versionId=$ver")
+        ]
         tbl = Arrow.Table(Arrow.tobuffer((; paths=paths)))
         @test all(tbl.paths .== paths)
         push!(paths, S3Path("s3://$(bucket_name)/c"; config=config))

From a21c61629645380530a7bd4cb3b3b314df1d4a88 Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Wed, 2 Feb 2022 08:08:55 -0800
Subject: [PATCH 4/7] Update test/s3path.jl

Co-authored-by: Curtis Vogt
---
 test/s3path.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/s3path.jl b/test/s3path.jl
index 0675f019..6618a853 100644
--- a/test/s3path.jl
+++ b/test/s3path.jl
@@ -435,7 +435,8 @@ function s3path_tests(config)
     @testset "Arrow <-> S3Path (de)serialization" begin
         ver = String('A':'Z') * String('0':'5')
         paths = [
-            S3Path("s3://$(bucket_name)/a"), S3Path("s3://$(bucket_name)/b?versionId=$ver")
+            S3Path("s3://$(bucket_name)/a"),
+            S3Path("s3://$(bucket_name)/b?versionId=$ver"),
         ]
         tbl = Arrow.Table(Arrow.tobuffer((; paths=paths)))
         @test all(tbl.paths .== paths)

From 6d88739aa8f3828afd3aa281a84e0dd1ab445492 Mon Sep 17 00:00:00 2001
From: Jarrett Revels
Date: Thu, 3 Feb 2022 11:05:25 -0800
Subject: [PATCH 5/7] bump patch version

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index a09c2fe9..f0fe575a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "AWSS3"
 uuid = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
-version = "0.9.3"
+version = "0.9.4"
 
 [deps]
 AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"

From dd0a99e9f6bab78cad97ff7df523840115b5265c Mon Sep 17 00:00:00 2001
From: Curtis Vogt
Date: Thu, 3 Feb 2022 15:21:39 -0600
Subject: [PATCH 6/7] Workaround formatter

---
 test/s3path.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/s3path.jl b/test/s3path.jl
index 6618a853..b33711a6 100644
--- a/test/s3path.jl
+++ b/test/s3path.jl
@@ -437,6 +437,7 @@ function s3path_tests(config)
         paths = [
             S3Path("s3://$(bucket_name)/a"),
             S3Path("s3://$(bucket_name)/b?versionId=$ver"),
+            # format trick: using this comment to force use of multiple lines
         ]
         tbl = Arrow.Table(Arrow.tobuffer((; paths=paths)))
         @test all(tbl.paths .== paths)

From 28b141400e83cae363df46796b7c7e30edfa4f2c Mon Sep 17 00:00:00 2001
From: Curtis Vogt
Date: Thu, 3 Feb 2022 15:30:13 -0600
Subject: [PATCH 7/7] Refactor error

---
 src/s3path.jl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/s3path.jl b/src/s3path.jl
index 8f7f89b2..dd74c93a 100644
--- a/src/s3path.jl
+++ b/src/s3path.jl
@@ -646,10 +646,8 @@ ArrowTypes.JuliaType(::Val{S3PATH_ARROW_NAME}, ::Any) = S3Path
 ArrowTypes.fromarrow(::Type{<:S3Path}, uri_string) = S3Path(uri_string)
 
 function ArrowTypes.toarrow(path::S3Path)
-    isnothing(path.config) || throw(
-        ArgumentError(
-            "`path.config` must be `nothing` to serialize `path::S3Path` to Arrow"
-        ),
-    )
+    if !isnothing(path.config)
+        throw(ArgumentError("`S3Path` config must be `nothing` to serialize to Arrow"))
+    end
     return string(path)
 end