From fe1af7fed1067b7f0e7a0c22e0ad2859d54ff568 Mon Sep 17 00:00:00 2001 From: Alexander Barth Date: Wed, 30 Mar 2022 15:44:26 +0200 Subject: [PATCH] issue #95 --- src/cfvariable.jl | 14 +++++++------- src/dataset.jl | 2 +- src/multifile.jl | 37 ++++++++++++++++++++++++++++++++++++- src/types.jl | 8 ++++++++ test/test_multifile.jl | 22 ++++++++++++++++++++++ 5 files changed, 74 insertions(+), 9 deletions(-) diff --git a/src/cfvariable.jl b/src/cfvariable.jl index 12ce84fa..8e4bfa55 100644 --- a/src/cfvariable.jl +++ b/src/cfvariable.jl @@ -109,7 +109,6 @@ function defVar(ds::NCDataset,name,vtype::DataType,dimnames; kwargs...) varid = nc_def_var(ds.ncid,name,typeid,dimids) - if haskey(kw,:chunksizes) storage = :chunked chunksizes = kw[:chunksizes] @@ -148,6 +147,7 @@ function defVar(ds::NCDataset,name,vtype::DataType,dimnames; kwargs...) return ds[name] end + # data has the type e.g. Array{Union{Missing,Float64},3} function defVar(ds::NCDataset, name, @@ -485,14 +485,14 @@ end Return a tuple of strings with the dimension names of the variable `v`. """ -dimnames(v::CFVariable) = dimnames(v.var) +dimnames(v::Union{CFVariable,MFCFVariable}) = dimnames(v.var) """ dimsize(v::CFVariable) Get the size of a `CFVariable` as a named tuple of dimension → length. """ -function dimsize(v::CFVariable) +function dimsize(v::Union{CFVariable,MFCFVariable}) s = size(v) names = Symbol.(dimnames(v)) return NamedTuple{names}(s) @@ -500,7 +500,7 @@ end export dimsize -name(v::CFVariable) = name(v.var) +name(v::Union{CFVariable,MFCFVariable}) = name(v.var) chunking(v::CFVariable,storage,chunksize) = chunking(v.var,storage,chunksize) chunking(v::CFVariable) = chunking(v.var) @@ -746,9 +746,9 @@ Base.Array(v::Union{CFVariable{T,N},Variable{T,N}}) where {T,N} = v[ntuple(i -> Base.show(io::IO,v::CFVariable; indent="") = Base.show(io::IO,v.var; indent=indent) # necessary for IJulia if showing a variable from a closed file -Base.show(io::IO,::MIME"text/plain",v::Union{Variable,CFVariable}) = show(io,v) +Base.show(io::IO,::MIME"text/plain",v::Union{Variable,CFVariable,MFCFVariable}) = show(io,v) -Base.display(v::Union{Variable,CFVariable}) = show(stdout,v) +Base.display(v::Union{Variable,CFVariable,MFCFVariable}) = show(stdout,v) @@ -785,7 +785,7 @@ NCDatasets.load!(ncv,data,buffer,:,:,:) close(ds) ``` """ -@inline function load!(v::NCDatasets.CFVariable{T,N}, data, buffer, indices::Union{Integer, UnitRange, StepRange, Colon}...) where {T,N} +@inline function load!(v::Union{CFVariable{T,N},MFCFVariable{T,N}}, data, buffer, indices::Union{Integer, UnitRange, StepRange, Colon}...) where {T,N} load!(v.var,buffer,indices...) fmv = fill_and_missing_values(v) diff --git a/src/dataset.jl b/src/dataset.jl index 97daee72..8db2df80 100644 --- a/src/dataset.jl +++ b/src/dataset.jl @@ -177,7 +177,7 @@ function NCDataset(filename::AbstractString, end if share - @show "share" + @debug "share mode" ncmode = ncmode | NC_SHARE end diff --git a/src/multifile.jl b/src/multifile.jl index 69c0fb49..081ed2a0 100644 --- a/src/multifile.jl +++ b/src/multifile.jl @@ -146,6 +146,7 @@ Base.getindex(v::MFVariable,indexes::Union{Int,Colon,UnitRange{Int},StepRange{In Base.setindex!(v::MFVariable,data,indexes::Union{Int,Colon,UnitRange{Int},StepRange{Int,Int}}...) = setindex!(v.var,data,indexes...) Base.size(v::MFVariable) = size(v.var) +Base.size(v::MFCFVariable) = size(v.var) dimnames(v::MFVariable) = v.dimnames name(v::MFVariable) = v.varname @@ -178,4 +179,38 @@ function variable(mfds::MFDataset,varname::SymbolOrString) end -fillvalue(v::MFVariable{T}) where T = v.attrib["_FillValue"]::T + +function cfvariable(mfds::MFDataset,varname::SymbolOrString) + if mfds.aggdim == "" + # merge all variables + + # the latest dataset should be used if a variable name is present multiple times + for ds in reverse(mfds.ds) + if haskey(ds,varname) + return cfvariable(ds,varname) + end + end + else + # aggregated along a given dimension + cfvars = cfvariable.(mfds.ds,varname) + + dim = findfirst(dimnames(cfvars[1]) .== mfds.aggdim) + @debug "dim $dim" + + if (dim != nothing) + cfvar = CatArrays.CatArray(dim,cfvars...) + var = variable(mfds,varname) + + return MFCFVariable(cfvar,var,var.attrib, + dimnames(var),varname) + else + return cfvars[1] + end + end +end + + +fillvalue(v::Union{MFVariable{T},MFCFVariable{T}}) where T = v.attrib["_FillValue"]::T + +Base.getindex(v::MFCFVariable,ind...) = v.cfvar[ind...] +Base.setindex!(v::MFCFVariable,data,ind...) = v.cfvar[ind...] = data diff --git a/src/types.jl b/src/types.jl index 810c645c..4391f196 100644 --- a/src/types.jl +++ b/src/types.jl @@ -150,6 +150,14 @@ mutable struct MFVariable{T,N,M,TA,A} <: AbstractVariable{T,N} varname::String end +mutable struct MFCFVariable{T,N,M,TA,TV,A} <: AbstractVariable{T,N} + cfvar::CatArrays.CatArray{T,N,M,TA} + var::TV + attrib::MFAttributes{A} + dimnames::NTuple{N,String} + varname::String +end + mutable struct MFDimensions{T} <: AbstractDimensions where T <: AbstractDimensions as::Vector{T} aggdim::String diff --git a/test/test_multifile.jl b/test/test_multifile.jl index 2b71736c..cc254515 100644 --- a/test/test_multifile.jl +++ b/test/test_multifile.jl @@ -1,5 +1,6 @@ using Test using NCDatasets +using Dates function example_file(i,array, fname = tempname(); varname = "var") @@ -211,3 +212,24 @@ ds_merged = NCDataset(fname_merged) close(ds_merged) rm(fname_merged) nothing + + + +# multi-file with different time units +fnames = [tempname(), tempname()] +times = [DateTime(2000,1,1), DateTime(2000,1,2)] +time_units = ["days since 2000-01-01","seconds since 2000-01-01"] + +for i = 1:2 + ds = NCDataset(fnames[i],"c") + defVar(ds,"time",times[i:i],("time",),attrib = Dict( + "units" => time_units[i], + "scale_factor" => Float64(10*i), + "add_offset" => Float64(i), + )) + close(ds) +end + +ds = NCDataset(fnames,aggdim = "time") +@time ds["time"][:] == times +close(ds)