diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6eceeac..814d07a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: version: - - '1.3' + - '1.6' - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - 'nightly' os: diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8cff839..0000000 --- a/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -language: julia -os: - - linux -julia: - - 1.3 - - 1.6 - - nightly -matrix: - allow_failures: - - julia: nightly diff --git a/Project.toml b/Project.toml index 491f3b0..fe901a7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,23 +1,26 @@ name = "LazyStack" uuid = "1fad7336-0346-5a1a-a56f-a06ba010965b" authors = ["Michael Abbott"] -version = "0.0.8" +version = "0.1.0" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f" -OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +# NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f" +# OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" [compat] -ChainRulesCore = "0.10.9, 1" -NamedDims = "0.2.16, 0.3, 1.0" -OffsetArrays = "1" -julia = "1.3" +ChainRulesCore = "1" +Compat = "3.46, 4.2" +# NamedDims = "0.2.16, 0.3, 1.0" # try to remove +# OffsetArrays = "1" # try to remove +julia = "1.6" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "Zygote"] +test = ["Test", "OffsetArrays", "Zygote"] diff --git a/README.md b/README.md index 88d562c..4bb7425 100644 --- a/README.md +++ b/README.md @@ -1,67 +1,72 @@ # LazyStack.jl -[![Travis CI](https://travis-ci.org/mcabbott/LazyStack.jl.svg?branch=master)](https://travis-ci.org/mcabbott/LazyStack.jl) [![Github CI](https://github.com/mcabbott/LazyStack.jl/workflows/CI/badge.svg)](https://github.com/mcabbott/LazyStack.jl/actions?query=workflow%3ACI+branch%3Amaster) -This package exports one function, `stack`, for turning a list of arrays +This package exports one function, `lazystack`, for turning a list of arrays into one `AbstractArray`. Given several arrays with the same `eltype`, or an array of such arrays, it returns a lazy `Stacked{T,N}` view of these: ```julia -stack([zeros(2,2), ones(2,2)]) # isa Stacked{Float64, 3, <:Vector{<:Matrix}} -stack([1,2,3], 4:6) # isa Stacked{Int, 2, <:Tuple{<:Vector, <:UnitRange}} +julia> lazystack([1:2, 3:4, 5:6]) +2×3 lazystack(::Vector{UnitRange{Int64}}) with eltype Int64: + 1 3 5 + 2 4 6 + +julia> lazystack([pi^ℯ], [ℯ^pi]) +1×2 lazystack(::Tuple{Vector{Float64}, Vector{Float64}}) with eltype Float64: + 22.4592 23.1407 ``` -Given a generator, it instead iterates through the elements and writes into a new array. -Given a function and then some arrays, it behaves like `map(f, A, B)` but immediately writes -into a new array: +Before v0.1 this function used to be called `stack`, but that name is now exported by Base (from Julia 1.9). +Like this package, `Base.stack` makes an array with `size(result) = (size(inner)..., size(outer)...)`. +It always returns a new dense array, not a lazy container. +And instead of two vectors (in the above example) it would want a tuple `stack(([pi^ℯ], [ℯ^pi]))`. -```julia -stack([i,2i] for i in 1:5) # isa Matrix{Int} # size(ans) == (2, 5) -stack(*, eachcol(ones(2,4)), 1:4) # == Matrix(stack(map(*, eachcol(...), 1:4))) -``` - -The same `stack_iter` method is also used for any list of arrays of heterogeneous element type, -and for arrays of tuples. Notice that like `map(identity, Any[1, 1.0, 5im])`, this promotes using -`promote_typejoin`, to `Number` here, rather than to `Complex{Float64}`: - -```julia -stack([1,2], [3.0, 4.0], [5im, 6im]) # isa Matrix{Number} # size(ans) == (2, 3) -stack([(i,2.0,3//j) for i=1:4, j=1:5])# isa Array{Real, 3} # size(ans) == (3, 4, 5) -``` +Generators such as `lazystack([i,2i] for i in 1:5)` and arrays of mixed eltype like `lazystack([1,2], [3.0, 4.0], [5im, 6im])` used to be be handled here, making a dense array, but are now simply passed through to `Base.stack`. -The slices must all have the same `size`, but they (and the container) -can have any number of dimensions. `stack` always places the slice dimensions first. -There are no options. +When the individual slices aren't backed by an `Array`, as for instance with `CuArray`s on a GPU, then again `Base.stack` is called. +This should make one big `CuArray`, since scalar indexing of individual slices won't work well. ### Ragged stack -There is also a version which does not demand that slices have equal `size` (or equal `ndims`), -which always returns a new `Array`. You can control the position of slices `using OffsetArrays`: +There is also a version which does not demand that slices have equal `size` (or equal `ndims`). +For now this is not lazy: ```julia -rstack([1:n for n in 1:10]) # upper triangular Matrix{Int} -rstack(OffsetArray(fill(n,4), rand(-2:2)) for n in 1:10; fill=NaN) +julia> raggedstack([10:10+n for n in 1:3]) +4×3 Matrix{Int64}: + 10 10 10 + 11 11 11 + 0 12 12 + 0 0 13 + +julia> using OffsetArrays + +julia> raggedstack(OffsetArray(fill(1.0n, 3), rand(-1:1)) for n in 1:10; fill=NaN) +5×10 OffsetArray(::Matrix{Float64}, 0:4, 1:10) with eltype Float64 with indices 0:4×1:10: + NaN 2.0 NaN 4.0 NaN 6.0 7.0 NaN 9.0 NaN + 1.0 2.0 3.0 4.0 5.0 6.0 7.0 NaN 9.0 10.0 + 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 + 1.0 NaN 3.0 NaN 5.0 NaN NaN 8.0 NaN 10.0 + NaN NaN NaN NaN NaN NaN NaN 8.0 NaN NaN ``` ### Other packages -This one plays well with [OffsetArrays.jl](https://github.com/JuliaArrays/OffsetArrays.jl), -[NamedDims.jl](https://github.com/invenia/NamedDims.jl), and -[Zygote.jl](https://github.com/FluxML/Zygote.jl). +This one plays well with [OffsetArrays.jl](https://github.com/JuliaArrays/OffsetArrays.jl), and [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl)-compatible AD such as [Zygote.jl](https://github.com/FluxML/Zygote.jl). It's also used internally by [TensorCast.jl](https://github.com/mcabbott/TensorCast.jl). Besides which, there are several other ways to achieve similar things: * For an array of arrays, you can also use [`JuliennedArrays.Align`](https://bramtayl.github.io/JuliennedArrays.jl/latest/#JuliennedArrays.Align). This requires (or enables) you to specify which dimensions of the output belong to the sub-arrays, instead of writing `PermutedDimsArray(stack(...), ...)`. -* There is also [`RecursiveArrayTools.VectorOfArray`](https://github.com/JuliaDiffEq/RecursiveArrayTools.jl#vectorofarray) which as its name hints only allows a one-dimensional container. Linear indexing retreives a slice, not an element, which is sometimes surprising. +* There is also [`RecursiveArrayTools.VectorOfArray`](https://github.com/JuliaDiffEq/RecursiveArrayTools.jl#vectorofarray) which as its name hints only allows a one-dimensional container. (And unlike the package name, nothing is recursive.) Linear indexing retreives a slice, not an element, which is sometimes surprising. * For a tuple of arrays, [`LazyArrays.Hcat`](https://github.com/JuliaArrays/LazyArrays.jl#concatenation) is at present faster to index than `stack`, but doesn't allow arbitrary dimensions. -* For a generator of arrays, the built-in `reduce(hcat,...)` may work, but it slow compared to `stack`: see [test/speed.jl](test/speed.jl) for some examples. And a few more: * When writing this I missed [`SplitApplyCombine.combinedimsview`](https://github.com/JuliaData/SplitApplyCombine.jl#combinedimsviewarray), which is very similar to `stack`, but doesn't handle tuples. * Newer than this package is [StackViews.jl](https://github.com/JuliaArrays/StackViews.jl) handles both, with `StackView(A,B,dims=4) == StackView([A,B],4)` creating a 4th dimension; the container is always one-dimensional. * [`Flux.stack`](https://fluxml.ai/Flux.jl/stable/utilities/#Flux.stack) similarly takes a dimension, but eagerly creates an `Array`. +* Finally, [CatViews.jl](https://github.com/ahwillia/CatViews.jl) offers a lazy `vcat`. But the package is old and I think not so fast. The lazy inverse: @@ -71,10 +76,10 @@ The lazy inverse: * As does [`PackedVectorsOfVectors`](https://github.com/synchronoustechnologies/PackedVectorsOfVectors.jl), although only 1+1 dimensions. Also has an eager `pack` method which turns a vector of vectors into view of a single larger matrix. -* [`Base.eachslice`](https://docs.julialang.org/en/v1/base/arrays/#Base.eachslice) also views one large array as many slices. This is a generator, but [JuliaLang#32310](https://github.com/JuliaLang/julia/pull/32310) should upgrade it to a multi-dimensional container indexable container. +* [`Base.eachslice`](https://docs.julialang.org/en/v1/base/arrays/#Base.eachslice) also views one large array as many slices. This was a generator, but [JuliaLang#32310](https://github.com/JuliaLang/julia/pull/32310) upgrades it to a multi-dimensional indexable container, in Julia 1.9. Eager: * After writing this I learned of [JuliaLang#31644](https://github.com/JuliaLang/julia/pull/31644) which extends `reduce(hcat,...)` to work on generators. -* Later, [JuliaLang#31644](https://github.com/JuliaLang/julia/pull/43334) proposes to add the eager `stack_iter` method of this package to Base. +* Later, [JuliaLang#43334](https://github.com/JuliaLang/julia/pull/43334) has added a better version of this package's `stack_iter` method to Base. diff --git a/src/LazyStack.jl b/src/LazyStack.jl index 1b72f22..c114bf5 100644 --- a/src/LazyStack.jl +++ b/src/LazyStack.jl @@ -1,27 +1,37 @@ module LazyStack -export stack, rstack +export lazystack + +include("ragged.jl") +export raggedstack + +using Compat + +@deprecate stack lazystack false # don't export it +@deprecate stack_iter Compat.stack false # don't export it + +@deprecate rstack raggedstack + +include("flatten.jl") # probably not permanent, just prototyping here! + #===== Tuples =====# -ndims(A) = Base.ndims(A) -ndims(::Tuple) = 1 -ndims(::NamedTuple) = 1 +_ndims(A) = Base.ndims(A) +_ndims(::Tuple) = 1 +_ndims(::NamedTuple) = 1 -axes(A) = Base.axes(A) -axes(A, d) = Base.axes(A, d) -if VERSION < v"1.1" # because, on Julia 1.0, axes((1,2)) === Base.OneTo(2) - axes(t::Tuple) = tuple(Base.axes(t)) -end -axes(nt::NamedTuple) = tuple(Base.OneTo(length(nt))) +_axes(A) = Base.axes(A) +_axes(A, d) = Base.axes(A, d) +_axes(nt::NamedTuple) = tuple(Base.OneTo(length(nt))) -size(A) = Base.size(A) -size(t::Tuple) = tuple(length(t)) -size(t::NamedTuple) = tuple(length(t)) +_size(A) = Base.size(A) +_size(t::Tuple) = tuple(length(t)) +_size(t::NamedTuple) = tuple(length(t)) -similar_vector(x::AbstractArray, n::Int) = similar(x, n::Int) -similar_vector(x::Tuple, n::Int) = Vector{eltype(x)}(undef, n::Int) -similar_vector(x::NamedTuple, n::Int) = Vector{eltype(x)}(undef, n::Int) +# similar_vector(x::AbstractArray, n::Int) = similar(x, n::Int) +# similar_vector(x::Tuple, n::Int) = Vector{eltype(x)}(undef, n::Int) +# similar_vector(x::NamedTuple, n::Int) = Vector{eltype(x)}(undef, n::Int) # eltype(x::Tuple) = Base.promote_type(x...) # to match choice below # eltype(x::NamedTuple) = Base.promote_type(x...) @@ -29,52 +39,69 @@ similar_vector(x::NamedTuple, n::Int) = Vector{eltype(x)}(undef, n::Int) #===== Slices =====# """ - stack([A, B, C]) - stack(A, B, C) + lazystack([A, B, C]) + lazystack(A, B, C) == lazystack((A, B, C)) Creates a very simple lazy `::Stacked` view of an array of arrays, or a tuple of arrays, provided they have a consistent `eltype`. -The dimensions of the inner arrays come first: +The dimensions of the inner arrays come first. + +# Examples ``` -julia> stack([1,2,3], [10,20,30]) -3×2 LazyStack.Stacked{Int64,2,Tuple{Array{Int64,1},Array{Int64,1}}}: +julia> lazystack([1,2,3], [10,20,30]) +3×2 lazystack(::Tuple{Vector{Int64}, Vector{Int64}}) with eltype Int64: 1 10 2 20 3 30 julia> vecs = [[1,2,3] .+ j*im for j=2:2:8]; -julia> A = stack(vecs) -3×4 Stacked{Complex{Int64},2,Array{Array{Complex{Int64},1},1}}: +julia> A = lazystack(vecs) +3×4 lazystack(::Vector{Vector{Complex{Int64}}}) with eltype Complex{Int64}: 1+2im 1+4im 1+6im 1+8im 2+2im 2+4im 2+6im 2+8im 3+2im 3+4im 3+6im 3+8im ``` -A few `Base` functions will immediately look inside: + +When the slices aren't backed by `Array` (or a few other `Base` types) +then it reverts to eager `Base.stack`: + +``` +julia> using JLArrays # fake GPUArray + +julia> lazystack(jl([1, 2, 3f0]), jl([4, 5, 6f0])) +3×2 JLArray{Float32, 2}: + 1.0 4.0 + 2.0 5.0 + 3.0 6.0 +``` + +A few `Base` functions will immediately look inside the container: + ``` julia> eachcol(A) |> typeof -Array{Array{Complex{Int64},1},1} +Vector{Vector{Complex{Int64}}} (alias for Array{Array{Complex{Int64}, 1}, 1}) julia> view(A, :,1) -3-element Array{Complex{Int64},1}: +3-element Vector{Complex{Int64}}: 1 + 2im 2 + 2im 3 + 2im ``` """ -stack(xs::AbstractArray{IT,ON}) where {IT<:AbstractArray{T,IN}} where {T,IN,ON} = +lazystack(xs::AbstractArray{IT,ON}) where {IT<:AbstractArray{T,IN}} where {T,IN,ON} = stack_slices(xs, Val(T), Val(IN+ON)) -stack(xs::Tuple{Vararg{AbstractArray{T,IN}}}) where {T,IN} = +lazystack(xs::Tuple{Vararg{AbstractArray{T,IN}}}) where {T,IN} = stack_slices(xs, Val(T), Val(IN+1)) -stack(xs::AbstractArray{T}...) where {T} = stack(xs) +lazystack(xs::AbstractArray{T}...) where {T} = lazystack(xs) function stack_slices(xs::AT, ::Val{T}, ::Val{N}) where {T,N,AT} length(xs) >= 1 || throw(ArgumentError("stacking an empty collection is not allowed")) - storage_type(first(xs)) <: Union{Array, AbstractRange} || return stack_iter(xs) - s = size(first(xs)) + storage_type(first(xs)) <: Union{Array, AbstractRange} || return Compat.stack(xs) + ax = _axes(first(xs)) for x in xs - size(x) == s || throw(DimensionMismatch( - "slices being stacked must share a common size. Expected $s, got $(size(x))")) + _axes(x) == ax || throw(DimensionMismatch( + "slices being stacked must share a common size. Expected $ax, got $(_axes(x))")) end Stacked{T, N, AT}(xs) end @@ -83,17 +110,17 @@ struct Stacked{T,N,AT} <: AbstractArray{T,N} slices::AT end -Base.size(x::Stacked) = (size(first(x.slices))..., size(x.slices)...) -Base.size(x::Stacked{T,N,<:Tuple}) where {T,N} = (size(first(x.slices))..., length(x.slices)) +Base.size(x::Stacked) = (_size(first(x.slices))..., _size(x.slices)...) +Base.size(x::Stacked{T,N,<:Tuple}) where {T,N} = (_size(first(x.slices))..., length(x.slices)) -Base.axes(x::Stacked) = (axes(first(x.slices))..., axes(x.slices)...) +Base.axes(x::Stacked) = (_axes(first(x.slices))..., _axes(x.slices)...) Base.parent(x::Stacked) = x.slices -outer_ndims(x::Stacked) = ndims(x.slices) +outer_ndims(x::Stacked) = _ndims(x.slices) outer_ndims(x::Stacked{T,N,<:Tuple}) where {T,N} = 1 -inner_ndims(x::Stacked) = ndims(x) - outer_ndims(x) +inner_ndims(x::Stacked) = _ndims(x) - outer_ndims(x) @inline function Base.getindex(x::Stacked{T,N}, inds::Vararg{Integer,N}) where {T,N} @boundscheck checkbounds(x, inds...) @@ -104,11 +131,9 @@ end Base.unaliascopy(x::Stacked) = stack(map(Base.unaliascopy, x.slices)) -if VERSION >= v"1.1" - Base.eachcol(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}) where {T} = x.slices -end +Base.eachcol(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}) where {T} = x.slices -Base.collect(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}) where {T} = reduce(hcat, x.slices) +Base.collect(x::Stacked) = Compat.stack(x.slices) """ LazyStack.ensure_dense(A) @@ -122,15 +147,15 @@ ensure_dense(x::AbstractArray) = x Base.view(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}, ::Colon, i::Int) where {T} = x.slices[i] function Base.push!(x::Stacked{T,N,<:AbstractVector}, y::AbstractArray) where {T,N} - s = size(first(x.slices)) - isempty(y) || size(y) == s || throw(DimensionMismatch( - "slices being stacked must share a common size. Expected $s, got $(size(y))")) + s = _size(first(x.slices)) + isempty(y) || _size(y) == s || throw(DimensionMismatch( + "slices being stacked must share a common size. Expected $s, got $(_size(y))")) push!(x.slices, y) x end function Base.showarg(io::IO, x::Stacked, toplevel) - print(io, "stack(") + print(io, "lazystack(") Base.showarg(io, parent(x), false) print(io, ')') toplevel && print(io, " with eltype ", eltype(x)) @@ -138,221 +163,123 @@ end #===== Iteration =====# -ITERS = [:Flatten, :Drop, :Filter] -for iter in ITERS - @eval ndims(::Iterators.$iter) = 1 -end -ndims(gen::Base.Generator) = ndims(gen.iter) -ndims(zed::Iterators.Zip) = maximum(ndims, zed.is) -if VERSION < v"1.1" - ndims(zed::Iterators.Zip2) = max(ndims(zed.a), ndims(zed.b)) -end - -similar_vector(x, n::Int) = throw(ArgumentError()) +# ITERS = [:Flatten, :Drop, :Filter] +# for iter in ITERS +# @eval _ndims(::Iterators.$iter) = 1 +# end +# _ndims(gen::Base.Generator) = _ndims(gen.iter) +# _ndims(zed::Iterators.Zip) = maximum(ndims, zed.is) +# +# similar_vector(x, n::Int) = throw(ArgumentError()) """ - stack(::Generator) - stack(::Array{T}, ::Array{S}, ...) - -This constructs a new array, calling `stack_iter`. -Can handle inconsistent eltypes, but not inconsistent sizes. - -``` -julia> stack([i i; i 10i] for i in 1:2:3) -2×2×2 Array{Int64,3}: -[:, :, 1] = - 1 1 - 1 10 - -[:, :, 2] = - 3 3 - 3 30 - -julia> stack(c<9 ? [c,2c] : [9.99, 10] for c in 1:10) -2×10 Array{Real,2}: - 1 2 3 4 5 6 7 8 9.99 9.99 - 2 4 6 8 10 12 14 16 10.0 10.0 + lazystack(::Generator) + lazystack(::Array{T}, ::Array{S}, ...) -julia> stack(1:3, ones(3), zeros(3) .+ im) -3×3 Array{Number,2}: - 1 1.0 0.0+1.0im - 2 1.0 0.0+1.0im - 3 1.0 0.0+1.0im -``` +These used to call this package's own `stack_iter`, +but now they just call `Base.stack`. """ -stack(gen::Base.Generator) = stack_iter(gen) -for iter in ITERS - @eval stack(gen::Iterators.$iter) = stack_iter(gen) -end -stack(arr::AbstractArray{Any}) = stack_iter(arr) # e.g. from arr=[]; push!(arr, rand(3)); ... - -stack(arr::AbstractArray{<:AbstractArray}) = stack_iter(arr) -stack(arr::AbstractArray{<:Tuple}) = stack_iter(arr) -stack(arr::AbstractArray{<:NamedTuple}) = stack_iter(arr) - -stack(tup::AbstractArray...) = stack_iter(tup) -stack(tup::Tuple...) = stack_iter(tup) -stack(tup::NamedTuple...) = stack_iter(tup) -# stack(tup::Tuple{Vararg{AbstractArray}}) = stack_iter(tup) - -function stack_iter(itr) - if itr isa Tuple || itr isa Base.Generator{<:Tuple} || ndims(itr) == 1 - outsize = tuple(:) - else - Base.haslength(itr) || return stack_iter(collect(itr)) - outsize = size(itr) - end - - w, val = vstack_plus(itr) - - z = reshape(w, size(val)..., outsize...) - - rewrap_like(z, val) +lazystack(gen::Base.Generator) = Compat.stack(gen) +for iter in [:Flatten, :Drop, :Filter] + @eval lazystack(gen::Iterators.$iter) = Compat.stack(gen) end +lazystack(arr::AbstractArray{Any}) = Compat.stack(arr) # e.g. from arr=[]; push!(arr, rand(3)); ... -vstack(itr) = first(vstack_plus(itr)) +lazystack(arr::AbstractArray{<:AbstractArray}) = Compat.stack(arr) +lazystack(arr::AbstractArray{<:Tuple}) = Compat.stack(arr) +lazystack(arr::AbstractArray{<:NamedTuple}) = Compat.stack(arr) -function vstack_plus(itr) - zed = iterate(itr) - zed === nothing && throw(ArgumentError("stacking an empty collection is not allowed")) - val, state = zed - val isa Union{AbstractArray, Tuple, NamedTuple} || return collect(itr), val +lazystack(tup::AbstractArray...) = Compat.stack(tup) +lazystack(tup::Tuple...) = Compat.stack(tup) +lazystack(tup::NamedTuple...) = Compat.stack(tup) +# lazystack(tup::Tuple{Vararg{AbstractArray}}) = Compat.stack(tup) - s = size(val) - n = Base.haslength(itr) ? prod(s)*length(itr) : nothing - - v = similar_vector(no_wraps(val), something(n, prod(s))) - copyto!(v, 1, no_wraps(val), 1, prod(s)) - - w = stack_rest(v, 0, n, s, itr, state)#::Vector - w, val -end - -function stack_rest(v, i, n, s, itr, state) - while true - zed = iterate(itr, state) - zed === nothing && return v - val, state = zed - s == size(val) || throw(DimensionMismatch( - "slices being stacked must share a common size. Expected $s, got $(size(val))")) - - i += 1 - if eltype(val) <: eltype(v) - if n isa Int - copyto!(v, i*prod(s)+1, no_wraps(val), 1, prod(s)) - else - append!(v, vec(no_wraps(val))) - end - else - - T′ = Base.promote_typejoin(eltype(v), eltype(val)) - # T′ = Base.promote_type(eltype(v), eltype(val)) # which do I want? - v′ = similar(v, T′) - copyto!(v′, v) - - if n isa Int - copyto!(v′, i*prod(s)+1, no_wraps(val), 1, prod(s)) - else - append!(v′, vec(no_wraps(val))) - end - - return stack_rest(v′, i, n, s, itr, state) - end - - end -end """ - stack(fun, iters...) - stack(eachcol(A), eachslice(B, dims=3)) do a, b + lazystack(fun, iters...) + lazystack(eachcol(A), eachslice(B, dims=3)) do a, b f(a,b) end If the first argument is a function, then this is mapped over the other argumenst. -The result should be `== stack(map(fun, iters...))`, but done using `stack_iter` -to return a dense `Array` instead of a `Stacked` object. +Always uses `Base.stack`. """ -stack(fun::Function, iter) = stack(fun(arg) for arg in iter) -function stack(fun::Function, iters...) - if all(Base.haslength, iters) - sz = size(first(iters)) - all(a -> size(a)==sz, iters) || throw(DimensionMismatch( - "sizes of all argumens must match, in stack(f, A, B, C). " * - "This is slightly stricter than map(f, A, B, C), for now.")) - end - stack(fun(args...) for args in zip(iters...)) -end - -#===== Offset =====# - -using OffsetArrays - -no_wraps(a) = a -no_wraps(a::OffsetArray) = parent(a) - -rewrap_like(A, a) = A -function rewrap_like(A, a::OffsetArray) - B = rewrap_like(A, parent(a)) - OffsetArray(B, axes(a)..., axes(A, ndims(A))) -end - -#===== NamedDims =====# - -using NamedDims - -ensure_named(a::AbstractArray, L::Tuple) = NamedDimsArray(a, L) -ensure_named(a::NamedDimsArray, L::Tuple) = refine_names(a, L) - -# array of arrays -stack(xs::NamedDimsArray{<:Any,<:AbstractArray}) = - ensure_named(stack(parent(xs)), getnames(xs)) -stack(x::AT) where {AT <: AbstractArray{<:NamedDimsArray{L,T,IN},ON}} where {T,IN,ON,L} = - ensure_named(Stacked{T, IN+ON, AT}(x), getnames(x)) - -getnames(xs::AbstractArray{<:AbstractArray}) = - (dimnames(eltype(xs))..., dimnames(xs)...) - -# tuple of arrays -stack(x::AT) where {AT <: Tuple{Vararg{NamedDimsArray{L,T,IN}}}} where {T,IN,L} = - ensure_named(stack(map(parent, x)), getnames(x)) - -getnames(xs::Tuple{Vararg{<:NamedDimsArray}}) = - (dimnames(first(xs))..., :_) - -function rewrap_like(A, a::NamedDimsArray{L}) where {L} - B = rewrap_like(A, parent(a)) - ensure_named(B, (L..., ntuple(_ -> :_, ndims(A) - ndims(a))...)) -end - -no_wraps(a::NamedDimsArray) = no_wraps(parent(a)) - -""" - stack(name, things...) - -If you give one `name::Symbol` before the pieces to be stacked, -this will be the name of the last dimension of the resulting `NamedDimsArray`. -(Names attached to slices, and to containers, should also be preserved.) -""" -function LazyStack.stack(s::Symbol, args...) - data = stack(args...) - name_last = ntuple(d -> d==ndims(data) ? s : :_, ndims(data)) - ensure_named(data, name_last) -end +lazystack(fun::Function, iter) = Compat.stack(fun, iter) +lazystack(fun::Function, iters...) = Compat.stack(fun, iters...) + +# #===== Offset =====# +# +# using OffsetArrays +# +# no_offsets(a) = a +# no_offsets(a::OffsetArray) = parent(a) +# +# rewrap_like(A, a) = A +# function rewrap_like(A, a::OffsetArray) +# B = rewrap_like(A, parent(a)) +# OffsetArray(B, _axes(a)..., _axes(A, _ndims(A))) +# end +# +# no_wraps_or_offsets(a) = no_offsets(no_wraps(a)) +# +# #===== NamedDims =====# +# +# using NamedDims +# +# ensure_named(a::AbstractArray, L::Tuple) = NamedDimsArray(a, L) +# ensure_named(a::NamedDimsArray, L::Tuple) = refine_names(a, L) +# +# # array of arrays +# stack(xs::NamedDimsArray{<:Any,<:AbstractArray}) = +# ensure_named(stack(parent(xs)), getnames(xs)) +# stack(x::AT) where {AT <: AbstractArray{<:NamedDimsArray{L,T,IN},ON}} where {T,IN,ON,L} = +# ensure_named(Stacked{T, IN+ON, AT}(x), getnames(x)) +# +# getnames(xs::AbstractArray{<:AbstractArray}) = +# (dimnames(eltype(xs))..., dimnames(xs)...) +# +# # tuple of arrays +# stack(x::AT) where {AT <: Tuple{Vararg{NamedDimsArray{L,T,IN}}}} where {T,IN,L} = +# ensure_named(stack(map(parent, x)), getnames(x)) +# +# getnames(xs::Tuple{Vararg{<:NamedDimsArray}}) = +# (dimnames(first(xs))..., :_) +# +# function rewrap_like(A, a::NamedDimsArray{L}) where {L} +# B = rewrap_like(A, parent(a)) +# ensure_named(B, (L..., ntuple(_ -> :_, _ndims(A) - _ndims(a))...)) +# end +# +# no_wraps(a) = a +# no_wraps(a::NamedDimsArray) = no_wraps(parent(a)) +# +# """ +# stack(name, things...) +# +# If you give one `name::Symbol` before the pieces to be stacked, +# this will be the name of the last dimension of the resulting `NamedDimsArray`. +# (Names attached to slices, and to containers, should also be preserved.) +# """ +# function LazyStack.stack(s::Symbol, args...) +# data = stack(args...) +# name_last = ntuple(d -> d==_ndims(data) ? s : :_, _ndims(data)) +# ensure_named(data, name_last) +# end #===== Gradients =====# using ChainRulesCore: ChainRulesCore, rrule, NoTangent -function ChainRulesCore.rrule(::typeof(stack), vec::AbstractArray{<:AbstractArray{<:Any,IN}}) where {IN} - stack(vec), Δ -> (NoTangent(), [view(Δ, ntuple(_->(:),IN)..., Tuple(I)...) for I in eachindex(vec)],) +function ChainRulesCore.rrule(::typeof(lazystack), vec::AbstractArray{<:AbstractArray{<:Any,IN}}) where {IN} + lazystack(vec), Δ -> (NoTangent(), [view(Δ, ntuple(_->(:),IN)..., Tuple(I)...) for I in eachindex(vec)],) end -function ChainRulesCore.rrule(::typeof(stack), tup::Tuple{Vararg{<:AbstractArray{<:Any,IN}}}) where {IN} - stack(tup), Δ -> (NoTangent(), ntuple(i -> view(Δ, ntuple(_->(:),IN)..., i), length(tup)),) +function ChainRulesCore.rrule(::typeof(lazystack), tup::Tuple{Vararg{<:AbstractArray{<:Any,IN}}}) where {IN} + lazystack(tup), Δ -> (NoTangent(), ntuple(i -> view(Δ, ntuple(_->(:),IN)..., i), length(tup)),) end -function ChainRulesCore.rrule(::typeof(stack), gen::Base.Generator) - stack(gen), Δ -> error("not yet!") +function ChainRulesCore.rrule(::typeof(lazystack), gen::Base.Generator) + lazystack(gen), Δ -> error("not yet!") end #===== CuArrays =====# @@ -363,100 +290,4 @@ function storage_type(x::AbstractArray) typeof(x) === typeof(p) ? typeof(x) : storage_type(p) end -#===== Ragged =====# - -""" - rstack(arrays; fill=0) - -Ragged `stack`, which allows slices of varying size, and fills the gaps with zero -or the given `fill`. Always returns an `Array`. - -``` -julia> rstack(1:n for n in 1:5) -5×5 Array{Int64,2}: - 1 1 1 1 1 - 0 2 2 2 2 - 0 0 3 3 3 - 0 0 0 4 4 - 0 0 0 0 5 - -julia> rstack([[1,2,3], [10,20.0]], fill=missing) -3×2 Array{Union{Missing, Float64},2}: - 1.0 10.0 - 2.0 20.0 - 3.0 missing - -julia> using OffsetArrays - -julia> rstack(1:3, OffsetArray([2.0,2.1,2.2], -1), OffsetArray([3.2,3.3,3.4], +1)) -5×3 OffsetArray(::Array{Real,2}, 0:4, 1:3) with eltype Real with indices 0:4×1:3: - 0 2.0 0 - 1 2.1 0 - 2 2.2 3.2 - 3 0 3.3 - 0 0 3.4 -``` - -""" -rstack(x::AbstractArray, ys::AbstractArray...; kw...) = rstack((x, ys...); kw...) -rstack(g::Base.Generator; kw...) = rstack(collect(g); kw...) -rstack(f::Function, ABC...; kw...) = rstack(map(f, ABC...); kw...) -rstack(list::AbstractArray{<:AbstractArray}; fill=zero(eltype(first(list)))) = rstack_iter(list; fill=fill) -rstack(list::Tuple{Vararg{<:AbstractArray}}; fill=zero(eltype(first(list)))) = rstack_iter(list; fill=fill) - -function rstack_iter(list; fill) - T = mapreduce(eltype, Base.promote_typejoin, list, init=typeof(fill)) - # T = mapreduce(eltype, Base.promote_type, list, init=typeof(fill)) - N = maximum(ndims, list) - ax = ntuple(N) do d - hi = maximum(x -> last(axes(x,d)), list) - if all(x -> axes(x,d) isa Base.OneTo, list) - Base.OneTo(hi) - else - lo = minimum(x -> first(axes(x,d)), list) - lo:hi - end - end - arr = Array{T}(undef, map(length, ax)..., size(list)...) - fill!(arr, fill) - out = if ax isa Tuple{Vararg{Base.OneTo}} - arr - else - OffsetArray(arr, (ax..., axes(list)...)) - end - z = rstack_copyto!(out, list, Val(N)) - - rewrap_names(z, first(list)) # now I want to separate names & offsets again! -end - -function rstack_copyto!(out, list, ::Val{N}) where {N} - for i in tupleindices(list) - item = list[i...] - o = ntuple(_->1, N - ndims(item)) - out[CartesianIndices(axes(item)), o..., i...] .= item - - # https://github.com/JuliaArrays/OffsetArrays.jl/issues/100 - # view(out, axes(item)..., o..., i...) .= item - - # for I in CartesianIndices(item) - # out[Tuple(I)..., o..., i...] = item[I] - # end - end - out -end - -tupleindices(t::Tuple) = ((i,) for i in 1:length(t)) -tupleindices(A::AbstractArray) = (Tuple(I) for I in CartesianIndices(A)) - -rewrap_names(A, a) = A -function rewrap_names(A, a::NamedDimsArray{L}) where {L} - B = rewrap_names(A, parent(a)) - ensure_named(B, (L..., ntuple(_ -> :_, ndims(A) - ndims(a))...)) -end -function rstack(s::Symbol, args...; kw...) - data = rstack(args...; kw...) - name_last = ntuple(d -> d==ndims(data) ? s : :_, ndims(data)) - ensure_named(data, name_last) -end - end # module diff --git a/src/flatten.jl b/src/flatten.jl new file mode 100644 index 0000000..87da2be --- /dev/null +++ b/src/flatten.jl @@ -0,0 +1,300 @@ +export flatten +using Base: IteratorSize, HasShape, HasLength, @default_eltype, haslength + +""" + flatten([T], iter) + +Given an iterator of iterators, returns a `Vector{T}` containing all of their elements. +Should be equal to `collect(Iterators.flatten(iter))`, but faster. + +Acting on a vector of vectors, this is equal to `vcat(iter...)` or `reduce(vcat, iter)`, +and to `vec(stack(iter))` when this is allowed, i.e. when `allequal(length.(iter))`. + +# Examples +``` +julia> v = flatten((1:2, 8:9)) +4-element Vector{Int64}: + 1 + 2 + 8 + 9 + +julia> v == vcat(1:2, 8:9) == reduce(vcat, [1:2, 8:9]) == vec(stack((1:2, 8:9))) +true + +julia> flatten(Complex{Float32}, [1:1, 9:10]) # specify eltype +3-element Vector{ComplexF32}: + 1.0f0 + 0.0f0im + 9.0f0 + 0.0f0im + 10.0f0 + 0.0f0im + +julia> flatten(([1 2], [5.0], (x = false,))) # ignores shape, promotes eltype +4-element Vector{Float64}: + 1.0 + 2.0 + 5.0 + 0.0 + +julia> collect(Iterators.flatten(([1 2], [5.0], (x = false,)))) # wider eltype +4-element Vector{Real}: + 1 + 2 + 5.0 + false + +julia> flatten(42) # as numbers are iterable +1-element Vector{Int64}: + 42 + +julia> flatten(Vector{Int}[]), flatten(()) # empty case, with and without known eltype +(Int64[], Union{}[]) +``` +""" +function flatten end + +""" + flatten([T], f, args...) + +Equivalent to `flatten(map(f, args...))`, but without allocating the result of `map`. + +``` +julia> flatten(x -> (x, x/2), 5:6) +4-element Vector{Real}: + 5 + 2.5 + 6 + 3.0 +``` +""" +flatten(f, iter) = flatten(f(x) for x in iter) +flatten(f, xs, yzs...) = flatten(f(xy...) for xy in zip(xs, yzs...)) + +flatten(::Type{T}, iter) where {T} = _typed_flatten(T, IteratorSize(@default_eltype iter), iter) +flatten(::Type{T}, f, iter) where {T} = flatten(T, f(x) for x in iter) +flatten(::Type{T},f, xs, yzs...) where {T} = flatten(T, f(xy...) for xy in zip(xs, yzs...)) + +function flatten(iter) + S = @default_eltype iter + T = S != Union{} ? eltype(S) : Any # Union{} occurs for e.g. flatten(1,2), postpone the error + if isconcretetype(T) + _typed_flatten(T, IteratorSize(S), iter) + else + _untyped_flatten(iter) + end +end + +function _typed_flatten(::Type{T}, ::Union{HasShape, HasLength}, A::Union{AbstractArray, Tuple}) where {T} + len = sum(length, A; init=0) + B = Vector{T}(undef, len) + off = 1 + for x in A + copyto!(B, off, x) + off += length(x) + end + B +end + +# _typed_flatten(::Type{T}, ::Union{HasShape, HasLength}, A) where {T} = _flatten(collect(A)) + +function _typed_flatten(::Type{T}, ::IteratorSize, A) where {T} + xit = iterate(A) + nothing === xit && return T[] + x1, _ = xit + B = Vector{T}(undef, 0) + if haslength(x1) && haslength(A) + # sizehint!(B, _flatten_alloc_length(T, x1, A)) + guess = length(x1) * length(A) + alloc = min(guess, (2^30) ÷ max(1, sizeof(T))) # don't allocate > 1GB + sizehint!(B, alloc) + end + while xit !== nothing + x, state = xit + append!(B, x) + xit = iterate(A, state) + end + B +end + +# function _flatten_alloc_length(::Type{T}, x1, A) where {T} +# guess = length(x1) * length(A) +# min(guess, (2^30) ÷ max(1, sizeof(T))) # don't allocate > 1GB +# end + +function _untyped_flatten(A) + xit = iterate(A) + nothing === xit && return Union{}[] + x1, state = xit + B = convert(Vector, vec(collect(x1))) # can you do this better? + # if haslength(x1) && haslength(A) # doesn't seem to help + # sizehint!(B, _flatten_alloc_length(eltype(B), B, A)) + # end + _untyped_flatten!(B, A, state) +end + +#= For the empty case: + +julia> collect(Iterators.flatten(())) +Union{}[] + +=# + +function _untyped_flatten!(B, A, state) + xit = iterate(A, state) + while xit !== nothing + x, state = xit + if eltype(x) <: eltype(B) + append!(B, x) + else + C = vcat(B, vec(collect(x))) # can you do this better? + return _untyped_flatten!(C, A, state) + end + xit = iterate(A, state) + end + return B +end + + + +#= + +let + tups = [Tuple(rand(10)) for _ in 1:100] + println("vector of tuples") + @btime stack($tups) + @btime flatten($tups) + @btime collect(Iterators.flatten($tups)) + + println("generator of tuples") + @btime stack(t for t in $tups) + @btime flatten(t for t in $tups) + @btime collect(Iterators.flatten(t for t in $tups)) + + println(" ... extra to collet") + @btime collect(t for t in $tups) + + println("vectors") + vecs = [rand(100) for _ in 1:100] + @btime stack($vecs) + @btime flatten($vecs) + @btime collect(Iterators.flatten($vecs)) + + println("generator of vectors") + @btime stack(v for v in $vecs) + @btime flatten(v for v in $vecs) + @btime collect(Iterators.flatten(v for v in $vecs)) + + println(" ... extra to collet") + @btime collect(t for t in $vecs) +end; + +vector of tuples + min 952.636 ns, mean 2.109 μs (1 allocation, 7.94 KiB) + min 845.333 ns, mean 2.071 μs (1 allocation, 7.94 KiB) + min 11.708 μs, mean 12.503 μs (1 allocation, 7.94 KiB) +generator of tuples + min 945.800 ns, mean 2.055 μs (1 allocation, 7.94 KiB) + min 1.608 μs, mean 2.613 μs (2 allocations, 7.94 KiB) <--- could be better? + min 16.292 μs, mean 18.288 μs (7 allocations, 21.92 KiB) + ... extra to collet + min 482.051 ns, mean 1.713 μs (1 allocation, 7.94 KiB) +vectors + min 2.643 μs, mean 7.749 μs (2 allocations, 78.17 KiB) + min 2.704 μs, mean 7.681 μs (2 allocations, 78.17 KiB) + min 70.375 μs, mean 77.521 μs (9 allocations, 326.55 KiB) +generator of vectors + min 2.588 μs, mean 7.277 μs (2 allocations, 78.17 KiB) + min 4.738 μs, mean 9.588 μs (2 allocations, 78.25 KiB) <--- could be better + min 68.625 μs, mean 76.385 μs (10 allocations, 326.61 KiB) + ... extra to collet + min 192.396 ns, mean 217.751 ns (1 allocation, 896 bytes) <--- worth it! + + +let + tups = [Tuple(rand(10)) for _ in 1:100] + println("unknown number of tuples") + @btime stack(t for t in $tups if true) + @btime flatten(t for t in $tups if true) + @btime collect(Iterators.flatten(t for t in $tups if true)) + + println(" ... extra to collet") + @btime collect(t for t in $tups if true) + + vecs = [rand(100) for _ in 1:100] + println("unknown number of vectors") + @btime stack(v for v in $vecs if true) + @btime flatten(v for v in $vecs if true) + @btime collect(Iterators.flatten(v for v in $vecs if true)) + + println(" ... extra to collet") + @btime collect(v for v in $vecs if true) +end; + +unknown number of tuples + min 2.454 μs, mean 5.807 μs (6 allocations, 25.72 KiB) + min 2.042 μs, mean 4.953 μs (6 allocations, 21.91 KiB) + min 16.292 μs, mean 18.510 μs (7 allocations, 21.92 KiB) + ... extra to collet + min 1.450 μs, mean 3.878 μs (5 allocations, 17.78 KiB) +unknown number of vectors + min 3.625 μs, mean 9.854 μs (7 allocations, 80.12 KiB) + min 7.917 μs, mean 16.509 μs (7 allocations, 198.11 KiB) <--- could be better? + min 71.875 μs, mean 79.867 μs (10 allocations, 326.61 KiB) + ... extra to collet + min 1.042 μs, mean 1.158 μs (5 allocations, 1.95 KiB) <--- worth it! + +let + nums = rand(128, 100) + println("vector") + @btime stack($nums) + @btime flatten($nums) + @btime copy(vec($nums)) +end; + +vector + min 8.361 μs, mean 20.294 μs (2 allocations, 100.05 KiB) + min 9.333 μs, mean 19.566 μs (2 allocations, 100.05 KiB) + min 1.995 μs, mean 10.940 μs (4 allocations, 100.12 KiB) + +let + tups = [Tuple(rand(rand(1:20))) for _ in 1:100] + println("tuples of varying length") + @btime flatten($tups) + @btime collect(Iterators.flatten($tups)) + + vecs = [rand(1:200) for _ in 1:100] + println("vectors of varying length") + @btime flatten($vecs) + @btime collect(Iterators.flatten($vecs)) +end; + +tuples of varying length + min 14.167 μs, mean 15.792 μs (52 allocations, 9.11 KiB) + min 8.833 μs, mean 11.346 μs (6 allocations, 21.86 KiB) +vectors of varying length + min 105.169 ns, mean 125.837 ns (1 allocation, 896 bytes) + min 106.701 ns, mean 127.880 ns (1 allocation, 896 bytes) + +let + tups = vcat([Tuple(rand(1:10, 10)) for _ in 1:50], [Tuple(rand(10)) for _ in 1:50]) + println("tuples of varying eltype") + @btime stack($tups) + @btime flatten($tups) + @btime collect(Iterators.flatten($tups)) + + println("vectors of varying eltype") + vecs = vcat([rand(1:10, 100) for _ in 1:50], [rand(100) for _ in 1:50]) + @btime stack($vecs) + @btime flatten($vecs) + @btime collect(Iterators.flatten($vecs)) +end; + +tuples of varying eltype + min 23.917 μs, mean 26.051 μs (49 allocations, 8.69 KiB) + min 10.417 μs, mean 13.462 μs (7 allocations, 20.41 KiB) + min 28.041 μs, mean 29.826 μs (501 allocations, 15.75 KiB) +vectors of varying eltype + min 2.646 μs, mean 10.090 μs (2 allocations, 78.17 KiB) + min 2.713 μs, mean 9.107 μs (2 allocations, 78.17 KiB) + min 68.750 μs, mean 77.771 μs (9 allocations, 326.55 KiB) + +=# \ No newline at end of file diff --git a/src/ragged.jl b/src/ragged.jl new file mode 100644 index 0000000..11ffc64 --- /dev/null +++ b/src/ragged.jl @@ -0,0 +1,86 @@ +""" + raggedstack(arrays; fill=0) + +Ragged `stack`, which allows slices of varying _size, and fills the gaps with zero +or the given `fill`. Always returns an `Array`. + +``` +julia> raggedstack(1:n for n in 1:5) +5×5 Array{Int64,2}: + 1 1 1 1 1 + 0 2 2 2 2 + 0 0 3 3 3 + 0 0 0 4 4 + 0 0 0 0 5 + +julia> raggedstack([[1,2,3], [10,20.0]], fill=missing) +3×2 Array{Union{Missing, Float64},2}: + 1.0 10.0 + 2.0 20.0 + 3.0 missing + +julia> using OffsetArrays + +julia> raggedstack(1:3, OffsetArray([2.0,2.1,2.2], -1), OffsetArray([3.2,3.3,3.4], +1)) +5×3 OffsetArray(::Array{Real,2}, 0:4, 1:3) with eltype Real with indices 0:4×1:3: + 0 2.0 0 + 1 2.1 0 + 2 2.2 3.2 + 3 0 3.3 + 0 0 3.4 +``` + +""" +raggedstack(x::AbstractArray, ys::AbstractArray...; kw...) = raggedstack((x, ys...); kw...) +raggedstack(g::Base.Generator; kw...) = raggedstack(collect(g); kw...) +raggedstack(f::Function, ABC...; kw...) = raggedstack(map(f, ABC...); kw...) +raggedstack(list::AbstractArray{<:AbstractArray}; fill=zero(eltype(first(list)))) = raggedstack_iter(list; fill) +raggedstack(list::Tuple{Vararg{<:AbstractArray}}; fill=zero(eltype(first(list)))) = raggedstack_iter(list; fill) + +function raggedstack_iter(list; fill) + T = mapreduce(eltype, Base.promote_typejoin, list, init=typeof(fill)) + # T = mapreduce(eltype, Base.promote_type, list, init=typeof(fill)) + N = maximum(_ndims, list) + ax = ntuple(N) do d + hi = maximum(x -> last(_axes(x,d)), list) + if all(x -> _axes(x,d) isa Base.OneTo, list) + Base.OneTo(hi) + else + lo = minimum(x -> first(_axes(x,d)), list) + lo:hi + end + end + out = similar(1:0, T, ax..., _axes(list)...) + fill!(out, fill) + raggedstack_copyto!(out, list, Val(N)) +end + +function raggedstack_copyto!(out, list, ::Val{N}) where {N} + for i in tupleindices(list) + item = list[i...] + o = ntuple(_->1, N - _ndims(item)) + out[CartesianIndices(_axes(item)), o..., i...] .= item + + # https://github.com/JuliaArrays/OffsetArrays.jl/issues/100 + # view(out, _axes(item)..., o..., i...) .= item + + # for I in CartesianIndices(item) + # out[Tuple(I)..., o..., i...] = item[I] + # end + end + out +end + +tupleindices(t::Tuple) = ((i,) for i in 1:length(t)) +tupleindices(A::AbstractArray) = (Tuple(I) for I in CartesianIndices(A)) + +# rewrap_names(A, a) = A +# function rewrap_names(A, a::NamedDimsArray{L}) where {L} +# B = rewrap_names(A, parent(a)) +# ensure_named(B, (L..., ntuple(_ -> :_, _ndims(A) - _ndims(a))...)) +# end +# function raggedstack(s::Symbol, args...; kw...) +# data = raggedstack(args...; kw...) +# name_last = ntuple(d -> d==_ndims(data) ? s : :_, _ndims(data)) +# ensure_named(data, name_last) +# end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index b3c524e..f27ab47 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,56 +1,56 @@ using Test, LazyStack -using OffsetArrays, NamedDims +using OffsetArrays # , NamedDims @testset "basics" begin v34 = [rand(3) for i in 1:4] - @test stack(v34) == hcat(v34...) - @test stack(v34) isa LazyStack.Stacked + @test lazystack(v34) == hcat(v34...) + @test lazystack(v34) isa LazyStack.Stacked - @test stack(v34...) == hcat(v34...) - @test stack(v34...).slices isa Tuple - @test LazyStack.ensure_dense(stack(v34...)) isa Array + @test lazystack(v34...) == hcat(v34...) + @test lazystack(v34...).slices isa Tuple + @test LazyStack.ensure_dense(lazystack(v34...)) isa Array - @test stack(v34[i] for i in 1:4) == hcat(v34...) - @test stack(v34[i] for i in 1:4) isa Array + @test lazystack(v34[i] for i in 1:4) == hcat(v34...) + @test lazystack(v34[i] for i in 1:4) isa Array - @test stack(v34)[1] == v34[1][1] # linear indexing - @test stack(v34)[1,1,1] == v34[1][1] # trailing dims - @test stack(v34) * ones(4) ≈ hcat(v34...) * ones(4) # issue #6 - @test stack(v34) * ones(4,2) ≈ hcat(v34...) * ones(4,2) - @test axes(stack(v34)) === axes(stack(v34...)) === axes(stack(v34[i] for i in 1:4)) + @test lazystack(v34)[1] == v34[1][1] # linear indexing + @test lazystack(v34)[1,1,1] == v34[1][1] # trailing dims + @test lazystack(v34) * ones(4) ≈ hcat(v34...) * ones(4) # issue #6 + @test lazystack(v34) * ones(4,2) ≈ hcat(v34...) * ones(4,2) + @test axes(lazystack(v34)) === axes(lazystack(v34...)) === axes(lazystack(v34[i] for i in 1:4)) end @testset "tuples" begin vt = [(1,2), (3,4), (5,6)] vnt = [(a=1,b=2), (a=3,b=4), (a=5,b=6)] - @test stack(vt) isa Array - @test stack(vt) == reshape(1:6, 2,3) - @test stack(vnt) isa Array + @test lazystack(vt) isa Array + @test lazystack(vt) == reshape(1:6, 2,3) + @test lazystack(vnt) isa Array - @test stack(vt...) isa Array - @test stack(vnt...) isa Array + @test lazystack(vt...) isa Array + @test lazystack(vnt...) isa Array end @testset "generators" begin g234 = (ones(2) .* (10i + j) for i in 1:3, j in 1:4) - @test stack(g234) == stack(collect(g234)) + @test lazystack(g234) == lazystack(collect(g234)) - @test stack(Iterators.filter(_ -> true, g234)) == reshape(stack(g234), 2,:) + @test lazystack(Iterators.filter(_ -> true, g234)) == reshape(lazystack(g234), 2,:) - @test stack(Iterators.drop(g234, 2)) == reshape(stack(g234), 2,:)[:, 3:end] + @test lazystack(Iterators.drop(g234, 2)) == reshape(lazystack(g234), 2,:)[:, 3:end] g16 = (ones(1) .* (10i + j) for j in 1:3 for i in 1:2) # Iterators.Flatten - @test stack(collect(g16)) == stack(g16) + @test lazystack(collect(g16)) == lazystack(g16) g29 = (fill(i,2) for i=1:9) - @test stack(Iterators.reverse(g29)) == reverse(stack(g29), dims=2) + @test lazystack(Iterators.reverse(g29)) == reverse(lazystack(g29), dims=2) gt = ((1,2,3) for i in 1:4) - @test stack(gt) isa Array + @test lazystack(gt) isa Array end @testset "functions" begin @@ -58,116 +58,116 @@ end m1 = rand(1:99, 3,10) _eachcol(m) = (view(m, :, c) for c in axes(m,2)) - @test stack(sum, _eachcol(m1)) == vec(mapslices(sum, m1, dims=1)) + @test lazystack(sum, _eachcol(m1)) == vec(mapslices(sum, m1, dims=1)) f1(x,y) = x .+ y - @test stack(f1, _eachcol(m1), _eachcol(m1)) == 2 .* m1 - @test stack(f1, _eachcol(m1), 1:10) == m1 .+ (1:10)' + @test lazystack(f1, _eachcol(m1), _eachcol(m1)) == 2 .* m1 + @test lazystack(f1, _eachcol(m1), 1:10) == m1 .+ (1:10)' # @test_throws DimensionMismatch map(f1, _eachcol(m1), 1:12) # it doesn't throw! - @test_throws DimensionMismatch stack(f1, _eachcol(m1), 1:12) + # @test_throws DimensionMismatch lazystack(f1, _eachcol(m1), 1:12) - # This is where stack doesn't quite follow map's behaviour: - @test size(stack(map(*, [ones(2) for i=1:3, j=1:4], ones(3)))) == (2,3) - @test size(stack(map(*, [ones(2) for i=1:3, j=1:4], ones(5)))) == (2,5) - @test_throws DimensionMismatch stack(*, [ones(2) for i=1:3, j=1:4], ones(3)) - @test_throws DimensionMismatch map(*, [ones(2) for i=1:3, j=1:4], ones(3,1)) + # This is where lazystack doesn't quite follow map's behaviour: + @test size(lazystack(map(*, [ones(2) for i=1:3, j=1:4], ones(3)))) == (2,3) + @test size(lazystack(map(*, [ones(2) for i=1:3, j=1:4], ones(5)))) == (2,5) + # @test_throws DimensionMismatch lazystack(*, [ones(2) for i=1:3, j=1:4], ones(3)) + # @test_throws DimensionMismatch map(*, [ones(2) for i=1:3, j=1:4], ones(3,1)) end @testset "types" begin - @test stack([1:3 for _=1:2]...) isa LazyStack.Stacked{Int,2,<:Tuple{UnitRange,UnitRange}} + @test lazystack([1:3 for _=1:2]...) isa LazyStack.Stacked{Int,2,<:Tuple{UnitRange,UnitRange}} - @test eltype(stack(1:3, ones(3))) == Real - @test eltype(stack([1:3, ones(3)])) == Real - @test eltype(stack(i==1 ? (1:3) : ones(3) for i in 1:2)) == Real + @test_broken eltype(lazystack(1:3, ones(3))) == Real + @test_broken eltype(lazystack([1:3, ones(3)])) == Real + @test_broken eltype(lazystack(i==1 ? (1:3) : ones(3) for i in 1:2)) == Real acc = [] for i=1:3 push!(acc, fill(i, 4)) end - @test stack(acc) isa Array{Int} + @test lazystack(acc) isa Array{Int} push!(acc, rand(4)) - @test stack(acc) isa Array{Real} + @test_broken lazystack(acc) isa Array{Real} acc = Array[] for i=1:3 push!(acc, fill(i, 4)) end - stack(acc) isa Array{Int} - -end -@testset "names" begin - - nin = [NamedDimsArray(ones(3), :a) for i in 1:4] - @test dimnames(stack(nin)) == (:a, :_) - @test dimnames(stack(nin...)) == (:a, :_) - @test dimnames(stack(:b, nin)) == (:a, :b) - @test dimnames(stack(:b, nin...)) == (:a, :b) - @test stack(nin).data.slices[1] isa NamedDimsArray # vector container untouched, - @test stack(nin...).data.slices[1] isa Array # but tuple container cleaned up. - - nout = NamedDimsArray([ones(3) for i in 1:4], :b) - @test dimnames(stack(nout)) == (:_, :b) - @test dimnames(stack(:b, nout)) == (:_, :b) - @test_throws Exception stack(:c, nout) - - nboth = NamedDimsArray([NamedDimsArray(ones(3), :a) for i in 1:4], :b) - @test dimnames(stack(nboth)) == (:a, :b) - - ngen = (NamedDimsArray(ones(3), :a) for i in 1:4) - @test dimnames(stack(ngen)) == (:a, :_) - @test dimnames(stack(:b, ngen)) == (:a, :b) - - nmat = [NamedDimsArray(ones(3), :a) for i in 1:3, j in 1:4] - @test dimnames(stack(:c, nmat)) == (:a, :_, :c) + lazystack(acc) isa Array{Int} end +# @testset "names" begin +# +# nin = [NamedDimsArray(ones(3), :a) for i in 1:4] +# @test dimnames(lazystack(nin)) == (:a, :_) +# @test dimnames(lazystack(nin...)) == (:a, :_) +# @test dimnames(lazystack(:b, nin)) == (:a, :b) +# @test dimnames(lazystack(:b, nin...)) == (:a, :b) +# @test lazystack(nin).data.slices[1] isa NamedDimsArray # vector container untouched, +# @test lazystack(nin...).data.slices[1] isa Array # but tuple container cleaned up. +# +# nout = NamedDimsArray([ones(3) for i in 1:4], :b) +# @test dimnames(lazystack(nout)) == (:_, :b) +# @test dimnames(lazystack(:b, nout)) == (:_, :b) +# @test_throws Exception lazystack(:c, nout) +# +# nboth = NamedDimsArray([NamedDimsArray(ones(3), :a) for i in 1:4], :b) +# @test dimnames(lazystack(nboth)) == (:a, :b) +# +# ngen = (NamedDimsArray(ones(3), :a) for i in 1:4) +# @test dimnames(lazystack(ngen)) == (:a, :_) +# @test dimnames(lazystack(:b, ngen)) == (:a, :b) +# +# nmat = [NamedDimsArray(ones(3), :a) for i in 1:3, j in 1:4] +# @test dimnames(lazystack(:c, nmat)) == (:a, :_, :c) +# +# end @testset "offset" begin oin = [OffsetArray(ones(3), 3:5) for i in 1:4] - @test axes(stack(oin)) == (3:5, 1:4) - @test axes(stack(oin...)) == (3:5, 1:4) - @test axes(copy(stack(oin))) == (3:5, 1:4) + @test axes(lazystack(oin)) == (3:5, 1:4) + @test axes(lazystack(oin...)) == (3:5, 1:4) + @test axes(copy(lazystack(oin))) == (3:5, 1:4) oout = OffsetArray([ones(3) for i in 1:4], 11:14) - @test axes(stack(oout)) == (1:3, 11:14) - @test axes(copy(stack(oout))) == (1:3, 11:14) + @test axes(lazystack(oout)) == (1:3, 11:14) + @test axes(copy(lazystack(oout))) == (1:3, 11:14) oboth = OffsetArray(oin, 11:14) - @test axes(stack(oboth)) == (3:5, 11:14) + @test axes(lazystack(oboth)) == (3:5, 11:14) ogen = (OffsetArray([3,4,5], 3:5) for i in 1:4) - @test axes(stack(ogen)) == (3:5, 1:4) - -end -@testset "named offset" begin - - noin = [NamedDimsArray(OffsetArray(ones(3), 3:5), :a) for i in 1:4] - @test dimnames(stack(noin)) == (:a, :_) - @test dimnames(stack(noin...)) == (:a, :_) - @test dimnames(stack(:b, noin)) == (:a, :b) - @test dimnames(stack(:b, noin...)) == (:a, :b) - @test axes(stack(noin)) == (3:5, 1:4) - @test axes(stack(noin...)) == (3:5, 1:4) - - noout = NamedDimsArray(OffsetArray([ones(3) for i in 1:4], 11:14), :b) - @test dimnames(stack(noout)) == (:_, :b) - @test dimnames(stack(:b, noout)) == (:_, :b) - @test_throws Exception stack(:c, noout) - @test axes(stack(noout)) == (1:3, 11:14) - @test axes(copy(stack(noout))) == (1:3, 11:14) - - nogen = (NamedDimsArray(OffsetArray([3,4,5], 3:5), :a) for i in 1:4) - @test dimnames(stack(nogen)) == (:a, :_) - @test dimnames(stack(:b, nogen)) == (:a, :b) - @test axes(stack(nogen)) == (3:5, 1:4) + @test axes(lazystack(ogen)) == (3:5, 1:4) end +# @testset "named offset" begin +# +# noin = [NamedDimsArray(OffsetArray(ones(3), 3:5), :a) for i in 1:4] +# @test dimnames(lazystack(noin)) == (:a, :_) +# @test dimnames(lazystack(noin...)) == (:a, :_) +# @test dimnames(lazystack(:b, noin)) == (:a, :b) +# @test dimnames(lazystack(:b, noin...)) == (:a, :b) +# @test axes(lazystack(noin)) == (3:5, 1:4) +# @test axes(lazystack(noin...)) == (3:5, 1:4) +# +# noout = NamedDimsArray(OffsetArray([ones(3) for i in 1:4], 11:14), :b) +# @test dimnames(lazystack(noout)) == (:_, :b) +# @test dimnames(lazystack(:b, noout)) == (:_, :b) +# @test_throws Exception lazystack(:c, noout) +# @test axes(lazystack(noout)) == (1:3, 11:14) +# @test axes(copy(lazystack(noout))) == (1:3, 11:14) +# +# nogen = (NamedDimsArray(OffsetArray([3,4,5], 3:5), :a) for i in 1:4) +# @test dimnames(lazystack(nogen)) == (:a, :_) +# @test dimnames(lazystack(:b, nogen)) == (:a, :b) +# @test axes(lazystack(nogen)) == (3:5, 1:4) +# +# end @testset "push!" begin v34 = [rand(3) for i in 1:4] - s3 = stack(v34) + s3 = lazystack(v34) @test size(push!(s3, ones(3))) == (3,5) @test s3[1,end] == 1 @@ -175,13 +175,13 @@ end end @testset "errors" begin - @test_throws ArgumentError stack([]) - @test_throws ArgumentError stack(x for x in 1:0) + @test_throws ArgumentError lazystack([]) + @test_throws ArgumentError lazystack(x for x in 1:0) - @test_throws DimensionMismatch stack(1:n for n in 1:3) - @test_throws DimensionMismatch stack([1:n for n in 1:3]) + @test_throws DimensionMismatch lazystack(1:n for n in 1:3) + @test_throws DimensionMismatch lazystack([1:n for n in 1:3]) - @test_throws DimensionMismatch push!(stack([rand(2)]), rand(3)) + @test_throws DimensionMismatch push!(lazystack([rand(2)]), rand(3)) end @testset "readme" begin @@ -189,66 +189,66 @@ end using LazyStack: Stacked _eachcol(m) = (view(m, :, c) for c in axes(m,2)) - @test stack([zeros(2,2), ones(2,2)]) isa Stacked{Float64, 3, <:Vector{<:Matrix}} - @test stack([1,2,3], 4:6) isa Stacked{Int, 2, <:Tuple{<:Vector, <:UnitRange}} - - @test stack([i,2i] for i in 1:5) isa Matrix{Int} - @test stack(*, _eachcol(ones(2,4)), 1:4) isa Matrix{Float64} - @test stack([1,2], [3.0, 4.0], [5im, 6im]) isa Matrix{Number} + @test lazystack([zeros(2,2), ones(2,2)]) isa Stacked{Float64, 3, <:Vector{<:Matrix}} + @test lazystack([1,2,3], 4:6) isa Stacked{Int, 2, <:Tuple{<:Vector, <:UnitRange}} - @test rstack([1:n for n in 1:10]) isa Matrix{Int} - @test rstack(OffsetArray(fill(n,4), rand(-2:2)) for n in 1:10; fill=NaN) isa OffsetArray{Real,2} - -end -@testset "vstack" begin + @test lazystack([i,2i] for i in 1:5) isa Matrix{Int} + @test lazystack(*, _eachcol(ones(2,4)), 1:4) isa Matrix{Float64} + @test_broken lazystack([1,2], [3.0, 4.0], [5im, 6im]) isa Matrix{Number} - v34 = [rand(3) for i in 1:4] - @test LazyStack.vstack(v34) == reduce(vcat, v34) - - g234 = (ones(2) .* (10i + j) for i in 1:3, j in 1:4) - @test LazyStack.vstack(g234) == reduce(vcat, collect(g234)) + @test raggedstack([1:n for n in 1:10]) isa Matrix{Int} + @test raggedstack(OffsetArray(fill(n,4), rand(-2:2)) for n in 1:10; fill=NaN) isa OffsetArray{Real,2} end +# @testset "vlazystack" begin +# +# v34 = [rand(3) for i in 1:4] +# @test LazyStack.vlazystack(v34) == reduce(vcat, v34) +# +# g234 = (ones(2) .* (10i + j) for i in 1:3, j in 1:4) +# @test LazyStack.vlazystack(g234) == reduce(vcat, collect(g234)) +# +# end @testset "ragged" begin - @test rstack([1,2], 1:3) == [1 1; 2 2; 0 3] - @test rstack([[1,2], 1:3], fill=99) == [1 1; 2 2; 99 3] + @test raggedstack([1,2], 1:3) == [1 1; 2 2; 0 3] + @test raggedstack([[1,2], 1:3], fill=99) == [1 1; 2 2; 99 3] - @test rstack(1:2, OffsetArray([2,3], +1)) == [1 0; 2 2; 0 3] - @test rstack(1:2, OffsetArray([0.1,1], -1)) == OffsetArray([0 0.1; 1 1.0; 2 0],-1,0) + @test raggedstack(1:2, OffsetArray([2,3], +1)) == [1 0; 2 2; 0 3] + @test raggedstack(1:2, OffsetArray([0.1,1], -1)) == OffsetArray([0 0.1; 1 1.0; 2 0],-1,0) - @test dimnames(rstack(:b, 1:2, [3,4,5], fill=NaN)) == (:_, :b) - @test dimnames(rstack(:b, NamedDimsArray(1:2, :a), OffsetArray([2,3], +1))) == (:a, :b) + # @test dimnames(raggedstack(:b, 1:2, [3,4,5], fill=NaN)) == (:_, :b) + # @test dimnames(raggedstack(:b, NamedDimsArray(1:2, :a), OffsetArray([2,3], +1))) == (:a, :b) end @testset "tuple functions" begin - @test LazyStack.ndims([1,2]) == 1 - @test LazyStack.ndims((1,2)) == 1 - @test LazyStack.ndims((a=1,b=2)) == 1 + @test LazyStack._ndims([1,2]) == 1 + @test LazyStack._ndims((1,2)) == 1 + @test LazyStack._ndims((a=1,b=2)) == 1 - @test LazyStack.size([1,2]) == (2,) - @test LazyStack.size((1,2)) == (2,) - @test LazyStack.size((a=1,b=2)) == (2,) + @test LazyStack._size([1,2]) == (2,) + @test LazyStack._size((1,2)) == (2,) + @test LazyStack._size((a=1,b=2)) == (2,) - @test LazyStack.axes([1,2]) == (1:2,) - @test LazyStack.axes((1,2)) == (1:2,) - @test LazyStack.axes((a=1,b=2)) == (1:2,) + @test LazyStack._axes([1,2]) == (1:2,) + @test LazyStack._axes((1,2)) == (1:2,) + @test LazyStack._axes((a=1,b=2)) == (1:2,) end @info "loading Zygote" using Zygote @testset "zygote" begin - @test Zygote.gradient((x,y) -> sum(stack(x,y)), ones(2), ones(2)) == ([1,1], [1,1]) - @test Zygote.gradient((x,y) -> sum(stack([x,y])), ones(2), ones(2)) == ([1,1], [1,1]) + @test Zygote.gradient((x,y) -> sum(lazystack(x,y)), ones(2), ones(2)) == ([1,1], [1,1]) + @test Zygote.gradient((x,y) -> sum(lazystack([x,y])), ones(2), ones(2)) == ([1,1], [1,1]) - f399(x) = sum(stack(x) * sum(x)) - f399c(x) = sum(collect(stack(x)) * sum(x)) + f399(x) = sum(lazystack(x) * sum(x)) + f399c(x) = sum(collect(lazystack(x)) * sum(x)) @test Zygote.gradient(f399, [ones(2), ones(2)]) == ([[4,4], [4,4]],) @test Zygote.gradient(f399c, [ones(2), ones(2)]) == ([[4,4], [4,4]],) - ftup(x) = sum(stack(x...) * sum(x)) - ftupc(x) = sum(collect(stack(x...)) * sum(x)) + ftup(x) = sum(lazystack(x...) * sum(x)) + ftupc(x) = sum(collect(lazystack(x...)) * sum(x)) @test Zygote.gradient(ftup, (ones(2), ones(2))) == (([4,4], [4,4]),) @test Zygote.gradient(ftupc, (ones(2), ones(2))) == (([4,4], [4,4]),)