diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 6eceeac..814d07a 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.3'
+          - '1.6'
           - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
           - 'nightly'
         os:
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 8cff839..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-language: julia
-os:
-  - linux
-julia:
-  - 1.3
-  - 1.6
-  - nightly
-matrix:
-  allow_failures:
-  - julia: nightly
diff --git a/Project.toml b/Project.toml
index 491f3b0..fe901a7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,23 +1,26 @@
 name = "LazyStack"
 uuid = "1fad7336-0346-5a1a-a56f-a06ba010965b"
 authors = ["Michael Abbott"]
-version = "0.0.8"
+version = "0.1.0"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
-OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
+# NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
+# OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 
 [compat]
-ChainRulesCore = "0.10.9, 1"
-NamedDims = "0.2.16, 0.3, 1.0"
-OffsetArrays = "1"
-julia = "1.3"
+ChainRulesCore = "1"
+Compat = "3.46, 4.2"
+# NamedDims = "0.2.16, 0.3, 1.0"  # try to remove
+# OffsetArrays = "1"  # try to remove
+julia = "1.6"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "Zygote"]
+test = ["Test", "OffsetArrays", "Zygote"]
diff --git a/README.md b/README.md
index 88d562c..4bb7425 100644
--- a/README.md
+++ b/README.md
@@ -1,67 +1,72 @@
 # LazyStack.jl
 
-[![Travis CI](https://travis-ci.org/mcabbott/LazyStack.jl.svg?branch=master)](https://travis-ci.org/mcabbott/LazyStack.jl)
 [![Github CI](https://github.com/mcabbott/LazyStack.jl/workflows/CI/badge.svg)](https://github.com/mcabbott/LazyStack.jl/actions?query=workflow%3ACI+branch%3Amaster)
 
-This package exports one function, `stack`, for turning a list of arrays 
+This package exports one function, `lazystack`, for turning a list of arrays 
 into one `AbstractArray`. Given several arrays with the same `eltype`, 
 or an array of such arrays, it returns a lazy `Stacked{T,N}` view of these:
 
 ```julia
-stack([zeros(2,2), ones(2,2)])  # isa Stacked{Float64, 3, <:Vector{<:Matrix}}
-stack([1,2,3], 4:6)             # isa Stacked{Int, 2, <:Tuple{<:Vector, <:UnitRange}}
+julia> lazystack([1:2, 3:4, 5:6])
+2×3 lazystack(::Vector{UnitRange{Int64}}) with eltype Int64:
+ 1  3  5
+ 2  4  6
+
+julia> lazystack([pi^ℯ], [ℯ^pi])
+1×2 lazystack(::Tuple{Vector{Float64}, Vector{Float64}}) with eltype Float64:
+ 22.4592  23.1407
 ```
 
-Given a generator, it instead iterates through the elements and writes into a new array.
-Given a function and then some arrays, it behaves like `map(f, A, B)` but immediately writes
-into a new array:
+Before v0.1 this function used to be called `stack`, but that name is now exported by Base (from Julia 1.9).
+Like this package, `Base.stack` makes an array with `size(result) = (size(inner)..., size(outer)...)`.
+It always returns a new dense array, not a lazy container.
+And instead of two vectors (in the above example) it would want a tuple `stack(([pi^ℯ], [ℯ^pi]))`.
 
-```julia
-stack([i,2i] for i in 1:5)            # isa Matrix{Int}     # size(ans) == (2, 5)
-stack(*, eachcol(ones(2,4)), 1:4)     # == Matrix(stack(map(*, eachcol(...), 1:4)))
-```
-
-The same `stack_iter` method is also used for any list of arrays of heterogeneous element type,
-and for arrays of tuples. Notice that like `map(identity, Any[1, 1.0, 5im])`, this promotes using 
-`promote_typejoin`, to `Number` here, rather than to `Complex{Float64}`:
-
-```julia
-stack([1,2], [3.0, 4.0], [5im, 6im])  # isa Matrix{Number}  # size(ans) == (2, 3)
-stack([(i,2.0,3//j) for i=1:4, j=1:5])# isa Array{Real, 3}  # size(ans) == (3, 4, 5)
-```
+Generators such as `lazystack([i,2i] for i in 1:5)` and arrays of mixed eltype like `lazystack([1,2], [3.0, 4.0], [5im, 6im])` used to be be handled here, making a dense array, but are now simply passed through to `Base.stack`.
 
-The slices must all have the same `size`, but they (and the container) 
-can have any number of dimensions. `stack` always places the slice dimensions first.
-There are no options.
+When the individual slices aren't backed by an `Array`, as for instance with `CuArray`s on a GPU, then again `Base.stack` is called. 
+This should make one big `CuArray`, since scalar indexing of individual slices won't work well.
 
 ### Ragged stack
 
-There is also a version which does not demand that slices have equal `size` (or equal `ndims`),
-which always returns a new `Array`. You can control the position of slices `using OffsetArrays`:
+There is also a version which does not demand that slices have equal `size` (or equal `ndims`).
+For now this is not lazy:
 
 ```julia
-rstack([1:n for n in 1:10])           # upper triangular Matrix{Int}
-rstack(OffsetArray(fill(n,4), rand(-2:2)) for n in 1:10; fill=NaN)
+julia> raggedstack([10:10+n for n in 1:3])
+4×3 Matrix{Int64}:
+ 10  10  10
+ 11  11  11
+  0  12  12
+  0   0  13
+
+julia> using OffsetArrays
+
+julia> raggedstack(OffsetArray(fill(1.0n, 3), rand(-1:1)) for n in 1:10; fill=NaN)
+5×10 OffsetArray(::Matrix{Float64}, 0:4, 1:10) with eltype Float64 with indices 0:4×1:10:
+ NaN      2.0  NaN      4.0  NaN      6.0    7.0  NaN      9.0  NaN
+   1.0    2.0    3.0    4.0    5.0    6.0    7.0  NaN      9.0   10.0
+   1.0    2.0    3.0    4.0    5.0    6.0    7.0    8.0    9.0   10.0
+   1.0  NaN      3.0  NaN      5.0  NaN    NaN      8.0  NaN     10.0
+ NaN    NaN    NaN    NaN    NaN    NaN    NaN      8.0  NaN    NaN
 ```
 
 ### Other packages
 
-This one plays well with [OffsetArrays.jl](https://github.com/JuliaArrays/OffsetArrays.jl),
-[NamedDims.jl](https://github.com/invenia/NamedDims.jl), and 
-[Zygote.jl](https://github.com/FluxML/Zygote.jl).
+This one plays well with [OffsetArrays.jl](https://github.com/JuliaArrays/OffsetArrays.jl), and [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl)-compatible AD such as [Zygote.jl](https://github.com/FluxML/Zygote.jl). It's also used internally by [TensorCast.jl](https://github.com/mcabbott/TensorCast.jl).
 
 Besides which, there are several other ways to achieve similar things:
 
 * For an array of arrays, you can also use [`JuliennedArrays.Align`](https://bramtayl.github.io/JuliennedArrays.jl/latest/#JuliennedArrays.Align). This requires (or enables) you to specify which dimensions of the output belong to the sub-arrays, instead of writing `PermutedDimsArray(stack(...), ...)`. 
-* There is also [`RecursiveArrayTools.VectorOfArray`](https://github.com/JuliaDiffEq/RecursiveArrayTools.jl#vectorofarray) which as its name hints only allows a one-dimensional container. Linear indexing retreives a slice, not an element, which is sometimes surprising.
+* There is also [`RecursiveArrayTools.VectorOfArray`](https://github.com/JuliaDiffEq/RecursiveArrayTools.jl#vectorofarray) which as its name hints only allows a one-dimensional container. (And unlike the package name, nothing is recursive.) Linear indexing retreives a slice, not an element, which is sometimes surprising.
 * For a tuple of arrays, [`LazyArrays.Hcat`](https://github.com/JuliaArrays/LazyArrays.jl#concatenation) is at present faster to index than `stack`, but doesn't allow arbitrary dimensions.
-* For a generator of arrays, the built-in `reduce(hcat,...)` may work, but it slow compared to `stack`: see [test/speed.jl](test/speed.jl) for some examples.
 
 And a few more:
 
 * When writing this I missed [`SplitApplyCombine.combinedimsview`](https://github.com/JuliaData/SplitApplyCombine.jl#combinedimsviewarray), which is very similar to `stack`, but doesn't handle tuples.
 * Newer than this package is [StackViews.jl](https://github.com/JuliaArrays/StackViews.jl) handles both, with `StackView(A,B,dims=4) == StackView([A,B],4)` creating a 4th dimension; the container is always one-dimensional. 
 * [`Flux.stack`](https://fluxml.ai/Flux.jl/stable/utilities/#Flux.stack) similarly takes a dimension, but eagerly creates an `Array`.
+* Finally, [CatViews.jl](https://github.com/ahwillia/CatViews.jl) offers a lazy `vcat`. But the package is old and I think not so fast.
 
 The lazy inverse:
 
@@ -71,10 +76,10 @@ The lazy inverse:
 
 * As does [`PackedVectorsOfVectors`](https://github.com/synchronoustechnologies/PackedVectorsOfVectors.jl), although only 1+1 dimensions. Also has an eager `pack` method which turns a vector of vectors into view of a single larger matrix. 
 
-* [`Base.eachslice`](https://docs.julialang.org/en/v1/base/arrays/#Base.eachslice) also views one large array as many slices. This is a generator, but [JuliaLang#32310](https://github.com/JuliaLang/julia/pull/32310) should upgrade it to a multi-dimensional container indexable container.
+* [`Base.eachslice`](https://docs.julialang.org/en/v1/base/arrays/#Base.eachslice) also views one large array as many slices. This was a generator, but [JuliaLang#32310](https://github.com/JuliaLang/julia/pull/32310) upgrades it to a multi-dimensional indexable container, in Julia 1.9.
 
 Eager:
 
 * After writing this I learned of [JuliaLang#31644](https://github.com/JuliaLang/julia/pull/31644) which extends `reduce(hcat,...)` to work on generators. 
 
-* Later, [JuliaLang#31644](https://github.com/JuliaLang/julia/pull/43334) proposes to add the eager `stack_iter` method of this package to Base.
+* Later, [JuliaLang#43334](https://github.com/JuliaLang/julia/pull/43334) has added a better version of this package's `stack_iter` method to Base.
diff --git a/src/LazyStack.jl b/src/LazyStack.jl
index 1b72f22..c114bf5 100644
--- a/src/LazyStack.jl
+++ b/src/LazyStack.jl
@@ -1,27 +1,37 @@
 module LazyStack
 
-export stack, rstack
+export lazystack
+
+include("ragged.jl")
+export raggedstack
+
+using Compat
+
+@deprecate stack lazystack false  # don't export it 
+@deprecate stack_iter Compat.stack false  # don't export it 
+
+@deprecate rstack raggedstack
+
+include("flatten.jl")  # probably not permanent, just prototyping here!
+
 
 #===== Tuples =====#
 
-ndims(A) = Base.ndims(A)
-ndims(::Tuple) = 1
-ndims(::NamedTuple) = 1
+_ndims(A) = Base.ndims(A)
+_ndims(::Tuple) = 1
+_ndims(::NamedTuple) = 1
 
-axes(A) = Base.axes(A)
-axes(A, d) = Base.axes(A, d)
-if VERSION < v"1.1" # because, on Julia 1.0, axes((1,2)) === Base.OneTo(2)
-    axes(t::Tuple) = tuple(Base.axes(t))
-end
-axes(nt::NamedTuple) = tuple(Base.OneTo(length(nt)))
+_axes(A) = Base.axes(A)
+_axes(A, d) = Base.axes(A, d)
+_axes(nt::NamedTuple) = tuple(Base.OneTo(length(nt)))
 
-size(A) = Base.size(A)
-size(t::Tuple) = tuple(length(t))
-size(t::NamedTuple) = tuple(length(t))
+_size(A) = Base.size(A)
+_size(t::Tuple) = tuple(length(t))
+_size(t::NamedTuple) = tuple(length(t))
 
-similar_vector(x::AbstractArray, n::Int) = similar(x, n::Int)
-similar_vector(x::Tuple, n::Int) = Vector{eltype(x)}(undef, n::Int)
-similar_vector(x::NamedTuple, n::Int) = Vector{eltype(x)}(undef, n::Int)
+# similar_vector(x::AbstractArray, n::Int) = similar(x, n::Int)
+# similar_vector(x::Tuple, n::Int) = Vector{eltype(x)}(undef, n::Int)
+# similar_vector(x::NamedTuple, n::Int) = Vector{eltype(x)}(undef, n::Int)
 
 # eltype(x::Tuple) = Base.promote_type(x...) # to match choice below
 # eltype(x::NamedTuple) = Base.promote_type(x...)
@@ -29,52 +39,69 @@ similar_vector(x::NamedTuple, n::Int) = Vector{eltype(x)}(undef, n::Int)
 #===== Slices =====#
 
 """
-    stack([A, B, C])
-    stack(A, B, C)
+    lazystack([A, B, C])
+    lazystack(A, B, C) == lazystack((A, B, C))
 
 Creates a very simple lazy `::Stacked` view of an array of arrays,
 or a tuple of arrays, provided they have a consistent `eltype`.
-The dimensions of the inner arrays come first:
+The dimensions of the inner arrays come first.
+
+# Examples
 ```
-julia> stack([1,2,3], [10,20,30])
-3×2 LazyStack.Stacked{Int64,2,Tuple{Array{Int64,1},Array{Int64,1}}}:
+julia> lazystack([1,2,3], [10,20,30])
+3×2 lazystack(::Tuple{Vector{Int64}, Vector{Int64}}) with eltype Int64:
  1  10
  2  20
  3  30
 
 julia> vecs = [[1,2,3] .+ j*im for j=2:2:8];
 
-julia> A = stack(vecs)
-3×4 Stacked{Complex{Int64},2,Array{Array{Complex{Int64},1},1}}:
+julia> A = lazystack(vecs)
+3×4 lazystack(::Vector{Vector{Complex{Int64}}}) with eltype Complex{Int64}:
  1+2im  1+4im  1+6im  1+8im
  2+2im  2+4im  2+6im  2+8im
  3+2im  3+4im  3+6im  3+8im
 ```
-A few `Base` functions will immediately look inside:
+
+When the slices aren't backed by `Array` (or a few other `Base` types)
+then it reverts to eager `Base.stack`:
+
+```
+julia> using JLArrays  # fake GPUArray
+
+julia> lazystack(jl([1, 2, 3f0]), jl([4, 5, 6f0]))
+3×2 JLArray{Float32, 2}:
+ 1.0  4.0
+ 2.0  5.0
+ 3.0  6.0
+```
+
+A few `Base` functions will immediately look inside the container:
+
 ```
 julia> eachcol(A) |> typeof
-Array{Array{Complex{Int64},1},1}
+Vector{Vector{Complex{Int64}}} (alias for Array{Array{Complex{Int64}, 1}, 1})
 
 julia> view(A, :,1)
-3-element Array{Complex{Int64},1}:
+3-element Vector{Complex{Int64}}:
  1 + 2im
  2 + 2im
  3 + 2im
 ```
 """
-stack(xs::AbstractArray{IT,ON}) where {IT<:AbstractArray{T,IN}} where {T,IN,ON} =
+lazystack(xs::AbstractArray{IT,ON}) where {IT<:AbstractArray{T,IN}} where {T,IN,ON} =
     stack_slices(xs, Val(T), Val(IN+ON))
-stack(xs::Tuple{Vararg{AbstractArray{T,IN}}}) where {T,IN} =
+lazystack(xs::Tuple{Vararg{AbstractArray{T,IN}}}) where {T,IN} =
     stack_slices(xs, Val(T), Val(IN+1))
-stack(xs::AbstractArray{T}...) where {T} = stack(xs)
+lazystack(xs::AbstractArray{T}...) where {T} = lazystack(xs)
 
 function stack_slices(xs::AT, ::Val{T}, ::Val{N}) where {T,N,AT}
     length(xs) >= 1 || throw(ArgumentError("stacking an empty collection is not allowed"))
-    storage_type(first(xs)) <: Union{Array, AbstractRange} || return stack_iter(xs)
-    s = size(first(xs))
+    storage_type(first(xs)) <: Union{Array, AbstractRange} || return Compat.stack(xs)
+    ax = _axes(first(xs))
     for x in xs
-        size(x) == s || throw(DimensionMismatch(
-            "slices being stacked must share a common size. Expected $s, got $(size(x))"))
+        _axes(x) == ax || throw(DimensionMismatch(
+            "slices being stacked must share a common size. Expected $ax, got $(_axes(x))"))
     end
     Stacked{T, N, AT}(xs)
 end
@@ -83,17 +110,17 @@ struct Stacked{T,N,AT} <: AbstractArray{T,N}
     slices::AT
 end
 
-Base.size(x::Stacked) = (size(first(x.slices))..., size(x.slices)...)
-Base.size(x::Stacked{T,N,<:Tuple}) where {T,N} = (size(first(x.slices))..., length(x.slices))
+Base.size(x::Stacked) = (_size(first(x.slices))..., _size(x.slices)...)
+Base.size(x::Stacked{T,N,<:Tuple}) where {T,N} = (_size(first(x.slices))..., length(x.slices))
 
-Base.axes(x::Stacked) = (axes(first(x.slices))..., axes(x.slices)...)
+Base.axes(x::Stacked) = (_axes(first(x.slices))..., _axes(x.slices)...)
 
 Base.parent(x::Stacked) = x.slices
 
-outer_ndims(x::Stacked) = ndims(x.slices)
+outer_ndims(x::Stacked) = _ndims(x.slices)
 outer_ndims(x::Stacked{T,N,<:Tuple}) where {T,N} = 1
 
-inner_ndims(x::Stacked) = ndims(x) - outer_ndims(x)
+inner_ndims(x::Stacked) = _ndims(x) - outer_ndims(x)
 
 @inline function Base.getindex(x::Stacked{T,N}, inds::Vararg{Integer,N}) where {T,N}
     @boundscheck checkbounds(x, inds...)
@@ -104,11 +131,9 @@ end
 
 Base.unaliascopy(x::Stacked) = stack(map(Base.unaliascopy, x.slices))
 
-if VERSION >= v"1.1"
-    Base.eachcol(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}) where {T} = x.slices
-end
+Base.eachcol(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}) where {T} = x.slices
 
-Base.collect(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}) where {T} = reduce(hcat, x.slices)
+Base.collect(x::Stacked) = Compat.stack(x.slices)
 
 """
     LazyStack.ensure_dense(A)
@@ -122,15 +147,15 @@ ensure_dense(x::AbstractArray) = x
 Base.view(x::Stacked{T,2,<:AbstractArray{<:AbstractArray{T,1}}}, ::Colon, i::Int) where {T} = x.slices[i]
 
 function Base.push!(x::Stacked{T,N,<:AbstractVector}, y::AbstractArray) where {T,N}
-    s = size(first(x.slices))
-    isempty(y) || size(y) == s || throw(DimensionMismatch(
-            "slices being stacked must share a common size. Expected $s, got $(size(y))"))
+    s = _size(first(x.slices))
+    isempty(y) || _size(y) == s || throw(DimensionMismatch(
+            "slices being stacked must share a common size. Expected $s, got $(_size(y))"))
     push!(x.slices, y)
     x
 end
 
 function Base.showarg(io::IO, x::Stacked, toplevel)
-    print(io, "stack(")
+    print(io, "lazystack(")
     Base.showarg(io, parent(x), false)
     print(io, ')')
     toplevel && print(io, " with eltype ", eltype(x))
@@ -138,221 +163,123 @@ end
 
 #===== Iteration =====#
 
-ITERS = [:Flatten, :Drop, :Filter]
-for iter in ITERS
-    @eval ndims(::Iterators.$iter) = 1
-end
-ndims(gen::Base.Generator) = ndims(gen.iter)
-ndims(zed::Iterators.Zip) = maximum(ndims, zed.is)
-if VERSION < v"1.1"
-    ndims(zed::Iterators.Zip2) = max(ndims(zed.a), ndims(zed.b))
-end
-
-similar_vector(x, n::Int) = throw(ArgumentError())
+# ITERS = [:Flatten, :Drop, :Filter]
+# for iter in ITERS
+#     @eval _ndims(::Iterators.$iter) = 1
+# end
+# _ndims(gen::Base.Generator) = _ndims(gen.iter)
+# _ndims(zed::Iterators.Zip) = maximum(ndims, zed.is)
+#
+# similar_vector(x, n::Int) = throw(ArgumentError())
 
 """
-    stack(::Generator)
-    stack(::Array{T}, ::Array{S}, ...)
-
-This constructs a new array, calling `stack_iter`.
-Can handle inconsistent eltypes, but not inconsistent sizes.
-
-```
-julia> stack([i i; i 10i] for i in 1:2:3)
-2×2×2 Array{Int64,3}:
-[:, :, 1] =
- 1   1
- 1  10
-
-[:, :, 2] =
- 3   3
- 3  30
-
-julia> stack(c<9 ? [c,2c] : [9.99, 10] for c in 1:10)
-2×10 Array{Real,2}:
- 1  2  3  4   5   6   7   8   9.99   9.99
- 2  4  6  8  10  12  14  16  10.0   10.0
+    lazystack(::Generator)
+    lazystack(::Array{T}, ::Array{S}, ...)
 
-julia> stack(1:3, ones(3), zeros(3) .+ im)
-3×3 Array{Number,2}:
- 1  1.0  0.0+1.0im
- 2  1.0  0.0+1.0im
- 3  1.0  0.0+1.0im
-```
+These used to call this package's own `stack_iter`,
+but now they just call `Base.stack`.
 """
-stack(gen::Base.Generator) = stack_iter(gen)
-for iter in ITERS
-    @eval stack(gen::Iterators.$iter) = stack_iter(gen)
-end
-stack(arr::AbstractArray{Any}) = stack_iter(arr) # e.g. from arr=[]; push!(arr, rand(3)); ...
-
-stack(arr::AbstractArray{<:AbstractArray}) = stack_iter(arr)
-stack(arr::AbstractArray{<:Tuple}) = stack_iter(arr)
-stack(arr::AbstractArray{<:NamedTuple}) = stack_iter(arr)
-
-stack(tup::AbstractArray...) = stack_iter(tup)
-stack(tup::Tuple...) = stack_iter(tup)
-stack(tup::NamedTuple...) = stack_iter(tup)
-# stack(tup::Tuple{Vararg{AbstractArray}}) = stack_iter(tup)
-
-function stack_iter(itr)
-    if itr isa Tuple || itr isa Base.Generator{<:Tuple} || ndims(itr) == 1
-        outsize = tuple(:)
-    else
-        Base.haslength(itr) || return stack_iter(collect(itr))
-        outsize = size(itr)
-    end
-
-    w, val = vstack_plus(itr)
-
-    z = reshape(w, size(val)..., outsize...)
-
-    rewrap_like(z, val)
+lazystack(gen::Base.Generator) = Compat.stack(gen)
+for iter in [:Flatten, :Drop, :Filter]
+    @eval lazystack(gen::Iterators.$iter) = Compat.stack(gen)
 end
+lazystack(arr::AbstractArray{Any}) = Compat.stack(arr) # e.g. from arr=[]; push!(arr, rand(3)); ...
 
-vstack(itr) = first(vstack_plus(itr))
+lazystack(arr::AbstractArray{<:AbstractArray}) = Compat.stack(arr)
+lazystack(arr::AbstractArray{<:Tuple}) = Compat.stack(arr)
+lazystack(arr::AbstractArray{<:NamedTuple}) = Compat.stack(arr)
 
-function vstack_plus(itr)
-    zed = iterate(itr)
-    zed === nothing && throw(ArgumentError("stacking an empty collection is not allowed"))
-    val, state = zed
-    val isa Union{AbstractArray, Tuple, NamedTuple} || return collect(itr), val
+lazystack(tup::AbstractArray...) = Compat.stack(tup)
+lazystack(tup::Tuple...) = Compat.stack(tup)
+lazystack(tup::NamedTuple...) = Compat.stack(tup)
+# lazystack(tup::Tuple{Vararg{AbstractArray}}) = Compat.stack(tup)
 
-    s = size(val)
-    n = Base.haslength(itr) ? prod(s)*length(itr) : nothing
-
-    v = similar_vector(no_wraps(val), something(n, prod(s)))
-    copyto!(v, 1, no_wraps(val), 1, prod(s))
-
-    w = stack_rest(v, 0, n, s, itr, state)#::Vector
-    w, val
-end
-
-function stack_rest(v, i, n, s, itr, state)
-    while true
-        zed = iterate(itr, state)
-        zed === nothing && return v
-        val, state = zed
-        s == size(val) || throw(DimensionMismatch(
-            "slices being stacked must share a common size. Expected $s, got $(size(val))"))
-
-        i += 1
-        if eltype(val) <: eltype(v)
-            if n isa Int
-                copyto!(v, i*prod(s)+1, no_wraps(val), 1, prod(s))
-            else
-                append!(v, vec(no_wraps(val)))
-            end
-        else
-
-            T′ = Base.promote_typejoin(eltype(v), eltype(val))
-            # T′ = Base.promote_type(eltype(v), eltype(val)) # which do I want?
-            v′ = similar(v, T′)
-            copyto!(v′, v)
-
-            if n isa Int
-                copyto!(v′, i*prod(s)+1, no_wraps(val), 1, prod(s))
-            else
-                append!(v′, vec(no_wraps(val)))
-            end
-
-            return stack_rest(v′, i, n, s, itr, state)
-        end
-
-    end
-end
 
 """
-    stack(fun, iters...)
-    stack(eachcol(A), eachslice(B, dims=3)) do a, b
+    lazystack(fun, iters...)
+    lazystack(eachcol(A), eachslice(B, dims=3)) do a, b
         f(a,b)
     end
 
 If the first argument is a function, then this is mapped over the other argumenst.
-The result should be `== stack(map(fun, iters...))`, but done using `stack_iter`
-to return a dense `Array` instead of a `Stacked` object.
+Always uses `Base.stack`.
 """
-stack(fun::Function, iter) = stack(fun(arg) for arg in iter)
-function stack(fun::Function, iters...)
-    if all(Base.haslength, iters)
-        sz = size(first(iters))
-        all(a -> size(a)==sz, iters) || throw(DimensionMismatch(
-            "sizes of all argumens must match, in stack(f, A, B, C). " *
-            "This is slightly stricter than map(f, A, B, C), for now."))
-    end
-    stack(fun(args...) for args in zip(iters...))
-end
-
-#===== Offset =====#
-
-using OffsetArrays
-
-no_wraps(a) = a
-no_wraps(a::OffsetArray) = parent(a)
-
-rewrap_like(A, a) = A
-function rewrap_like(A, a::OffsetArray)
-    B = rewrap_like(A, parent(a))
-    OffsetArray(B, axes(a)..., axes(A, ndims(A)))
-end
-
-#===== NamedDims =====#
-
-using NamedDims
-
-ensure_named(a::AbstractArray, L::Tuple) = NamedDimsArray(a, L)
-ensure_named(a::NamedDimsArray, L::Tuple) = refine_names(a, L)
-
-# array of arrays
-stack(xs::NamedDimsArray{<:Any,<:AbstractArray}) =
-    ensure_named(stack(parent(xs)), getnames(xs))
-stack(x::AT) where {AT <: AbstractArray{<:NamedDimsArray{L,T,IN},ON}} where {T,IN,ON,L} =
-    ensure_named(Stacked{T, IN+ON, AT}(x), getnames(x))
-
-getnames(xs::AbstractArray{<:AbstractArray}) =
-    (dimnames(eltype(xs))..., dimnames(xs)...)
-
-# tuple of arrays
-stack(x::AT) where {AT <: Tuple{Vararg{NamedDimsArray{L,T,IN}}}} where {T,IN,L} =
-    ensure_named(stack(map(parent, x)), getnames(x))
-
-getnames(xs::Tuple{Vararg{<:NamedDimsArray}}) =
-    (dimnames(first(xs))..., :_)
-
-function rewrap_like(A, a::NamedDimsArray{L}) where {L}
-    B = rewrap_like(A, parent(a))
-    ensure_named(B, (L..., ntuple(_ -> :_, ndims(A) - ndims(a))...))
-end
-
-no_wraps(a::NamedDimsArray) = no_wraps(parent(a))
-
-"""
-    stack(name, things...)
-
-If you give one `name::Symbol` before the pieces to be stacked,
-this will be the name of the last dimension of the resulting `NamedDimsArray`.
-(Names attached to slices, and to containers, should also be preserved.)
-"""
-function LazyStack.stack(s::Symbol, args...)
-    data = stack(args...)
-    name_last = ntuple(d -> d==ndims(data) ? s : :_, ndims(data))
-    ensure_named(data, name_last)
-end
+lazystack(fun::Function, iter) = Compat.stack(fun, iter)
+lazystack(fun::Function, iters...) = Compat.stack(fun, iters...)
+
+# #===== Offset =====#
+#
+# using OffsetArrays
+#
+# no_offsets(a) = a
+# no_offsets(a::OffsetArray) = parent(a)
+#
+# rewrap_like(A, a) = A
+# function rewrap_like(A, a::OffsetArray)
+#     B = rewrap_like(A, parent(a))
+#     OffsetArray(B, _axes(a)..., _axes(A, _ndims(A)))
+# end
+#
+# no_wraps_or_offsets(a) = no_offsets(no_wraps(a))
+#
+# #===== NamedDims =====#
+#
+# using NamedDims
+#
+# ensure_named(a::AbstractArray, L::Tuple) = NamedDimsArray(a, L)
+# ensure_named(a::NamedDimsArray, L::Tuple) = refine_names(a, L)
+#
+# # array of arrays
+# stack(xs::NamedDimsArray{<:Any,<:AbstractArray}) =
+#     ensure_named(stack(parent(xs)), getnames(xs))
+# stack(x::AT) where {AT <: AbstractArray{<:NamedDimsArray{L,T,IN},ON}} where {T,IN,ON,L} =
+#     ensure_named(Stacked{T, IN+ON, AT}(x), getnames(x))
+#
+# getnames(xs::AbstractArray{<:AbstractArray}) =
+#     (dimnames(eltype(xs))..., dimnames(xs)...)
+#
+# # tuple of arrays
+# stack(x::AT) where {AT <: Tuple{Vararg{NamedDimsArray{L,T,IN}}}} where {T,IN,L} =
+#     ensure_named(stack(map(parent, x)), getnames(x))
+#
+# getnames(xs::Tuple{Vararg{<:NamedDimsArray}}) =
+#     (dimnames(first(xs))..., :_)
+#
+# function rewrap_like(A, a::NamedDimsArray{L}) where {L}
+#     B = rewrap_like(A, parent(a))
+#     ensure_named(B, (L..., ntuple(_ -> :_, _ndims(A) - _ndims(a))...))
+# end
+#
+# no_wraps(a) = a
+# no_wraps(a::NamedDimsArray) = no_wraps(parent(a))
+#
+# """
+#     stack(name, things...)
+#
+# If you give one `name::Symbol` before the pieces to be stacked,
+# this will be the name of the last dimension of the resulting `NamedDimsArray`.
+# (Names attached to slices, and to containers, should also be preserved.)
+# """
+# function LazyStack.stack(s::Symbol, args...)
+#     data = stack(args...)
+#     name_last = ntuple(d -> d==_ndims(data) ? s : :_, _ndims(data))
+#     ensure_named(data, name_last)
+# end
 
 #===== Gradients =====#
 
 using ChainRulesCore: ChainRulesCore, rrule, NoTangent
 
-function ChainRulesCore.rrule(::typeof(stack), vec::AbstractArray{<:AbstractArray{<:Any,IN}}) where {IN}
-    stack(vec), Δ -> (NoTangent(), [view(Δ, ntuple(_->(:),IN)..., Tuple(I)...) for I in eachindex(vec)],)
+function ChainRulesCore.rrule(::typeof(lazystack), vec::AbstractArray{<:AbstractArray{<:Any,IN}}) where {IN}
+    lazystack(vec), Δ -> (NoTangent(), [view(Δ, ntuple(_->(:),IN)..., Tuple(I)...) for I in eachindex(vec)],)
 end
 
-function ChainRulesCore.rrule(::typeof(stack), tup::Tuple{Vararg{<:AbstractArray{<:Any,IN}}}) where {IN}
-    stack(tup), Δ -> (NoTangent(), ntuple(i -> view(Δ, ntuple(_->(:),IN)..., i), length(tup)),)
+function ChainRulesCore.rrule(::typeof(lazystack), tup::Tuple{Vararg{<:AbstractArray{<:Any,IN}}}) where {IN}
+    lazystack(tup), Δ -> (NoTangent(), ntuple(i -> view(Δ, ntuple(_->(:),IN)..., i), length(tup)),)
 end
 
-function ChainRulesCore.rrule(::typeof(stack), gen::Base.Generator)
-    stack(gen), Δ -> error("not yet!")
+function ChainRulesCore.rrule(::typeof(lazystack), gen::Base.Generator)
+    lazystack(gen), Δ -> error("not yet!")
 end
 
 #===== CuArrays =====#
@@ -363,100 +290,4 @@ function storage_type(x::AbstractArray)
     typeof(x) === typeof(p) ? typeof(x) : storage_type(p)
 end
 
-#===== Ragged =====#
-
-"""
-    rstack(arrays; fill=0)
-
-Ragged `stack`, which allows slices of varying size, and fills the gaps with zero
-or the given `fill`. Always returns an `Array`.
-
-```
-julia> rstack(1:n for n in 1:5)
-5×5 Array{Int64,2}:
- 1  1  1  1  1
- 0  2  2  2  2
- 0  0  3  3  3
- 0  0  0  4  4
- 0  0  0  0  5
-
-julia> rstack([[1,2,3], [10,20.0]], fill=missing)
-3×2 Array{Union{Missing, Float64},2}:
- 1.0  10.0
- 2.0  20.0
- 3.0    missing
-
-julia> using OffsetArrays
-
-julia> rstack(1:3, OffsetArray([2.0,2.1,2.2], -1), OffsetArray([3.2,3.3,3.4], +1))
-5×3 OffsetArray(::Array{Real,2}, 0:4, 1:3) with eltype Real with indices 0:4×1:3:
- 0  2.0  0
- 1  2.1  0
- 2  2.2  3.2
- 3  0    3.3
- 0  0    3.4
-```
-
-"""
-rstack(x::AbstractArray, ys::AbstractArray...; kw...) = rstack((x, ys...); kw...)
-rstack(g::Base.Generator; kw...) = rstack(collect(g); kw...)
-rstack(f::Function, ABC...; kw...) = rstack(map(f, ABC...); kw...)
-rstack(list::AbstractArray{<:AbstractArray}; fill=zero(eltype(first(list)))) = rstack_iter(list; fill=fill)
-rstack(list::Tuple{Vararg{<:AbstractArray}}; fill=zero(eltype(first(list)))) = rstack_iter(list; fill=fill)
-
-function rstack_iter(list; fill)
-    T = mapreduce(eltype, Base.promote_typejoin, list, init=typeof(fill))
-    # T = mapreduce(eltype, Base.promote_type, list, init=typeof(fill))
-    N = maximum(ndims, list)
-    ax = ntuple(N) do d
-        hi = maximum(x -> last(axes(x,d)), list)
-        if all(x -> axes(x,d) isa Base.OneTo, list)
-            Base.OneTo(hi)
-        else
-            lo = minimum(x -> first(axes(x,d)), list)
-            lo:hi
-        end
-    end
-    arr = Array{T}(undef, map(length, ax)..., size(list)...)
-    fill!(arr, fill)
-    out = if ax isa Tuple{Vararg{Base.OneTo}}
-        arr
-    else
-        OffsetArray(arr, (ax..., axes(list)...))
-    end
-    z = rstack_copyto!(out, list, Val(N))
-
-    rewrap_names(z, first(list)) # now I want to separate names & offsets again!
-end
-
-function rstack_copyto!(out, list, ::Val{N}) where {N}
-    for i in tupleindices(list)
-        item = list[i...]
-        o = ntuple(_->1, N - ndims(item))
-        out[CartesianIndices(axes(item)), o..., i...] .= item
-
-        # https://github.com/JuliaArrays/OffsetArrays.jl/issues/100
-        # view(out, axes(item)..., o..., i...) .= item
-
-        # for I in CartesianIndices(item)
-        #     out[Tuple(I)..., o..., i...] = item[I]
-        # end
-    end
-    out
-end
-
-tupleindices(t::Tuple) = ((i,) for i in 1:length(t))
-tupleindices(A::AbstractArray) = (Tuple(I) for I in CartesianIndices(A))
-
-rewrap_names(A, a) = A
-function rewrap_names(A, a::NamedDimsArray{L}) where {L}
-    B = rewrap_names(A, parent(a))
-    ensure_named(B, (L..., ntuple(_ -> :_, ndims(A) - ndims(a))...))
-end
-function rstack(s::Symbol, args...; kw...)
-    data = rstack(args...; kw...)
-    name_last = ntuple(d -> d==ndims(data) ? s : :_, ndims(data))
-    ensure_named(data, name_last)
-end
-
 end # module
diff --git a/src/flatten.jl b/src/flatten.jl
new file mode 100644
index 0000000..87da2be
--- /dev/null
+++ b/src/flatten.jl
@@ -0,0 +1,300 @@
+export flatten
+using Base: IteratorSize, HasShape, HasLength, @default_eltype, haslength
+
+"""
+    flatten([T], iter)
+
+Given an iterator of iterators, returns a `Vector{T}` containing all of their elements.
+Should be equal to `collect(Iterators.flatten(iter))`, but faster.
+
+Acting on a vector of vectors, this is equal to `vcat(iter...)` or `reduce(vcat, iter)`,
+and to `vec(stack(iter))` when this is allowed, i.e. when `allequal(length.(iter))`.
+
+# Examples
+```
+julia> v = flatten((1:2, 8:9))
+4-element Vector{Int64}:
+ 1
+ 2
+ 8
+ 9
+
+julia> v == vcat(1:2, 8:9) == reduce(vcat, [1:2, 8:9]) == vec(stack((1:2, 8:9)))
+true
+
+julia> flatten(Complex{Float32}, [1:1, 9:10])  # specify eltype
+3-element Vector{ComplexF32}:
+  1.0f0 + 0.0f0im
+  9.0f0 + 0.0f0im
+ 10.0f0 + 0.0f0im
+
+julia> flatten(([1 2], [5.0], (x = false,)))  # ignores shape, promotes eltype
+4-element Vector{Float64}:
+ 1.0
+ 2.0
+ 5.0
+ 0.0
+
+julia> collect(Iterators.flatten(([1 2], [5.0], (x = false,))))  # wider eltype
+4-element Vector{Real}:
+     1
+     2
+     5.0
+ false
+
+julia> flatten(42)  # as numbers are iterable
+1-element Vector{Int64}:
+ 42
+
+julia> flatten(Vector{Int}[]), flatten(())  # empty case, with and without known eltype
+(Int64[], Union{}[])
+```
+"""
+function flatten end
+
+"""
+    flatten([T], f, args...)
+
+Equivalent to `flatten(map(f, args...))`, but without allocating the result of `map`.
+
+```
+julia> flatten(x -> (x, x/2), 5:6)
+4-element Vector{Real}:
+ 5
+ 2.5
+ 6
+ 3.0
+```
+"""
+flatten(f, iter) = flatten(f(x) for x in iter)
+flatten(f, xs, yzs...) = flatten(f(xy...) for xy in zip(xs, yzs...))
+
+flatten(::Type{T}, iter) where {T} = _typed_flatten(T, IteratorSize(@default_eltype iter), iter)
+flatten(::Type{T}, f, iter) where {T} = flatten(T, f(x) for x in iter)
+flatten(::Type{T},f, xs, yzs...) where {T} = flatten(T, f(xy...) for xy in zip(xs, yzs...))
+
+function flatten(iter)
+  S = @default_eltype iter
+  T = S != Union{} ? eltype(S) : Any  # Union{} occurs for e.g. flatten(1,2), postpone the error
+  if isconcretetype(T)
+      _typed_flatten(T, IteratorSize(S), iter)
+  else
+      _untyped_flatten(iter)
+  end
+end
+
+function _typed_flatten(::Type{T}, ::Union{HasShape, HasLength}, A::Union{AbstractArray, Tuple}) where {T}
+    len = sum(length, A; init=0)
+    B = Vector{T}(undef, len)
+    off = 1
+    for x in A
+        copyto!(B, off, x)
+        off += length(x)
+    end
+    B
+end
+
+# _typed_flatten(::Type{T}, ::Union{HasShape, HasLength}, A) where {T} = _flatten(collect(A))
+
+function _typed_flatten(::Type{T}, ::IteratorSize, A) where {T}
+    xit = iterate(A)
+    nothing === xit && return T[]
+    x1, _ = xit
+    B = Vector{T}(undef, 0)
+    if haslength(x1) && haslength(A)
+        # sizehint!(B, _flatten_alloc_length(T, x1, A))
+        guess = length(x1) * length(A)
+        alloc = min(guess, (2^30) ÷ max(1, sizeof(T)))  # don't allocate > 1GB
+        sizehint!(B, alloc)
+    end
+    while xit !== nothing
+        x, state = xit
+        append!(B, x)
+        xit = iterate(A, state)
+    end
+    B
+end
+
+# function _flatten_alloc_length(::Type{T}, x1, A) where {T}
+#     guess = length(x1) * length(A)
+#     min(guess, (2^30) ÷ max(1, sizeof(T)))  # don't allocate > 1GB
+# end
+
+function _untyped_flatten(A)
+    xit = iterate(A)
+    nothing === xit && return Union{}[]
+    x1, state = xit
+    B = convert(Vector, vec(collect(x1)))  # can you do this better?
+    # if haslength(x1) && haslength(A)  # doesn't seem to help
+    #     sizehint!(B, _flatten_alloc_length(eltype(B), B, A))
+    # end
+    _untyped_flatten!(B, A, state)
+end
+
+#= For the empty case:
+
+julia> collect(Iterators.flatten(()))
+Union{}[]
+
+=#
+
+function _untyped_flatten!(B, A, state)
+    xit = iterate(A, state)
+    while xit !== nothing
+        x, state = xit
+        if eltype(x) <: eltype(B)
+            append!(B, x)
+        else
+            C = vcat(B, vec(collect(x)))  # can you do this better?
+            return _untyped_flatten!(C, A, state)
+        end
+        xit = iterate(A, state)
+    end
+    return B
+end
+
+
+
+#=
+
+let
+    tups = [Tuple(rand(10)) for _ in 1:100]
+    println("vector of tuples")
+    @btime stack($tups)
+    @btime flatten($tups)
+    @btime collect(Iterators.flatten($tups))
+
+    println("generator of tuples")
+    @btime stack(t for t in $tups)
+    @btime flatten(t for t in $tups)
+    @btime collect(Iterators.flatten(t for t in $tups))
+
+    println("  ... extra to collet")
+    @btime collect(t for t in $tups)
+
+    println("vectors")
+    vecs = [rand(100) for _ in 1:100]
+    @btime stack($vecs)
+    @btime flatten($vecs)
+    @btime collect(Iterators.flatten($vecs))
+
+    println("generator of vectors")
+    @btime stack(v for v in $vecs)
+    @btime flatten(v for v in $vecs)
+    @btime collect(Iterators.flatten(v for v in $vecs))
+
+    println("  ... extra to collet")
+    @btime collect(t for t in $vecs)
+end;
+
+vector of tuples
+  min 952.636 ns, mean 2.109 μs (1 allocation, 7.94 KiB)
+  min 845.333 ns, mean 2.071 μs (1 allocation, 7.94 KiB)
+  min 11.708 μs, mean 12.503 μs (1 allocation, 7.94 KiB)
+generator of tuples
+  min 945.800 ns, mean 2.055 μs (1 allocation, 7.94 KiB)
+  min 1.608 μs, mean 2.613 μs (2 allocations, 7.94 KiB)   <--- could be better?
+  min 16.292 μs, mean 18.288 μs (7 allocations, 21.92 KiB)
+  ... extra to collet
+  min 482.051 ns, mean 1.713 μs (1 allocation, 7.94 KiB)
+vectors
+  min 2.643 μs, mean 7.749 μs (2 allocations, 78.17 KiB)
+  min 2.704 μs, mean 7.681 μs (2 allocations, 78.17 KiB)
+  min 70.375 μs, mean 77.521 μs (9 allocations, 326.55 KiB)
+generator of vectors
+  min 2.588 μs, mean 7.277 μs (2 allocations, 78.17 KiB)
+  min 4.738 μs, mean 9.588 μs (2 allocations, 78.25 KiB)   <--- could be better
+  min 68.625 μs, mean 76.385 μs (10 allocations, 326.61 KiB)
+  ... extra to collet
+  min 192.396 ns, mean 217.751 ns (1 allocation, 896 bytes)  <--- worth it!
+
+
+let
+    tups = [Tuple(rand(10)) for _ in 1:100]
+    println("unknown number of tuples")
+    @btime stack(t for t in $tups if true)
+    @btime flatten(t for t in $tups if true)
+    @btime collect(Iterators.flatten(t for t in $tups if true))
+
+    println("  ... extra to collet")
+    @btime collect(t for t in $tups if true)
+
+    vecs = [rand(100) for _ in 1:100]
+    println("unknown number of vectors")
+    @btime stack(v for v in $vecs if true)
+    @btime flatten(v for v in $vecs if true)
+    @btime collect(Iterators.flatten(v for v in $vecs if true))
+
+    println("  ... extra to collet")
+    @btime collect(v for v in $vecs if true)
+end;
+
+unknown number of tuples
+  min 2.454 μs, mean 5.807 μs (6 allocations, 25.72 KiB)
+  min 2.042 μs, mean 4.953 μs (6 allocations, 21.91 KiB)
+  min 16.292 μs, mean 18.510 μs (7 allocations, 21.92 KiB)
+  ... extra to collet
+  min 1.450 μs, mean 3.878 μs (5 allocations, 17.78 KiB)
+unknown number of vectors
+  min 3.625 μs, mean 9.854 μs (7 allocations, 80.12 KiB)
+  min 7.917 μs, mean 16.509 μs (7 allocations, 198.11 KiB)   <--- could be better?
+  min 71.875 μs, mean 79.867 μs (10 allocations, 326.61 KiB)
+  ... extra to collet
+  min 1.042 μs, mean 1.158 μs (5 allocations, 1.95 KiB)  <--- worth it!
+
+let
+    nums = rand(128, 100)
+    println("vector")
+    @btime stack($nums)
+    @btime flatten($nums)
+    @btime copy(vec($nums))
+end;
+
+vector
+  min 8.361 μs, mean 20.294 μs (2 allocations, 100.05 KiB)
+  min 9.333 μs, mean 19.566 μs (2 allocations, 100.05 KiB)
+  min 1.995 μs, mean 10.940 μs (4 allocations, 100.12 KiB)
+
+let
+    tups = [Tuple(rand(rand(1:20))) for _ in 1:100]
+    println("tuples of varying length")
+    @btime flatten($tups)
+    @btime collect(Iterators.flatten($tups))
+
+    vecs = [rand(1:200) for _ in 1:100]
+    println("vectors of varying length")
+    @btime flatten($vecs)
+    @btime collect(Iterators.flatten($vecs))
+end;
+
+tuples of varying length
+  min 14.167 μs, mean 15.792 μs (52 allocations, 9.11 KiB)
+  min 8.833 μs, mean 11.346 μs (6 allocations, 21.86 KiB)
+vectors of varying length
+  min 105.169 ns, mean 125.837 ns (1 allocation, 896 bytes)
+  min 106.701 ns, mean 127.880 ns (1 allocation, 896 bytes)
+
+let
+    tups = vcat([Tuple(rand(1:10, 10)) for _ in 1:50], [Tuple(rand(10)) for _ in 1:50])
+    println("tuples of varying eltype")
+    @btime stack($tups)
+    @btime flatten($tups)
+    @btime collect(Iterators.flatten($tups))
+
+    println("vectors of varying eltype")
+    vecs = vcat([rand(1:10, 100) for _ in 1:50], [rand(100) for _ in 1:50])
+    @btime stack($vecs)
+    @btime flatten($vecs)
+    @btime collect(Iterators.flatten($vecs))
+end;
+
+tuples of varying eltype
+  min 23.917 μs, mean 26.051 μs (49 allocations, 8.69 KiB)
+  min 10.417 μs, mean 13.462 μs (7 allocations, 20.41 KiB)
+  min 28.041 μs, mean 29.826 μs (501 allocations, 15.75 KiB)
+vectors of varying eltype
+  min 2.646 μs, mean 10.090 μs (2 allocations, 78.17 KiB)
+  min 2.713 μs, mean 9.107 μs (2 allocations, 78.17 KiB)
+  min 68.750 μs, mean 77.771 μs (9 allocations, 326.55 KiB)
+
+=#
\ No newline at end of file
diff --git a/src/ragged.jl b/src/ragged.jl
new file mode 100644
index 0000000..11ffc64
--- /dev/null
+++ b/src/ragged.jl
@@ -0,0 +1,86 @@
+"""
+    raggedstack(arrays; fill=0)
+
+Ragged `stack`, which allows slices of varying _size, and fills the gaps with zero
+or the given `fill`. Always returns an `Array`.
+
+```
+julia> raggedstack(1:n for n in 1:5)
+5×5 Array{Int64,2}:
+ 1  1  1  1  1
+ 0  2  2  2  2
+ 0  0  3  3  3
+ 0  0  0  4  4
+ 0  0  0  0  5
+
+julia> raggedstack([[1,2,3], [10,20.0]], fill=missing)
+3×2 Array{Union{Missing, Float64},2}:
+ 1.0  10.0
+ 2.0  20.0
+ 3.0    missing
+
+julia> using OffsetArrays
+
+julia> raggedstack(1:3, OffsetArray([2.0,2.1,2.2], -1), OffsetArray([3.2,3.3,3.4], +1))
+5×3 OffsetArray(::Array{Real,2}, 0:4, 1:3) with eltype Real with indices 0:4×1:3:
+ 0  2.0  0
+ 1  2.1  0
+ 2  2.2  3.2
+ 3  0    3.3
+ 0  0    3.4
+```
+
+"""
+raggedstack(x::AbstractArray, ys::AbstractArray...; kw...) = raggedstack((x, ys...); kw...)
+raggedstack(g::Base.Generator; kw...) = raggedstack(collect(g); kw...)
+raggedstack(f::Function, ABC...; kw...) = raggedstack(map(f, ABC...); kw...)
+raggedstack(list::AbstractArray{<:AbstractArray}; fill=zero(eltype(first(list)))) = raggedstack_iter(list; fill)
+raggedstack(list::Tuple{Vararg{<:AbstractArray}}; fill=zero(eltype(first(list)))) = raggedstack_iter(list; fill)
+
+function raggedstack_iter(list; fill)
+    T = mapreduce(eltype, Base.promote_typejoin, list, init=typeof(fill))
+    # T = mapreduce(eltype, Base.promote_type, list, init=typeof(fill))
+    N = maximum(_ndims, list)
+    ax = ntuple(N) do d
+        hi = maximum(x -> last(_axes(x,d)), list)
+        if all(x -> _axes(x,d) isa Base.OneTo, list)
+            Base.OneTo(hi)
+        else
+            lo = minimum(x -> first(_axes(x,d)), list)
+            lo:hi
+        end
+    end
+    out = similar(1:0, T, ax..., _axes(list)...)
+    fill!(out, fill)
+    raggedstack_copyto!(out, list, Val(N))
+end
+
+function raggedstack_copyto!(out, list, ::Val{N}) where {N}
+    for i in tupleindices(list)
+        item = list[i...]
+        o = ntuple(_->1, N - _ndims(item))
+        out[CartesianIndices(_axes(item)), o..., i...] .= item
+
+        # https://github.com/JuliaArrays/OffsetArrays.jl/issues/100
+        # view(out, _axes(item)..., o..., i...) .= item
+
+        # for I in CartesianIndices(item)
+        #     out[Tuple(I)..., o..., i...] = item[I]
+        # end
+    end
+    out
+end
+
+tupleindices(t::Tuple) = ((i,) for i in 1:length(t))
+tupleindices(A::AbstractArray) = (Tuple(I) for I in CartesianIndices(A))
+
+# rewrap_names(A, a) = A
+# function rewrap_names(A, a::NamedDimsArray{L}) where {L}
+#     B = rewrap_names(A, parent(a))
+#     ensure_named(B, (L..., ntuple(_ -> :_, _ndims(A) - _ndims(a))...))
+# end
+# function raggedstack(s::Symbol, args...; kw...)
+#     data = raggedstack(args...; kw...)
+#     name_last = ntuple(d -> d==_ndims(data) ? s : :_, _ndims(data))
+#     ensure_named(data, name_last)
+# end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index b3c524e..f27ab47 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,56 +1,56 @@
 using Test, LazyStack
-using OffsetArrays, NamedDims
+using OffsetArrays # , NamedDims
 
 @testset "basics" begin
 
     v34 = [rand(3) for i in 1:4]
 
-    @test stack(v34) == hcat(v34...)
-    @test stack(v34) isa LazyStack.Stacked
+    @test lazystack(v34) == hcat(v34...)
+    @test lazystack(v34) isa LazyStack.Stacked
 
-    @test stack(v34...) == hcat(v34...)
-    @test stack(v34...).slices isa Tuple
-    @test LazyStack.ensure_dense(stack(v34...)) isa Array
+    @test lazystack(v34...) == hcat(v34...)
+    @test lazystack(v34...).slices isa Tuple
+    @test LazyStack.ensure_dense(lazystack(v34...)) isa Array
 
-    @test stack(v34[i] for i in 1:4) == hcat(v34...)
-    @test stack(v34[i] for i in 1:4) isa Array
+    @test lazystack(v34[i] for i in 1:4) == hcat(v34...)
+    @test lazystack(v34[i] for i in 1:4) isa Array
 
-    @test stack(v34)[1] == v34[1][1] # linear indexing
-    @test stack(v34)[1,1,1] == v34[1][1] # trailing dims
-    @test stack(v34) * ones(4) ≈ hcat(v34...) * ones(4) # issue #6
-    @test stack(v34) * ones(4,2) ≈ hcat(v34...) * ones(4,2)
-    @test axes(stack(v34)) === axes(stack(v34...)) === axes(stack(v34[i] for i in 1:4))
+    @test lazystack(v34)[1] == v34[1][1] # linear indexing
+    @test lazystack(v34)[1,1,1] == v34[1][1] # trailing dims
+    @test lazystack(v34) * ones(4) ≈ hcat(v34...) * ones(4) # issue #6
+    @test lazystack(v34) * ones(4,2) ≈ hcat(v34...) * ones(4,2)
+    @test axes(lazystack(v34)) === axes(lazystack(v34...)) === axes(lazystack(v34[i] for i in 1:4))
 
 end
 @testset "tuples" begin
 
     vt = [(1,2), (3,4), (5,6)]
     vnt = [(a=1,b=2), (a=3,b=4), (a=5,b=6)]
-    @test stack(vt) isa Array
-    @test stack(vt) == reshape(1:6, 2,3)
-    @test stack(vnt) isa Array
+    @test lazystack(vt) isa Array
+    @test lazystack(vt) == reshape(1:6, 2,3)
+    @test lazystack(vnt) isa Array
 
-    @test stack(vt...) isa Array
-    @test stack(vnt...) isa Array
+    @test lazystack(vt...) isa Array
+    @test lazystack(vnt...) isa Array
 
 end
 @testset "generators" begin
 
     g234 = (ones(2) .* (10i + j) for i in 1:3, j in 1:4)
-    @test stack(g234) == stack(collect(g234))
+    @test lazystack(g234) == lazystack(collect(g234))
 
-    @test stack(Iterators.filter(_ -> true, g234)) == reshape(stack(g234), 2,:)
+    @test lazystack(Iterators.filter(_ -> true, g234)) == reshape(lazystack(g234), 2,:)
 
-    @test stack(Iterators.drop(g234, 2)) == reshape(stack(g234), 2,:)[:, 3:end]
+    @test lazystack(Iterators.drop(g234, 2)) == reshape(lazystack(g234), 2,:)[:, 3:end]
 
     g16 = (ones(1) .* (10i + j) for j in 1:3 for i in 1:2) # Iterators.Flatten
-    @test stack(collect(g16)) == stack(g16)
+    @test lazystack(collect(g16)) == lazystack(g16)
 
     g29 = (fill(i,2) for i=1:9)
-    @test stack(Iterators.reverse(g29)) == reverse(stack(g29), dims=2)
+    @test lazystack(Iterators.reverse(g29)) == reverse(lazystack(g29), dims=2)
 
     gt = ((1,2,3) for i in 1:4)
-    @test stack(gt) isa Array
+    @test lazystack(gt) isa Array
 
 end
 @testset "functions" begin
@@ -58,116 +58,116 @@ end
     m1 = rand(1:99, 3,10)
     _eachcol(m) = (view(m, :, c) for c in axes(m,2))
 
-    @test stack(sum, _eachcol(m1)) == vec(mapslices(sum, m1, dims=1))
+    @test lazystack(sum, _eachcol(m1)) == vec(mapslices(sum, m1, dims=1))
 
     f1(x,y) = x .+ y
-    @test stack(f1, _eachcol(m1), _eachcol(m1)) == 2 .* m1
-    @test stack(f1, _eachcol(m1), 1:10) == m1 .+ (1:10)'
+    @test lazystack(f1, _eachcol(m1), _eachcol(m1)) == 2 .* m1
+    @test lazystack(f1, _eachcol(m1), 1:10) == m1 .+ (1:10)'
 
     # @test_throws DimensionMismatch map(f1, _eachcol(m1), 1:12) # it doesn't throw!
-    @test_throws DimensionMismatch stack(f1, _eachcol(m1), 1:12)
+    # @test_throws DimensionMismatch lazystack(f1, _eachcol(m1), 1:12)
 
-    # This is where stack doesn't quite follow map's behaviour:
-    @test size(stack(map(*, [ones(2) for i=1:3, j=1:4], ones(3)))) == (2,3)
-    @test size(stack(map(*, [ones(2) for i=1:3, j=1:4], ones(5)))) == (2,5)
-    @test_throws DimensionMismatch stack(*, [ones(2) for i=1:3, j=1:4], ones(3))
-    @test_throws DimensionMismatch map(*, [ones(2) for i=1:3, j=1:4], ones(3,1))
+    # This is where lazystack doesn't quite follow map's behaviour:
+    @test size(lazystack(map(*, [ones(2) for i=1:3, j=1:4], ones(3)))) == (2,3)
+    @test size(lazystack(map(*, [ones(2) for i=1:3, j=1:4], ones(5)))) == (2,5)
+    # @test_throws DimensionMismatch lazystack(*, [ones(2) for i=1:3, j=1:4], ones(3))
+    # @test_throws DimensionMismatch map(*, [ones(2) for i=1:3, j=1:4], ones(3,1))
 
 end
 @testset "types" begin
 
-    @test stack([1:3 for _=1:2]...) isa LazyStack.Stacked{Int,2,<:Tuple{UnitRange,UnitRange}}
+    @test lazystack([1:3 for _=1:2]...) isa LazyStack.Stacked{Int,2,<:Tuple{UnitRange,UnitRange}}
 
-    @test eltype(stack(1:3, ones(3))) == Real
-    @test eltype(stack([1:3, ones(3)])) == Real
-    @test eltype(stack(i==1 ? (1:3) : ones(3) for i in 1:2)) == Real
+    @test_broken eltype(lazystack(1:3, ones(3))) == Real
+    @test_broken eltype(lazystack([1:3, ones(3)])) == Real
+    @test_broken eltype(lazystack(i==1 ? (1:3) : ones(3) for i in 1:2)) == Real
 
     acc = []
     for i=1:3
         push!(acc, fill(i, 4))
     end
-    @test stack(acc) isa Array{Int}
+    @test lazystack(acc) isa Array{Int}
     push!(acc, rand(4))
-    @test stack(acc) isa Array{Real}
+    @test_broken lazystack(acc) isa Array{Real}
 
     acc = Array[]
     for i=1:3
         push!(acc, fill(i, 4))
     end
-    stack(acc) isa Array{Int}
-
-end
-@testset "names" begin
-
-    nin = [NamedDimsArray(ones(3), :a) for i in 1:4]
-    @test dimnames(stack(nin)) == (:a, :_)
-    @test dimnames(stack(nin...)) == (:a, :_)
-    @test dimnames(stack(:b, nin)) == (:a, :b)
-    @test dimnames(stack(:b, nin...)) == (:a, :b)
-    @test stack(nin).data.slices[1] isa NamedDimsArray # vector container untouched,
-    @test stack(nin...).data.slices[1] isa Array # but tuple container cleaned up.
-
-    nout = NamedDimsArray([ones(3) for i in 1:4], :b)
-    @test dimnames(stack(nout)) == (:_, :b)
-    @test dimnames(stack(:b, nout)) == (:_, :b)
-    @test_throws Exception stack(:c, nout)
-
-    nboth = NamedDimsArray([NamedDimsArray(ones(3), :a) for i in 1:4], :b)
-    @test dimnames(stack(nboth)) == (:a, :b)
-
-    ngen = (NamedDimsArray(ones(3), :a) for i in 1:4)
-    @test dimnames(stack(ngen)) == (:a, :_)
-    @test dimnames(stack(:b, ngen)) == (:a, :b)
-
-    nmat = [NamedDimsArray(ones(3), :a) for i in 1:3, j in 1:4]
-    @test dimnames(stack(:c, nmat)) == (:a, :_, :c)
+    lazystack(acc) isa Array{Int}
 
 end
+# @testset "names" begin
+#
+#     nin = [NamedDimsArray(ones(3), :a) for i in 1:4]
+#     @test dimnames(lazystack(nin)) == (:a, :_)
+#     @test dimnames(lazystack(nin...)) == (:a, :_)
+#     @test dimnames(lazystack(:b, nin)) == (:a, :b)
+#     @test dimnames(lazystack(:b, nin...)) == (:a, :b)
+#     @test lazystack(nin).data.slices[1] isa NamedDimsArray # vector container untouched,
+#     @test lazystack(nin...).data.slices[1] isa Array # but tuple container cleaned up.
+#
+#     nout = NamedDimsArray([ones(3) for i in 1:4], :b)
+#     @test dimnames(lazystack(nout)) == (:_, :b)
+#     @test dimnames(lazystack(:b, nout)) == (:_, :b)
+#     @test_throws Exception lazystack(:c, nout)
+#
+#     nboth = NamedDimsArray([NamedDimsArray(ones(3), :a) for i in 1:4], :b)
+#     @test dimnames(lazystack(nboth)) == (:a, :b)
+#
+#     ngen = (NamedDimsArray(ones(3), :a) for i in 1:4)
+#     @test dimnames(lazystack(ngen)) == (:a, :_)
+#     @test dimnames(lazystack(:b, ngen)) == (:a, :b)
+#
+#     nmat = [NamedDimsArray(ones(3), :a) for i in 1:3, j in 1:4]
+#     @test dimnames(lazystack(:c, nmat)) == (:a, :_, :c)
+#
+# end
 @testset "offset" begin
 
     oin = [OffsetArray(ones(3), 3:5) for i in 1:4]
-    @test axes(stack(oin)) == (3:5, 1:4)
-    @test axes(stack(oin...)) == (3:5, 1:4)
-    @test axes(copy(stack(oin))) == (3:5, 1:4)
+    @test axes(lazystack(oin)) == (3:5, 1:4)
+    @test axes(lazystack(oin...)) == (3:5, 1:4)
+    @test axes(copy(lazystack(oin))) == (3:5, 1:4)
 
     oout = OffsetArray([ones(3) for i in 1:4], 11:14)
-    @test axes(stack(oout)) == (1:3, 11:14)
-    @test axes(copy(stack(oout))) ==  (1:3, 11:14)
+    @test axes(lazystack(oout)) == (1:3, 11:14)
+    @test axes(copy(lazystack(oout))) ==  (1:3, 11:14)
 
     oboth = OffsetArray(oin, 11:14)
-    @test axes(stack(oboth)) == (3:5, 11:14)
+    @test axes(lazystack(oboth)) == (3:5, 11:14)
 
     ogen = (OffsetArray([3,4,5], 3:5) for i in 1:4)
-    @test axes(stack(ogen)) == (3:5, 1:4)
-
-end
-@testset "named offset" begin
-
-    noin = [NamedDimsArray(OffsetArray(ones(3), 3:5), :a) for i in 1:4]
-    @test dimnames(stack(noin)) == (:a, :_)
-    @test dimnames(stack(noin...)) == (:a, :_)
-    @test dimnames(stack(:b, noin)) == (:a, :b)
-    @test dimnames(stack(:b, noin...)) == (:a, :b)
-    @test axes(stack(noin)) == (3:5, 1:4)
-    @test axes(stack(noin...)) == (3:5, 1:4)
-
-    noout = NamedDimsArray(OffsetArray([ones(3) for i in 1:4], 11:14), :b)
-    @test dimnames(stack(noout)) == (:_, :b)
-    @test dimnames(stack(:b, noout)) == (:_, :b)
-    @test_throws Exception stack(:c, noout)
-    @test axes(stack(noout)) == (1:3, 11:14)
-    @test axes(copy(stack(noout))) ==  (1:3, 11:14)
-
-    nogen = (NamedDimsArray(OffsetArray([3,4,5], 3:5), :a) for i in 1:4)
-    @test dimnames(stack(nogen)) == (:a, :_)
-    @test dimnames(stack(:b, nogen)) == (:a, :b)
-    @test axes(stack(nogen)) == (3:5, 1:4)
+    @test axes(lazystack(ogen)) == (3:5, 1:4)
 
 end
+# @testset "named offset" begin
+#
+#     noin = [NamedDimsArray(OffsetArray(ones(3), 3:5), :a) for i in 1:4]
+#     @test dimnames(lazystack(noin)) == (:a, :_)
+#     @test dimnames(lazystack(noin...)) == (:a, :_)
+#     @test dimnames(lazystack(:b, noin)) == (:a, :b)
+#     @test dimnames(lazystack(:b, noin...)) == (:a, :b)
+#     @test axes(lazystack(noin)) == (3:5, 1:4)
+#     @test axes(lazystack(noin...)) == (3:5, 1:4)
+#
+#     noout = NamedDimsArray(OffsetArray([ones(3) for i in 1:4], 11:14), :b)
+#     @test dimnames(lazystack(noout)) == (:_, :b)
+#     @test dimnames(lazystack(:b, noout)) == (:_, :b)
+#     @test_throws Exception lazystack(:c, noout)
+#     @test axes(lazystack(noout)) == (1:3, 11:14)
+#     @test axes(copy(lazystack(noout))) ==  (1:3, 11:14)
+#
+#     nogen = (NamedDimsArray(OffsetArray([3,4,5], 3:5), :a) for i in 1:4)
+#     @test dimnames(lazystack(nogen)) == (:a, :_)
+#     @test dimnames(lazystack(:b, nogen)) == (:a, :b)
+#     @test axes(lazystack(nogen)) == (3:5, 1:4)
+#
+# end
 @testset "push!" begin
 
     v34 = [rand(3) for i in 1:4]
-    s3 = stack(v34)
+    s3 = lazystack(v34)
 
     @test size(push!(s3, ones(3))) == (3,5)
     @test s3[1,end] == 1
@@ -175,13 +175,13 @@ end
 end
 @testset "errors" begin
 
-    @test_throws ArgumentError stack([])
-    @test_throws ArgumentError stack(x for x in 1:0)
+    @test_throws ArgumentError lazystack([])
+    @test_throws ArgumentError lazystack(x for x in 1:0)
 
-    @test_throws DimensionMismatch stack(1:n for n in 1:3)
-    @test_throws DimensionMismatch stack([1:n for n in 1:3])
+    @test_throws DimensionMismatch lazystack(1:n for n in 1:3)
+    @test_throws DimensionMismatch lazystack([1:n for n in 1:3])
 
-    @test_throws DimensionMismatch push!(stack([rand(2)]), rand(3))
+    @test_throws DimensionMismatch push!(lazystack([rand(2)]), rand(3))
 
 end
 @testset "readme" begin
@@ -189,66 +189,66 @@ end
     using LazyStack: Stacked
     _eachcol(m) = (view(m, :, c) for c in axes(m,2))
 
-    @test stack([zeros(2,2), ones(2,2)]) isa Stacked{Float64, 3, <:Vector{<:Matrix}}
-    @test stack([1,2,3], 4:6) isa Stacked{Int, 2, <:Tuple{<:Vector, <:UnitRange}}
-
-    @test stack([i,2i] for i in 1:5) isa Matrix{Int}
-    @test stack(*, _eachcol(ones(2,4)), 1:4) isa Matrix{Float64}
-    @test stack([1,2], [3.0, 4.0], [5im, 6im]) isa Matrix{Number}
+    @test lazystack([zeros(2,2), ones(2,2)]) isa Stacked{Float64, 3, <:Vector{<:Matrix}}
+    @test lazystack([1,2,3], 4:6) isa Stacked{Int, 2, <:Tuple{<:Vector, <:UnitRange}}
 
-    @test rstack([1:n for n in 1:10]) isa Matrix{Int}
-    @test rstack(OffsetArray(fill(n,4), rand(-2:2)) for n in 1:10; fill=NaN) isa OffsetArray{Real,2}
-
-end
-@testset "vstack" begin
+    @test lazystack([i,2i] for i in 1:5) isa Matrix{Int}
+    @test lazystack(*, _eachcol(ones(2,4)), 1:4) isa Matrix{Float64}
+    @test_broken lazystack([1,2], [3.0, 4.0], [5im, 6im]) isa Matrix{Number}
 
-    v34 = [rand(3) for i in 1:4]
-    @test LazyStack.vstack(v34) == reduce(vcat, v34)
-
-    g234 = (ones(2) .* (10i + j) for i in 1:3, j in 1:4)
-    @test LazyStack.vstack(g234) == reduce(vcat, collect(g234))
+    @test raggedstack([1:n for n in 1:10]) isa Matrix{Int}
+    @test raggedstack(OffsetArray(fill(n,4), rand(-2:2)) for n in 1:10; fill=NaN) isa OffsetArray{Real,2}
 
 end
+# @testset "vlazystack" begin
+#
+#     v34 = [rand(3) for i in 1:4]
+#     @test LazyStack.vlazystack(v34) == reduce(vcat, v34)
+#
+#     g234 = (ones(2) .* (10i + j) for i in 1:3, j in 1:4)
+#     @test LazyStack.vlazystack(g234) == reduce(vcat, collect(g234))
+#
+# end
 @testset "ragged" begin
 
-    @test rstack([1,2], 1:3) == [1 1; 2 2; 0 3]
-    @test rstack([[1,2], 1:3], fill=99) == [1 1; 2 2; 99 3]
+    @test raggedstack([1,2], 1:3) == [1 1; 2 2; 0 3]
+    @test raggedstack([[1,2], 1:3], fill=99) == [1 1; 2 2; 99 3]
 
-    @test rstack(1:2, OffsetArray([2,3], +1)) == [1 0; 2 2; 0 3]
-    @test rstack(1:2, OffsetArray([0.1,1], -1)) == OffsetArray([0 0.1; 1 1.0; 2 0],-1,0)
+    @test raggedstack(1:2, OffsetArray([2,3], +1)) == [1 0; 2 2; 0 3]
+    @test raggedstack(1:2, OffsetArray([0.1,1], -1)) == OffsetArray([0 0.1; 1 1.0; 2 0],-1,0)
 
-    @test dimnames(rstack(:b, 1:2, [3,4,5], fill=NaN)) == (:_, :b)
-    @test dimnames(rstack(:b, NamedDimsArray(1:2, :a), OffsetArray([2,3], +1))) == (:a, :b)
+    # @test dimnames(raggedstack(:b, 1:2, [3,4,5], fill=NaN)) == (:_, :b)
+    # @test dimnames(raggedstack(:b, NamedDimsArray(1:2, :a), OffsetArray([2,3], +1))) == (:a, :b)
 
 end
 @testset "tuple functions" begin
 
-    @test LazyStack.ndims([1,2]) == 1
-    @test LazyStack.ndims((1,2)) == 1
-    @test LazyStack.ndims((a=1,b=2)) == 1
+    @test LazyStack._ndims([1,2]) == 1
+    @test LazyStack._ndims((1,2)) == 1
+    @test LazyStack._ndims((a=1,b=2)) == 1
 
-    @test LazyStack.size([1,2]) == (2,)
-    @test LazyStack.size((1,2)) == (2,)
-    @test LazyStack.size((a=1,b=2)) == (2,)
+    @test LazyStack._size([1,2]) == (2,)
+    @test LazyStack._size((1,2)) == (2,)
+    @test LazyStack._size((a=1,b=2)) == (2,)
 
-    @test LazyStack.axes([1,2]) == (1:2,)
-    @test LazyStack.axes((1,2)) == (1:2,)
-    @test LazyStack.axes((a=1,b=2)) == (1:2,)
+    @test LazyStack._axes([1,2]) == (1:2,)
+    @test LazyStack._axes((1,2)) == (1:2,)
+    @test LazyStack._axes((a=1,b=2)) == (1:2,)
 
 end
 @info "loading Zygote"
 using Zygote
 @testset "zygote" begin
 
-    @test Zygote.gradient((x,y) -> sum(stack(x,y)), ones(2), ones(2)) == ([1,1], [1,1])
-    @test Zygote.gradient((x,y) -> sum(stack([x,y])), ones(2), ones(2)) == ([1,1], [1,1])
+    @test Zygote.gradient((x,y) -> sum(lazystack(x,y)), ones(2), ones(2)) == ([1,1], [1,1])
+    @test Zygote.gradient((x,y) -> sum(lazystack([x,y])), ones(2), ones(2)) == ([1,1], [1,1])
 
-    f399(x) = sum(stack(x) * sum(x))
-    f399c(x) = sum(collect(stack(x)) * sum(x))
+    f399(x) = sum(lazystack(x) * sum(x))
+    f399c(x) = sum(collect(lazystack(x)) * sum(x))
     @test Zygote.gradient(f399, [ones(2), ones(2)]) == ([[4,4], [4,4]],)
     @test Zygote.gradient(f399c, [ones(2), ones(2)]) == ([[4,4], [4,4]],)
-    ftup(x) = sum(stack(x...) * sum(x))
-    ftupc(x) = sum(collect(stack(x...)) * sum(x))
+    ftup(x) = sum(lazystack(x...) * sum(x))
+    ftupc(x) = sum(collect(lazystack(x...)) * sum(x))
     @test Zygote.gradient(ftup, (ones(2), ones(2))) == (([4,4], [4,4]),)
     @test Zygote.gradient(ftupc, (ones(2), ones(2))) == (([4,4], [4,4]),)