diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 6522bc3d74a98..d626146c136f8 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -912,9 +912,6 @@ function isequal(A::AbstractArray, B::AbstractArray) if size(A) != size(B) return false end - if isa(A,Range) != isa(B,Range) - return false - end for (a, b) in zip(A, B) if !isequal(a, b) return false @@ -935,9 +932,6 @@ function (==)(A::AbstractArray, B::AbstractArray) if size(A) != size(B) return false end - if isa(A,Range) != isa(B,Range) - return false - end for (a, b) in zip(A, B) if !(a == b) return false @@ -1151,38 +1145,3 @@ push!(A, a, b) = push!(push!(A, a), b) push!(A, a, b, c...) = push!(push!(A, a, b), c...) unshift!(A, a, b) = unshift!(unshift!(A, b), a) unshift!(A, a, b, c...) = unshift!(unshift!(A, c...), a, b) - -## hashing collections ## - -const hashaa_seed = UInt === UInt64 ? 0x7f53e68ceb575e76 : 0xeb575e76 -const hashrle_seed = UInt == UInt64 ? 0x2aab8909bfea414c : 0xbfea414c -function hash(a::AbstractArray, h::UInt) - h += hashaa_seed - h += hash(size(a)) - - state = start(a) - done(a, state) && return h - x2, state = next(a, state) - done(a, state) && return hash(x2, h) - - x1 = x2 - while !done(a, state) - x1 = x2 - x2, state = next(a, state) - if isequal(x2, x1) - # For repeated elements, use run length encoding - # This allows efficient hashing of sparse arrays - runlength = 2 - while !done(a, state) - x2, state = next(a, state) - isequal(x1, x2) || break - runlength += 1 - end - h += hashrle_seed - h = hash(runlength, h) - end - h = hash(x1, h) - end - !isequal(x2, x1) && (h = hash(x2, h)) - return h -end diff --git a/base/hashing.jl b/base/hashing.jl index ab7ed4631b3de..4cf1490c210b2 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -64,11 +64,65 @@ end hash(x::QuoteNode, h::UInt) = hash(x.value, hash(QuoteNode, h)) -# hashing ranges by component at worst leads to collisions for very similar ranges -const hashr_seed = UInt === UInt64 ? 
0x80707b6821b70087 : 0x21b70087 +## hashing collections ## + +const hashaa_seed = UInt === UInt64 ? 0x7f53e68ceb575e76 : 0xeb575e76 +const hashrle_seed = UInt == UInt64 ? 0x2aab8909bfea414c : 0xbfea414c +function hash(a::AbstractArray, h::UInt) + h += hashaa_seed + h += hash(size(a)) + + state = start(a) + done(a, state) && return h + x1, state = next(a, state) + # Always hash the first element + h = hash(x1, h) + done(a, state) && return h + + # Then hash the difference between two subsequent elements when - is supported, + # or the elements themselves when not + x2, state = next(a, state) + v2 = applicable(-, x2, x1) ? x2 - x1 : x2 + done(a, state) && return hash(v2, h) + + v1 = v2 + while !done(a, state) + x1 = x2 + x2, state = next(a, state) + v1 = v2 + v2 = applicable(-, x2, x1) ? x2 - x1 : x2 + if isequal(v2, v1) + # For repeated values (differences, or elements when - is unsupported), use run length encoding + # This allows efficient hashing of sparse arrays + runlength = 2 + while !done(a, state) + x1 = x2 + x2, state = next(a, state) + v2 = applicable(-, x2, x1) ? 
x2 - x1 : x2 + isequal(v1, v2) || break + runlength += 1 + end + h += hashrle_seed + h = hash(runlength, h) + end + h = hash(v1, h) + end + !isequal(v2, v1) && (h = hash(v2, h)) + return h +end + function hash(r::Range, h::UInt) - h += hashr_seed + h += hashaa_seed + h += hash(size(r)) + + length(r) == 0 && return h + h = hash(first(r), h) - h = hash(step(r), h) - h = hash(last(r), h) + length(r) == 1 && return h + + length(r) == 2 && return hash(step(r), h) + + h += hashrle_seed + h = hash(length(r)-1, h) + hash(step(r), h) end diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index 8dc0c89469903..c26be85896aa5 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -3288,51 +3288,6 @@ function rotl90(A::SparseMatrixCSC) return sparse(J, I, V, n, m) end -## hashing - -# End the run and return the current hash -@inline function hashrun(val, runlength::Int, h::UInt) - if runlength == 0 - return h - elseif runlength > 1 - h += Base.hashrle_seed - h = hash(runlength, h) - end - hash(val, h) -end - -function hash{T}(A::SparseMatrixCSC{T}, h::UInt) - h += Base.hashaa_seed - sz = size(A) - h += hash(sz) - - colptr = A.colptr - rowval = A.rowval - nzval = A.nzval - lastidx = 0 - runlength = 0 - lastnz = zero(T) - @inbounds for col = 1:size(A, 2) - for j = colptr[col]:colptr[col+1]-1 - nz = nzval[j] - isequal(nz, zero(T)) && continue - idx = sub2ind(sz, rowval[j], col) - if idx != lastidx+1 || !isequal(nz, lastnz) # Run is over - h = hashrun(lastnz, runlength, h) # Hash previous run - h = hashrun(0, idx-lastidx-1, h) # Hash intervening zeros - - runlength = 1 - lastnz = nz - else - runlength += 1 - end - lastidx = idx - end - end - h = hashrun(lastnz, runlength, h) # Hash previous run - hashrun(0, length(A)-lastidx, h) # Hash zeros at end -end - ## Statistics # This is the function that does the reduction underlying var/std diff --git a/test/hashing.jl b/test/hashing.jl index 98bf8a4a0788a..b87aa26029ebf 100644 --- 
a/test/hashing.jl +++ b/test/hashing.jl @@ -66,13 +66,20 @@ vals = Any[ Dict(42 => 101, 77 => 93), Dict{Any,Any}(42 => 101, 77 => 93), (1,2,3,4), (1.0,2.0,3.0,4.0), (1,3,2,4), ("a","b"), (SubString("a",1,1), SubString("b",1,1)), + ['a', 'b', 'c', 'd', 'e'], # issue #6900 [x => x for x in 1:10], Dict(7=>7,9=>9,4=>4,10=>10,2=>2,3=>3,8=>8,5=>5,6=>6,1=>1), [], [1], [2], [1, 1], [1, 2], [1, 3], [2, 2], [1, 2, 2], [1, 3, 3], zeros(2, 2), spzeros(2, 2), eye(2, 2), speye(2, 2), sparse(ones(2, 2)), ones(2, 2), sparse([0 0; 1 0]), [0 0; 1 0], - [-0. 0; -0. 0.], SparseMatrixCSC(2, 2, [1, 3, 3], [1, 2], [-0., -0.]) + [-0. 0; -0. 0.], SparseMatrixCSC(2, 2, [1, 3, 3], [1, 2], [-0., -0.]), + # issue #16364 + 1:4, 1:1:4, 1:-1:0, 1.0:4.0, 1.0:1.0:4.0, 'a':'e', + linspace(1, 3, 10), collect(linspace(1, 3, 10)), + # check that hash is still consistent with heterogeneous arrays for which - is defined + # for some pairs and not others (no element must be ignored) + ["a", "b", 1, 2], ["a", 1, 2], ["a", "b", 2, 2], ["a", "a", 1, 2], ["a", "b", 2, 3] ] for a in vals, b in vals