Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change computation of hash value. #4675

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions spec/std/big/big_float_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,26 @@ describe "BigFloat" do
end

it "#hash" do
b = 123.to_big_f
b.hash.should eq(b.to_f64.hash)
big_float = 123.to_big_f
big_float.hash.should eq(123.hash)
big_float.hash.should eq(big_float.to_f64.hash)

big_integer = "123456789012345678901".to_big_i
big_float = big_integer.to_big_f
big_float.should eq(big_integer)
big_float.hash_normalize.should eq(big_integer.hash_normalize)
big_float.hash.should eq(big_integer.hash)

float = 123.06125
big_float = float.to_big_f
big_float.hash.should eq(float.hash)

big_float = 1.to_big_f
big_float = big_float * 0x80000000 * 0x80000000 * 0x80000000
float = 1.0_f64
float = float * 0x80000000 * 0x80000000 * 0x80000000
big_float.hash_normalize.should eq(float.hash_normalize)
big_float.hash.should eq(float.hash)
end

it "clones" do
Expand Down
27 changes: 25 additions & 2 deletions spec/std/big/big_int_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,35 @@ describe "BigInt" do

it "#hash" do
hash = 5.to_big_i.hash
hash.should eq(5)
typeof(hash).should eq(UInt64)
hash.should eq(5.hash)
end

it "#hash_normalize" do
hn = 5.to_big_i.hash_normalize
hn.should eq(5.hash_normalize)
hn = (-5).to_big_i.hash_normalize
hn.should eq((-5).hash_normalize)
hn = 500000000000000_u64.to_big_i.hash_normalize
hn.should eq(500000000000000_u64.hash_normalize)
hn = (-500000000000000_i64).to_big_i.hash_normalize
hn.should eq((-500000000000000_i64).hash_normalize)

bi = 1.to_big_i
bi = bi << 93
f = 1.0_f64
f = f * 0x80000000 * 0x80000000 * 0x80000000
bi.hash_normalize.should eq(f.hash_normalize)
(-bi).hash_normalize.should eq((-f).hash_normalize)
end

it "clones" do
x = 1.to_big_i
x.clone.should eq(x)
end

it "#to_big_f" do
s = "123456789012345678901"
x = BigInt.new(s)
x.to_big_f.should eq BigFloat.new(s)
end
end
26 changes: 25 additions & 1 deletion spec/std/big/big_rational_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,25 @@ describe BigRational do
it "#hash" do
b = br(10, 3)
hash = b.hash
hash.should eq(b.to_f64.hash)
hash.should eq(b.to_big_f.hash)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Contributor

@akzhan akzhan Jul 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://play.crystal-lang.org/#/r/2cnl

big(10 / 3) hash by this PR calculated using BigFloat precision.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Float64 has only 53bit meaningful mantisa, but for hash calculation I use 61bit Mersen prime.
That is why conversion to Float64 looses precision.

As an alternative, it could stick with 31bit Mersen prime. Then here could be to_f64.
But then Int32, UInt32, Float64 hashing will have to perform modulo operation.

I believe, hashing Int32, UInt32 is much more important, than hashing BigRational and BigFloat.
That is why I decided to use 61bit Mersen Prime, and fallback to to_big_f at this place, and to calculate BigFloat hash by algorithm, instead of using to_f64.

It is just tradeoff. If you will decide, that BigRational and BigFloat is more important, than Float64 and Int32, I will change it back.

end

it "#hash_normalize" do
hn = 5.to_big_i.hash_normalize
hn.should eq(5.hash_normalize)
hn = (-5).to_big_i.hash_normalize
hn.should eq((-5).hash_normalize)
hn = 500000000000000_u64.to_big_i.hash_normalize
hn.should eq(500000000000000_u64.hash_normalize)
hn = (-500000000000000_i64).to_big_i.hash_normalize
hn.should eq((-500000000000000_i64).hash_normalize)

bi = 1.to_big_r
bi = bi << 93
f = 1.0_f64
f = f * 0x80000000 * 0x80000000 * 0x80000000
bi.hash_normalize.should eq(f.hash_normalize)
(-bi).hash_normalize.should eq((-f).hash_normalize)
end

it "is a number" do
Expand All @@ -174,4 +192,10 @@ describe BigRational do
x = br(10, 3)
x.clone.should eq(x)
end

it "#to_big_f" do
x = br(10, 3)
f = BigFloat.new(10) / BigFloat.new(3)
x.to_big_f.should eq(f)
end
end
5 changes: 3 additions & 2 deletions spec/std/bool_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ describe "Bool" do
end

describe "hash" do
it { true.hash.should eq(1) }
it { false.hash.should eq(0) }
it { true.hash.should eq(true.hash) }
it { false.hash.should eq(false.hash) }
it { true.hash.should_not eq(false.hash) }
end

describe "to_s" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/enum_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ describe Enum do
end

it "has hash" do
SpecEnum::Two.hash.should eq(1.hash)
SpecEnum::Two.hash.should_not eq(SpecEnum::One.hash)
end

it "parses" do
Expand Down
4 changes: 2 additions & 2 deletions spec/std/hash_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ describe "Hash" do
end
end

it "works with mixed types" do
{1 => :a, "a" => 1, 1.0 => "a", :a => 1.0}.values_at(1, "a", 1.0, :a).should eq({:a, 1, "a", 1.0})
it "works with mixed types and normalized numbers" do
{1 => :a, "a" => 1, 2.0 => "a", :a => 1.0}.values_at(1, 2, "a", 1.0, 2.0, :a).should eq({:a, "a", 1, :a, "a", 1.0})
end
end

Expand Down
7 changes: 5 additions & 2 deletions spec/std/struct_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ describe "Struct" do

it "does hash" do
s = StructSpec::TestClass.new(1, "hello")
s.hash.should eq(31 + "hello".hash)
hasher = Hash::Hasher.new
hasher << 1
hasher << "hello"
s.hash.should eq(hasher.digest)
end

it "does hash for struct wrapper (#1940)" do
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(0)
StructSpec::BigIntWrapper.new(BigInt.new(0)).hash.should eq(BigInt.new(0).hash)
end

it "does dup" do
Expand Down
2 changes: 1 addition & 1 deletion spec/std/time/span_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ describe Time::Span do
end

it "test hash code" do
Time::Span.new(77).hash.should eq(77)
Time::Span.new(77).hash.should eq(77.hash)
end

it "test subtract" do
Expand Down
15 changes: 13 additions & 2 deletions src/big/big_float.cr
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,19 @@ struct BigFloat < Float
new(mpf)
end

def hash
to_f64.hash
def hash_normalize
# more exact version of `remainder(HASH_MODULUS).to_f.hash_normalize`
LibGMP.mpf_get_d_2exp(out exp, self)
frac = BigFloat.new { |mpf|
if exp >= 0
LibGMP.mpf_div_2exp(mpf, self, exp)
else
LibGMP.mpf_mul_2exp(mpf, self, -exp)
end
}
float_normalize_wrap do
float_normalize_reference(frac, exp)
end
end

def self.default_precision
Expand Down
15 changes: 13 additions & 2 deletions src/big/big_int.cr
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,19 @@ struct BigInt < Int
io << "_big_i"
end

def hash
to_u64
private HASH_MODULUS_INT_P = BigInt.new((1_u64 << HASH_BITS) - 1)
private HASH_MODULUS_INT_N = -BigInt.new((1_u64 << HASH_BITS) - 1)

def hash_normalize
# it should calculate `remainder(HASH_MODULUS)`
if LibGMP::ULong == UInt64
v = int_to_hashnorm(LibGMP.tdiv_ui(self, HASH_MODULUS))
self < 0 ? -v : v
elsif self >= HASH_MODULUS_INT_P || self <= HASH_MODULUS_INT_N
unsafe_truncated_mod(HASH_MODULUS_INT_P).to_i64
else
self.to_i64
end
end

# Returns a string representation of self.
Expand Down
16 changes: 14 additions & 2 deletions src/big/big_rational.cr
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,20 @@ struct BigRational < Number
BigRational.new { |mpq| LibGMP.mpq_abs(mpq, self) }
end

def hash
to_f64.hash
private HASH_MODULUS_RAT_P = BigRational.new((1_u64 << HASH_BITS) - 1)
private HASH_MODULUS_RAT_N = -BigRational.new((1_u64 << HASH_BITS) - 1)

def hash_normalize
# more exact version of `remainder(HASH_MODULUS).to_f.hash_normalize`
rem = self
if self >= HASH_MODULUS_RAT_P || self <= HASH_MODULUS_RAT_N
num = numerator
denom = denominator
div = num.tdiv(denom)
floor = div.tdiv(HASH_MODULUS)
rem -= floor * HASH_MODULUS
end
rem.to_big_f.hash_normalize
end

# Returns the `Float64` representing this rational.
Expand Down
7 changes: 4 additions & 3 deletions src/bool.cr
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ struct Bool
self != other
end

# Returns a hash value for this boolean: 0 for `false`, 1 for `true`.
def hash
self ? 1 : 0
# Protocol method for generic hashing.
def hash(hasher)
hasher << (self ? 1 : 0)
hasher
end

# Returns `"true"` for `true` and `"false"` for `false`.
Expand Down
6 changes: 6 additions & 0 deletions src/char.cr
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ struct Char
ord
end

# Protocol method for generic hashing.
def hash(hasher)
hasher.raw ord
hasher
end

# Returns a Char that is one codepoint bigger than this char's codepoint.
#
# ```
Expand Down
5 changes: 3 additions & 2 deletions src/class.cr
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ class Class
to_s(io)
end

def hash
crystal_type_id
def hash(hasher)
hasher.raw(crystal_type_id)
hasher
end

def ==(other : Class)
Expand Down
15 changes: 9 additions & 6 deletions src/compiler/crystal/syntax/ast.cr
Original file line number Diff line number Diff line change
Expand Up @@ -1175,8 +1175,9 @@ module Crystal
self
end

def hash
0
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down Expand Up @@ -1545,8 +1546,9 @@ module Crystal
Self.new
end

def hash
0
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down Expand Up @@ -2025,8 +2027,9 @@ module Crystal
Underscore.new
end

def hash
0
def hash(hasher)
hasher << 0
hasher
end
end

Expand Down
7 changes: 4 additions & 3 deletions src/enum.cr
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,10 @@ struct Enum
value == other.value
end

# Returns a hash value. This is the hash of the underlying value.
def hash
value.hash
# Protocol method for generic hashing.
def hash(hasher)
hasher.raw(value)
hasher
end

# Iterates each values in a Flags Enum.
Expand Down
1 change: 1 addition & 0 deletions src/event/signal_handler.cr
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
require "c/signal"
require "c/unistd"
require "signal"

# :nodoc:
# Singleton that runs Signal events (libevent2) in it's own Fiber.
Expand Down
Loading