Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change computation of hash value. #4675

Closed
wants to merge 2 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 49 additions & 46 deletions src/string_pool.cr
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ class StringPool

# Creates a new empty string pool.
def initialize
@buckets = Array(Array(String)?).new(11, nil)
@capacity = 8
@hashes = Pointer(Hash::Hasher::Value).malloc(@capacity, 0_u32)
@values = Pointer(String).malloc(@capacity, "")
@size = 0
end

Expand Down Expand Up @@ -70,26 +72,45 @@ class StringPool
# pool.size # => 1
# ```
def get(str : UInt8*, len)
rehash if @size > 5 * @buckets.size
hash = hash(str, len)
get(hash, str, len)
end

index = bucket_index str, len
bucket = @buckets[index]
private def get(hash : Hash::Hasher::Value, str : UInt8*, len)
rehash if @size >= @capacity / 4 * 3

if bucket
entry = find_entry_in_bucket(bucket, str, len)
if entry
return entry
mask = (@capacity - 1).to_u32
index, d = hash & mask, 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please split this into two assignments for readability.

while (h = @hashes[index]) != 0
if h == hash && @values[index].bytesize == len
if str.memcmp(@values[index].to_unsafe, len) == 0
return @values[index]
end
end
else
@buckets[index] = bucket = Array(String).new
index = (index + d) & mask
d += 1
end

@size += 1
entry = String.new(str, len)
bucket.push entry
@hashes[index] = hash
@values[index] = entry
entry
end

private def put_on_rehash(hash : Hash::Hasher::Value, entry : String)
mask = (@capacity - 1).to_u32
index, d = hash & mask, 1
while @hashes[index] != 0
index = (index + d) & mask
d += 1
end

@size += 1
@hashes[index] = hash
@values[index] = entry
end

# Returns a `String` with the contents of the given `IO::Memory`.
#
# If a string with those contents was already present in the pool, that one is returned.
Expand Down Expand Up @@ -127,48 +148,30 @@ class StringPool
#
# Call this method if you modified a string submitted to the pool.
def rehash
new_size = calculate_new_size(@size)
old_buckets = @buckets
@buckets = Array(Array(String)?).new(new_size, nil)
@size = 0

old_buckets.each do |bucket|
bucket.try &.each do |entry|
get(entry.to_unsafe, entry.size)
end
if @capacity * 2 <= 0
raise "Hash table too big"
end
end

private def bucket_index(str, len)
hash = hash(str, len)
(hash % @buckets.size).to_i
end
old_capacity = @capacity
old_hashes = @hashes
old_values = @values

private def find_entry_in_bucket(bucket, str, len)
bucket.each do |entry|
if entry.size == len
if str.memcmp(entry.to_unsafe, len) == 0
return entry
end
@capacity *= 2
@hashes = Pointer(Hash::Hasher::Value).malloc(@capacity, 0_u32)
@values = Pointer(String).malloc(@capacity, "")
@size = 0

0.upto(old_capacity - 1) do |i|
if old_hashes[i] != 0
put_on_rehash(old_hashes[i], old_values[i])
end
end
nil
end

private def hash(str, len)
h = 0
str.to_slice(len).each do |c|
h = 31 * h + c
end
h
end

private def calculate_new_size(size)
new_size = 8
Hash::HASH_PRIMES.each do |hash_size|
return hash_size if new_size > size
new_size <<= 1
end
raise "Hash table too big"
hasher = Hash::Hasher.new
hasher << str.to_slice(len)
# hash should be non-zero, so `or` it with high bit
hasher.digest | 0x80000000_u32
end
end