Skip to content

Commit

Permalink
Increase V-Hash Size from 32 Bit to 64 Bit
Browse files Browse the repository at this point in the history
With as few as 1 million different identifiers, 32-bit v-hashes will
collide. So we have to increase the v-hash size from 32 bits to
64 bits.
  • Loading branch information
alexanderkiel committed Feb 17, 2023
1 parent 26e13b7 commit d10bd32
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 72 deletions.
4 changes: 2 additions & 2 deletions modules/db/src/blaze/db/impl/codec.clj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
;; ---- Sizes of Byte Arrays --------------------------------------------------

(def ^:const ^long c-hash-size Integer/BYTES)
(def ^:const ^long v-hash-size Integer/BYTES)
(def ^:const ^long v-hash-size Long/BYTES)
(def ^:const ^long tid-size Integer/BYTES)
(def ^:const ^long t-size Long/BYTES)
(def ^:const ^long state-size Long/BYTES)
Expand Down Expand Up @@ -299,7 +299,7 @@


(defn v-hash [value]
(-> (Hashing/murmur3_32_fixed)
(-> (Hashing/farmHashFingerprint64)
(.hashString value StandardCharsets/UTF_8)
(.asBytes)
bs/from-byte-array))
Expand Down
17 changes: 7 additions & 10 deletions modules/db/src/blaze/db/node.clj
Original file line number Diff line number Diff line change
Expand Up @@ -445,11 +445,7 @@
[:blaze.db/enforce-referential-integrity]))


(def ^:private expected-kv-store-version 0)


(defn- kv-store-version [kv-store]
(or (some-> (kv/get kv-store version/key) version/decode-value) 0))
(def ^:private expected-kv-store-version 1)


(def ^:private incompatible-kv-store-version-msg
Expand All @@ -469,13 +465,14 @@
{:actual-version actual-version :expected-version expected-version}))


(defn- check-version! [kv-store]
(when (tx-success/last-t kv-store)
(let [actual-kv-store-version (kv-store-version kv-store)]
(defn- check-and-set-version! [kv-store]
(if (tx-success/last-t kv-store)
(let [actual-kv-store-version (version/get kv-store)]
(if (= actual-kv-store-version expected-kv-store-version)
(log/info "Index store version is" actual-kv-store-version)
(throw (incompatible-kv-store-version-ex actual-kv-store-version
expected-kv-store-version))))))
expected-kv-store-version))))
(version/set! kv-store expected-kv-store-version)))


(defmethod ig/init-key :blaze.db/node
Expand All @@ -484,7 +481,7 @@
:or {poll-timeout (time/seconds 1)}
:as config}]
(init-msg config)
(check-version! kv-store)
(check-and-set-version! kv-store)
(let [node (->Node (ctx config) tx-log resource-handle-cache tx-cache kv-store
resource-store search-param-registry resource-indexer
(atom (initial-state kv-store))
Expand Down
15 changes: 12 additions & 3 deletions modules/db/src/blaze/db/node/version.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
(ns blaze.db.node.version
(:refer-clojure :exclude [key])
(:refer-clojure :exclude [get key])
(:require
[blaze.byte-buffer :as bb])
[blaze.byte-buffer :as bb]
[blaze.db.kv :as kv])
(:import
[java.nio.charset StandardCharsets]))

Expand All @@ -19,5 +20,13 @@
(bb/array)))


(defn decode-value [bytes]
(defn- decode-value [bytes]
(bb/get-int! (bb/wrap bytes)))


(defn get
  "Looks up the entry stored under `key` in `store` and returns its decoded
  value. Falls back to 0 when `kv/get` yields nil for that key."
  [store]
  (let [stored (kv/get store key)]
    (or (when (some? stored)
          (decode-value stored))
        0)))


(defn set!
  "Encodes `version` with `encode-value` and puts the result into `store`
  under `key` via `kv/put!`.

  NOTE: `set!` is also a Clojure special form, so this function has to be
  called namespace-qualified (e.g. `version/set!`)."
  [store version]
  (let [encoded (encode-value version)]
    (kv/put! store key encoded)))
22 changes: 11 additions & 11 deletions modules/db/test/blaze/db/api_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2428,29 +2428,29 @@
(:kv-store node)
:type :id :hash-prefix :code :v-hash)
[["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"value-quantity" #blaze/byte-string"0000000080"]
"value-quantity" #blaze/byte-string"4F40902F3B6AE19A80"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"value-quantity" #blaze/byte-string"5C38E45A80"]
"value-quantity" #blaze/byte-string"9CEABF1B055DDDCF80"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"value-quantity" #blaze/byte-string"9B780D9180"]
"value-quantity" #blaze/byte-string"B658D8AF4F417A2B80"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"combo-value-quantity" #blaze/byte-string"0000000080"]
"combo-value-quantity" #blaze/byte-string"4F40902F3B6AE19A80"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"combo-value-quantity" #blaze/byte-string"5C38E45A80"]
"combo-value-quantity" #blaze/byte-string"9CEABF1B055DDDCF80"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"combo-value-quantity" #blaze/byte-string"9B780D9180"]
"combo-value-quantity" #blaze/byte-string"B658D8AF4F417A2B80"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"_id" #blaze/byte-string"165494C5"]
"_id" #blaze/byte-string"490E5C1C8B04CCEC"]
["Observation" "id-0" #blaze/hash-prefix"36A9F36D"
"_lastUpdated" #blaze/byte-string"80008001"]
["TestScript" "id-0" #blaze/hash-prefix"51E67D28"
"context-quantity" #blaze/byte-string"0000000080"]
"context-quantity" #blaze/byte-string"4F40902F3B6AE19A80"]
["TestScript" "id-0" #blaze/hash-prefix"51E67D28"
"context-quantity" #blaze/byte-string"5C38E45A80"]
"context-quantity" #blaze/byte-string"9CEABF1B055DDDCF80"]
["TestScript" "id-0" #blaze/hash-prefix"51E67D28"
"context-quantity" #blaze/byte-string"9B780D9180"]
"context-quantity" #blaze/byte-string"B658D8AF4F417A2B80"]
["TestScript" "id-0" #blaze/hash-prefix"51E67D28"
"_id" #blaze/byte-string"165494C5"]
"_id" #blaze/byte-string"490E5C1C8B04CCEC"]
["TestScript" "id-0" #blaze/hash-prefix"51E67D28"
"_lastUpdated" #blaze/byte-string"80008001"]])))

Expand Down
6 changes: 6 additions & 0 deletions modules/db/test/blaze/db/impl/codec_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
(apply codec/descending-long [(apply codec/descending-long [t])])))))


;; Hashes `n` random UUID strings with `codec/v-hash` and checks that all
;; resulting hashes are distinct, i.e. that the set of hashes has exactly `n`
;; elements.
;; NOTE(review): the inputs are random, so this test is probabilistic, and it
;; collects all 10 million hashes into one in-memory set.
(deftest v-hash-test
(testing "no collisions"
(let [n (long 1e7)]
(is (= n (count (into #{} (map (comp codec/v-hash str)) (repeatedly n random-uuid))))))))


(deftest tid-test
(check `codec/tid))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@


(defn- split-value [bs]
[(bs/subs bs 0 4) (bs/subs bs 4)])
[(bs/subs bs 0 codec/v-hash-size) (bs/subs bs codec/v-hash-size)])


(defn compile-code-quantity-value [search-param-registry value]
Expand Down
79 changes: 40 additions & 39 deletions modules/db/test/blaze/db/node/resource_indexer_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -241,19 +241,20 @@
kv-store :type :id :hash-prefix)))
(is (= (sp-vr-tu/decode-index-entries kv-store :code :v-hash)
[["patient" (codec/v-hash "Patient/id-145552")]
["patient" (codec/v-hash "id-145552")]
["patient" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-145552"))]
["patient" (codec/v-hash "id-145552")]
["code" (codec/v-hash "code-204441")]
["code" (codec/v-hash "system-204435|")]

["code" (codec/v-hash "system-204435|code-204441")]
["code" (codec/v-hash "system-204435|")]
["code" (codec/v-hash "code-204441")]
["onset-date" (codec-date/encode-range (LocalDate/of 2020 1 30))]
["subject" (codec/v-hash "Patient/id-145552")]
["subject" (codec/v-hash "id-145552")]
["subject" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-145552"))]
["subject" (codec/v-hash "id-145552")]
["_profile" (codec/v-hash "url-164445")]
["_id" (codec/v-hash "id-204446")]
["_lastUpdated" #blaze/byte-string"80008001"]])))
Expand All @@ -264,19 +265,19 @@
kv-store :type :id :hash-prefix)))
(is (= (r-sp-v-tu/decode-index-entries kv-store :code :v-hash)
[["patient" (codec/v-hash "Patient/id-145552")]
["patient" (codec/v-hash "id-145552")]
["patient" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-145552"))]
["patient" (codec/v-hash "id-145552")]
["code" (codec/v-hash "code-204441")]
["code" (codec/v-hash "system-204435|")]
["code" (codec/v-hash "system-204435|code-204441")]
["code" (codec/v-hash "system-204435|")]
["code" (codec/v-hash "code-204441")]
["onset-date" (codec-date/encode-range (LocalDate/of 2020 1 30))]
["subject" (codec/v-hash "Patient/id-145552")]
["subject" (codec/v-hash "id-145552")]
["subject" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-145552"))]
["subject" (codec/v-hash "id-145552")]
["_profile" (codec/v-hash "url-164445")]
["_id" (codec/v-hash "id-204446")]
["_lastUpdated" #blaze/byte-string"80008001"]])))
Expand All @@ -292,19 +293,19 @@
kv-store :compartment :type :id :hash-prefix)))
(is (= (c-sp-vr-tu/decode-index-entries kv-store :code :v-hash)
[["patient" (codec/v-hash "Patient/id-145552")]
["patient" (codec/v-hash "id-145552")]
["patient" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-145552"))]
["patient" (codec/v-hash "id-145552")]
["code" (codec/v-hash "code-204441")]
["code" (codec/v-hash "system-204435|")]
["code" (codec/v-hash "system-204435|code-204441")]
["code" (codec/v-hash "system-204435|")]
["code" (codec/v-hash "code-204441")]
["onset-date" (codec-date/encode-range (LocalDate/of 2020 1 30))]
["subject" (codec/v-hash "Patient/id-145552")]
["subject" (codec/v-hash "id-145552")]
["subject" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-145552"))]
["subject" (codec/v-hash "id-145552")]
["_profile" (codec/v-hash "url-164445")]
["_id" (codec/v-hash "id-204446")]
["_lastUpdated" #blaze/byte-string"80008001"]]))))))
Expand Down Expand Up @@ -355,18 +356,6 @@
kv-store :type :id :hash-prefix)))
(is (= (sp-vr-tu/decode-index-entries kv-store :code :v-hash)
[["code-value-quantity"
#blaze/byte-string"82821D0F00000000900926"]
["code-value-quantity"
#blaze/byte-string"82821D0F32690DC8900926"]
["code-value-quantity"
#blaze/byte-string"82821D0FA3C37576900926"]
["code-value-quantity"
#blaze/byte-string"9F7C9B9400000000900926"]
["code-value-quantity"
#blaze/byte-string"9F7C9B9432690DC8900926"]
["code-value-quantity"
#blaze/byte-string"9F7C9B94A3C37576900926"]
["code-value-quantity"
(bs/concat (codec/v-hash "code-193824")
(codec/quantity "" 23.42M))]
["code-value-quantity"
Expand All @@ -376,50 +365,62 @@
(bs/concat (codec/v-hash "code-193824")
(codec/quantity "http://unitsofmeasure.org|kg/m2"
23.42M))]
["code-value-quantity"
#blaze/byte-string"B02358E02AD0942D4F40902F3B6AE19A900926"]
["code-value-quantity"
#blaze/byte-string"B02358E02AD0942DE95B25E4B02F01AF900926"]
["code-value-quantity"
#blaze/byte-string"B02358E02AD0942DF35972C2DDEDDFE6900926"]
["code-value-quantity"
#blaze/byte-string"D47C56F6D0C25BA34F40902F3B6AE19A900926"]
["code-value-quantity"
#blaze/byte-string"D47C56F6D0C25BA3E95B25E4B02F01AF900926"]
["code-value-quantity"
#blaze/byte-string"D47C56F6D0C25BA3F35972C2DDEDDFE6900926"]
["date" (codec-date/encode-range (LocalDate/of 2005 6 17))]
["category" (codec/v-hash "system-193558|code-193603")]
["category" (codec/v-hash "system-193558|")]
["category" (codec/v-hash "code-193603")]
["category" (codec/v-hash "system-193558|code-193603")]
["patient" (codec/v-hash "id-180857")]
["patient" (codec/tid-id
(codec/tid "Patient")
(codec/id-byte-string "id-180857"))]
["patient" (codec/v-hash "Patient/id-180857")]
["code" (codec/v-hash "code-193824")]
["code" (codec/v-hash "system-193821|")]
["code" (codec/v-hash "system-193821|code-193824")]
["code" (codec/v-hash "code-193824")]
["value-quantity" (codec/quantity "" 23.42M)]
["value-quantity" (codec/quantity "kg/m2" 23.42M)]
["value-quantity" (codec/quantity
"http://unitsofmeasure.org|kg/m2"
23.42M)]
["combo-code" (codec/v-hash "code-193824")]
["combo-code" (codec/v-hash "system-193821|")]
["combo-code" (codec/v-hash "system-193821|code-193824")]
["combo-code" (codec/v-hash "code-193824")]
["combo-value-quantity"
#blaze/byte-string"00000000900926"]
#blaze/byte-string"4F40902F3B6AE19A900926"]
["combo-value-quantity"
#blaze/byte-string"32690DC8900926"]
#blaze/byte-string"E95B25E4B02F01AF900926"]
["combo-value-quantity"
#blaze/byte-string"A3C37576900926"]
#blaze/byte-string"F35972C2DDEDDFE6900926"]
["combo-code-value-quantity"
#blaze/byte-string"82821D0F00000000900926"]
#blaze/byte-string"825F9E2AAE526A184F40902F3B6AE19A900926"]
["combo-code-value-quantity"
#blaze/byte-string"82821D0F32690DC8900926"]
#blaze/byte-string"825F9E2AAE526A18E95B25E4B02F01AF900926"]
["combo-code-value-quantity"
#blaze/byte-string"82821D0FA3C37576900926"]
#blaze/byte-string"825F9E2AAE526A18F35972C2DDEDDFE6900926"]
["combo-code-value-quantity"
#blaze/byte-string"9F7C9B9400000000900926"]
#blaze/byte-string"B02358E02AD0942D4F40902F3B6AE19A900926"]
["combo-code-value-quantity"
#blaze/byte-string"9F7C9B9432690DC8900926"]
#blaze/byte-string"B02358E02AD0942DE95B25E4B02F01AF900926"]
["combo-code-value-quantity"
#blaze/byte-string"9F7C9B94A3C37576900926"]
#blaze/byte-string"B02358E02AD0942DF35972C2DDEDDFE6900926"]
["combo-code-value-quantity"
#blaze/byte-string"A75DEC9D00000000900926"]
#blaze/byte-string"D47C56F6D0C25BA34F40902F3B6AE19A900926"]
["combo-code-value-quantity"
#blaze/byte-string"A75DEC9D32690DC8900926"]
#blaze/byte-string"D47C56F6D0C25BA3E95B25E4B02F01AF900926"]
["combo-code-value-quantity"
#blaze/byte-string"A75DEC9DA3C37576900926"]
#blaze/byte-string"D47C56F6D0C25BA3F35972C2DDEDDFE6900926"]
["subject" (codec/v-hash "id-180857")]
["subject" (codec/tid-id
(codec/tid "Patient")
Expand Down
19 changes: 19 additions & 0 deletions modules/db/test/blaze/db/node/version_spec.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
(ns blaze.db.node.version-spec
(:require
[blaze.db.kv.spec]
[blaze.db.node.version :as version]
[clojure.spec.alpha :as s]))


;; Spec for `version/encode-value`: takes a single non-negative integer version
;; and returns a byte array.
(s/fdef version/encode-value
:args (s/cat :version nat-int?)
:ret bytes?)


;; Spec for `version/get`: takes the kv-store and returns the stored version.
;; `version/get` falls back to 0 when nothing is stored, so the return value is
;; never nil and the spec does not need to be nilable.
(s/fdef version/get
  :args (s/cat :store :blaze.db/kv-store)
  :ret nat-int?)


;; Spec for `version/set!`: takes the kv-store and a non-negative integer
;; version. No `:ret` spec is given, so the return value is unconstrained.
(s/fdef version/set!
:args (s/cat :store :blaze.db/kv-store :version nat-int?))
19 changes: 13 additions & 6 deletions modules/db/test/blaze/db/node_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
[blaze.db.node.resource-indexer :as resource-indexer]
[blaze.db.node.tx-indexer :as-alias tx-indexer]
[blaze.db.node.version :as version]
[blaze.db.node.version-spec]
[blaze.db.resource-handle-cache]
[blaze.db.resource-store :as rs]
[blaze.db.resource-store.spec :refer [resource-store?]]
Expand Down Expand Up @@ -86,8 +87,9 @@

(defn- with-index-store-version [system version]
(assoc-in system [[::kv/mem :blaze.db/index-kv-store] :init-data]
[[version/key (version/encode-value version)]
(tx-success/index-entry 1 Instant/EPOCH)]))
(cond-> [(tx-success/index-entry 1 Instant/EPOCH)]
(pos? version)
(conj [version/key (version/encode-value version)]))))


(deftest init-test
Expand Down Expand Up @@ -174,11 +176,11 @@
[:explain ::s/problems 0 :val] := ::invalid))

(testing "incompatible version"
(given-thrown (ig/init (with-index-store-version system -1))
(given-thrown (ig/init (with-index-store-version system 0))
:key := :blaze.db/node
:reason := ::ig/build-threw-exception
[:cause-data :expected-version] := 0
[:cause-data :actual-version] := -1)))
[:cause-data :expected-version] := 1
[:cause-data :actual-version] := 0)))


(deftest duration-seconds-collector-init-test
Expand Down Expand Up @@ -272,5 +274,10 @@


(deftest existing-data-with-compatible-version
(with-system [{:blaze.db/keys [node]} (with-index-store-version system 0)]
(with-system [{:blaze.db/keys [node]} (with-index-store-version system 1)]
(is node)))


;; Uses the plain `system` (no pre-populated index store data) and checks that
;; `version/get` on the index kv-store returns 1 afterwards.
;; NOTE(review): presumably `with-system` initializes the node, which writes
;; the expected version into an empty store — confirm against blaze.db.node.
(deftest sets-db-version-on-startup
(with-system [{kv-store [::kv/mem :blaze.db/index-kv-store]} system]
(is (= 1 (version/get kv-store)))))

0 comments on commit d10bd32

Please sign in to comment.