From d10bd3269155e42176ee8e796d24c77b7573e2f7 Mon Sep 17 00:00:00 2001 From: Alexander Kiel Date: Sat, 4 Jun 2022 17:37:41 +0200 Subject: [PATCH] Increase V-Hash Size from 32 Bit to 64 Bit In case we have 1 million different identifiers, we will have v-hash collisions. So we have to increase the v-hash size from 32 bit to 64 bit. --- modules/db/src/blaze/db/impl/codec.clj | 4 +- modules/db/src/blaze/db/node.clj | 17 ++-- modules/db/src/blaze/db/node/version.clj | 15 +++- modules/db/test/blaze/db/api_test.clj | 22 +++--- modules/db/test/blaze/db/impl/codec_test.clj | 6 ++ .../db/impl/search_param/composite_test.clj | 2 +- .../blaze/db/node/resource_indexer_test.clj | 79 ++++++++++--------- .../db/test/blaze/db/node/version_spec.clj | 19 +++++ modules/db/test/blaze/db/node_test.clj | 19 +++-- 9 files changed, 111 insertions(+), 72 deletions(-) create mode 100644 modules/db/test/blaze/db/node/version_spec.clj diff --git a/modules/db/src/blaze/db/impl/codec.clj b/modules/db/src/blaze/db/impl/codec.clj index 63888db63..7e805ba26 100644 --- a/modules/db/src/blaze/db/impl/codec.clj +++ b/modules/db/src/blaze/db/impl/codec.clj @@ -18,7 +18,7 @@ ;; ---- Sizes of Byte Arrays -------------------------------------------------- (def ^:const ^long c-hash-size Integer/BYTES) -(def ^:const ^long v-hash-size Integer/BYTES) +(def ^:const ^long v-hash-size Long/BYTES) (def ^:const ^long tid-size Integer/BYTES) (def ^:const ^long t-size Long/BYTES) (def ^:const ^long state-size Long/BYTES) @@ -299,7 +299,7 @@ (defn v-hash [value] - (-> (Hashing/murmur3_32_fixed) + (-> (Hashing/farmHashFingerprint64) (.hashString value StandardCharsets/UTF_8) (.asBytes) bs/from-byte-array)) diff --git a/modules/db/src/blaze/db/node.clj b/modules/db/src/blaze/db/node.clj index c738b4a4b..acc335abe 100644 --- a/modules/db/src/blaze/db/node.clj +++ b/modules/db/src/blaze/db/node.clj @@ -445,11 +445,7 @@ [:blaze.db/enforce-referential-integrity])) -(def ^:private expected-kv-store-version 0) - - -(defn- kv-store-version [kv-store] - (or (some-> (kv/get kv-store version/key) version/decode-value) 0)) +(def ^:private expected-kv-store-version 1) (def ^:private incompatible-kv-store-version-msg @@ -469,13 +465,14 @@ {:actual-version actual-version :expected-version expected-version})) -(defn- check-version! [kv-store] - (when (tx-success/last-t kv-store) - (let [actual-kv-store-version (kv-store-version kv-store)] +(defn- check-and-set-version! [kv-store] + (if (tx-success/last-t kv-store) + (let [actual-kv-store-version (version/get kv-store)] (if (= actual-kv-store-version expected-kv-store-version) (log/info "Index store version is" actual-kv-store-version) (throw (incompatible-kv-store-version-ex actual-kv-store-version - expected-kv-store-version)))))) + expected-kv-store-version)))) + (version/set! kv-store expected-kv-store-version))) (defmethod ig/init-key :blaze.db/node @@ -484,7 +481,7 @@ :or {poll-timeout (time/seconds 1)} :as config}] (init-msg config) - (check-version! kv-store) + (check-and-set-version! kv-store) (let [node (->Node (ctx config) tx-log resource-handle-cache tx-cache kv-store resource-store search-param-registry resource-indexer (atom (initial-state kv-store)) diff --git a/modules/db/src/blaze/db/node/version.clj b/modules/db/src/blaze/db/node/version.clj index 001532381..00ec0d3d0 100644 --- a/modules/db/src/blaze/db/node/version.clj +++ b/modules/db/src/blaze/db/node/version.clj @@ -1,7 +1,8 @@ (ns blaze.db.node.version - (:refer-clojure :exclude [key]) + (:refer-clojure :exclude [get key]) (:require - [blaze.byte-buffer :as bb]) + [blaze.byte-buffer :as bb] + [blaze.db.kv :as kv]) (:import [java.nio.charset StandardCharsets])) @@ -19,5 +20,13 @@ (bb/array))) -(defn decode-value [bytes] +(defn- decode-value [bytes] (bb/get-int! (bb/wrap bytes))) + + +(defn get [store] + (or (some-> (kv/get store key) decode-value) 0)) + + +(defn set! [store version] + (kv/put! store key (encode-value version))) diff --git a/modules/db/test/blaze/db/api_test.clj b/modules/db/test/blaze/db/api_test.clj index 190e04e3b..c335daf24 100644 --- a/modules/db/test/blaze/db/api_test.clj +++ b/modules/db/test/blaze/db/api_test.clj @@ -2428,29 +2428,29 @@ (:kv-store node) :type :id :hash-prefix :code :v-hash) [["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "value-quantity" #blaze/byte-string"0000000080"] + "value-quantity" #blaze/byte-string"4F40902F3B6AE19A80"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "value-quantity" #blaze/byte-string"5C38E45A80"] + "value-quantity" #blaze/byte-string"9CEABF1B055DDDCF80"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "value-quantity" #blaze/byte-string"9B780D9180"] + "value-quantity" #blaze/byte-string"B658D8AF4F417A2B80"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "combo-value-quantity" #blaze/byte-string"0000000080"] + "combo-value-quantity" #blaze/byte-string"4F40902F3B6AE19A80"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "combo-value-quantity" #blaze/byte-string"5C38E45A80"] + "combo-value-quantity" #blaze/byte-string"9CEABF1B055DDDCF80"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "combo-value-quantity" #blaze/byte-string"9B780D9180"] + "combo-value-quantity" #blaze/byte-string"B658D8AF4F417A2B80"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" - "_id" #blaze/byte-string"165494C5"] + "_id" #blaze/byte-string"490E5C1C8B04CCEC"] ["Observation" "id-0" #blaze/hash-prefix"36A9F36D" "_lastUpdated" #blaze/byte-string"80008001"] ["TestScript" "id-0" #blaze/hash-prefix"51E67D28" - "context-quantity" #blaze/byte-string"0000000080"] + "context-quantity" #blaze/byte-string"4F40902F3B6AE19A80"] ["TestScript" "id-0" #blaze/hash-prefix"51E67D28" - "context-quantity" #blaze/byte-string"5C38E45A80"] + "context-quantity" #blaze/byte-string"9CEABF1B055DDDCF80"] ["TestScript" "id-0" #blaze/hash-prefix"51E67D28" - "context-quantity" #blaze/byte-string"9B780D9180"] + "context-quantity" #blaze/byte-string"B658D8AF4F417A2B80"] ["TestScript" "id-0" #blaze/hash-prefix"51E67D28" - "_id" #blaze/byte-string"165494C5"] + "_id" #blaze/byte-string"490E5C1C8B04CCEC"] ["TestScript" "id-0" #blaze/hash-prefix"51E67D28" "_lastUpdated" #blaze/byte-string"80008001"]]))) diff --git a/modules/db/test/blaze/db/impl/codec_test.clj b/modules/db/test/blaze/db/impl/codec_test.clj index 3d614c489..1feeaab75 100644 --- a/modules/db/test/blaze/db/impl/codec_test.clj +++ b/modules/db/test/blaze/db/impl/codec_test.clj @@ -48,6 +48,12 @@ (apply codec/descending-long [(apply codec/descending-long [t])]))))) +(deftest v-hash-test + (testing "no collisions" + (let [n (long 1e7)] + (is (= n (count (into #{} (map (comp codec/v-hash str)) (repeatedly n random-uuid)))))))) + + (deftest tid-test (check `codec/tid)) diff --git a/modules/db/test/blaze/db/impl/search_param/composite_test.clj b/modules/db/test/blaze/db/impl/search_param/composite_test.clj index b262c5966..693e767ff 100644 --- a/modules/db/test/blaze/db/impl/search_param/composite_test.clj +++ b/modules/db/test/blaze/db/impl/search_param/composite_test.clj @@ -57,7 +57,7 @@ (defn- split-value [bs] - [(bs/subs bs 0 4) (bs/subs bs 4)]) + [(bs/subs bs 0 codec/v-hash-size) (bs/subs bs codec/v-hash-size)]) (defn compile-code-quantity-value [search-param-registry value] diff --git a/modules/db/test/blaze/db/node/resource_indexer_test.clj b/modules/db/test/blaze/db/node/resource_indexer_test.clj index 84d094b92..297f02e50 100644 --- a/modules/db/test/blaze/db/node/resource_indexer_test.clj +++ b/modules/db/test/blaze/db/node/resource_indexer_test.clj @@ -241,19 +241,20 @@ kv-store :type :id :hash-prefix))) (is (= (sp-vr-tu/decode-index-entries kv-store :code :v-hash) [["patient" (codec/v-hash "Patient/id-145552")] + ["patient" (codec/v-hash "id-145552")] ["patient" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-145552"))] - ["patient" (codec/v-hash "id-145552")] - ["code" (codec/v-hash "code-204441")] - ["code" (codec/v-hash "system-204435|")] + ["code" (codec/v-hash "system-204435|code-204441")] + ["code" (codec/v-hash "system-204435|")] + ["code" (codec/v-hash "code-204441")] ["onset-date" (codec-date/encode-range (LocalDate/of 2020 1 30))] ["subject" (codec/v-hash "Patient/id-145552")] + ["subject" (codec/v-hash "id-145552")] ["subject" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-145552"))] - ["subject" (codec/v-hash "id-145552")] ["_profile" (codec/v-hash "url-164445")] ["_id" (codec/v-hash "id-204446")] ["_lastUpdated" #blaze/byte-string"80008001"]]))) @@ -264,19 +265,19 @@ kv-store :type :id :hash-prefix))) (is (= (r-sp-v-tu/decode-index-entries kv-store :code :v-hash) [["patient" (codec/v-hash "Patient/id-145552")] + ["patient" (codec/v-hash "id-145552")] ["patient" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-145552"))] - ["patient" (codec/v-hash "id-145552")] - ["code" (codec/v-hash "code-204441")] - ["code" (codec/v-hash "system-204435|")] ["code" (codec/v-hash "system-204435|code-204441")] + ["code" (codec/v-hash "system-204435|")] + ["code" (codec/v-hash "code-204441")] ["onset-date" (codec-date/encode-range (LocalDate/of 2020 1 30))] ["subject" (codec/v-hash "Patient/id-145552")] + ["subject" (codec/v-hash "id-145552")] ["subject" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-145552"))] - ["subject" (codec/v-hash "id-145552")] ["_profile" (codec/v-hash "url-164445")] ["_id" (codec/v-hash "id-204446")] ["_lastUpdated" #blaze/byte-string"80008001"]]))) @@ -292,19 +293,19 @@ kv-store :compartment :type :id :hash-prefix))) (is (= (c-sp-vr-tu/decode-index-entries kv-store :code :v-hash) [["patient" (codec/v-hash "Patient/id-145552")] + ["patient" (codec/v-hash "id-145552")] ["patient" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-145552"))] - ["patient" (codec/v-hash "id-145552")] - ["code" (codec/v-hash "code-204441")] - ["code" (codec/v-hash "system-204435|")] ["code" (codec/v-hash "system-204435|code-204441")] + ["code" (codec/v-hash "system-204435|")] + ["code" (codec/v-hash "code-204441")] ["onset-date" (codec-date/encode-range (LocalDate/of 2020 1 30))] ["subject" (codec/v-hash "Patient/id-145552")] + ["subject" (codec/v-hash "id-145552")] ["subject" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-145552"))] - ["subject" (codec/v-hash "id-145552")] ["_profile" (codec/v-hash "url-164445")] ["_id" (codec/v-hash "id-204446")] ["_lastUpdated" #blaze/byte-string"80008001"]])))))) @@ -355,18 +356,6 @@ kv-store :type :id :hash-prefix))) (is (= (sp-vr-tu/decode-index-entries kv-store :code :v-hash) [["code-value-quantity" - #blaze/byte-string"82821D0F00000000900926"] - ["code-value-quantity" - #blaze/byte-string"82821D0F32690DC8900926"] - ["code-value-quantity" - #blaze/byte-string"82821D0FA3C37576900926"] - ["code-value-quantity" - #blaze/byte-string"9F7C9B9400000000900926"] - ["code-value-quantity" - #blaze/byte-string"9F7C9B9432690DC8900926"] - ["code-value-quantity" - #blaze/byte-string"9F7C9B94A3C37576900926"] - ["code-value-quantity" (bs/concat (codec/v-hash "code-193824") (codec/quantity "" 23.42M))] ["code-value-quantity" @@ -376,50 +365,62 @@ (bs/concat (codec/v-hash "code-193824") (codec/quantity "http://unitsofmeasure.org|kg/m2" 23.42M))] + ["code-value-quantity" + #blaze/byte-string"B02358E02AD0942D4F40902F3B6AE19A900926"] + ["code-value-quantity" + #blaze/byte-string"B02358E02AD0942DE95B25E4B02F01AF900926"] + ["code-value-quantity" + #blaze/byte-string"B02358E02AD0942DF35972C2DDEDDFE6900926"] + ["code-value-quantity" + #blaze/byte-string"D47C56F6D0C25BA34F40902F3B6AE19A900926"] + ["code-value-quantity" + #blaze/byte-string"D47C56F6D0C25BA3E95B25E4B02F01AF900926"] + ["code-value-quantity" + #blaze/byte-string"D47C56F6D0C25BA3F35972C2DDEDDFE6900926"] ["date" (codec-date/encode-range (LocalDate/of 2005 6 17))] - ["category" (codec/v-hash "system-193558|code-193603")] ["category" (codec/v-hash "system-193558|")] ["category" (codec/v-hash "code-193603")] + ["category" (codec/v-hash "system-193558|code-193603")] ["patient" (codec/v-hash "id-180857")] ["patient" (codec/tid-id (codec/tid "Patient") (codec/id-byte-string "id-180857"))] ["patient" (codec/v-hash "Patient/id-180857")] + ["code" (codec/v-hash "code-193824")] ["code" (codec/v-hash "system-193821|")] ["code" (codec/v-hash "system-193821|code-193824")] - ["code" (codec/v-hash "code-193824")] ["value-quantity" (codec/quantity "" 23.42M)] ["value-quantity" (codec/quantity "kg/m2" 23.42M)] ["value-quantity" (codec/quantity "http://unitsofmeasure.org|kg/m2" 23.42M)] + ["combo-code" (codec/v-hash "code-193824")] ["combo-code" (codec/v-hash "system-193821|")] ["combo-code" (codec/v-hash "system-193821|code-193824")] - ["combo-code" (codec/v-hash "code-193824")] ["combo-value-quantity" - #blaze/byte-string"00000000900926"] + #blaze/byte-string"4F40902F3B6AE19A900926"] ["combo-value-quantity" - #blaze/byte-string"32690DC8900926"] + #blaze/byte-string"E95B25E4B02F01AF900926"] ["combo-value-quantity" - #blaze/byte-string"A3C37576900926"] + #blaze/byte-string"F35972C2DDEDDFE6900926"] ["combo-code-value-quantity" - #blaze/byte-string"82821D0F00000000900926"] + #blaze/byte-string"825F9E2AAE526A184F40902F3B6AE19A900926"] ["combo-code-value-quantity" - #blaze/byte-string"82821D0F32690DC8900926"] + #blaze/byte-string"825F9E2AAE526A18E95B25E4B02F01AF900926"] ["combo-code-value-quantity" - #blaze/byte-string"82821D0FA3C37576900926"] + #blaze/byte-string"825F9E2AAE526A18F35972C2DDEDDFE6900926"] ["combo-code-value-quantity" - #blaze/byte-string"9F7C9B9400000000900926"] + #blaze/byte-string"B02358E02AD0942D4F40902F3B6AE19A900926"] ["combo-code-value-quantity" - #blaze/byte-string"9F7C9B9432690DC8900926"] + #blaze/byte-string"B02358E02AD0942DE95B25E4B02F01AF900926"] ["combo-code-value-quantity" - #blaze/byte-string"9F7C9B94A3C37576900926"] + #blaze/byte-string"B02358E02AD0942DF35972C2DDEDDFE6900926"] ["combo-code-value-quantity" - #blaze/byte-string"A75DEC9D00000000900926"] + #blaze/byte-string"D47C56F6D0C25BA34F40902F3B6AE19A900926"] ["combo-code-value-quantity" - #blaze/byte-string"A75DEC9D32690DC8900926"] + #blaze/byte-string"D47C56F6D0C25BA3E95B25E4B02F01AF900926"] ["combo-code-value-quantity" - #blaze/byte-string"A75DEC9DA3C37576900926"] + #blaze/byte-string"D47C56F6D0C25BA3F35972C2DDEDDFE6900926"] ["subject" (codec/v-hash "id-180857")] ["subject" (codec/tid-id (codec/tid "Patient") diff --git a/modules/db/test/blaze/db/node/version_spec.clj b/modules/db/test/blaze/db/node/version_spec.clj new file mode 100644 index 000000000..45e823657 --- /dev/null +++ b/modules/db/test/blaze/db/node/version_spec.clj @@ -0,0 +1,19 @@ +(ns blaze.db.node.version-spec + (:require + [blaze.db.kv.spec] + [blaze.db.node.version :as version] + [clojure.spec.alpha :as s])) + + +(s/fdef version/encode-value + :args (s/cat :version nat-int?) + :ret bytes?) + + +(s/fdef version/get + :args (s/cat :store :blaze.db/kv-store) + :ret (s/nilable nat-int?)) + + +(s/fdef version/set! + :args (s/cat :store :blaze.db/kv-store :version nat-int?)) diff --git a/modules/db/test/blaze/db/node_test.clj b/modules/db/test/blaze/db/node_test.clj index 6c21144a1..213850611 100644 --- a/modules/db/test/blaze/db/node_test.clj +++ b/modules/db/test/blaze/db/node_test.clj @@ -14,6 +14,7 @@ [blaze.db.node.resource-indexer :as resource-indexer] [blaze.db.node.tx-indexer :as-alias tx-indexer] [blaze.db.node.version :as version] + [blaze.db.node.version-spec] [blaze.db.resource-handle-cache] [blaze.db.resource-store :as rs] [blaze.db.resource-store.spec :refer [resource-store?]] @@ -86,8 +87,9 @@ (defn- with-index-store-version [system version] (assoc-in system [[::kv/mem :blaze.db/index-kv-store] :init-data] - [[version/key (version/encode-value version)] - (tx-success/index-entry 1 Instant/EPOCH)])) + (cond-> [(tx-success/index-entry 1 Instant/EPOCH)] + (pos? version) + (conj [version/key (version/encode-value version)])))) (deftest init-test @@ -174,11 +176,11 @@ [:explain ::s/problems 0 :val] := ::invalid)) (testing "incompatible version" - (given-thrown (ig/init (with-index-store-version system -1)) + (given-thrown (ig/init (with-index-store-version system 0)) :key := :blaze.db/node :reason := ::ig/build-threw-exception - [:cause-data :expected-version] := 0 - [:cause-data :actual-version] := -1))) + [:cause-data :expected-version] := 1 + [:cause-data :actual-version] := 0))) (deftest duration-seconds-collector-init-test @@ -272,5 +274,10 @@ (deftest existing-data-with-compatible-version - (with-system [{:blaze.db/keys [node]} (with-index-store-version system 0)] + (with-system [{:blaze.db/keys [node]} (with-index-store-version system 1)] (is node))) + + +(deftest sets-db-version-on-startup + (with-system [{kv-store [::kv/mem :blaze.db/index-kv-store]} system] + (is (= 1 (version/get kv-store)))))