From 484453a202b71126e6be1e139c41694dd74c9b52 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 29 May 2024 17:03:15 +0200 Subject: [PATCH] feat!: adding support for JSONL from v0.15 clinvar-this (#454) --- ForRelease | 9865 +++++++++++++++++ build.rs | 7 +- protos/annonars/clinvar/per_gene.proto | 89 +- .../annonars/clinvar_data/class_by_freq.proto | 29 + .../clinvar_data/clinvar_public.proto | 2708 +++++ .../clinvar_data/extracted_vars.proto | 75 + .../annonars/clinvar_data/gene_impact.proto | 89 + .../clinvar_data/phenotype_link.proto | 28 + protos/fetch.sh | 19 + src/clinvar_genes/cli/import.rs | 142 +- src/clinvar_genes/cli/mod.rs | 1 - src/clinvar_genes/cli/reading.rs | 173 - src/pbs/clinvar_data.rs | 61 + src/pbs/mod.rs | 1 + .../clinvar-genes/clinvar-genes.db/000014.sst | 3 - .../clinvar-genes/clinvar-genes.db/000016.sst | 3 - tests/clinvar-genes/clinvar-genes.db/CURRENT | 3 - tests/clinvar-genes/clinvar-genes.db/IDENTITY | 3 - tests/clinvar-genes/clinvar-genes.db/LOCK | 0 tests/clinvar-genes/clinvar-genes.db/LOG | 3 - .../clinvar-genes.db/MANIFEST-000005 | 3 - .../clinvar-genes.db/OPTIONS-000009 | 3 - .../clinvar-genes.db/OPTIONS-000011 | 3 - .../clinvar-variants-grch37-seqvars.jsonl | 20 +- .../clinvar-variants-grch38-seqvars.jsonl | 20 +- .../clinvar-genes/gene-frequency-report.jsonl | 21 +- tests/clinvar-genes/gene-variant-report.jsonl | 25 +- 27 files changed, 12981 insertions(+), 416 deletions(-) create mode 100644 ForRelease create mode 100644 protos/annonars/clinvar_data/class_by_freq.proto create mode 100644 protos/annonars/clinvar_data/clinvar_public.proto create mode 100644 protos/annonars/clinvar_data/extracted_vars.proto create mode 100644 protos/annonars/clinvar_data/gene_impact.proto create mode 100644 protos/annonars/clinvar_data/phenotype_link.proto create mode 100644 protos/fetch.sh delete mode 100644 src/clinvar_genes/cli/reading.rs create mode 100644 src/pbs/clinvar_data.rs delete mode 100644 
tests/clinvar-genes/clinvar-genes.db/000014.sst delete mode 100644 tests/clinvar-genes/clinvar-genes.db/000016.sst delete mode 100644 tests/clinvar-genes/clinvar-genes.db/CURRENT delete mode 100644 tests/clinvar-genes/clinvar-genes.db/IDENTITY delete mode 100644 tests/clinvar-genes/clinvar-genes.db/LOCK delete mode 100644 tests/clinvar-genes/clinvar-genes.db/LOG delete mode 100644 tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 delete mode 100644 tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 delete mode 100644 tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 diff --git a/ForRelease b/ForRelease new file mode 100644 index 00000000..0f150fc5 --- /dev/null +++ b/ForRelease @@ -0,0 +1,9865 @@ +diff --git a/.cargo/config b/.cargo/config.toml +similarity index 100% +rename from .cargo/config +rename to .cargo/config.toml +diff --git a/.github/workflows/add-pr-to-project.yml b/.github/workflows/add-pr-to-project.yml +deleted file mode 100644 +index 8d65069..0000000 +--- a/.github/workflows/add-pr-to-project.yml ++++ /dev/null +@@ -1,18 +0,0 @@ +-name: add needs-review pull requests to projects +- +-on: +- pull_request: +- types: +- - labeled +- +-jobs: +- add-to-project: +- name: Add pull request to project +- runs-on: ubuntu-latest +- steps: +- - name: register pull requests with release planning project +- uses: actions/add-to-project@v0.6.0 +- with: +- project-url: https://github.com/orgs/varfish-org/projects/2 +- labeled: needs-review +- github-token: ${{ secrets.BOT_TOKEN }} +diff --git a/.github/workflows/conventional-prs.yml b/.github/workflows/conventional-prs.yml +index b9bf6bc..ccba6a9 100644 +--- a/.github/workflows/conventional-prs.yml ++++ b/.github/workflows/conventional-prs.yml +@@ -11,7 +11,7 @@ jobs: + title-format: + runs-on: ubuntu-latest + steps: +- - uses: amannn/action-semantic-pull-request@v5.4.0 ++ - uses: amannn/action-semantic-pull-request@v5.5.2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: +diff --git a/CHANGELOG.md 
b/CHANGELOG.md +index d02c81b..b8100d5 100644 +--- a/CHANGELOG.md ++++ b/CHANGELOG.md +@@ -1,5 +1,12 @@ + # Changelog +  ++## [0.36.2](https://github.com/varfish-org/annonars/compare/v0.36.1...v0.36.2) (2024-05-08) ++ ++ ++### Bug Fixes ++ ++* update noodles group ([#447](https://github.com/varfish-org/annonars/issues/447)) ([683c84d](https://github.com/varfish-org/annonars/commit/683c84d0dcc17cae659bebb14120f23f7bf2ff4d)) ++ + ## [0.36.1](https://github.com/varfish-org/annonars/compare/v0.36.0...v0.36.1) (2024-03-01) +  +  +diff --git a/Cargo.lock b/Cargo.lock +index 41eed54..3ccdbb8 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -8,7 +8,7 @@ version = "0.5.2" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a" + dependencies = [ +- "bitflags 2.4.2", ++ "bitflags", + "bytes", + "futures-core", + "futures-sink", +@@ -21,17 +21,17 @@ dependencies = [ +  + [[package]] + name = "actix-http" +-version = "3.6.0" ++version = "3.7.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" ++checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d" + dependencies = [ + "actix-codec", + "actix-rt", + "actix-service", + "actix-utils", + "ahash", +- "base64", +- "bitflags 2.4.2", ++ "base64 0.22.1", ++ "bitflags", + "brotli", + "bytes", + "bytestring", +@@ -65,18 +65,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" + dependencies = [ + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "actix-router" +-version = "0.5.2" ++version = "0.5.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d22475596539443685426b6bdadb926ad0ecaefdfc5fb05e5e3441f15463c511" ++checksum = 
"13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8" + dependencies = [ + "bytestring", ++ "cfg-if", + "http", + "regex", ++ "regex-lite", + "serde", + "tracing", + ] +@@ -131,9 +133,9 @@ dependencies = [ +  + [[package]] + name = "actix-web" +-version = "4.5.1" ++version = "4.6.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" ++checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32" + dependencies = [ + "actix-codec", + "actix-http", +@@ -160,6 +162,7 @@ dependencies = [ + "once_cell", + "pin-project-lite", + "regex", ++ "regex-lite", + "serde", + "serde_json", + "serde_urlencoded", +@@ -178,7 +181,7 @@ dependencies = [ + "actix-router", + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] +@@ -198,9 +201,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +  + [[package]] + name = "ahash" +-version = "0.8.7" ++version = "0.8.11" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" ++checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" + dependencies = [ + "cfg-if", + "getrandom", +@@ -211,9 +214,9 @@ dependencies = [ +  + [[package]] + name = "aho-corasick" +-version = "1.1.2" ++version = "1.1.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" ++checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" + dependencies = [ + "memchr", + ] +@@ -250,7 +253,7 @@ dependencies = [ +  + [[package]] + name = "annonars" +-version = "0.36.1" ++version = "0.36.2" + dependencies = [ + "actix-web", + "anyhow", +@@ -270,6 +273,7 @@ dependencies = [ + "indexmap", + "indicatif", + "insta", ++ "itertools 0.13.0", + "log", + "noodles-bed", + 
"noodles-bgzf", +@@ -292,7 +296,7 @@ dependencies = [ + "serde", + "serde_json", + "serde_with", +- "strum 0.26.1", ++ "strum 0.26.2", + "temp_testdir", + "test-log", + "thiserror", +@@ -303,47 +307,48 @@ dependencies = [ +  + [[package]] + name = "anstream" +-version = "0.6.11" ++version = "0.6.14" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" ++checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" + dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", ++ "is_terminal_polyfill", + "utf8parse", + ] +  + [[package]] + name = "anstyle" +-version = "1.0.6" ++version = "1.0.7" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" ++checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +  + [[package]] + name = "anstyle-parse" +-version = "0.2.3" ++version = "0.2.4" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" ++checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" + dependencies = [ + "utf8parse", + ] +  + [[package]] + name = "anstyle-query" +-version = "1.0.2" ++version = "1.0.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" ++checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" + dependencies = [ + "windows-sys 0.52.0", + ] +  + [[package]] + name = "anstyle-wincon" +-version = "3.0.2" ++version = "3.0.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" ++checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" + 
dependencies = [ + "anstyle", + "windows-sys 0.52.0", +@@ -351,9 +356,9 @@ dependencies = [ +  + [[package]] + name = "anyhow" +-version = "1.0.80" ++version = "1.0.86" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" ++checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +  + [[package]] + name = "approx" +@@ -366,15 +371,15 @@ dependencies = [ +  + [[package]] + name = "autocfg" +-version = "1.1.0" ++version = "1.3.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" ++checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +  + [[package]] + name = "backtrace" +-version = "0.3.69" ++version = "0.3.71" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" ++checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" + dependencies = [ + "addr2line", + "cc", +@@ -391,6 +396,12 @@ version = "0.21.7" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +  ++[[package]] ++name = "base64" ++version = "0.22.1" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" ++ + [[package]] + name = "bgzip" + version = "0.3.1" +@@ -405,30 +416,29 @@ dependencies = [ +  + [[package]] + name = "bindgen" +-version = "0.65.1" ++version = "0.69.4" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" ++checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" + dependencies = [ +- "bitflags 1.3.2", ++ "bitflags", + "cexpr", + "clang-sys", ++ 
"itertools 0.12.1", + "lazy_static", + "lazycell", +- "peeking_take_while", +- "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "bio" +-version = "1.5.0" ++version = "1.6.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "25dccfc5babf5a4f505ab5bdda0e18d4b5fc1600c222677c54992203632cbdf5" ++checksum = "7a72cb93babf08c85b375c2938ac678cc637936b3ebb72266d433cec2577f6c2" + dependencies = [ + "anyhow", + "approx", +@@ -441,8 +451,7 @@ dependencies = [ + "editdistancek", + "enum-map", + "fxhash", +- "getset", +- "itertools", ++ "itertools 0.11.0", + "itertools-num", + "lazy_static", + "multimap 0.9.1", +@@ -508,15 +517,9 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +  + [[package]] + name = "bitflags" +-version = "1.3.2" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +- +-[[package]] +-name = "bitflags" +-version = "2.4.2" ++version = "2.5.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" ++checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +  + [[package]] + name = "block-buffer" +@@ -535,9 +538,9 @@ checksum = "c057a3e5631e754d98c5a9e3baa56803fce8fe238b009ecd876e57d381d44c00" +  + [[package]] + name = "brotli" +-version = "3.4.0" ++version = "6.0.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" ++checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" + dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +@@ -546,19 +549,29 @@ dependencies = [ +  + [[package]] + name = "brotli-decompressor" +-version = "2.5.1" ++version = "4.0.1" + source = 
"registry+https://github.com/rust-lang/crates.io-index" +-checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" ++checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" + dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + ] +  ++[[package]] ++name = "bstr" ++version = "1.9.1" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" ++dependencies = [ ++ "memchr", ++ "serde", ++] ++ + [[package]] + name = "bumpalo" +-version = "3.14.0" ++version = "3.16.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" ++checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +  + [[package]] + name = "bv" +@@ -572,15 +585,15 @@ dependencies = [ +  + [[package]] + name = "bytecount" +-version = "0.6.7" ++version = "0.6.8" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" ++checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +  + [[package]] + name = "bytemuck" +-version = "1.14.2" ++version = "1.16.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "ea31d69bda4949c1c1562c1e6f042a1caefac98cdc8a298260a2ff41c1e2d42b" ++checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +  + [[package]] + name = "byteorder" +@@ -590,9 +603,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +  + [[package]] + name = "bytes" +-version = "1.5.0" ++version = "1.6.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" ++checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +  + [[package]] + name = "bytestring" +@@ 
-616,12 +629,13 @@ dependencies = [ +  + [[package]] + name = "cc" +-version = "1.0.83" ++version = "1.0.98" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" ++checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" + dependencies = [ + "jobserver", + "libc", ++ "once_cell", + ] +  + [[package]] +@@ -641,9 +655,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +  + [[package]] + name = "chrono" +-version = "0.4.34" ++version = "0.4.38" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" ++checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" + dependencies = [ + "android-tzdata", + "iana-time-zone", +@@ -651,14 +665,14 @@ dependencies = [ + "num-traits", + "serde", + "wasm-bindgen", +- "windows-targets 0.52.0", ++ "windows-targets 0.52.5", + ] +  + [[package]] + name = "clang-sys" +-version = "1.7.0" ++version = "1.8.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" ++checksum = "a483f3cbf7cec2e153d424d0e92329d816becc6421389bd494375c6065921b9b" + dependencies = [ + "glob", + "libc", +@@ -667,9 +681,9 @@ dependencies = [ +  + [[package]] + name = "clap" +-version = "4.5.1" ++version = "4.5.4" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" ++checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" + dependencies = [ + "clap_builder", + "clap_derive", +@@ -677,9 +691,9 @@ dependencies = [ +  + [[package]] + name = "clap-verbosity-flag" +-version = "2.1.2" ++version = "2.2.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = 
"b57f73ca21b17a0352944b9bb61803b6007bd911b6cccfef7153f7f0600ac495" ++checksum = "bb9b20c0dd58e4c2e991c8d203bbeb76c11304d1011659686b5b644bc29aa478" + dependencies = [ + "clap", + "log", +@@ -687,26 +701,26 @@ dependencies = [ +  + [[package]] + name = "clap_builder" +-version = "4.5.1" ++version = "4.5.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" ++checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" + dependencies = [ + "anstream", + "anstyle", + "clap_lex", +- "strsim 0.11.0", ++ "strsim", + ] +  + [[package]] + name = "clap_derive" +-version = "4.5.0" ++version = "4.5.4" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" ++checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" + dependencies = [ +- "heck", ++ "heck 0.5.0", + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] +@@ -717,9 +731,9 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +  + [[package]] + name = "colorchoice" +-version = "1.0.0" ++version = "1.0.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" ++checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +  + [[package]] + name = "console" +@@ -768,18 +782,18 @@ dependencies = [ +  + [[package]] + name = "crc32fast" +-version = "1.3.2" ++version = "1.4.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" ++checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" + dependencies = [ + "cfg-if", + ] +  + [[package]] + name = "crossbeam-channel" +-version = "0.5.11" ++version = "0.5.13" + source = 
"registry+https://github.com/rust-lang/crates.io-index" +-checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b" ++checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" + dependencies = [ + "crossbeam-utils", + ] +@@ -805,9 +819,9 @@ dependencies = [ +  + [[package]] + name = "crossbeam-utils" +-version = "0.8.19" ++version = "0.8.20" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" ++checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +  + [[package]] + name = "crypto-common" +@@ -848,9 +862,9 @@ checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" +  + [[package]] + name = "darling" +-version = "0.20.5" ++version = "0.20.9" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "fc5d6b04b3fd0ba9926f945895de7d806260a2d7431ba82e7edaecb043c4c6b8" ++checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" + dependencies = [ + "darling_core", + "darling_macro", +@@ -858,27 +872,27 @@ dependencies = [ +  + [[package]] + name = "darling_core" +-version = "0.20.5" ++version = "0.20.9" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "04e48a959bcd5c761246f5d090ebc2fbf7b9cd527a492b07a67510c108f1e7e3" ++checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" + dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", +- "strsim 0.10.0", +- "syn 2.0.48", ++ "strsim", ++ "syn 2.0.66", + ] +  + [[package]] + name = "darling_macro" +-version = "0.20.5" ++version = "0.20.9" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "1d1545d67a2149e1d93b7e5c7752dce5a7426eb5d1357ddcfd89336b94444f77" ++checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" + dependencies = [ + "darling_core", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", 
+ ] +  + [[package]] +@@ -938,9 +952,9 @@ checksum = "3e02df23d5b1c6f9e69fa603b890378123b93073df998a21e6e33b9db0a32613" +  + [[package]] + name = "either" +-version = "1.9.0" ++version = "1.12.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" ++checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +  + [[package]] + name = "encode_unicode" +@@ -950,9 +964,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +  + [[package]] + name = "encoding_rs" +-version = "0.8.33" ++version = "0.8.34" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" ++checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" + dependencies = [ + "cfg-if", + ] +@@ -975,7 +989,7 @@ checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] +@@ -990,9 +1004,9 @@ dependencies = [ +  + [[package]] + name = "env_logger" +-version = "0.11.1" ++version = "0.11.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "05e7cf40684ae96ade6232ed84582f40ce0a66efcd43a5117aef610534f8e0b8" ++checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" + dependencies = [ + "anstream", + "anstyle", +@@ -1009,18 +1023,19 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +  + [[package]] + name = "erased-serde" +-version = "0.4.2" ++version = "0.4.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "55d05712b2d8d88102bc9868020c9e5c7a1f5527c452b9b97450a1d006140ba7" ++checksum = "24e2389d65ab4fab27dc2a5de7b191e1f6617d1f1c8855c0dc569c94a4cbb18d" + dependencies = [ + "serde", ++ "typeid", + ] +  + [[package]] + name = "errno" 
+-version = "0.3.8" ++version = "0.3.9" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" ++checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" + dependencies = [ + "libc", + "windows-sys 0.52.0", +@@ -1028,9 +1043,9 @@ dependencies = [ +  + [[package]] + name = "fastrand" +-version = "2.0.1" ++version = "2.1.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" ++checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +  + [[package]] + name = "feature-probe" +@@ -1046,9 +1061,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +  + [[package]] + name = "flate2" +-version = "1.0.28" ++version = "1.0.30" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" ++checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" + dependencies = [ + "crc32fast", + "miniz_oxide", +@@ -1125,7 +1140,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] +@@ -1142,9 +1157,9 @@ checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +  + [[package]] + name = "futures-timer" +-version = "3.0.2" ++version = "3.0.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" ++checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" +  + [[package]] + name = "futures-util" +@@ -1185,27 +1200,15 @@ dependencies = [ +  + [[package]] + name = "getrandom" +-version = "0.2.12" ++version = "0.2.15" + source = 
"registry+https://github.com/rust-lang/crates.io-index" +-checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" ++checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" + dependencies = [ + "cfg-if", + "libc", + "wasi", + ] +  +-[[package]] +-name = "getset" +-version = "0.1.2" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" +-dependencies = [ +- "proc-macro-error", +- "proc-macro2", +- "quote", +- "syn 1.0.109", +-] +- + [[package]] + name = "gimli" + version = "0.28.1" +@@ -1220,9 +1223,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +  + [[package]] + name = "h2" +-version = "0.3.24" ++version = "0.3.26" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" ++checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" + dependencies = [ + "bytes", + "fnv", +@@ -1239,9 +1242,9 @@ dependencies = [ +  + [[package]] + name = "hashbrown" +-version = "0.14.3" ++version = "0.14.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" ++checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +  + [[package]] + name = "heck" +@@ -1250,25 +1253,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +  + [[package]] +-name = "hex" +-version = "0.4.3" ++name = "heck" ++version = "0.5.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" ++checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +  + [[package]] +-name = "home" +-version = "0.5.9" ++name = "hex" 
++version = "0.4.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +-dependencies = [ +- "windows-sys 0.52.0", +-] ++checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +  + [[package]] + name = "http" +-version = "0.2.11" ++version = "0.2.12" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" ++checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" + dependencies = [ + "bytes", + "fnv", +@@ -1334,9 +1334,9 @@ dependencies = [ +  + [[package]] + name = "indexmap" +-version = "2.2.3" ++version = "2.2.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" ++checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" + dependencies = [ + "equivalent", + "hashbrown", +@@ -1359,27 +1359,32 @@ dependencies = [ +  + [[package]] + name = "insta" +-version = "1.36.1" ++version = "1.39.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "0a7c22c4d34ef4788c351e971c52bfdfe7ea2766f8c5466bc175dd46e52ac22e" ++checksum = "810ae6042d48e2c9e9215043563a58a80b877bc863228a74cf10c49d4620a6f5" + dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "serde", + "similar", +- "yaml-rust", + ] +  + [[package]] + name = "instant" +-version = "0.1.12" ++version = "0.1.13" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" ++checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" + dependencies = [ + "cfg-if", + ] +  ++[[package]] ++name = "is_terminal_polyfill" ++version = "1.70.0" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = 
"f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" ++ + [[package]] + name = "itertools" + version = "0.11.0" +@@ -1389,6 +1394,24 @@ dependencies = [ + "either", + ] +  ++[[package]] ++name = "itertools" ++version = "0.12.1" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" ++dependencies = [ ++ "either", ++] ++ ++[[package]] ++name = "itertools" ++version = "0.13.0" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" ++dependencies = [ ++ "either", ++] ++ + [[package]] + name = "itertools-num" + version = "0.1.3" +@@ -1400,24 +1423,24 @@ dependencies = [ +  + [[package]] + name = "itoa" +-version = "1.0.10" ++version = "1.0.11" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" ++checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +  + [[package]] + name = "jobserver" +-version = "0.1.28" ++version = "0.1.31" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" ++checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" + dependencies = [ + "libc", + ] +  + [[package]] + name = "js-sys" +-version = "0.3.68" ++version = "0.3.69" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" ++checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" + dependencies = [ + "wasm-bindgen", + ] +@@ -1442,18 +1465,18 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +  + [[package]] + name = "libc" +-version = "0.2.153" ++version = "0.2.155" + source = 
"registry+https://github.com/rust-lang/crates.io-index" +-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" ++checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +  + [[package]] + name = "libloading" +-version = "0.8.1" ++version = "0.8.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" ++checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" + dependencies = [ + "cfg-if", +- "windows-sys 0.48.0", ++ "windows-targets 0.52.5", + ] +  + [[package]] +@@ -1464,9 +1487,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +  + [[package]] + name = "librocksdb-sys" +-version = "0.11.0+8.1.1" ++version = "0.16.0+8.10.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" ++checksum = "ce3d60bc059831dc1c83903fb45c103f75db65c5a7bf22272764d9cc683e348c" + dependencies = [ + "bindgen", + "bzip2-sys", +@@ -1480,9 +1503,9 @@ dependencies = [ +  + [[package]] + name = "libz-sys" +-version = "1.1.15" ++version = "1.1.18" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" ++checksum = "c15da26e5af7e25c90b37a2d75cdbf940cf4a55316de9d84c679c9b8bfabf82e" + dependencies = [ + "cc", + "pkg-config", +@@ -1497,9 +1520,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +  + [[package]] + name = "linux-raw-sys" +-version = "0.4.13" ++version = "0.4.14" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" ++checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +  + [[package]] + name = "local-channel" +@@ -1520,9 +1543,9 @@ checksum = 
"4d873d7c67ce09b42110d801813efbc9364414e356be9935700d368351657487" +  + [[package]] + name = "lock_api" +-version = "0.4.11" ++version = "0.4.12" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" ++checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" + dependencies = [ + "autocfg", + "scopeguard", +@@ -1530,9 +1553,9 @@ dependencies = [ +  + [[package]] + name = "log" +-version = "0.4.20" ++version = "0.4.21" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" ++checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +  + [[package]] + name = "lz4-sys" +@@ -1565,9 +1588,9 @@ dependencies = [ +  + [[package]] + name = "memchr" +-version = "2.7.1" ++version = "2.7.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" ++checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +  + [[package]] + name = "mime" +@@ -1583,18 +1606,18 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +  + [[package]] + name = "miniz_oxide" +-version = "0.7.2" ++version = "0.7.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" ++checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" + dependencies = [ + "adler", + ] +  + [[package]] + name = "mio" +-version = "0.8.10" ++version = "0.8.11" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" ++checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" + dependencies = [ + "libc", + "log", +@@ -1602,12 +1625,6 @@ dependencies = [ + 
"windows-sys 0.48.0", + ] +  +-[[package]] +-name = "multimap" +-version = "0.8.3" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +- + [[package]] + name = "multimap" + version = "0.9.1" +@@ -1617,6 +1634,12 @@ dependencies = [ + "serde", + ] +  ++[[package]] ++name = "multimap" ++version = "0.10.0" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" ++ + [[package]] + name = "nalgebra" + version = "0.29.0" +@@ -1680,18 +1703,18 @@ dependencies = [ +  + [[package]] + name = "noodles-bed" +-version = "0.12.0" ++version = "0.13.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c116ff14e3dd3ffa6c58771d1a2ee1205690e1c0fe78b0a3456402eebf44ad27" ++checksum = "57cdbb2ee5fce84ca241d9bafba8554aadf2ccc534f7ebb71e1c646e259b187c" + dependencies = [ + "noodles-core", + ] +  + [[package]] + name = "noodles-bgzf" +-version = "0.26.0" ++version = "0.30.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "8970db2e84adb1007377dd3988258d7a64e3fc4c05602ebf94e1f8cba207c030" ++checksum = "13f54d4840fd26ed94103ded9524aa5fdd757255a556f24653d162c0a45c47e8" + dependencies = [ + "byteorder", + "bytes", +@@ -1701,15 +1724,18 @@ dependencies = [ +  + [[package]] + name = "noodles-core" +-version = "0.14.0" ++version = "0.15.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7336c3be652de4e05444c9b12a32331beb5ba3316e8872d92bfdd8ef3b06c282" ++checksum = "c5a8c6b020d1205abef2b0fab4463a6c5ecc3c8f4d561ca8b0d1a42323376200" ++dependencies = [ ++ "bstr", ++] +  + [[package]] + name = "noodles-csi" +-version = "0.30.0" ++version = "0.35.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a60dfe0919f7ecbd081a82eb1d32e8f89f9041932d035fe8309073c8c01277bf" ++checksum = 
"beb1618ca2aa88662d387197a188686105d6b5e25f6959c52b766276cbfc4620" + dependencies = [ + "bit-vec", + "byteorder", +@@ -1720,9 +1746,9 @@ dependencies = [ +  + [[package]] + name = "noodles-gff" +-version = "0.27.0" ++version = "0.33.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "14f8ec87fe3630f57d6d8ea24cbc2cbd0bfed1fe66238bda7a7c3fb6a36d3713" ++checksum = "578b6efaf5c3f029720af4c8590e34fd38e214666971d665a52e3566830170d0" + dependencies = [ + "indexmap", + "noodles-bgzf", +@@ -1733,9 +1759,9 @@ dependencies = [ +  + [[package]] + name = "noodles-tabix" +-version = "0.36.0" ++version = "0.41.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "cc1ab29335a68d0c2bdf41460a67714ca69e23a1cbeb950ac5c38a9afa446a62" ++checksum = "0cc110f78cb406f69f42c482d1986526c590b7295f37f0e37f1fc380413400ef" + dependencies = [ + "bit-vec", + "byteorder", +@@ -1747,9 +1773,9 @@ dependencies = [ +  + [[package]] + name = "noodles-vcf" +-version = "0.49.0" ++version = "0.57.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "2e1f2fa749afaccadc596ec55ccb7bdcd8101fa79f8382384223c0dbae3e245b" ++checksum = "4df6e162ec2e898581b5ccf5e8972e376a7c41807061b66152280dea2c53a989" + dependencies = [ + "indexmap", + "memchr", +@@ -1772,9 +1798,9 @@ dependencies = [ +  + [[package]] + name = "num-complex" +-version = "0.4.5" ++version = "0.4.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" ++checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" + dependencies = [ + "num-traits", + ] +@@ -1796,20 +1822,19 @@ dependencies = [ +  + [[package]] + name = "num-rational" +-version = "0.4.1" ++version = "0.4.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" ++checksum = 
"f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" + dependencies = [ +- "autocfg", + "num-integer", + "num-traits", + ] +  + [[package]] + name = "num-traits" +-version = "0.2.18" ++version = "0.2.19" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" ++checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" + dependencies = [ + "autocfg", + "libm", +@@ -1853,9 +1878,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +  + [[package]] + name = "parking_lot" +-version = "0.12.1" ++version = "0.12.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" ++checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" + dependencies = [ + "lock_api", + "parking_lot_core", +@@ -1863,22 +1888,22 @@ dependencies = [ +  + [[package]] + name = "parking_lot_core" +-version = "0.9.9" ++version = "0.9.10" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" ++checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" + dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", +- "windows-targets 0.48.5", ++ "windows-targets 0.52.5", + ] +  + [[package]] + name = "paste" +-version = "1.0.14" ++version = "1.0.15" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" ++checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +  + [[package]] + name = "pbjson" +@@ -1886,7 +1911,7 @@ version = "0.6.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "1030c719b0ec2a2d25a5df729d6cff1acf3cc230bf766f4f97833591f7577b90" + dependencies = [ +- 
"base64", ++ "base64 0.21.7", + "serde", + ] +  +@@ -1896,8 +1921,8 @@ version = "0.6.2" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "2580e33f2292d34be285c5bc3dba5259542b083cfad6037b6d70345f24dcb735" + dependencies = [ +- "heck", +- "itertools", ++ "heck 0.4.1", ++ "itertools 0.11.0", + "prost", + "prost-types", + ] +@@ -1917,12 +1942,6 @@ dependencies = [ + "serde", + ] +  +-[[package]] +-name = "peeking_take_while" +-version = "0.1.2" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +- + [[package]] + name = "percent-encoding" + version = "2.3.1" +@@ -1931,9 +1950,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +  + [[package]] + name = "petgraph" +-version = "0.6.4" ++version = "0.6.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" ++checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" + dependencies = [ + "fixedbitset", + "indexmap", +@@ -1941,9 +1960,9 @@ dependencies = [ +  + [[package]] + name = "pin-project-lite" +-version = "0.2.13" ++version = "0.2.14" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" ++checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +  + [[package]] + name = "pin-utils" +@@ -1953,9 +1972,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +  + [[package]] + name = "pkg-config" +-version = "0.3.29" ++version = "0.3.30" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" ++checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +  + [[package]] + name = "portable-atomic" +@@ 
-1987,52 +2006,28 @@ dependencies = [ +  + [[package]] + name = "prettyplease" +-version = "0.2.16" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +-dependencies = [ +- "proc-macro2", +- "syn 2.0.48", +-] +- +-[[package]] +-name = "proc-macro-error" +-version = "1.0.4" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +-dependencies = [ +- "proc-macro-error-attr", +- "proc-macro2", +- "quote", +- "syn 1.0.109", +- "version_check", +-] +- +-[[package]] +-name = "proc-macro-error-attr" +-version = "1.0.4" ++version = "0.2.20" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" ++checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" + dependencies = [ + "proc-macro2", +- "quote", +- "version_check", ++ "syn 2.0.66", + ] +  + [[package]] + name = "proc-macro2" +-version = "1.0.78" ++version = "1.0.84" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" ++checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" + dependencies = [ + "unicode-ident", + ] +  + [[package]] + name = "prost" +-version = "0.12.3" ++version = "0.12.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" ++checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" + dependencies = [ + "bytes", + "prost-derive", +@@ -2040,53 +2035,52 @@ dependencies = [ +  + [[package]] + name = "prost-build" +-version = "0.12.3" ++version = "0.12.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = 
"c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" ++checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" + dependencies = [ + "bytes", +- "heck", +- "itertools", ++ "heck 0.5.0", ++ "itertools 0.12.1", + "log", +- "multimap 0.8.3", ++ "multimap 0.10.0", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", +- "syn 2.0.48", ++ "syn 2.0.66", + "tempfile", +- "which", + ] +  + [[package]] + name = "prost-derive" +-version = "0.12.3" ++version = "0.12.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" ++checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" + dependencies = [ + "anyhow", +- "itertools", ++ "itertools 0.12.1", + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "prost-types" +-version = "0.12.3" ++version = "0.12.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" ++checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" + dependencies = [ + "prost", + ] +  + [[package]] + name = "quote" +-version = "1.0.35" ++version = "1.0.36" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" ++checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" + dependencies = [ + "proc-macro2", + ] +@@ -2139,9 +2133,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +  + [[package]] + name = "rayon" +-version = "1.8.1" ++version = "1.10.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" ++checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" + dependencies = [ 
+ "either", + "rayon-core", +@@ -2159,23 +2153,23 @@ dependencies = [ +  + [[package]] + name = "redox_syscall" +-version = "0.4.1" ++version = "0.5.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" ++checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" + dependencies = [ +- "bitflags 1.3.2", ++ "bitflags", + ] +  + [[package]] + name = "regex" +-version = "1.10.3" ++version = "1.10.4" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" ++checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" + dependencies = [ + "aho-corasick", + "memchr", +- "regex-automata 0.4.5", +- "regex-syntax 0.8.2", ++ "regex-automata 0.4.6", ++ "regex-syntax 0.8.3", + ] +  + [[package]] +@@ -2189,15 +2183,21 @@ dependencies = [ +  + [[package]] + name = "regex-automata" +-version = "0.4.5" ++version = "0.4.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" ++checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" + dependencies = [ + "aho-corasick", + "memchr", +- "regex-syntax 0.8.2", ++ "regex-syntax 0.8.3", + ] +  ++[[package]] ++name = "regex-lite" ++version = "0.1.5" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" ++ + [[package]] + name = "regex-syntax" + version = "0.6.29" +@@ -2206,21 +2206,21 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +  + [[package]] + name = "regex-syntax" +-version = "0.8.2" ++version = "0.8.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" ++checksum = 
"adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +  + [[package]] + name = "relative-path" +-version = "1.9.2" ++version = "1.9.3" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" ++checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +  + [[package]] + name = "rocksdb" +-version = "0.21.0" ++version = "0.22.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" ++checksum = "6bd13e55d6d7b8cd0ea569161127567cd587676c99f4472f779a0279aa60a7a7" + dependencies = [ + "libc", + "librocksdb-sys", +@@ -2228,9 +2228,9 @@ dependencies = [ +  + [[package]] + name = "rocksdb-utils-lookup" +-version = "0.3.0" ++version = "0.4.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b83002edb508bf7fc5b234bde1489ccaea5bffbaeaf0aae00270257c858b5f9f" ++checksum = "cfdff1c5ed63d35ce75ffc688318b3b9791c5e16a0c4b1ed8d8400149384f7bf" + dependencies = [ + "rocksdb", + "thiserror", +@@ -2239,9 +2239,9 @@ dependencies = [ +  + [[package]] + name = "rstest" +-version = "0.18.2" ++version = "0.19.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199" ++checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" + dependencies = [ + "futures", + "futures-timer", +@@ -2251,9 +2251,9 @@ dependencies = [ +  + [[package]] + name = "rstest_macros" +-version = "0.18.2" ++version = "0.19.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605" ++checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" + dependencies = [ + "cfg-if", + "glob", +@@ -2262,21 +2262,21 @@ dependencies = [ + "regex", + "relative-path", + 
"rustc_version 0.4.0", +- "syn 2.0.48", ++ "syn 2.0.66", + "unicode-ident", + ] +  + [[package]] + name = "rustc-demangle" +-version = "0.1.23" ++version = "0.1.24" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" ++checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +  + [[package]] + name = "rustc-hash" +-version = "1.1.0" ++version = "1.2.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" ++checksum = "3450ed37fe9609abb6bc3b8891b6e078404e4c53c7332350e2e15126a95229bf" +  + [[package]] + name = "rustc_version" +@@ -2293,16 +2293,16 @@ version = "0.4.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" + dependencies = [ +- "semver 1.0.21", ++ "semver 1.0.23", + ] +  + [[package]] + name = "rustix" +-version = "0.38.31" ++version = "0.38.34" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" ++checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" + dependencies = [ +- "bitflags 2.4.2", ++ "bitflags", + "errno", + "libc", + "linux-raw-sys", +@@ -2311,15 +2311,15 @@ dependencies = [ +  + [[package]] + name = "rustversion" +-version = "1.0.14" ++version = "1.0.17" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" ++checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +  + [[package]] + name = "ryu" +-version = "1.0.16" ++version = "1.0.18" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" ++checksum = 
"f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +  + [[package]] + name = "safe_arch" +@@ -2344,35 +2344,35 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" +  + [[package]] + name = "semver" +-version = "1.0.21" ++version = "1.0.23" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" ++checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +  + [[package]] + name = "serde" +-version = "1.0.197" ++version = "1.0.203" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" ++checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" + dependencies = [ + "serde_derive", + ] +  + [[package]] + name = "serde_derive" +-version = "1.0.197" ++version = "1.0.203" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" ++checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "serde_json" +-version = "1.0.114" ++version = "1.0.117" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" ++checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" + dependencies = [ + "indexmap", + "itoa", +@@ -2394,15 +2394,16 @@ dependencies = [ +  + [[package]] + name = "serde_with" +-version = "3.6.0" ++version = "3.8.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "1b0ed1662c5a68664f45b76d18deb0e234aff37207086803165c961eb695e981" ++checksum = "0ad483d2ab0149d5a5ebcd9972a3852711e0153d863bf5a5d0391d28883c4a20" + dependencies = [ +- "base64", ++ "base64 
0.22.1", + "chrono", + "hex", + "indexmap", + "serde", ++ "serde_derive", + "serde_json", + "serde_with_macros", + "time", +@@ -2410,14 +2411,14 @@ dependencies = [ +  + [[package]] + name = "serde_with_macros" +-version = "3.6.0" ++version = "3.8.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "568577ff0ef47b879f736cd66740e022f3672788cdf002a05a4e609ea5a6fb15" ++checksum = "65569b702f41443e8bc8bbb1c5779bd0450bbe723b56198980e80ec45780bce2" + dependencies = [ + "darling", + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] +@@ -2448,9 +2449,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +  + [[package]] + name = "signal-hook-registry" +-version = "1.4.1" ++version = "1.4.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" ++checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" + dependencies = [ + "libc", + ] +@@ -2470,9 +2471,9 @@ dependencies = [ +  + [[package]] + name = "similar" +-version = "2.4.0" ++version = "2.5.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "32fea41aca09ee824cc9724996433064c89f7777e60762749a4170a14abbfa21" ++checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" +  + [[package]] + name = "slab" +@@ -2485,18 +2486,18 @@ dependencies = [ +  + [[package]] + name = "smallvec" +-version = "1.13.1" ++version = "1.13.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" ++checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +  + [[package]] + name = "socket2" +-version = "0.5.5" ++version = "0.5.7" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" 
++checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" + dependencies = [ + "libc", +- "windows-sys 0.48.0", ++ "windows-sys 0.52.0", + ] +  + [[package]] +@@ -2514,15 +2515,9 @@ dependencies = [ +  + [[package]] + name = "strsim" +-version = "0.10.0" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +- +-[[package]] +-name = "strsim" +-version = "0.11.0" ++version = "0.11.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" ++checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +  + [[package]] + name = "strum" +@@ -2532,11 +2527,11 @@ checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +  + [[package]] + name = "strum" +-version = "0.26.1" ++version = "0.26.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" ++checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" + dependencies = [ +- "strum_macros 0.26.1", ++ "strum_macros 0.26.2", + ] +  + [[package]] +@@ -2545,24 +2540,24 @@ version = "0.25.3" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" + dependencies = [ +- "heck", ++ "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "strum_macros" +-version = "0.26.1" ++version = "0.26.2" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" ++checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" + dependencies = [ +- "heck", ++ "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", +- "syn 2.0.48", 
++ "syn 2.0.66", + ] +  + [[package]] +@@ -2578,9 +2573,9 @@ dependencies = [ +  + [[package]] + name = "syn" +-version = "2.0.48" ++version = "2.0.66" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" ++checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" + dependencies = [ + "proc-macro2", + "quote", +@@ -2595,9 +2590,9 @@ checksum = "921f1e9c427802414907a48b21a6504ff6b3a15a1a3cf37e699590949ad9befc" +  + [[package]] + name = "tempfile" +-version = "3.10.0" ++version = "3.10.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" ++checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" + dependencies = [ + "cfg-if", + "fastrand", +@@ -2607,50 +2602,51 @@ dependencies = [ +  + [[package]] + name = "test-log" +-version = "0.2.15" ++version = "0.2.16" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7b319995299c65d522680decf80f2c108d85b861d81dfe340a10d16cee29d9e6" ++checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93" + dependencies = [ + "env_logger", + "test-log-macros", ++ "tracing-subscriber", + ] +  + [[package]] + name = "test-log-macros" +-version = "0.2.15" ++version = "0.2.16" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c8f546451eaa38373f549093fe9fd05e7d2bade739e2ddf834b9968621d60107" ++checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "thiserror" +-version = "1.0.57" ++version = "1.0.61" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" ++checksum = 
"c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" + dependencies = [ + "thiserror-impl", + ] +  + [[package]] + name = "thiserror-impl" +-version = "1.0.57" ++version = "1.0.61" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" ++checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "thread_local" +-version = "1.1.7" ++version = "1.1.8" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" ++checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" + dependencies = [ + "cfg-if", + "once_cell", +@@ -2658,9 +2654,9 @@ dependencies = [ +  + [[package]] + name = "time" +-version = "0.3.34" ++version = "0.3.36" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" ++checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" + dependencies = [ + "deranged", + "itoa", +@@ -2679,9 +2675,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +  + [[package]] + name = "time-macros" +-version = "0.2.17" ++version = "0.2.18" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" ++checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" + dependencies = [ + "num-conv", + "time-core", +@@ -2704,9 +2700,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +  + [[package]] + name = "tokio" +-version = "1.36.0" ++version = "1.37.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = 
"61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" ++checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" + dependencies = [ + "backtrace", + "bytes", +@@ -2721,16 +2717,15 @@ dependencies = [ +  + [[package]] + name = "tokio-util" +-version = "0.7.10" ++version = "0.7.11" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" ++checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" + dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +- "tracing", + ] +  + [[package]] +@@ -2753,7 +2748,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] +@@ -2824,6 +2819,12 @@ version = "0.4.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" +  ++[[package]] ++name = "typeid" ++version = "1.0.0" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "059d83cc991e7a42fc37bd50941885db0888e34209f8cfd9aab07ddec03bc9cf" ++ + [[package]] + name = "typenum" + version = "1.17.0" +@@ -2844,18 +2845,18 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +  + [[package]] + name = "unicode-normalization" +-version = "0.1.22" ++version = "0.1.23" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" ++checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" + dependencies = [ + "tinyvec", + ] +  + [[package]] + name = "unicode-width" +-version = "0.1.11" ++version = "0.1.12" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = 
"e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" ++checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +  + [[package]] + name = "url" +@@ -2909,9 +2910,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +  + [[package]] + name = "wasm-bindgen" +-version = "0.2.91" ++version = "0.2.92" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" ++checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" + dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +@@ -2919,24 +2920,24 @@ dependencies = [ +  + [[package]] + name = "wasm-bindgen-backend" +-version = "0.2.91" ++version = "0.2.92" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" ++checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" + dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + "wasm-bindgen-shared", + ] +  + [[package]] + name = "wasm-bindgen-macro" +-version = "0.2.91" ++version = "0.2.92" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" ++checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" + dependencies = [ + "quote", + "wasm-bindgen-macro-support", +@@ -2944,40 +2945,28 @@ dependencies = [ +  + [[package]] + name = "wasm-bindgen-macro-support" +-version = "0.2.91" ++version = "0.2.92" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" ++checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + 
"wasm-bindgen-backend", + "wasm-bindgen-shared", + ] +  + [[package]] + name = "wasm-bindgen-shared" +-version = "0.2.91" ++version = "0.2.92" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" +- +-[[package]] +-name = "which" +-version = "4.4.2" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +-dependencies = [ +- "either", +- "home", +- "once_cell", +- "rustix", +-] ++checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +  + [[package]] + name = "wide" +-version = "0.7.15" ++version = "0.7.21" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "89beec544f246e679fc25490e3f8e08003bc4bf612068f325120dad4cea02c1c" ++checksum = "cd8dc749a1b03f3c255a3064a4f5c0ee5ed09b7c6bc6d4525d31f779cd74d7fc" + dependencies = [ + "bytemuck", + "safe_arch", +@@ -3011,7 +3000,7 @@ version = "0.52.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" + dependencies = [ +- "windows-targets 0.52.0", ++ "windows-targets 0.52.5", + ] +  + [[package]] +@@ -3029,7 +3018,7 @@ version = "0.52.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" + dependencies = [ +- "windows-targets 0.52.0", ++ "windows-targets 0.52.5", + ] +  + [[package]] +@@ -3049,17 +3038,18 @@ dependencies = [ +  + [[package]] + name = "windows-targets" +-version = "0.52.0" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" ++checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" + dependencies = [ +- "windows_aarch64_gnullvm 0.52.0", +- 
"windows_aarch64_msvc 0.52.0", +- "windows_i686_gnu 0.52.0", +- "windows_i686_msvc 0.52.0", +- "windows_x86_64_gnu 0.52.0", +- "windows_x86_64_gnullvm 0.52.0", +- "windows_x86_64_msvc 0.52.0", ++ "windows_aarch64_gnullvm 0.52.5", ++ "windows_aarch64_msvc 0.52.5", ++ "windows_i686_gnu 0.52.5", ++ "windows_i686_gnullvm", ++ "windows_i686_msvc 0.52.5", ++ "windows_x86_64_gnu 0.52.5", ++ "windows_x86_64_gnullvm 0.52.5", ++ "windows_x86_64_msvc 0.52.5", + ] +  + [[package]] +@@ -3070,9 +3060,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +  + [[package]] + name = "windows_aarch64_gnullvm" +-version = "0.52.0" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" ++checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +  + [[package]] + name = "windows_aarch64_msvc" +@@ -3082,9 +3072,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +  + [[package]] + name = "windows_aarch64_msvc" +-version = "0.52.0" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" ++checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +  + [[package]] + name = "windows_i686_gnu" +@@ -3094,9 +3084,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +  + [[package]] + name = "windows_i686_gnu" +-version = "0.52.0" ++version = "0.52.5" ++source = "registry+https://github.com/rust-lang/crates.io-index" ++checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" ++ ++[[package]] ++name = "windows_i686_gnullvm" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" ++checksum = 
"87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +  + [[package]] + name = "windows_i686_msvc" +@@ -3106,9 +3102,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +  + [[package]] + name = "windows_i686_msvc" +-version = "0.52.0" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" ++checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +  + [[package]] + name = "windows_x86_64_gnu" +@@ -3118,9 +3114,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +  + [[package]] + name = "windows_x86_64_gnu" +-version = "0.52.0" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" ++checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +  + [[package]] + name = "windows_x86_64_gnullvm" +@@ -3130,9 +3126,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +  + [[package]] + name = "windows_x86_64_gnullvm" +-version = "0.52.0" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" ++checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +  + [[package]] + name = "windows_x86_64_msvc" +@@ -3142,18 +3138,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +  + [[package]] + name = "windows_x86_64_msvc" +-version = "0.52.0" +-source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +- +-[[package]] +-name = "yaml-rust" +-version = "0.4.5" ++version = "0.52.5" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = 
"56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +-dependencies = [ +- "linked-hash-map", +-] ++checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +  + [[package]] + name = "yansi" +@@ -3163,47 +3150,47 @@ checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +  + [[package]] + name = "zerocopy" +-version = "0.7.32" ++version = "0.7.34" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" ++checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" + dependencies = [ + "zerocopy-derive", + ] +  + [[package]] + name = "zerocopy-derive" +-version = "0.7.32" ++version = "0.7.34" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" ++checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" + dependencies = [ + "proc-macro2", + "quote", +- "syn 2.0.48", ++ "syn 2.0.66", + ] +  + [[package]] + name = "zstd" +-version = "0.13.0" ++version = "0.13.1" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" ++checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" + dependencies = [ + "zstd-safe", + ] +  + [[package]] + name = "zstd-safe" +-version = "7.0.0" ++version = "7.1.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" ++checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" + dependencies = [ + "zstd-sys", + ] +  + [[package]] + name = "zstd-sys" +-version = "2.0.9+zstd.1.5.5" ++version = "2.0.10+zstd.1.5.6" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = 
"9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" ++checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" + dependencies = [ + "cc", + "pkg-config", +diff --git a/Cargo.toml b/Cargo.toml +index 1a0e5df..902648c 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -1,11 +1,11 @@ + [package] + name = "annonars" +-version = "0.36.1" ++version = "0.36.2" + edition = "2021" + authors = ["Manuel Holtgrewe "] + description = "Rust template repository" + license = "Apache-2.0" +-homepage = "https://github.com/varfish-org/annonars" ++repository = "https://github.com/varfish-org/annonars" + readme = "README.md" + autobins = false + exclude = ["/tests"] +@@ -27,48 +27,49 @@ biocommons-bioutils = "0.1.0" + boolvec = "0.2" + byteorder = "1.4" + chrono = { version = "0.4", features = ["serde"] } +-clap-verbosity-flag = "2.1" + clap = { version = "4.5", features = ["derive", "env"] } ++clap-verbosity-flag = "2.2" + csv = "1.2" + enum-map = { version = "2.7.3", features = ["serde"] } + env_logger = "0.11" ++erased-serde = "0.4.2" + flate2 = "1.0" + indexmap = { version = "2.2", features = ["serde"] } + indicatif = { version = "0.17", features = ["rayon"] } ++itertools = "0.13.0" + log = "0.4" +-noodles-bed = "0.12" +-noodles-bgzf = "0.26" +-noodles-core = "0.14" +-noodles-csi = "0.30" +-noodles-tabix = "0.36" +-noodles-vcf = "0.49" ++noodles-bed = "0.13" ++noodles-bgzf = "0.30" ++noodles-core = "0.15" ++noodles-csi = "0.35" ++noodles-gff = "0.33.0" ++noodles-tabix = "0.41" ++noodles-vcf = "0.57" + pbjson = "0.6" + pbjson-types = "0.6" + prost = "0.12" + rayon = "1.8" +-rocksdb-utils-lookup = "0.3" +-rocksdb = { version = "0.21", features = ["multi-threaded-cf"] } +-serde_json = { version = "1.0", features=["preserve_order"] } ++rocksdb = { version = "0.22", features = ["multi-threaded-cf"] } ++rocksdb-utils-lookup = "0.4" ++rustc-hash = "1.1.0" + serde = { version = "1.0", features = ["derive"] } ++serde_json = { version = "1.0", 
features=["preserve_order"] } + serde_with = { version = "3.6", features=["alloc", "macros", "indexmap_2"], default-features = false } + strum = { version = "0.26", features = ["strum_macros", "derive"] } + thiserror = "1.0" + tracing = "0.1" + tracing-subscriber = "0.3" +-rustc-hash = "1.1.0" +-noodles-gff = "0.27.0" +-erased-serde = "0.4.2" +  + [build-dependencies] +-prost-build = "0.12" +-pbjson-build = "0.6" + anyhow = "1.0" ++pbjson-build = "0.6" ++prost-build = "0.12" +  + [dev-dependencies] + insta = { version = "1.36", features = ["yaml"] } + log = "0.4" + pretty_assertions = "1.4" +-rstest = "0.18.2" ++rstest = "0.19.0" + temp_testdir = "0.2" + test-log = "0.2" + tracing-subscriber = "0.3" +diff --git a/build.rs b/build.rs +index 1c30e60..8c12acf 100644 +--- a/build.rs ++++ b/build.rs +@@ -27,6 +27,11 @@ fn main() -> Result<(), anyhow::Error> { + "annonars/gnomad/vep_gnomad4.proto", + "annonars/helixmtdb/base.proto", + "annonars/regions/clingen.proto", ++ "clinvar_data/class_by_freq.proto", ++ "clinvar_data/clinvar_public.proto", ++ "clinvar_data/extracted_vars.proto", ++ "clinvar_data/gene_impact.proto", ++ "clinvar_data/phenotype_link.proto", + ] + .iter() + .map(|f| root.join(f)) +@@ -51,7 +56,7 @@ fn main() -> Result<(), anyhow::Error> { + let descriptor_set = std::fs::read(descriptor_path).unwrap(); + pbjson_build::Builder::new() + .register_descriptors(&descriptor_set)? +- .build(&[".annonars"])?; ++ .build(&[".annonars", ".clinvar_data"])?; +  + Ok(()) + } +diff --git a/protos/annonars/clinvar/per_gene.proto b/protos/annonars/clinvar/per_gene.proto +index 1f4457e..1469d44 100644 +--- a/protos/annonars/clinvar/per_gene.proto ++++ b/protos/annonars/clinvar/per_gene.proto +@@ -5,87 +5,16 @@ syntax = "proto3"; + package annonars.clinvar.per_gene; +  + import "annonars/clinvar/minimal.proto"; +- +-// Impact on gene in ClinVar. 
+-enum Impact { +- // unknown +- IMPACT_UNKNOWN = 0; +- // 3' UTR variant +- IMPACT_THREE_PRIME_UTR_VARIANT = 1; +- // 5' UTR variant +- IMPACT_FIVE_PRIME_UTR_VARIANT = 2; +- // downstream gene variant +- IMPACT_DOWNSTREAM_TRANSCRIPT_VARIANT = 3; +- // frameshift variant +- IMPACT_FRAMESHIFT_VARIANT = 4; +- // inframe indel +- IMPACT_INFRAME_INDEL = 5; +- // start lost +- IMPACT_START_LOST = 6; +- // intron variant +- IMPACT_INTRON_VARIANT = 7; +- // missense variant +- IMPACT_MISSENSE_VARIANT = 8; +- // non-coding transcript variant +- IMPACT_NON_CODING_TRANSCRIPT_VARIANT = 9; +- // stop gained +- IMPACT_STOP_GAINED = 10; +- // no sequence alteration +- IMPACT_NO_SEQUENCE_ALTERATION = 11; +- // splice acceptor variant +- IMPACT_SPLICE_ACCEPTOR_VARIANT = 12; +- // splice donor variant +- IMPACT_SPLICE_DONOR_VARIANT = 13; +- // stop lost +- IMPACT_STOP_LOST = 14; +- // synonymous variant +- IMPACT_SYNONYMOUS_VARIANT = 15; +- // upstream gene variant +- IMPACT_UPSTREAM_TRANSCRIPT_VARIANT = 16; +-} +- +-// Counts of variants by impact. +-message GeneImpactRecordCounts { +- // Impact +- Impact impact = 1; +- // Counts for ACMG1..5 +- repeated uint32 counts = 2; +-} +- +-// Coarsened clinical significance +-enum CoarseClinicalSignificance { +- // unknown +- COARSE_CLINICAL_SIGNIFICANCE_UNKNOWN = 0; +- // benign / likely benign +- COARSE_CLINICAL_SIGNIFICANCE_BENIGN = 1; +- // uncertain signifiance +- COARSE_CLINICAL_SIGNIFICANCE_UNCERTAIN = 2; +- // pathogenic / likely pathogenic +- COARSE_CLINICAL_SIGNIFICANCE_PATHOGENIC = 3; +-} +- +-// Counts per frequency and impact. +-message GeneFreqRecordCounts { +- // Frequency +- CoarseClinicalSignificance coarse_clinsig = 1; +- // Counts for ACMG1..5 +- repeated uint32 counts = 2; +-} +- +-// Variants for a given genome release / assembly. 
+-message GeneVariantsForRelease { +- // Genome release / assembly +- string genome_release = 1; +- // Variants +- repeated annonars.clinvar.minimal.Record variants = 2; +-} ++import "clinvar_data/class_by_freq.proto"; ++import "clinvar_data/extracted_vars.proto"; ++import "clinvar_data/gene_impact.proto"; +  + // ClinVar detailed information per gene. + message ClinvarPerGeneRecord { + // Counts of variants per impact +- repeated GeneImpactRecordCounts per_impact_counts = 1; ++ optional clinvar_data.gene_impact.GeneImpactCounts per_impact_counts = 1; + // Counts of variants per impact / frequency +- repeated GeneFreqRecordCounts per_freq_counts = 2; ++ optional clinvar_data.class_by_freq.GeneCoarseClinsigFrequencyCounts per_freq_counts = 2; + // Variants for the given gene. +- repeated GeneVariantsForRelease variants = 3; ++ repeated clinvar_data.extracted_vars.ExtractedVcvRecord variants = 3; + } +diff --git a/protos/clinvar_data/class_by_freq.proto b/protos/clinvar_data/class_by_freq.proto +new file mode 100644 +index 0000000..11ab76c +--- /dev/null ++++ b/protos/clinvar_data/class_by_freq.proto +@@ -0,0 +1,29 @@ ++// Protocol buffers for types for class-by-freq tool. ++ ++syntax = "proto3"; ++ ++package clinvar_data.class_by_freq; ++ ++// Enumeration for coarse-grain classification. ++enum CoarseClinicalSignificance { ++ // unspecified coarse clinical significance ++ COARSE_CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0; ++ // Corresponds to "benign". ++ COARSE_CLINICAL_SIGNIFICANCE_BENIGN = 1; ++ // Corresponds to "uncertain". ++ COARSE_CLINICAL_SIGNIFICANCE_UNCERTAIN = 2; ++ // Corresponds to "pathogenic". ++ COARSE_CLINICAL_SIGNIFICANCE_PATHOGENIC = 3; ++} ++ ++// Output record. ++message GeneCoarseClinsigFrequencyCounts { ++ // The gene HGNC ID. ++ string hgnc_id = 1; ++ // The counts for (likely) pathogenic. ++ repeated uint32 pathogenic_counts = 2; ++ // The counts for uncertain significance. 
++ repeated uint32 uncertain_counts = 3; ++ // The counts for (likely) benign. ++ repeated uint32 benign_counts = 4; ++} +diff --git a/protos/clinvar_data/clinvar_public.proto b/protos/clinvar_data/clinvar_public.proto +new file mode 100644 +index 0000000..b6e8502 +--- /dev/null ++++ b/protos/clinvar_data/clinvar_public.proto +@@ -0,0 +1,2708 @@ ++// Protobuf file for representing the types from the ClinVar VCV XSD file. ++ ++syntax = "proto3"; ++ ++package clinvar_data.clinvar_public; ++ ++import "google/protobuf/timestamp.proto"; ++ ++// Enumeration describing connection between genes and variants. ++// ++// Corresponds to XSD type "GeneVariantRelationship". ++enum GeneVariantRelationship { ++ // unspecified gene variant ++ GENE_VARIANT_RELATIONSHIP_UNSPECIFIED = 0; ++ // corresponds to "variant within gene" ++ GENE_VARIANT_RELATIONSHIP_VARIANT_WITHIN_GENE = 1; ++ // corresponds to "gene overlapped by variant" and ++ // (legacy:) "genes overlapped by variant" ++ GENE_VARIANT_RELATIONSHIP_GENE_OVERLAPPED_BY_VARIANT = 2; ++ // corresponds to "variant near gene, upstream" and ++ // (legacy:) "near gene, upstream" ++ GENE_VARIANT_RELATIONSHIP_NEAR_GENE_UPSTREAM = 3; ++ // corresponds to "variant near gene, downstream" and ++ // (legacy:) "near gene, downstream" ++ GENE_VARIANT_RELATIONSHIP_NEAR_GENE_DOWNSTREAM = 4; ++ // corresponds to "asserted, but not computed" ++ GENE_VARIANT_RELATIONSHIP_ASSERTED_BUT_NOT_COMPUTED = 5; ++ // corresponds to "within multiple genes by overlap" ++ GENE_VARIANT_RELATIONSHIP_WITHIN_MULTIPLE_GENES_BY_OVERLAP = 6; ++ // corresponds to "within single gene" ++ GENE_VARIANT_RELATIONSHIP_WITHIN_SINGLE_GENE = 7; ++} ++ ++// Enumeration describing severity. 
++// ++// Corresponds to XSD type "typeSeverity" ++enum Severity { ++ // unspecified severity ++ SEVERITY_UNSPECIFIED = 0; ++ // corresponds to "mild" ++ SEVERITY_MILD = 1; ++ // corresponds to "moderate" ++ SEVERITY_MODERATE = 2; ++ // corresponds to "severe" ++ SEVERITY_SEVERE = 3; ++} ++ ++// Enumeration describing status. ++// ++// Corresponds to `typeStatus` in XSD. ++enum Status { ++ // unspecified status ++ STATUS_UNSPECIFIED = 0; ++ // corresponds to "current" ++ STATUS_CURRENT = 1; ++ // corresponds to "completed and retired" ++ STATUS_COMPLETED_AND_RETIRED = 2; ++ // corresponds to "delete" ++ STATUS_DELETE = 3; ++ // corresponds to "in development" ++ STATUS_IN_DEVELOPMENT = 4; ++ // corresponds to "reclassified" ++ STATUS_RECLASSIFIED = 5; ++ // corresponds to "reject" ++ STATUS_REJECT = 6; ++ // corresponds to "secondary" ++ STATUS_SECONDARY = 7; ++ // corresponds to "suppressed" ++ STATUS_SUPPRESSED = 8; ++ // corresponds to "under review" ++ STATUS_UNDER_REVIEW = 9; ++} ++ ++// Enumeration describing submitter review status. ++// ++// Corresponds to `typeSubmitterReviewStatusValue` in XSD.
++enum SubmitterReviewStatus { ++ // unspecified status ++ SUBMITTER_REVIEW_STATUS_UNSPECIFIED = 0; ++ // corresponds to "no classification provided" ++ SUBMITTER_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; ++ // corresponds to "no assertion criteria provided" ++ SUBMITTER_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; ++ // corresponds to "criteria provided, single submitter" ++ SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; ++ // corresponds to "reviewed by expert panel" ++ SUBMITTER_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 4; ++ // corresponds to "practice guideline" ++ SUBMITTER_REVIEW_STATUS_PRACTICE_GUIDELINE = 5; ++ // corresponds to "flagged submission" ++ SUBMITTER_REVIEW_STATUS_FLAGGED_SUBMISSION = 6; ++ // corresponds to "criteria provided, multiple submitters, no conflicts" ++ SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS = 7; ++ // corresponds to "criteria provided, conflicting classifications" ++ SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_CONFLICTING_CLASSIFICATIONS = 8; ++ // corresponds to "classified by single submitter" ++ SUBMITTER_REVIEW_STATUS_CLASSIFIED_BY_SINGLE_SUBMITTER = 9; ++ // corresponds to "reviewed by professional society" ++ SUBMITTER_REVIEW_STATUS_REVIEWED_BY_PROFESSIONAL_SOCIETY = 10; ++ // corresponds to "not classified by submitter" ++ SUBMITTER_REVIEW_STATUS_NOT_CLASSIFIED_BY_SUBMITTER = 11; ++ // corresponds to "classified by multiple submitters" ++ SUBMITTER_REVIEW_STATUS_CLASSIFIED_BY_MULTIPLE_SUBMITTERS = 12; ++} ++ ++// Enumeration describing zygosity. 
++enum Zygosity { ++ // unspecified zygosity ++ ZYGOSITY_UNSPECIFIED = 0; ++ // corresponds to "Homozygote" ++ ZYGOSITY_HOMOZYGOTE = 1; ++ // corresponds to "SingleHeterozygote" ++ ZYGOSITY_SINGLE_HETEROZYGOTE = 2; ++ // corresponds to "CompoundHeterozygote" ++ ZYGOSITY_COMPOUND_HETEROZYGOTE = 3; ++ // corresponds to "Hemizygote" ++ ZYGOSITY_HEMIZYGOTE = 4; ++ // corresponds to "not provided" ++ ZYGOSITY_NOT_PROVIDED = 5; ++} ++ ++// Enumeration describing assertion type attributes. ++enum Assertion { ++ // unspecified assertion type attribute ++ ASSERTION_UNSPECIFIED = 0; ++ // corresponds to "variation to disease" ++ ASSERTION_VARIATION_TO_DISEASE = 1; ++ // corresponds to "variation to included disease" ++ ASSERTION_VARIATION_TO_INCLUDED_DISEASE = 2; ++ // corresponds to "variation in modifier gene to disease" ++ ASSERTION_VARIATION_IN_MODIFIER_GENE_TO_DISEASE = 3; ++ // corresponds to "confers sensitivity" ++ ASSERTION_CONFERS_SENSITIVITY = 4; ++ // corresponds to "confers resistance" ++ ASSERTION_CONFERS_RESISTANCE = 5; ++ // corresponds to "variant to named protein" ++ ASSERTION_VARIANT_TO_NAMED_PROTEIN = 6; ++} ++ ++// Enumeration describing aggregate germline review status value. 
++enum AggregateGermlineReviewStatus { ++ // unspecified aggregate germline review status value ++ AGGREGATE_GERMLINE_REVIEW_STATUS_UNSPECIFIED = 0; ++ // corresponds to "no classification provided" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; ++ // corresponds to "no assertion criteria provided" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; ++ // corresponds to "criteria provided, single submitter" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; ++ // corresponds to "criteria provided, multiple submitters, no conflicts" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS = 4; ++ // corresponds to "criteria provided, conflicting classifications" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_CONFLICTING_CLASSIFICATIONS = 5; ++ // corresponds to "reviewed by expert panel" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 6; ++ // corresponds to "practice guideline" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_PRACTICE_GUIDELINE = 7; ++ // corresponds to "no classifications from unflagged records" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_NO_CLASSIFICATIONS_FROM_UNFLAGGED_RECORDS = 8; ++ // corresponds to "no classification for the single variant" ++ AGGREGATE_GERMLINE_REVIEW_STATUS_NO_CLASSIFICATION_FOR_THE_SINGLE_VARIANT = 9; ++} ++ ++// Enumeration describing aggregate somatic clinical impact review status value. 
++enum AggregateSomaticClinicalImpactReviewStatus { ++ // unspecified aggregate somatic clinical impact review status value ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_UNSPECIFIED = 0; ++ // corresponds to "no classification provided" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; ++ // corresponds to "no assertion criteria provided" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; ++ // corresponds to "criteria provided, single submitter" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; ++ // corresponds to "criteria provided, multiple submitters" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS = 4; ++ // corresponds to "reviewed by expert panel" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 5; ++ // corresponds to "practice guideline" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_PRACTICE_GUIDELINE = 6; ++ // corresponds to "no classifications from unflagged records" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_CLASSIFICATIONS_FROM_UNFLAGGED_RECORDS = 7; ++ // corresponds to "no classification for the single variant" ++ AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_CLASSIFICATION_FOR_THE_SINGLE_VARIANT = 8; ++} ++ ++// Enumeration describing aggregate oncogenicity review status value. 
++enum AggregateOncogenicityReviewStatus { ++ // unspecified aggregate oncogenicity review status value ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_UNSPECIFIED = 0; ++ // corresponds to "no classification provided" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; ++ // corresponds to "no assertion criteria provided" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; ++ // corresponds to "criteria provided, single submitter" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; ++ // corresponds to "criteria provided, multiple submitters, no conflicts" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS = 4; ++ // corresponds to "criteria provided, conflicting classifications" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_CRITERIA_PROVIDED_CONFLICTING_CLASSIFICATIONS = 5; ++ // corresponds to "reviewed by expert panel" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 6; ++ // corresponds to "practice guideline" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_PRACTICE_GUIDELINE = 7; ++ // corresponds to "no classifications from unflagged records" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_CLASSIFICATIONS_FROM_UNFLAGGED_RECORDS = 8; ++ // corresponds to "no classification for the single variant" ++ AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_CLASSIFICATION_FOR_THE_SINGLE_VARIANT = 9; ++} ++ ++// Enumeration describing origin. 
++enum Origin { ++ // unspecified origin ++ ORIGIN_UNSPECIFIED = 0; ++ // corresponds to "germline" ++ ORIGIN_GERMLINE = 1; ++ // corresponds to "somatic" ++ ORIGIN_SOMATIC = 2; ++ // corresponds to "de novo" ++ ORIGIN_DE_NOVO = 3; ++ // corresponds to "not provided" ++ ORIGIN_NOT_PROVIDED = 4; ++ // corresponds to "inherited" ++ ORIGIN_INHERITED = 5; ++ // corresponds to "maternal" ++ ORIGIN_MATERNAL = 6; ++ // corresponds to "paternal" ++ ORIGIN_PATERNAL = 7; ++ // corresponds to "uniparental" ++ ORIGIN_UNIPARENTAL = 8; ++ // corresponds to "biparental" ++ ORIGIN_BIPARENTAL = 9; ++ // corresponds to "not-reported" ++ ORIGIN_NOT_REPORTED = 10; ++ // corresponds to "tested-inconclusive" ++ ORIGIN_TESTED_INCONCLUSIVE = 11; ++ // corresponds to "unknown" ++ ORIGIN_UNKNOWN = 12; ++ // corresponds to "not applicable" ++ ORIGIN_NOT_APPLICABLE = 13; ++ // corresponds to "experimentally generated" ++ ORIGIN_EXPERIMENTALLY_GENERATED = 14; ++} ++ ++// Enumeration describing chromosome. ++enum Chromosome { ++ // unspecified chromosome ++ CHROMOSOME_UNSPECIFIED = 0; ++ // corresponds to "1" ++ CHROMOSOME_1 = 1; ++ // corresponds to "2" ++ CHROMOSOME_2 = 2; ++ // corresponds to "3" ++ CHROMOSOME_3 = 3; ++ // corresponds to "4" ++ CHROMOSOME_4 = 4; ++ // corresponds to "5" ++ CHROMOSOME_5 = 5; ++ // corresponds to "6" ++ CHROMOSOME_6 = 6; ++ // corresponds to "7" ++ CHROMOSOME_7 = 7; ++ // corresponds to "8" ++ CHROMOSOME_8 = 8; ++ // corresponds to "9" ++ CHROMOSOME_9 = 9; ++ // corresponds to "10" ++ CHROMOSOME_10 = 10; ++ // corresponds to "11" ++ CHROMOSOME_11 = 11; ++ // corresponds to "12" ++ CHROMOSOME_12 = 12; ++ // corresponds to "13" ++ CHROMOSOME_13 = 13; ++ // corresponds to "14" ++ CHROMOSOME_14 = 14; ++ // corresponds to "15" ++ CHROMOSOME_15 = 15; ++ // corresponds to "16" ++ CHROMOSOME_16 = 16; ++ // corresponds to "17" ++ CHROMOSOME_17 = 17; ++ // corresponds to "18" ++ CHROMOSOME_18 = 18; ++ // corresponds to "19" ++ CHROMOSOME_19 = 19; ++ // corresponds to 
"20" ++ CHROMOSOME_20 = 20; ++ // corresponds to "21" ++ CHROMOSOME_21 = 21; ++ // corresponds to "22" ++ CHROMOSOME_22 = 22; ++ // corresponds to "X" ++ CHROMOSOME_X = 23; ++ // corresponds to "Y" ++ CHROMOSOME_Y = 24; ++ // corresponds to "MT" ++ CHROMOSOME_MT = 25; ++ // corresponds to "PAR" ++ CHROMOSOME_PAR = 26; ++ // corresponds to "Un" ++ CHROMOSOME_UN = 27; ++} ++ ++// Enumeration describing comment type. ++enum CommentType { ++ // unspecified comment type ++ COMMENT_TYPE_UNSPECIFIED = 0; ++ // corresponds to "public" ++ COMMENT_TYPE_PUBLIC = 1; ++ // corresponds to "ConvertedByNCBI" ++ COMMENT_TYPE_CONVERTED_BY_NCB = 2; ++ // corresponds to "MissingFromAssembly" ++ COMMENT_TYPE_MISSING_FROM_ASSEMBLY = 3; ++ // corresponds to "GenomicLocationNotEstablished" ++ COMMENT_TYPE_GENOMIC_LOCATION_NOT_ESTABLISHED = 4; ++ // corresponds to "LocationOnGenomeAndProductNotAligned" ++ COMMENT_TYPE_LOCATION_ON_GENOME_AND_PRODUCT_NOT_ALIGNED = 5; ++ // corresponds to "DeletionComment" ++ COMMENT_TYPE_DELETION_COMMENT = 6; ++ // corresponds to "MergeComment" ++ COMMENT_TYPE_MERGE_COMMENT = 7; ++ // corresponds to "AssemblySpecificAlleleDefinition" ++ COMMENT_TYPE_ASSEMBLY_SPECIFIC_ALLELE_DEFINITION = 8; ++ // corresponds to "AlignmentGapMakesAppearInconsistent" ++ COMMENT_TYPE_ALIGNMENT_GAP_MAKES_APPEAR_INCONSISTENT = 9; ++ // corresponds to "ExplanationOfClassification" ++ COMMENT_TYPE_EXPLANATION_OF_CLASSIFICATION = 10; ++ // corresponds to "FlaggedComment" ++ COMMENT_TYPE_FLAGGED_COMMENT = 11; ++} ++ ++// Enumeration describing nucleotide sequence. 
++enum NucleotideSequence { ++ // unspecified nucleotide sequence ++ NUCLEOTIDE_SEQUENCE_UNSPECIFIED = 0; ++ // corresponds to "genomic, top-level" ++ NUCLEOTIDE_SEQUENCE_GENOMIC_TOP_LEVEL = 1; ++ // corresponds to "genomic, RefSeqGene" ++ NUCLEOTIDE_SEQUENCE_GENOMIC_REF_SEQ_GENE = 2; ++ // corresponds to "genomic" ++ NUCLEOTIDE_SEQUENCE_GENOMIC = 3; ++ // corresponds to "coding" ++ NUCLEOTIDE_SEQUENCE_CODING = 4; ++ // corresponds to "non-coding" ++ NUCLEOTIDE_SEQUENCE_NON_CODING = 5; ++ // corresponds to "protein" ++ NUCLEOTIDE_SEQUENCE_PROTEIN = 6; ++} ++ ++// Enumeration describing protein sequence. ++enum ProteinSequence { ++ // unspecified protein sequence ++ PROTEIN_SEQUENCE_UNSPECIFIED = 0; ++ // corresponds to "protein" ++ PROTEIN_SEQUENCE_PROTEIN = 1; ++} ++ ++// Enumeration describing phenotype set. ++enum PhenotypeSetType { ++ // unspecified phenotype set ++ PHENOTYPE_SET_TYPE_UNSPECIFIED = 0; ++ // corresponds to "Disease" ++ PHENOTYPE_SET_TYPE_DISEASE = 1; ++ // corresponds to "DrugResponse" ++ PHENOTYPE_SET_TYPE_DRUG_RESPONSE = 2; ++ // corresponds to "Finding" ++ PHENOTYPE_SET_TYPE_FINDING = 3; ++ // corresponds to "PhenotypeInstruction" ++ PHENOTYPE_SET_TYPE_PHENOTYPE_INSTRUCTION = 4; ++ // corresponds to "TraitChoice" ++ PHENOTYPE_SET_TYPE_TRAIT_CHOICE = 5; ++} ++ ++// Enumeration describing variation type. ++enum VariationType { ++ // unspecified variation type ++ VARIATION_TYPE_UNSPECIFIED = 0; ++ // corresponds to "Diplotype" ++ VARIATION_TYPE_DIPLOTYPE = 1; ++ // corresponds to "CompoundHeterozygote" ++ VARIATION_TYPE_COMPOUND_HETEROZYGOTE = 2; ++ // corresponds to "Distinct chromosomes" ++ VARIATION_TYPE_DISTINCT_CHROMOSOMES = 3; ++} ++ ++// Enumeration describing evidence type. ++// ++// Corresponds to "EvidenceType" in XSD. 
++enum EvidenceType { ++ // unspecified evidence type ++ EVIDENCE_TYPE_UNSPECIFIED = 0; ++ // corresponds to "Genetic" ++ EVIDENCE_TYPE_GENETIC = 1; ++ // corresponds to "Experimental" ++ EVIDENCE_TYPE_EXPERIMENTAL = 2; ++ // corresponds to "Population" ++ EVIDENCE_TYPE_POPULATION = 3; ++ // corresponds to "Computational" ++ EVIDENCE_TYPE_COMPUTATIONAL = 4; ++} ++ ++// Enumeration describing method list. ++// ++// Corresponds to "MethodListType" in XSD. ++enum MethodListType { ++ // unspecified method ++ METHOD_LIST_TYPE_UNSPECIFIED = 0; ++ // corresponds to "literature only" ++ METHOD_LIST_TYPE_LITERATURE_ONLY = 1; ++ // corresponds to "reference population" ++ METHOD_LIST_TYPE_REFERENCE_POPULATION = 2; ++ // corresponds to "case-control" ++ METHOD_LIST_TYPE_CASE_CONTROL = 3; ++ // corresponds to "clinical testing" ++ METHOD_LIST_TYPE_CLINICAL_TESTING = 4; ++ // corresponds to "in vitro" ++ METHOD_LIST_TYPE_IN_VITRO = 5; ++ // corresponds to "in vivo" ++ METHOD_LIST_TYPE_IN_VIVO = 6; ++ // corresponds to "research" ++ METHOD_LIST_TYPE_RESEARCH = 7; ++ // corresponds to "curation" ++ METHOD_LIST_TYPE_CURATION = 8; ++ // corresponds to "not provided" ++ METHOD_LIST_TYPE_NOT_PROVIDED = 9; ++ // corresponds to "provider interpretation" ++ METHOD_LIST_TYPE_PROVIDER_INTERPRETATION = 10; ++ // corresponds to "phenotyping only" ++ METHOD_LIST_TYPE_PHENOTYPING_ONLY = 11; ++} ++ ++// Enumeration describing HGVS types. ++enum HgvsType { ++ // unspecified HGVS type ++ HGVS_TYPE_UNSPECIFIED = 0; ++ // corresponds to "coding" ++ HGVS_TYPE_CODING = 1; ++ // corresponds to "genomic" ++ HGVS_TYPE_GENOMIC = 2; ++ // corresponds to "genomic, top-level" ++ HGVS_TYPE_GENOMIC_TOP_LEVEL = 3; ++ // corresponds to "non-coding" ++ HGVS_TYPE_NON_CODING = 4; ++ // corresponds to "protein" ++ HGVS_TYPE_PROTEIN = 5; ++} ++ ++// Enumeration describing clinical features affected status. 
++enum ClinicalFeaturesAffectedStatusType { ++ // unspecified status ++ CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_UNSPECIFIED = 0; ++ // corresponds to "present" ++ CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_PRESENT = 1; ++ // corresponds to "absent" ++ CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_ABSENT = 2; ++ // corresponds to "not tested" ++ CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_NOT_TESTED = 3; ++} ++ ++// Enumeration describing haplotype variation types. ++enum HaploVariationType { ++ // unspecified haplotype variation type ++ HAPLO_VARIATION_TYPE_UNSPECIFIED = 0; ++ // corresponds to "Haplotype" ++ HAPLO_VARIATION_TYPE_HAPLOTYPE = 1; ++ // corresponds to "Haplotype, single variant" ++ HAPLO_VARIATION_TYPE_HAPLOTYPE_SINGLE_VARIANT = 2; ++ // corresponds to "Variation" ++ HAPLO_VARIATION_TYPE_VARIATION = 3; ++ // corresponds to "Phase unknown" ++ HAPLO_VARIATION_TYPE_PHASE_UNKNOWN = 4; ++ // corresponds to "Haplotype defined by a single variant" ++ HAPLO_VARIATION_TYPE_HAPLOTYPE_DEFINED_BY_SINGLE_VARIANT = 5; ++} ++ ++// A structure to support reporting unformatted content, with type and ++// source specified. ++message Comment { ++ // The comment's value. ++ string value = 1; ++ // The optional comment data source. ++ optional string data_source = 2; ++ // The comment's type. ++ optional CommentType type = 3; ++} ++ ++// This structure is used to represent how an object described in the ++// submission relates to objects in other databases. ++message Xref { ++ // The name of the database. When there is an overlap with sequence ++ // databases, that name is used. ++ string db = 1; ++ // The identifier used by the database. Being exported as a string ++ // even though internally the database has rules for defining which databases use ++ // integer identifiers. ++ string id = 2; ++ // Used to differentiate between different types of identifiers that ++ // a database may provide. ++ optional string type = 3; ++ // Optional URL to the database entry.
++ optional string url = 4; ++ // The status; defaults to "current". ++ optional Status status = 5; ++} ++ ++// Description of a citation. ++message Citation { ++ // Local ID with source. ++ message IdType { ++ // The citation's value. ++ string value = 1; ++ // If there is an identifier, what database provides it. ++ string source = 2; ++ } ++ ++ /* nested elements */ ++ ++ // Optional list of IDs. ++ repeated IdType ids = 1; ++ // Optional URL. ++ optional string url = 2; ++ // Optional citation text. ++ optional string citation_text = 3; ++ ++ /* attributes */ ++ ++ // This is maintained distinct from publication types in PubMed and ++ // established by GTR curators. The default is 'general'. ++ optional string type = 4; ++ // Corresponds to the abbreviation reported by GTR. ++ optional string abbrev = 5; ++} ++ ++// The attribute is a general element to represent a defined set of data ++// qualified by an enumerated set of types. For each attribute element, the value will ++// be a character string and is optional. Source shall be used to store identifiers for ++// supplied data from sources other than the submitter (e.g. SequenceOntology). The data ++// submitted where Type="variation" shall be validated against sequence_alternation in ++// Sequence Ontology http://www.sequenceontology.org/. This is to be a generic version ++// of AttributeType and should be used with extension when it is used to specify Type ++// and its enumerations. ++message BaseAttribute { ++ // The attribute's value; can be empty. ++ optional string value = 1; ++ // The optional integer value. ++ optional int64 integer_value = 2; ++ // The optional date value. ++ optional google.protobuf.Timestamp date_value = 3; ++} ++ ++// Description of a nucleotide sequence expression. ++// ++// Corresponds to `typeNucleotideSequenceExpression` in XSD. ++message HgvsNucleotideExpression { ++ // The expression values. ++ string expression = 1; ++ // The type of the nucleotide sequence.
++ optional NucleotideSequence sequence_type = 2; ++ // Optional sequence accession version. ++ optional string sequence_accession_version = 3; ++ // Optional sequence accession. ++ optional string sequence_accession = 4; ++ // Optional sequence version. ++ optional int32 sequence_version = 5; ++ // Optional description of the change. ++ optional string change = 6; ++ // Optional assembly information. ++ optional string assembly = 7; ++ // Optional submission information. ++ optional string submitted = 8; ++ // Optional MANE Select flag. ++ optional bool mane_select = 9; ++ // Optional MANE Plus Clinical flag. ++ optional bool mane_plus_clinical = 10; ++} ++ ++ ++// Description of a protein sequence expression. ++// ++// Corresponds to `typeProteinSequenceExpression` in XSD. ++message HgvsProteinExpression { ++ // The expression values. ++ string expression = 1; ++ // Optional sequence accession version. ++ optional string sequence_accession_version = 2; ++ // Optional sequence accession. ++ optional string sequence_accession = 3; ++ // Optional sequence version. ++ optional int32 sequence_version = 4; ++ // Optional description of the change. ++ optional string change = 5; ++} ++ ++// A structure to represent an HGVS expression for a nucleotide sequence ++// variant, along with the predicted protein change and the predicted molecular ++// consequence. Also used to represent only the protein change if that is all that has ++// been reported. ++// ++// Corresponds to `typeHVSExpression` in XSD. ++message HgvsExpression { ++ /* nested elements */ ++ ++ // Optional nucleotide sequence expression. ++ optional HgvsNucleotideExpression nucleotide_expression = 1; ++ // Optional protein sequence expression. ++ optional HgvsProteinExpression protein_expression = 2; ++ // List of molecular consequences. ++ repeated Xref molecular_consequences = 3; ++ ++ /* attributes */ ++ ++ // Type of HGVS expression. ++ HgvsType type = 4; ++ // Optional assembly. 
++ optional string assembly = 5; ++} ++ ++// // Phenotype list details. ++// message PhenotypeListDetailsType { ++// // Local type for phenotype. ++// message Phenotype { ++// message XRefList { ++// repeated Xref xrefs = 1; ++// } ++ ++// /* nested elements */ ++// repeated XRefList xref_list = 1; ++ ++// /* attributes */ ++ ++// // `name` attribute ++// string name = 2; ++// // `target_id` attribute ++// int64 target_id = 3; ++// // `AffectedStatus` attribute. ++// optional string affected_status = 4; ++// } ++ ++// /* nested elements */ ++ ++// // List phenotypes. ++// repeated Phenotype phenotypes = 1; ++// // List of phenotype details. ++// repeated PhenotypeDetails phenotype_details = 2; ++ ++// /* attributes */ ++ ++// // Type of the phenotype set. ++// PhenotypeSetType phenotype_set_type = 3; ++// } ++ ++// Description of a software. ++message Software { ++ // Name of the software. ++ string name = 1; ++ // Version of the software; optional. ++ optional string version = 2; ++ // Purpose of the software; optional. ++ optional string purpose = 3; ++} ++ ++// Description of the history of a record. ++// ++// Called ``typeDescriptionHistory`` in XSD. ++message DescriptionHistory { ++ // The pathogenicity description. ++ string description = 1; ++ // The date of the description. ++ optional google.protobuf.Timestamp dated = 2; ++} ++ ++// Entry in an element set. ++// ++// Called ``SetElementSetType`` in XSD. ++message GenericSetElement { ++ // The element's value. ++ string value = 1; ++ // The element's type. ++ string type = 2; ++ // List of citations. ++ repeated Citation citations = 3; ++ // List of xrefs. ++ repeated Xref xrefs = 4; ++ // List of comments. ++ repeated Comment comments = 5; ++} ++ ++// Common type for an entry in a set of attributes. ++// ++// Called ``typeAttributeSet`` in XSD. ++message AttributeSetElement { ++ // Extend the BaseAttribute with a `type` field. ++ message Attribute { ++ // The base value. 
++ BaseAttribute base = 1; ++ // The type of the attribute. ++ string type = 2; ++ } ++ ++ // The attribute value. ++ Attribute attribute = 1; ++ // List of xrefs. ++ repeated Xref xrefs = 2; ++ // List of citations. ++ repeated Citation citations = 3; ++ // List of comments. ++ repeated Comment comments = 4; ++} ++ ++// Type to describe traits in various places. ++message Trait { ++ // Local type for trait relationship. ++ message TraitRelationship { ++ // Local enumeration for trait types. ++ // ++ // NB: only DrugResponseAndDisease is used in the XML. ++ enum Type { ++ // unspecified trait type ++ TYPE_UNSPECIFIED = 0; ++ // corresponds to "phenotype" ++ TYPE_PHENOTYPE = 1; ++ // corresponds to "Subphenotype" ++ TYPE_SUBPHENOTYPE = 2; ++ // corresponds to "DrugResponseAndDisease" ++ TYPE_DRUG_RESPONSE_AND_DISEASE = 3; ++ // corresponds to "co-occuring condition" ++ TYPE_CO_OCCURING_CONDITION = 4; ++ // corresponds to "Finding member" ++ TYPE_FINDING_MEMBER = 5; ++ } ++ ++ /* nested elements */ ++ ++ // names ++ // ++ // NB: in XSD this is explicitly given as unbounded but XML always has ++ // one element ++ repeated GenericSetElement names = 1; ++ // symbols (NB: never occur in the XML) ++ repeated GenericSetElement symbols = 2; ++ // attributes (NB: never occur in the XML) ++ repeated AttributeSetElement attributes = 3; ++ // Citation list. ++ repeated Citation citations = 4; ++ // Xref list. ++ repeated Xref xrefs = 5; ++ // Comment list. ++ repeated Comment comments = 6; ++ // Sources ++ repeated string sources = 7; ++ ++ /* attributes */ ++ ++ // Trait type. ++ Type type = 8; ++ } ++ ++ // names ++ repeated GenericSetElement names = 1; ++ // symbols ++ repeated GenericSetElement symbols = 2; ++ // attributes ++ repeated AttributeSetElement attributes = 3; ++ // Trait relationships ++ repeated TraitRelationship trait_relationships = 4; ++ // Citation list. ++ repeated Citation citations = 5; ++ // Xref list. ++ repeated Xref xrefs = 6; ++ // Comment list.
++ repeated Comment comments = 7; ++ // Sources ++ repeated string sources = 8; ++} ++ ++// Describes an indication. ++// ++// NB: Called "IndicationType" in the XSD. ++message Indication { ++ // Enumeration for the indication type. ++ enum Type { ++ // unspecified indication type. ++ TYPE_UNSPECIFIED = 0; ++ // corresponds to "Indication" ++ TYPE_INDICATION = 1; ++ } ++ ++ /* nested elements */ ++ ++ // Represents the value for the test indication as a name of a trait. ++ repeated Trait traits = 1; ++ // List of names. ++ repeated GenericSetElement names = 2; ++ // List of attributes. ++ repeated AttributeSetElement attributes = 3; ++ // List of xrefs. ++ repeated Xref xrefs = 4; ++ // List of citations. ++ repeated Citation citations = 5; ++ // List of comments. ++ repeated Comment comments = 6; ++ ++ /* attributes */ ++ ++ // The type of indication. ++ Type type = 7; ++} ++ ++// A set of ``Trait`` objects. ++// ++// NB: Called "ClinAsserTraitSetType" in the XSD. ++message TraitSet { ++ // Local type. ++ enum Type { ++ // unspecified type ++ TYPE_UNSPECIFIED = 0; ++ // corresponds to "Disease" ++ TYPE_DISEASE = 1; ++ // corresponds to "DrugResponse" ++ TYPE_DRUG_RESPONSE = 2; ++ // corresponds to "Finding" ++ TYPE_FINDING = 3; ++ // corresponds to "PhenotypeInstruction" ++ TYPE_PHENOTYPE_INSTRUCTION = 4; ++ // corresponds to "TraitChoice" ++ TYPE_TRAIT_CHOICE = 5; ++ } ++ ++ /* nested elements */ ++ ++ // The traits. ++ repeated Trait traits = 1; ++ // The names. ++ repeated GenericSetElement names = 2; ++ // The symbols. ++ repeated GenericSetElement symbols = 3; ++ // The attributes. ++ repeated AttributeSetElement attributes = 4; ++ // List of xrefs. ++ repeated Xref xrefs = 5; ++ // List of citations. ++ repeated Citation citations = 6; ++ // List of comments. ++ repeated Comment comments = 7; ++ ++ /* attributes */ ++ ++ // The type. ++ Type type = 8; ++ // Date of last evaluation. ++ optional google.protobuf.Timestamp date_last_evaluated = 9; ++ // ID. 
++ optional int64 id = 10; ++ // Whether contributes to aggregate classification. ++ optional bool contributes_to_aggregate_classification = 11; ++ // Lower level of evidence. ++ optional bool lower_level_of_evidence = 12; ++ // Explanation of multiple conditions. ++ optional string multiple_condition_explanation = 13; ++} ++ ++// Aggregated germline classification info. ++// ++// Corresponds to ``typeAggregatedGermlineClassification`` in XSD. ++message AggregatedGermlineClassification { ++ /* nested elements */ ++ ++ // The aggregate review status based on all germline submissions ++ // for this record. ++ AggregateGermlineReviewStatus review_status = 1; ++ // We are not providing an enumeration for the values we report ++ // for germline classification within the xsd. Details are in ++ // https://github.com/ncbi/clinvar/ClassificationOnClinVar.md ++ // ++ optional string description = 2; ++ // Explanation is used only when the description is 'conflicting ++ // data from submitters'. The element summarizes the conflict. ++ optional Comment explanation = 3; ++ // List of xrefs. ++ repeated Xref xrefs = 4; ++ // List of citations. ++ repeated Citation citations = 5; ++ // List of comments. ++ repeated Comment comments = 6; ++ ++ // History information. ++ repeated DescriptionHistory history_records = 7; ++ // List of conditions. ++ repeated TraitSet conditions = 8; ++ ++ /* attributes */ ++ ++ // Date of last evaluation. ++ optional google.protobuf.Timestamp date_last_evaluated = 9; ++ // Date of creation. ++ google.protobuf.Timestamp date_created = 10; ++ // Date of most recent submission. ++ google.protobuf.Timestamp most_recent_submission = 11; ++ // Number of submitters. ++ optional int32 number_of_submitters = 12; ++ // Number of submissions. ++ optional int32 number_of_submissions = 13; ++} ++ ++// Aggregated somatic clinical impact info.
++message AggregatedSomaticClinicalImpact { ++ /* nested elements */ ++ ++ // The aggregate review status based on all somatic clinical ++ // impact submissions for this record. ++ AggregateSomaticClinicalImpactReviewStatus review_status = 1; ++ // We are not providing an enumeration for the values we report ++ // for somatic clinical impact classification within the xsd. Details are in ++ // https://github.com/ncbi/clinvar/ClassificationOnClinVar.md ++ optional string description = 2; ++ // List of xrefs. ++ repeated Xref xrefs = 3; ++ // List of citations. ++ repeated Citation citations = 4; ++ // List of comments. ++ repeated Comment comments = 5; ++ ++ // History information. ++ repeated DescriptionHistory history_records = 6; ++ // List of conditions. ++ repeated TraitSet conditions = 7; ++ ++ /* attributes */ ++ ++ // Date of last evaluation. ++ optional google.protobuf.Timestamp date_last_evaluated = 8; ++ // Date of creation. ++ google.protobuf.Timestamp date_created = 9; ++ // Date of most recent submission. ++ google.protobuf.Timestamp most_recent_submission = 10; ++ // Number of submitters. ++ optional int32 number_of_submitters = 11; ++ // Number of submissions. ++ optional int32 number_of_submissions = 12; ++} ++ ++// Aggregated oncogenicity classification info. ++message AggregatedOncogenicityClassification { ++ /* nested elements */ ++ ++ // The aggregate review status based on all oncogenicity ++ // submissions for this record. ++ AggregateOncogenicityReviewStatus review_status = 1; ++ // We are not providing an enumeration for the values we report ++ // for oncogenicity classification within the xsd. Details are in ++ // https://github.com/ncbi/clinvar/ClassificationOnClinVar.md ++ optional string description = 2; ++ // List of xrefs. ++ repeated Xref xrefs = 3; ++ // List of citations. ++ repeated Citation citations = 4; ++ // List of comments. ++ repeated Comment comments = 5; ++ ++ // History information.
++ repeated DescriptionHistory history_records = 6; ++ // List of conditions. ++ repeated TraitSet conditions = 7; ++ ++ /* attributes */ ++ ++ // Date of last evaluation. ++ optional google.protobuf.Timestamp date_last_evaluated = 8; ++ // Date of creation. ++ google.protobuf.Timestamp date_created = 9; ++ // Date of most recent submission. ++ google.protobuf.Timestamp most_recent_submission = 10; ++ // Number of submitters. ++ optional int32 number_of_submitters = 11; ++ // Number of submissions. ++ optional int32 number_of_submissions = 12; ++} ++ ++// Used to bundle different types of Classifications (germline, ++// oncogenic, somatic clinical impact) ; Supports summary as ++// well as submission details. ++// ++// NB: called "typeAggregateClassificationSet" in XSD. ++message AggregateClassificationSet { ++ // The aggregate germline classification. ++ optional AggregatedGermlineClassification germline_classification = 1; ++ // The aggregate somatic clinical impact. ++ repeated AggregatedSomaticClinicalImpact somatic_clinical_impacts = 2; ++ // The aggregate oncogenicity classification. ++ optional AggregatedOncogenicityClassification oncogenicity_classification = 3; ++} ++ ++// Describes the clinical significance of a variant. ++// ++// Corresponds to `ClinicalSignificanceType` in XSD. ++message ClinicalSignificance { ++ /* contained elements */ ++ ++ // The optional review status. ++ optional SubmitterReviewStatus review_status = 1; ++ // Structure used to support old data of AlleleDescriptionSet ++ // within Co-occurenceSet. ++ // ++ // NB: unused in XML ++ optional string description = 2; ++ // Optional explanatory comment. ++ // ++ // Explanation is used only when the description is 'conflicting ++ // data from submitters' The element summarizes the conflict. ++ // ++ // NB: unused in XML ++ optional Comment explanation = 3; ++ // Optional list of xrefs. ++ repeated Xref xrefs = 4; ++ // Optional list of citations. 
++ repeated Citation citations = 5; ++ // Optional list of comments. ++ repeated Comment comments = 6; ++ ++ /* attributes */ ++ ++ // Date of last evaluation. ++ // ++ // NB: unused in XML ++ optional google.protobuf.Timestamp date_last_evaluated = 7; ++} ++ ++// This is to be used within co-occurrence set. ++// ++// Corresponds to `typeAlleleDescr` in XSD. ++message AlleleDescription { ++ // Enumeration for relative orientation. ++ // ++ // NB: unused in XML ++ enum RelativeOrientation { ++ // unspecified relative orientation ++ RELATIVE_ORIENTATION_UNSPECIFIED = 0; ++ // corresponds to "cis" ++ RELATIVE_ORIENTATION_CIS = 1; ++ // corresponds to "trans" ++ RELATIVE_ORIENTATION_TRANS = 2; ++ // corresponds to "unknown" ++ RELATIVE_ORIENTATION_UNKNOWN = 3; ++ } ++ ++ // The name of the allele. ++ string name = 1; ++ // Optional relative orientation. ++ // ++ // NB: Unused in XML ++ optional RelativeOrientation relative_orientation = 2; ++ // Optional zygosity. ++ optional Zygosity zygosity = 3; ++ // Optional clinical significance. ++ // ++ // Corresponds to `ClinicalSignificanceType` in XSD. ++ optional ClinicalSignificance clinical_significance = 4; ++} ++ ++// A structure to support reporting of an accession, its version, the ++// date its status changed, and text describing that change. ++// ++// Corresponds to `typeRecordHistory` in XSD. ++message RecordHistory { ++ /* nested elements */ ++ ++ // Optional comment on the history record. ++ optional Comment comment = 1; ++ ++ /* attributes */ ++ ++ // The accession. ++ string accession = 2; ++ // The version. ++ int32 version = 3; ++ // The date the record's status changed. ++ google.protobuf.Timestamp date_changed = 4; ++ // Attribute @VariationID is only populated for VCV, where @Accession ++ // is like VCV000000009 ++ optional int64 variation_id = 5; ++} ++ ++// Report classification of a variant for a SCV. ++// ++// Corresponds to `ClassificationTypeSCV` in XSD.
++message ClassificationScv { ++ // Clinical impact of a somatic variant. ++ message SomaticClinicalImpact { ++ // The somatic clinical impact value. ++ string value = 1; ++ ++ /* attributes */ ++ ++ // Type of the clinical impact assertion. ++ optional string clinical_impact_assertion_type = 2; ++ // Clinical impact significance. ++ optional string clinical_impact_clinical_significance = 3; ++ // Name of the drug for the therapeutic assertion. ++ optional string drug_for_therapeutic_assertion = 4; ++ } ++ ++ // Classification score description. ++ message ClassificationScore { ++ // The score's value. ++ double value = 1; ++ // The score's type; optional. ++ optional string type = 2; ++ } ++ ++ /* nested elements */ ++ ++ // The field's review status. ++ SubmitterReviewStatus review_status = 1; ++ // The germline classification; mutually exclusive with `somatic_clinical_impacts` ++ // and `oncogenicity_classification`. ++ optional string germline_classification = 2; ++ // Information on the clinical impact; mutually exclusive with `germline_classification` ++ // and `oncogenicity_classification`. ++ optional SomaticClinicalImpact somatic_clinical_impacts = 3; ++ // The oncogenicity classification; mutually exclusive with `germline_classification` ++ // and `somatic_clinical_impacts`. ++ optional string oncogenicity_classification = 4; ++ // Optional explanation of classification. ++ optional string explanation_of_classification = 5; ++ // List of classification scores. ++ repeated ClassificationScore classification_scores = 6; ++ // List of xrefs. ++ repeated Xref xrefs = 7; ++ // List of citations. ++ repeated Citation citations = 8; ++ // List of comments. ++ repeated Comment comments = 9; ++ ++ /* attributes */ ++ ++ // Date of last evaluation. ++ optional google.protobuf.Timestamp date_last_evaluated = 10; ++} ++ ++// Set of attributes for the primary submitter. Any additional submitters ++// are captured in the AdditionalSubmitters element.
++message SubmitterIdentifiers { ++ // Name of submitter. ++ string submitter_name = 1; ++ // Organization ID. ++ int64 org_id = 2; ++ // Organization category. ++ string org_category = 3; ++ // Organization abbreviation; optional. ++ optional string org_abbreviation = 4; ++} ++ ++// Definition of a species. ++message Species { ++ // Name of the species. ++ string name = 1; ++ // Optional taxonomy ID. ++ optional int32 taxonomy_id = 2; ++} ++ ++// Interpreted condition for an RCV record. ++// ++// Corresponds to `typeRCVInterpretedCondition` in XSD. ++message ClassifiedCondition { ++ // Condition value. ++ string value = 1; ++ // Database name. ++ optional string db = 2; ++ // Identifier in database. ++ optional string id = 3; ++} ++ ++// Inside ClinicalAssertion, a structure to support reporting of an ++// accession, its version, the date its status changed, and text describing that ++// change. ++// ++// Corresponds to `typeClinicalAssertionRecordHistory` in XSD. ++message ClinicalAssertionRecordHistory { ++ // Optional comment. ++ optional Comment comment = 1; ++ // Accession. ++ string accession = 2; ++ // Optional version. ++ optional int32 version = 3; ++ // Date of the record. ++ google.protobuf.Timestamp date_changed = 4; ++} ++ ++// Description of a functional consequence. ++// ++// Corresponds to `typeFunctionalConsequence` in XSD. ++message FunctionalConsequence { ++ /* nested elements */ ++ ++ // List of xrefs. ++ repeated Xref xrefs = 1; ++ // List of citations. ++ repeated Citation citations = 2; ++ // List of comments. ++ repeated Comment comments = 3; ++ ++ /* attributes */ ++ ++ // Value of functional consequence. ++ string value = 4; ++} ++ ++// Type for the tag `GeneralCitations`. ++message GeneralCitations { ++ // List of xrefs. ++ repeated Xref xrefs = 1; ++ // List of citations. ++ repeated Citation citations = 2; ++} ++ ++// This refers to the zygosity of the variant being asserted. ++message Cooccurrence { ++ // Optional zygosity. 
++ optional Zygosity zygosity = 1; ++ // The allele descriptions. ++ repeated AlleleDescription allele_descriptions = 2; ++ // The optional count. ++ optional int32 count = 3; ++} ++ ++// A structure to support reporting the name of a submitter, its ++// organization id, and its abbreviation and type. ++message Submitter { ++ // Enumeration of submitter kind. ++ enum Type { ++ // unspecified submitter type ++ TYPE_UNSPECIFIED = 0; ++ // corresponds to "primary" ++ TYPE_PRIMARY = 1; ++ // corresponds to "secondary" ++ TYPE_SECONDARY = 2; ++ // corresponds to "behalf" ++ TYPE_BEHALF = 3; ++ } ++ ++ // The submitter's identifier. ++ SubmitterIdentifiers submitter_identifiers = 1; ++ // The submitter type. ++ Type type = 2; ++} ++ ++ ++// Haploinsufficiency/Triplosensitivity of gene. ++message DosageSensitivity { ++ /* nested elements */ ++ ++ // Value. ++ string value = 1; ++ ++ /* attributes */ ++ ++ // Optional last evaluated date. ++ optional google.protobuf.Timestamp last_evaluated = 2; ++ // URL to ClinGen. ++ optional string clingen = 3; ++} ++ ++// A name with an optional type. ++// ++// Corresponds to `typeNames` in XSD. ++message OtherName { ++ // The name's value. ++ string value = 1; ++ // The name's type. ++ optional string type = 2; ++} ++ ++// A structure to support reporting of an accession, its version, the ++// date it was deleted and a free-text summary of why it was deleted. ++// ++// Corresponds to `typeDeletedSCV`. ++message DeletedScv { ++ // The accession. ++ string accession = 1; ++ // The version. ++ int32 version = 2; ++ // The date of deletion. ++ google.protobuf.Timestamp date_deleted = 3; ++} ++ ++// There can be multiple types of location, and the locations may have ++// identifiers in other databases. ++// ++// Corresponds to `typeLocation` in XSD. ++message Location { ++ // Local type for sequence location. ++ message SequenceLocation { ++ // Local enum for the assembly status. 
++ enum AssemblyStatus { ++ // unspecified assembly status ++ ASSEMBLY_STATUS_UNSPECIFIED = 0; ++ // corresponds to "current" ++ ASSEMBLY_STATUS_CURRENT = 1; ++ // corresponds to "previous" ++ ASSEMBLY_STATUS_PREVIOUS = 2; ++ } ++ ++ // forDisplay value. ++ optional bool for_display = 1; ++ // Name of assembly. ++ string assembly = 2; ++ // Chromosome of variant. ++ Chromosome chr = 3; ++ // Optional chromosome accession. ++ optional string accession = 4; ++ // Outer start position. ++ optional uint32 outer_start = 5; ++ // Inner start position. ++ optional uint32 inner_start = 6; ++ // Start position. ++ optional uint32 start = 7; ++ // Stop position. ++ optional uint32 stop = 8; ++ // Inner stop position. ++ optional uint32 inner_stop = 9; ++ // Outer stop position. ++ optional uint32 outer_stop = 10; ++ // Display start position. ++ optional uint32 display_start = 11; ++ // Display stop position. ++ optional uint32 display_stop = 12; ++ // Strand. ++ optional string strand = 13; ++ // Variant length. ++ optional uint32 variant_length = 14; ++ // Reference allele. ++ optional string reference_allele = 15; ++ // Alternate allele. ++ optional string alternate_allele = 16; ++ // Assembly accession version. ++ optional string assembly_accession_version = 17; ++ // Assembly status. ++ optional AssemblyStatus assembly_status = 18; ++ // Position in VCF. ++ optional uint32 position_vcf = 19; ++ // Reference allele in VCF. ++ optional string reference_allele_vcf = 20; ++ // Alternate allele in VCF. ++ optional string alternate_allele_vcf = 21; ++ // For display length. ++ optional uint32 for_display_length = 22; ++ } ++ ++ // Cytogenetic location is maintained independent of sequence ++ // location, and can be submitted or computed from the sequence location. ++ // ++ // Between 0 and 4 entries.
++ repeated string cytogenetic_locations = 1; ++ // Location on a defined sequence, with reference and alternate ++ // allele, and start /stop values depending on the specificity with which the ++ // variant location is known. The number system of offset 1, and ++ // right-justified to be consistent with HGVS location data. ++ repeated SequenceLocation sequence_locations = 2; ++ // The location of the variant relative to features within the gene. ++ repeated string gene_locations = 3; ++ // List of xrefs. ++ repeated Xref xrefs = 4; ++} ++ ++// Description of a SCV. ++// ++// Corresponds to "typeSCV" in XSD. ++message Scv { ++ // Optional title. ++ optional string title = 1; ++ // Accession. ++ string accession = 2; ++ // Version. ++ int32 version = 3; ++} ++ ++// // Description of a list of indications. ++// // ++// // Corresponds to `IndicationListType` in XSD. ++// message Indications { ++// // The indication may be a set of phenotypic descriptions. ++// message Indication { ++// // Xrefs. ++// repeated Xref xrefs = 1; ++// // Name of the indication. ++// string name = 2; ++// // Target ID. ++// int64 target_id = 3; ++// } ++ ++// // Type of the phenotype. ++// PhenotypeSetType phenotype_set_type = 1; ++// } ++ ++// // Description of a pharma. ++// message PharmaType { ++// // Local type for drug response. ++// message DrugResponse { ++// // // The phenotype details. ++// // repeated PhenotypeListDetailsType phenotype_details = 1; ++// // Xref list. ++// repeated Xref xrefs = 1; ++// // Name of drug. ++// string drug_name = 2; ++// // Target ID. ++// int64 target_id = 3; ++// } ++ ++// // Drug responses. ++// repeated DrugResponse drug_responses = 1; ++// } ++ ++// // Description of a single method. ++// // ++// // Corresponds to `typeMethodRefs` in XSD. ++// message MethodWithXrefs { ++// // Name of the method. ++// string method_name = 1; ++// // Xrefs. 
++// repeated Xref xrefs = 2; ++// } ++ ++// Structure to describe attributes of any family data in an observation. ++// If the details of the number of families and the de-identified pedigree id are not ++// available, use FamilyHistory to describe what type of family data is available. Can ++// also be used to report 'Yes' or 'No' if there are no more details. ++// ++// Corresponds to "FamilyInfo" in XSD. ++message FamilyData { ++ /* nested elements */ ++ ++ // Optional family history. ++ optional string family_history = 1; ++ ++ /* attributes */ ++ ++ // Number of families. ++ optional int32 num_families = 2; ++ // Number of families with variant. ++ optional int32 num_families_with_variant = 3; ++ // Number of families with segregation observed. ++ optional int32 num_families_with_segregation_observed = 4; ++ // Pedigree ID. ++ optional string pedigree_id = 5; ++ // Segregation observed (yes, no, number) ++ optional string segregation_observed = 6; ++} ++ ++// Description of a sample. ++// ++// Corresponds to `typeSample` in XSD. ++message Sample { ++ // Local type for sample description. ++ message SampleDescription { ++ // Description of sample. ++ optional Comment description = 1; ++ // Citation. ++ optional Citation citation = 2; ++ } ++ ++ // Local enumeration for presence in normal tissue. ++ enum SomaticVariantInNormalTissue { ++ // unspecified presence ++ SOMATIC_VARIANT_IN_NORMAL_TISSUE_UNSPECIFIED = 0; ++ // corresponds to "present" ++ SOMATIC_VARIANT_IN_NORMAL_TISSUE_PRESENT = 1; ++ // corresponds to "absent" ++ SOMATIC_VARIANT_IN_NORMAL_TISSUE_ABSENT = 2; ++ // corresponds to "not tested" ++ SOMATIC_VARIANT_IN_NORMAL_TISSUE_NOT_TESTED = 3; ++ } ++ ++ // Local enumeration for an age unit.
++ enum AgeUnit { ++ // unspecified age unit ++ AGE_UNIT_UNSPECIFIED = 0; ++ // corresponds to "days" ++ AGE_UNIT_DAYS = 1; ++ // corresponds to "weeks" ++ AGE_UNIT_WEEKS = 2; ++ // corresponds to "months" ++ AGE_UNIT_MONTHS = 3; ++ // corresponds to "years" ++ AGE_UNIT_YEARS = 4; ++ // corresponds to "weeks gestation" ++ AGE_UNIT_WEEKS_GESTATION = 5; ++ // corresponds to "months gestation" ++ AGE_UNIT_MONTHS_GESTATION = 6; ++ } ++ ++ // Local enumeration for an age type. ++ enum AgeType { ++ // unspecified age type ++ AGE_TYPE_UNSPECIFIED = 0; ++ // corresponds to "minimum" ++ AGE_TYPE_MINIMUM = 1; ++ // corresponds to "maximum" ++ AGE_TYPE_MAXIMUM = 2; ++ // corresponds to "single" ++ AGE_TYPE_SINGLE = 3; ++ } ++ ++ // Local enumeration for the affected status. ++ enum AffectedStatus { ++ // unspecified affected status ++ AFFECTED_STATUS_UNSPECIFIED = 0; ++ // corresponds to "yes" ++ AFFECTED_STATUS_YES = 1; ++ // corresponds to "no" ++ AFFECTED_STATUS_NO = 2; ++ // corresponds to "not provided" ++ AFFECTED_STATUS_NOT_PROVIDED = 3; ++ // corresponds to "unknown" ++ AFFECTED_STATUS_UNKNOWN = 4; ++ // corresponds to "not applicable" ++ AFFECTED_STATUS_NOT_APPLICABLE = 5; ++ } ++ ++ // Local type for an age. ++ message Age { ++ // The age value. ++ int32 value = 1; ++ // The age unit. ++ AgeUnit unit = 2; ++ // The age type. ++ AgeType type = 3; ++ } ++ ++ // Local enumeration for gender. ++ enum Gender { ++ // unspecified gender ++ GENDER_UNSPECIFIED = 0; ++ // corresponds to "male" ++ GENDER_MALE = 1; ++ // corresponds to "female" ++ GENDER_FEMALE = 2; ++ // corresponds to "mixed" ++ GENDER_MIXED = 3; ++ } ++ ++ // Local enumeration for SourceType. ++ enum SourceType { ++ // unspecified source type ++ SOURCE_TYPE_UNSPECIFIED = 0; ++ // corresponds to "submitter-generated" ++ SOURCE_TYPE_SUBMITTER_GENERATED = 1; ++ // corresponds to "data mining" ++ SOURCE_TYPE_DATA_MINING = 2; ++ } ++ ++ // The sample description. 
++ optional SampleDescription sample_description = 1; ++ // The sample origin. ++ optional Origin origin = 2; ++ // Sample ethnicity. ++ optional string ethnicity = 3; ++ // Sample geographic origin. ++ optional string geographic_origin = 4; ++ // Sample tissue. ++ optional string tissue = 5; ++ // Presence of variant in normal tissue. ++ optional SomaticVariantInNormalTissue somatic_variant_in_normal_tissue = 6; ++ // Somatic variant allele fraction. ++ optional string somatic_variant_allele_fraction = 7; ++ // Cell line name. ++ optional string cell_line = 8; ++ // Species. ++ optional Species species = 9; ++ // Age (range), max. size of 2. ++ repeated Age ages = 10; ++ // Strain. ++ optional string strain = 11; ++ // Affected status. ++ optional AffectedStatus affected_status = 12; ++ // Denominator, total individuals included in this observation set. ++ optional int32 numer_tested = 13; ++ // Denominator, total males included in this observation set. ++ optional int32 number_males = 14; ++ // Denominator, total females included in this observation set. ++ optional int32 number_females = 15; ++ // Denominator, total number chromosomes tested. Number affected ++ // and unaffected are captured in the element NumberObserved. ++ optional int32 number_chr_tested = 16; ++ // Gender should be used ONLY if explicit values are not ++ // available for number of males or females, and there is a need to indicate ++ // that the genders in the sample are known. ++ optional Gender gender = 17; ++ // Family information. ++ optional FamilyData family_data = 18; ++ // Optional proband ID. ++ optional string proband = 19; ++ // Optional indication. ++ optional Indication indication = 20; ++ // List of citations. ++ repeated Citation citations = 21; ++ // List of xrefs. ++ repeated Xref xrefs = 22; ++ // List of comments. ++ repeated Comment comments = 23; ++ // Source type. 
++ optional SourceType source_type = 24; ++} ++ ++// Details of a method used to generate variant calls or predict/report ++// functional consequence. The name of the platform should represent a sequencer or an ++// array, e.g. sequencing or array , e.g. capillary, 454, Helicos, Solexa, SOLiD. This ++// structure should also be used if the method is 'Curation'. ++// ++// Corresponds to `MethodType` in XSD. ++message Method { ++ // Local enumeration for result types. ++ enum ResultType { ++ // unspecified result type ++ RESULT_TYPE_UNSPECIFIED = 0; ++ // corresponds to "number of occurrences" ++ RESULT_TYPE_NUMBER_OF_OCCURRENCES = 1; ++ // corresponds to "p value" ++ RESULT_TYPE_P_VALUE = 2; ++ // corresponds to "odds ratio" ++ RESULT_TYPE_ODDS_RATIO = 3; ++ // corresponds to "variant call" ++ RESULT_TYPE_VARIANT_CALL = 4; ++ } ++ ++ // Local enumeration for SourceType. ++ enum SourceType { ++ // unspecified source type ++ SOURCE_TYPE_UNSPECIFIED = 0; ++ // corresponds to "submitter-generated" ++ SOURCE_TYPE_SUBMITTER_GENERATED = 1; ++ // corresponds to "data mining" ++ SOURCE_TYPE_DATA_MINING = 2; ++ // corresponds to "data review" ++ SOURCE_TYPE_DATA_REVIEW = 3; ++ } ++ ++ // Local type for method attribute. ++ message MethodAttribute { ++ // Local enumeration of attribute type. ++ enum AttributeType { ++ // unspecified attribute type ++ ATTRIBUTE_TYPE_UNSPECIFIED = 0; ++ // corresponds to "Location" ++ ATTRIBUTE_TYPE_LOCATION = 1; ++ // corresponds to "ControlsAppropriate" ++ ATTRIBUTE_TYPE_CONTROLS_APPROPRIATE = 2; ++ // corresponds to "MethodAppropriate" ++ ATTRIBUTE_TYPE_METHOD_APPROPRIATE = 3; ++ // corresponds to "TestName" ++ ATTRIBUTE_TYPE_TEST_NAME = 4; ++ // corresponds to "StructVarMethod" ++ ATTRIBUTE_TYPE_STRUCT_VAR_METHOD_TYPE = 5; ++ // corresponds to "ProbeAccession" ++ ATTRIBUTE_TYPE_PROBE_ACCESSION = 6; ++ } ++ ++ // The base value. ++ BaseAttribute base = 1; ++ // The attribute type. 
++ AttributeType type = 2; ++ } ++ ++ // Local type for observation method attribute. ++ message ObsMethodAttribute { ++ // Local enumeration for attribute type. ++ enum AttributeType { ++ // unspecified attribute type ++ ATTRIBUTE_TYPE_UNSPECIFIED = 0; ++ // corresponds to "MethodResult" ++ ATTRIBUTE_TYPE_METHOD_RESULT = 1; ++ // corresponds to "TestingLaboratory" ++ ATTRIBUTE_TYPE_TESTING_LABORATORY = 2; ++ } ++ ++ // The base value. ++ BaseAttribute base = 1; ++ // The attribute type. ++ AttributeType type = 2; ++ // Optional comments. ++ repeated Comment comments = 3; ++ } ++ ++ // Platform name. ++ optional string name_platform = 1; ++ // Platform type. ++ optional string type_platform = 2; ++ // Method purpose. ++ optional string purpose= 3; ++ // Method result type. ++ optional ResultType result_type = 4; ++ // Smallest reported. ++ optional string min_reported = 5; ++ // Largest reported. ++ optional string max_reported = 6; ++ // Reference standard. ++ optional string reference_standard = 7; ++ // List of citations. ++ repeated Citation citations = 8; ++ // List of xrefs. ++ repeated Xref xrefs = 9; ++ // Free text to enrich the description of the method and to ++ // provide information not captured in specific fields. ++ optional string description = 10; ++ // List of softwares used. ++ repeated Software software = 11; ++ // Source type. ++ optional SourceType source_type = 12; ++ // Method type. ++ MethodListType method_type = 13; ++ // Method attribute. ++ repeated MethodAttribute method_attributes = 14; ++ // ObsMethodAttribute is used to indicate an attribute specific ++ // to a particular method in conjunction with a particular observation . ++ repeated ObsMethodAttribute obs_method_attributes = 15; ++} ++ ++// This is a record per variant (Measure/@ID,AlleleID) as submitted for ++// accessioning in an SCV. ++// ++// Corresponds to "typeAlleleSCV" in XSD. ++message AlleleScv { ++ // Local type for Gene. 
++ message Gene { ++ /* nested elements */ ++ ++ // Gene name. ++ optional string name = 1; ++ // Used to set key words for retrieval or ++ // display about a gene, such as genes listed by the ++ // ACMG guidelines. ++ repeated string properties = 2; ++ // Used for gene specific identifiers ++ // such as MIM number, Gene ID, HGNC ID, etc. ++ repeated Xref xrefs = 3; ++ ++ /* attributes */ ++ ++ // Optional gene symbol. ++ optional string symbol = 4; ++ // Relationship between gene and variant. ++ optional GeneVariantRelationship relationship_type = 5; ++ } ++ ++ // Local type for MolecularConsequence. ++ message MolecularConsequence { ++ /* nested elements */ ++ ++ // Xref list. ++ repeated Xref xrefs = 1; ++ // Citation list. ++ repeated Citation citations = 2; ++ // Comment list. ++ repeated Comment comments = 3; ++ ++ /* attributes */ ++ ++ // RS id. ++ optional int64 rs = 4; ++ // Optional HGVS expression. ++ optional string hgvs = 5; ++ // Optional SO id. ++ optional string so_id = 6; ++ // Function. ++ string function = 7; ++ } ++ ++ /* nested elements */ ++ ++ // 0 to many genes (and related data ) related to the allele ++ // being reported. ++ repeated Gene genes = 1; ++ // Name provided by the submitter. ++ repeated OtherName names = 2; ++ // Variant type. ++ optional string variant_type = 3; ++ // Location. ++ optional Location location = 4; ++ // List of other names. ++ repeated OtherName other_names = 5; ++ // Single letter representation of the amino acid change and its ++ // location. ++ repeated string protein_changes = 6; ++ // List of xrefs. ++ repeated Xref xrefs = 7; ++ // List of citations. ++ repeated Citation citations = 8; ++ // List of comments. ++ repeated Comment comments = 9; ++ // Currently redundant with the MolecularConsequence element of ++ // the HGVS element? ++ repeated MolecularConsequence molecular_consequences = 10; ++ // Functional consequences. ++ repeated FunctionalConsequence functional_consequences = 11; ++ // Attributes. 
++ repeated AttributeSetElement attributes = 12; ++ ++ /* attributes */ ++ ++ // Allele ID. ++ optional int64 allele_id = 13; ++} ++ ++// This is a record of a haplotype in SCV. ++// ++// Corresponds to `typeHaplotypeSCV` in XSD. ++message HaplotypeScv { ++ // The list of alleles in the haplotype. ++ repeated AlleleScv simple_alleles = 1; ++ // The preferred representation of the haplotype. ++ optional string name = 2; ++ // Names other than 'preferred' used for the haplotype. ++ repeated OtherName other_names = 3; ++ // Classification of the variant. ++ optional AggregateClassificationSet classification = 4; ++ // Functional consequences of the variant. ++ repeated FunctionalConsequence functional_consequences = 5; ++ // List of attributes. ++ repeated AttributeSetElement attributes = 6; ++ // List of citations. ++ repeated Citation citations = 7; ++ // List of cross-references. ++ repeated Xref xrefs = 8; ++ // List of comments. ++ repeated Comment comments = 9; ++ ++ // Variation ID. ++ optional int64 variation_id = 10; ++ // Number of copies. ++ optional int32 number_of_copies = 11; ++ // Number of chromosomes. ++ optional int32 number_of_chromosomes = 12; ++} ++ ++// Used to report genotypes, be they simple or complex diplotypes. ++// ++// Corresponds to "typeGenotypeSCV" in XSD. ++message GenotypeScv { ++ /* nested elements */ ++ ++ // Simple alleles; mutually exclusive with `haplotypes`. ++ repeated AlleleScv simple_alleles = 1; ++ // Haplotype; mutually exclusive with `simple_alleles`. ++ // ++ // Allows more than 2 haplotypes per genotype to support ++ // representation of ploidy. ++ repeated HaplotypeScv haplotypes = 2; ++ // Optional name. ++ optional string name = 3; ++ // Other names used for the genotype. ++ repeated OtherName other_names = 4; ++ // The variation type. ++ VariationType variation_type = 5; ++ // Functional consequences. ++ repeated FunctionalConsequence functional_consequences = 6; ++ // Attributes. 
++ repeated AttributeSetElement attributes = 7; ++ // List of citations. ++ repeated Citation citations = 8; ++ // List of xrefs. ++ repeated Xref xrefs = 9; ++ // List of comments. ++ repeated Comment comments = 10; ++ ++ /* attributes */ ++ ++ // Variation ID. ++ optional int64 variation_id = 11; ++} ++ ++// // Observation of one piece of evidence. ++// message typeEvidenceObservation { ++// // // Method of data capture, not method of evaluation. ++// // repeated MethodWithXrefs methods = 1; ++// // Modes of inheritance. ++// repeated string modes_of_inheritance = 1; ++// // List of citations. ++// repeated Citation citations = 2; ++// // Comments. ++// repeated Comment comments = 3; ++// // // Observed phenotypes. ++// // repeated PhenotypeListDetailsType observed_phenotypes = 5; ++// // // Indications. ++// // repeated Indications indications = 6; ++// // The submitter's identifiers. ++// repeated Submitter submitter_identifiers = 4; ++// // Families. ++// optional string families = 5; ++// // Individuals. ++// optional string individuals = 6; ++// // Segregation. ++// optional string segregations = 7; ++// // Other gene. ++// optional string other_gene = 8; ++// // Same gene. ++// optional string same_gene = 9; ++// // Evident type. ++// EvidenceType evidence_type = 10; ++// // Allele frequency. ++// optional string allele_frequency = 11; ++// // Allele origin. ++// optional string allele_origin = 12; ++// // Ethnicity. ++// optional string ethnicity = 13; ++// // Geographic origin. ++// optional string geographic_origin = 14; ++// } ++ ++// Documents in what populations or samples an allele or genotype has ++// been observed relative to the described trait. Summary observations can be ++// registered per submitted assertion, grouped by common citation, study type, origin, ++// ethnicity, tissue, cell line, and species data. Not all options are valid per study ++// type, but these will not be validated in the xsd. 
++// ++// Corresponds to `ObservationSet` in XSD. ++message ObservedIn { ++ // Local enum for the method type. ++ enum MethodType { ++ // unspecified method type ++ METHOD_TYPE_UNSPECIFIED = 0; ++ // corresponds to "literature only" ++ METHOD_TYPE_LITERATURE_ONLY = 1; ++ // corresponds to "reference population" ++ METHOD_TYPE_REFERENCE_POPULATION = 2; ++ // corresponds to "case-control" ++ METHOD_TYPE_CASE_CONTROL = 3; ++ // corresponds to "clinical testing" ++ METHOD_TYPE_CLINICAL_TESTING = 4; ++ // corresponds to "in vitro" ++ METHOD_TYPE_IN_VITRO = 5; ++ // corresponds to "in vivo" ++ METHOD_TYPE_IN_VIVO = 6; ++ // corresponds to "inferred from source" ++ METHOD_TYPE_INFERRED_FROM_SOURCE = 7; ++ // corresponds to "research" ++ METHOD_TYPE_RESEARCH = 8; ++ } ++ ++ // Local struct for attributes based on `BaseAttribute`. ++ message ObservedDataAttribute { ++ // Local enum for the observed data type. ++ enum Type { ++ // unspecified observed data attribute type ++ TYPE_UNSPECIFIED = 0; ++ // corresponds to "Description" ++ TYPE_DESCRIPTION = 1; ++ // corresponds to "VariantAlleles" ++ TYPE_VARIANT_ALLELES = 2; ++ // corresponds to "SubjectsWithVariant" ++ TYPE_SUBJECTS_WITH_VARIANT = 3; ++ // corresponds to "SubjectsWithDifferentCausativeVariant" ++ TYPE_SUBJECTS_WITH_DIFFERENT_CAUSATIVE_VARIANT = 4; ++ // corresponds to "VariantChromosomes" ++ TYPE_VARIANT_CHROMOSOMES = 5; ++ // corresponds to "IndependentObservations" ++ TYPE_INDEPENDENT_OBSERVATIONS = 6; ++ // corresponds to "SingleHeterozygote" ++ TYPE_SINGLE_HETEROZYGOUS = 7; ++ // corresponds to "CompoundHeterozygote" ++ TYPE_COMPOUND_HETEROZYGOUS = 8; ++ // corresponds to "Homozygote" ++ TYPE_HOMOZYGOUS = 9; ++ // corresponds to "Hemizygote" ++ TYPE_HEMIZYGOUS = 10; ++ // corresponds to "NumberMosaic" ++ TYPE_NUMBER_MOSAIC = 11; ++ // corresponds to "ObservedUnspecified" ++ TYPE_OBSERVED_UNSPECIFIED = 12; ++ // corresponds to "AlleleFrequency" ++ TYPE_ALLELE_FREQUENCY = 13; ++ // corresponds to 
"SecondaryFinding" ++ TYPE_SECONDARY_FINDING = 14; ++ // corresponds to "GenotypeAndMOIConsistent" ++ TYPE_GENOTYPE_AND_MOI_CONSISTENT = 15; ++ // corresponds to "UnaffectedFamilyMemberWithCausativeVariant" ++ TYPE_UNAFFECTED_FAMILY_MEMBER_WITH_CAUSATIVE_VARIANT = 16; ++ // corresponds to "HetParentTransmitNormalAllele" ++ TYPE_HET_PARENT_TRANSMIT_NORMAL_ALLELE = 17; ++ // corresponds to "CosegregatingFamilies" ++ TYPE_COSEGREGATING_FAMILIES = 18; ++ // corresponds to "InformativeMeioses" ++ TYPE_INFORMATIVE_MEIOSES = 19; ++ // corresponds to "SampleLocalID" ++ TYPE_SAMPLE_LOCAL_ID = 20; ++ // corresponds to "SampleVariantID" ++ TYPE_SAMPLE_VARIANT_ID = 21; ++ // corresponds to "FamilyHistory" ++ TYPE_FAMILY_HISTORY = 22; ++ // corresponds to "NumFamiliesWithVariant" ++ TYPE_NUM_FAMILIES_WITH_VARIANT = 23; ++ // corresponds to "NumFamiliesWithSegregationObserved" ++ TYPE_NUM_FAMILIES_WITH_SEGREGATION_OBSERVED = 24; ++ // corresponds to "SegregationObserved" ++ TYPE_SEGREGATION_OBSERVED = 25; ++ } ++ ++ // base ++ BaseAttribute base = 1; ++ // type ++ Type type = 2; ++ } ++ ++ // This is an AttributeSet, there will be 1 attribute supported ++ // by optional citations, xrefs and comment. There must be at least one ++ // ObservedData Set, but can be any number. For each ObservedData set the ++ // Attribute will be either decimal or string depending on type. The value will ++ // be stored here, but decimals will be entered to the database as a string. ++ message ObservedData { ++ // Attributes. ++ repeated ObservedDataAttribute attributes = 1; ++ // Severity. ++ optional Severity severity = 2; ++ // Citation list. ++ repeated Citation citations = 3; ++ // Xref list. ++ repeated Xref xrefs = 4; ++ // Comment list. ++ repeated Comment comments = 5; ++ } ++ ++ // Sample. ++ Sample sample = 1; ++ // Observed data. ++ repeated ObservedData observed_data = 2; ++ // Co-occurence set. ++ repeated Cooccurrence cooccurrence_sets = 3; ++ // TraitSet. 
++ optional TraitSet trait_set = 4; ++ // Citation list. ++ repeated Citation citations = 5; ++ // Xref list. ++ repeated Xref xrefs = 6; ++ // Comment list. ++ repeated Comment comments = 7; ++} ++ ++// A clinical assertion as submitted (SCV record). ++// ++// Corresponds to `MeasureTraitType` in XSD and `<ClinicalAssertion>` in XML ++message ClinicalAssertion { ++ // Local type for ClinVarSubmissionID. ++ message ClinvarSubmissionId { ++ // The identifier provided by the submitter to facilitate ++ // identification of records corresponding to their submissions. If not ++ // provided by a submitter, NCBI generates one. If provided by ++ // submitter, that is represented in localKeyIsSubmitted. ++ string local_key = 1; ++ // Optional title. ++ optional string title = 2; ++ // Optional indication whether local key has been submitted. ++ optional bool local_key_is_submitted = 3; ++ // Optional assembly of submission. ++ optional string submitted_assembly = 4; ++ } ++ ++ // Local type for attribute set. ++ message AttributeSetElement { ++ // Local enum for types. ++ enum Type { ++ // unspecified type ++ TYPE_UNSPECIFIED = 0; ++ // Corresponds to "ModeOfInheritance" ++ TYPE_MODE_OF_INHERITANCE = 1; ++ // Corresponds to "Penetrance" ++ TYPE_PENETRANCE = 2; ++ // Corresponds to "AgeOfOnset" ++ TYPE_AGE_OF_ONSET = 3; ++ // Corresponds to "Severity" ++ TYPE_SEVERITY = 4; ++ // Corresponds to "ClassificationHistory" ++ TYPE_CLASSIFICATION_HISTORY = 5; ++ // Corresponds to "SeverityDescription" ++ TYPE_SEVERITY_DESCRIPTION = 6; ++ // Corresponds to "AssertionMethod" ++ TYPE_ASSERTION_METHOD = 7; ++ } ++ ++ // The base value. ++ BaseAttribute attribute = 1; ++ // The type of the attribute. ++ Type type = 2; ++ // List of xrefs. ++ repeated Xref xrefs = 3; ++ // List of citations. ++ repeated Citation citations = 4; ++ // List of comments. ++ repeated Comment comments = 5; ++ } ++ ++ // Local type for `ClinVarAccession` ++ message ClinvarAccession { ++ // Accession. 
++ string accession = 1; ++ // Version. ++ int32 version = 2; ++ // The submitter's identifier. ++ SubmitterIdentifiers submitter_identifiers = 3; ++ // The date that the latest update to the submitted ++ // record (SCV) became public in ClinVar. ++ optional google.protobuf.Timestamp date_updated = 4; ++ // DateCreated is the date when the record first became ++ // public in ClinVar. ++ optional google.protobuf.Timestamp date_created = 5; ++ } ++ ++ // Local enum for record status. ++ enum RecordStatus { ++ // unspecified record status ++ RECORD_STATUS_UNSPECIFIED = 0; ++ // corresponds to "current" ++ RECORD_STATUS_CURRENT = 1; ++ // corresponds to "replaced" ++ RECORD_STATUS_REPLACED = 2; ++ // corresponds to "removed" ++ RECORD_STATUS_REMOVED = 3; ++ } ++ ++ /* nested elements */ ++ ++ // The ClinVar submission ID. ++ ClinvarSubmissionId clinvar_submission_id = 1; ++ // The ClinVar SCV accessions. ++ ClinvarAccession clinvar_accession = 2; ++ // Optional list of additional submitters. ++ repeated Submitter additional_submitters = 3; ++ // Record status. ++ RecordStatus record_status = 4; ++ // Replaces; mutually exclusive with replaceds ++ repeated string replaces = 5; ++ // Replaced list; mutually exclusive with replaces ++ repeated ClinicalAssertionRecordHistory replaceds = 6; ++ ++ // SCV classifications. ++ repeated ClassificationScv classifications = 7; ++ // The assertion. ++ Assertion assertion = 8; ++ // Attributes. ++ repeated AttributeSetElement attributes = 9; ++ // Observed in. ++ repeated ObservedIn observed_ins = 10; ++ ++ // Allele in SCV; mutually exclusive with haplotype/genotype. ++ optional AlleleScv simple_allele = 11; ++ // Haplotype in SCV; mutually exclusive with allele/genotype. ++ optional HaplotypeScv haplotype = 12; ++ // Genotype in SCV; mutually exclusive with allele/haplotype. ++ optional GenotypeScv genotype = 13; ++ ++ // The trait set. ++ TraitSet trait_set = 14; ++ // List of citations. 
++ repeated Citation citations = 15; ++ // Optional study name. ++ optional string study_name = 16; ++ // Optional study description. ++ optional string study_description = 17; ++ // List of comments. ++ repeated Comment comments = 18; ++ // List of submissions. ++ repeated string submission_names = 19; ++ ++ /* attributes */ ++ ++ // Date of creation. ++ google.protobuf.Timestamp date_created = 20; ++ // Date of last update. ++ google.protobuf.Timestamp date_last_updated = 21; ++ // Date of submission. ++ google.protobuf.Timestamp submission_date = 22; ++ // ID. ++ optional uint64 id = 23; ++ // Whether it is an FDA recognized database. ++ optional bool fda_recognized_database = 24; ++} ++ ++// This is a record per variant (Measure/@ID,AlleleID). ++// ++// Corresponds to "typeAllele" in XSD. ++message Allele { ++ // Local type for Gene. ++ message Gene { ++ /* nested elements */ ++ ++ // Gene's locations. ++ repeated Location locations = 1; ++ // OMIM ID. ++ repeated uint64 omims = 2; ++ // Haploinsufficiency. ++ optional DosageSensitivity haploinsufficiency = 3; ++ // Triplosensitivity. ++ optional DosageSensitivity triplosensitivity = 4; ++ // Used to set key words for retrieval or ++ // display about a gene, such as genes listed by the ++ // ACMG guidelines. ++ repeated string properties = 5; ++ ++ /* attributes */ ++ ++ // Optional gene symbol. ++ optional string symbol = 6; ++ // Full gene name. ++ string full_name = 7; ++ // Gene ID. ++ int64 gene_id = 8; ++ // Optional HGNC ID. ++ optional string hgnc_id = 9; ++ // Source of gene (calculated or submitted). ++ string source = 10; ++ // Relationship between gene and variant. ++ optional GeneVariantRelationship relationship_type = 11; ++ } ++ ++ // Local type for allele frequency. ++ message AlleleFrequency { ++ // Value. ++ double value = 1; ++ // Source. ++ string source = 2; ++ // URL. ++ optional string url = 3; ++ } ++ ++ // Local type for GlobalMinorAlleleFrequency. 
++ message GlobalMinorAlleleFrequency { ++ // Value. ++ double value = 1; ++ // Source. ++ string source = 2; ++ // Minor allele. ++ optional string minor_allele = 3; ++ // URL. ++ optional string url = 4; ++ } ++ ++ // Local type for allele name. ++ message Name { ++ // The name's value. ++ string value = 1; ++ // The name's type. ++ optional string type = 2; ++ } ++ ++ /* nested elements */ ++ ++ // Gene list. ++ repeated Gene genes = 1; ++ // Name. ++ string name = 2; ++ // Canonical SPDI. ++ optional string canonical_spdi = 3; ++ // Variant type(s). ++ repeated string variant_types = 4; ++ // Location. ++ repeated Location locations = 5; ++ // List of other names. ++ repeated OtherName other_names = 6; ++ // These are the single-letter representations of the protein change. ++ repeated string protein_changes = 7; ++ // List of HGVS expressions. ++ repeated HgvsExpression hgvs_expressions = 8; ++ // Aggregated classifications. ++ optional AggregateClassificationSet classifications = 9; ++ // List of xrefs. ++ repeated Xref xrefs = 10; ++ // List of comments. ++ repeated Comment comments = 11; ++ // List of functional consequences. ++ repeated FunctionalConsequence functional_consequences = 12; ++ // Allele frequencies. ++ repeated AlleleFrequency allele_frequencies = 13; ++ // Global minor allele frequencies. ++ optional GlobalMinorAlleleFrequency global_minor_allele_frequency = 14; ++ ++ /* attributes */ ++ ++ // Allele ID. ++ int64 allele_id = 15; ++ // Variation ID. ++ int64 variation_id = 16; ++} ++ ++// This is a record of one or more simple alleles on the same chromosome ++// molecule. ++// ++// Corresponds to `typeHaplotype` in XSD ++message Haplotype { ++ // The list of alleles in the haplotype. ++ repeated Allele simple_alleles = 1; ++ // The preferred representation of the haplotype. ++ string name = 2; ++ // The type of the haplotype. ++ HaploVariationType variation_type = 3; ++ // Names other than 'preferred' used for the haplotype. 
++ repeated OtherName other_names = 4; ++ // List of all the HGVS expressions valid for, or used to submit, ++ // a variant. ++ repeated HgvsExpression hgvs_expressions = 5; ++ // Classifications of the variant. ++ optional AggregateClassificationSet classifications = 6; ++ // Functional consequences of the variant. ++ repeated FunctionalConsequence functional_consequences = 7; ++ // List of cross-references. ++ repeated Xref xrefs = 8; ++ // List of comments. ++ repeated Comment comments = 9; ++ ++ // Variation ID. ++ int64 variation_id = 10; ++ // Number of copies. ++ optional int32 number_of_copies = 11; ++ // Number of chromosomes. ++ optional int32 number_of_chromosomes = 12; ++} ++ ++// This element is used for alleles that were not directly part of a ++// submission but were part of a complex submission. They have no direct submitted ++// classification, but are being reported for a complete representation of all alleles ++// in ClinVar. Compare to ClassifiedRecord. ++message IncludedRecord { ++ // Local type for tag `ClassifiedVariation`. ++ message ClassifiedVariation { ++ // Variation ID. ++ int64 variation_id = 1; ++ // Optional accession. ++ optional string accession = 2; ++ // Version. ++ int32 version = 3; ++ } ++ ++ // Simple allele; mutually exclusive with haplotype. ++ optional Allele simple_allele = 1; ++ // Haplotype; mutually exclusive with simple_allele. ++ optional Haplotype haplotype = 2; ++ // Aggregate classification sets. ++ optional AggregateClassificationSet classifications = 3; ++ // List of submitted records. ++ repeated Scv submitted_classifications = 4; ++ // Maintains the list of classified variants represented in ++ // this submission, although not submitted with an Classification ++ // independently. ++ repeated ClassifiedVariation classified_variations = 5; ++ // List of general citations. ++ repeated GeneralCitations general_citations = 6; ++} ++ ++// Used to report genotypes, be they simple or complex diplotypes. 
++// ++// Corresponds to "typeGenotype" in XSD. ++message Genotype { ++ /* nested elements */ ++ ++ // Simple allele; mutually exclusive with `haplotype`. ++ repeated Allele simple_alleles = 1; ++ // Haplotype; mutually exclusive with `simple_allele`. ++ // ++ // Allows more than 2 haplotypes per genotype to support ++ // representation of ploidy. ++ repeated Haplotype haplotypes = 2; ++ // Optional name. ++ string name = 3; ++ // The variation type. ++ VariationType variation_type = 4; ++ // Names other than 'preferred' used for the Genotype. ++ repeated OtherName other_names = 5; ++ // HGVS descriptions. ++ repeated HgvsExpression hgvs_expressions = 6; ++ // Functional consequences. ++ repeated FunctionalConsequence functional_consequences = 7; ++ // Aggregated classifications. ++ optional AggregateClassificationSet classifications = 8; ++ // List of xrefs. ++ repeated Xref xrefs = 9; ++ // List of citations. ++ repeated Citation citations = 10; ++ // List of comments. ++ repeated Comment comments = 11; ++ // Attributes. ++ repeated AttributeSetElement attributes = 12; ++ ++ /* attributes */ ++ ++ // Variation ID. ++ optional int64 variation_id = 13; ++} ++ ++// Corresponds to "typeRCV" in XSD. ++message RcvAccession { ++ // Local type for ClassifiedConditionList. ++ message ClassifiedConditionList { ++ /* nested elements */ ++ ++ // List of interpreted conditions. ++ repeated ClassifiedCondition classified_conditions = 1; ++ ++ /* attributes */ ++ ++ // Trait set ID. ++ optional int64 trait_set_id = 2; ++ } ++ ++ // Local type for GermlineClassification. ++ // ++ // The aggregate review status based on ++ // all germline submissions for this record. ++ message GermlineClassification { ++ // Local type for Description. ++ message Description { ++ // The description. ++ string value = 1; ++ ++ /* attributes */ ++ ++ // The date of the description. ++ optional google.protobuf.Timestamp date_last_evaluated = 2; ++ // The number of submissions. 
++ optional uint32 submission_count = 3; ++ } ++ ++ // The aggregate review status based on ++ // all germline submissions for this ++ // record. ++ AggregateGermlineReviewStatus review_status = 1; ++ // The germline classification description. ++ Description description = 2; ++ } ++ ++ // Local type for SomaticClinicalImpact. ++ // ++ // The aggregate review status based on ++ // all somatic clinical impact submissions for this ++ // record. ++ message SomaticClinicalImpact { ++ // Local type for Description. ++ message Description { ++ // The description. ++ string value = 1; ++ ++ /* attributes */ ++ ++ // Clinical impact assertion type. ++ optional string clinical_impact_assertion_type = 2; ++ // Clinical impact significance ++ optional string clinical_impact_clinical_significance = 3; ++ // The date of the description. ++ optional google.protobuf.Timestamp date_last_evaluated = 4; ++ // The number of submissions. ++ optional uint32 submission_count = 5; ++ } ++ ++ // The aggregate review status based on ++ // all somatic clinical impact submissions for this ++ // record. ++ AggregateSomaticClinicalImpactReviewStatus review_status = 1; ++ // The somatic clinical impact descriptions. ++ repeated Description descriptions = 2; ++ } ++ ++ // Local type for OncogenicityClassification. ++ message OncogenicityClassification { ++ // Local type for Description. ++ message Description { ++ // The description. ++ string value = 1; ++ ++ /* attributes */ ++ ++ // The date of the description. ++ optional google.protobuf.Timestamp date_last_evaluated = 2; ++ // The number of submissions. ++ optional uint32 submission_count = 3; ++ } ++ ++ // The aggregate review status based on ++ // all oncogenic submissions for this record. ++ AggregateOncogenicityReviewStatus review_status = 1; ++ // The oncogenicity description. ++ Description description = 2; ++ } ++ ++ // Local type for RCV classifications. ++ message RcvClassifications { ++ // Germline classification. 
++ optional GermlineClassification germline_classification = 1; ++ // Somatic clinical impact. ++ optional SomaticClinicalImpact somatic_clinical_impact = 2; ++ // Oncogenicity classification. ++ optional OncogenicityClassification oncogenicity_classification = 3; ++ } ++ ++ /* nested elements */ ++ ++ // The list of classified conditions. ++ optional ClassifiedConditionList classified_condition_list = 1; ++ // The list of RCV classifications. ++ RcvClassifications rcv_classifications = 2; ++ // The list of RCV accessions this record has replaced. ++ repeated RecordHistory replaceds = 3; ++ ++ /* attributes */ ++ ++ // Optional title. ++ optional string title = 4; ++ // Accession. ++ string accession = 5; ++ // Version. ++ int32 version = 6; ++} ++ ++// This element is restricted to variation records for which an explicit ++// classification was submitted. Compare to IncludedRecord, which provides aggregate ++// information about variants that are part of another submission, but for which ++// ClinVar has *not* received a submission specific to that variant independently. ++message ClassifiedRecord { ++ // Local type for tag `RCVList`. ++ message RcvList { ++ // The RCV record. ++ repeated RcvAccession rcv_accessions = 1; ++ // The number of submissions (SCV accessions) referencing the VariationID. ++ optional int32 submission_count = 2; ++ // The number of independent observations. ++ optional int32 independent_observations = 3; ++ } ++ ++ // Local type for the attribute `@MappingType`. ++ enum MappingType { ++ // unspecified mapping type ++ MAPPING_TYPE_UNSPECIFIED = 0; ++ // corresponds to "Name" ++ MAPPING_TYPE_NAME = 1; ++ // corresponds to "Xref" ++ MAPPING_TYPE_XREF = 2; ++ } ++ ++ // Local type for the tag `TraitMapping`. ++ message TraitMapping { ++ // Local type for the tag "MedGen" ++ message Medgen { ++ // Name. ++ string name = 1; ++ // CUI. 
++ string cui = 2; ++ } ++ ++ /* nested elements */ ++ repeated Medgen medgens = 1; ++ ++ /* attributes */ ++ ++ // ID of clinical assertion. ++ int64 clinical_assertion_id = 2; ++ // The trait type. ++ string trait_type = 3; ++ // The mapping type. ++ MappingType mapping_type = 4; ++ // The mapping value. ++ string mapping_value = 5; ++ // The mapping reference. ++ string mapping_ref = 6; ++ } ++ ++ // Describes a single sequence change relative to a ++ // contiguous region of a chromosome or the mitochondrion. ++ // ++ // Mutually exclusive with `haplotype` and `genotype`. ++ optional Allele simple_allele = 1; ++ // Describes multiple sequence changes on one of the ++ // chromosomes of a homologous pair or on the mitochondrion. ++ // ++ // Mutually exclusive with `simple_allele` and `genotype`. ++ optional Haplotype haplotype = 2; ++ // Describes the combination of sequence changes on each ++ // chromosome of a homologous pair. ++ // ++ // Mutually exclusive with `simple_allele` and `haplotype`. ++ optional Genotype genotype = 3; ++ // List of RCV records. ++ RcvList rcv_list = 4; ++ // List of classifications. ++ AggregateClassificationSet classifications = 5; ++ // List of clinical assertions. ++ repeated ClinicalAssertion clinical_assertions = 6; ++ // This element is used to report how each user-submitted ++ // trait name was mapped to a MedGen CUI identifier and a preferred name. ++ // The structure may be used in the future to report, when a trait is ++ // identified by a source's identifier (e.g. MIM number), the preferred ++ // name used by that source at the time of submission. For MappingType ++ // XRef, MappingRef is the database name and MappingValue is the database's ++ // identifier. For MappingType Name, MappingRef is Alternate or Preferred, ++ // and MappingValue is the submitted name of the trait. ClinicalAssertionID ++ // is an integer identifier that corresponds 1:1 to the SCV assigned to the ++ // submission. 
++ repeated TraitMapping trait_mappings = 7; ++ // List of deleted SCVs. ++ repeated DeletedScv deleted_scvs = 8; ++ // List of general citations. ++ repeated GeneralCitations general_citations = 9; ++} ++ ++// This element groups the set of data specific to a VariationArchive ++// record, namely the summary data of what has been submitted about a ++// VariationID AND for Classified records only, the content each ++// submission (SCV) provided. ++// ++// Type for the `` type. ++message VariationArchive { ++ // Enumeration for `@RecordType`. ++ enum RecordType { ++ // unspecified record type ++ RECORD_TYPE_UNSPECIFIED = 0; ++ // corresponds to "included" ++ RECORD_TYPE_INCLUDED = 1; ++ // corresponds to "classified" ++ RECORD_TYPE_CLASSIFIED = 2; ++ } ++ ++ // Enumeration for `@RecordStatus`. ++ enum RecordStatus { ++ // unspecified record status ++ RECORD_STATUS_UNSPECIFIED = 0; ++ // corresponds to "current" ++ RECORD_STATUS_CURRENT = 1; ++ // corresponds to "previous" ++ RECORD_STATUS_PREVIOUS = 2; ++ // corresponds to "replaced" ++ RECORD_STATUS_REPLACED = 3; ++ // correspodns to "deleted" ++ RECORD_STATUS_DELETED = 4; ++ } ++ ++ /* attributes */ ++ ++ // Numeric variation ID. ++ int64 variation_id = 1; ++ // This is ClinVar's name for the variant. ClinVar uses this term in ++ // its web displays ++ string variation_name = 2; ++ // Type of the variant. ++ string variation_type = 3; ++ // DateCreated is the date when the record first became public in ++ // ClinVar. ++ google.protobuf.Timestamp date_created = 4; ++ // The date the record was last updated in the public database. The ++ // update may be a change to one of the submitted records (SCVs) or ++ // annotation added to the aggregate record by NCBI staff. This date ++ // is independent of a version change; annotated added by NCBI may ++ // change without representing a change in the version. 
++ google.protobuf.Timestamp date_last_updated = 5; ++ // This date is of the most recent submitted record (SCV) for the ++ // VCV; it may reflect a new submitted record or an update to a submitted record. ++ google.protobuf.Timestamp most_recent_submission = 6; ++ // Accession assigned to the variant, or set of variants, that was ++ // Classified ++ string accession = 7; ++ // Version of record and suffix for accession. ++ int32 version = 8; ++ // Number of submitters in record. ++ int32 number_of_submitters = 9; ++ // Number of submissions in record. ++ int32 number_of_submissions = 10; ++ // Record type. ++ RecordType record_type = 11; ++ ++ /* contained elements */ ++ ++ // The record's status. ++ RecordStatus record_status = 12; ++ // Pointer to the replacing record; optional. ++ RecordHistory replaced_by = 13; ++ // The list of VCV accessions this record has replaced. ++ repeated RecordHistory replaceds = 14; ++ // Comment on the record; optional. ++ Comment comment = 15; ++ // Specification of the species. ++ Species species = 16; ++ ++ // This element describes the classification of a single ++ // allele, haplotype, or genotype based on all submissions to ClinVar. This ++ // differs from the element IncludedRecord, which describes simple alleles ++ // or haplotypes, referenced in ClassifiedRecord, but for which no explicit ++ // classification was submitted. Once that variation is described, details ++ // are added about the phenotypes being classified, the classification, the ++ // submitters providing the classifications, and all supported evidence. ++ // ++ // NB: mutually exclusive with `included_record`. ++ optional ClassifiedRecord classified_record = 17; ++ ++ // This element describes a single allele or haplotype ++ // included in submissions to ClinVar, but for which no explicit ++ // classification was submitted. It also references the submissions and the ++ // Classified records that include them. 
++ // ++ // NB: mutually exclusive with `classified_record`. ++ optional IncludedRecord included_record = 18; ++} ++ ++// The element to group each VariationArchive element in the release ++// ++// Type for the `` tag. ++message ClinvarVariationRelease { ++ /* attributes */ ++ ++ // The current release. ++ google.protobuf.Timestamp release_date = 1; ++ ++ /* contained elements */ ++ ++ // List of `` tags. ++ repeated VariationArchive variation_archives = 2; ++} +diff --git a/protos/clinvar_data/extracted_vars.proto b/protos/clinvar_data/extracted_vars.proto +new file mode 100644 +index 0000000..3e227b3 +--- /dev/null ++++ b/protos/clinvar_data/extracted_vars.proto +@@ -0,0 +1,73 @@ ++// Protocol buffers to store the extracted variants from ClinVar. ++ ++syntax = "proto3"; ++ ++package clinvar_data.extracted_vars; ++ ++import "clinvar_data/clinvar_public.proto"; ++ ++// Enumeration for the type of the variant. ++enum VariationType { ++ // unspecified variation type ++ VARIATION_TYPE_UNSPECIFIED = 0; ++ // Corresponds to "insertion". ++ VARIATION_TYPE_INSERTION = 1; ++ // Corresponds to "deletion". ++ VARIATION_TYPE_DELETION = 2; ++ // Corresponds to "single nucleotide variant". ++ VARIATION_TYPE_SNV = 3; ++ // Corresponds to "indel". ++ VARIATION_TYPE_INDEL = 4; ++ // Corresponds to "duplication". ++ VARIATION_TYPE_DUPLICATION = 5; ++ // Corresponds to "tandem duplication". ++ VARIATION_TYPE_TANDEM_DUPLICATION = 6; ++ // Corresponds to "structural variant". ++ VARIATION_TYPE_STRUCTURAL_VARIANT = 7; ++ // Corresponds to "copy number gain". ++ VARIATION_TYPE_COPY_NUMBER_GAIN = 8; ++ // Corresponds to "copy number loss". ++ VARIATION_TYPE_COPY_NUMBER_LOSS = 9; ++ // Corresponds to "protein only". ++ VARIATION_TYPE_PROTEIN_ONLY = 10; ++ // Corresponds to "microsatellite". ++ VARIATION_TYPE_MICROSATELLITE = 11; ++ // Corresponds to "inversion". ++ VARIATION_TYPE_INVERSION = 12; ++ // Corresponds to "other". 
++ VARIATION_TYPE_OTHER = 13; ++} ++ ++// Accession with version. ++message VersionedAccession { ++ // The accession. ++ string accession = 1; ++ // The version. ++ int32 version = 2; ++} ++ ++// Protocol buffer for storing essential information of one RCV. ++message ExtractedRcvRecord { ++ // The accession. ++ VersionedAccession accession = 1; ++ // Title of RCV. ++ string title = 2; ++} ++ ++// Protocol buffer for storing essential information of one VCV. ++message ExtractedVcvRecord { ++ // The accession. ++ VersionedAccession accession = 1; ++ // List of aggregated RCVs. ++ repeated ExtractedRcvRecord rcvs = 2; ++ // Name of VCV. ++ string name = 3; ++ // The type of the variant. ++ VariationType variation_type = 4; ++ // Classifications. ++ clinvar_data.clinvar_public.AggregateClassificationSet classifications = 5; ++ // The sequence location on one reference. ++ clinvar_data.clinvar_public.Location.SequenceLocation sequence_location = 6; ++ // List of HGNC IDs. ++ repeated string hgnc_ids = 7; ++} +diff --git a/protos/clinvar_data/gene_impact.proto b/protos/clinvar_data/gene_impact.proto +new file mode 100644 +index 0000000..eca9695 +--- /dev/null ++++ b/protos/clinvar_data/gene_impact.proto +@@ -0,0 +1,91 @@ ++// Protocol buffers for types for per-gene impact. ++ ++syntax = "proto3"; ++ ++package clinvar_data.gene_impact; ++ ++import "clinvar_data/clinvar_public.proto"; ++ ++// Enumeration with the variant consequence. 
++enum GeneImpact { ++ // unspecified impact ++ GENE_IMPACT_UNSPECIFIED = 0; ++ // Corresponds to "3_prime_UTR_variant" ++ GENE_IMPACT_THREE_PRIME_UTR_VARIANT = 1; ++ // Corresponds to "5_prime_UTR_variant" ++ GENE_IMPACT_FIVE_PRIME_UTR_VARIANT = 2; ++ // Corresponds to "downstream_gene_variant" ++ GENE_IMPACT_DOWNSTREAM_TRANSCRIPT_VARIANT = 3; ++ // Corresponds to "frameshift_variant" ++ GENE_IMPACT_FRAMESHIFT_VARIANT = 4; ++ // Corresponds to "inframe_indel" ++ GENE_IMPACT_INFRAME_INDEL = 5; ++ // Corresponds to "start_lost" ++ GENE_IMPACT_START_LOST = 6; ++ // Corresponds to "intron_variant" ++ GENE_IMPACT_INTRON_VARIANT = 7; ++ // Corresponds to "missense_variant" ++ GENE_IMPACT_MISSENSE_VARIANT = 8; ++ // Corresponds to "non_codnig_transcript_variant" ++ GENE_IMPACT_NON_CODING_TRANSCRIPT_VARIANT = 9; ++ // Corresponds to "stop_gained" ++ GENE_IMPACT_STOP_GAINED = 10; ++ // Corresponds to "no_sequence_alteration" ++ GENE_IMPACT_NO_SEQUENCE_ALTERATION = 11; ++ // Corresponds to "splice_acceptor_variant" ++ GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT = 12; ++ // Corresponds to "splice_donor_variant" ++ GENE_IMPACT_SPLICE_DONOR_VARIANT = 13; ++ // Corresponds to "stop_lost" ++ GENE_IMPACT_STOP_LOST = 14; ++ // Corresponds to "synonymous_variant" ++ GENE_IMPACT_SYNONYMOUS_VARIANT = 15; ++ // Corresponds to "upstream_gene_variant" ++ GENE_IMPACT_UPSTREAM_TRANSCRIPT_VARIANT = 16; ++} ++ ++ ++// Enumeration for ACMG clinical significance. 
++enum ClinicalSignificance { ++ // unspecified clinical significance ++ CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0; ++ // Corresponds to "Benign" ++ CLINICAL_SIGNIFICANCE_BENIGN = 1; ++ // Corresponds to "Likely benign" ++ CLINICAL_SIGNIFICANCE_LIKELY_BENIGN = 2; ++ // Corresponds to "Uncertain significance" ++ CLINICAL_SIGNIFICANCE_UNCERTAIN_SIGNIFICANCE = 3; ++ // Corresponds to "Likely pathogenic" ++ CLINICAL_SIGNIFICANCE_LIKELY_PATHOGENIC = 4; ++ // Corresponds to "Pathogenic" ++ CLINICAL_SIGNIFICANCE_PATHOGENIC = 5; ++ // Corresponds to "not provided" ++ CLINICAL_SIGNIFICANCE_NOT_PROVIDED = 6; ++ // Corresponds to "other" ++ CLINICAL_SIGNIFICANCE_OTHER = 7; ++} ++ ++ ++// Entry for storing counts of `GeneImpact` and `ClinicalSignificance`. ++message GeneImpactCounts { ++ // Stores the counts for a gene impact. ++ message ImpactCounts { ++ // The gene impact. ++ GeneImpact gene_impact = 1; ++ // The counts for the benign impact. ++ uint32 count_benign = 2; ++ // The counts for the likely benign impact. ++ uint32 count_likely_benign = 3; ++ // The counts for the uncertain significance impact. ++ uint32 count_uncertain_significance = 4; ++ // The counts for the likely pathogenic impact. ++ uint32 count_likely_pathogenic = 5; ++ // The counts for the pathogenic impact. ++ uint32 count_pathogenic = 6; ++ } ++ ++ // The gene HGNC ID. ++ string hgnc_id = 1; ++ // The impact counts. ++ repeated ImpactCounts impact_counts = 2; ++} +diff --git a/protos/clinvar_data/phenotype_link.proto b/protos/clinvar_data/phenotype_link.proto +new file mode 100644 +index 0000000..1fe96c7 +--- /dev/null ++++ b/protos/clinvar_data/phenotype_link.proto +@@ -0,0 +1,28 @@ ++// Protocol buffers to store the gene-to-phenotype links extracted from ClinVar. ++ ++syntax = "proto3"; ++ ++package clinvar_data.phenotype_link; ++ ++import "clinvar_data/extracted_vars.proto"; ++ ++ ++// A record linking gene to phenotype. 
++message GenePhenotypeRecord { ++ // VCV ++ clinvar_data.extracted_vars.VersionedAccession vcv = 1; ++ // SCV ++ clinvar_data.extracted_vars.VersionedAccession scv = 2; ++ // Clinical germline classification. ++ string germline_classification = 3; ++ // Submitter ++ string submitter_name = 4; ++ // Gene HGNC ID ++ repeated string hgnc_ids = 5; ++ // Linked OMIM terms ++ repeated string omim_terms = 6; ++ // Linked MONDO terms ++ repeated string mondo_terms = 7; ++ // Linked HPO terms ++ repeated string hpo_terms = 8; ++} +diff --git a/protos/fetch.sh b/protos/fetch.sh +new file mode 100644 +index 0000000..dfccab0 +--- /dev/null ++++ b/protos/fetch.sh +@@ -0,0 +1,19 @@ ++#!/usr/bin/bash ++ ++SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) ++ ++# Helper script to download protobuf files from elsewhere. ++ ++cd $SCRIPT_DIR ++ ++mkdir -p clinvar_data ++for name in class_by_freq clinvar_public extracted_vars gene_impact phenotype_link; do ++ wget \ ++ -O clinvar_data/$name.proto \ ++ https://raw.githubusercontent.com/varfish-org/clinvar-this/main/protos/clinvar_data/pbs/$name.proto ++done ++sed \ ++ -i \ ++ -e 's/clinvar_data\.pbs\./clinvar_data./g' \ ++ -e 's|clinvar_data/pbs/|clinvar_data/|g' \ ++ clinvar_data/*.proto +diff --git a/src/clinvar_genes/cli/import.rs b/src/clinvar_genes/cli/import.rs +index 7f88b88..47e7b32 100644 +--- a/src/clinvar_genes/cli/import.rs ++++ b/src/clinvar_genes/cli/import.rs +@@ -5,14 +5,14 @@ use std::{collections::HashSet, io::BufRead, sync::Arc}; + use clap::Parser; + use prost::Message; +  ++use crate::pbs::clinvar::class_by_freq::GeneCoarseClinsigFrequencyCounts; ++use crate::pbs::clinvar::extracted_vars::ExtractedVcvRecord; ++use crate::pbs::clinvar::gene_impact::GeneImpactCounts; + use crate::pbs::clinvar::minimal::{ + ClinicalSignificance, Record, ReferenceAssertion, ReviewStatus, + }; +-use crate::pbs::clinvar::per_gene::{ +- ClinvarPerGeneRecord, CoarseClinicalSignificance, GeneFreqRecordCounts, 
GeneImpactRecordCounts, +- GeneVariantsForRelease, Impact, +-}; +-use crate::{clinvar_genes, clinvar_minimal, common}; ++use crate::pbs::clinvar::per_gene::ClinvarPerGeneRecord; ++use crate::{clinvar_minimal, common}; +  + /// Command line arguments for `tsv import` sub command. + #[derive(Parser, Debug, Clone)] +@@ -42,7 +42,7 @@ pub struct Args { + /// Load per-impact JSONL file. + fn load_per_impact_jsonl( + path_per_impact_jsonl: &str, +-) -> Result>, anyhow::Error> { ++) -> Result, anyhow::Error> { + // Open reader, possibly decompressing gziped files. + let reader: Box = if path_per_impact_jsonl.ends_with(".gz") { + Box::new(flate2::read::GzDecoder::new(std::fs::File::open( +@@ -57,18 +57,8 @@ fn load_per_impact_jsonl( + let reader = std::io::BufReader::new(reader); + for line in reader.lines() { + let line = line?; +- let record = +- serde_json::from_str::(&line)?; +- +- let mut count_out = Vec::new(); +- for (impact, counts) in record.counts { +- let impact: Impact = impact.into(); +- count_out.push(GeneImpactRecordCounts { +- impact: impact as i32, +- counts, +- }); +- } +- result.insert(record.hgnc.clone(), count_out); ++ let record = serde_json::from_str::(&line)?; ++ result.insert(record.hgnc_id.clone(), record); + } +  + Ok(result) +@@ -77,7 +67,7 @@ fn load_per_impact_jsonl( + /// Load per-frequency JSONL file. + fn load_per_frequency_jsonl( + path_per_impact_jsonl: &str, +-) -> Result>, anyhow::Error> { ++) -> Result, anyhow::Error> { + // Open reader, possibly decompressing gziped files. 
+ let reader: Box = if path_per_impact_jsonl.ends_with(".gz") { + Box::new(flate2::read::GzDecoder::new(std::fs::File::open( +@@ -92,18 +82,8 @@ fn load_per_frequency_jsonl( + let reader = std::io::BufReader::new(reader); + for line in reader.lines() { + let line = line?; +- let record = +- serde_json::from_str::(&line)?; +- +- let mut count_out = Vec::new(); +- for (clinsig, counts) in record.counts { +- let coarse_clinsig: CoarseClinicalSignificance = clinsig.into(); +- count_out.push(GeneFreqRecordCounts { +- coarse_clinsig: coarse_clinsig as i32, +- counts, +- }); +- } +- result.insert(record.hgnc.clone(), count_out); ++ let record = serde_json::from_str::(&line)?; ++ result.insert(record.hgnc_id.clone(), record); + } +  + Ok(result) +@@ -132,23 +112,29 @@ fn load_variants_jsonl( +  + for line in reader.lines() { + let line = line?; +- let input_record = serde_json::from_str::(&line); ++ let input_record = serde_json::from_str::(&line); + match input_record { + Err(e) => { + tracing::warn!("skipping line because of error: {}", e); + continue; + } + Ok(input_record) => { +- let clinvar_minimal::cli::reading::Record { +- vcv, +- rcv, +- title, +- hgnc_ids, +- clinical_significance, +- review_status, ++ let ExtractedVcvRecord { ++ accession, ++ rcvs, ++ name, ++ variation_type, ++ classifications, + sequence_location, +- .. 
++ hgnc_ids + } = input_record; ++ ++ if let (Some(accession), Some(classifications)) = (accession, classifications) { ++ ++ } else { ++ continue; ++ } ++ + let clinvar_minimal::cli::reading::SequenceLocation { + assembly, + chr, +diff --git a/src/clinvar_genes/cli/mod.rs b/src/clinvar_genes/cli/mod.rs +index b4b20cc..e4aad4c 100644 +--- a/src/clinvar_genes/cli/mod.rs ++++ b/src/clinvar_genes/cli/mod.rs +@@ -2,4 +2,3 @@ +  + pub mod import; + pub mod query; +-pub mod reading; +diff --git a/src/clinvar_genes/cli/reading.rs b/src/clinvar_genes/cli/reading.rs +deleted file mode 100644 +index 2f75d92..0000000 +--- a/src/clinvar_genes/cli/reading.rs ++++ /dev/null +@@ -1,173 +0,0 @@ +-//! Reading JSONL data for per-gene ClinVar information. +- +-/// Reading of gene per-impact counts records. +-pub mod gene_impact { +- /// SO terms for impact on gene +- #[derive(Debug, serde::Deserialize, serde::Serialize, PartialEq, Eq, Hash)] +- pub enum Impact { +- /// 3' UTR variant +- #[serde(rename = "3_prime_UTR_variant")] +- ThreePrimeUtrVariant, +- /// 5' UTR variant +- #[serde(rename = "5_prime_UTR_variant")] +- FivePrimeUtrVariant, +- /// downstream gene variant +- #[serde(rename = "downstream_gene_variant")] +- DownstreamGeneVariant, +- /// frameshift variant +- #[serde(rename = "frameshift_variant")] +- FrameshiftVariant, +- /// inframe deletion +- #[serde(rename = "inframe_indel")] +- InframeIndel, +- /// start lost +- #[serde(rename = "start_lost")] +- StartLost, +- /// intron variant +- #[serde(rename = "intron_variant")] +- IntronVariant, +- /// missense variant +- #[serde(rename = "missense_variant")] +- MissenseVariant, +- /// non-coding transcript variant +- #[serde(rename = "non_coding_transcript_variant")] +- NonCodingTranscriptVariant, +- /// stop gained +- #[serde(rename = "stop_gained")] +- StopGained, +- /// no sequence alteration +- #[serde(rename = "no_sequence_alteration")] +- NoSequenceAlteration, +- /// splice acceptor variant +- #[serde(rename = 
"splice_acceptor_variant")] +- SpliceAcceptorVariant, +- /// splice donor variant +- #[serde(rename = "splice_donor_variant")] +- SpliceDonorVariant, +- /// stop lost +- #[serde(rename = "stop_lost")] +- StopLost, +- /// synonymous variant +- #[serde(rename = "synonymous_variant")] +- SyonymousVariant, +- /// upstream gene variant +- #[serde(rename = "upstream_gene_variant")] +- UpstreamGeneVariant, +- } +- +- impl From for crate::pbs::clinvar::per_gene::Impact { +- fn from(val: Impact) -> Self { +- match val { +- Impact::ThreePrimeUtrVariant => { +- crate::pbs::clinvar::per_gene::Impact::ThreePrimeUtrVariant +- } +- Impact::FivePrimeUtrVariant => { +- crate::pbs::clinvar::per_gene::Impact::FivePrimeUtrVariant +- } +- Impact::DownstreamGeneVariant => { +- crate::pbs::clinvar::per_gene::Impact::DownstreamTranscriptVariant +- } +- Impact::FrameshiftVariant => { +- crate::pbs::clinvar::per_gene::Impact::FrameshiftVariant +- } +- Impact::InframeIndel => crate::pbs::clinvar::per_gene::Impact::InframeIndel, +- Impact::StartLost => crate::pbs::clinvar::per_gene::Impact::StartLost, +- Impact::IntronVariant => crate::pbs::clinvar::per_gene::Impact::IntronVariant, +- Impact::MissenseVariant => crate::pbs::clinvar::per_gene::Impact::MissenseVariant, +- Impact::NonCodingTranscriptVariant => { +- crate::pbs::clinvar::per_gene::Impact::NonCodingTranscriptVariant +- } +- Impact::StopGained => crate::pbs::clinvar::per_gene::Impact::StopGained, +- Impact::NoSequenceAlteration => { +- crate::pbs::clinvar::per_gene::Impact::NoSequenceAlteration +- } +- Impact::SpliceAcceptorVariant => { +- crate::pbs::clinvar::per_gene::Impact::SpliceAcceptorVariant +- } +- Impact::SpliceDonorVariant => { +- crate::pbs::clinvar::per_gene::Impact::SpliceDonorVariant +- } +- Impact::StopLost => crate::pbs::clinvar::per_gene::Impact::StopLost, +- Impact::SyonymousVariant => { +- crate::pbs::clinvar::per_gene::Impact::SynonymousVariant +- } +- Impact::UpstreamGeneVariant => { +- 
crate::pbs::clinvar::per_gene::Impact::UpstreamTranscriptVariant +- } +- } +- } +- } +- +- /// ACMG clinical significance +- #[derive(Debug, serde::Deserialize, serde::Serialize, PartialEq, Eq, Hash)] +- pub enum ClinicalSignificance { +- /// Benign +- #[serde(rename = "benign")] +- Benign, +- /// Likely benign +- #[serde(rename = "likely benign")] +- LikelyBenign, +- /// Uncertain significance +- #[serde(rename = "uncertain significance")] +- UncertainSignificance, +- /// Likely pathogenic +- #[serde(rename = "likely pathogenic")] +- LikelyPathogenic, +- /// Pathogenic +- #[serde(rename = "pathogenic")] +- Pathogenic, +- } +- +- /// Gene-wise counts record. +- #[derive(Debug, serde::Deserialize, serde::Serialize)] +- pub struct Record { +- /// HGNC gene ID +- pub hgnc: String, +- /// Per-impact counts +- pub counts: indexmap::IndexMap>, +- } +-} +- +-/// Reading of ACMG class by frequency counts records. +-pub mod counts_by_freq { +- /// Coarsened clinical significance +- #[derive(Debug, serde::Deserialize, serde::Serialize, PartialEq, Eq, Hash)] +- pub enum CoarseClinicalSignificance { +- /// Likely benign / benign +- #[serde(rename = "benign")] +- Benign, +- /// Uncertain significance +- #[serde(rename = "uncertain")] +- Uncertain, +- /// Likely pathogenic / pathogenic +- #[serde(rename = "pathogenic")] +- Pathogenic, +- } +- +- impl From +- for crate::pbs::clinvar::per_gene::CoarseClinicalSignificance +- { +- fn from(val: CoarseClinicalSignificance) -> Self { +- match val { +- CoarseClinicalSignificance::Benign => { +- crate::pbs::clinvar::per_gene::CoarseClinicalSignificance::Benign +- } +- CoarseClinicalSignificance::Uncertain => { +- crate::pbs::clinvar::per_gene::CoarseClinicalSignificance::Uncertain +- } +- CoarseClinicalSignificance::Pathogenic => { +- crate::pbs::clinvar::per_gene::CoarseClinicalSignificance::Pathogenic +- } +- } +- } +- } +- +- /// Per-pathogenicity counts. 
+- #[derive(Debug, serde::Deserialize, serde::Serialize)] +- pub struct Record { +- /// HGNC gene ID +- pub hgnc: String, +- /// Per-impact counts +- pub counts: indexmap::IndexMap>, +- } +-} +diff --git a/src/common/keys.rs b/src/common/keys.rs +index 53a6ff0..2ee689e 100644 +--- a/src/common/keys.rs ++++ b/src/common/keys.rs +@@ -92,19 +92,19 @@ impl Var { + } +  + /// Create for all alternate alleles from the given VCF record. +- pub fn from_vcf_allele(value: &noodles_vcf::Record, allele_no: usize) -> Self { +- let chrom = match value.chromosome() { +- noodles_vcf::record::Chromosome::Name(name) +- | noodles_vcf::record::Chromosome::Symbol(name) => name.to_owned(), +- }; +- let pos: usize = value.position().into(); +- let pos = pos as i32; ++ pub fn from_vcf_allele(value: &noodles_vcf::variant::RecordBuf, allele_no: usize) -> Self { ++ let chrom = value.reference_sequence_name().to_string(); ++ let pos: usize = value ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); ++ let pos = i32::try_from(pos).unwrap(); + let reference = value.reference_bases().to_string(); + Var { + chrom, + pos, + reference, +- alternative: value.alternate_bases()[allele_no].to_string(), ++ alternative: value.alternate_bases().as_ref()[allele_no].to_string(), + } + } + } +diff --git a/src/common/noodles.rs b/src/common/noodles.rs +index 1b88f33..ecc37fb 100644 +--- a/src/common/noodles.rs ++++ b/src/common/noodles.rs +@@ -2,14 +2,17 @@ +  + use std::str::FromStr; +  +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + /// Extract a `String` field from a record. +-pub fn get_string(record: &noodles_vcf::Record, name: &str) -> Result { +- if let Some(Some(field::Value::String(v))) = record.info().get(&field::Key::from_str(name)?) 
{ ++pub fn get_string( ++ record: &noodles_vcf::variant::RecordBuf, ++ name: &str, ++) -> Result { ++ if let Some(Some(field::Value::String(v))) = record.info().get(name) { + Ok(v.to_string()) + } else if let Some(Some(field::Value::Array(field::value::Array::String(vs)))) = +- record.info().get(&field::Key::from_str(name)?) ++ record.info().get(name) + { + Ok(vs.first().unwrap().as_ref().unwrap().to_string()) + } else { +@@ -18,19 +21,22 @@ pub fn get_string(record: &noodles_vcf::Record, name: &str) -> Result Result { ++pub fn get_flag( ++ record: &noodles_vcf::variant::RecordBuf, ++ name: &str, ++) -> Result { + Ok(matches!( +- record.info().get(&field::Key::from_str(name)?), ++ record.info().get(name), + Some(Some(field::Value::Flag)) + )) + } +  + /// Extract an `i32` field from a record. +-pub fn get_i32(record: &noodles_vcf::Record, name: &str) -> Result { +- if let Some(Some(field::Value::Integer(v))) = record.info().get(&field::Key::from_str(name)?) { ++pub fn get_i32(record: &noodles_vcf::variant::RecordBuf, name: &str) -> Result { ++ if let Some(Some(field::Value::Integer(v))) = record.info().get(name) { + Ok(*v) + } else if let Some(Some(field::Value::Array(field::value::Array::Integer(vs)))) = +- record.info().get(&field::Key::from_str(name)?) ++ record.info().get(name) + { + Ok(vs.first().unwrap().unwrap()) + } else { +@@ -39,11 +45,11 @@ pub fn get_i32(record: &noodles_vcf::Record, name: &str) -> Result Result { +- if let Some(Some(field::Value::Float(v))) = record.info().get(&field::Key::from_str(name)?) { ++pub fn get_f32(record: &noodles_vcf::variant::RecordBuf, name: &str) -> Result { ++ if let Some(Some(field::Value::Float(v))) = record.info().get(name) { + Ok(*v) + } else if let Some(Some(field::Value::Array(field::value::Array::Float(vs)))) = +- record.info().get(&field::Key::from_str(name)?) 
++ record.info().get(name) + { + Ok(vs.first().unwrap().unwrap()) + } else { +@@ -54,9 +60,12 @@ pub fn get_f32(record: &noodles_vcf::Record, name: &str) -> Result` field from record with an array field. + /// + /// This is different than parsing the histograms from pipe-separated strings. +-pub fn get_vec_str(record: &noodles_vcf::Record, name: &str) -> Result, anyhow::Error> { ++pub fn get_vec_str( ++ record: &noodles_vcf::variant::RecordBuf, ++ name: &str, ++) -> Result, anyhow::Error> { + if let Some(Some(field::Value::Array(field::value::Array::String(vs)))) = +- record.info().get(&field::Key::from_str(name)?) ++ record.info().get(name) + { + Ok(vs.iter().flatten().cloned().collect()) + } else { +@@ -67,9 +76,12 @@ pub fn get_vec_str(record: &noodles_vcf::Record, name: &str) -> Result` field from record with an array field. + /// + /// This is different than parsing the histograms from pipe-separated strings. +-pub fn get_vec_i32(record: &noodles_vcf::Record, name: &str) -> Result, anyhow::Error> { ++pub fn get_vec_i32( ++ record: &noodles_vcf::variant::RecordBuf, ++ name: &str, ++) -> Result, anyhow::Error> { + if let Some(Some(field::Value::Array(field::value::Array::Integer(vs)))) = +- record.info().get(&field::Key::from_str(name)?) ++ record.info().get(name) + { + Ok(vs.iter().flatten().cloned().collect()) + } else { +@@ -78,11 +90,14 @@ pub fn get_vec_i32(record: &noodles_vcf::Record, name: &str) -> Result, + } +  + /// Extract an `Vec` field from a record encoded as a pipe symbol separated string. +-pub fn get_vec(record: &noodles_vcf::Record, name: &str) -> Result, anyhow::Error> ++pub fn get_vec( ++ record: &noodles_vcf::variant::RecordBuf, ++ name: &str, ++) -> Result, anyhow::Error> + where + T: FromStr, + { +- if let Some(Some(field::Value::String(v))) = record.info().get(&field::Key::from_str(name)?) 
{ ++ if let Some(Some(field::Value::String(v))) = record.info().get(name) { + v.split('|') + .map(|s| s.parse()) + .collect::, _>>() +@@ -94,12 +109,15 @@ where +  + /// Extract an `Vec>` field from a record encoded as a list of pipe symbol + /// separated string. +-pub fn get_vec_vec(record: &noodles_vcf::Record, name: &str) -> Result, anyhow::Error> ++pub fn get_vec_vec( ++ record: &noodles_vcf::variant::RecordBuf, ++ name: &str, ++) -> Result, anyhow::Error> + where + T: FromStr, + { + if let Some(Some(field::Value::Array(field::value::Array::String(value)))) = +- record.info().get(&field::Key::from_str(name)?) ++ record.info().get(name) + { + Ok(value + .iter() +diff --git a/src/dbsnp/cli/import.rs b/src/dbsnp/cli/import.rs +index 10665c7..5277d5f 100644 +--- a/src/dbsnp/cli/import.rs ++++ b/src/dbsnp/cli/import.rs +@@ -8,6 +8,7 @@ use clap::Parser; + use indicatif::ParallelProgressIterator; + use noodles_csi::BinningIndex as _; + use noodles_vcf::header::record; ++use noodles_vcf::variant::RecordBuf; + use prost::Message; + use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; +  +@@ -105,7 +106,7 @@ fn process_window( + let cf_dbsnp = db.cf_handle(&args.cf_name).unwrap(); + let cf_dbsnp_by_rsid = db.cf_handle(&args.cf_name_by_rsid).unwrap(); + let mut reader = +- noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; + let header = reader.read_header()?; +  + let raw_region = format!("{}:{}-{}", chrom, begin + 1, end); +@@ -130,10 +131,10 @@ fn process_window( + // exist). + if let Some(query) = query { + for result in query { +- let vcf_record = result?; ++ let vcf_record = RecordBuf::try_from_variant_record(&header, &result?)?; +  + // Process each alternate allele into one record. 
+- for allele_no in 0..vcf_record.alternate_bases().len() { ++ for allele_no in 0..vcf_record.alternate_bases().as_ref().len() { + let key_buf: Vec = + common::keys::Var::from_vcf_allele(&vcf_record, allele_no).into(); + let record = dbsnp::pbs::Record::from_vcf_allele(&vcf_record, allele_no)?; +@@ -158,7 +159,7 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> + tracing::info!("Opening dbSNP VCF file..."); + let before_loading = std::time::Instant::now(); + let mut reader_vcf = +- noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; + let header = reader_vcf.read_header()?; + let dbsnp_reference = if let record::value::Collection::Unstructured(values) = header + .other_records() +diff --git a/src/dbsnp/pbs.rs b/src/dbsnp/pbs.rs +index 8ffb434..d4a5e52 100644 +--- a/src/dbsnp/pbs.rs ++++ b/src/dbsnp/pbs.rs +@@ -1,28 +1,30 @@ + //! Data structures for (de-)serialization as generated by `prost-build`. +  +-use std::str::FromStr; ++use noodles_vcf::variant::record::AlternateBases; +  + pub use crate::pbs::dbsnp::Record; +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + impl Record { + /// Creates a new `Record` from a VCF record and allele number. 
+ pub fn from_vcf_allele( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + allele_no: usize, + ) -> Result { +- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); +- let pos: i32 = pos.try_into()?; ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); ++ let pos: i32 = i32::try_from(pos)?; + let ref_allele = record.reference_bases().to_string(); + let alt_allele = record + .alternate_bases() +- .get(allele_no) +- .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))? ++ .iter() ++ .nth(allele_no) ++ .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?? + .to_string(); +- let rs_id = if let Some(Some(field::Value::Integer(rs))) = +- record.info().get(&field::Key::from_str("RS")?) +- { ++ let rs_id = if let Some(Some(field::Value::Integer(rs))) = record.info().get("RS") { + *rs + } else { + anyhow::bail!("no rs id in dbSNP record") +diff --git a/src/freqs/cli/import/auto.rs b/src/freqs/cli/import/auto.rs +index 3000366..d7e854f 100644 +--- a/src/freqs/cli/import/auto.rs ++++ b/src/freqs/cli/import/auto.rs +@@ -7,8 +7,8 @@ fn write_record( + db: &rocksdb::DBWithThreadMode, + cf: &std::sync::Arc, + record_key: &common::keys::Var, +- record_genome: &mut Option, +- record_exome: &mut Option, ++ record_genome: &mut Option, ++ record_exome: &mut Option, + ) -> Result<(), anyhow::Error> { + if record_genome.is_none() && record_exome.is_none() { + // Early exit, nothing to write out. 
+@@ -56,11 +56,14 @@ pub fn import_region( + let mut readers = Vec::new(); + if let Some(path_genome) = path_genome { + is_genome.push(true); +- readers.push(noodles_vcf::indexed_reader::Builder::default().build_from_path(path_genome)?); ++ readers.push( ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_genome)?, ++ ); + } + if let Some(path_exome) = path_exome { + is_genome.push(false); +- readers.push(noodles_vcf::indexed_reader::Builder::default().build_from_path(path_exome)?); ++ readers ++ .push(noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_exome)?); + } + // Read headers. + let headers: Vec<_> = readers +@@ -75,7 +78,7 @@ pub fn import_region( + .map(|(reader, header)| reader.query(header, region)) + .collect::>()?; + // Construct the `MultiQuery`. +- let multi_query = super::reading::MultiQuery::new(queries)?; ++ let multi_query = super::reading::MultiQuery::new(queries, &headers)?; +  + // Now iterate over the `MultiQuery` and write to the database. + // +diff --git a/src/freqs/cli/import/mod.rs b/src/freqs/cli/import/mod.rs +index 602ca69..a9d74b1 100644 +--- a/src/freqs/cli/import/mod.rs ++++ b/src/freqs/cli/import/mod.rs +@@ -76,21 +76,24 @@ fn assign_to_chrom( + let mut res = HashMap::new(); +  + for path in paths { +- let mut reader = noodles_vcf::indexed_reader::Builder::default().build_from_path(path)?; ++ let mut reader = ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path)?; + let header = Box::new(reader.read_header()?); + freqs::cli::import::reading::guess_assembly(header.as_ref(), true, Some(assembly))?; + let record = reader +- .records(header.as_ref()) ++ .record_bufs(header.as_ref()) + .next() + .transpose()? 
+ .ok_or(anyhow::anyhow!("No records in VCF file {}", path))?; +- let k = contig_map.chrom_to_idx(record.chromosome()).map_err(|e| { +- anyhow::anyhow!( +- "Error mapping chromosome {} to index: {}", +- record.chromosome(), +- e +- ) +- })?; ++ let k = contig_map ++ .chrom_to_idx(record.reference_sequence_name()) ++ .map_err(|e| { ++ anyhow::anyhow!( ++ "Error mapping chromosome {} to index: {}", ++ record.reference_sequence_name(), ++ e ++ ) ++ })?; + let v = path.clone(); + res.insert(k, v); + } +diff --git a/src/freqs/cli/import/mt.rs b/src/freqs/cli/import/mt.rs +index 32ef54c..ec8028b 100644 +--- a/src/freqs/cli/import/mt.rs ++++ b/src/freqs/cli/import/mt.rs +@@ -7,8 +7,8 @@ fn write_record( + db: &rocksdb::DBWithThreadMode, + cf: &std::sync::Arc, + record_key: &common::keys::Var, +- record_gnomad: &mut Option, +- record_helix: &mut Option, ++ record_gnomad: &mut Option, ++ record_helix: &mut Option, + ) -> Result<(), anyhow::Error> { + if record_gnomad.is_none() && record_helix.is_none() { + // Early exit, nothing to write out. +@@ -59,12 +59,15 @@ pub fn import_region( + if let Some(path_gnomad) = path_gnomad { + is_gnomad.push(true); + paths.push(path_gnomad); +- readers.push(noodles_vcf::indexed_reader::Builder::default().build_from_path(path_gnomad)?); ++ readers.push( ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_gnomad)?, ++ ); + } + if let Some(path_helix) = path_helix { + is_gnomad.push(false); + paths.push(path_helix); +- readers.push(noodles_vcf::indexed_reader::Builder::default().build_from_path(path_helix)?); ++ readers ++ .push(noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_helix)?); + } + // Read headers. + let headers: Vec<_> = readers +@@ -92,7 +95,7 @@ pub fn import_region( + }) + .collect::>()?; + // Construct the `MultiQuery`. 
+- let multi_query = super::reading::MultiQuery::new(queries)?; ++ let multi_query = super::reading::MultiQuery::new(queries, &headers)?; +  + // Now iterate over the `MultiQuery` and write to the database. + // +diff --git a/src/freqs/cli/import/reading.rs b/src/freqs/cli/import/reading.rs +index 6b48263..b1ba1ae 100644 +--- a/src/freqs/cli/import/reading.rs ++++ b/src/freqs/cli/import/reading.rs +@@ -3,6 +3,9 @@ + use std::collections::{BTreeMap, HashMap}; +  + use biocommons_bioutils::assemblies::{Assembly, Sequence, ASSEMBLY_INFOS}; ++use noodles_vcf::variant::record::AlternateBases; ++use noodles_vcf::variant::RecordBuf; ++use noodles_vcf::Header; +  + use crate::common::cli::CANONICAL; +  +@@ -42,14 +45,8 @@ impl ContigMap { + } +  + /// Map chromosome to index. +- pub fn chrom_to_idx( +- &self, +- chrom: &noodles_vcf::record::Chromosome, +- ) -> Result { +- match chrom { +- noodles_vcf::record::Chromosome::Name(s) +- | noodles_vcf::record::Chromosome::Symbol(s) => self.chrom_name_to_idx(s), +- } ++ pub fn chrom_to_idx(&self, chrom: &str) -> Result { ++ self.chrom_name_to_idx(chrom) + } +  + /// Map chromosome name to index. +@@ -72,7 +69,7 @@ struct Key { + /// Chromosome. + chrom: String, + /// Noodles position. +- pos: noodles_vcf::record::Position, ++ pos: noodles_core::Position, + /// Reference allele. + reference: String, + /// First (and only) alternate allelele. +@@ -82,14 +79,18 @@ struct Key { + } +  + /// Build a key from a VCF record. 
+-fn build_key(record: &noodles_vcf::Record, i: usize) -> Key { ++fn build_key(record: &RecordBuf, i: usize) -> Key { + Key { +- chrom: record.chromosome().to_string(), +- pos: record.position(), ++ chrom: record.reference_sequence_name().to_string(), ++ pos: record ++ .variant_start() ++ .expect("Telomeric breakends not supported"), + reference: record.reference_bases().to_string(), + alternative: record + .alternate_bases() +- .first() ++ .iter() ++ .next() ++ .expect("must have alternate allele") + .expect("must have alternate allele") + .to_string(), + idx: i, +@@ -99,27 +100,33 @@ fn build_key(record: &noodles_vcf::Record, i: usize) -> Key { + /// Read through multiple `noodles_vcf::vcf::reader::Query`s at once. + pub struct MultiQuery<'r, 'h, R> + where +- R: std::io::Read + std::io::Seek, ++ R: std::io::Read + noodles_bgzf::io::Seek, + { + /// One query for each input file. +- queries: Vec>, ++ queries: Vec>, ++ ++ /// One header for each input file. (Not accessible from Query) ++ headers: Vec
, ++ + /// The current smallest-by-coordinate records. +- records: BTreeMap, ++ records: BTreeMap, + } +  + impl<'r, 'h, R> MultiQuery<'r, 'h, R> + where +- R: std::io::Read + std::io::Seek, ++ R: noodles_bgzf::io::BufRead + noodles_bgzf::io::Seek, + { + /// Construct a new `MultiQuery`. + pub fn new( +- mut record_iters: Vec>, ++ mut record_iters: Vec>, ++ headers: &[Header], + ) -> std::io::Result { + let mut records = BTreeMap::new(); +  +- for (i, iter) in record_iters.iter_mut().enumerate() { ++ for (i, (iter, header)) in record_iters.iter_mut().zip(headers).enumerate() { + if let Some(result) = iter.next() { + let record = result?; ++ let record = RecordBuf::try_from_variant_record(header, &record)?; + let key = build_key(&record, i); + records.insert(key, record); + } +@@ -127,6 +134,7 @@ where +  + Ok(Self { + queries: record_iters, ++ headers: headers.to_vec(), + records, + }) + } +@@ -134,9 +142,9 @@ where +  + impl<'r, 'h, R> Iterator for MultiQuery<'r, 'h, R> + where +- R: std::io::Read + std::io::Seek, ++ R: noodles_bgzf::io::BufRead + noodles_bgzf::io::Seek, + { +- type Item = std::io::Result<(usize, noodles_vcf::Record)>; ++ type Item = std::io::Result<(usize, RecordBuf)>; +  + /// Return next item if any. 
+ fn next(&mut self) -> Option { +@@ -145,6 +153,8 @@ where + if let Some(result) = self.queries[idx].next() { + match result { + Ok(record) => { ++ let record = ++ RecordBuf::try_from_variant_record(&self.headers[idx], &record).ok()?; + let key = build_key(&record, idx); + self.records.insert(key, record); + } +@@ -193,7 +203,7 @@ pub fn guess_assembly( + let mut compatible = 0; + for (name, data) in vcf_header.contigs() { + if let Some(length) = data.length() { +- let idx = contig_map.name_map.get(name.as_ref()); ++ let idx = contig_map.name_map.get(name); + if let Some(idx) = idx { + let name = &info.sequences[*idx].name; + if CANONICAL.contains(&name.as_ref()) { +@@ -260,7 +270,7 @@ mod test { + #[test] + fn guess_assembly_helix_chrmt_ambiguous_ok_initial_none() -> Result<(), anyhow::Error> { + let path = "tests/freqs/grch37/v2.1/reading/helix.chrM.vcf"; +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path)?; + let header = reader.read_header()?; +  + let actual = guess_assembly(&header, true, None)?; +@@ -272,7 +282,7 @@ mod test { + #[test] + fn guess_assembly_helix_chrmt_ambiguous_ok_initial_override() -> Result<(), anyhow::Error> { + let path = "tests/freqs/grch37/v2.1/reading/helix.chrM.vcf"; +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path)?; + let header = reader.read_header()?; +  + let actual = guess_assembly(&header, true, Some(Assembly::Grch37p10))?; +@@ -285,7 +295,7 @@ mod test { + fn guess_assembly_helix_chrmt_ambiguous_ok_initial_override_fails() -> Result<(), anyhow::Error> + { + let path = "tests/freqs/grch37/v2.1/reading/helix.chrM.vcf"; +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path)?; + let 
header = reader.read_header()?; +  + assert!(guess_assembly(&header, false, Some(Assembly::Grch37)).is_err()); +@@ -296,7 +306,7 @@ mod test { + #[test] + fn guess_assembly_helix_chrmt_ambiguous_fail() -> Result<(), anyhow::Error> { + let path = "tests/freqs/grch37/v2.1/reading/helix.chrM.vcf"; +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path)?; + let header = reader.read_header()?; +  + assert!(guess_assembly(&header, false, None).is_err()); +@@ -313,9 +323,9 @@ mod test { + #[test] + fn test_multiquery() -> Result<(), anyhow::Error> { + let mut readers = vec![ +- noodles_vcf::indexed_reader::Builder::default() ++ noodles_vcf::io::indexed_reader::Builder::default() + .build_from_path("tests/freqs/grch37/v2.1/reading/gnomad.chrM.vcf.bgz")?, +- noodles_vcf::indexed_reader::Builder::default() ++ noodles_vcf::io::indexed_reader::Builder::default() + .build_from_path("tests/freqs/grch37/v2.1/reading/helix.chrM.vcf.bgz")?, + ]; +  +@@ -334,7 +344,7 @@ mod test { + .map(|(reader, header)| reader.query(header, ®ion)) + .collect::>()?; +  +- let multi_query = MultiQuery::new(queries)?; ++ let multi_query = MultiQuery::new(queries, &headers)?; +  + let mut records = Vec::new(); + for result in multi_query { +diff --git a/src/freqs/cli/import/snapshots/annonars__freqs__cli__import__reading__test__multiquery.snap b/src/freqs/cli/import/snapshots/annonars__freqs__cli__import__reading__test__multiquery.snap +index aa73c88..31f6a1b 100644 +--- a/src/freqs/cli/import/snapshots/annonars__freqs__cli__import__reading__test__multiquery.snap ++++ b/src/freqs/cli/import/snapshots/annonars__freqs__cli__import__reading__test__multiquery.snap +@@ -1,54 +1,41 @@ + --- + source: src/freqs/cli/import/reading.rs ++assertion_line: 354 + expression: records + --- + [ + ( + 0, +- Record { +- chromosome: Name( +- "chrM", +- ), +- position: Position( +- 3, ++ RecordBuf { ++ 
reference_sequence_name: "chrM", ++ variant_start: Some( ++ Position( ++ 3, ++ ), + ), + ids: Ids( + {}, + ), +- reference_bases: ReferenceBases( +- [ +- T, +- ], +- ), ++ reference_bases: "T", + alternate_bases: AlternateBases( + [ +- Bases( +- [ +- C, +- ], +- ), ++ "C", + ], + ), + quality_score: None, +- filters: Some( +- Pass, ++ filters: Filters( ++ { ++ "PASS", ++ }, + ), + info: Info( + { +- Other( +- Other( +- "variant_collapsed", +- ), +- ): Some( ++ "variant_collapsed": Some( + String( + "T3C", + ), + ), +- Other( +- Other( +- "vep", +- ), +- ): Some( ++ "vep": Some( + Array( + String( + [ +@@ -59,207 +46,117 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "base_qual_hist", +- ), +- ): Some( ++ "base_qual_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "position_hist", +- ), +- ): Some( ++ "position_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "strand_bias_hist", +- ), +- ): Some( ++ "strand_bias_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "weak_evidence_hist", +- ), +- ): Some( ++ "weak_evidence_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "contamination_hist", +- ), +- ): Some( ++ "contamination_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "heteroplasmy_below_min_het_threshold_hist", +- ), +- ): Some( ++ "heteroplasmy_below_min_het_threshold_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "excluded_AC", +- ), +- ): Some( ++ "excluded_AC": Some( + Integer( + 0, + ), + ), +- Standard( +- TotalAlleleCount, +- ): Some( ++ "AN": Some( + Integer( + 56434, + ), + ), +- Other( +- Other( +- "AC_hom", +- ), +- ): Some( ++ "AC_hom": Some( + Integer( + 19, + ), + ), +- Other( +- Other( +- "AC_het", +- ), +- ): Some( ++ "AC_het": Some( + Integer( + 1, + ), + ), +- Other( +- Other( +- "hl_hist", +- ), +- ): Some( ++ "hl_hist": Some( + String( + 
"0|0|0|0|0|0|0|0|1|19", + ), + ), +- Other( +- Other( +- "dp_mean", +- ), +- ): Some( ++ "dp_mean": Some( + Float( + 2522.87, + ), + ), +- Other( +- Other( +- "mq_mean", +- ), +- ): Some( ++ "mq_mean": Some( + Float( + 60.0, + ), + ), +- Other( +- Other( +- "tlod_mean", +- ), +- ): Some( ++ "tlod_mean": Some( + Float( + 6805.54, + ), + ), +- Other( +- Other( +- "AF_hom", +- ), +- ): Some( ++ "AF_hom": Some( + Float( + 0.000336676, + ), + ), +- Other( +- Other( +- "AF_het", +- ), +- ): Some( ++ "AF_het": Some( + Float( + 1.77198e-5, + ), + ), +- Other( +- Other( +- "max_hl", +- ), +- ): Some( ++ "max_hl": Some( + Float( + 0.997, + ), + ), +- Other( +- Other( +- "hap_AN", +- ), +- ): Some( ++ "hap_AN": Some( + String( + "2680|1537|868|603|34|282|91|14784|701|934|3144|2732|663|2977|4724|5672|126|1|1298|366|7|393|3080|6037|1234|819|546|12|89", + ), + ), +- Other( +- Other( +- "hap_AC_het", +- ), +- ): Some( ++ "hap_AC_het": Some( + String( + "0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "hap_AC_hom", +- ), +- ): Some( ++ "hap_AC_hom": Some( + String( + "0|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|17|1|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "hap_AF_hom", +- ), +- ): Some( ++ "hap_AF_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|6.76407e-05|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|5.51948e-03|1.65645e-04|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hap_AF_het", +- ), +- ): Some( ++ "hap_AF_het": Some( + String( + 
"0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|3.24675e-04|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hap_hl_hist", +- ), +- ): Some( ++ "hap_hl_hist": Some( + Array( + String( + [ +@@ -354,92 +251,52 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "hap_faf_hom", +- ), +- ): Some( ++ "hap_faf_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|3.51633e-03|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hapmax_AF_hom", +- ), +- ): Some( ++ "hapmax_AF_hom": Some( + String( + "T", + ), + ), +- Other( +- Other( +- "hapmax_AF_het", +- ), +- ): Some( ++ "hapmax_AF_het": Some( + String( + "T", + ), + ), +- Other( +- Other( +- "faf_hapmax_hom", +- ), +- ): Some( ++ "faf_hapmax_hom": Some( + Float( + 0.00351633, + ), + ), +- Other( +- Other( +- "pop_AN", +- ), +- ): Some( ++ "pop_AN": Some( + String( + "14347|392|5718|1415|1482|4892|25849|826|1493|20", + ), + ), +- Other( +- Other( +- "pop_AC_het", +- ), +- ): Some( ++ "pop_AC_het": Some( + String( + "0|0|0|0|0|0|1|0|0|0", + ), + ), +- Other( +- Other( +- "pop_AC_hom", +- ), +- ): Some( ++ "pop_AC_hom": Some( + String( + "0|0|0|0|0|0|19|0|0|0", + ), + ), +- Other( +- Other( +- "pop_AF_hom", +- ), +- ): Some( ++ "pop_AF_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|7.35038e-04|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "pop_AF_het", +- ), +- ): Some( ++ 
"pop_AF_het": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|3.86862e-05|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "pop_hl_hist", +- ), +- ): Some( ++ "pop_hl_hist": Some( + Array( + String( + [ +@@ -477,99 +334,59 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "age_hist_hom_bin_freq", +- ), +- ): Some( ++ "age_hist_hom_bin_freq": Some( + String( + "0|1|0|2|1|0|0|2|0|0", + ), + ), +- Other( +- Other( +- "age_hist_hom_n_smaller", +- ), +- ): Some( ++ "age_hist_hom_n_smaller": Some( + Integer( + 3, + ), + ), +- Other( +- Other( +- "age_hist_hom_n_larger", +- ), +- ): Some( ++ "age_hist_hom_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_het_bin_freq", +- ), +- ): Some( ++ "age_hist_het_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "age_hist_het_n_smaller", +- ), +- ): Some( ++ "age_hist_het_n_smaller": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_het_n_larger", +- ), +- ): Some( ++ "age_hist_het_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "dp_hist_all_n_larger", +- ), +- ): Some( ++ "dp_hist_all_n_larger": Some( + Integer( + 35782, + ), + ), +- Other( +- Other( +- "dp_hist_alt_n_larger", +- ), +- ): Some( ++ "dp_hist_alt_n_larger": Some( + Integer( + 11, + ), + ), +- Other( +- Other( +- "dp_hist_all_bin_freq", +- ), +- ): Some( ++ "dp_hist_all_bin_freq": Some( + String( + "0|1|217|1234|2334|2600|2967|3324|3791|4184", + ), + ), +- Other( +- Other( +- "dp_hist_alt_bin_freq", +- ), +- ): Some( ++ "dp_hist_alt_bin_freq": Some( + String( + "0|0|0|0|2|0|1|4|2|0", + ), + ), + }, + ), +- genotypes: Genotypes { ++ samples: Samples { + keys: Keys( + {}, + ), +@@ -579,64 +396,48 @@ expression: records + ), + ( + 1, +- Record { +- chromosome: Name( +- "chrM", +- ), +- position: Position( +- 5, ++ RecordBuf { ++ reference_sequence_name: "chrM", ++ variant_start: Some( ++ Position( ++ 5, 
++ ), + ), + ids: Ids( + {}, + ), +- reference_bases: ReferenceBases( +- [ +- A, +- ], +- ), ++ reference_bases: "A", + alternate_bases: AlternateBases( + [ +- Bases( +- [ +- C, +- ], +- ), ++ "C", + ], + ), + quality_score: None, +- filters: Some( +- Pass, ++ filters: Filters( ++ { ++ "PASS", ++ }, + ), + info: Info( + { +- Standard( +- TotalAlleleCount, +- ): Some( ++ "AN": Some( + Integer( + 196554, + ), + ), +- Other( +- Other( +- "AC_hom", +- ), +- ): Some( ++ "AC_hom": Some( + Integer( + 1, + ), + ), +- Other( +- Other( +- "AC_het", +- ), +- ): Some( ++ "AC_het": Some( + Integer( + 0, + ), + ), + }, + ), +- genotypes: Genotypes { ++ samples: Samples { + keys: Keys( + {}, + ), +@@ -646,50 +447,31 @@ expression: records + ), + ( + 0, +- Record { +- chromosome: Name( +- "chrM", +- ), +- position: Position( +- 6, ++ RecordBuf { ++ reference_sequence_name: "chrM", ++ variant_start: Some( ++ Position( ++ 6, ++ ), + ), + ids: Ids( + {}, + ), +- reference_bases: ReferenceBases( +- [ +- C, +- ], +- ), ++ reference_bases: "C", + alternate_bases: AlternateBases( + [ +- Bases( +- [ +- C, +- C, +- T, +- C, +- A, +- A, +- ], +- ), ++ "CCTCAA", + ], + ), + quality_score: None, +- filters: Some( +- Fail( +- { +- "npg", +- }, +- ), ++ filters: Filters( ++ { ++ "npg", ++ }, + ), + info: Info( + { +- Other( +- Other( +- "filters", +- ), +- ): Some( ++ "filters": Some( + Array( + String( + [ +@@ -700,20 +482,12 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "variant_collapsed", +- ), +- ): Some( ++ "variant_collapsed": Some( + String( + "C6CCTCAA", + ), + ), +- Other( +- Other( +- "vep", +- ), +- ): Some( ++ "vep": Some( + Array( + String( + [ +@@ -724,207 +498,117 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "base_qual_hist", +- ), +- ): Some( ++ "base_qual_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "position_hist", +- ), +- ): Some( ++ "position_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- 
Other( +- Other( +- "strand_bias_hist", +- ), +- ): Some( ++ "strand_bias_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "weak_evidence_hist", +- ), +- ): Some( ++ "weak_evidence_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "contamination_hist", +- ), +- ): Some( ++ "contamination_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "heteroplasmy_below_min_het_threshold_hist", +- ), +- ): Some( ++ "heteroplasmy_below_min_het_threshold_hist": Some( + String( + "1|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "excluded_AC", +- ), +- ): Some( ++ "excluded_AC": Some( + Integer( + 1, + ), + ), +- Standard( +- TotalAlleleCount, +- ): Some( ++ "AN": Some( + Integer( + 56433, + ), + ), +- Other( +- Other( +- "AC_hom", +- ), +- ): Some( ++ "AC_hom": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "AC_het", +- ), +- ): Some( ++ "AC_het": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "hl_hist", +- ), +- ): Some( ++ "hl_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "dp_mean", +- ), +- ): Some( ++ "dp_mean": Some( + Float( + 2527.78, + ), + ), +- Other( +- Other( +- "mq_mean", +- ), +- ): Some( ++ "mq_mean": Some( + Float( + 60.0, + ), + ), +- Other( +- Other( +- "tlod_mean", +- ), +- ): Some( ++ "tlod_mean": Some( + Float( + 0.537, + ), + ), +- Other( +- Other( +- "AF_hom", +- ), +- ): Some( ++ "AF_hom": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "AF_het", +- ), +- ): Some( ++ "AF_het": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "max_hl", +- ), +- ): Some( ++ "max_hl": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "hap_AN", +- ), +- ): Some( ++ "hap_AN": Some( + String( + "2679|1537|868|603|34|282|91|14784|701|934|3144|2732|663|2977|4724|5672|126|1|1298|366|7|393|3080|6037|1234|819|546|12|89", + ), + ), +- Other( +- Other( +- "hap_AC_het", +- ), +- ): Some( ++ "hap_AC_het": Some( + String( + 
"0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "hap_AC_hom", +- ), +- ): Some( ++ "hap_AC_hom": Some( + String( + "0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "hap_AF_hom", +- ), +- ): Some( ++ "hap_AF_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hap_AF_het", +- ), +- ): Some( ++ "hap_AF_het": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hap_hl_hist", +- ), +- ): Some( ++ "hap_hl_hist": Some( + Array( + String( + [ +@@ -1019,74 +703,42 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "hap_faf_hom", +- ), +- ): Some( ++ "hap_faf_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "faf_hapmax_hom", +- ), +- ): Some( ++ "faf_hapmax_hom": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "pop_AN", +- ), +- ): Some( ++ "pop_AN": Some( + String( + "14347|392|5717|1415|1482|4892|25849|826|1493|20", + ), 
+ ), +- Other( +- Other( +- "pop_AC_het", +- ), +- ): Some( ++ "pop_AC_het": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "pop_AC_hom", +- ), +- ): Some( ++ "pop_AC_hom": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "pop_AF_hom", +- ), +- ): Some( ++ "pop_AF_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "pop_AF_het", +- ), +- ): Some( ++ "pop_AF_het": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "pop_hl_hist", +- ), +- ): Some( ++ "pop_hl_hist": Some( + Array( + String( + [ +@@ -1124,99 +776,59 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "age_hist_hom_bin_freq", +- ), +- ): Some( ++ "age_hist_hom_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "age_hist_hom_n_smaller", +- ), +- ): Some( ++ "age_hist_hom_n_smaller": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_hom_n_larger", +- ), +- ): Some( ++ "age_hist_hom_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_het_bin_freq", +- ), +- ): Some( ++ "age_hist_het_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "age_hist_het_n_smaller", +- ), +- ): Some( ++ "age_hist_het_n_smaller": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_het_n_larger", +- ), +- ): Some( ++ "age_hist_het_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "dp_hist_all_n_larger", +- ), +- ): Some( ++ "dp_hist_all_n_larger": Some( + Integer( + 35855, + ), + ), +- Other( +- Other( +- "dp_hist_alt_n_larger", +- ), +- ): Some( ++ "dp_hist_alt_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "dp_hist_all_bin_freq", +- ), +- ): Some( ++ 
"dp_hist_all_bin_freq": Some( + String( + "0|0|216|1236|2310|2568|2964|3302|3802|4180", + ), + ), +- Other( +- Other( +- "dp_hist_alt_bin_freq", +- ), +- ): Some( ++ "dp_hist_alt_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), + }, + ), +- genotypes: Genotypes { ++ samples: Samples { + keys: Keys( + {}, + ), +@@ -1226,45 +838,31 @@ expression: records + ), + ( + 0, +- Record { +- chromosome: Name( +- "chrM", +- ), +- position: Position( +- 10, ++ RecordBuf { ++ reference_sequence_name: "chrM", ++ variant_start: Some( ++ Position( ++ 10, ++ ), + ), + ids: Ids( + {}, + ), +- reference_bases: ReferenceBases( +- [ +- T, +- ], +- ), ++ reference_bases: "T", + alternate_bases: AlternateBases( + [ +- Bases( +- [ +- C, +- ], +- ), ++ "C", + ], + ), + quality_score: None, +- filters: Some( +- Fail( +- { +- "npg", +- }, +- ), ++ filters: Filters( ++ { ++ "npg", ++ }, + ), + info: Info( + { +- Other( +- Other( +- "filters", +- ), +- ): Some( ++ "filters": Some( + Array( + String( + [ +@@ -1275,20 +873,12 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "variant_collapsed", +- ), +- ): Some( ++ "variant_collapsed": Some( + String( + "A7G", + ), + ), +- Other( +- Other( +- "vep", +- ), +- ): Some( ++ "vep": Some( + Array( + String( + [ +@@ -1299,199 +889,109 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "base_qual_hist", +- ), +- ): Some( ++ "base_qual_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "position_hist", +- ), +- ): Some( ++ "position_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "strand_bias_hist", +- ), +- ): Some( ++ "strand_bias_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "weak_evidence_hist", +- ), +- ): Some( ++ "weak_evidence_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "contamination_hist", +- ), +- ): Some( ++ "contamination_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + 
), +- Other( +- Other( +- "heteroplasmy_below_min_het_threshold_hist", +- ), +- ): Some( ++ "heteroplasmy_below_min_het_threshold_hist": Some( + String( + "1|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "excluded_AC", +- ), +- ): Some( ++ "excluded_AC": Some( + Integer( + 1, + ), + ), +- Standard( +- TotalAlleleCount, +- ): Some( ++ "AN": Some( + Integer( + 56433, + ), + ), +- Other( +- Other( +- "AC_hom", +- ), +- ): Some( ++ "AC_hom": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "AC_het", +- ), +- ): Some( ++ "AC_het": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "hl_hist", +- ), +- ): Some( ++ "hl_hist": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "dp_mean", +- ), +- ): Some( ++ "dp_mean": Some( + Float( + 2555.14, + ), + ), +- Other( +- Other( +- "mq_mean", +- ), +- ): None, +- Other( +- Other( +- "tlod_mean", +- ), +- ): None, +- Other( +- Other( +- "AF_hom", +- ), +- ): Some( ++ "mq_mean": None, ++ "tlod_mean": None, ++ "AF_hom": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "AF_het", +- ), +- ): Some( ++ "AF_het": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "max_hl", +- ), +- ): Some( ++ "max_hl": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "hap_AN", +- ), +- ): Some( ++ "hap_AN": Some( + String( + "2679|1537|868|603|34|282|91|14784|701|934|3144|2732|663|2977|4724|5672|126|1|1298|366|7|393|3080|6037|1234|819|546|12|89", + ), + ), +- Other( +- Other( +- "hap_AC_het", +- ), +- ): Some( ++ "hap_AC_het": Some( + String( + "0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "hap_AC_hom", +- ), +- ): Some( ++ "hap_AC_hom": Some( + String( + "0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "hap_AF_hom", +- ), +- ): Some( ++ "hap_AF_hom": Some( + String( + 
"0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hap_AF_het", +- ), +- ): Some( ++ "hap_AF_het": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "hap_hl_hist", +- ), +- ): Some( ++ "hap_hl_hist": Some( + Array( + String( + [ +@@ -1586,74 +1086,42 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "hap_faf_hom", +- ), +- ): Some( ++ "hap_faf_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "faf_hapmax_hom", +- ), +- ): Some( ++ "faf_hapmax_hom": Some( + Float( + 0.0, + ), + ), +- Other( +- Other( +- "pop_AN", +- ), +- ): Some( ++ "pop_AN": Some( + String( + "14347|392|5717|1415|1482|4892|25849|826|1493|20", + ), + ), +- Other( +- Other( +- "pop_AC_het", +- ), +- ): Some( ++ "pop_AC_het": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "pop_AC_hom", +- ), +- ): Some( ++ "pop_AC_hom": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "pop_AF_hom", +- ), +- ): Some( ++ 
"pop_AF_hom": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "pop_AF_het", +- ), +- ): Some( ++ "pop_AF_het": Some( + String( + "0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00|0.00000e+00", + ), + ), +- Other( +- Other( +- "pop_hl_hist", +- ), +- ): Some( ++ "pop_hl_hist": Some( + Array( + String( + [ +@@ -1691,99 +1159,59 @@ expression: records + ), + ), + ), +- Other( +- Other( +- "age_hist_hom_bin_freq", +- ), +- ): Some( ++ "age_hist_hom_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "age_hist_hom_n_smaller", +- ), +- ): Some( ++ "age_hist_hom_n_smaller": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_hom_n_larger", +- ), +- ): Some( ++ "age_hist_hom_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_het_bin_freq", +- ), +- ): Some( ++ "age_hist_het_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), +- Other( +- Other( +- "age_hist_het_n_smaller", +- ), +- ): Some( ++ "age_hist_het_n_smaller": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "age_hist_het_n_larger", +- ), +- ): Some( ++ "age_hist_het_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "dp_hist_all_n_larger", +- ), +- ): Some( ++ "dp_hist_all_n_larger": Some( + Integer( + 36388, + ), + ), +- Other( +- Other( +- "dp_hist_alt_n_larger", +- ), +- ): Some( ++ "dp_hist_alt_n_larger": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "dp_hist_all_bin_freq", +- ), +- ): Some( ++ "dp_hist_all_bin_freq": Some( + String( + "0|0|194|1156|2241|2524|2903|3254|3706|4067", + ), + ), +- Other( +- Other( +- "dp_hist_alt_bin_freq", +- ), +- ): Some( ++ "dp_hist_alt_bin_freq": Some( + String( + "0|0|0|0|0|0|0|0|0|0", + ), + ), + }, + ), +- genotypes: Genotypes { ++ samples: Samples { + keys: Keys( + {}, + ), +@@ 
-1793,64 +1221,48 @@ expression: records + ), + ( + 1, +- Record { +- chromosome: Name( +- "chrM", +- ), +- position: Position( +- 10, ++ RecordBuf { ++ reference_sequence_name: "chrM", ++ variant_start: Some( ++ Position( ++ 10, ++ ), + ), + ids: Ids( + {}, + ), +- reference_bases: ReferenceBases( +- [ +- T, +- ], +- ), ++ reference_bases: "T", + alternate_bases: AlternateBases( + [ +- Bases( +- [ +- C, +- ], +- ), ++ "C", + ], + ), + quality_score: None, +- filters: Some( +- Pass, ++ filters: Filters( ++ { ++ "PASS", ++ }, + ), + info: Info( + { +- Standard( +- TotalAlleleCount, +- ): Some( ++ "AN": Some( + Integer( + 196554, + ), + ), +- Other( +- Other( +- "AC_hom", +- ), +- ): Some( ++ "AC_hom": Some( + Integer( + 7, + ), + ), +- Other( +- Other( +- "AC_het", +- ), +- ): Some( ++ "AC_het": Some( + Integer( + 1, + ), + ), + }, + ), +- genotypes: Genotypes { ++ samples: Samples { + keys: Keys( + {}, + ), +@@ -1860,64 +1272,48 @@ expression: records + ), + ( + 1, +- Record { +- chromosome: Name( +- "chrM", +- ), +- position: Position( +- 11, ++ RecordBuf { ++ reference_sequence_name: "chrM", ++ variant_start: Some( ++ Position( ++ 11, ++ ), + ), + ids: Ids( + {}, + ), +- reference_bases: ReferenceBases( +- [ +- C, +- ], +- ), ++ reference_bases: "C", + alternate_bases: AlternateBases( + [ +- Bases( +- [ +- T, +- ], +- ), ++ "T", + ], + ), + quality_score: None, +- filters: Some( +- Pass, ++ filters: Filters( ++ { ++ "PASS", ++ }, + ), + info: Info( + { +- Standard( +- TotalAlleleCount, +- ): Some( ++ "AN": Some( + Integer( + 196554, + ), + ), +- Other( +- Other( +- "AC_hom", +- ), +- ): Some( ++ "AC_hom": Some( + Integer( + 0, + ), + ), +- Other( +- Other( +- "AC_het", +- ), +- ): Some( ++ "AC_het": Some( + Integer( + 1, + ), + ), + }, + ), +- genotypes: Genotypes { ++ samples: Samples { + keys: Keys( + {}, + ), +diff --git a/src/freqs/cli/import/xy.rs b/src/freqs/cli/import/xy.rs +index 04b8623..6c71338 100644 +--- a/src/freqs/cli/import/xy.rs ++++ 
b/src/freqs/cli/import/xy.rs +@@ -7,8 +7,8 @@ fn write_record( + db: &rocksdb::DBWithThreadMode, + cf: &std::sync::Arc, + record_key: &common::keys::Var, +- record_genome: &mut Option, +- record_exome: &mut Option, ++ record_genome: &mut Option, ++ record_exome: &mut Option, + ) -> Result<(), anyhow::Error> { + if record_genome.is_none() && record_exome.is_none() { + // Early exit, nothing to write out. +@@ -56,11 +56,14 @@ pub fn import_region( + let mut readers = Vec::new(); + if let Some(path_genome) = path_genome { + is_genome.push(true); +- readers.push(noodles_vcf::indexed_reader::Builder::default().build_from_path(path_genome)?); ++ readers.push( ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_genome)?, ++ ); + } + if let Some(path_exome) = path_exome { + is_genome.push(false); +- readers.push(noodles_vcf::indexed_reader::Builder::default().build_from_path(path_exome)?); ++ readers ++ .push(noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_exome)?); + } + // Read headers. + let headers: Vec<_> = readers +@@ -75,7 +78,7 @@ pub fn import_region( + .map(|(reader, header)| reader.query(header, region)) + .collect::>()?; + // Construct the `MultiQuery`. +- let multi_query = super::reading::MultiQuery::new(queries)?; ++ let multi_query = super::reading::MultiQuery::new(queries, &headers)?; +  + // Now iterate over the `MultiQuery` and write to the database. + // +diff --git a/src/freqs/serialized/auto.rs b/src/freqs/serialized/auto.rs +index e403e93..c459d84 100644 +--- a/src/freqs/serialized/auto.rs ++++ b/src/freqs/serialized/auto.rs +@@ -1,6 +1,7 @@ + //! Autosomal counts. +  + use byteorder::{ByteOrder, LittleEndian}; ++use noodles_vcf::variant::record::AlternateBases; +  + use crate::common::noodles; +  +@@ -17,7 +18,7 @@ pub struct Counts { +  + impl Counts { + /// Create from the given VCF record. 
+- pub fn from_vcf_allele(value: &noodles_vcf::Record, _allele_no: usize) -> Self { ++ pub fn from_vcf_allele(value: &noodles_vcf::variant::RecordBuf, _allele_no: usize) -> Self { + tracing::trace!("@ {:?}", &value); + assert_eq!( + value.alternate_bases().len(), +diff --git a/src/freqs/serialized/mt.rs b/src/freqs/serialized/mt.rs +index e4ba61f..de33fc8 100644 +--- a/src/freqs/serialized/mt.rs ++++ b/src/freqs/serialized/mt.rs +@@ -1,6 +1,7 @@ + //! Mitochondrial counts. +  + use byteorder::{ByteOrder, LittleEndian}; ++use noodles_vcf::variant::record::AlternateBases; +  + use crate::common::noodles; + // use noodles_vcf::{ +@@ -22,7 +23,7 @@ pub struct Counts { +  + impl Counts { + /// Create from the given VCF record. +- pub fn from_vcf_allele(value: &noodles_vcf::Record, _allele_no: usize) -> Self { ++ pub fn from_vcf_allele(value: &noodles_vcf::variant::RecordBuf, _allele_no: usize) -> Self { + assert_eq!( + value.alternate_bases().len(), + 1, +diff --git a/src/freqs/serialized/xy.rs b/src/freqs/serialized/xy.rs +index dabbed8..1730107 100644 +--- a/src/freqs/serialized/xy.rs ++++ b/src/freqs/serialized/xy.rs +@@ -1,6 +1,7 @@ + //! gonosomal counts. +  + use byteorder::{ByteOrder, LittleEndian}; ++use noodles_vcf::variant::record::AlternateBases; +  + use crate::common::noodles; +  +@@ -19,7 +20,7 @@ pub struct Counts { +  + impl Counts { + /// Create from the given VCF record. 
+- pub fn from_vcf_allele(value: &noodles_vcf::Record, _allele_no: usize) -> Self { ++ pub fn from_vcf_allele(value: &noodles_vcf::variant::RecordBuf, _allele_no: usize) -> Self { + assert_eq!( + value.alternate_bases().len(), + 1, +diff --git a/src/gnomad_mtdna/cli/import.rs b/src/gnomad_mtdna/cli/import.rs +index 1bbe02f..fb69e4f 100644 +--- a/src/gnomad_mtdna/cli/import.rs ++++ b/src/gnomad_mtdna/cli/import.rs +@@ -5,6 +5,8 @@ use std::sync::Arc; + use clap::Parser; + use indicatif::ParallelProgressIterator as _; + use noodles_csi::BinningIndex as _; ++use noodles_vcf::variant::record::AlternateBases; ++use noodles_vcf::variant::RecordBuf; + use prost::Message as _; + use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; +  +@@ -109,7 +111,7 @@ fn process_window( + ) -> Result<(), anyhow::Error> { + let cf_gnomad = db.cf_handle(&args.cf_name).unwrap(); + let mut reader = +- noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; + let header = reader.read_header()?; +  + let raw_region = format!("{}:{}-{}", chrom, begin + 1, end); +@@ -136,6 +138,7 @@ fn process_window( + if let Some(query) = query { + for result in query { + let vcf_record = result?; ++ let vcf_record = RecordBuf::try_from_variant_record(&header, &vcf_record)?; +  + // Process each alternate allele into one record. 
+ let details_options = serde_json::from_str( +diff --git a/src/gnomad_nuclear/cli/import.rs b/src/gnomad_nuclear/cli/import.rs +index d6d54be..2b301ed 100644 +--- a/src/gnomad_nuclear/cli/import.rs ++++ b/src/gnomad_nuclear/cli/import.rs +@@ -6,6 +6,8 @@ use clap::Parser; + use indicatif::ParallelProgressIterator; + use noodles_csi::BinningIndex as _; + use noodles_vcf::header::record; ++use noodles_vcf::variant::record::AlternateBases; ++use noodles_vcf::variant::RecordBuf; + use prost::Message; + use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; +  +@@ -174,7 +176,7 @@ fn process_window( + ) -> Result<(), anyhow::Error> { + let cf_gnomad = db.cf_handle(&args.cf_name).unwrap(); + let mut reader = +- noodles_vcf::indexed_reader::Builder::default().build_from_path(path_in_vcf)?; ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(path_in_vcf)?; + let header = reader.read_header()?; +  + let raw_region = format!("{}:{}-{}", chrom, begin + 1, end); +@@ -201,6 +203,7 @@ fn process_window( + if let Some(query) = query { + for result in query { + let vcf_record = result?; ++ let vcf_record = RecordBuf::try_from_variant_record(&header, &vcf_record)?; +  + // Process each alternate allele into one record. 
+ for allele_no in 0..vcf_record.alternate_bases().len() { +@@ -328,7 +331,7 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> + tracing::info!("Opening gnomAD-nuclear VCF file..."); + let before_loading = std::time::Instant::now(); + let mut reader_vcf = +- noodles_vcf::reader::Builder::default().build_from_path(&args.path_in_vcf[0])?; ++ noodles_vcf::io::reader::Builder::default().build_from_path(&args.path_in_vcf[0])?; + let header = reader_vcf.read_header()?; +  + let vep_version = if let Some(record::value::Collection::Unstructured(values)) = header +diff --git a/src/gnomad_sv/cli/import/gnomad_cnv4.rs b/src/gnomad_sv/cli/import/gnomad_cnv4.rs +index 8e77ab3..3b73833 100644 +--- a/src/gnomad_sv/cli/import/gnomad_cnv4.rs ++++ b/src/gnomad_sv/cli/import/gnomad_cnv4.rs +@@ -1,6 +1,8 @@ + //! gnomAD CNV v4 import. +  +-use std::{str::FromStr, sync::Arc}; ++use itertools::Itertools; ++use noodles_vcf::variant::record::Ids; ++use std::{fmt, str::FromStr, sync::Arc}; +  + use crate::{ + common::noodles::{get_f32, get_i32, get_string, get_vec_str}, +@@ -43,17 +45,17 @@ impl FromStr for Population { + } + } +  +-impl ToString for Population { +- fn to_string(&self) -> String { ++impl fmt::Display for Population { ++ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { +- Population::Afr => "AFR".to_string(), +- Population::Amr => "AMR".to_string(), +- Population::Asj => "ASJ".to_string(), +- Population::Eas => "EAS".to_string(), +- Population::Fin => "FIN".to_string(), +- Population::Mid => "MID".to_string(), +- Population::Nfe => "NFE".to_string(), +- Population::Sas => "SAS".to_string(), ++ Population::Afr => write!(f, "AFR"), ++ Population::Amr => write!(f, "AMR"), ++ Population::Asj => write!(f, "ASJ"), ++ Population::Eas => write!(f, "EAS"), ++ Population::Fin => write!(f, "FIN"), ++ Population::Mid => write!(f, "MID"), ++ Population::Nfe => write!(f, "NFE"), ++ Population::Sas => write!(f, "SAS"), + _ => 
unreachable!("unknown population: {:?}", self), + } + } +@@ -75,11 +77,14 @@ impl Record { + /// + /// * Any error encountered during the creation. + pub fn from_vcf_record( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + cohort_name: &str, + ) -> Result { +- let chrom = record.chromosome().to_string(); +- let start: usize = record.position().into(); ++ let chrom = record.reference_sequence_name().to_string(); ++ let start: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); + let stop = get_i32(record, "END").expect("no END?"); + let inner_start = get_i32(record, "POSMAX").expect("no POSMAX?"); + let outer_start = get_i32(record, "POSMIN").expect("no POSMIN?"); +@@ -147,7 +152,7 @@ impl Record { +  + /// Extract allele counts from VCF record. + fn carrier_counts_by_sex_from_vcf_record( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + population: Option, + ) -> Result { + let pop_prefix = population +@@ -163,7 +168,7 @@ impl Record { +  + /// Extract allele counts for a given population from VCF record. 
+ fn extract_carrier_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + prefix: &str, + ) -> Result { + let sc = get_f32(record, &format!("{}SC", prefix)).unwrap_or_default() as i32; +@@ -217,12 +222,13 @@ pub fn import( + }; + tracing::info!("importing gnomAD-CNV v4 {} cohort", cohort_name); +  +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path_in_vcf)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path_in_vcf)?; + let header = reader.read_header()?; +  +- for result in reader.records(&header) { ++ for result in reader.record_bufs(&header) { + let vcf_record = result?; +- let key = format!("{}", vcf_record.ids()).into_bytes(); ++ // TODO make sure this doesn't change anything ++ let key = vcf_record.ids().as_ref().iter().join(",").into_bytes(); +  + // Build record for VCF record. + let record = Record::from_vcf_record(&vcf_record, cohort_name) +diff --git a/src/gnomad_sv/cli/import/gnomad_sv2.rs b/src/gnomad_sv/cli/import/gnomad_sv2.rs +index b5aadfa..5398fb3 100644 +--- a/src/gnomad_sv/cli/import/gnomad_sv2.rs ++++ b/src/gnomad_sv/cli/import/gnomad_sv2.rs +@@ -3,7 +3,11 @@ + //! Note that gnomAD v2 did not have distinction between different cohorts within + //! one file. Rather, there is one file for each cohort (all, controls, non-neuro). 
+  +-use std::{str::FromStr, sync::Arc}; ++use std::{fmt, str::FromStr, sync::Arc}; ++ ++use itertools::Itertools; ++use noodles_vcf::variant::record::Ids; ++use prost::Message; +  + use crate::{ + common::noodles::{get_f32, get_i32, get_string}, +@@ -13,8 +17,6 @@ use crate::{ + }, + }; +  +-use prost::Message; +- + impl FromStr for Filter { + type Err = anyhow::Error; +  +@@ -46,17 +48,16 @@ impl FromStr for Population { + } + } +  +-impl ToString for Population { +- fn to_string(&self) -> String { ++impl fmt::Display for Population { ++ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { +- Population::Afr => "AFR", +- Population::Amr => "AMR", +- Population::Eas => "EAS", +- Population::Eur => "EUR", +- Population::Other => "OTH", ++ Population::Afr => write!(f, "AFR"), ++ Population::Amr => write!(f, "AMR"), ++ Population::Eas => write!(f, "EAS"), ++ Population::Eur => write!(f, "EUR"), ++ Population::Other => write!(f, "OTH"), + _ => unreachable!("unknown population: {:?}", self), + } +- .to_string() + } + } +  +@@ -125,12 +126,15 @@ impl Record { + /// + /// * Any error encountered during the creation. 
+ pub fn from_vcf_record( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + cohort_name: &str, + ) -> Result { +- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); +- let pos = pos as i32; ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); ++ let pos = i32::try_from(pos)?; + let end = get_i32(record, "END").ok(); + let chrom2 = get_string(record, "CHROM2").ok(); + let end2 = get_i32(record, "END2").ok(); +@@ -140,25 +144,20 @@ impl Record { + .next() + .map(|s| s.to_string()) + .ok_or_else(|| anyhow::anyhow!("no ID found in VCF record"))?; +- let filters = record +- .filters() +- .map(|f| -> Result<_, anyhow::Error> { +- use noodles_vcf::record::Filters::*; +- Ok(match f { +- Pass => vec![Filter::Pass as i32], +- Fail(f) => { +- let mut result = f +- .iter() +- .map(|s| s.parse::().map(|f| f as i32)) +- .collect::, _>>() +- .map_err(|e| anyhow::anyhow!("problem parsing FILTER: {}", e))?; +- result.sort(); +- result +- } +- }) +- }) +- .transpose()? +- .unwrap_or_else(|| vec![Filter::Pass as i32]); ++ let filters = if record.filters().is_pass() { ++ vec![Filter::Pass as i32] ++ } else { ++ let mut result = record ++ .filters() ++ .as_ref() ++ .iter() ++ .map(|s| s.parse::().map(|f| f as i32)) ++ .collect::, _>>() ++ .map_err(|e| anyhow::anyhow!("problem parsing FILTER: {}", e))?; ++ result.sort(); ++ result ++ }; ++ + let sv_type = get_string(record, "SVTYPE")? + .parse::() + .map(|x| x as i32)?; +@@ -184,7 +183,7 @@ impl Record { +  + /// Extract allele counts from VCF record. + fn allele_counts_from_vcf_record( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + cohort_name: &str, + ) -> Result { + let cohort = if cohort_name == "all" { +@@ -224,7 +223,7 @@ impl Record { +  + /// Extract poulation allele counts. 
+ fn extract_population_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + population: Population, + ) -> Result { + let pop_str = population.to_string(); +@@ -246,7 +245,7 @@ impl Record { +  + /// Extract allele counts for a given population from VCF record. + fn extract_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + prefix: &str, + population: &str, + ) -> Result { +@@ -327,12 +326,12 @@ pub fn import( + }; + tracing::info!("importing gnomAD-SV v2 {} cohort", cohort_name); +  +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path_in_vcf)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path_in_vcf)?; + let header = reader.read_header()?; +  +- for result in reader.records(&header) { ++ for result in reader.record_bufs(&header) { + let vcf_record = result?; +- let key = format!("{}", vcf_record.ids()).into_bytes(); ++ let key = vcf_record.ids().as_ref().iter().join(",").into_bytes(); +  + // Build record for VCF record. + let record = Record::from_vcf_record(&vcf_record, cohort_name) +diff --git a/src/gnomad_sv/cli/import/gnomad_sv4.rs b/src/gnomad_sv/cli/import/gnomad_sv4.rs +index 87f0022..066bb1e 100644 +--- a/src/gnomad_sv/cli/import/gnomad_sv4.rs ++++ b/src/gnomad_sv/cli/import/gnomad_sv4.rs +@@ -3,7 +3,12 @@ + //! Note that gnomAD v2 did not have distinction between different cohorts within + //! one file. Rather, there is one file for each cohort (all, controls, non-neuro). 
+  +-use std::{str::FromStr, sync::Arc}; ++use std::{fmt, str::FromStr, sync::Arc}; ++ ++use indicatif::ParallelProgressIterator as _; ++use noodles_vcf::variant::record::Ids; ++use prost::Message as _; ++use rayon::iter::{IntoParallelRefIterator as _, ParallelIterator as _}; +  + use crate::{ + common::{ +@@ -17,10 +22,6 @@ use crate::{ + }, + }; +  +-use indicatif::ParallelProgressIterator as _; +-use prost::Message as _; +-use rayon::iter::{IntoParallelRefIterator as _, ParallelIterator as _}; +- + impl FromStr for Filter { + type Err = anyhow::Error; +  +@@ -62,22 +63,21 @@ impl FromStr for Population { + } + } +  +-impl ToString for Population { +- fn to_string(&self) -> String { ++impl fmt::Display for Population { ++ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { +- Population::Afr => "AFR", +- Population::Ami => "AMI", +- Population::Amr => "AMR", +- Population::Asj => "ASJ", +- Population::Eas => "EAS", +- Population::Fin => "FIN", +- Population::Mid => "MID", +- Population::Nfe => "NFE", +- Population::Sas => "SAS", +- Population::Other => "OTH", ++ Population::Afr => write!(f, "AFR"), ++ Population::Ami => write!(f, "AMI"), ++ Population::Amr => write!(f, "AMR"), ++ Population::Asj => write!(f, "ASJ"), ++ Population::Eas => write!(f, "EAS"), ++ Population::Fin => write!(f, "FIN"), ++ Population::Mid => write!(f, "MID"), ++ Population::Nfe => write!(f, "NFE"), ++ Population::Sas => write!(f, "SAS"), ++ Population::Other => write!(f, "OTH"), + _ => unreachable!("unknown population: {:?}", self), + } +- .to_string() + } + } +  +@@ -120,10 +120,15 @@ impl Record { + /// # Errors + /// + /// * Any error encountered during the creation. 
+- pub fn from_vcf_record(record: &noodles_vcf::Record) -> Result { +- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); +- let pos = pos as i32; ++ pub fn from_vcf_record( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); ++ let pos = i32::try_from(pos)?; + let end = get_i32(record, "END").ok(); + let chrom2 = get_string(record, "CHROM2").ok(); + let end2 = get_i32(record, "END2").ok(); +@@ -133,25 +138,20 @@ impl Record { + .next() + .map(|s| s.to_string()) + .ok_or_else(|| anyhow::anyhow!("no ID found in VCF record"))?; +- let filters = record +- .filters() +- .map(|f| -> Result<_, anyhow::Error> { +- use noodles_vcf::record::Filters::*; +- Ok(match f { +- Pass => vec![Filter::Pass as i32], +- Fail(f) => { +- let mut result = f +- .iter() +- .map(|s| s.parse::().map(|f| f as i32)) +- .collect::, _>>() +- .map_err(|e| anyhow::anyhow!("problem parsing FILTER: {}", e))?; +- result.sort(); +- result +- } +- }) +- }) +- .transpose()? +- .unwrap_or_else(|| vec![Filter::Pass as i32]); ++ let filters = if record.filters().is_pass() { ++ vec![Filter::Pass as i32] ++ } else { ++ let mut result = record ++ .filters() ++ .as_ref() ++ .iter() ++ .map(|s| s.parse::().map(|f| f as i32)) ++ .collect::, _>>() ++ .map_err(|e| anyhow::anyhow!("problem parsing FILTER: {}", e))?; ++ result.sort(); ++ result ++ }; ++ + let sv_type = get_string(record, "SVTYPE")? + .parse::() + .map(|x| x as i32)?; +@@ -180,7 +180,7 @@ impl Record { +  + /// Extract allele counts from VCF record. + fn allele_counts_from_vcf_record( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + cohort_name: &str, + ) -> Result { + let cohort = if cohort_name == "all" { +@@ -225,7 +225,7 @@ impl Record { +  + /// Extract poulation allele counts. 
+ fn extract_population_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + population: Population, + ) -> Result { + let pop_str = population.to_string(); +@@ -247,7 +247,7 @@ impl Record { +  + /// Extract allele counts for a given population from VCF record. + fn extract_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + prefix: &str, + population: &str, + ) -> Result { +@@ -317,13 +317,15 @@ fn import_file( + cf_data_name: &str, + path_in_vcf: &str, + ) -> Result<(), anyhow::Error> { +- let mut reader = noodles_vcf::reader::Builder::default().build_from_path(path_in_vcf)?; ++ let mut reader = noodles_vcf::io::reader::Builder::default().build_from_path(path_in_vcf)?; + let header = reader.read_header()?; + let cf_data = db.cf_handle(cf_data_name).unwrap(); +  +- for result in reader.records(&header) { ++ for result in reader.record_bufs(&header) { + let vcf_record = result?; +- let key = format!("{}", vcf_record.ids()).into_bytes(); ++ // TODO check if this key is the same as before ++ use itertools::Itertools; ++ let key = vcf_record.ids().as_ref().iter().join(",").into_bytes(); +  + // Build record for VCF record. 
+ let record = Record::from_vcf_record(&vcf_record) +diff --git a/src/helixmtdb/cli/import.rs b/src/helixmtdb/cli/import.rs +index 5e7189d..d51164d 100644 +--- a/src/helixmtdb/cli/import.rs ++++ b/src/helixmtdb/cli/import.rs +@@ -5,6 +5,8 @@ use std::sync::Arc; + use clap::Parser; + use indicatif::ParallelProgressIterator; + use noodles_csi::BinningIndex as _; ++use noodles_vcf::variant::record::AlternateBases; ++use noodles_vcf::variant::RecordBuf; + use prost::Message; + use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; +  +@@ -98,7 +100,7 @@ fn process_window( + ) -> Result<(), anyhow::Error> { + let cf_helix = db.cf_handle(&args.cf_name).unwrap(); + let mut reader = +- noodles_vcf::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; ++ noodles_vcf::io::indexed_reader::Builder::default().build_from_path(&args.path_in_vcf)?; + let header = reader.read_header()?; +  + let raw_region = format!("{}:{}-{}", chrom, begin + 1, end); +@@ -125,6 +127,7 @@ fn process_window( + if let Some(query) = query { + for result in query { + let vcf_record = result?; ++ let vcf_record = RecordBuf::try_from_variant_record(&header, &vcf_record)?; +  + // Process each alternate allele into one record. + for allele_no in 0..vcf_record.alternate_bases().len() { +diff --git a/src/helixmtdb/pbs.rs b/src/helixmtdb/pbs.rs +index 6718448..a1819ba 100644 +--- a/src/helixmtdb/pbs.rs ++++ b/src/helixmtdb/pbs.rs +@@ -1,60 +1,59 @@ + //! Data structures for (de-)serialization as generated by `prost-build`. +  +-use std::str::FromStr; ++use noodles_vcf::variant::record::AlternateBases; +  + pub use crate::pbs::helixmtdb::Record; +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + impl Record { + /// Creates a new `Record` from a VCF record and allele number. 
+ pub fn from_vcf_allele( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + allele_no: usize, + ) -> Result { +- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); +- let pos = pos as i32; ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); ++ let pos = i32::try_from(pos)?; + let ref_allele = record.reference_bases().to_string(); + let alt_allele = record + .alternate_bases() +- .get(allele_no) +- .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))? ++ .iter() ++ .nth(allele_no) ++ .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?? + .to_string(); +- let num_total = if let Some(Some(field::Value::Integer(num_total))) = +- record.info().get(&field::Key::from_str("AN")?) +- { +- *num_total +- } else { +- anyhow::bail!("missing INFO/AN in HelixMtDb record") +- }; +- let num_het = if let Some(Some(field::Value::Integer(num_het))) = +- record.info().get(&field::Key::from_str("AC_het")?) +- { +- *num_het +- } else { +- anyhow::bail!("missing INFO/AC in HelixMtDb record") +- }; +- let num_hom = if let Some(Some(field::Value::Integer(num_hom))) = +- record.info().get(&field::Key::from_str("AC_hom")?) +- { +- *num_hom +- } else { +- anyhow::bail!("missing INFO/AC_hom in HelixMtDb record") +- }; +- let feature_type = if let Some(Some(field::Value::String(feature))) = +- record.info().get(&field::Key::from_str("FEATURE")?) +- { +- feature.to_string() +- } else { +- anyhow::bail!("missing INFO/FEATURE in HelixMtDb record") +- }; +- let gene_name = if let Some(Some(field::Value::String(gene_name))) = +- record.info().get(&field::Key::from_str("GENE")?) 
+- { +- gene_name.to_string() +- } else { +- anyhow::bail!("missing INFO/GENE in HelixMtDb record") +- }; ++ let num_total = ++ if let Some(Some(field::Value::Integer(num_total))) = record.info().get("AN") { ++ *num_total ++ } else { ++ anyhow::bail!("missing INFO/AN in HelixMtDb record") ++ }; ++ let num_het = ++ if let Some(Some(field::Value::Integer(num_het))) = record.info().get("AC_het") { ++ *num_het ++ } else { ++ anyhow::bail!("missing INFO/AC in HelixMtDb record") ++ }; ++ let num_hom = ++ if let Some(Some(field::Value::Integer(num_hom))) = record.info().get("AC_hom") { ++ *num_hom ++ } else { ++ anyhow::bail!("missing INFO/AC_hom in HelixMtDb record") ++ }; ++ let feature_type = ++ if let Some(Some(field::Value::String(feature))) = record.info().get("FEATURE") { ++ feature.to_string() ++ } else { ++ anyhow::bail!("missing INFO/FEATURE in HelixMtDb record") ++ }; ++ let gene_name = ++ if let Some(Some(field::Value::String(gene_name))) = record.info().get("GENE") { ++ gene_name.to_string() ++ } else { ++ anyhow::bail!("missing INFO/GENE in HelixMtDb record") ++ }; +  + Ok(Record { + chrom, +diff --git a/src/pbs/clinvar.rs b/src/pbs/clinvar.rs +index ebdc58b..1e14aa0 100644 +--- a/src/pbs/clinvar.rs ++++ b/src/pbs/clinvar.rs +@@ -1,5 +1,50 @@ + //! Code generate for protobufs by `prost-build`. +  ++/// Code generated for protobufs by `prost-build`. ++pub mod class_by_freq { ++ include!(concat!(env!("OUT_DIR"), "/clinvar_data.class_by_freq.rs")); ++ include!(concat!( ++ env!("OUT_DIR"), ++ "/clinvar_data.class_by_freq.serde.rs" ++ )); ++} ++ ++/// Code generated for protobufs by `prost-build`. ++pub mod clinvar_public { ++ include!(concat!(env!("OUT_DIR"), "/clinvar_data.clinvar_public.rs")); ++ include!(concat!( ++ env!("OUT_DIR"), ++ "/clinvar_data.clinvar_public.serde.rs" ++ )); ++} ++ ++/// Code generated for protobufs by `prost-build`. 
++pub mod extracted_vars { ++ include!(concat!(env!("OUT_DIR"), "/clinvar_data.extracted_vars.rs")); ++ include!(concat!( ++ env!("OUT_DIR"), ++ "/clinvar_data.extracted_vars.serde.rs" ++ )); ++} ++ ++/// Code generated for protobufs by `prost-build`. ++pub mod gene_impact { ++ include!(concat!(env!("OUT_DIR"), "/clinvar_data.gene_impact.rs")); ++ include!(concat!( ++ env!("OUT_DIR"), ++ "/clinvar_data.gene_impact.serde.rs" ++ )); ++} ++ ++/// Code generated for protobufs by `prost-build`. ++pub mod phenotype_link { ++ include!(concat!(env!("OUT_DIR"), "/clinvar_data.phenotype_link.rs")); ++ include!(concat!( ++ env!("OUT_DIR"), ++ "/clinvar_data.phenotype_link.serde.rs" ++ )); ++} ++ + /// Code generate for protobufs by `prost-build`. + pub mod minimal { + include!(concat!(env!("OUT_DIR"), "/annonars.clinvar.minimal.rs")); +diff --git a/src/pbs/gnomad/gnomad2.rs b/src/pbs/gnomad/gnomad2.rs +index c0450a6..c9e4d2c 100644 +--- a/src/pbs/gnomad/gnomad2.rs ++++ b/src/pbs/gnomad/gnomad2.rs +@@ -1,8 +1,9 @@ + //! Code generate for protobufs by `prost-build`. +  ++use noodles_vcf::variant::record::AlternateBases; + use std::str::FromStr; +  +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + use crate::common; +  +@@ -85,7 +86,7 @@ impl DetailsOptions { + impl Record { + /// Creates a new `Record` from a VCF record and allele number. + pub fn from_vcf_allele( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + allele_no: usize, + options: &DetailsOptions, + ) -> Result { +@@ -94,14 +95,18 @@ impl Record { + assert!(allele_no == 0, "only allele 0 is supported"); +  + // Extract mandatory fields. 
+- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); +- let pos = pos as i32; ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); ++ let pos = i32::try_from(pos).unwrap(); + let ref_allele = record.reference_bases().to_string(); + let alt_allele = record + .alternate_bases() +- .get(allele_no) +- .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))? ++ .iter() ++ .nth(allele_no) ++ .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?? + .to_string(); + let filters = Self::extract_filters(record)?; + let allele_counts = Self::extract_cohorts_allele_counts(record, options)?; +@@ -159,10 +164,10 @@ impl Record { +  + /// Extract the "vep" field into gnomAD v2 `Vep` records. + fn extract_vep( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result, anyhow::Error> { + if let Some(Some(field::Value::Array(field::value::Array::String(v)))) = +- record.info().get(&field::Key::from_str("vep")?) ++ record.info().get("vep") + { + v.iter() + .flat_map(|v| { +@@ -184,7 +189,7 @@ impl Record { +  + /// Extract the liftover related fields into gnomAD v2 `Vep` records. + fn extract_liftover( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result, anyhow::Error> { + let tmp = LiftoverInfo { + reverse_complemented_alleles: common::noodles::get_flag( +@@ -210,7 +215,9 @@ impl Record { + } +  + /// Extract the details on the random forest. 
+- fn extract_rf_info(record: &noodles_vcf::Record) -> Result { ++ fn extract_rf_info( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { + Ok(RandomForestInfo { + rf_tp_probability: common::noodles::get_f32(record, "rf_tp_probability")?, + rf_positive_label: common::noodles::get_flag(record, "rf_positive_label")?, +@@ -221,7 +228,9 @@ impl Record { + } +  + /// Extract the details on the variant. +- fn extract_variant_info(record: &noodles_vcf::Record) -> Result { ++ fn extract_variant_info( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { + Ok(VariantInfo { + variant_type: common::noodles::get_string(record, "variant_type")?, + allele_type: common::noodles::get_string(record, "allele_type")?, +@@ -232,10 +241,12 @@ impl Record { + } +  + /// Extract the filters fields. +- fn extract_filters(record: &noodles_vcf::Record) -> Result, anyhow::Error> { ++ fn extract_filters( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result, anyhow::Error> { + Ok( + if let Some(Some(field::Value::Array(field::value::Array::String(value)))) = +- record.info().get(&field::Key::from_str("filters")?) ++ record.info().get("filters") + { + value + .iter() +@@ -255,7 +266,7 @@ impl Record { + } +  + /// Extract the age related fields from the VCF record. +- fn extract_age(record: &noodles_vcf::record::Record) -> Result { ++ fn extract_age(record: &noodles_vcf::variant::RecordBuf) -> Result { + Ok(AgeInfo { + age_hist_hom_bin_freq: common::noodles::get_vec::(record, "age_hist_hom_bin_freq") + .unwrap_or_default(), +@@ -269,7 +280,7 @@ impl Record { + } +  + /// Extract the depth related fields from the VCF record. 
+- fn extract_depth(record: &noodles_vcf::record::Record) -> Result { ++ fn extract_depth(record: &noodles_vcf::variant::RecordBuf) -> Result { + Ok(DepthInfo { + dp_hist_all_n_larger: common::noodles::get_i32(record, "dp_hist_all_n_larger").ok(), + dp_hist_alt_n_larger: common::noodles::get_i32(record, "dp_hist_alt_n_larger").ok(), +@@ -281,7 +292,9 @@ impl Record { + } +  + /// Extract the quality-related fields from the VCF record. +- fn extract_quality(record: &noodles_vcf::record::Record) -> Result { ++ fn extract_quality( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { + Ok(QualityInfo { + fs: common::noodles::get_f32(record, "FS").ok(), + inbreeding_coeff: common::noodles::get_f32(record, "InbreedingCoeff").ok(), +@@ -313,7 +326,7 @@ impl Record { +  + /// Extract the allele counts from the `record` as configured in `options`. + fn extract_cohorts_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + options: &DetailsOptions, + ) -> Result, anyhow::Error> { + // Initialize global cohort. We will always extract the non-population specific +@@ -388,7 +401,7 @@ impl Record { +  + /// Extrac the population allele counts from the `record`. + fn extract_population_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + prefix: &str, + pop: &str, + ) -> Result { +@@ -408,7 +421,7 @@ impl Record { +  + /// Extract the allele counts from the `record` with the given prefix and suffix. 
+ fn extract_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + prefix: &str, + suffix: &str, + ) -> Result, anyhow::Error> { +@@ -438,11 +451,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele_gnomad_exomes_grch37() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-nuclear/example-exomes-grch37/v2.1/gnomad-exomes.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = + Record::from_vcf_allele(&vcf_record, 0, &DetailsOptions::with_all_enabled())?; +@@ -457,11 +471,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele_gnomad_genomes_grch37() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-nuclear/example-genomes-grch37/v2.1/gnomad-genomes.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = + Record::from_vcf_allele(&vcf_record, 0, &DetailsOptions::with_all_enabled())?; +@@ -476,11 +491,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele_gnomad_exomes_grch38() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-nuclear/example-exomes-grch38/v2.1/gnomad-exomes.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = 
reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = + Record::from_vcf_allele(&vcf_record, 0, &DetailsOptions::with_all_enabled())?; +diff --git a/src/pbs/gnomad/gnomad3.rs b/src/pbs/gnomad/gnomad3.rs +index 7803922..6e4715d 100644 +--- a/src/pbs/gnomad/gnomad3.rs ++++ b/src/pbs/gnomad/gnomad3.rs +@@ -1,8 +1,9 @@ + //! Code generate for protobufs by `prost-build`. +  ++use noodles_vcf::variant::record::AlternateBases; + use std::str::FromStr; +  +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + use crate::common; +  +@@ -86,23 +87,25 @@ impl DetailsOptions { + impl Record { + /// Creates a new `Record` from a VCF record and allele number. + pub fn from_vcf_allele( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + allele_no: usize, + options: &DetailsOptions, + ) -> Result { + assert!(allele_no == 0, "only allele 0 is supported"); +  +- assert!(allele_no == 0, "only allele 0 is supported"); +- + // Extract mandatory fields. +- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); + let pos = pos as i32; + let ref_allele = record.reference_bases().to_string(); + let alt_allele = record + .alternate_bases() +- .get(allele_no) +- .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))? ++ .iter() ++ .nth(allele_no) ++ .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?? + .to_string(); + let filters = Self::extract_filters(record)?; + let allele_counts = Self::extract_cohorts_allele_counts(record, options)?; +@@ -154,10 +157,10 @@ impl Record { +  + /// Extract the "vep" field into gnomAD v3 `Vep` records. 
+ pub(crate) fn extract_vep( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result, anyhow::Error> { + if let Some(Some(field::Value::Array(field::value::Array::String(v)))) = +- record.info().get(&field::Key::from_str("vep")?) ++ record.info().get("vep") + { + v.iter() + .flat_map(|v| { +@@ -179,7 +182,7 @@ impl Record { +  + /// Extract the details on the variant. + pub(crate) fn extract_variant_info( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(VariantInfo { + variant_type: common::noodles::get_string(record, "variant_type")?, +@@ -194,7 +197,7 @@ impl Record { +  + /// Extract details on the variant effects. + pub(crate) fn extract_effect_info( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(EffectInfo { + primate_ai_score: common::noodles::get_f32(record, "primate_ai_score").ok(), +@@ -208,10 +211,12 @@ impl Record { + } +  + /// Extract the filters fields. +- pub(crate) fn extract_filters(record: &noodles_vcf::Record) -> Result, anyhow::Error> { ++ pub(crate) fn extract_filters( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result, anyhow::Error> { + Ok( + if let Some(Some(field::Value::Array(field::value::Array::String(value)))) = +- record.info().get(&field::Key::from_str("filters")?) ++ record.info().get("filters") + { + value + .iter() +@@ -234,7 +239,7 @@ impl Record { +  + /// Extract the age related fields from the VCF record. + pub(crate) fn extract_age( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(AgeInfo { + age_hist_hom_bin_freq: common::noodles::get_vec::(record, "age_hist_hom_bin_freq") +@@ -250,7 +255,7 @@ impl Record { +  + /// Extract the depth related fields from the VCF record. 
+ pub(crate) fn extract_depth( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(DepthInfo { + dp_hist_all_n_larger: common::noodles::get_i32(record, "dp_hist_all_n_larger").ok(), +@@ -264,7 +269,7 @@ impl Record { +  + /// Extract the quality-related fields from the VCF record. + pub(crate) fn extract_quality( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(QualityInfo { + as_fs: common::noodles::get_f32(record, "AS_FS").ok(), +@@ -294,7 +299,7 @@ impl Record { +  + /// Extract the allele counts from the `record` as configured in `options`. + pub(crate) fn extract_cohorts_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + options: &DetailsOptions, + ) -> Result, anyhow::Error> { + // Initialize global cohort. We will always extract the non-population specific +@@ -367,7 +372,7 @@ impl Record { +  + /// Extrac the population allele counts from the `record`. + pub(crate) fn extract_population_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + infix: &str, + pop: &str, + ) -> Result { +@@ -403,7 +408,7 @@ impl Record { +  + /// Extract the allele counts from the `record` with the given infix and suffix. 
+ pub(crate) fn extract_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + infix: &str, + suffix: &str, + ) -> Result { +@@ -427,11 +432,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele_gnomad_genomes_grch38() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-nuclear/example-genomes-grch38/v3.1/gnomad-genomes.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = + Record::from_vcf_allele(&vcf_record, 0, &DetailsOptions::with_all_enabled())?; +diff --git a/src/pbs/gnomad/gnomad4.rs b/src/pbs/gnomad/gnomad4.rs +index 7f893af..8e633a7 100644 +--- a/src/pbs/gnomad/gnomad4.rs ++++ b/src/pbs/gnomad/gnomad4.rs +@@ -1,8 +1,9 @@ + //! Code generate for protobufs by `prost-build`. +  ++use noodles_vcf::variant::record::AlternateBases; + use std::str::FromStr; +  +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + use super::gnomad3; + use crate::common; +@@ -58,7 +59,7 @@ impl Record { + /// + /// The `Record` or an error if the record could not be extracted. + pub fn from_vcf_allele( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + allele_no: usize, + options: &gnomad3::DetailsOptions, + record_type: RecordType, +@@ -68,14 +69,18 @@ impl Record { + assert!(allele_no == 0, "only allele 0 is supported"); +  + // Extract mandatory fields. 
+- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); + let pos = pos as i32; + let ref_allele = record.reference_bases().to_string(); + let alt_allele = record + .alternate_bases() +- .get(allele_no) +- .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))? ++ .iter() ++ .nth(allele_no) ++ .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?? + .to_string(); + let filters = gnomad3::Record::extract_filters(record)?; + let allele_counts = Self::extract_cohorts_allele_counts(record, record_type)?; +@@ -143,10 +148,10 @@ impl Record { +  + /// Extract the "vep" field into gnomAD v3 `Vep` records. + fn extract_vep( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result, anyhow::Error> { + if let Some(Some(field::Value::Array(field::value::Array::String(v)))) = +- record.info().get(&field::Key::from_str("vep")?) ++ record.info().get("vep") + { + v.iter() + .flat_map(|v| { +@@ -167,7 +172,9 @@ impl Record { + } +  + /// Extract the VRS infos. +- fn extract_vrs_info(record: &noodles_vcf::Record) -> Result { ++ fn extract_vrs_info( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { + Ok(VrsInfo { + allele_ids: common::noodles::get_vec_str(record, "VRS_Allele_IDs").unwrap_or_default(), + ends: common::noodles::get_vec_i32(record, "VRS_Ends").unwrap_or_default(), +@@ -177,7 +184,9 @@ impl Record { + } +  + /// Extract details on the variant effects. 
+- fn extract_effect_info(record: &noodles_vcf::Record) -> Result { ++ fn extract_effect_info( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { + Ok(EffectInfo { + pangolin_largest_ds: common::noodles::get_f32(record, "pangolin_largest_ds").ok(), + phylop: common::noodles::get_f32(record, "phylop").ok(), +@@ -192,7 +201,7 @@ impl Record { +  + /// Extract the allele counts from the `record` as configured in `options`. + fn extract_cohorts_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + record_type: RecordType, + ) -> Result, anyhow::Error> { + // Initialize global cohort. +@@ -270,7 +279,7 @@ impl Record { +  + /// Extrac the ancestry group allele counts from the `record`. + fn extract_ancestry_group_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + infix: &str, + grp: &str, + ) -> Result { +@@ -306,7 +315,7 @@ impl Record { +  + /// Extract the allele counts from the `record` with the given infix and suffix. 
+ fn extract_allele_counts( +- record: &noodles_vcf::Record, ++ record: &noodles_vcf::variant::RecordBuf, + infix: &str, + suffix: &str, + ) -> Result { +@@ -330,11 +339,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele_gnomad_genomes_grch38() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-nuclear/example-genomes-grch38/v4.0/gnomad-genomes.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = Record::from_vcf_allele( + &vcf_record, +@@ -353,11 +363,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele_gnomad_exomess_grch38() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-nuclear/example-exomes-grch38/v4.0/gnomad-exomes.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = Record::from_vcf_allele( + &vcf_record, +diff --git a/src/pbs/gnomad/mtdna.rs b/src/pbs/gnomad/mtdna.rs +index 7c2a4e0..acc1d85 100644 +--- a/src/pbs/gnomad/mtdna.rs ++++ b/src/pbs/gnomad/mtdna.rs +@@ -1,8 +1,9 @@ + //! Code generate for protobufs by `prost-build`. 
+  ++use noodles_vcf::variant::record::AlternateBases; + use std::str::FromStr; +  +-use noodles_vcf::record::info::field; ++use noodles_vcf::variant::record_buf::info::field; +  + use super::vep_gnomad3::Vep; + use crate::common; +@@ -68,21 +69,25 @@ impl DetailsOptions { + impl Record { + /// Creates a new `Record` from a VCF record and allele number. + pub fn from_vcf_allele( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + allele_no: usize, + options: &DetailsOptions, + ) -> Result { + assert!(allele_no == 0, "only allele 0 is supported"); +  + // Extract mandatory fields. +- let chrom = record.chromosome().to_string(); +- let pos: usize = record.position().into(); ++ let chrom = record.reference_sequence_name().to_string(); ++ let pos: usize = record ++ .variant_start() ++ .expect("Telomeric breakends not supported") ++ .get(); + let pos = pos as i32; + let ref_allele = record.reference_bases().to_string(); + let alt_allele = record + .alternate_bases() +- .get(allele_no) +- .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))? ++ .iter() ++ .nth(allele_no) ++ .ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?? + .to_string(); + let variant_collapsed = common::noodles::get_string(record, "variant_collapsed")?; + let excluded_ac = common::noodles::get_i32(record, "excluded_AC")?; +@@ -164,9 +169,9 @@ impl Record { + } +  + /// Extract the "vep" field. +- fn extract_vep(record: &noodles_vcf::Record) -> Result, anyhow::Error> { ++ fn extract_vep(record: &noodles_vcf::variant::RecordBuf) -> Result, anyhow::Error> { + if let Some(Some(field::Value::Array(field::value::Array::String(v)))) = +- record.info().get(&field::Key::from_str("vep")?) ++ record.info().get("vep") + { + v.iter() + .flat_map(|v| v.as_ref().map(|s| Vep::from_str(s))) +@@ -178,7 +183,7 @@ impl Record { +  + /// Extract the heteroplasmy-related fields from the VCF record. 
+ fn extract_heteroplasmy( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(HeteroplasmyInfo { + heteroplasmy_below_min_het_threshold_hist: common::noodles::get_vec::( +@@ -193,7 +198,7 @@ impl Record { +  + /// Extract the filter histogram related fields form the VCF record. + fn extract_filter_histograms( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(FilterHistograms { + base_qual_hist: common::noodles::get_vec::(record, "base_qual_hist") +@@ -211,7 +216,7 @@ impl Record { +  + /// Extract the population related fields from the VCF record. + fn extract_population( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(PopulationInfo { + pop_an: common::noodles::get_vec::(record, "pop_AN")?, +@@ -226,7 +231,7 @@ impl Record { +  + /// Extract the haplogroup related fields from the VCF record. + fn extract_haplogroup( +- record: &noodles_vcf::record::Record, ++ record: &noodles_vcf::variant::RecordBuf, + ) -> Result { + Ok(HaplogroupInfo { + hap_defining_variant: common::noodles::get_flag(record, "hap_defining_variant")?, +@@ -245,7 +250,7 @@ impl Record { + } +  + /// Extract the age related fields from the VCF record. +- fn extract_age(record: &noodles_vcf::record::Record) -> Result { ++ fn extract_age(record: &noodles_vcf::variant::RecordBuf) -> Result { + Ok(AgeInfo { + age_hist_hom_bin_freq: common::noodles::get_vec::(record, "age_hist_hom_bin_freq") + .unwrap_or_default(), +@@ -259,7 +264,7 @@ impl Record { + } +  + /// Extract the depth related fields from the VCF record. 
+- fn extract_depth(record: &noodles_vcf::record::Record) -> Result { ++ fn extract_depth(record: &noodles_vcf::variant::RecordBuf) -> Result { + Ok(DepthInfo { + dp_hist_all_n_larger: common::noodles::get_i32(record, "dp_hist_all_n_larger").ok(), + dp_hist_alt_n_larger: common::noodles::get_i32(record, "dp_hist_alt_n_larger").ok(), +@@ -271,7 +276,9 @@ impl Record { + } +  + /// Extract the quality-related fields from the VCF record. +- fn extract_quality(record: &noodles_vcf::record::Record) -> Result { ++ fn extract_quality( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result { + Ok(QualityInfo { + dp_mean: common::noodles::get_f32(record, "dp_mean").ok(), + mq_mean: common::noodles::get_f32(record, "mq_mean").ok(), +@@ -280,10 +287,12 @@ impl Record { + } +  + /// Extract the filters fields. +- fn extract_filters(record: &noodles_vcf::Record) -> Result, anyhow::Error> { ++ fn extract_filters( ++ record: &noodles_vcf::variant::RecordBuf, ++ ) -> Result, anyhow::Error> { + Ok( + if let Some(Some(field::Value::Array(field::value::Array::String(value)))) = +- record.info().get(&field::Key::from_str("filters")?) 
++ record.info().get("filters") + { + value + .iter() +@@ -309,11 +318,12 @@ mod test { + #[test] + fn test_record_from_vcf_allele() -> Result<(), anyhow::Error> { + let path_vcf = "tests/gnomad-mtdna/example/gnomad-mtdna.vcf"; +- let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?; ++ let mut reader_vcf = ++ noodles_vcf::io::reader::Builder::default().build_from_path(path_vcf)?; + let header = reader_vcf.read_header()?; +  + let mut records = Vec::new(); +- for row in reader_vcf.records(&header) { ++ for row in reader_vcf.record_bufs(&header) { + let vcf_record = row?; + let record = + Record::from_vcf_allele(&vcf_record, 0, &DetailsOptions::with_all_enabled())?; +diff --git a/src/server/actix_server/annos_db_info.rs b/src/server/actix_server/annos_db_info.rs +index 9fb0cb3..84e064c 100644 +--- a/src/server/actix_server/annos_db_info.rs ++++ b/src/server/actix_server/annos_db_info.rs +@@ -5,9 +5,7 @@ use actix_web::{ + web::{self, Data, Json, Path}, + Responder, + }; +-use serde::{Deserialize, Serialize}; +- +-use crate::server::DbInfo; ++use serde::Deserialize; +  + use super::{error::CustomError, WebServerData}; +  +@@ -20,13 +18,6 @@ struct Request { + pub genome_release: String, + } +  +-/// Result for `handle`. +-#[derive(Serialize, Debug, Clone)] +-struct ResultEntry { +- /// Information for each database. +- pub db_info: indexmap::IndexMap, +-} +- + /// Query for annotations for one variant. 
+ #[get("/annos/db-info")] + async fn handle( +diff --git a/src/tsv/coding.rs b/src/tsv/coding.rs +index c3ef162..41f7b9e 100644 +--- a/src/tsv/coding.rs ++++ b/src/tsv/coding.rs +@@ -221,7 +221,7 @@ impl Context { + // skip if not lifted to this genome build + return Ok(None); + } else if let serde_json::Value::String(chrom) = val { +- res.chrom = chrom.clone(); ++ res.chrom.clone_from(chrom); + } else { + return Err(error::Error::InvalidType( + self.config.col_chrom.clone(), +@@ -248,7 +248,7 @@ impl Context { + } + } else if col.name == self.config.col_ref { + if let serde_json::Value::String(reference) = val { +- res.reference = reference.clone(); ++ res.reference.clone_from(reference); + } else { + return Err(error::Error::InvalidType( + self.config.col_ref.clone(), +@@ -257,7 +257,7 @@ impl Context { + } + } else if col.name == self.config.col_alt { + if let serde_json::Value::String(alternative) = val { +- res.alternative = alternative.clone(); ++ res.alternative.clone_from(alternative); + } else { + return Err(error::Error::InvalidType( + self.config.col_alt.clone(), +diff --git a/tests/clinvar-genes/clinvar-genes.db/000014.sst b/tests/clinvar-genes/clinvar-genes.db/000014.sst +deleted file mode 100644 +index d5e7382..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/000014.sst ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:0e814dbc1b164581fe27671fa52382d0a5d548fb29b3f888c5c7deea007057ea +-size 1289 +diff --git a/tests/clinvar-genes/clinvar-genes.db/000016.sst b/tests/clinvar-genes/clinvar-genes.db/000016.sst +deleted file mode 100644 +index 64f0f6c..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/000016.sst ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:4f092a7c9399b0b357de72a74ae2595322c50ea6d4193891a7f5ce3b3a820d04 +-size 3060 +diff --git a/tests/clinvar-genes/clinvar-genes.db/CURRENT b/tests/clinvar-genes/clinvar-genes.db/CURRENT +deleted file mode 100644 +index 
f8d5048..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/CURRENT ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:9c283f6e81028b9eb0760d918ee4bc0aa256ed3b926393c1734c760c4bd724fd +-size 16 +diff --git a/tests/clinvar-genes/clinvar-genes.db/IDENTITY b/tests/clinvar-genes/clinvar-genes.db/IDENTITY +deleted file mode 100644 +index cb0c144..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/IDENTITY ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:b4ddf89532e4f83b30ddcb6dbe34568f2bf2d736d2fe0b76723e29b0a9ecb13d +-size 36 +diff --git a/tests/clinvar-genes/clinvar-genes.db/LOCK b/tests/clinvar-genes/clinvar-genes.db/LOCK +deleted file mode 100644 +index e69de29..0000000 +diff --git a/tests/clinvar-genes/clinvar-genes.db/LOG b/tests/clinvar-genes/clinvar-genes.db/LOG +deleted file mode 100644 +index f706ee9..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/LOG ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:2b45c9959fb2c01fb6a3801eb1117a8d9b57ce335fe1561c68cac49aeccdaa3f +-size 62348 +diff --git a/tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 b/tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 +deleted file mode 100644 +index c04a9d6..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:de1314c30fac75289cf71a1e0357ff6c8ffd7caa114e1d8d946879d7d24e705a +-size 647 +diff --git a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 b/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 +deleted file mode 100644 +index a2ad867..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:43b5ef50591a721fdda5576cf873fa4b515539decb35cac720deccaf7be8a61d +-size 15369 +diff --git a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 
b/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 +deleted file mode 100644 +index a2ad867..0000000 +--- a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 ++++ /dev/null +@@ -1,3 +0,0 @@ +-version https://git-lfs.github.com/spec/v1 +-oid sha256:43b5ef50591a721fdda5576cf873fa4b515539decb35cac720deccaf7be8a61d +-size 15369 +diff --git a/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl b/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl +index 13c715f..989b6f9 100644 +--- a/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl ++++ b/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl +@@ -1,10 +1,10 @@ +-{"rcv": "RCV000000009", "vcv": "VCV000018396", "title": "NM_002036.4(ACKR1):c.265C>T (p.Arg89Cys) AND DUFFY BLOOD GROUP SYSTEM, FY(bwk) PHENOTYPE", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "1", "accession": "NC_000001.10", "outer_start": null, "inner_start": null, "start": 159175494, "stop": 159175494, "inner_stop": null, "outer_stop": null, "display_start": 159175494, "display_stop": 159175494, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 159175494, "reference_allele_vcf": "C", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000010", "vcv": "VCV000018397", "title": "NM_002036.4(ACKR1):c.286_299del (p.Trp96fs) AND DUFFY BLOOD GROUP SYSTEM, FY(a-b-) PHENOTYPE", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "1", "accession": "NC_000001.10", "outer_start": null, "inner_start": null, "start": 
159175509, "stop": 159175522, "inner_stop": null, "outer_stop": null, "display_start": 159175509, "display_stop": 159175522, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 159175508, "reference_allele_vcf": "CCCTGGCTGGCCTGT", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000012", "vcv": "VCV000000002", "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "criteria provided, single submitter", "sequence_location": {"assembly": "GRCh37", "chr": "7", "accession": "NC_000007.13", "outer_start": null, "inner_start": null, "start": 4820844, "stop": 4820847, "inner_stop": null, "outer_stop": null, "display_start": 4820844, "display_stop": 4820847, "strand": null, "variant_length": 22, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 4820844, "reference_allele_vcf": "GGAT", "alternate_allele_vcf": "TGCTGTAAACTGTAACTGTAAA", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000013", "vcv": "VCV000000003", "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "7", "accession": "NC_000007.13", "outer_start": null, "inner_start": null, "start": 4827361, "stop": 4827374, "inner_stop": null, "outer_stop": null, 
"display_start": 4827361, "display_stop": 4827374, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 4827360, "reference_allele_vcf": "GCTGCTGGACCTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000016", "vcv": "VCV000000006", "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "11", "accession": "NC_000011.9", "outer_start": null, "inner_start": null, "start": 126147412, "stop": 126147412, "inner_stop": null, "outer_stop": null, "display_start": 126147412, "display_stop": 126147412, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 126147412, "reference_allele_vcf": "A", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:26927"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000042", "vcv": "VCV000000025", "title": "NM_015600.4(ABHD12):c.-6898_191+7002delinsCC AND PHARC syndrome", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "20", "accession": "NC_000020.10", "outer_start": null, "inner_start": null, "start": 25364147, "stop": 25378237, "inner_stop": null, "outer_stop": null, "display_start": 25364147, "display_stop": 25378237, "strand": null, "variant_length": 14091, "reference_allele": null, 
"alternate_allele": "GG", "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": null, "reference_allele_vcf": null, "alternate_allele_vcf": null, "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000043", "vcv": "VCV000000026", "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome", "variant_type": "duplication", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "20", "accession": "NC_000020.10", "outer_start": null, "inner_start": null, "start": 25288616, "stop": 25288617, "inner_stop": null, "outer_stop": null, "display_start": 25288616, "display_stop": 25288617, "strand": null, "variant_length": 7, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 25288616, "reference_allele_vcf": "G", "alternate_allele_vcf": "GGCTCTTA", "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000049", "vcv": "VCV000000032", "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "10", "accession": "NC_000010.10", "outer_start": null, "inner_start": null, "start": 99359924, "stop": 99359924, "inner_stop": null, "outer_stop": null, "display_start": 99359924, "display_stop": 99359924, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 99359924, "reference_allele_vcf": "G", 
"alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:25155"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000057", "vcv": "VCV000000040", "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "19", "accession": "NC_000019.9", "outer_start": null, "inner_start": null, "start": 36595471, "stop": 36595474, "inner_stop": null, "outer_stop": null, "display_start": 36595471, "display_stop": 36595474, "strand": null, "variant_length": 4, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 36595470, "reference_allele_vcf": "GTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000059", "vcv": "VCV000000042", "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "19", "accession": "NC_000019.9", "outer_start": null, "inner_start": null, "start": 36558317, "stop": 36558317, "inner_stop": null, "outer_stop": null, "display_start": 36558317, "display_stop": 36558317, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 36558317, "reference_allele_vcf": "G", "alternate_allele_vcf": "C", 
"for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} ++{"accession": {"accession": "VCV000000002", "version": 3}, "rcvs": [{"accession": {"accession": "RCV000000012", "version": 5}, "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer)", "variationType": "VARIATION_TYPE_INDEL", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, {"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2021-05-16T00:00:00Z", "numberOfSubmitters": 2, "numberOfSubmissions": 2}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_7", "accession": "NC_000007.13", "start": 4820844, "stop": 4820847, "displayStart": 4820844, "displayStop": 4820847, "variantLength": 22, "positionVcf": 4820844, "referenceAlleleVcf": "GGAT", "alternateAlleleVcf": 
"TGCTGTAAACTGTAACTGTAAA"}, "hgncIds": ["HGNC:22197"]} ++{"accession": {"accession": "VCV000000006", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000016", "version": 6}, "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19"}], "name": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20818383", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Mitochondrial complex 1 deficiency, nuclear type 19", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0032624"}]}, {"value": "MITOCHONDRIAL COMPLEX I DEFICIENCY, NUCLEAR TYPE 19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}, {"db": "OMIM", "id": "613622.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0003", "type": "Allelic variant"}]}], "symbols": [{"value": "MC1DN19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}]}], "xrefs": [{"db": "MedGen", "id": "C4748791"}, {"db": "MONDO", "id": "MONDO:0032624"}, {"db": "OMIM", "id": "618241", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "45335", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2019-02-04T00:00:00Z", "mostRecentSubmission": "2019-02-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_11", "accession": "NC_000011.9", "start": 126147412, "stop": 126147412, "displayStart": 126147412, "displayStop": 126147412, "variantLength": 1, "positionVcf": 126147412, "referenceAlleleVcf": "A", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:26927"]} 
++{"accession": {"accession": "VCV000000003", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000013", "version": 6}, "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, {"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-06-29T00:00:00Z", "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2017-01-30T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_7", "accession": "NC_000007.13", "start": 4827361, "stop": 4827374, "displayStart": 4827361, "displayStop": 4827374, "variantLength": 14, "positionVcf": 4827360, "referenceAlleleVcf": "GCTGCTGGACCTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:22197"]} ++{"accession": {"accession": "VCV000000043", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000060", "version": 4}, "title": 
"NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. 
Except for brain malformations in most affected individuals, no other congenital malformations are observed. Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_19", "accession": "NC_000019.9", "start": 36574001, "stop": 36574001, "displayStart": 36574001, "displayStop": 36574001, "variantLength": 1, "positionVcf": 36574001, "referenceAlleleVcf": "C", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:24502"]} ++{"accession": {"accession": "VCV000000026", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000043", "version": 3}, "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome"}], "name": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter)", "variationType": "VARIATION_TYPE_DUPLICATION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797687", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "PHARC syndrome", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0012984"}]}, {"value": 
"Polyneuropathy-hearing loss-ataxia-retinitis pigmentosa-cataract syndrome", "type": "Alternate", "xrefs": [{"db": "Orphanet", "id": "171848"}]}, {"value": "Polyneuropathy, hearing loss, ataxia, retinitis pigmentosa, and cataract", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Polyneuropathy%2C+hearing+loss%2C+ataxia%2C+retinitis+pigmentosa%2C+and+cataract/9132"}]}], "symbols": [{"value": "PHARC", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "612674", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "171848"}, {"db": "MedGen", "id": "C2675204"}, {"db": "MONDO", "id": "MONDO:0012984"}, {"db": "OMIM", "id": "612674", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "17", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2015-05-18T00:00:00Z", "mostRecentSubmission": "2015-05-18T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_20", "accession": "NC_000020.10", "start": 25288616, "stop": 25288617, "displayStart": 25288616, "displayStop": 25288617, "variantLength": 7, "positionVcf": 25288616, "referenceAlleleVcf": "G", "alternateAlleleVcf": "GGCTCTTA"}, "hgncIds": ["HGNC:15868"]} ++{"accession": {"accession": "VCV000000051", "version": 2}, "rcvs": [{"accession": {"accession": "RCV000000068", "version": 4}, "title": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys) AND Nephronophthisis-like nephropathy 1"}], "name": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20179356", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Nephronophthisis-like nephropathy 1", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": 
"Nephronophthisis-like+nephropathy+1/8986"}, {"db": "MONDO", "id": "MONDO:0013163"}]}], "symbols": [{"value": "NPHPL1", "type": "Preferred", "xrefs": [{"db": "OMIM", "id": "613159", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "The nephronophthisis (NPH) phenotype is characterized by reduced renal concentrating ability, chronic tubulointerstitial nephritis, cystic renal disease, and progression to end-stage renal disease (ESRD) before age 30 years. Three age-based clinical subtypes are recognized: infantile, juvenile, and adolescent/adult. Infantile NPH can present in utero with oligohydramnios sequence (limb contractures, pulmonary hypoplasia, and facial dysmorphisms) or postnatally with renal manifestations that progress to ESRD before age 3 years. Juvenile NPH, the most prevalent subtype, typically presents with polydipsia and polyuria, growth retardation, chronic iron-resistant anemia, or other findings related to chronic kidney disease (CKD). Hypertension is typically absent due to salt wasting. ESRD develops at a median age of 13 years. Ultrasound findings are increased echogenicity, reduced corticomedullary differentiation, and renal cysts (in 50% of affected individuals). Histologic findings include tubulointerstitial fibrosis, thickened and disrupted tubular basement membrane, sporadic corticomedullary cysts, and normal or reduced kidney size. Adolescent/adult NPH is clinically similar to juvenile NPH, but ESRD develops at a median age of 19 years. Within a subtype, inter- and intrafamilial variability in rate of progression to ESRD is considerable. 
Approximately 80%-90% of individuals with the NPH phenotype have no extrarenal features (i.e., they have isolated NPH); ~10%-20% have extrarenal manifestations that constitute a recognizable syndrome (e.g., Joubert syndrome, Bardet-Biedl syndrome, Jeune syndrome and related skeletal disorders, Meckel-Gruber syndrome, Senior-L\u00f8ken syndrome, Leber congenital amaurosis, COACH syndrome, and oculomotor apraxia, Cogan type)."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK368475"}]}], "citations": [{"ids": [{"value": "27336129", "source": "PubMed"}, {"value": "NBK368475", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "655"}, {"db": "MedGen", "id": "C3150419"}, {"db": "MONDO", "id": "MONDO:0013163"}, {"db": "OMIM", "id": "613159", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "26", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-03-01T00:00:00Z", "dateCreated": "2021-08-11T00:00:00Z", "mostRecentSubmission": "2021-08-11T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_22", "accession": "NC_000022.10", "start": 41320486, "stop": 41320486, "displayStart": 41320486, "displayStop": 41320486, "variantLength": 1, "positionVcf": 41320486, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:28052"]} ++{"accession": {"accession": "VCV000000032", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000049", "version": 2}, "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3"}], "name": "NM_138413.4(HOGA1):c.700+4G>T", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797690", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": 
[{"value": "Primary hyperoxaluria type 3", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013327"}]}, {"value": "PH III", "type": "Alternate"}, {"value": "Primary hyperoxaluria, type III", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+Hyperoxaluria+Type+3/8596"}]}], "symbols": [{"value": "HP3", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613616", "type": "MIM"}]}, {"value": "HOGA1", "type": "Alternate"}, {"value": "PH3", "type": "Alternate"}], "attributes": [{"attribute": {"base": {"value": "loss of function", "integerValue": "273"}, "type": "disease mechanism"}, "xrefs": [{"db": "Genetic Testing Registry (GTR)", "id": "GTR000561373"}]}, {"attribute": {"base": {"integerValue": "10738"}, "type": "GARD id"}, "xrefs": [{"db": "Office of Rare Diseases", "id": "10738"}]}], "citations": [{"ids": [{"value": "26401545", "source": "PubMed"}, {"value": "NBK316514", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "416"}, {"db": "Orphanet", "id": "93600"}, {"db": "MedGen", "id": "C3150878"}, {"db": "MONDO", "id": "MONDO:0013327"}, {"db": "OMIM", "id": "613616", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "19", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2013-04-04T00:00:00Z", "mostRecentSubmission": "2013-04-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_10", "accession": "NC_000010.10", "start": 99359924, "stop": 99359924, "displayStart": 99359924, "displayStop": 99359924, "variantLength": 1, "positionVcf": 99359924, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:25155"]} ++{"accession": {"accession": "VCV000000042", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000059", "version": 5}, "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal 
recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_19", "accession": "NC_000019.9", "start": 36558317, "stop": 36558317, "displayStart": 36558317, "displayStop": 36558317, "variantLength": 1, "positionVcf": 36558317, "referenceAlleleVcf": "G", "alternateAlleleVcf": "C"}, "hgncIds": ["HGNC:24502"]} ++{"accession": {"accession": "VCV000000059", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000076", "version": 5}, "title": "NM_006642.5(SDCCAG8):c.1946_1949del (p.Cys649fs) AND Senior-Loken syndrome 7"}], "name": "NM_006642.5(SDCCAG8):c.1946_1949del (p.Cys649fs)", "variationType": "VARIATION_TYPE_MICROSATELLITE", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20835237", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Senior-Loken syndrome 7", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Senior-Loken+syndrome+7/9283"}, {"db": "MONDO", "id": "MONDO:0013326"}]}], "symbols": [{"value": "SLSN7", 
"type": "Preferred", "xrefs": [{"db": "OMIM", "id": "613615", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "3156"}, {"db": "MedGen", "id": "C3150877"}, {"db": "MONDO", "id": "MONDO:0013326"}, {"db": "OMIM", "id": "613615", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "29", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2016-10-23T00:00:00Z", "mostRecentSubmission": "2016-10-23T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_1", "accession": "NC_000001.10", "start": 243589819, "stop": 243589822, "displayStart": 243589819, "displayStop": 243589822, "variantLength": 4, "positionVcf": 243589818, "referenceAlleleVcf": "AGTGT", "alternateAlleleVcf": "A"}, "hgncIds": ["HGNC:10671"]} ++{"accession": {"accession": "VCV000000040", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000057", "version": 4}, "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": 
"Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2015-09-17T00:00:00Z", "mostRecentSubmission": "2015-09-17T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_19", "accession": "NC_000019.9", "start": 36595471, "stop": 36595474, "displayStart": 36595471, "displayStop": 36595474, "variantLength": 4, "positionVcf": 36595470, "referenceAlleleVcf": "GTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:24502"]} +diff --git a/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl b/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl +index 51c04b7..35b65e6 100644 +--- a/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl ++++ b/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl +@@ -1,10 +1,10 @@ +-{"rcv": "RCV000000009", "vcv": "VCV000018396", "title": "NM_002036.4(ACKR1):c.265C>T (p.Arg89Cys) AND DUFFY BLOOD GROUP SYSTEM, FY(bwk) PHENOTYPE", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "1", "accession": "NC_000001.11", "outer_start": null, "inner_start": null, "start": 159205704, "stop": 159205704, "inner_stop": null, "outer_stop": null, "display_start": 
159205704, "display_stop": 159205704, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 159205704, "reference_allele_vcf": "C", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000010", "vcv": "VCV000018397", "title": "NM_002036.4(ACKR1):c.286_299del (p.Trp96fs) AND DUFFY BLOOD GROUP SYSTEM, FY(a-b-) PHENOTYPE", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "1", "accession": "NC_000001.11", "outer_start": null, "inner_start": null, "start": 159205719, "stop": 159205732, "inner_stop": null, "outer_stop": null, "display_start": 159205719, "display_stop": 159205732, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 159205718, "reference_allele_vcf": "CCCTGGCTGGCCTGT", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000012", "vcv": "VCV000000002", "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "criteria provided, single submitter", "sequence_location": {"assembly": "GRCh38", "chr": "7", "accession": "NC_000007.14", "outer_start": null, "inner_start": null, "start": 4781213, "stop": 4781216, "inner_stop": null, "outer_stop": null, "display_start": 4781213, "display_stop": 4781216, "strand": null, "variant_length": 22, "reference_allele": 
null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 4781213, "reference_allele_vcf": "GGAT", "alternate_allele_vcf": "TGCTGTAAACTGTAACTGTAAA", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000013", "vcv": "VCV000000003", "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "7", "accession": "NC_000007.14", "outer_start": null, "inner_start": null, "start": 4787730, "stop": 4787743, "inner_stop": null, "outer_stop": null, "display_start": 4787730, "display_stop": 4787743, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 4787729, "reference_allele_vcf": "GCTGCTGGACCTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000016", "vcv": "VCV000000006", "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "11", "accession": "NC_000011.10", "outer_start": null, "inner_start": null, "start": 126277517, "stop": 126277517, "inner_stop": null, "outer_stop": null, "display_start": 126277517, "display_stop": 126277517, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", 
"assembly_status": "current", "position_vcf": 126277517, "reference_allele_vcf": "A", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:26927"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000042", "vcv": "VCV000000025", "title": "NM_015600.4(ABHD12):c.-6898_191+7002delinsCC AND PHARC syndrome", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "20", "accession": "NC_000020.11", "outer_start": null, "inner_start": null, "start": 25383511, "stop": 25397601, "inner_stop": null, "outer_stop": null, "display_start": 25383511, "display_stop": 25397601, "strand": null, "variant_length": 14091, "reference_allele": null, "alternate_allele": "GG", "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": null, "reference_allele_vcf": null, "alternate_allele_vcf": null, "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000043", "vcv": "VCV000000026", "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome", "variant_type": "duplication", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "20", "accession": "NC_000020.11", "outer_start": null, "inner_start": null, "start": 25307980, "stop": 25307981, "inner_stop": null, "outer_stop": null, "display_start": 25307980, "display_stop": 25307981, "strand": null, "variant_length": 7, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 25307980, "reference_allele_vcf": "G", "alternate_allele_vcf": "GGCTCTTA", "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], 
"absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000049", "vcv": "VCV000000032", "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "10", "accession": "NC_000010.11", "outer_start": null, "inner_start": null, "start": 97600167, "stop": 97600167, "inner_stop": null, "outer_stop": null, "display_start": 97600167, "display_stop": 97600167, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 97600167, "reference_allele_vcf": "G", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:25155"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": "RCV000000057", "vcv": "VCV000000040", "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "19", "accession": "NC_000019.10", "outer_start": null, "inner_start": null, "start": 36104569, "stop": 36104572, "inner_stop": null, "outer_stop": null, "display_start": 36104569, "display_stop": 36104572, "strand": null, "variant_length": 4, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 36104568, "reference_allele_vcf": "GTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +-{"rcv": 
"RCV000000059", "vcv": "VCV000000042", "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "19", "accession": "NC_000019.10", "outer_start": null, "inner_start": null, "start": 36067415, "stop": 36067415, "inner_stop": null, "outer_stop": null, "display_start": 36067415, "display_stop": 36067415, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 36067415, "reference_allele_vcf": "G", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} ++{"accession": {"accession": "VCV000000002", "version": 3}, "rcvs": [{"accession": {"accession": "RCV000000012", "version": 5}, "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer)", "variationType": "VARIATION_TYPE_INDEL", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": 
"Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, {"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2021-05-16T00:00:00Z", "numberOfSubmitters": 2, "numberOfSubmissions": 2}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_7", "accession": "NC_000007.14", "start": 4781213, "stop": 4781216, "displayStart": 4781213, "displayStop": 4781216, "variantLength": 22, "positionVcf": 4781213, "referenceAlleleVcf": "GGAT", "alternateAlleleVcf": "TGCTGTAAACTGTAACTGTAAA"}, "hgncIds": ["HGNC:22197"]} ++{"accession": {"accession": "VCV000000006", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000016", "version": 6}, "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19"}], "name": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20818383", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Mitochondrial complex 1 deficiency, nuclear type 19", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0032624"}]}, {"value": "MITOCHONDRIAL COMPLEX I DEFICIENCY, NUCLEAR TYPE 19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}, {"db": "OMIM", "id": "613622.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0003", "type": "Allelic 
variant"}]}], "symbols": [{"value": "MC1DN19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}]}], "xrefs": [{"db": "MedGen", "id": "C4748791"}, {"db": "MONDO", "id": "MONDO:0032624"}, {"db": "OMIM", "id": "618241", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "45335", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2019-02-04T00:00:00Z", "mostRecentSubmission": "2019-02-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_11", "accession": "NC_000011.10", "start": 126277517, "stop": 126277517, "displayStart": 126277517, "displayStop": 126277517, "variantLength": 1, "positionVcf": 126277517, "referenceAlleleVcf": "A", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:26927"]} ++{"accession": {"accession": "VCV000000003", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000013", "version": 6}, "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, 
{"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-06-29T00:00:00Z", "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2017-01-30T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_7", "accession": "NC_000007.14", "start": 4787730, "stop": 4787743, "displayStart": 4787730, "displayStop": 4787743, "variantLength": 14, "positionVcf": 4787729, "referenceAlleleVcf": "GCTGCTGGACCTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:22197"]} ++{"accession": {"accession": "VCV000000043", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000060", "version": 4}, "title": "NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", 
"type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_19", "accession": "NC_000019.10", "start": 36083099, "stop": 36083099, "displayStart": 36083099, "displayStop": 
36083099, "variantLength": 1, "positionVcf": 36083099, "referenceAlleleVcf": "C", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:24502"]} ++{"accession": {"accession": "VCV000000026", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000043", "version": 3}, "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome"}], "name": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter)", "variationType": "VARIATION_TYPE_DUPLICATION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797687", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "PHARC syndrome", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0012984"}]}, {"value": "Polyneuropathy-hearing loss-ataxia-retinitis pigmentosa-cataract syndrome", "type": "Alternate", "xrefs": [{"db": "Orphanet", "id": "171848"}]}, {"value": "Polyneuropathy, hearing loss, ataxia, retinitis pigmentosa, and cataract", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Polyneuropathy%2C+hearing+loss%2C+ataxia%2C+retinitis+pigmentosa%2C+and+cataract/9132"}]}], "symbols": [{"value": "PHARC", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "612674", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "171848"}, {"db": "MedGen", "id": "C2675204"}, {"db": "MONDO", "id": "MONDO:0012984"}, {"db": "OMIM", "id": "612674", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "17", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2015-05-18T00:00:00Z", "mostRecentSubmission": "2015-05-18T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_20", "accession": "NC_000020.11", "start": 25307980, "stop": 25307981, "displayStart": 25307980, "displayStop": 25307981, 
"variantLength": 7, "positionVcf": 25307980, "referenceAlleleVcf": "G", "alternateAlleleVcf": "GGCTCTTA"}, "hgncIds": ["HGNC:15868"]} ++{"accession": {"accession": "VCV000000051", "version": 2}, "rcvs": [{"accession": {"accession": "RCV000000068", "version": 4}, "title": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys) AND Nephronophthisis-like nephropathy 1"}], "name": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20179356", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Nephronophthisis-like nephropathy 1", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Nephronophthisis-like+nephropathy+1/8986"}, {"db": "MONDO", "id": "MONDO:0013163"}]}], "symbols": [{"value": "NPHPL1", "type": "Preferred", "xrefs": [{"db": "OMIM", "id": "613159", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "The nephronophthisis (NPH) phenotype is characterized by reduced renal concentrating ability, chronic tubulointerstitial nephritis, cystic renal disease, and progression to end-stage renal disease (ESRD) before age 30 years. Three age-based clinical subtypes are recognized: infantile, juvenile, and adolescent/adult. Infantile NPH can present in utero with oligohydramnios sequence (limb contractures, pulmonary hypoplasia, and facial dysmorphisms) or postnatally with renal manifestations that progress to ESRD before age 3 years. Juvenile NPH, the most prevalent subtype, typically presents with polydipsia and polyuria, growth retardation, chronic iron-resistant anemia, or other findings related to chronic kidney disease (CKD). Hypertension is typically absent due to salt wasting. ESRD develops at a median age of 13 years. 
Ultrasound findings are increased echogenicity, reduced corticomedullary differentiation, and renal cysts (in 50% of affected individuals). Histologic findings include tubulointerstitial fibrosis, thickened and disrupted tubular basement membrane, sporadic corticomedullary cysts, and normal or reduced kidney size. Adolescent/adult NPH is clinically similar to juvenile NPH, but ESRD develops at a median age of 19 years. Within a subtype, inter- and intrafamilial variability in rate of progression to ESRD is considerable. Approximately 80%-90% of individuals with the NPH phenotype have no extrarenal features (i.e., they have isolated NPH); ~10%-20% have extrarenal manifestations that constitute a recognizable syndrome (e.g., Joubert syndrome, Bardet-Biedl syndrome, Jeune syndrome and related skeletal disorders, Meckel-Gruber syndrome, Senior-L\u00f8ken syndrome, Leber congenital amaurosis, COACH syndrome, and oculomotor apraxia, Cogan type)."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK368475"}]}], "citations": [{"ids": [{"value": "27336129", "source": "PubMed"}, {"value": "NBK368475", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "655"}, {"db": "MedGen", "id": "C3150419"}, {"db": "MONDO", "id": "MONDO:0013163"}, {"db": "OMIM", "id": "613159", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "26", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-03-01T00:00:00Z", "dateCreated": "2021-08-11T00:00:00Z", "mostRecentSubmission": "2021-08-11T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_22", "accession": "NC_000022.11", "start": 40924482, "stop": 40924482, "displayStart": 40924482, "displayStop": 40924482, "variantLength": 1, "positionVcf": 40924482, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:28052"]} ++{"accession": {"accession": 
"VCV000000032", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000049", "version": 2}, "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3"}], "name": "NM_138413.4(HOGA1):c.700+4G>T", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797690", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Primary hyperoxaluria type 3", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013327"}]}, {"value": "PH III", "type": "Alternate"}, {"value": "Primary hyperoxaluria, type III", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+Hyperoxaluria+Type+3/8596"}]}], "symbols": [{"value": "HP3", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613616", "type": "MIM"}]}, {"value": "HOGA1", "type": "Alternate"}, {"value": "PH3", "type": "Alternate"}], "attributes": [{"attribute": {"base": {"value": "loss of function", "integerValue": "273"}, "type": "disease mechanism"}, "xrefs": [{"db": "Genetic Testing Registry (GTR)", "id": "GTR000561373"}]}, {"attribute": {"base": {"integerValue": "10738"}, "type": "GARD id"}, "xrefs": [{"db": "Office of Rare Diseases", "id": "10738"}]}], "citations": [{"ids": [{"value": "26401545", "source": "PubMed"}, {"value": "NBK316514", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "416"}, {"db": "Orphanet", "id": "93600"}, {"db": "MedGen", "id": "C3150878"}, {"db": "MONDO", "id": "MONDO:0013327"}, {"db": "OMIM", "id": "613616", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "19", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2013-04-04T00:00:00Z", "mostRecentSubmission": "2013-04-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 
1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_10", "accession": "NC_000010.11", "start": 97600167, "stop": 97600167, "displayStart": 97600167, "displayStop": 97600167, "variantLength": 1, "positionVcf": 97600167, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:25155"]} ++{"accession": {"accession": "VCV000000042", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000059", "version": 5}, "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": 
"MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_19", "accession": "NC_000019.10", "start": 36067415, "stop": 36067415, "displayStart": 36067415, "displayStop": 36067415, "variantLength": 1, "positionVcf": 36067415, "referenceAlleleVcf": "G", "alternateAlleleVcf": "C"}, "hgncIds": ["HGNC:24502"]} ++{"accession": {"accession": "VCV000000059", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000076", "version": 5}, "title": "NM_006642.5(SDCCAG8):c.1946_1949del (p.Cys649fs) AND Senior-Loken syndrome 7"}], "name": "NM_006642.5(SDCCAG8):c.1946_1949del 
(p.Cys649fs)", "variationType": "VARIATION_TYPE_MICROSATELLITE", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20835237", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Senior-Loken syndrome 7", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Senior-Loken+syndrome+7/9283"}, {"db": "MONDO", "id": "MONDO:0013326"}]}], "symbols": [{"value": "SLSN7", "type": "Preferred", "xrefs": [{"db": "OMIM", "id": "613615", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "3156"}, {"db": "MedGen", "id": "C3150877"}, {"db": "MONDO", "id": "MONDO:0013326"}, {"db": "OMIM", "id": "613615", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "29", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2016-10-23T00:00:00Z", "mostRecentSubmission": "2016-10-23T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_1", "accession": "NC_000001.11", "start": 243426517, "stop": 243426520, "displayStart": 243426517, "displayStop": 243426520, "variantLength": 4, "positionVcf": 243426516, "referenceAlleleVcf": "AGTGT", "alternateAlleleVcf": "A"}, "hgncIds": ["HGNC:10671"]} ++{"accession": {"accession": "VCV000000040", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000057", "version": 4}, "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": 
"20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2015-09-17T00:00:00Z", "mostRecentSubmission": "2015-09-17T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_19", "accession": "NC_000019.10", "start": 36104569, "stop": 36104572, "displayStart": 36104569, "displayStop": 36104572, "variantLength": 4, "positionVcf": 36104568, "referenceAlleleVcf": "GTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:24502"]} +diff --git a/tests/clinvar-genes/gene-frequency-report.jsonl b/tests/clinvar-genes/gene-frequency-report.jsonl +index a9f2a99..3509327 100644 +--- a/tests/clinvar-genes/gene-frequency-report.jsonl ++++ b/tests/clinvar-genes/gene-frequency-report.jsonl +@@ -1,10 +1,11 @@ +-{"hgnc": "HGNC:5", "counts": {"benign": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:16", "counts": {"benign": [6, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 1, 1, 0, 0, 2, 0, 0], "uncertain": [17, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:17", "counts": {"benign": [2, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:18", "counts": {"benign": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:19", "counts": {"benign": [1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0], "uncertain": [13, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:20", "counts": {"benign": [485, 0, 0, 0, 0, 44, 15, 25, 38, 26, 3, 4, 0, 16, 12, 4, 0, 0], "uncertain": [785, 0, 0, 0, 0, 33, 6, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [48, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:21", "counts": {"benign": [12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:23", "counts": {"benign": [216, 0, 0, 0, 0, 6, 1, 7, 9, 16, 4, 10, 6, 5, 8, 21, 0, 0], "uncertain": [304, 0, 0, 0, 0, 22, 3, 5, 6, 0, 0, 0, 0, 1, 0, 1, 0, 0], "pathogenic": [29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:29", "counts": {"benign": [677, 0, 0, 0, 0, 17, 22, 26, 37, 39, 17, 11, 18, 20, 31, 31, 0, 0], "uncertain": [421, 0, 0, 0, 0, 13, 2, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:30", "counts": {"benign": [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], "uncertain": [54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} ++{"hgncId": "HGNC:5", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0], "uncertainCounts": [41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} ++{"hgncId": "HGNC:7", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [23, 0, 0, 0, 0, 3, 0, 1, 2, 3, 0, 1, 0, 0, 1, 1, 0, 0]} ++{"hgncId": "HGNC:16", "pathogenicCounts": [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [26, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [20, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 1, 1, 0, 0, 1, 0, 0]} ++{"hgncId": "HGNC:17", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} ++{"hgncId": "HGNC:18", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} ++{"hgncId": "HGNC:19", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [17, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [2, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0]} ++{"hgncId": "HGNC:20", "pathogenicCounts": [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [650, 0, 0, 0, 0, 16, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [457, 0, 0, 0, 0, 24, 6, 8, 9, 18, 11, 13, 0, 15, 10, 2, 0, 0]} ++{"hgncId": "HGNC:21", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} ++{"hgncId": "HGNC:23", "pathogenicCounts": [30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0], "uncertainCounts": [255, 0, 0, 0, 0, 23, 4, 3, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0], "benignCounts": [245, 0, 0, 0, 0, 13, 4, 6, 5, 10, 4, 9, 4, 4, 8, 19, 1, 0]} ++{"hgncId": "HGNC:29", "pathogenicCounts": [50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [443, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [511, 0, 0, 0, 0, 16, 7, 14, 13, 22, 21, 38, 28, 32, 35, 33, 1, 0]} ++{"hgncId": "HGNC:30", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0]} +diff --git a/tests/clinvar-genes/gene-variant-report.jsonl b/tests/clinvar-genes/gene-variant-report.jsonl +index 958924d..97f4a5d 100644 +--- a/tests/clinvar-genes/gene-variant-report.jsonl ++++ b/tests/clinvar-genes/gene-variant-report.jsonl +@@ -1,12 +1,13 @@ +-{"hgnc": "HGNC:5", "counts": {"missense_variant": [0, 0, 25, 0, 0]}} +-{"hgnc": "HGNC:16", "counts": {"frameshift_variant": [1, 1, 0, 0, 0], "intron_variant": [0, 1, 0, 0, 0], "missense_variant": [2, 7, 16, 0, 3], "stop_gained": [0, 0, 1, 0, 0], "synonymous_variant": [7, 1, 0, 0, 0]}} +-{"hgnc": "HGNC:17", "counts": {"missense_variant": [0, 0, 18, 0, 0]}} +-{"hgnc": "HGNC:18", "counts": {"missense_variant": [0, 0, 7, 0, 0]}} +-{"hgnc": "HGNC:19", "counts": {"missense_variant": [1, 2, 14, 0, 0], "non_coding_transcript_variant": [2, 1, 0, 0, 0]}} +-{"hgnc": "HGNC:20", "counts": {"3_prime_UTR_variant": [4, 2, 4, 0, 0], "5_prime_UTR_variant": [0, 4, 0, 0, 0], "frameshift_variant": [0, 0, 15, 2, 1], "inframe_indel": [0, 0, 4, 0, 0], "intron_variant": [45, 197, 44, 0, 0], "missense_variant": [20, 52, 689, 17, 19], "stop_gained": [0, 0, 18, 3, 5], "splice_acceptor_variant": [0, 0, 7, 0, 0], "splice_donor_variant": [0, 0, 4, 0, 0], "synonymous_variant": [37, 311, 30, 0, 0]}} +-{"hgnc": "HGNC:21", "counts": {"5_prime_UTR_variant": [0, 0, 3, 0, 0], 
"missense_variant": [0, 8, 74, 0, 0]}} +-{"hgnc": "HGNC:23", "counts": {"3_prime_UTR_variant": [33, 13, 59, 0, 0], "5_prime_UTR_variant": [2, 3, 14, 0, 0], "frameshift_variant": [0, 0, 1, 0, 1], "inframe_indel": [0, 0, 3, 0, 0], "intron_variant": [33, 147, 117, 3, 6], "missense_variant": [2, 5, 120, 1, 7], "stop_gained": [0, 0, 1, 0, 1], "splice_acceptor_variant": [0, 0, 0, 2, 0], "stop_lost": [0, 1, 0, 0, 0], "synonymous_variant": [6, 63, 10, 0, 0]}} +-{"hgnc": "HGNC:29", "counts": {"5_prime_UTR_variant": [12, 1, 26, 0, 0], "frameshift_variant": [0, 0, 0, 0, 8], "inframe_indel": [0, 0, 1, 0, 1], "intron_variant": [169, 127, 45, 1, 0], "missense_variant": [70, 93, 340, 9, 13], "stop_gained": [0, 0, 0, 3, 5], "splice_acceptor_variant": [0, 0, 1, 0, 0], "splice_donor_variant": [0, 0, 0, 4, 1], "synonymous_variant": [108, 357, 29, 0, 0]}} +-{"hgnc": "HGNC:30", "counts": {"frameshift_variant": [1, 0, 0, 0, 0], "missense_variant": [0, 2, 54, 0, 0], "splice_acceptor_variant": [1, 0, 0, 0, 0]}} +-{"hgnc": "HGNC:32", "counts": {"3_prime_UTR_variant": [1, 0, 0, 0, 0], "frameshift_variant": [0, 0, 1, 1, 4], "inframe_indel": [0, 0, 1, 0, 0], "intron_variant": [16, 2, 3, 0, 0], "missense_variant": [2, 9, 129, 0, 0], "stop_gained": [0, 0, 0, 2, 2], "splice_donor_variant": [0, 0, 0, 1, 0], "synonymous_variant": [13, 19, 1, 0, 0]}} +-{"hgnc": "HGNC:33", "counts": {"3_prime_UTR_variant": [1, 0, 20, 0, 0], "5_prime_UTR_variant": [8, 6, 20, 0, 0], "frameshift_variant": [0, 0, 1, 3, 12], "inframe_indel": [0, 0, 4, 0, 1], "intron_variant": [81, 57, 30, 0, 2], "missense_variant": [13, 37, 377, 22, 16], "stop_gained": [0, 0, 1, 0, 12], "no_sequence_alteration": [1, 0, 0, 0, 0], "splice_acceptor_variant": [0, 0, 0, 5, 2], "splice_donor_variant": [0, 0, 1, 1, 1], "synonymous_variant": [31, 149, 41, 1, 0]}} ++{"hgncId": "HGNC:5", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countLikelyBenign": 2, "countUncertainSignificance": 41}]} ++{"hgncId": "HGNC:7", "impactCounts": 
[{"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 1, "countLikelyBenign": 2, "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 7, "countLikelyBenign": 9, "countUncertainSignificance": 58}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 6, "countLikelyBenign": 8}]} ++{"hgncId": "HGNC:16", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countLikelyBenign": 1}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countLikelyBenign": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 1, "countLikelyBenign": 11, "countUncertainSignificance": 26, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 8, "countLikelyBenign": 8}]} ++{"hgncId": "HGNC:17", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countUncertainSignificance": 24}, {"geneImpact": "GENE_IMPACT_STOP_LOST", "countLikelyBenign": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countLikelyBenign": 1}]} ++{"hgncId": "HGNC:18", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countUncertainSignificance": 15}]} ++{"hgncId": "HGNC:19", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 1, "countLikelyBenign": 2, "countUncertainSignificance": 18}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 2, "countLikelyBenign": 2}]} ++{"hgncId": "HGNC:20", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 3, "countLikelyPathogenic": 3, "countPathogenic": 16}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 36, "countLikelyBenign": 225, "countUncertainSignificance": 35}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 2, 
"countLikelyBenign": 15, "countUncertainSignificance": 603, "countLikelyPathogenic": 9, "countPathogenic": 6}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 5, "countPathogenic": 19}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countUncertainSignificance": 3, "countLikelyPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 5, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_STOP_LOST", "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 4, "countLikelyBenign": 262, "countUncertainSignificance": 11}]} ++{"hgncId": "HGNC:21", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countLikelyBenign": 9, "countUncertainSignificance": 115}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countLikelyBenign": 1}]} ++{"hgncId": "HGNC:23", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 3, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 19, "countLikelyBenign": 127, "countUncertainSignificance": 13}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 1, "countLikelyBenign": 2, "countUncertainSignificance": 208, "countLikelyPathogenic": 1, "countPathogenic": 6}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 1, "countLikelyPathogenic": 5, "countPathogenic": 3}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countLikelyPathogenic": 6, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 1, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_STOP_LOST", "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 7, "countLikelyBenign": 125, "countUncertainSignificance": 4}]} ++{"hgncId": "HGNC:29", "impactCounts": [{"geneImpact": 
"GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countPathogenic": 12}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 143, "countLikelyBenign": 146, "countUncertainSignificance": 31, "countLikelyPathogenic": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 15, "countLikelyBenign": 39, "countUncertainSignificance": 356, "countLikelyPathogenic": 3, "countPathogenic": 11}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countLikelyPathogenic": 4, "countPathogenic": 8}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 1, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countLikelyPathogenic": 5, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 14, "countLikelyBenign": 334, "countUncertainSignificance": 4}]} ++{"hgncId": "HGNC:30", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countLikelyBenign": 4, "countUncertainSignificance": 85}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countLikelyBenign": 1}]} ++{"hgncId": "HGNC:32", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 1, "countPathogenic": 4}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 14, "countLikelyBenign": 11, "countUncertainSignificance": 3}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 3, "countLikelyBenign": 12, "countUncertainSignificance": 199}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countLikelyPathogenic": 2, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countLikelyPathogenic": 1, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 15, "countLikelyBenign": 70, "countUncertainSignificance": 1}]} 
++{"hgncId": "HGNC:33", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 3, "countPathogenic": 22}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 55, "countLikelyBenign": 333, "countUncertainSignificance": 8, "countLikelyPathogenic": 1, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 2, "countLikelyBenign": 11, "countUncertainSignificance": 306, "countLikelyPathogenic": 14, "countPathogenic": 11}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 1, "countPathogenic": 28}, {"geneImpact": "GENE_IMPACT_NO_SEQUENCE_ALTERATION", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countLikelyPathogenic": 4, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countLikelyPathogenic": 6, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 7, "countLikelyBenign": 530, "countUncertainSignificance": 4, "countLikelyPathogenic": 1}]} diff --git a/build.rs b/build.rs index 1c30e607..20dc8a16 100644 --- a/build.rs +++ b/build.rs @@ -6,6 +6,11 @@ use std::{env, path::PathBuf}; fn main() -> Result<(), anyhow::Error> { let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos"); let proto_files = vec![ + "annonars/clinvar_data/class_by_freq.proto", + "annonars/clinvar_data/clinvar_public.proto", + "annonars/clinvar_data/extracted_vars.proto", + "annonars/clinvar_data/gene_impact.proto", + "annonars/clinvar_data/phenotype_link.proto", "annonars/clinvar/minimal.proto", "annonars/clinvar/per_gene.proto", "annonars/clinvar/sv.proto", @@ -51,7 +56,7 @@ fn main() -> Result<(), anyhow::Error> { let descriptor_set = std::fs::read(descriptor_path).unwrap(); pbjson_build::Builder::new() .register_descriptors(&descriptor_set)? 
- .build(&[".annonars"])?; + .build(&[".annonars", ".clinvar_data"])?; Ok(()) } diff --git a/protos/annonars/clinvar/per_gene.proto b/protos/annonars/clinvar/per_gene.proto index 1f4457e7..96eae2d3 100644 --- a/protos/annonars/clinvar/per_gene.proto +++ b/protos/annonars/clinvar/per_gene.proto @@ -5,87 +5,24 @@ syntax = "proto3"; package annonars.clinvar.per_gene; import "annonars/clinvar/minimal.proto"; - -// Impact on gene in ClinVar. -enum Impact { - // unknown - IMPACT_UNKNOWN = 0; - // 3' UTR variant - IMPACT_THREE_PRIME_UTR_VARIANT = 1; - // 5' UTR variant - IMPACT_FIVE_PRIME_UTR_VARIANT = 2; - // downstream gene variant - IMPACT_DOWNSTREAM_TRANSCRIPT_VARIANT = 3; - // frameshift variant - IMPACT_FRAMESHIFT_VARIANT = 4; - // inframe indel - IMPACT_INFRAME_INDEL = 5; - // start lost - IMPACT_START_LOST = 6; - // intron variant - IMPACT_INTRON_VARIANT = 7; - // missense variant - IMPACT_MISSENSE_VARIANT = 8; - // non-coding transcript variant - IMPACT_NON_CODING_TRANSCRIPT_VARIANT = 9; - // stop gained - IMPACT_STOP_GAINED = 10; - // no sequence alteration - IMPACT_NO_SEQUENCE_ALTERATION = 11; - // splice acceptor variant - IMPACT_SPLICE_ACCEPTOR_VARIANT = 12; - // splice donor variant - IMPACT_SPLICE_DONOR_VARIANT = 13; - // stop lost - IMPACT_STOP_LOST = 14; - // synonymous variant - IMPACT_SYNONYMOUS_VARIANT = 15; - // upstream gene variant - IMPACT_UPSTREAM_TRANSCRIPT_VARIANT = 16; -} - -// Counts of variants by impact. -message GeneImpactRecordCounts { - // Impact - Impact impact = 1; - // Counts for ACMG1..5 - repeated uint32 counts = 2; -} - -// Coarsened clinical significance -enum CoarseClinicalSignificance { - // unknown - COARSE_CLINICAL_SIGNIFICANCE_UNKNOWN = 0; - // benign / likely benign - COARSE_CLINICAL_SIGNIFICANCE_BENIGN = 1; - // uncertain signifiance - COARSE_CLINICAL_SIGNIFICANCE_UNCERTAIN = 2; - // pathogenic / likely pathogenic - COARSE_CLINICAL_SIGNIFICANCE_PATHOGENIC = 3; -} - -// Counts per frequency and impact. 
-message GeneFreqRecordCounts { - // Frequency - CoarseClinicalSignificance coarse_clinsig = 1; - // Counts for ACMG1..5 - repeated uint32 counts = 2; -} - -// Variants for a given genome release / assembly. -message GeneVariantsForRelease { - // Genome release / assembly - string genome_release = 1; - // Variants - repeated annonars.clinvar.minimal.Record variants = 2; +import "annonars/clinvar_data/class_by_freq.proto"; +import "annonars/clinvar_data/extracted_vars.proto"; +import "annonars/clinvar_data/gene_impact.proto"; + +// Extracted variants per release. +message ExtractedVariantsPerRelease { + // Release version. + optional string release = 1; + // Variants per gene. + repeated clinvar_data.extracted_vars.ExtractedVcvRecord variants = 2; } // ClinVar detailed information per gene. message ClinvarPerGeneRecord { // Counts of variants per impact - repeated GeneImpactRecordCounts per_impact_counts = 1; + optional clinvar_data.gene_impact.GeneImpactCounts per_impact_counts = 1; // Counts of variants per impact / frequency - repeated GeneFreqRecordCounts per_freq_counts = 2; + optional clinvar_data.class_by_freq.GeneCoarseClinsigFrequencyCounts per_freq_counts = 2; // Variants for the given gene. - repeated GeneVariantsForRelease variants = 3; + repeated ExtractedVariantsPerRelease per_release_vars = 3; } diff --git a/protos/annonars/clinvar_data/class_by_freq.proto b/protos/annonars/clinvar_data/class_by_freq.proto new file mode 100644 index 00000000..f6e39abb --- /dev/null +++ b/protos/annonars/clinvar_data/class_by_freq.proto @@ -0,0 +1,29 @@ +// Protocol buffers for types for class-by-freq tool. + +syntax = "proto3"; + +package annonars.clinvar_data.class_by_freq; + +// Enumeration for coarse-grain classification. +enum CoarseClinicalSignificance { + // unspecified coarse clinical significance + COARSE_CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0; + // Corresponds to "benign". + COARSE_CLINICAL_SIGNIFICANCE_BENIGN = 1; + // Corresponds to "uncertain". 
+ COARSE_CLINICAL_SIGNIFICANCE_UNCERTAIN = 2; + // Corresponds to "pathogenic". + COARSE_CLINICAL_SIGNIFICANCE_PATHOGENIC = 3; +} + +// Output record. +message GeneCoarseClinsigFrequencyCounts { + // The gene HGNC ID. + string hgnc_id = 1; + // The counts for (likely) pathogenic. + repeated uint32 pathogenic_counts = 2; + // The counts for uncertain significance. + repeated uint32 uncertain_counts = 3; + // The counts for (likely) benign. + repeated uint32 benign_counts = 4; +} diff --git a/protos/annonars/clinvar_data/clinvar_public.proto b/protos/annonars/clinvar_data/clinvar_public.proto new file mode 100644 index 00000000..361708d2 --- /dev/null +++ b/protos/annonars/clinvar_data/clinvar_public.proto @@ -0,0 +1,2708 @@ +// Protobuf file for representing the types from the ClinVar VCV XSD file. + +syntax = "proto3"; + +package annonars.clinvar_data.clinvar_public; + +import "google/protobuf/timestamp.proto"; + +// Enumeration describing connection between genes and variants. +// +// Corresponds to XSD type "GeneVariantRelationship". 
+enum GeneVariantRelationship { + // unspecified gene variant + GENE_VARIANT_RELATIONSHIP_UNSPECIFIED = 0; + // corresponds to "variant within gene" + GENE_VARIANT_RELATIONSHIP_VARIANT_WITHIN_GENE = 1; + // corresponds to "gene overlapped by variant" and + // (legacy:) "genes overlapped by variant" + GENE_VARIANT_RELATIONSHIP_GENE_OVERLAPPED_BY_VARIANT = 2; + // corresponds to "variant near gene, upstream" and + // (legacy:) "near gene, upstream" + GENE_VARIANT_RELATIONSHIP_NEAR_GENE_UPSTREAM = 3; + // corresponds to "variant near gene, downstream" and + // (legacy:) "near gene, downstream" + GENE_VARIANT_RELATIONSHIP_NEAR_GENE_DOWNSTREAM = 4; + // corresponds to "asserted, but not computed" + GENE_VARIANT_RELATIONSHIP_ASSERTED_BUT_NOT_COMPUTED = 5; + // corresponds to "within multiple genes by overlap" + GENE_VARIANT_RELATIONSHIP_WITHIN_MULTIPLE_GENES_BY_OVERLAP = 6; + // corresponds to "within single gene" + GENE_VARIANT_RELATIONSHIP_WITHIN_SINGLE_GENE = 7; +} + +// Enumeration describing severity. +// +// Corresponds to XSD type "typeSeverity" +enum Severity { + // unspecified severity + SEVERITY_UNSPECIFIED = 0; + // corresponds to "mild" + SEVERITY_MILD = 1; + // corresponds to "moderate" + SEVERITY_MODERATE = 2; + // corresponds to "severe" + SEVERITY_SEVERE = 3; +} + +// Enumeration describing status. +// +// Corresponds to `typeStatus` in XSD.
+enum Status { + // unspecified status + STATUS_UNSPECIFIED = 0; + // corresponds to "current" + STATUS_CURRENT = 1; + // corresponds to "completed and retired" + STATUS_COMPLETED_AND_RETIRED = 2; + // corresponds to "delete" + STATUS_DELETE = 3; + // corresponds to "in development" + STATUS_IN_DEVELOPMENT = 4; + // corresponds to "reclassified" + STATUS_RECLASSIFIED = 5; + // corresponds to "reject" + STATUS_REJECT = 6; + // corresponds to "secondary" + STATUS_SECONDARY = 7; + // corresponds to "suppressed" + STATUS_SUPPRESSED = 8; + // corresponds to "under review" + STATUS_UNDER_REVIEW = 9; +} + +// Enumeration describing submitter review status. +// +// Corresponds to `typeSubmitterReviewStatusValue` in XSD. +enum SubmitterReviewStatus { + // unspecified status + SUBMITTER_REVIEW_STATUS_UNSPECIFIED = 0; + // corresponds to "no classification provided" + SUBMITTER_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; + // corresponds to "no assertion criteria provided" + SUBMITTER_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; + // corresponds to "criteria provided, single submitter" + SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; + // corresponds to "reviewed by expert panel" + SUBMITTER_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 4; + // corresponds to "practice guideline" + SUBMITTER_REVIEW_STATUS_PRACTICE_GUIDELINE = 5; + // corresponds to "flagged submission" + SUBMITTER_REVIEW_STATUS_FLAGGED_SUBMISSION = 6; + // corresponds to "criteria provided, multiple submitters, no conflicts" + SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS = 7; + // corresponds to "criteria provided, conflicting classifications" + SUBMITTER_REVIEW_STATUS_CRITERIA_PROVIDED_CONFLICTING_CLASSIFICATIONS = 8; + // corresponds to "classified by single submitter" + SUBMITTER_REVIEW_STATUS_CLASSIFIED_BY_SINGLE_SUBMITTER = 9; + // corresponds to "reviewed by professional society" + SUBMITTER_REVIEW_STATUS_REVIEWED_BY_PROFESSIONAL_SOCIETY = 10; + // 
corresponds to "not classified by submitter" + SUBMITTER_REVIEW_STATUS_NOT_CLASSIFIED_BY_SUBMITTER = 11; + // corresponds to "classified by multiple submitters" + SUBMITTER_REVIEW_STATUS_CLASSIFIED_BY_MULTIPLE_SUBMITTERS = 12; +} + +// Enumeration describing zygosity. +enum Zygosity { + // unspecified zygosity + ZYGOSITY_UNSPECIFIED = 0; + // corresponds to "Homozygote" + ZYGOSITY_HOMOZYGOTE = 1; + // corresponds to "SingleHeterozygote" + ZYGOSITY_SINGLE_HETEROZYGOTE = 2; + // corresponds to "CompoundHeterozygote" + ZYGOSITY_COMPOUND_HETEROZYGOTE = 3; + // corresponds to "Hemizygote" + ZYGOSITY_HEMIZYGOTE = 4; + // corresponds to "not provided" + ZYGOSITY_NOT_PROVIDED = 5; +} + +// Enumeration describing assertion type attributes. +enum Assertion { + // unspecified assertion type attribute + ASSERTION_UNSPECIFIED = 0; + // corresponds to "variation to disease" + ASSERTION_VARIATION_TO_DISEASE = 1; + // corresponds to "variation to included disease" + ASSERTION_VARIATION_TO_INCLUDED_DISEASE = 2; + // corresponds to "variation in modifier gene to disease" + ASSERTION_VARIATION_IN_MODIFIER_GENE_TO_DISEASE = 3; + // corresponds to "confers sensitivity" + ASSERTION_CONFERS_SENSITIVITY = 4; + // corresponds to "confers resistance" + ASSERTION_CONFERS_RESISTANCE = 5; + // corresponds to "variant to named protein" + ASSERTION_VARIANT_TO_NAMED_PROTEIN = 6; +} + +// Enumeration describing aggregate germline review status value. 
+enum AggregateGermlineReviewStatus { + // unspecified aggregate germline review status value + AGGREGATE_GERMLINE_REVIEW_STATUS_UNSPECIFIED = 0; + // corresponds to "no classification provided" + AGGREGATE_GERMLINE_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; + // corresponds to "no assertion criteria provided" + AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; + // corresponds to "criteria provided, single submitter" + AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; + // corresponds to "criteria provided, multiple submitters, no conflicts" + AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS = 4; + // corresponds to "criteria provided, conflicting classifications" + AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_CONFLICTING_CLASSIFICATIONS = 5; + // corresponds to "reviewed by expert panel" + AGGREGATE_GERMLINE_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 6; + // corresponds to "practice guideline" + AGGREGATE_GERMLINE_REVIEW_STATUS_PRACTICE_GUIDELINE = 7; + // corresponds to "no classifications from unflagged records" + AGGREGATE_GERMLINE_REVIEW_STATUS_NO_CLASSIFICATIONS_FROM_UNFLAGGED_RECORDS = 8; + // corresponds to "no classification for the single variant" + AGGREGATE_GERMLINE_REVIEW_STATUS_NO_CLASSIFICATION_FOR_THE_SINGLE_VARIANT = 9; +} + +// Enumeration describing aggregate somatic clinical impact review status value. 
+enum AggregateSomaticClinicalImpactReviewStatus { + // unspecified aggregate somatic clinical impact review status value + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_UNSPECIFIED = 0; + // corresponds to "no classification provided" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; + // corresponds to "no assertion criteria provided" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; + // corresponds to "criteria provided, single submitter" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; + // corresponds to "criteria provided, multiple submitters" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS = 4; + // corresponds to "reviewed by expert panel" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 5; + // corresponds to "practice guideline" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_PRACTICE_GUIDELINE = 6; + // corresponds to "no classifications from unflagged records" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_CLASSIFICATIONS_FROM_UNFLAGGED_RECORDS = 7; + // corresponds to "no classification for the single variant" + AGGREGATE_SOMATIC_CLINICAL_IMPACT_REVIEW_STATUS_NO_CLASSIFICATION_FOR_THE_SINGLE_VARIANT = 8; +} + +// Enumeration describing aggregate oncogenicity review status value. 
+enum AggregateOncogenicityReviewStatus { + // unspecified aggregate oncogenicity review status value + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_UNSPECIFIED = 0; + // corresponds to "no classification provided" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_CLASSIFICATION_PROVIDED = 1; + // corresponds to "no assertion criteria provided" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED = 2; + // corresponds to "criteria provided, single submitter" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER = 3; + // corresponds to "criteria provided, multiple submitters, no conflicts" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS = 4; + // corresponds to "criteria provided, conflicting classifications" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_CRITERIA_PROVIDED_CONFLICTING_CLASSIFICATIONS = 5; + // corresponds to "reviewed by expert panel" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_REVIEWED_BY_EXPERT_PANEL = 6; + // corresponds to "practice guideline" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_PRACTICE_GUIDELINE = 7; + // corresponds to "no classifications from unflagged records" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_CLASSIFICATIONS_FROM_UNFLAGGED_RECORDS = 8; + // corresponds to "no classification for the single variant" + AGGREGATE_ONCOGENICITY_REVIEW_STATUS_NO_CLASSIFICATION_FOR_THE_SINGLE_VARIANT = 9; +} + +// Enumeration describing origin. 
+enum Origin { + // unspecified origin + ORIGIN_UNSPECIFIED = 0; + // corresponds to "germline" + ORIGIN_GERMLINE = 1; + // corresponds to "somatic" + ORIGIN_SOMATIC = 2; + // corresponds to "de novo" + ORIGIN_DE_NOVO = 3; + // corresponds to "not provided" + ORIGIN_NOT_PROVIDED = 4; + // corresponds to "inherited" + ORIGIN_INHERITED = 5; + // corresponds to "maternal" + ORIGIN_MATERNAL = 6; + // corresponds to "paternal" + ORIGIN_PATERNAL = 7; + // corresponds to "uniparental" + ORIGIN_UNIPARENTAL = 8; + // corresponds to "biparental" + ORIGIN_BIPARENTAL = 9; + // corresponds to "not-reported" + ORIGIN_NOT_REPORTED = 10; + // corresponds to "tested-inconclusive" + ORIGIN_TESTED_INCONCLUSIVE = 11; + // corresponds to "unknown" + ORIGIN_UNKNOWN = 12; + // corresponds to "not applicable" + ORIGIN_NOT_APPLICABLE = 13; + // corresponds to "experimentally generated" + ORIGIN_EXPERIMENTALLY_GENERATED = 14; +} + +// Enumeration describing chromosome. +enum Chromosome { + // unspecified chromosome + CHROMOSOME_UNSPECIFIED = 0; + // corresponds to "1" + CHROMOSOME_1 = 1; + // corresponds to "2" + CHROMOSOME_2 = 2; + // corresponds to "3" + CHROMOSOME_3 = 3; + // corresponds to "4" + CHROMOSOME_4 = 4; + // corresponds to "5" + CHROMOSOME_5 = 5; + // corresponds to "6" + CHROMOSOME_6 = 6; + // corresponds to "7" + CHROMOSOME_7 = 7; + // corresponds to "8" + CHROMOSOME_8 = 8; + // corresponds to "9" + CHROMOSOME_9 = 9; + // corresponds to "10" + CHROMOSOME_10 = 10; + // corresponds to "11" + CHROMOSOME_11 = 11; + // corresponds to "12" + CHROMOSOME_12 = 12; + // corresponds to "13" + CHROMOSOME_13 = 13; + // corresponds to "14" + CHROMOSOME_14 = 14; + // corresponds to "15" + CHROMOSOME_15 = 15; + // corresponds to "16" + CHROMOSOME_16 = 16; + // corresponds to "17" + CHROMOSOME_17 = 17; + // corresponds to "18" + CHROMOSOME_18 = 18; + // corresponds to "19" + CHROMOSOME_19 = 19; + // corresponds to "20" + CHROMOSOME_20 = 20; + // corresponds to "21" + CHROMOSOME_21 = 21; + // 
corresponds to "22" + CHROMOSOME_22 = 22; + // corresponds to "X" + CHROMOSOME_X = 23; + // corresponds to "Y" + CHROMOSOME_Y = 24; + // corresponds to "MT" + CHROMOSOME_MT = 25; + // corresponds to "PAR" + CHROMOSOME_PAR = 26; + // corresponds to "Un" + CHROMOSOME_UN = 27; +} + +// Enumeration describing comment type. +enum CommentType { + // unspecified comment type + COMMENT_TYPE_UNSPECIFIED = 0; + // corresponds to "public" + COMMENT_TYPE_PUBLIC = 1; + // corresponds to "ConvertedByNCBI" + COMMENT_TYPE_CONVERTED_BY_NCB = 2; + // corresponds to "MissingFromAssembly" + COMMENT_TYPE_MISSING_FROM_ASSEMBLY = 3; + // corresponds to "GenomicLocationNotEstablished" + COMMENT_TYPE_GENOMIC_LOCATION_NOT_ESTABLISHED = 4; + // corresponds to "LocationOnGenomeAndProductNotAligned" + COMMENT_TYPE_LOCATION_ON_GENOME_AND_PRODUCT_NOT_ALIGNED = 5; + // corresponds to "DeletionComment" + COMMENT_TYPE_DELETION_COMMENT = 6; + // corresponds to "MergeComment" + COMMENT_TYPE_MERGE_COMMENT = 7; + // corresponds to "AssemblySpecificAlleleDefinition" + COMMENT_TYPE_ASSEMBLY_SPECIFIC_ALLELE_DEFINITION = 8; + // corresponds to "AlignmentGapMakesAppearInconsistent" + COMMENT_TYPE_ALIGNMENT_GAP_MAKES_APPEAR_INCONSISTENT = 9; + // corresponds to "ExplanationOfClassification" + COMMENT_TYPE_EXPLANATION_OF_CLASSIFICATION = 10; + // corresponds to "FlaggedComment" + COMMENT_TYPE_FLAGGED_COMMENT = 11; +} + +// Enumeration describing nucleotide sequence. 
+enum NucleotideSequence { + // unspecified nucleotide sequence + NUCLEOTIDE_SEQUENCE_UNSPECIFIED = 0; + // corresponds to "genomic, top-level" + NUCLEOTIDE_SEQUENCE_GENOMIC_TOP_LEVEL = 1; + // corresponds to "genomic, RefSeqGene" + NUCLEOTIDE_SEQUENCE_GENOMIC_REF_SEQ_GENE = 2; + // corresponds to "genomic" + NUCLEOTIDE_SEQUENCE_GENOMIC = 3; + // corresponds to "coding" + NUCLEOTIDE_SEQUENCE_CODING = 4; + // corresponds to "non-coding" + NUCLEOTIDE_SEQUENCE_NON_CODING = 5; + // corresponds to "protein" + NUCLEOTIDE_SEQUENCE_PROTEIN = 6; +} + +// Enumeration describing protein sequence. +enum ProteinSequence { + // unspecified protein sequence + PROTEIN_SEQUENCE_UNSPECIFIED = 0; + // corresponds to "protein" + PROTEIN_SEQUENCE_PROTEIN = 1; +} + +// Enumeration describing phenotype set. +enum PhenotypeSetType { + // unspecified phenotype set + PHENOTYPE_SET_TYPE_UNSPECIFIED = 0; + // corresponds to "Disease" + PHENOTYPE_SET_TYPE_DISEASE = 1; + // corresponds to "DrugResponse" + PHENOTYPE_SET_TYPE_DRUG_RESPONSE = 2; + // corresponds to "Finding" + PHENOTYPE_SET_TYPE_FINDING = 3; + // corresponds to "PhenotypeInstruction" + PHENOTYPE_SET_TYPE_PHENOTYPE_INSTRUCTION = 4; + // corresponds to "TraitChoice" + PHENOTYPE_SET_TYPE_TRAIT_CHOICE = 5; +} + +// Enumeration describing variation type. +enum VariationType { + // unspecified variation type + VARIATION_TYPE_UNSPECIFIED = 0; + // corresponds to "Diplotype" + VARIATION_TYPE_DIPLOTYPE = 1; + // corresponds to "CompoundHeterozygote" + VARIATION_TYPE_COMPOUND_HETEROZYGOTE = 2; + // corresponds to "Distinct chromosomes" + VARIATION_TYPE_DISTINCT_CHROMOSOMES = 3; +} + +// Enumeration describing evidence type. +// +// Corresponds to "EvidenceType" in XSD. 
+enum EvidenceType { + // unspecified evidence type + EVIDENCE_TYPE_UNSPECIFIED = 0; + // corresponds to "Genetic" + EVIDENCE_TYPE_GENETIC = 1; + // corresponds to "Experimental" + EVIDENCE_TYPE_EXPERIMENTAL = 2; + // corresponds to "Population" + EVIDENCE_TYPE_POPULATION = 3; + // corresponds to "Computational" + EVIDENCE_TYPE_COMPUTATIONAL = 4; +} + +// Enumeration describing method list. +// +// Corresponds to "MethodListType" in XSD. +enum MethodListType { + // unspecified method + METHOD_LIST_TYPE_UNSPECIFIED = 0; + // corresponds to "literature only" + METHOD_LIST_TYPE_LITERATURE_ONLY = 1; + // corresponds to "reference population" + METHOD_LIST_TYPE_REFERENCE_POPULATION = 2; + // corresponds to "case-control" + METHOD_LIST_TYPE_CASE_CONTROL = 3; + // corresponds to "clinical testing" + METHOD_LIST_TYPE_CLINICAL_TESTING = 4; + // corresponds to "in vitro" + METHOD_LIST_TYPE_IN_VITRO = 5; + // corresponds to "in vivo" + METHOD_LIST_TYPE_IN_VIVO = 6; + // corresponds to "research" + METHOD_LIST_TYPE_RESEARCH = 7; + // corresponds to "curation" + METHOD_LIST_TYPE_CURATION = 8; + // corresponds to "not provided" + METHOD_LIST_TYPE_NOT_PROVIDED = 9; + // corresponds to "provider interpretation" + METHOD_LIST_TYPE_PROVIDER_INTERPRETATION = 10; + // corresponds to "phenotyping only" + METHOD_LIST_TYPE_PHENOTYPING_ONLY = 11; +} + +// Enumeration describing HGVS types. +enum HgvsType { + // unspecified HGVS type + HGVS_TYPE_UNSPECIFIED = 0; + // corresponds to "coding" + HGVS_TYPE_CODING = 1; + // corresponds to "genomic" + HGVS_TYPE_GENOMIC = 2; + // corresponds to "genomic, top-level" + HGVS_TYPE_GENOMIC_TOP_LEVEL = 3; + // corresponds to "non-coding" + HGVS_TYPE_NON_CODING = 4; + // corresponds to "protein" + HGVS_TYPE_PROTEIN = 5; +} + +// Enumeration describing clinical features affected status. 
+enum ClinicalFeaturesAffectedStatusType { + // unspecified status + CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_UNSPECIFIED = 0; + // corresponds to "present" + CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_PRESENT = 1; + // corresponds to "absent" + CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_ABSENT = 2; + // corresponds to "not tested" + CLINICAL_FEATURES_AFFECTED_STATUS_TYPE_NOT_TESTED = 3; +} + +// Enumeration describing haplotype variation types. +enum HaploVariationType { + // unspecified haplotype variation type + HAPLO_VARIATION_TYPE_UNSPECIFIED = 0; + // corresponds to "Haplotype" + HAPLO_VARIATION_TYPE_HAPLOTYPE = 1; + // corresponds to "Haplotype, single variant" + HAPLO_VARIATION_TYPE_HAPLOTYPE_SINGLE_VARIANT = 2; + // corresponds to "Variation" + HAPLO_VARIATION_TYPE_VARIATION = 3; + // corresponds to "Phase unknown" + HAPLO_VARIATION_TYPE_PHASE_UNKNOWN = 4; + // corresponds to "Haplotype defined by a single variant" + HAPLO_VARIATION_TYPE_HAPLOTYPE_DEFINED_BY_SINGLE_VARIANT = 5; +} + +// A structure to support reporting unformatted content, with type and +// source specified. +message Comment { + // The comment's value. + string value = 1; + // The optional comment data source. + optional string data_source = 2; + // The comment's type. + optional CommentType type = 3; +} + +// This structure is used to represent how an object described in the +// submission relates to objects in other databases. +message Xref { + // The name of the database. When there is an overlap with sequence + // databases, that name is used. + string db = 1; + // The identifier used by the database. Being exported as a string + // even though internally the database has rules for defining which databases use + // integer identifiers. + string id = 2; + // Used to differentiate between different types of identifiers that + // a database may provide. + optional string type = 3; + // Optional URL to the database entry. + optional string url = 4; + // The status; defaults to "current".
+ optional Status status = 5; +} + +// Description of a citation. +message Citation { + // Local ID with source. + message IdType { + // The citation's value. + string value = 1; + // If there is an identifier, what database provides it. + string source = 2; + } + + /* nested elements */ + + // Optional list of IDs. + repeated IdType ids = 1; + // Optional URL. + optional string url = 2; + // Optional citation text. + optional string citation_text = 3; + + /* attributes */ + + // This maintained distinct from publication types in PubMed and + // established by GTR curators. The default is 'general'. + optional string type = 4; + // Corresponds to the abbreviation reported by GTR. + optional string abbrev = 5; +} + +// The attribute is a general element to represent a defined set of data +// qualified by an enumerated set of types. For each attribute element, the value will +// be a character string and is optional. Source shall be used to store identifiers for +// supplied data from source other than the submitter (e.g. SequenceOntology). The data +// submitted where Type="variation" shall be validated against sequence_alternation in +// Sequence Ontology http://www.sequenceontology.org/. This is to be a generic version +// of AttributeType and should be used with extension when it is used to specify Type +// and its enumerations. +message BaseAttribute { + // The attribute's value; can be empty. + optional string value = 1; + // The optional integer value. + optional int64 integer_value = 2; + // The optional date value. + optional google.protobuf.Timestamp date_value = 3; +} + +// Description of a nucleotide sequence expression. +// +// Corresponds to `typeNucleotideSequenceExpression` +message HgvsNucleotideExpression { + // The expression values. + string expression = 1; + // The type of the nucleotide sequence. + optional NucleotideSequence sequence_type = 2; + // Optional sequence accession version. 
+ optional string sequence_accession_version = 3; + // Optional sequence accession. + optional string sequence_accession = 4; + // Optional sequence version. + optional int32 sequence_version = 5; + // Optional description of the change. + optional string change = 6; + // Optional assembly information. + optional string assembly = 7; + // Optional submission information. + optional string submitted = 8; + // Optional MANE Select flag. + optional bool mane_select = 9; + // Optional MANE Plus Clinical flag. + optional bool mane_plus_clinical = 10; +} + + +// Description of a protein sequence expression. +// +// Corresponds to `typeProteinSequenceExpression` in XSD. +message HgvsProteinExpression { + // The expression values. + string expression = 1; + // Optional sequence accession version. + optional string sequence_accession_version = 2; + // Optional sequence accession. + optional string sequence_accession = 3; + // Optional sequence version. + optional int32 sequence_version = 4; + // Optional description of the change. + optional string change = 5; +} + +// A structure to represent an HGVS expression for a nucleotide sequence +// variant, along with the predicted protein change and the predicted molecular +// consequence. Also used to represent only the protein change if that is all that has +// been reported. +// +// Corresponds to `typeHVSExpression` in XSD. +message HgvsExpression { + /* nested elements */ + + // Optional nucleotide sequence expression. + optional HgvsNucleotideExpression nucleotide_expression = 1; + // Optional protein sequence expression. + optional HgvsProteinExpression protein_expression = 2; + // List of molecular consequences. + repeated Xref molecular_consequences = 3; + + /* attributes */ + + // Type of HGVS expression. + HgvsType type = 4; + // Optional assembly. + optional string assembly = 5; +} + +// // Phenotype list details. +// message PhenotypeListDetailsType { +// // Local type for phenotype. 
+// message Phenotype { +// message XRefList { +// repeated Xref xrefs = 1; +// } + +// /* nested elements */ +// repeated XRefList xref_list = 1; + +// /* attributes */ + +// // `name` attribute +// string name = 2; +// // `target_id` attribute +// int64 target_id = 3; +// // `AffectedStatus` attribute. +// optional string affected_status = 4; +// } + +// /* nested elements */ + +// // List phenotypes. +// repeated Phenotype phenotypes = 1; +// // List of phenotype details. +// repeated PhenotypeDetails phenotype_details = 2; + +// /* attributes */ + +// // Type of the phenotype set. +// PhenotypeSetType phenotype_set_type = 3; +// } + +// Description of a software. +message Software { + // Name of the software. + string name = 1; + // Version of the software; optional. + optional string version = 2; + // Purpose of the software; optional. + optional string purpose = 3; +} + +// Description of the history of a record. +// +// Called ``typeDescriptionHistory`` in XSD. +message DescriptionHistory { + // The pathogenicity description. + string description = 1; + // The date of the description. + optional google.protobuf.Timestamp dated = 2; +} + +// Entry in an element set. +// +// Called ``SetElementSetType`` in XSD. +message GenericSetElement { + // The element's value. + string value = 1; + // The element's type. + string type = 2; + // List of citations. + repeated Citation citations = 3; + // List of xrefs. + repeated Xref xrefs = 4; + // List of comments. + repeated Comment comments = 5; +} + +// Common type for an entry in a set of attributes. +// +// Called ``typeAttributeSet`` in XSD. +message AttributeSetElement { + // Extend the BaseAttribute with a `type` field. + message Attribute { + // The base value. + BaseAttribute base = 1; + // The type of the attribute. + string type = 2; + } + + // The attribute value. + Attribute attribute = 1; + // List of xrefs. + repeated Xref xrefs = 2; + // List of citations. 
+ repeated Citation citations = 3; + // List of comments. + repeated Comment comments = 4; +} + +// Type to describe traits in various places. +message Trait { + // Local type for trait relationship. + message TraitRelationship { + // Local enumeration for trait types. + // + // NB: only DrugResponseAndDisease is used in the XML. + enum Type { + // unspecified trait type + TYPE_UNSPECIFIED = 0; + // corresponds to "phenotype" + TYPE_PHENOTYPE = 1; + // corresponds to "Subphenotype" + TYPE_SUBPHENOTYPE = 2; + // corresponds to "DrugResponseAndDisease" + TYPE_DRUG_RESPONSE_AND_DISEASE = 3; + // corresponds to "co-occuring condition" + TYPE_CO_OCCURING_CONDITION = 4; + // corresponds to "Finding member" + TYPE_FINDING_MEMBER = 5; + } + + /* nested elements */ + + // names + // + // NB: in XSD this is explicitly given as unbounded but XML always has + // one element + repeated GenericSetElement names = 1; + // symbols (NB: never occur in the XML) + repeated GenericSetElement symbols = 2; + // attributes (NB: never occur in the XML) + repeated AttributeSetElement attributes = 3; + // Citation list. + repeated Citation citations = 4; + // Xref list. + repeated Xref xrefs = 5; + // Comment list. + repeated Comment comments = 6; + // Sources + repeated string sources = 7; + + /* attributes */ + + // Trait type. + Type type = 8; + } + + // names + repeated GenericSetElement names = 1; + // symbols + repeated GenericSetElement symbols = 2; + // attributes + repeated AttributeSetElement attributes = 3; + // Trait relationships + repeated TraitRelationship trait_relationships = 4; + // Citation list. + repeated Citation citations = 5; + // Xref list. + repeated Xref xrefs = 6; + // Comment list. + repeated Comment comments = 7; + // Sources + repeated string sources = 8; +} + +// Describes an indication. +// +// NB: Called "IndicationType" in the XSD. +message Indication { + // Enumeration for the indication type. + enum Type { + // unspecified indication type.
+ TYPE_UNSPECIFIED = 0; + // corresponds to "Indication" + TYPE_INDICATION = 1; + } + + /* nested elements */ + + // Represents the value for the test indication as a name of a trait. + repeated Trait traits = 1; + // List of names. + repeated GenericSetElement names = 2; + // List of attributes. + repeated AttributeSetElement attributes = 3; + // List of xrefs. + repeated Xref xrefs = 4; + // List of citations. + repeated Citation citations = 5; + // List of comments. + repeated Comment comments = 6; + + /* attributes */ + + // The type of indication. + Type type = 7; +} + +// A set of ``Trait`` objects. +// +// NB: Called "ClinAsserTraitSetType" in the XSD. +message TraitSet { + // Local type. + enum Type { + // unspecified type + TYPE_UNSPECIFIED = 0; + // corresponds to "Disease" + TYPE_DISEASE = 1; + // corresponds to "DrugResponse" + TYPE_DRUG_RESPONSE = 2; + // corresponds to "Finding" + TYPE_FINDING = 3; + // corresponds to "PhenotypeInstruction" + TYPE_PHENOTYPE_INSTRUCTION = 4; + // corresponds to "TraitChoice" + TYPE_TRAIT_CHOICE = 5; + } + + /* nested elements */ + + // The traits. + repeated Trait traits = 1; + // The names. + repeated GenericSetElement names = 2; + // The symbols. + repeated GenericSetElement symbols = 3; + // The attributes. + repeated AttributeSetElement attributes = 4; + // List of xrefs. + repeated Xref xrefs = 5; + // List of citations. + repeated Citation citations = 6; + // List of comments. + repeated Comment comments = 7; + + /* attributes */ + + // The type. + Type type = 8; + // Date of last evaluation. + optional google.protobuf.Timestamp date_last_evaluated = 9; + // ID. + optional int64 id = 10; + // Whether contributes to aggregate classification. + optional bool contributes_to_aggregate_classification = 11; + // Lower level of evidence. + optional bool lower_level_of_evidence = 12; + // Explanation of multiple conditions.
+ optional string multiple_condition_explanation = 13; +} + +// Aggregated germline classification info. +// +// Corresponds to ``typeAggregatedGermlineClassification`` in XSD. +message AggregatedGermlineClassification { + /* nested elements */ + + // The aggregate review status based on all germline submissions + // for this record. + AggregateGermlineReviewStatus review_status = 1; + // We are not providing an enumeration for the values we report + // for germline classification within the xsd. Details are in + // https://github.com/ncbi/clinvar/ClassificationOnClinVar.md + // + optional string description = 2; + // Explanation is used only when the description is 'conflicting + // data from submitters' The element summarizes the conflict. + optional Comment explanation = 3; + // List of xrefs. + repeated Xref xrefs = 4; + // List of citations. + repeated Citation citations = 5; + // List of comments. + repeated Comment comments = 6; + + // History information. + repeated DescriptionHistory history_records = 7; + // List of conditions. + repeated TraitSet conditions = 8; + + /* attributes */ + + // Date of last evaluation. + optional google.protobuf.Timestamp date_last_evaluated = 9; + // Date of creation. + google.protobuf.Timestamp date_created = 10; + // Date of most recent submission. + google.protobuf.Timestamp most_recent_submission = 11; + // Number of submitters. + optional int32 number_of_submitters = 12; + // Number of submissions. + optional int32 number_of_submissions = 13; +} + +// Aggregated somatic clinical impact info. +message AggregatedSomaticClinicalImpact { + /* nested elements */ + + // The aggregate review status based on all somatic clinical + // impact submissions for this record. + AggregateSomaticClinicalImpactReviewStatus review_status = 1; + // We are not providing an enumeration for the values we report + // for somatic clinical impact classification within the xsd. 
Details are in + // https://github.com/ncbi/clinvar/ClassificationOnClinVar.md + optional string description = 2; + // List of xrefs. + repeated Xref xrefs = 3; + // List of citations. + repeated Citation citations = 4; + // List of comments. + repeated Comment comments = 5; + + // History information. + repeated DescriptionHistory history_records = 6; + // List of conditions. + repeated TraitSet conditions = 7; + + /* attributes */ + + // Date of last evaluation. + optional google.protobuf.Timestamp date_last_evaluated = 8; + // Date of creation. + google.protobuf.Timestamp date_created = 9; + // Date of most recent submission. + google.protobuf.Timestamp most_recent_submission = 10; + // Number of submitters. + optional int32 number_of_submitters = 11; + // Number of submissions. + optional int32 number_of_submissions = 12; +} + +// Aggregated oncogenicity classification info. +message AggregatedOncogenicityClassification { + /* nested elements */ + + // The aggregate review status based on all oncogenicity + // submissions for this record. + AggregateOncogenicityReviewStatus review_status = 1; + // We are not providing an enumeration for the values we report + // for oncogenicity classification within the xsd. Details are in + // https://github.com/ncbi/clinvar/ClassificationOnClinVar.md + optional string description = 2; + // List of xrefs. + repeated Xref xrefs = 3; + // List of citations. + repeated Citation citations = 4; + // List of comments. + repeated Comment comments = 5; + + // History information. + repeated DescriptionHistory history_records = 6; + // List of conditions. + repeated TraitSet conditions = 7; + + /* attributes */ + + // Date of last evaluation. + optional google.protobuf.Timestamp date_last_evaluated = 8; + // Date of creation. + google.protobuf.Timestamp date_created = 9; + // Date of most recent submission. + google.protobuf.Timestamp most_recent_submission = 10; + // Number of submitters.
+ optional int32 number_of_submitters = 11; + // Number of submissions. + optional int32 number_of_submissions = 12; +} + +// Used to bundle different types of Classifications (germline, +// oncogenic, somatic clinical impact) ; Supports summary as +// well as submission details. +// +// NB: called "typeAggregateClassificationSet" in XSD. +message AggregateClassificationSet { + // The aggregate germline classification. + optional AggregatedGermlineClassification germline_classification = 1; + // The aggregate somatic clinical impact. + repeated AggregatedSomaticClinicalImpact somatic_clinical_impacts = 2; + // The aggregate oncogenicity classification. + optional AggregatedOncogenicityClassification oncogenicity_classification = 3; +} + +// Describes the clinical significance of a variant. +// +// Corresponds to `ClinicalSignificanceType` in XSD. +message ClinicalSignificance { + /* contained elements */ + + // The optional review status. + optional SubmitterReviewStatus review_status = 1; + // Structure used to support old data of AlleleDescriptionSet + // within Co-occurenceSet. + // + // NB: unused in XML + optional string description = 2; + // Optional explanatory comment. + // + // Explanation is used only when the description is 'conflicting + // data from submitters' The element summarizes the conflict. + // + // NB: unused in XML + optional Comment explanation = 3; + // Optional list of xrefs. + repeated Xref xrefs = 4; + // Optional list of citations. + repeated Citation citations = 5; + // Optional list of comments. + repeated Comment comments = 6; + + /* attributes */ + + // Date of last evaluation. + // + // NB: unused in XML + optional google.protobuf.Timestamp date_last_evaluated = 7; +} + +// This is to be used within co-occurrence set. +// +// Corresponds to `typeAlleleDescr` in XSD. +message AlleleDescription { + // Enumeration for relative orientation. 
+ // + // NB: unused in XML + enum RelativeOrientation { + // unspecified relative orientation + RELATIVE_ORIENTATION_UNSPECIFIED = 0; + // corresponds to "cis" + RELATIVE_ORIENTATION_CIS = 1; + // corresponds to "trans" + RELATIVE_ORIENTATION_TRANS = 2; + // corresponds to "unknown" + RELATIVE_ORIENTATION_UNKNOWN = 3; + } + + // The name of the allele. + string name = 1; + // Optional relative orientation. + // + // NB: Unused in XML + optional RelativeOrientation relative_orientation = 2; + // Optional zygosity. + optional Zygosity zygosity = 3; + // Optional clinical significance. + // + // Corresponds to `ClinicalSignificanceType` in XSD. + optional ClinicalSignificance clinical_significance = 4; +} + +// A structure to support reporting of an accession, its version, the +// date its status changed, and text describing that change. +// +// Corresponds to `typeRecordHistory` in XSD. +message RecordHistory { + /* nested elements */ + + // Optional comment on the history record. + optional Comment comment = 1; + + /* attributes */ + + // The accession. + string accession = 2; + // The version. + int32 version = 3; + // The date the record was changed. + google.protobuf.Timestamp date_changed = 4; + // Attribute @VariationID is only populated for VCV, where @Accession + // is like VCV000000009 + optional int64 variation_id = 5; +} + +// Report classification of a variant for a SCV. +// +// Corresponds to `ClassificationTypeSCV` in XSD. +message ClassificationScv { + // Clinical impact of a somatic variant. + message SomaticClinicalImpact { + // The somatic clinical impact value. + string value = 1; + + /* attributes */ + + // Type of the clinical impact assertion. + optional string clinical_impact_assertion_type = 2; + // Clinical impact significance. + optional string clinical_impact_clinical_significance = 3; + // Name of the drug for the therapeutic assertion. + optional string drug_for_therapeutic_assertion = 4; + } + + // Classification score description.
+ message ClassificationScore { + // The score's value. + double value = 1; + // The score's type; optional. + optional string type = 2; + } + + /* nested elements */ + + // The field's review status. + SubmitterReviewStatus review_status = 1; + // The germline classification; mutually exclusive with `somatic_clinical_impacts` + // and `oncogenicity_classification`. + optional string germline_classification = 2; + // Information on the clinical impact; mutually exclusive with `germline_classification` + // and `oncogenicity_classification`. + optional SomaticClinicalImpact somatic_clinical_impacts = 3; + // The oncogenicity classification; mutually exclusive with `germline_classification` + // and `somatic_clinical_impacts`. + optional string oncogenicity_classification = 4; + // Optional explanation of classification. + optional string explanation_of_classification = 5; + // List of classification scores. + repeated ClassificationScore classification_scores = 6; + // List of xrefs. + repeated Xref xrefs = 7; + // List of citations. + repeated Citation citations = 8; + // List of comments. + repeated Comment comments = 9; + + /* attributes */ + + // Date of last evaluation. + optional google.protobuf.Timestamp date_last_evaluated = 10; +} + +// Set of attributes for the primary submitter. Any additional submitters +// are captured in the AdditionalSubmitters element. +message SubmitterIdentifiers { + // Name of submitter. + string submitter_name = 1; + // Organization ID. + int64 org_id = 2; + // Organization category. + string org_category = 3; + // Organization abbreviation; optional. + optional string org_abbreviation = 4; +} + +// Definition of a species. +message Species { + // Name of the species. + string name = 1; + // Optional taxonomy ID. + optional int32 taxonomy_id = 2; +} + +// Interpreted condition for an RCV record. +// +// Corresponds to `typeRCVInterpretedCondition` in XSD. +message ClassifiedCondition { + // Condition value.
+ string value = 1; + // Database name. + optional string db = 2; + // Identifier in database. + optional string id = 3; +} + +// Inside ClinicalAssertion, a structure to support reporting of an +// accession, its version, the date its status changed, and text describing that +// change. +// +// Corresponds to `typeClinicalAssertionRecordHistory` in XSD. +message ClinicalAssertionRecordHistory { + // Optional comment. + optional Comment comment = 1; + // Accession. + string accession = 2; + // Optional version. + optional int32 version = 3; + // Date of the record. + google.protobuf.Timestamp date_changed = 4; +} + +// Description of a functional consequence. +// +// Corresponds to `typeFunctionalConsequence` in XSD. +message FunctionalConsequence { + /* nested elements */ + + // List of xrefs. + repeated Xref xrefs = 1; + // List of citations. + repeated Citation citations = 2; + // List of comments. + repeated Comment comments = 3; + + /* attributes */ + + // Value of functional consequence. + string value = 4; +} + +// Type for the tag `GeneralCitations`. +message GeneralCitations { + // List of xrefs. + repeated Xref xrefs = 1; + // List of citations. + repeated Citation citations = 2; +} + +// This refers to the zygosity of the variant being asserted. +message Cooccurrence { + // Optional zygosity. + optional Zygosity zygosity = 1; + // The allele descriptions. + repeated AlleleDescription allele_descriptions = 2; + // The optional count. + optional int32 count = 3; +} + +// A structure to support reporting the name of a submitter, its +// organization id, and its abbreviation and type. +message Submitter { + // Enumeration of submitter kind. + enum Type { + // unspecified submitter type + TYPE_UNSPECIFIED = 0; + // corresponds to "primary" + TYPE_PRIMARY = 1; + // corresponds to "secondary" + TYPE_SECONDARY = 2; + // corresponds to "behalf" + TYPE_BEHALF = 3; + } + + // The submitter's identifier. 
+ SubmitterIdentifiers submitter_identifiers = 1; + // The submitter type. + Type type = 2; +} + + +// Haploinsufficiency/Triplosensitivity of gene. +message DosageSensitivity { + /* nested elements */ + + // Value. + string value = 1; + + /* attributes */ + + // Optional last evaluated date. + optional google.protobuf.Timestamp last_evaluated = 2; + // URL to ClinGen. + optional string clingen = 3; +} + +// A name with an optional type. +// +// Corresponds to `typeNames` in XSD. +message OtherName { + // The name's value. + string value = 1; + // The name's type. + optional string type = 2; +} + +// A structure to support reporting of an accession, its version, the +// date it was deleted and a free-text summary of why it was deleted. +// +// Corresponds to `typeDeletedSCV`. +message DeletedScv { + // The accession. + string accession = 1; + // The version. + int32 version = 2; + // The date of deletion. + google.protobuf.Timestamp date_deleted = 3; +} + +// There can be multiple types of location, and the locations may have +// identifiers in other databases. +// +// Corresponds to `typeLocation` in XSD. +message Location { + // Local type for sequence location. + message SequenceLocation { + // Local enum for the assembly status. + enum AssemblyStatus { + // unspecified assembly status + ASSEMBLY_STATUS_UNSPECIFIED = 0; + // corresponds to "current" + ASSEMBLY_STATUS_CURRENT = 1; + // corresponds to "previous" + ASSEMBLY_STATUS_PREVIOUS = 2; + } + + // forDisplay value. + optional bool for_display = 1; + // Name of assembly. + string assembly = 2; + // Chromosome of variant. + Chromosome chr = 3; + // Optional chromosome accession. + optional string accession = 4; + // Outer start position. + optional uint32 outer_start = 5; + // Inner start position. + optional uint32 inner_start = 6; + // Start position. + optional uint32 start = 7; + // Stop position. + optional uint32 stop = 8; + // Inner stop position.
+ optional uint32 inner_stop = 9; + // Outer stop position. + optional uint32 outer_stop = 10; + // Display start position. + optional uint32 display_start = 11; + // Display stop position. + optional uint32 display_stop = 12; + // Strand. + optional string strand = 13; + // Variant length. + optional uint32 variant_length = 14; + // Reference allele. + optional string reference_allele = 15; + // Alternate allele. + optional string alternate_allele = 16; + // Assembly accession version. + optional string assembly_accession_version = 17; + // Assembly status. + optional AssemblyStatus assembly_status = 18; + // Position in VCF. + optional uint32 position_vcf = 19; + // Reference allele in VCF. + optional string reference_allele_vcf = 20; + // Alternate allele in VCF. + optional string alternate_allele_vcf = 21; + // For display length. + optional uint32 for_display_length = 22; + } + + // Cytogenetic location is maintained independent of sequence + // location, and can be submitted or computed from the sequence location. + // + // Between 0 and 4 entries. + repeated string cytogenetic_locations = 1; + // Location on a defined sequence, with reference and alternate + // allele, and start /stop values depending on the specificity with which the + // variant location is known. The number system of offset 1, and + // right-justified to be consistent with HGVS location data. + repeated SequenceLocation sequence_locations = 2; + // The location of the variant relative to features within the gene. + repeated string gene_locations = 3; + // List of xrefs. + repeated Xref xrefs = 4; +} + +// Description of a SCV. +// +// Corresponds to "typeSCV" in XSD. +message Scv { + // Optional title. + optional string title = 1; + // Accession. + string accession = 2; + // Version. + int32 version = 3; +} + +// // Description of a list of indications. +// // +// // Corresponds to `IndicationListType` in XSD. 
+// message Indications { +// // The indication may be a set of phenotypic descriptions. +// message Indication { +// // Xrefs. +// repeated Xref xrefs = 1; +// // Name of the indication. +// string name = 2; +// // Target ID. +// int64 target_id = 3; +// } + +// // Type of the phenotype. +// PhenotypeSetType phenotype_set_type = 1; +// } + +// // Description of a pharma. +// message PharmaType { +// // Local type for drug response. +// message DrugResponse { +// // // The phenotype details. +// // repeated PhenotypeListDetailsType phenotype_details = 1; +// // Xref list. +// repeated Xref xrefs = 1; +// // Name of drug. +// string drug_name = 2; +// // Target ID. +// int64 target_id = 3; +// } + +// // Drug responses. +// repeated DrugResponse drug_responses = 1; +// } + +// // Description of a single method. +// // +// // Corresponds to `typeMethodRefs` in XSD. +// message MethodWithXrefs { +// // Name of the method. +// string method_name = 1; +// // Xrefs. +// repeated Xref xrefs = 2; +// } + +// Structure to describe attributes of any family data in an observation. +// If the details of the number of families and the de-identified pedigree id are not +// available, use FamilyHistory to describe what type of family data is available. Can +// also be used to report 'Yes' or 'No' if there are no more details. +// +// Corresponds to "FamilyInfo" in XSD. +message FamilyData { + /* nested elements */ + + // Optional family history. + optional string family_history = 1; + + /* attributes */ + + // Number of families. + optional int32 num_families = 2; + // Number of families with variant. + optional int32 num_families_with_variant = 3; + // Number of families with segregation observed. + optional int32 num_families_with_segregation_observed = 4; + // Pedigree ID. + optional string pedigree_id = 5; + // Segregation observed (yes, no, number) + optional string segregation_observed = 6; +} + +// Description of a sample. +// +// Corresponds to `typeSample` in XSD.
+message Sample { + // Local type for sample description. + message SampleDescription { + // Description of sample. + optional Comment description = 1; + // Citation. + optional Citation citation = 2; + } + + // Local enumeration for presence in normal tissue. + enum SomaticVariantInNormalTissue { + // unspecified presence + SOMATIC_VARIANT_IN_NORMAL_TISSUE_UNSPECIFIED = 0; + // corresponds to "present" + SOMATIC_VARIANT_IN_NORMAL_TISSUE_PRESENT = 1; + // corresponds to "absent" + SOMATIC_VARIANT_IN_NORMAL_TISSUE_ABSENT = 2; + // corresponds to "not tested" + SOMATIC_VARIANT_IN_NORMAL_TISSUE_NOT_TESTED = 3; + } + + // Local enumeration for an age unit. + enum AgeUnit { + // unspecified age unit + AGE_UNIT_UNSPECIFIED = 0; + // corresponds to "days" + AGE_UNIT_DAYS = 1; + // corresponds to "weeks" + AGE_UNIT_WEEKS = 2; + // corresponds to "months" + AGE_UNIT_MONTHS = 3; + // corresponds to "years" + AGE_UNIT_YEARS = 4; + // corresponds to "weeks gestation" + AGE_UNIT_WEEKS_GESTATION = 5; + // corresponds to "months gestation" + AGE_UNIT_MONTHS_GESTATION = 6; + } + + // Local enumeration for an age type. + enum AgeType { + // unspecified age type + AGE_TYPE_UNSPECIFIED = 0; + // corresponds to "minimum" + AGE_TYPE_MINIMUM = 1; + // corresponds to "maximum" + AGE_TYPE_MAXIMUM = 2; + // corresponds to "single" + AGE_TYPE_SINGLE = 3; + } + + // Local enumeration for the affected status. + enum AffectedStatus { + // unspecified affected status + AFFECTED_STATUS_UNSPECIFIED = 0; + // corresponds to "yes" + AFFECTED_STATUS_YES = 1; + // corresponds to "no" + AFFECTED_STATUS_NO = 2; + // corresponds to "not provided" + AFFECTED_STATUS_NOT_PROVIDED = 3; + // corresponds to "unknown" + AFFECTED_STATUS_UNKNOWN = 4; + // corresponds to "not applicable" + AFFECTED_STATUS_NOT_APPLICABLE = 5; + } + + // Local type for an age. + message Age { + // The age value. + int32 value = 1; + // The age unit. + AgeUnit unit = 2; + // The age type. 
+ AgeType type = 3; + } + + // Local enumeration for gender. + enum Gender { + // unspecified gender + GENDER_UNSPECIFIED = 0; + // corresponds to "male" + GENDER_MALE = 1; + // corresponds to "female" + GENDER_FEMALE = 2; + // corresponds to "mixed" + GENDER_MIXED = 3; + } + + // Local enumeration for SourceType. + enum SourceType { + // unspecified source type + SOURCE_TYPE_UNSPECIFIED = 0; + // corresponds to "submitter-generated" + SOURCE_TYPE_SUBMITTER_GENERATED = 1; + // corresponds to "data mining" + SOURCE_TYPE_DATA_MINING = 2; + } + + // The sample description. + optional SampleDescription sample_description = 1; + // The sample origin. + optional Origin origin = 2; + // Sample ethnicity. + optional string ethnicity = 3; + // Sample geographic origin. + optional string geographic_origin = 4; + // Sample tissue. + optional string tissue = 5; + // Presence of variant in normal tissue. + optional SomaticVariantInNormalTissue somatic_variant_in_normal_tissue = 6; + // Somatic variant allele fraction. + optional string somatic_variant_allele_fraction = 7; + // Cell line name. + optional string cell_line = 8; + // Species. + optional Species species = 9; + // Age (range), max. size of 2. + repeated Age ages = 10; + // Strain. + optional string strain = 11; + // Affected status. + optional AffectedStatus affected_status = 12; + // Denominator, total individuals included in this observation set. + optional int32 numer_tested = 13; + // Denominator, total males included in this observation set. + optional int32 number_males = 14; + // Denominator, total females included in this observation set. + optional int32 number_females = 15; + // Denominator, total number chromosomes tested. Number affected + // and unaffected are captured in the element NumberObserved. 
+ optional int32 number_chr_tested = 16; + // Gender should be used ONLY if explicit values are not + // available for number of males or females, and there is a need to indicate + // that the genders in the sample are known. + optional Gender gender = 17; + // Family information. + optional FamilyData family_data = 18; + // Optional proband ID. + optional string proband = 19; + // Optional indication. + optional Indication indication = 20; + // List of citations. + repeated Citation citations = 21; + // List of xrefs. + repeated Xref xrefs = 22; + // List of comments. + repeated Comment comments = 23; + // Source type. + optional SourceType source_type = 24; +} + +// Details of a method used to generate variant calls or predict/report +// functional consequence. The name of the platform should represent a sequencer or an +// array, e.g. sequencing or array , e.g. capillary, 454, Helicos, Solexa, SOLiD. This +// structure should also be used if the method is 'Curation'. +// +// Corresponds to `MethodType` in XSD. +message Method { + // Local enumeration for result types. + enum ResultType { + // unspecified result type + RESULT_TYPE_UNSPECIFIED = 0; + // corresponds to "number of occurrences" + RESULT_TYPE_NUMBER_OF_OCCURRENCES = 1; + // corresponds to "p value" + RESULT_TYPE_P_VALUE = 2; + // corresponds to "odds ratio" + RESULT_TYPE_ODDS_RATIO = 3; + // corresponds to "variant call" + RESULT_TYPE_VARIANT_CALL = 4; + } + + // Local enumeration for SourceType. + enum SourceType { + // unspecified source type + SOURCE_TYPE_UNSPECIFIED = 0; + // corresponds to "submitter-generated" + SOURCE_TYPE_SUBMITTER_GENERATED = 1; + // corresponds to "data mining" + SOURCE_TYPE_DATA_MINING = 2; + // corresponds to "data review" + SOURCE_TYPE_DATA_REVIEW = 3; + } + + // Local type for method attribute. + message MethodAttribute { + // Local enumeration of attribute type. 
+ enum AttributeType { + // unspecified attribute type + ATTRIBUTE_TYPE_UNSPECIFIED = 0; + // corresponds to "Location" + ATTRIBUTE_TYPE_LOCATION = 1; + // corresponds to "ControlsAppropriate" + ATTRIBUTE_TYPE_CONTROLS_APPROPRIATE = 2; + // corresponds to "MethodAppropriate" + ATTRIBUTE_TYPE_METHOD_APPROPRIATE = 3; + // corresponds to "TestName" + ATTRIBUTE_TYPE_TEST_NAME = 4; + // corresponds to "StructVarMethod" + ATTRIBUTE_TYPE_STRUCT_VAR_METHOD_TYPE = 5; + // corresponds to "ProbeAccession" + ATTRIBUTE_TYPE_PROBE_ACCESSION = 6; + } + + // The base value. + BaseAttribute base = 1; + // The attribute type. + AttributeType type = 2; + } + + // Local type for observation method attribute. + message ObsMethodAttribute { + // Local enumeration for attribute type. + enum AttributeType { + // unspecified attribute type + ATTRIBUTE_TYPE_UNSPECIFIED = 0; + // corresponds to "MethodResult" + ATTRIBUTE_TYPE_METHOD_RESULT = 1; + // corresponds to "TestingLaboratory" + ATTRIBUTE_TYPE_TESTING_LABORATORY = 2; + } + + // The base value. + BaseAttribute base = 1; + // The attribute type. + AttributeType type = 2; + // Optional comments. + repeated Comment comments = 3; + } + + // Platform name. + optional string name_platform = 1; + // Platform type. + optional string type_platform = 2; + // Method purpose. + optional string purpose= 3; + // Method result type. + optional ResultType result_type = 4; + // Smallest reported. + optional string min_reported = 5; + // Largest reported. + optional string max_reported = 6; + // Reference standard. + optional string reference_standard = 7; + // List of citations. + repeated Citation citations = 8; + // List of xrefs. + repeated Xref xrefs = 9; + // Free text to enrich the description of the method and to + // provide information not captured in specific fields. + optional string description = 10; + // List of softwares used. + repeated Software software = 11; + // Source type. + optional SourceType source_type = 12; + // Method type. 
+ MethodListType method_type = 13; + // Method attribute. + repeated MethodAttribute method_attributes = 14; + // ObsMethodAttribute is used to indicate an attribute specific + // to a particular method in conjunction with a particular observation . + repeated ObsMethodAttribute obs_method_attributes = 15; +} + +// This is a record per variant (Measure/@ID,AlleleID) as submitted for +// accessioning in an SCV. +// +// Corresponds to "typeAlleleSCV" in XSD. +message AlleleScv { + // Local type for Gene. + message Gene { + /* nested elements */ + + // Gene name. + optional string name = 1; + // Used to set key words for retrieval or + // display about a gene, such as genes listed by the + // ACMG guidelines. + repeated string properties = 2; + // Used for gene specific identifiers + // such as MIM number, Gene ID, HGNC ID, etc. + repeated Xref xrefs = 3; + + /* attributes */ + + // Optional gene symbol. + optional string symbol = 4; + // Relationship between gene and variant. + optional GeneVariantRelationship relationship_type = 5; + } + + // Local type for MolecularConsequence. + message MolecularConsequence { + /* nested elements */ + + // Xref list. + repeated Xref xrefs = 1; + // Citation list. + repeated Citation citations = 2; + // Comment list. + repeated Comment comments = 3; + + /* attributes */ + + // RS id. + optional int64 rs = 4; + // Optional HGVS expression. + optional string hgvs = 5; + // Optional SO id. + optional string so_id = 6; + // Function. + string function = 7; + } + + /* nested elements */ + + // 0 to many genes (and related data ) related to the allele + // being reported. + repeated Gene genes = 1; + // Name provided by the submitter. + repeated OtherName names = 2; + // Variant type. + optional string variant_type = 3; + // Location. + optional Location location = 4; + // List of other names. + repeated OtherName other_names = 5; + // Single letter representation of the amino acid change and its + // location. 
+ repeated string protein_changes = 6; + // List of xrefs. + repeated Xref xrefs = 7; + // List of citations. + repeated Citation citations = 8; + // List of comments. + repeated Comment comments = 9; + // Currently redundant with the MolecularConsequence element of + // the HGVS element? + repeated MolecularConsequence molecular_consequences = 10; + // Functional consequences. + repeated FunctionalConsequence functional_consequences = 11; + // Attributes. + repeated AttributeSetElement attributes = 12; + + /* attributes */ + + // Allele ID. + optional int64 allele_id = 13; +} + +// This is a record of a haplotype in SCV. +// +// Corresponds to `typeHaplotypeSCV` in XSD. +message HaplotypeScv { + // The list of alleles in the haplotype. + repeated AlleleScv simple_alleles = 1; + // The preferred representation of the haplotype. + optional string name = 2; + // Names other than 'preferred' used for the haplotype. + repeated OtherName other_names = 3; + // Classification of the variant. + optional AggregateClassificationSet classification = 4; + // Functional consequences of the variant. + repeated FunctionalConsequence functional_consequences = 5; + // List of attributes. + repeated AttributeSetElement attributes = 6; + // List of citations. + repeated Citation citations = 7; + // List of cross-references. + repeated Xref xrefs = 8; + // List of comments. + repeated Comment comments = 9; + + // Variation ID. + optional int64 variation_id = 10; + // Number of copies. + optional int32 number_of_copies = 11; + // Number of chromosomes. + optional int32 number_of_chromosomes = 12; +} + +// Used to report genotypes, be they simple or complex diplotypes. +// +// Corresponds to "typeGenotypeSCV" in XSD. +message GenotypeScv { + /* nested elements */ + + // Simple alleles; mutually exclusive with `haplotypes`. + repeated AlleleScv simple_alleles = 1; + // Haplotype; mutually exclusive with `simple_alleles`. 
+ // + // Allows more than 2 haplotypes per genotype to support + // representation of ploidy. + repeated HaplotypeScv haplotypes = 2; + // Optional name. + optional string name = 3; + // Other names used for the genotype. + repeated OtherName other_names = 4; + // The variation type. + VariationType variation_type = 5; + // Functional consequences. + repeated FunctionalConsequence functional_consequences = 6; + // Attributes. + repeated AttributeSetElement attributes = 7; + // List of citations. + repeated Citation citations = 8; + // List of xrefs. + repeated Xref xrefs = 9; + // List of comments. + repeated Comment comments = 10; + + /* attributes */ + + // Variation ID. + optional int64 variation_id = 11; +} + +// // Observation of one piece of evidence. +// message typeEvidenceObservation { +// // // Method of data capture, not method of evaluation. +// // repeated MethodWithXrefs methods = 1; +// // Modes of inheritance. +// repeated string modes_of_inheritance = 1; +// // List of citations. +// repeated Citation citations = 2; +// // Comments. +// repeated Comment comments = 3; +// // // Observed phenotypes. +// // repeated PhenotypeListDetailsType observed_phenotypes = 5; +// // // Indications. +// // repeated Indications indications = 6; +// // The submitter's identifiers. +// repeated Submitter submitter_identifiers = 4; +// // Families. +// optional string families = 5; +// // Individuals. +// optional string individuals = 6; +// // Segregation. +// optional string segregations = 7; +// // Other gene. +// optional string other_gene = 8; +// // Same gene. +// optional string same_gene = 9; +// // Evident type. +// EvidenceType evidence_type = 10; +// // Allele frequency. +// optional string allele_frequency = 11; +// // Allele origin. +// optional string allele_origin = 12; +// // Ethnicity. +// optional string ethnicity = 13; +// // Geographic origin. 
+// optional string geographic_origin = 14; +// } + +// Documents in what populations or samples an allele or genotype has +// been observed relative to the described trait. Summary observations can be +// registered per submitted assertion, grouped by common citation, study type, origin, +// ethnicity, tissue, cell line, and species data. Not all options are valid per study +// type, but these will not be validated in the xsd. +// +// Corresponds to `ObservationSet` in XSD. +message ObservedIn { + // Local enum for the method type. + enum MethodType { + // unspecified method type + METHOD_TYPE_UNSPECIFIED = 0; + // corresponds to "literature only" + METHOD_TYPE_LITERATURE_ONLY = 1; + // corresponds to "reference population" + METHOD_TYPE_REFERENCE_POPULATION = 2; + // corresponds to "case-control" + METHOD_TYPE_CASE_CONTROL = 3; + // corresponds to "clinical testing" + METHOD_TYPE_CLINICAL_TESTING = 4; + // corresponds to "in vitro" + METHOD_TYPE_IN_VITRO = 5; + // corresponds to "in vivo" + METHOD_TYPE_IN_VIVO = 6; + // corresponds to "inferred from source" + METHOD_TYPE_INFERRED_FROM_SOURCE = 7; + // corresponds to "research" + METHOD_TYPE_RESEARCH = 8; + } + + // Local struct for attributes based on `BaseAttribute`. + message ObservedDataAttribute { + // Local enum for the observed data type. 
+ enum Type { + // unspecified observed data attribute type + TYPE_UNSPECIFIED = 0; + // corresponds to "Description" + TYPE_DESCRIPTION = 1; + // corresponds to "VariantAlleles" + TYPE_VARIANT_ALLELES = 2; + // corresponds to "SubjectsWithVariant" + TYPE_SUBJECTS_WITH_VARIANT = 3; + // corresponds to "SubjectsWithDifferentCausativeVariant" + TYPE_SUBJECTS_WITH_DIFFERENT_CAUSATIVE_VARIANT = 4; + // corresponds to "VariantChromosomes" + TYPE_VARIANT_CHROMOSOMES = 5; + // corresponds to "IndependentObservations" + TYPE_INDEPENDENT_OBSERVATIONS = 6; + // corresponds to "SingleHeterozygote" + TYPE_SINGLE_HETEROZYGOUS = 7; + // corresponds to "CompoundHeterozygote" + TYPE_COMPOUND_HETEROZYGOUS = 8; + // corresponds to "Homozygote" + TYPE_HOMOZYGOUS = 9; + // corresponds to "Hemizygote" + TYPE_HEMIZYGOUS = 10; + // corresponds to "NumberMosaic" + TYPE_NUMBER_MOSAIC = 11; + // corresponds to "ObservedUnspecified" + TYPE_OBSERVED_UNSPECIFIED = 12; + // corresponds to "AlleleFrequency" + TYPE_ALLELE_FREQUENCY = 13; + // corresponds to "SecondaryFinding" + TYPE_SECONDARY_FINDING = 14; + // corresponds to "GenotypeAndMOIConsistent" + TYPE_GENOTYPE_AND_MOI_CONSISTENT = 15; + // corresponds to "UnaffectedFamilyMemberWithCausativeVariant" + TYPE_UNAFFECTED_FAMILY_MEMBER_WITH_CAUSATIVE_VARIANT = 16; + // corresponds to "HetParentTransmitNormalAllele" + TYPE_HET_PARENT_TRANSMIT_NORMAL_ALLELE = 17; + // corresponds to "CosegregatingFamilies" + TYPE_COSEGREGATING_FAMILIES = 18; + // corresponds to "InformativeMeioses" + TYPE_INFORMATIVE_MEIOSES = 19; + // corresponds to "SampleLocalID" + TYPE_SAMPLE_LOCAL_ID = 20; + // corresponds to "SampleVariantID" + TYPE_SAMPLE_VARIANT_ID = 21; + // corresponds to "FamilyHistory" + TYPE_FAMILY_HISTORY = 22; + // corresponds to "NumFamiliesWithVariant" + TYPE_NUM_FAMILIES_WITH_VARIANT = 23; + // corresponds to "NumFamiliesWithSegregationObserved" + TYPE_NUM_FAMILIES_WITH_SEGREGATION_OBSERVED = 24; + // corresponds to "SegregationObserved" + 
TYPE_SEGREGATION_OBSERVED = 25; + } + + // base + BaseAttribute base = 1; + // type + Type type = 2; + } + + // This is an AttributeSet, there will be 1 attribute supported + // by optional citations, xrefs and comment. There must be at least one + // ObservedData Set, but can be any number. For each ObservedData set the + // Attribute will be either decimal or string depending on type. The value will + // be stored here, but decimals will be entered to the database as a string. + message ObservedData { + // Attributes. + repeated ObservedDataAttribute attributes = 1; + // Severity. + optional Severity severity = 2; + // Citation list. + repeated Citation citations = 3; + // Xref list. + repeated Xref xrefs = 4; + // Comment list. + repeated Comment comments = 5; + } + + // Sample. + Sample sample = 1; + // Observed data. + repeated ObservedData observed_data = 2; + // Co-occurrence set. + repeated Cooccurrence cooccurrence_sets = 3; + // TraitSet. + optional TraitSet trait_set = 4; + // Citation list. + repeated Citation citations = 5; + // Xref list. + repeated Xref xrefs = 6; + // Comment list. + repeated Comment comments = 7; +} + +// A clinical assertion as submitted (SCV record). +// +// Corresponds to `MeasureTraitType` in XSD and `<ClinicalAssertion>` in XML +message ClinicalAssertion { + // Local type for ClinVarSubmissionID. + message ClinvarSubmissionId { + // The identifier provided by the submitter to facilitate + // identification of records corresponding to their submissions. If not + // provided by a submitter, NCBI generates one. If provided by + // submitter, that is represented in localKeyIsSubmitted. + string local_key = 1; + // Optional title. + optional string title = 2; + // Optional indication whether local key has been submitted. + optional bool local_key_is_submitted = 3; + // Optional assembly of submission. + optional string submitted_assembly = 4; + } + + // Local type for attribute set. + message AttributeSetElement { + // Local enum for types.
+ enum Type { + // unspecified type + TYPE_UNSPECIFIED = 0; + // Corresponds to "ModeOfInheritance" + TYPE_MODE_OF_INHERITANCE = 1; + // Corresponds to "Penetrance" + TYPE_PENETRANCE = 2; + // Corresponds to "AgeOfOnset" + TYPE_AGE_OF_ONSET = 3; + // Corresponds to "Severity" + TYPE_SEVERITY = 4; + // Corresponds to "ClassificationHistory" + TYPE_CLASSIFICATION_HISTORY = 5; + // Corresponds to "SeverityDescription" + TYPE_SEVERITY_DESCRIPTION = 6; + // Corresponds to "AssertionMethod" + TYPE_ASSERTION_METHOD = 7; + } + + // The base value. + BaseAttribute attribute = 1; + // The type of the attribute. + Type type = 2; + // List of xrefs. + repeated Xref xrefs = 3; + // List of citations. + repeated Citation citations = 4; + // List of comments. + repeated Comment comments = 5; + } + + // Local type for `ClinVarAccession` + message ClinvarAccession { + // Accession. + string accession = 1; + // Version. + int32 version = 2; + // The submitter's identifier. + SubmitterIdentifiers submitter_identifiers = 3; + // The date that the latest update to the submitted + // record (SCV) became public in ClinVar. + optional google.protobuf.Timestamp date_updated = 4; + // DateCreated is the date when the record first became + // public in ClinVar. + optional google.protobuf.Timestamp date_created = 5; + } + + // Local enum for record status. + enum RecordStatus { + // unspecified record status + RECORD_STATUS_UNSPECIFIED = 0; + // corresponds to "current" + RECORD_STATUS_CURRENT = 1; + // corresponds to "replaced" + RECORD_STATUS_REPLACED = 2; + // corresponds to "removed" + RECORD_STATUS_REMOVED = 3; + } + + /* nested elements */ + + // The ClinVar submission ID. + ClinvarSubmissionId clinvar_submission_id = 1; + // The ClinVar SCV accessions. + ClinvarAccession clinvar_accession = 2; + // Optional list of additional submitters. + repeated Submitter additional_submitters = 3; + // Record status. 
+ RecordStatus record_status = 4; + // Replaces; mutually exclusive with replaceds + repeated string replaces = 5; + // Replaced list; mutually exclusive with replaces + repeated ClinicalAssertionRecordHistory replaceds = 6; + + // SCV classifications. + repeated ClassificationScv classifications = 7; + // The assertion. + Assertion assertion = 8; + // Attributes. + repeated AttributeSetElement attributes = 9; + // Observed in. + repeated ObservedIn observed_ins = 10; + + // Allele in SCV; mutually exclusive with haplotype/genotype. + optional AlleleScv simple_allele = 11; + // Haplotype in SCV; mutually exclusive with allele/genotype. + optional HaplotypeScv haplotype = 12; + // Genotype in SCV; mutually exclusive with allele/haplotype. + optional GenotypeScv genotype = 13; + + // The trait set. + TraitSet trait_set = 14; + // List of citations. + repeated Citation citations = 15; + // Optional study name. + optional string study_name = 16; + // Optional study description. + optional string study_description = 17; + // List of comments. + repeated Comment comments = 18; + // List of submissions. + repeated string submission_names = 19; + + /* attributes */ + + // Date of creation. + google.protobuf.Timestamp date_created = 20; + // Date of last update. + google.protobuf.Timestamp date_last_updated = 21; + // Date of submission. + google.protobuf.Timestamp submission_date = 22; + // ID. + optional uint64 id = 23; + // Whether it is an FDA recognized database. + optional bool fda_recognized_database = 24; +} + +// This is a record per variant (Measure/@ID,AlleleID). +// +// Corresponds to "typeAllele" in XSD. +message Allele { + // Local type for Gene. + message Gene { + /* nested elements */ + + // Gene's locations. + repeated Location locations = 1; + // OMIM ID. + repeated uint64 omims = 2; + // Haploinsufficiency. + optional DosageSensitivity haploinsufficiency = 3; + // Triplosensitivity.
+ optional DosageSensitivity triplosensitivity = 4; + // Used to set key words for retrieval or + // display about a gene, such as genes listed by the + // ACMG guidelines. + repeated string properties = 5; + + /* attributes */ + + // Optional gene symbol. + optional string symbol = 6; + // Full gene name. + string full_name = 7; + // Gene ID. + int64 gene_id = 8; + // Optional HGNC ID. + optional string hgnc_id = 9; + // Source of gene (calculated or submitted). + string source = 10; + // Relationship between gene and variant. + optional GeneVariantRelationship relationship_type = 11; + } + + // Local type for allele frequency. + message AlleleFrequency { + // Value. + double value = 1; + // Source. + string source = 2; + // URL. + optional string url = 3; + } + + // Local type for GlobalMinorAlleleFrequency. + message GlobalMinorAlleleFrequency { + // Value. + double value = 1; + // Source. + string source = 2; + // Minor allele. + optional string minor_allele = 3; + // URL. + optional string url = 4; + } + + // Local type for allele name. + message Name { + // The name's value. + string value = 1; + // The name's type. + optional string type = 2; + } + + /* nested elements */ + + // Gene list. + repeated Gene genes = 1; + // Name. + string name = 2; + // Canonical SPDI. + optional string canonical_spdi = 3; + // Variant type(s). + repeated string variant_types = 4; + // Location. + repeated Location locations = 5; + // List of other names. + repeated OtherName other_names = 6; + // These are the single-letter representations of the protein change. + repeated string protein_changes = 7; + // List of HGVS expressions. + repeated HgvsExpression hgvs_expressions = 8; + // Aggregated classifications. + optional AggregateClassificationSet classifications = 9; + // List of xrefs. + repeated Xref xrefs = 10; + // List of comments. + repeated Comment comments = 11; + // List of functional consequences. 
+ repeated FunctionalConsequence functional_consequences = 12; + // Allele frequencies. + repeated AlleleFrequency allele_frequencies = 13; + // Global minor allele frequencies. + optional GlobalMinorAlleleFrequency global_minor_allele_frequency = 14; + + /* attributes */ + + // Allele ID. + int64 allele_id = 15; + // Variation ID. + int64 variation_id = 16; +} + +// This is a record of one or more simple alleles on the same chromosome +// molecule. +// +// Corresponds to `typeHaplotype` in XSD +message Haplotype { + // The list of alleles in the haplotype. + repeated Allele simple_alleles = 1; + // The preferred representation of the haplotype. + string name = 2; + // The type of the haplotype. + HaploVariationType variation_type = 3; + // Names other than 'preferred' used for the haplotype. + repeated OtherName other_names = 4; + // List of all the HGVS expressions valid for, or used to submit, + // a variant. + repeated HgvsExpression hgvs_expressions = 5; + // Classifications of the variant. + optional AggregateClassificationSet classifications = 6; + // Functional consequences of the variant. + repeated FunctionalConsequence functional_consequences = 7; + // List of cross-references. + repeated Xref xrefs = 8; + // List of comments. + repeated Comment comments = 9; + + // Variation ID. + int64 variation_id = 10; + // Number of copies. + optional int32 number_of_copies = 11; + // Number of chromosomes. + optional int32 number_of_chromosomes = 12; +} + +// This element is used for alleles that were not directly part of a +// submission but were part of a complex submission. They have no direct submitted +// classification, but are being reported for a complete representation of all alleles +// in ClinVar. Compare to ClassifiedRecord. +message IncludedRecord { + // Local type for tag `ClassifiedVariation`. + message ClassifiedVariation { + // Variation ID. + int64 variation_id = 1; + // Optional accession. + optional string accession = 2; + // Version. 
+ int32 version = 3; + } + + // Simple allele; mutually exclusive with haplotype. + optional Allele simple_allele = 1; + // Haplotype; mutually exclusive with simple_allele. + optional Haplotype haplotype = 2; + // Aggregate classification sets. + optional AggregateClassificationSet classifications = 3; + // List of submitted records. + repeated Scv submitted_classifications = 4; + // Maintains the list of classified variants represented in + // this submission, although not submitted with a Classification + // independently. + repeated ClassifiedVariation classified_variations = 5; + // List of general citations. + repeated GeneralCitations general_citations = 6; +} + +// Used to report genotypes, be they simple or complex diplotypes. +// +// Corresponds to "typeGenotype" in XSD. +message Genotype { + /* nested elements */ + + // Simple alleles; mutually exclusive with `haplotypes`. + repeated Allele simple_alleles = 1; + // Haplotypes; mutually exclusive with `simple_alleles`. + // + // Allows more than 2 haplotypes per genotype to support + // representation of ploidy. + repeated Haplotype haplotypes = 2; + // Name of the genotype. + string name = 3; + // The variation type. + VariationType variation_type = 4; + // Names other than 'preferred' used for the Genotype. + repeated OtherName other_names = 5; + // HGVS descriptions. + repeated HgvsExpression hgvs_expressions = 6; + // Functional consequences. + repeated FunctionalConsequence functional_consequences = 7; + // Aggregated classifications. + optional AggregateClassificationSet classifications = 8; + // List of xrefs. + repeated Xref xrefs = 9; + // List of citations. + repeated Citation citations = 10; + // List of comments. + repeated Comment comments = 11; + // Attributes. + repeated AttributeSetElement attributes = 12; + + /* attributes */ + + // Variation ID. + optional int64 variation_id = 13; +} + +// Corresponds to "typeRCV" in XSD. +message RcvAccession { + // Local type for ClassifiedConditionList.
+ message ClassifiedConditionList { + /* nested elements */ + + // List of interpreted conditions. + repeated ClassifiedCondition classified_conditions = 1; + + /* attributes */ + + // Trait set ID. + optional int64 trait_set_id = 2; + } + + // Local type for GermlineClassification. + // + // The aggregate review status based on + // all germline submissions for this record. + message GermlineClassification { + // Local type for Description. + message Description { + // The description. + string value = 1; + + /* attributes */ + + // The date of the description. + optional google.protobuf.Timestamp date_last_evaluated = 2; + // The number of submissions. + optional uint32 submission_count = 3; + } + + // The aggregate review status based on + // all germline submissions for this + // record. + AggregateGermlineReviewStatus review_status = 1; + // The germline classification description. + Description description = 2; + } + + // Local type for SomaticClinicalImpact. + // + // The aggregate review status based on + // all somatic clinical impact submissions for this + // record. + message SomaticClinicalImpact { + // Local type for Description. + message Description { + // The description. + string value = 1; + + /* attributes */ + + // Clinical impact assertion type. + optional string clinical_impact_assertion_type = 2; + // Clinical impact significance + optional string clinical_impact_clinical_significance = 3; + // The date of the description. + optional google.protobuf.Timestamp date_last_evaluated = 4; + // The number of submissions. + optional uint32 submission_count = 5; + } + + // The aggregate review status based on + // all somatic clinical impact submissions for this + // record. + AggregateSomaticClinicalImpactReviewStatus review_status = 1; + // The somatic clinical impact descriptions. + repeated Description descriptions = 2; + } + + // Local type for OncogenicityClassification. + message OncogenicityClassification { + // Local type for Description.
+ message Description { + // The description. + string value = 1; + + /* attributes */ + + // The date of the description. + optional google.protobuf.Timestamp date_last_evaluated = 2; + // The number of submissions. + optional uint32 submission_count = 3; + } + + // The aggregate review status based on + // all oncogenic submissions for this record. + AggregateOncogenicityReviewStatus review_status = 1; + // The oncogenicity description. + Description description = 2; + } + + // Local type for RCV classifications. + message RcvClassifications { + // Germline classification. + optional GermlineClassification germline_classification = 1; + // Somatic clinical impact. + optional SomaticClinicalImpact somatic_clinical_impact = 2; + // Oncogenicity classification. + optional OncogenicityClassification oncogenicity_classification = 3; + } + + /* nested elements */ + + // The list of classified conditions. + optional ClassifiedConditionList classified_condition_list = 1; + // The list of RCV classifications. + RcvClassifications rcv_classifications = 2; + // The list of RCV accessions this record has replaced. + repeated RecordHistory replaceds = 3; + + /* attributes */ + + // Optional title. + optional string title = 4; + // Accession. + string accession = 5; + // Version. + int32 version = 6; +} + +// This element is restricted to variation records for which an explicit +// classification was submitted. Compare to IncludedRecord, which provides aggregate +// information about variants that are part of another submission, but for which +// ClinVar has *not* received a submission specific to that variant independently. +message ClassifiedRecord { + // Local type for tag `RCVList`. + message RcvList { + // The RCV record. + repeated RcvAccession rcv_accessions = 1; + // The number of submissions (SCV accessions) referencing the VariationID. + optional int32 submission_count = 2; + // The number of independent observations.
+ optional int32 independent_observations = 3; + } + + // Local type for the attribute `@MappingType`. + enum MappingType { + // unspecified mapping type + MAPPING_TYPE_UNSPECIFIED = 0; + // corresponds to "Name" + MAPPING_TYPE_NAME = 1; + // corresponds to "Xref" + MAPPING_TYPE_XREF = 2; + } + + // Local type for the tag `TraitMapping`. + message TraitMapping { + // Local type for the tag "MedGen" + message Medgen { + // Name. + string name = 1; + // CUI. + string cui = 2; + } + + /* nested elements */ + repeated Medgen medgens = 1; + + /* attributes */ + + // ID of clinical assertion. + int64 clinical_assertion_id = 2; + // The trait type. + string trait_type = 3; + // The mapping type. + MappingType mapping_type = 4; + // The mapping value. + string mapping_value = 5; + // The mapping reference. + string mapping_ref = 6; + } + + // Describes a single sequence change relative to a + // contiguous region of a chromosome or the mitochondrion. + // + // Mutually exclusive with `haplotype` and `genotype`. + optional Allele simple_allele = 1; + // Describes multiple sequence changes on one of the + // chromosomes of a homologous pair or on the mitochondrion. + // + // Mutually exclusive with `simple_allele` and `genotype`. + optional Haplotype haplotype = 2; + // Describes the combination of sequence changes on each + // chromosome of a homologous pair. + // + // Mutually exclusive with `simple_allele` and `haplotype`. + optional Genotype genotype = 3; + // List of RCV records. + RcvList rcv_list = 4; + // List of classifications. + AggregateClassificationSet classifications = 5; + // List of clinical assertions. + repeated ClinicalAssertion clinical_assertions = 6; + // This element is used to report how each user-submitted + // trait name was mapped to a MedGen CUI identifier and a preferred name. + // The structure may be used in the future to report, when a trait is + // identified by a source's identifier (e.g. 
MIM number), the preferred + // name used by that source at the time of submission. For MappingType + // XRef, MappingRef is the database name and MappingValue is the database's + // identifier. For MappingType Name, MappingRef is Alternate or Preferred, + // and MappingValue is the submitted name of the trait. ClinicalAssertionID + // is an integer identifier that corresponds 1:1 to the SCV assigned to the + // submission. + repeated TraitMapping trait_mappings = 7; + // List of deleted SCVs. + repeated DeletedScv deleted_scvs = 8; + // List of general citations. + repeated GeneralCitations general_citations = 9; +} + +// This element groups the set of data specific to a VariationArchive +// record, namely the summary data of what has been submitted about a +// VariationID AND for Classified records only, the content each +// submission (SCV) provided. +// +// Type for the `` type. +message VariationArchive { + // Enumeration for `@RecordType`. + enum RecordType { + // unspecified record type + RECORD_TYPE_UNSPECIFIED = 0; + // corresponds to "included" + RECORD_TYPE_INCLUDED = 1; + // corresponds to "classified" + RECORD_TYPE_CLASSIFIED = 2; + } + + // Enumeration for `@RecordStatus`. + enum RecordStatus { + // unspecified record status + RECORD_STATUS_UNSPECIFIED = 0; + // corresponds to "current" + RECORD_STATUS_CURRENT = 1; + // corresponds to "previous" + RECORD_STATUS_PREVIOUS = 2; + // corresponds to "replaced" + RECORD_STATUS_REPLACED = 3; + // corresponds to "deleted" + RECORD_STATUS_DELETED = 4; + } + + /* attributes */ + + // Numeric variation ID. + int64 variation_id = 1; + // This is ClinVar's name for the variant. ClinVar uses this term in + // its web displays + string variation_name = 2; + // Type of the variant. + string variation_type = 3; + // DateCreated is the date when the record first became public in + // ClinVar. + google.protobuf.Timestamp date_created = 4; + // The date the record was last updated in the public database. 
The + // update may be a change to one of the submitted records (SCVs) or + // annotation added to the aggregate record by NCBI staff. This date + // is independent of a version change; annotation added by NCBI may + // change without representing a change in the version. + google.protobuf.Timestamp date_last_updated = 5; + // This date is of the most recent submitted record (SCV) for the + // VCV; it may reflect a new submitted record or an update to a submitted record. + google.protobuf.Timestamp most_recent_submission = 6; + // Accession assigned to the variant, or set of variants, that was + // Classified + string accession = 7; + // Version of record and suffix for accession. + int32 version = 8; + // Number of submitters in record. + int32 number_of_submitters = 9; + // Number of submissions in record. + int32 number_of_submissions = 10; + // Record type. + RecordType record_type = 11; + + /* contained elements */ + + // The record's status. + RecordStatus record_status = 12; + // Pointer to the replacing record; optional. + RecordHistory replaced_by = 13; + // The list of VCV accessions this record has replaced. + repeated RecordHistory replaceds = 14; + // Comment on the record; optional. + Comment comment = 15; + // Specification of the species. + Species species = 16; + + // This element describes the classification of a single + // allele, haplotype, or genotype based on all submissions to ClinVar. This + // differs from the element IncludedRecord, which describes simple alleles + // or haplotypes, referenced in ClassifiedRecord, but for which no explicit + // classification was submitted. Once that variation is described, details + // are added about the phenotypes being classified, the classification, the + // submitters providing the classifications, and all supported evidence. + // + // NB: mutually exclusive with `included_record`. 
+ optional ClassifiedRecord classified_record = 17; + + // This element describes a single allele or haplotype + // included in submissions to ClinVar, but for which no explicit + // classification was submitted. It also references the submissions and the + // Classified records that include them. + // + // NB: mutually exclusive with `classified_record`. + optional IncludedRecord included_record = 18; +} + +// The element to group each VariationArchive element in the release +// +// Type for the `` tag. +message ClinvarVariationRelease { + /* attributes */ + + // The current release. + google.protobuf.Timestamp release_date = 1; + + /* contained elements */ + + // List of `` tags. + repeated VariationArchive variation_archives = 2; +} diff --git a/protos/annonars/clinvar_data/extracted_vars.proto b/protos/annonars/clinvar_data/extracted_vars.proto new file mode 100644 index 00000000..46ec1ce5 --- /dev/null +++ b/protos/annonars/clinvar_data/extracted_vars.proto @@ -0,0 +1,75 @@ +// Protocol buffers to store the extracted variants from ClinVar. + +syntax = "proto3"; + +package annonars.clinvar_data.extracted_vars; + +import "annonars/clinvar_data/clinvar_public.proto"; + +// Enumeration for the type of the variant. +enum VariationType { + // unspecified variation type + VARIATION_TYPE_UNSPECIFIED = 0; + // Corresponds to "insertion". + VARIATION_TYPE_INSERTION = 1; + // Corresponds to "deletion". + VARIATION_TYPE_DELETION = 2; + // Corresponds to "single nucleotide variant". + VARIATION_TYPE_SNV = 3; + // Corresponds to "indel". + VARIATION_TYPE_INDEL = 4; + // Corresponds to "duplication". + VARIATION_TYPE_DUPLICATION = 5; + // Corresponds to "tandem duplication". + VARIATION_TYPE_TANDEM_DUPLICATION = 6; + // Corresponds to "structural variant". + VARIATION_TYPE_STRUCTURAL_VARIANT = 7; + // Corresponds to "copy number gain". + VARIATION_TYPE_COPY_NUMBER_GAIN = 8; + // Corresponds to "copy number loss". 
+ VARIATION_TYPE_COPY_NUMBER_LOSS = 9; + // Corresponds to "protein only". + VARIATION_TYPE_PROTEIN_ONLY = 10; + // Corresponds to "microsatellite". + VARIATION_TYPE_MICROSATELLITE = 11; + // Corresponds to "inversion". + VARIATION_TYPE_INVERSION = 12; + // Corresponds to "other". + VARIATION_TYPE_OTHER = 13; +} + +// Accession with version. +message VersionedAccession { + // The accession. + string accession = 1; + // The version. + int32 version = 2; +} + +// Protocol buffer for storing essential information of one RCV. +message ExtractedRcvRecord { + // The accession. + VersionedAccession accession = 1; + // Title of RCV. + string title = 2; + // Classifications (thinned out). + annonars.clinvar_data.clinvar_public.RcvAccession.RcvClassifications classifications = 3; +} + +// Protocol buffer for storing essential information of one VCV. +message ExtractedVcvRecord { + // The accession. + VersionedAccession accession = 1; + // List of aggregated RCVs. + repeated ExtractedRcvRecord rcvs = 2; + // Name of VCV. + string name = 3; + // The type of the variant. + VariationType variation_type = 4; + // Classifications (thinned out). + annonars.clinvar_data.clinvar_public.AggregateClassificationSet classifications = 5; + // The sequence location on one reference. + annonars.clinvar_data.clinvar_public.Location.SequenceLocation sequence_location = 6; + // List of HGNC IDs. + repeated string hgnc_ids = 7; +} diff --git a/protos/annonars/clinvar_data/gene_impact.proto b/protos/annonars/clinvar_data/gene_impact.proto new file mode 100644 index 00000000..70f8d151 --- /dev/null +++ b/protos/annonars/clinvar_data/gene_impact.proto @@ -0,0 +1,89 @@ +// Protocol buffers for types for per-gene impact. + +syntax = "proto3"; + +package annonars.clinvar_data.gene_impact; + +// Enumeration with the variant consequence. 
+enum GeneImpact { + // unspecified impact + GENE_IMPACT_UNSPECIFIED = 0; + // Corresponds to "3_prime_UTR_variant" + GENE_IMPACT_THREE_PRIME_UTR_VARIANT = 1; + // Corresponds to "5_prime_UTR_variant" + GENE_IMPACT_FIVE_PRIME_UTR_VARIANT = 2; + // Corresponds to "downstream_gene_variant" + GENE_IMPACT_DOWNSTREAM_TRANSCRIPT_VARIANT = 3; + // Corresponds to "frameshift_variant" + GENE_IMPACT_FRAMESHIFT_VARIANT = 4; + // Corresponds to "inframe_indel" + GENE_IMPACT_INFRAME_INDEL = 5; + // Corresponds to "start_lost" + GENE_IMPACT_START_LOST = 6; + // Corresponds to "intron_variant" + GENE_IMPACT_INTRON_VARIANT = 7; + // Corresponds to "missense_variant" + GENE_IMPACT_MISSENSE_VARIANT = 8; + // Corresponds to "non_coding_transcript_variant" + GENE_IMPACT_NON_CODING_TRANSCRIPT_VARIANT = 9; + // Corresponds to "stop_gained" + GENE_IMPACT_STOP_GAINED = 10; + // Corresponds to "no_sequence_alteration" + GENE_IMPACT_NO_SEQUENCE_ALTERATION = 11; + // Corresponds to "splice_acceptor_variant" + GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT = 12; + // Corresponds to "splice_donor_variant" + GENE_IMPACT_SPLICE_DONOR_VARIANT = 13; + // Corresponds to "stop_lost" + GENE_IMPACT_STOP_LOST = 14; + // Corresponds to "synonymous_variant" + GENE_IMPACT_SYNONYMOUS_VARIANT = 15; + // Corresponds to "upstream_gene_variant" + GENE_IMPACT_UPSTREAM_TRANSCRIPT_VARIANT = 16; +} + + +// Enumeration for ACMG clinical significance. 
+enum ClinicalSignificance { + // unspecified clinical significance + CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0; + // Corresponds to "Benign" + CLINICAL_SIGNIFICANCE_BENIGN = 1; + // Corresponds to "Likely benign" + CLINICAL_SIGNIFICANCE_LIKELY_BENIGN = 2; + // Corresponds to "Uncertain significance" + CLINICAL_SIGNIFICANCE_UNCERTAIN_SIGNIFICANCE = 3; + // Corresponds to "Likely pathogenic" + CLINICAL_SIGNIFICANCE_LIKELY_PATHOGENIC = 4; + // Corresponds to "Pathogenic" + CLINICAL_SIGNIFICANCE_PATHOGENIC = 5; + // Corresponds to "not provided" + CLINICAL_SIGNIFICANCE_NOT_PROVIDED = 6; + // Corresponds to "other" + CLINICAL_SIGNIFICANCE_OTHER = 7; +} + + +// Entry for storing counts of `GeneImpact` and `ClinicalSignificance`. +message GeneImpactCounts { + // Stores the counts for a gene impact. + message ImpactCounts { + // The gene impact. + GeneImpact gene_impact = 1; + // The counts for the benign impact. + uint32 count_benign = 2; + // The counts for the likely benign impact. + uint32 count_likely_benign = 3; + // The counts for the uncertain significance impact. + uint32 count_uncertain_significance = 4; + // The counts for the likely pathogenic impact. + uint32 count_likely_pathogenic = 5; + // The counts for the pathogenic impact. + uint32 count_pathogenic = 6; + } + + // The gene HGNC ID. + string hgnc_id = 1; + // The impact counts. + repeated ImpactCounts impact_counts = 2; +} diff --git a/protos/annonars/clinvar_data/phenotype_link.proto b/protos/annonars/clinvar_data/phenotype_link.proto new file mode 100644 index 00000000..cfaacd1c --- /dev/null +++ b/protos/annonars/clinvar_data/phenotype_link.proto @@ -0,0 +1,28 @@ +// Protocol buffers to store the gene-to-phenotype links extracted from ClinVar. + +syntax = "proto3"; + +package annonars.clinvar_data.phenotype_link; + +import "annonars/clinvar_data/extracted_vars.proto"; + + +// A record linking gene to phenotype. 
+message GenePhenotypeRecord { + // VCV + annonars.clinvar_data.extracted_vars.VersionedAccession vcv = 1; + // SCV + annonars.clinvar_data.extracted_vars.VersionedAccession scv = 2; + // Clinical germline classification. + string germline_classification = 3; + // Submitter + string submitter_name = 4; + // Gene HGNC ID + repeated string hgnc_ids = 5; + // Linked OMIM terms + repeated string omim_terms = 6; + // Linked MONDO terms + repeated string mondo_terms = 7; + // Linked HPO terms + repeated string hpo_terms = 8; +} diff --git a/protos/fetch.sh b/protos/fetch.sh new file mode 100644 index 00000000..5759001e --- /dev/null +++ b/protos/fetch.sh @@ -0,0 +1,19 @@ +#!/usr/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +# Helper script to download protobuf files from elsewhere. + +cd $SCRIPT_DIR + +mkdir -p annonars/clinvar_data +for name in class_by_freq clinvar_public extracted_vars gene_impact phenotype_link; do + wget \ + -O annonars/clinvar_data/$name.proto \ + https://raw.githubusercontent.com/varfish-org/clinvar-this/main/protos/clinvar_data/pbs/$name.proto +done +sed \ + -i \ + -e 's/clinvar_data\.pbs\./annonars.clinvar_data./g' \ + -e 's|clinvar_data/pbs/|annonars\/clinvar_data/|g' \ + annonars/clinvar_data/*.proto diff --git a/src/clinvar_genes/cli/import.rs b/src/clinvar_genes/cli/import.rs index 7f88b886..1a02efc1 100644 --- a/src/clinvar_genes/cli/import.rs +++ b/src/clinvar_genes/cli/import.rs @@ -5,14 +5,11 @@ use std::{collections::HashSet, io::BufRead, sync::Arc}; use clap::Parser; use prost::Message; -use crate::pbs::clinvar::minimal::{ - ClinicalSignificance, Record, ReferenceAssertion, ReviewStatus, -}; -use crate::pbs::clinvar::per_gene::{ - ClinvarPerGeneRecord, CoarseClinicalSignificance, GeneFreqRecordCounts, GeneImpactRecordCounts, - GeneVariantsForRelease, Impact, -}; -use crate::{clinvar_genes, clinvar_minimal, common}; +use crate::common; +use 
crate::pbs::clinvar::per_gene::{ClinvarPerGeneRecord, ExtractedVariantsPerRelease}; +use crate::pbs::clinvar_data::class_by_freq::GeneCoarseClinsigFrequencyCounts; +use crate::pbs::clinvar_data::extracted_vars::ExtractedVcvRecord; +use crate::pbs::clinvar_data::gene_impact::GeneImpactCounts; /// Command line arguments for `tsv import` sub command. #[derive(Parser, Debug, Clone)] @@ -42,7 +39,7 @@ pub struct Args { /// Load per-impact JSONL file. fn load_per_impact_jsonl( path_per_impact_jsonl: &str, -) -> Result>, anyhow::Error> { +) -> Result, anyhow::Error> { // Open reader, possibly decompressing gziped files. let reader: Box = if path_per_impact_jsonl.ends_with(".gz") { Box::new(flate2::read::GzDecoder::new(std::fs::File::open( @@ -57,18 +54,8 @@ fn load_per_impact_jsonl( let reader = std::io::BufReader::new(reader); for line in reader.lines() { let line = line?; - let record = - serde_json::from_str::(&line)?; - - let mut count_out = Vec::new(); - for (impact, counts) in record.counts { - let impact: Impact = impact.into(); - count_out.push(GeneImpactRecordCounts { - impact: impact as i32, - counts, - }); - } - result.insert(record.hgnc.clone(), count_out); + let record = serde_json::from_str::(&line)?; + result.insert(record.hgnc_id.clone(), record); } Ok(result) @@ -77,7 +64,7 @@ fn load_per_impact_jsonl( /// Load per-frequency JSONL file. fn load_per_frequency_jsonl( path_per_impact_jsonl: &str, -) -> Result>, anyhow::Error> { +) -> Result, anyhow::Error> { // Open reader, possibly decompressing gziped files. 
let reader: Box = if path_per_impact_jsonl.ends_with(".gz") { Box::new(flate2::read::GzDecoder::new(std::fs::File::open( @@ -92,33 +79,23 @@ fn load_per_frequency_jsonl( let reader = std::io::BufReader::new(reader); for line in reader.lines() { let line = line?; - let record = - serde_json::from_str::(&line)?; - - let mut count_out = Vec::new(); - for (clinsig, counts) in record.counts { - let coarse_clinsig: CoarseClinicalSignificance = clinsig.into(); - count_out.push(GeneFreqRecordCounts { - coarse_clinsig: coarse_clinsig as i32, - counts, - }); - } - result.insert(record.hgnc.clone(), count_out); + let record = serde_json::from_str::(&line)?; + result.insert(record.hgnc_id.clone(), record); } Ok(result) } -type PerVcv = indexmap::IndexMap; -type PerAssembly = indexmap::IndexMap; -type PerGene = indexmap::IndexMap; +type PerRelease = indexmap::IndexMap>; +type PerGene = indexmap::IndexMap; /// Load per-gene sequence variants. fn load_variants_jsonl( variant_jsonls: &[String], -) -> Result>, anyhow::Error> { +) -> Result>, anyhow::Error> { // Build intermediate data structure using nested maps. let mut per_gene: PerGene = Default::default(); + for path_jsonl in variant_jsonls { let reader: Box = if path_jsonl.ends_with(".gz") { Box::new(flate2::read::GzDecoder::new(std::fs::File::open( @@ -132,65 +109,23 @@ fn load_variants_jsonl( for line in reader.lines() { let line = line?; - let input_record = serde_json::from_str::(&line); + let input_record = serde_json::from_str::(&line); match input_record { Err(e) => { tracing::warn!("skipping line because of error: {}", e); continue; } Ok(input_record) => { - let clinvar_minimal::cli::reading::Record { - vcv, - rcv, - title, - hgnc_ids, - clinical_significance, - review_status, - sequence_location, - .. - } = input_record; - let clinvar_minimal::cli::reading::SequenceLocation { - assembly, - chr, - start, - stop, - reference_allele_vcf, - alternate_allele_vcf, - .. 
- } = sequence_location; - - if let ( - Some(start), - Some(stop), - Some(reference_allele_vcf), - Some(alternate_allele_vcf), - ) = (start, stop, reference_allele_vcf, alternate_allele_vcf) - { - for hgnc_id in hgnc_ids { - let per_release = per_gene.entry(hgnc_id).or_default(); - let per_vcv = per_release.entry(assembly.clone()).or_default(); - let seqvar = per_vcv.entry(vcv.clone()).or_insert_with(|| Record { - release: assembly.clone(), - start, - stop, - reference: reference_allele_vcf.clone(), - alternative: alternate_allele_vcf.clone(), - vcv: vcv.clone(), - reference_assertions: vec![], - chromosome: chr.clone(), - }); - seqvar.reference_assertions.push(ReferenceAssertion { - rcv: rcv.clone(), - title: title.clone(), - clinical_significance: Into::::into( - clinical_significance, - ) as i32, - review_status: Into::::into(review_status) as i32, - }); - seqvar - .reference_assertions - .sort_by_key(|a| (a.clinical_significance, a.review_status)); - } + for hgnc_id in &input_record.hgnc_ids { + let per_gene_entry = per_gene.entry(hgnc_id.clone()).or_default(); + let release = input_record + .sequence_location + .as_ref() + .expect("missing sequence_location") + .assembly + .clone(); + let per_release_entry = per_gene_entry.entry(release.clone()).or_default(); + per_release_entry.push(input_record.clone()); } } } @@ -199,12 +134,20 @@ fn load_variants_jsonl( // Convert into final data structure that uses lists of entry records rather than nested maps. 
let mut result = indexmap::IndexMap::new(); - for (hgnc_id, per_gene) in per_gene { + for (hgnc_id, per_release) in per_gene { let mut per_gene_out = Vec::new(); - for (genome_release, per_release) in per_gene { - per_gene_out.push(GeneVariantsForRelease { - genome_release, - variants: per_release.values().cloned().collect(), + for (release, extracted_vars) in per_release { + let mut variants = extracted_vars; + variants.sort_by(|a, b| { + a.accession + .as_ref() + .expect("no accession") + .accession + .cmp(&b.accession.as_ref().expect("no accession").accession) + }); + per_gene_out.push(ExtractedVariantsPerRelease { + release: Some(release), + variants, }); } result.insert(hgnc_id, per_gene_out); @@ -256,10 +199,11 @@ fn jsonl_import( // Read through all records and insert each into the database. for hgnc_id in hgnc_ids.iter() { + let per_release_vars = vars_per_gene.get(hgnc_id).cloned().unwrap_or_default(); let record = ClinvarPerGeneRecord { - per_impact_counts: counts_per_impact.get(hgnc_id).cloned().unwrap_or_default(), - per_freq_counts: counts_per_freq.get(hgnc_id).cloned().unwrap_or_default(), - variants: vars_per_gene.get(hgnc_id).cloned().unwrap_or_default(), + per_impact_counts: Some(counts_per_impact.get(hgnc_id).cloned().unwrap_or_default()), + per_freq_counts: Some(counts_per_freq.get(hgnc_id).cloned().unwrap_or_default()), + per_release_vars, }; let buf = record.encode_to_vec(); diff --git a/src/clinvar_genes/cli/mod.rs b/src/clinvar_genes/cli/mod.rs index b4b20cc5..e4aad4cc 100644 --- a/src/clinvar_genes/cli/mod.rs +++ b/src/clinvar_genes/cli/mod.rs @@ -2,4 +2,3 @@ pub mod import; pub mod query; -pub mod reading; diff --git a/src/clinvar_genes/cli/reading.rs b/src/clinvar_genes/cli/reading.rs deleted file mode 100644 index 2f75d92f..00000000 --- a/src/clinvar_genes/cli/reading.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! Reading JSONL data for per-gene ClinVar information. - -/// Reading of gene per-impact counts records. 
-pub mod gene_impact { - /// SO terms for impact on gene - #[derive(Debug, serde::Deserialize, serde::Serialize, PartialEq, Eq, Hash)] - pub enum Impact { - /// 3' UTR variant - #[serde(rename = "3_prime_UTR_variant")] - ThreePrimeUtrVariant, - /// 5' UTR variant - #[serde(rename = "5_prime_UTR_variant")] - FivePrimeUtrVariant, - /// downstream gene variant - #[serde(rename = "downstream_gene_variant")] - DownstreamGeneVariant, - /// frameshift variant - #[serde(rename = "frameshift_variant")] - FrameshiftVariant, - /// inframe deletion - #[serde(rename = "inframe_indel")] - InframeIndel, - /// start lost - #[serde(rename = "start_lost")] - StartLost, - /// intron variant - #[serde(rename = "intron_variant")] - IntronVariant, - /// missense variant - #[serde(rename = "missense_variant")] - MissenseVariant, - /// non-coding transcript variant - #[serde(rename = "non_coding_transcript_variant")] - NonCodingTranscriptVariant, - /// stop gained - #[serde(rename = "stop_gained")] - StopGained, - /// no sequence alteration - #[serde(rename = "no_sequence_alteration")] - NoSequenceAlteration, - /// splice acceptor variant - #[serde(rename = "splice_acceptor_variant")] - SpliceAcceptorVariant, - /// splice donor variant - #[serde(rename = "splice_donor_variant")] - SpliceDonorVariant, - /// stop lost - #[serde(rename = "stop_lost")] - StopLost, - /// synonymous variant - #[serde(rename = "synonymous_variant")] - SyonymousVariant, - /// upstream gene variant - #[serde(rename = "upstream_gene_variant")] - UpstreamGeneVariant, - } - - impl From for crate::pbs::clinvar::per_gene::Impact { - fn from(val: Impact) -> Self { - match val { - Impact::ThreePrimeUtrVariant => { - crate::pbs::clinvar::per_gene::Impact::ThreePrimeUtrVariant - } - Impact::FivePrimeUtrVariant => { - crate::pbs::clinvar::per_gene::Impact::FivePrimeUtrVariant - } - Impact::DownstreamGeneVariant => { - crate::pbs::clinvar::per_gene::Impact::DownstreamTranscriptVariant - } - Impact::FrameshiftVariant => { - 
crate::pbs::clinvar::per_gene::Impact::FrameshiftVariant - } - Impact::InframeIndel => crate::pbs::clinvar::per_gene::Impact::InframeIndel, - Impact::StartLost => crate::pbs::clinvar::per_gene::Impact::StartLost, - Impact::IntronVariant => crate::pbs::clinvar::per_gene::Impact::IntronVariant, - Impact::MissenseVariant => crate::pbs::clinvar::per_gene::Impact::MissenseVariant, - Impact::NonCodingTranscriptVariant => { - crate::pbs::clinvar::per_gene::Impact::NonCodingTranscriptVariant - } - Impact::StopGained => crate::pbs::clinvar::per_gene::Impact::StopGained, - Impact::NoSequenceAlteration => { - crate::pbs::clinvar::per_gene::Impact::NoSequenceAlteration - } - Impact::SpliceAcceptorVariant => { - crate::pbs::clinvar::per_gene::Impact::SpliceAcceptorVariant - } - Impact::SpliceDonorVariant => { - crate::pbs::clinvar::per_gene::Impact::SpliceDonorVariant - } - Impact::StopLost => crate::pbs::clinvar::per_gene::Impact::StopLost, - Impact::SyonymousVariant => { - crate::pbs::clinvar::per_gene::Impact::SynonymousVariant - } - Impact::UpstreamGeneVariant => { - crate::pbs::clinvar::per_gene::Impact::UpstreamTranscriptVariant - } - } - } - } - - /// ACMG clinical significance - #[derive(Debug, serde::Deserialize, serde::Serialize, PartialEq, Eq, Hash)] - pub enum ClinicalSignificance { - /// Benign - #[serde(rename = "benign")] - Benign, - /// Likely benign - #[serde(rename = "likely benign")] - LikelyBenign, - /// Uncertain significance - #[serde(rename = "uncertain significance")] - UncertainSignificance, - /// Likely pathogenic - #[serde(rename = "likely pathogenic")] - LikelyPathogenic, - /// Pathogenic - #[serde(rename = "pathogenic")] - Pathogenic, - } - - /// Gene-wise counts record. - #[derive(Debug, serde::Deserialize, serde::Serialize)] - pub struct Record { - /// HGNC gene ID - pub hgnc: String, - /// Per-impact counts - pub counts: indexmap::IndexMap>, - } -} - -/// Reading of ACMG class by frequency counts records. 
-pub mod counts_by_freq { - /// Coarsened clinical significance - #[derive(Debug, serde::Deserialize, serde::Serialize, PartialEq, Eq, Hash)] - pub enum CoarseClinicalSignificance { - /// Likely benign / benign - #[serde(rename = "benign")] - Benign, - /// Uncertain significance - #[serde(rename = "uncertain")] - Uncertain, - /// Likely pathogenic / pathogenic - #[serde(rename = "pathogenic")] - Pathogenic, - } - - impl From - for crate::pbs::clinvar::per_gene::CoarseClinicalSignificance - { - fn from(val: CoarseClinicalSignificance) -> Self { - match val { - CoarseClinicalSignificance::Benign => { - crate::pbs::clinvar::per_gene::CoarseClinicalSignificance::Benign - } - CoarseClinicalSignificance::Uncertain => { - crate::pbs::clinvar::per_gene::CoarseClinicalSignificance::Uncertain - } - CoarseClinicalSignificance::Pathogenic => { - crate::pbs::clinvar::per_gene::CoarseClinicalSignificance::Pathogenic - } - } - } - } - - /// Per-pathogenicity counts. - #[derive(Debug, serde::Deserialize, serde::Serialize)] - pub struct Record { - /// HGNC gene ID - pub hgnc: String, - /// Per-impact counts - pub counts: indexmap::IndexMap>, - } -} diff --git a/src/pbs/clinvar_data.rs b/src/pbs/clinvar_data.rs new file mode 100644 index 00000000..acab82a2 --- /dev/null +++ b/src/pbs/clinvar_data.rs @@ -0,0 +1,61 @@ +//! Code generate for protobufs by `prost-build`. + +/// Code generated for protobufs by `prost-build`. +pub mod class_by_freq { + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.class_by_freq.rs" + )); + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.class_by_freq.serde.rs" + )); +} + +/// Code generated for protobufs by `prost-build`. +pub mod clinvar_public { + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.clinvar_public.rs" + )); + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.clinvar_public.serde.rs" + )); +} + +/// Code generated for protobufs by `prost-build`. 
+pub mod extracted_vars { + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.extracted_vars.rs" + )); + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.extracted_vars.serde.rs" + )); +} + +/// Code generated for protobufs by `prost-build`. +pub mod gene_impact { + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.gene_impact.rs" + )); + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.gene_impact.serde.rs" + )); +} + +/// Code generated for protobufs by `prost-build`. +pub mod phenotype_link { + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.phenotype_link.rs" + )); + include!(concat!( + env!("OUT_DIR"), + "/annonars.clinvar_data.phenotype_link.serde.rs" + )); +} diff --git a/src/pbs/mod.rs b/src/pbs/mod.rs index fbe92a71..6d5177cd 100644 --- a/src/pbs/mod.rs +++ b/src/pbs/mod.rs @@ -1,6 +1,7 @@ //! Code generate for protobufs by `prost-build`. pub mod clinvar; +pub mod clinvar_data; pub mod cons; pub mod dbsnp; pub mod functional; diff --git a/tests/clinvar-genes/clinvar-genes.db/000014.sst b/tests/clinvar-genes/clinvar-genes.db/000014.sst deleted file mode 100644 index d5e73820..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/000014.sst +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0e814dbc1b164581fe27671fa52382d0a5d548fb29b3f888c5c7deea007057ea -size 1289 diff --git a/tests/clinvar-genes/clinvar-genes.db/000016.sst b/tests/clinvar-genes/clinvar-genes.db/000016.sst deleted file mode 100644 index 64f0f6c8..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/000016.sst +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f092a7c9399b0b357de72a74ae2595322c50ea6d4193891a7f5ce3b3a820d04 -size 3060 diff --git a/tests/clinvar-genes/clinvar-genes.db/CURRENT b/tests/clinvar-genes/clinvar-genes.db/CURRENT deleted file mode 100644 index f8d50486..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/CURRENT +++ /dev/null @@ -1,3 
+0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c283f6e81028b9eb0760d918ee4bc0aa256ed3b926393c1734c760c4bd724fd -size 16 diff --git a/tests/clinvar-genes/clinvar-genes.db/IDENTITY b/tests/clinvar-genes/clinvar-genes.db/IDENTITY deleted file mode 100644 index cb0c1443..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/IDENTITY +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b4ddf89532e4f83b30ddcb6dbe34568f2bf2d736d2fe0b76723e29b0a9ecb13d -size 36 diff --git a/tests/clinvar-genes/clinvar-genes.db/LOCK b/tests/clinvar-genes/clinvar-genes.db/LOCK deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/clinvar-genes/clinvar-genes.db/LOG b/tests/clinvar-genes/clinvar-genes.db/LOG deleted file mode 100644 index f706ee96..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/LOG +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2b45c9959fb2c01fb6a3801eb1117a8d9b57ce335fe1561c68cac49aeccdaa3f -size 62348 diff --git a/tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 b/tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 deleted file mode 100644 index c04a9d67..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/MANIFEST-000005 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de1314c30fac75289cf71a1e0357ff6c8ffd7caa114e1d8d946879d7d24e705a -size 647 diff --git a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 b/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 deleted file mode 100644 index a2ad8677..00000000 --- a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000009 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43b5ef50591a721fdda5576cf873fa4b515539decb35cac720deccaf7be8a61d -size 15369 diff --git a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 b/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 deleted file mode 100644 index a2ad8677..00000000 --- 
a/tests/clinvar-genes/clinvar-genes.db/OPTIONS-000011 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43b5ef50591a721fdda5576cf873fa4b515539decb35cac720deccaf7be8a61d -size 15369 diff --git a/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl b/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl index 13c715fa..989b6f9d 100644 --- a/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl +++ b/tests/clinvar-genes/clinvar-variants-grch37-seqvars.jsonl @@ -1,10 +1,10 @@ -{"rcv": "RCV000000009", "vcv": "VCV000018396", "title": "NM_002036.4(ACKR1):c.265C>T (p.Arg89Cys) AND DUFFY BLOOD GROUP SYSTEM, FY(bwk) PHENOTYPE", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "1", "accession": "NC_000001.10", "outer_start": null, "inner_start": null, "start": 159175494, "stop": 159175494, "inner_stop": null, "outer_stop": null, "display_start": 159175494, "display_stop": 159175494, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 159175494, "reference_allele_vcf": "C", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000010", "vcv": "VCV000018397", "title": "NM_002036.4(ACKR1):c.286_299del (p.Trp96fs) AND DUFFY BLOOD GROUP SYSTEM, FY(a-b-) PHENOTYPE", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "1", "accession": "NC_000001.10", "outer_start": null, "inner_start": null, "start": 159175509, "stop": 159175522, "inner_stop": null, "outer_stop": null, "display_start": 159175509, "display_stop": 
159175522, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 159175508, "reference_allele_vcf": "CCCTGGCTGGCCTGT", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000012", "vcv": "VCV000000002", "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "criteria provided, single submitter", "sequence_location": {"assembly": "GRCh37", "chr": "7", "accession": "NC_000007.13", "outer_start": null, "inner_start": null, "start": 4820844, "stop": 4820847, "inner_stop": null, "outer_stop": null, "display_start": 4820844, "display_stop": 4820847, "strand": null, "variant_length": 22, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 4820844, "reference_allele_vcf": "GGAT", "alternate_allele_vcf": "TGCTGTAAACTGTAACTGTAAA", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000013", "vcv": "VCV000000003", "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "7", "accession": "NC_000007.13", "outer_start": null, "inner_start": null, "start": 4827361, "stop": 4827374, "inner_stop": null, "outer_stop": null, "display_start": 4827361, "display_stop": 4827374, "strand": null, "variant_length": 14, "reference_allele": null, 
"alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 4827360, "reference_allele_vcf": "GCTGCTGGACCTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000016", "vcv": "VCV000000006", "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "11", "accession": "NC_000011.9", "outer_start": null, "inner_start": null, "start": 126147412, "stop": 126147412, "inner_stop": null, "outer_stop": null, "display_start": 126147412, "display_stop": 126147412, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 126147412, "reference_allele_vcf": "A", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:26927"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000042", "vcv": "VCV000000025", "title": "NM_015600.4(ABHD12):c.-6898_191+7002delinsCC AND PHARC syndrome", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "20", "accession": "NC_000020.10", "outer_start": null, "inner_start": null, "start": 25364147, "stop": 25378237, "inner_stop": null, "outer_stop": null, "display_start": 25364147, "display_stop": 25378237, "strand": null, "variant_length": 14091, "reference_allele": null, "alternate_allele": "GG", "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": null, 
"reference_allele_vcf": null, "alternate_allele_vcf": null, "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000043", "vcv": "VCV000000026", "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome", "variant_type": "duplication", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "20", "accession": "NC_000020.10", "outer_start": null, "inner_start": null, "start": 25288616, "stop": 25288617, "inner_stop": null, "outer_stop": null, "display_start": 25288616, "display_stop": 25288617, "strand": null, "variant_length": 7, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 25288616, "reference_allele_vcf": "G", "alternate_allele_vcf": "GGCTCTTA", "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000049", "vcv": "VCV000000032", "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "10", "accession": "NC_000010.10", "outer_start": null, "inner_start": null, "start": 99359924, "stop": 99359924, "inner_stop": null, "outer_stop": null, "display_start": 99359924, "display_stop": 99359924, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 99359924, "reference_allele_vcf": "G", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:25155"], "absolute_copy_number": null, 
"reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000057", "vcv": "VCV000000040", "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "19", "accession": "NC_000019.9", "outer_start": null, "inner_start": null, "start": 36595471, "stop": 36595474, "inner_stop": null, "outer_stop": null, "display_start": 36595471, "display_stop": 36595474, "strand": null, "variant_length": 4, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 36595470, "reference_allele_vcf": "GTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000059", "vcv": "VCV000000042", "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh37", "chr": "19", "accession": "NC_000019.9", "outer_start": null, "inner_start": null, "start": 36558317, "stop": 36558317, "inner_stop": null, "outer_stop": null, "display_start": 36558317, "display_stop": 36558317, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.25", "assembly_status": "previous", "position_vcf": 36558317, "reference_allele_vcf": "G", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, 
"copy_number_tuple": null} +{"accession": {"accession": "VCV000000002", "version": 3}, "rcvs": [{"accession": {"accession": "RCV000000012", "version": 5}, "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer)", "variationType": "VARIATION_TYPE_INDEL", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, {"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2021-05-16T00:00:00Z", "numberOfSubmitters": 2, "numberOfSubmissions": 2}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_7", "accession": "NC_000007.13", "start": 4820844, "stop": 4820847, "displayStart": 4820844, "displayStop": 4820847, "variantLength": 22, "positionVcf": 4820844, "referenceAlleleVcf": "GGAT", "alternateAlleleVcf": "TGCTGTAAACTGTAACTGTAAA"}, "hgncIds": ["HGNC:22197"]} +{"accession": {"accession": "VCV000000006", "version": 1}, "rcvs": [{"accession": 
{"accession": "RCV000000016", "version": 6}, "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19"}], "name": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20818383", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Mitochondrial complex 1 deficiency, nuclear type 19", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0032624"}]}, {"value": "MITOCHONDRIAL COMPLEX I DEFICIENCY, NUCLEAR TYPE 19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}, {"db": "OMIM", "id": "613622.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0003", "type": "Allelic variant"}]}], "symbols": [{"value": "MC1DN19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}]}], "xrefs": [{"db": "MedGen", "id": "C4748791"}, {"db": "MONDO", "id": "MONDO:0032624"}, {"db": "OMIM", "id": "618241", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "45335", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2019-02-04T00:00:00Z", "mostRecentSubmission": "2019-02-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_11", "accession": "NC_000011.9", "start": 126147412, "stop": 126147412, "displayStart": 126147412, "displayStop": 126147412, "variantLength": 1, "positionVcf": 126147412, "referenceAlleleVcf": "A", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:26927"]} +{"accession": {"accession": "VCV000000003", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000013", "version": 6}, "title": 
"NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, {"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-06-29T00:00:00Z", "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2017-01-30T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_7", "accession": "NC_000007.13", "start": 4827361, "stop": 4827374, "displayStart": 4827361, "displayStop": 4827374, "variantLength": 14, "positionVcf": 4827360, "referenceAlleleVcf": "GCTGCTGGACCTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:22197"]} +{"accession": {"accession": "VCV000000043", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000060", "version": 4}, "title": "NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": 
"NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_19", "accession": "NC_000019.9", "start": 36574001, "stop": 36574001, "displayStart": 36574001, "displayStop": 36574001, "variantLength": 1, "positionVcf": 36574001, "referenceAlleleVcf": "C", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:24502"]} +{"accession": {"accession": "VCV000000026", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000043", "version": 3}, "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome"}], "name": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter)", "variationType": "VARIATION_TYPE_DUPLICATION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797687", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "PHARC syndrome", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0012984"}]}, {"value": "Polyneuropathy-hearing loss-ataxia-retinitis pigmentosa-cataract syndrome", "type": "Alternate", "xrefs": [{"db": 
"Orphanet", "id": "171848"}]}, {"value": "Polyneuropathy, hearing loss, ataxia, retinitis pigmentosa, and cataract", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Polyneuropathy%2C+hearing+loss%2C+ataxia%2C+retinitis+pigmentosa%2C+and+cataract/9132"}]}], "symbols": [{"value": "PHARC", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "612674", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "171848"}, {"db": "MedGen", "id": "C2675204"}, {"db": "MONDO", "id": "MONDO:0012984"}, {"db": "OMIM", "id": "612674", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "17", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2015-05-18T00:00:00Z", "mostRecentSubmission": "2015-05-18T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_20", "accession": "NC_000020.10", "start": 25288616, "stop": 25288617, "displayStart": 25288616, "displayStop": 25288617, "variantLength": 7, "positionVcf": 25288616, "referenceAlleleVcf": "G", "alternateAlleleVcf": "GGCTCTTA"}, "hgncIds": ["HGNC:15868"]} +{"accession": {"accession": "VCV000000051", "version": 2}, "rcvs": [{"accession": {"accession": "RCV000000068", "version": 4}, "title": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys) AND Nephronophthisis-like nephropathy 1"}], "name": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20179356", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Nephronophthisis-like nephropathy 1", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Nephronophthisis-like+nephropathy+1/8986"}, {"db": "MONDO", "id": "MONDO:0013163"}]}], "symbols": [{"value": "NPHPL1", "type": 
"Preferred", "xrefs": [{"db": "OMIM", "id": "613159", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "The nephronophthisis (NPH) phenotype is characterized by reduced renal concentrating ability, chronic tubulointerstitial nephritis, cystic renal disease, and progression to end-stage renal disease (ESRD) before age 30 years. Three age-based clinical subtypes are recognized: infantile, juvenile, and adolescent/adult. Infantile NPH can present in utero with oligohydramnios sequence (limb contractures, pulmonary hypoplasia, and facial dysmorphisms) or postnatally with renal manifestations that progress to ESRD before age 3 years. Juvenile NPH, the most prevalent subtype, typically presents with polydipsia and polyuria, growth retardation, chronic iron-resistant anemia, or other findings related to chronic kidney disease (CKD). Hypertension is typically absent due to salt wasting. ESRD develops at a median age of 13 years. Ultrasound findings are increased echogenicity, reduced corticomedullary differentiation, and renal cysts (in 50% of affected individuals). Histologic findings include tubulointerstitial fibrosis, thickened and disrupted tubular basement membrane, sporadic corticomedullary cysts, and normal or reduced kidney size. Adolescent/adult NPH is clinically similar to juvenile NPH, but ESRD develops at a median age of 19 years. Within a subtype, inter- and intrafamilial variability in rate of progression to ESRD is considerable. 
Approximately 80%-90% of individuals with the NPH phenotype have no extrarenal features (i.e., they have isolated NPH); ~10%-20% have extrarenal manifestations that constitute a recognizable syndrome (e.g., Joubert syndrome, Bardet-Biedl syndrome, Jeune syndrome and related skeletal disorders, Meckel-Gruber syndrome, Senior-L\u00f8ken syndrome, Leber congenital amaurosis, COACH syndrome, and oculomotor apraxia, Cogan type)."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK368475"}]}], "citations": [{"ids": [{"value": "27336129", "source": "PubMed"}, {"value": "NBK368475", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "655"}, {"db": "MedGen", "id": "C3150419"}, {"db": "MONDO", "id": "MONDO:0013163"}, {"db": "OMIM", "id": "613159", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "26", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-03-01T00:00:00Z", "dateCreated": "2021-08-11T00:00:00Z", "mostRecentSubmission": "2021-08-11T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_22", "accession": "NC_000022.10", "start": 41320486, "stop": 41320486, "displayStart": 41320486, "displayStop": 41320486, "variantLength": 1, "positionVcf": 41320486, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:28052"]} +{"accession": {"accession": "VCV000000032", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000049", "version": 2}, "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3"}], "name": "NM_138413.4(HOGA1):c.700+4G>T", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797690", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": 
[{"value": "Primary hyperoxaluria type 3", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013327"}]}, {"value": "PH III", "type": "Alternate"}, {"value": "Primary hyperoxaluria, type III", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+Hyperoxaluria+Type+3/8596"}]}], "symbols": [{"value": "HP3", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613616", "type": "MIM"}]}, {"value": "HOGA1", "type": "Alternate"}, {"value": "PH3", "type": "Alternate"}], "attributes": [{"attribute": {"base": {"value": "loss of function", "integerValue": "273"}, "type": "disease mechanism"}, "xrefs": [{"db": "Genetic Testing Registry (GTR)", "id": "GTR000561373"}]}, {"attribute": {"base": {"integerValue": "10738"}, "type": "GARD id"}, "xrefs": [{"db": "Office of Rare Diseases", "id": "10738"}]}], "citations": [{"ids": [{"value": "26401545", "source": "PubMed"}, {"value": "NBK316514", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "416"}, {"db": "Orphanet", "id": "93600"}, {"db": "MedGen", "id": "C3150878"}, {"db": "MONDO", "id": "MONDO:0013327"}, {"db": "OMIM", "id": "613616", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "19", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2013-04-04T00:00:00Z", "mostRecentSubmission": "2013-04-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_10", "accession": "NC_000010.10", "start": 99359924, "stop": 99359924, "displayStart": 99359924, "displayStop": 99359924, "variantLength": 1, "positionVcf": 99359924, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:25155"]} +{"accession": {"accession": "VCV000000042", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000059", "version": 5}, "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal 
recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_19", "accession": "NC_000019.9", "start": 36558317, "stop": 36558317, "displayStart": 36558317, "displayStop": 36558317, "variantLength": 1, "positionVcf": 36558317, "referenceAlleleVcf": "G", "alternateAlleleVcf": "C"}, "hgncIds": ["HGNC:24502"]} +{"accession": {"accession": "VCV000000059", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000076", "version": 5}, "title": "NM_006642.5(SDCCAG8):c.1946_1949del (p.Cys649fs) AND Senior-Loken syndrome 7"}], "name": "NM_006642.5(SDCCAG8):c.1946_1949del (p.Cys649fs)", "variationType": "VARIATION_TYPE_MICROSATELLITE", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20835237", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Senior-Loken syndrome 7", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Senior-Loken+syndrome+7/9283"}, {"db": "MONDO", "id": "MONDO:0013326"}]}], "symbols": [{"value": "SLSN7", "type": 
"Preferred", "xrefs": [{"db": "OMIM", "id": "613615", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "3156"}, {"db": "MedGen", "id": "C3150877"}, {"db": "MONDO", "id": "MONDO:0013326"}, {"db": "OMIM", "id": "613615", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "29", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2016-10-23T00:00:00Z", "mostRecentSubmission": "2016-10-23T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_1", "accession": "NC_000001.10", "start": 243589819, "stop": 243589822, "displayStart": 243589819, "displayStop": 243589822, "variantLength": 4, "positionVcf": 243589818, "referenceAlleleVcf": "AGTGT", "alternateAlleleVcf": "A"}, "hgncIds": ["HGNC:10671"]} +{"accession": {"accession": "VCV000000040", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000057", "version": 4}, "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic 
variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2015-09-17T00:00:00Z", "mostRecentSubmission": "2015-09-17T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"assembly": "GRCh37", "chr": "CHROMOSOME_19", "accession": "NC_000019.9", "start": 36595471, "stop": 36595474, "displayStart": 36595471, "displayStop": 36595474, "variantLength": 4, "positionVcf": 36595470, "referenceAlleleVcf": "GTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:24502"]} diff --git a/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl b/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl index 51c04b70..35b65e6b 100644 --- a/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl +++ b/tests/clinvar-genes/clinvar-variants-grch38-seqvars.jsonl @@ -1,10 +1,10 @@ -{"rcv": "RCV000000009", "vcv": "VCV000018396", "title": "NM_002036.4(ACKR1):c.265C>T (p.Arg89Cys) AND DUFFY BLOOD GROUP SYSTEM, FY(bwk) PHENOTYPE", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "1", "accession": "NC_000001.11", "outer_start": null, "inner_start": null, "start": 159205704, "stop": 159205704, "inner_stop": null, "outer_stop": null, "display_start": 
159205704, "display_stop": 159205704, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 159205704, "reference_allele_vcf": "C", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000010", "vcv": "VCV000018397", "title": "NM_002036.4(ACKR1):c.286_299del (p.Trp96fs) AND DUFFY BLOOD GROUP SYSTEM, FY(a-b-) PHENOTYPE", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "1", "accession": "NC_000001.11", "outer_start": null, "inner_start": null, "start": 159205719, "stop": 159205732, "inner_stop": null, "outer_stop": null, "display_start": 159205719, "display_stop": 159205732, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 159205718, "reference_allele_vcf": "CCCTGGCTGGCCTGT", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:4035"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000012", "vcv": "VCV000000002", "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "criteria provided, single submitter", "sequence_location": {"assembly": "GRCh38", "chr": "7", "accession": "NC_000007.14", "outer_start": null, "inner_start": null, "start": 4781213, "stop": 4781216, "inner_stop": null, "outer_stop": null, "display_start": 4781213, "display_stop": 4781216, "strand": null, "variant_length": 22, "reference_allele": 
null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 4781213, "reference_allele_vcf": "GGAT", "alternate_allele_vcf": "TGCTGTAAACTGTAACTGTAAA", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000013", "vcv": "VCV000000003", "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "7", "accession": "NC_000007.14", "outer_start": null, "inner_start": null, "start": 4787730, "stop": 4787743, "inner_stop": null, "outer_stop": null, "display_start": 4787730, "display_stop": 4787743, "strand": null, "variant_length": 14, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 4787729, "reference_allele_vcf": "GCTGCTGGACCTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:22197"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000016", "vcv": "VCV000000006", "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "11", "accession": "NC_000011.10", "outer_start": null, "inner_start": null, "start": 126277517, "stop": 126277517, "inner_stop": null, "outer_stop": null, "display_start": 126277517, "display_stop": 126277517, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", 
"assembly_status": "current", "position_vcf": 126277517, "reference_allele_vcf": "A", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:26927"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000042", "vcv": "VCV000000025", "title": "NM_015600.4(ABHD12):c.-6898_191+7002delinsCC AND PHARC syndrome", "variant_type": "indel", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "20", "accession": "NC_000020.11", "outer_start": null, "inner_start": null, "start": 25383511, "stop": 25397601, "inner_stop": null, "outer_stop": null, "display_start": 25383511, "display_stop": 25397601, "strand": null, "variant_length": 14091, "reference_allele": null, "alternate_allele": "GG", "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": null, "reference_allele_vcf": null, "alternate_allele_vcf": null, "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000043", "vcv": "VCV000000026", "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome", "variant_type": "duplication", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "20", "accession": "NC_000020.11", "outer_start": null, "inner_start": null, "start": 25307980, "stop": 25307981, "inner_stop": null, "outer_stop": null, "display_start": 25307980, "display_stop": 25307981, "strand": null, "variant_length": 7, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 25307980, "reference_allele_vcf": "G", "alternate_allele_vcf": "GGCTCTTA", "for_display_length": null}, "hgnc_ids": ["HGNC:15868"], 
"absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000049", "vcv": "VCV000000032", "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "10", "accession": "NC_000010.11", "outer_start": null, "inner_start": null, "start": 97600167, "stop": 97600167, "inner_stop": null, "outer_stop": null, "display_start": 97600167, "display_stop": 97600167, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 97600167, "reference_allele_vcf": "G", "alternate_allele_vcf": "T", "for_display_length": null}, "hgnc_ids": ["HGNC:25155"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": "RCV000000057", "vcv": "VCV000000040", "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "deletion", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "19", "accession": "NC_000019.10", "outer_start": null, "inner_start": null, "start": 36104569, "stop": 36104572, "inner_stop": null, "outer_stop": null, "display_start": 36104569, "display_stop": 36104572, "strand": null, "variant_length": 4, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 36104568, "reference_allele_vcf": "GTGCC", "alternate_allele_vcf": "G", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} -{"rcv": 
"RCV000000059", "vcv": "VCV000000042", "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "variant_type": "single nucleotide variant", "clinical_significance": "pathogenic", "review_status": "no assertion criteria provided", "sequence_location": {"assembly": "GRCh38", "chr": "19", "accession": "NC_000019.10", "outer_start": null, "inner_start": null, "start": 36067415, "stop": 36067415, "inner_stop": null, "outer_stop": null, "display_start": 36067415, "display_stop": 36067415, "strand": null, "variant_length": 1, "reference_allele": null, "alternate_allele": null, "assembly_accession_version": "GCF_000001405.38", "assembly_status": "current", "position_vcf": 36067415, "reference_allele_vcf": "G", "alternate_allele_vcf": "C", "for_display_length": null}, "hgnc_ids": ["HGNC:24502"], "absolute_copy_number": null, "reference_copy_number": null, "copy_number_tuple": null} +{"accession": {"accession": "VCV000000002", "version": 3}, "rcvs": [{"accession": {"accession": "RCV000000012", "version": 5}, "title": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.80_83delinsTGCTGTAAACTGTAACTGTAAA (p.Arg27_Ile28delinsLeuLeuTer)", "variationType": "VARIATION_TYPE_INDEL", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_CRITERIA_PROVIDED_SINGLE_SUBMITTER", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": 
"Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, {"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2021-05-16T00:00:00Z", "numberOfSubmitters": 2, "numberOfSubmissions": 2}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_7", "accession": "NC_000007.14", "start": 4781213, "stop": 4781216, "displayStart": 4781213, "displayStop": 4781216, "variantLength": 22, "positionVcf": 4781213, "referenceAlleleVcf": "GGAT", "alternateAlleleVcf": "TGCTGTAAACTGTAACTGTAAA"}, "hgncIds": ["HGNC:22197"]} +{"accession": {"accession": "VCV000000006", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000016", "version": 6}, "title": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser) AND Mitochondrial complex 1 deficiency, nuclear type 19"}], "name": "NM_017547.4(FOXRED1):c.1289A>G (p.Asn430Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20818383", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Mitochondrial complex 1 deficiency, nuclear type 19", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0032624"}]}, {"value": "MITOCHONDRIAL COMPLEX I DEFICIENCY, NUCLEAR TYPE 19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}, {"db": "OMIM", "id": "613622.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613622.0003", "type": "Allelic 
variant"}]}], "symbols": [{"value": "MC1DN19", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "618241", "type": "MIM"}]}], "xrefs": [{"db": "MedGen", "id": "C4748791"}, {"db": "MONDO", "id": "MONDO:0032624"}, {"db": "OMIM", "id": "618241", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "45335", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2019-02-04T00:00:00Z", "mostRecentSubmission": "2019-02-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_11", "accession": "NC_000011.10", "start": 126277517, "stop": 126277517, "displayStart": 126277517, "displayStop": 126277517, "variantLength": 1, "positionVcf": 126277517, "referenceAlleleVcf": "A", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:26927"]} +{"accession": {"accession": "VCV000000003", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000013", "version": 6}, "title": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs) AND Hereditary spastic paraplegia 48"}], "name": "NM_014855.3(AP5Z1):c.1413_1426del (p.Leu473fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20613862", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Hereditary spastic paraplegia 48", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013342"}]}, {"value": "Spastic paraplegia 48", "type": "Alternate"}, {"value": "Spastic paraplegia 48, autosomal recessive", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Spastic+paraplegia+48%2C+autosomal+recessive/9323"}]}], "symbols": [{"value": "SPG48", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613647", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "306511"}, 
{"db": "MedGen", "id": "C3150901"}, {"db": "MONDO", "id": "MONDO:0013342"}, {"db": "OMIM", "id": "613647", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "2", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-06-29T00:00:00Z", "dateCreated": "2017-01-30T00:00:00Z", "mostRecentSubmission": "2017-01-30T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_7", "accession": "NC_000007.14", "start": 4787730, "stop": 4787743, "displayStart": 4787730, "displayStop": 4787743, "variantLength": 14, "positionVcf": 4787729, "referenceAlleleVcf": "GCTGCTGGACCTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:22197"]} +{"accession": {"accession": "VCV000000043", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000060", "version": 4}, "title": "NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.1408C>T (p.Gln470Ter)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", 
"type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_19", "accession": "NC_000019.10", "start": 36083099, "stop": 36083099, "displayStart": 36083099, "displayStop": 
36083099, "variantLength": 1, "positionVcf": 36083099, "referenceAlleleVcf": "C", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:24502"]} +{"accession": {"accession": "VCV000000026", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000043", "version": 3}, "title": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter) AND PHARC syndrome"}], "name": "NM_001042472.3(ABHD12):c.846_852dup (p.His285Ter)", "variationType": "VARIATION_TYPE_DUPLICATION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797687", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "PHARC syndrome", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0012984"}]}, {"value": "Polyneuropathy-hearing loss-ataxia-retinitis pigmentosa-cataract syndrome", "type": "Alternate", "xrefs": [{"db": "Orphanet", "id": "171848"}]}, {"value": "Polyneuropathy, hearing loss, ataxia, retinitis pigmentosa, and cataract", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Polyneuropathy%2C+hearing+loss%2C+ataxia%2C+retinitis+pigmentosa%2C+and+cataract/9132"}]}], "symbols": [{"value": "PHARC", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "612674", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "171848"}, {"db": "MedGen", "id": "C2675204"}, {"db": "MONDO", "id": "MONDO:0012984"}, {"db": "OMIM", "id": "612674", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "17", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2015-05-18T00:00:00Z", "mostRecentSubmission": "2015-05-18T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_20", "accession": "NC_000020.11", "start": 25307980, "stop": 25307981, "displayStart": 25307980, "displayStop": 25307981, 
"variantLength": 7, "positionVcf": 25307980, "referenceAlleleVcf": "G", "alternateAlleleVcf": "GGCTCTTA"}, "hgncIds": ["HGNC:15868"]} +{"accession": {"accession": "VCV000000051", "version": 2}, "rcvs": [{"accession": {"accession": "RCV000000068", "version": 4}, "title": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys) AND Nephronophthisis-like nephropathy 1"}], "name": "NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20179356", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Nephronophthisis-like nephropathy 1", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Nephronophthisis-like+nephropathy+1/8986"}, {"db": "MONDO", "id": "MONDO:0013163"}]}], "symbols": [{"value": "NPHPL1", "type": "Preferred", "xrefs": [{"db": "OMIM", "id": "613159", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "The nephronophthisis (NPH) phenotype is characterized by reduced renal concentrating ability, chronic tubulointerstitial nephritis, cystic renal disease, and progression to end-stage renal disease (ESRD) before age 30 years. Three age-based clinical subtypes are recognized: infantile, juvenile, and adolescent/adult. Infantile NPH can present in utero with oligohydramnios sequence (limb contractures, pulmonary hypoplasia, and facial dysmorphisms) or postnatally with renal manifestations that progress to ESRD before age 3 years. Juvenile NPH, the most prevalent subtype, typically presents with polydipsia and polyuria, growth retardation, chronic iron-resistant anemia, or other findings related to chronic kidney disease (CKD). Hypertension is typically absent due to salt wasting. ESRD develops at a median age of 13 years. 
Ultrasound findings are increased echogenicity, reduced corticomedullary differentiation, and renal cysts (in 50% of affected individuals). Histologic findings include tubulointerstitial fibrosis, thickened and disrupted tubular basement membrane, sporadic corticomedullary cysts, and normal or reduced kidney size. Adolescent/adult NPH is clinically similar to juvenile NPH, but ESRD develops at a median age of 19 years. Within a subtype, inter- and intrafamilial variability in rate of progression to ESRD is considerable. Approximately 80%-90% of individuals with the NPH phenotype have no extrarenal features (i.e., they have isolated NPH); ~10%-20% have extrarenal manifestations that constitute a recognizable syndrome (e.g., Joubert syndrome, Bardet-Biedl syndrome, Jeune syndrome and related skeletal disorders, Meckel-Gruber syndrome, Senior-L\u00f8ken syndrome, Leber congenital amaurosis, COACH syndrome, and oculomotor apraxia, Cogan type)."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK368475"}]}], "citations": [{"ids": [{"value": "27336129", "source": "PubMed"}, {"value": "NBK368475", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "655"}, {"db": "MedGen", "id": "C3150419"}, {"db": "MONDO", "id": "MONDO:0013163"}, {"db": "OMIM", "id": "613159", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "26", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-03-01T00:00:00Z", "dateCreated": "2021-08-11T00:00:00Z", "mostRecentSubmission": "2021-08-11T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_22", "accession": "NC_000022.11", "start": 40924482, "stop": 40924482, "displayStart": 40924482, "displayStop": 40924482, "variantLength": 1, "positionVcf": 40924482, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:28052"]} +{"accession": {"accession": 
"VCV000000032", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000049", "version": 2}, "title": "NM_138413.4(HOGA1):c.700+4G>T AND Primary hyperoxaluria type 3"}], "name": "NM_138413.4(HOGA1):c.700+4G>T", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20797690", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Primary hyperoxaluria type 3", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0013327"}]}, {"value": "PH III", "type": "Alternate"}, {"value": "Primary hyperoxaluria, type III", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+Hyperoxaluria+Type+3/8596"}]}], "symbols": [{"value": "HP3", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613616", "type": "MIM"}]}, {"value": "HOGA1", "type": "Alternate"}, {"value": "PH3", "type": "Alternate"}], "attributes": [{"attribute": {"base": {"value": "loss of function", "integerValue": "273"}, "type": "disease mechanism"}, "xrefs": [{"db": "Genetic Testing Registry (GTR)", "id": "GTR000561373"}]}, {"attribute": {"base": {"integerValue": "10738"}, "type": "GARD id"}, "xrefs": [{"db": "Office of Rare Diseases", "id": "10738"}]}], "citations": [{"ids": [{"value": "26401545", "source": "PubMed"}, {"value": "NBK316514", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "416"}, {"db": "Orphanet", "id": "93600"}, {"db": "MedGen", "id": "C3150878"}, {"db": "MONDO", "id": "MONDO:0013327"}, {"db": "OMIM", "id": "613616", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "19", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-10T00:00:00Z", "dateCreated": "2013-04-04T00:00:00Z", "mostRecentSubmission": "2013-04-04T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 
1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_10", "accession": "NC_000010.11", "start": 97600167, "stop": 97600167, "displayStart": 97600167, "displayStop": 97600167, "variantLength": 1, "positionVcf": 97600167, "referenceAlleleVcf": "G", "alternateAlleleVcf": "T"}, "hgncIds": ["HGNC:25155"]} +{"accession": {"accession": "VCV000000042", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000059", "version": 5}, "title": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.671G>C (p.Trp224Ser)", "variationType": "VARIATION_TYPE_SNV", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": 
"MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2019-03-10T00:00:00Z", "mostRecentSubmission": "2019-03-10T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_19", "accession": "NC_000019.10", "start": 36067415, "stop": 36067415, "displayStart": 36067415, "displayStop": 36067415, "variantLength": 1, "positionVcf": 36067415, "referenceAlleleVcf": "G", "alternateAlleleVcf": "C"}, "hgncIds": ["HGNC:24502"]} +{"accession": {"accession": "VCV000000059", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000076", "version": 5}, "title": "NM_006642.5(SDCCAG8):c.1946_1949del (p.Cys649fs) AND Senior-Loken syndrome 7"}], "name": "NM_006642.5(SDCCAG8):c.1946_1949del 
(p.Cys649fs)", "variationType": "VARIATION_TYPE_MICROSATELLITE", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": "20835237", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "Senior-Loken syndrome 7", "type": "Preferred", "xrefs": [{"db": "Genetic Alliance", "id": "Senior-Loken+syndrome+7/9283"}, {"db": "MONDO", "id": "MONDO:0013326"}]}], "symbols": [{"value": "SLSN7", "type": "Preferred", "xrefs": [{"db": "OMIM", "id": "613615", "type": "MIM"}]}], "xrefs": [{"db": "Orphanet", "id": "3156"}, {"db": "MedGen", "id": "C3150877"}, {"db": "MONDO", "id": "MONDO:0013326"}, {"db": "OMIM", "id": "613615", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "29", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-10-01T00:00:00Z", "dateCreated": "2016-10-23T00:00:00Z", "mostRecentSubmission": "2016-10-23T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_1", "accession": "NC_000001.11", "start": 243426517, "stop": 243426520, "displayStart": 243426517, "displayStop": 243426520, "variantLength": 4, "positionVcf": 243426516, "referenceAlleleVcf": "AGTGT", "alternateAlleleVcf": "A"}, "hgncIds": ["HGNC:10671"]} +{"accession": {"accession": "VCV000000040", "version": 1}, "rcvs": [{"accession": {"accession": "RCV000000057", "version": 4}, "title": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs) AND Microcephaly 2, primary, autosomal recessive, with or without cortical malformations"}], "name": "NM_001083961.2(WDR62):c.4205_4208del (p.Val1402fs)", "variationType": "VARIATION_TYPE_DELETION", "classifications": {"germlineClassification": {"reviewStatus": "AGGREGATE_GERMLINE_REVIEW_STATUS_NO_ASSERTION_CRITERIA_PROVIDED", "description": "Pathogenic", "citations": [{"ids": [{"value": 
"20729831", "source": "PubMed"}], "type": "general"}], "conditions": [{"traits": [{"names": [{"value": "MICROCEPHALY 2, PRIMARY, AUTOSOMAL RECESSIVE, WITH CORTICAL MALFORMATIONS", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "613583.0005", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0003", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0004", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0009", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0001", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0010", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0013", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0002", "type": "Allelic variant"}, {"db": "OMIM", "id": "613583.0012", "type": "Allelic variant"}]}, {"value": "Primary autosomal recessive microcephaly 2", "type": "Alternate", "xrefs": [{"db": "Genetic Alliance", "id": "Primary+autosomal+recessive+microcephaly+2/9156"}]}, {"value": "Microcephaly 2, primary, autosomal recessive, with or without cortical malformations", "type": "Preferred", "xrefs": [{"db": "MONDO", "id": "MONDO:0011435"}]}], "symbols": [{"value": "MCPH2", "type": "Alternate", "xrefs": [{"db": "OMIM", "id": "604317", "type": "MIM"}]}], "attributes": [{"attribute": {"base": {"value": "In WDR62 primary microcephaly (WDR62-MCPH), microcephaly (occipitofrontal circumference [OFC] = -2 SD) is usually present at birth, but in some instances becomes evident later in the first year of life. Growth is otherwise normal. Except for brain malformations in most affected individuals, no other congenital malformations are observed. 
Central nervous system involvement can include delayed motor development, mild-to-severe intellectual disability (ID), behavior problems, epilepsy, spasticity, and ataxia."}, "type": "public definition"}, "xrefs": [{"db": "GeneReviews", "id": "NBK578067"}]}], "citations": [{"ids": [{"value": "35188728", "source": "PubMed"}, {"value": "NBK578067", "source": "BookShelf"}], "type": "review", "abbrev": "GeneReviews"}], "xrefs": [{"db": "Orphanet", "id": "2512"}, {"db": "MedGen", "id": "C1858535"}, {"db": "MONDO", "id": "MONDO:0011435"}, {"db": "OMIM", "id": "604317", "type": "MIM"}]}], "type": "TYPE_DISEASE", "id": "7167", "contributesToAggregateClassification": true}], "dateLastEvaluated": "2010-09-09T00:00:00Z", "dateCreated": "2015-09-17T00:00:00Z", "mostRecentSubmission": "2015-09-17T00:00:00Z", "numberOfSubmitters": 1, "numberOfSubmissions": 1}}, "sequenceLocation": {"forDisplay": true, "assembly": "GRCh38", "chr": "CHROMOSOME_19", "accession": "NC_000019.10", "start": 36104569, "stop": 36104572, "displayStart": 36104569, "displayStop": 36104572, "variantLength": 4, "positionVcf": 36104568, "referenceAlleleVcf": "GTGCC", "alternateAlleleVcf": "G"}, "hgncIds": ["HGNC:24502"]} diff --git a/tests/clinvar-genes/gene-frequency-report.jsonl b/tests/clinvar-genes/gene-frequency-report.jsonl index a9f2a997..35093279 100644 --- a/tests/clinvar-genes/gene-frequency-report.jsonl +++ b/tests/clinvar-genes/gene-frequency-report.jsonl @@ -1,10 +1,11 @@ -{"hgnc": "HGNC:5", "counts": {"benign": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:16", "counts": {"benign": [6, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 1, 1, 0, 0, 2, 0, 0], "uncertain": [17, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:17", "counts": {"benign": [2, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:18", "counts": {"benign": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:19", "counts": {"benign": [1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0], "uncertain": [13, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:20", "counts": {"benign": [485, 0, 0, 0, 0, 44, 15, 25, 38, 26, 3, 4, 0, 16, 12, 4, 0, 0], "uncertain": [785, 0, 0, 0, 0, 33, 6, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [48, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:21", "counts": {"benign": [12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertain": [77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:23", "counts": {"benign": [216, 0, 0, 0, 0, 6, 1, 7, 9, 16, 4, 10, 6, 5, 8, 21, 0, 0], "uncertain": [304, 0, 0, 0, 0, 22, 3, 5, 6, 0, 0, 0, 0, 1, 0, 1, 0, 0], "pathogenic": [29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:29", "counts": {"benign": [677, 0, 0, 0, 0, 17, 22, 26, 37, 39, 17, 11, 18, 20, 31, 31, 0, 0], "uncertain": [421, 0, 0, 0, 0, 13, 2, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} -{"hgnc": "HGNC:30", "counts": {"benign": [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], "uncertain": [54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "pathogenic": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}} +{"hgncId": "HGNC:5", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 
"uncertainCounts": [41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} +{"hgncId": "HGNC:7", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [23, 0, 0, 0, 0, 3, 0, 1, 2, 3, 0, 1, 0, 0, 1, 1, 0, 0]} +{"hgncId": "HGNC:16", "pathogenicCounts": [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [26, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [20, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 1, 1, 0, 0, 1, 0, 0]} +{"hgncId": "HGNC:17", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} +{"hgncId": "HGNC:18", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} +{"hgncId": "HGNC:19", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [17, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [2, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0]} +{"hgncId": "HGNC:20", "pathogenicCounts": [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [650, 0, 0, 0, 0, 16, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [457, 0, 0, 0, 0, 24, 6, 8, 9, 18, 11, 13, 0, 15, 10, 2, 0, 0]} +{"hgncId": "HGNC:21", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]} +{"hgncId": "HGNC:23", "pathogenicCounts": [30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 
"uncertainCounts": [255, 0, 0, 0, 0, 23, 4, 3, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0], "benignCounts": [245, 0, 0, 0, 0, 13, 4, 6, 5, 10, 4, 9, 4, 4, 8, 19, 1, 0]} +{"hgncId": "HGNC:29", "pathogenicCounts": [50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [443, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [511, 0, 0, 0, 0, 16, 7, 14, 13, 22, 21, 38, 28, 32, 35, 33, 1, 0]} +{"hgncId": "HGNC:30", "pathogenicCounts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "uncertainCounts": [85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "benignCounts": [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0]} diff --git a/tests/clinvar-genes/gene-variant-report.jsonl b/tests/clinvar-genes/gene-variant-report.jsonl index 958924d1..97f4a5d7 100644 --- a/tests/clinvar-genes/gene-variant-report.jsonl +++ b/tests/clinvar-genes/gene-variant-report.jsonl @@ -1,12 +1,13 @@ -{"hgnc": "HGNC:5", "counts": {"missense_variant": [0, 0, 25, 0, 0]}} -{"hgnc": "HGNC:16", "counts": {"frameshift_variant": [1, 1, 0, 0, 0], "intron_variant": [0, 1, 0, 0, 0], "missense_variant": [2, 7, 16, 0, 3], "stop_gained": [0, 0, 1, 0, 0], "synonymous_variant": [7, 1, 0, 0, 0]}} -{"hgnc": "HGNC:17", "counts": {"missense_variant": [0, 0, 18, 0, 0]}} -{"hgnc": "HGNC:18", "counts": {"missense_variant": [0, 0, 7, 0, 0]}} -{"hgnc": "HGNC:19", "counts": {"missense_variant": [1, 2, 14, 0, 0], "non_coding_transcript_variant": [2, 1, 0, 0, 0]}} -{"hgnc": "HGNC:20", "counts": {"3_prime_UTR_variant": [4, 2, 4, 0, 0], "5_prime_UTR_variant": [0, 4, 0, 0, 0], "frameshift_variant": [0, 0, 15, 2, 1], "inframe_indel": [0, 0, 4, 0, 0], "intron_variant": [45, 197, 44, 0, 0], "missense_variant": [20, 52, 689, 17, 19], "stop_gained": [0, 0, 18, 3, 5], "splice_acceptor_variant": [0, 0, 7, 0, 0], "splice_donor_variant": [0, 0, 4, 0, 0], "synonymous_variant": [37, 311, 30, 0, 0]}} -{"hgnc": "HGNC:21", "counts": {"5_prime_UTR_variant": [0, 0, 3, 0, 0], "missense_variant": [0, 
8, 74, 0, 0]}} -{"hgnc": "HGNC:23", "counts": {"3_prime_UTR_variant": [33, 13, 59, 0, 0], "5_prime_UTR_variant": [2, 3, 14, 0, 0], "frameshift_variant": [0, 0, 1, 0, 1], "inframe_indel": [0, 0, 3, 0, 0], "intron_variant": [33, 147, 117, 3, 6], "missense_variant": [2, 5, 120, 1, 7], "stop_gained": [0, 0, 1, 0, 1], "splice_acceptor_variant": [0, 0, 0, 2, 0], "stop_lost": [0, 1, 0, 0, 0], "synonymous_variant": [6, 63, 10, 0, 0]}} -{"hgnc": "HGNC:29", "counts": {"5_prime_UTR_variant": [12, 1, 26, 0, 0], "frameshift_variant": [0, 0, 0, 0, 8], "inframe_indel": [0, 0, 1, 0, 1], "intron_variant": [169, 127, 45, 1, 0], "missense_variant": [70, 93, 340, 9, 13], "stop_gained": [0, 0, 0, 3, 5], "splice_acceptor_variant": [0, 0, 1, 0, 0], "splice_donor_variant": [0, 0, 0, 4, 1], "synonymous_variant": [108, 357, 29, 0, 0]}} -{"hgnc": "HGNC:30", "counts": {"frameshift_variant": [1, 0, 0, 0, 0], "missense_variant": [0, 2, 54, 0, 0], "splice_acceptor_variant": [1, 0, 0, 0, 0]}} -{"hgnc": "HGNC:32", "counts": {"3_prime_UTR_variant": [1, 0, 0, 0, 0], "frameshift_variant": [0, 0, 1, 1, 4], "inframe_indel": [0, 0, 1, 0, 0], "intron_variant": [16, 2, 3, 0, 0], "missense_variant": [2, 9, 129, 0, 0], "stop_gained": [0, 0, 0, 2, 2], "splice_donor_variant": [0, 0, 0, 1, 0], "synonymous_variant": [13, 19, 1, 0, 0]}} -{"hgnc": "HGNC:33", "counts": {"3_prime_UTR_variant": [1, 0, 20, 0, 0], "5_prime_UTR_variant": [8, 6, 20, 0, 0], "frameshift_variant": [0, 0, 1, 3, 12], "inframe_indel": [0, 0, 4, 0, 1], "intron_variant": [81, 57, 30, 0, 2], "missense_variant": [13, 37, 377, 22, 16], "stop_gained": [0, 0, 1, 0, 12], "no_sequence_alteration": [1, 0, 0, 0, 0], "splice_acceptor_variant": [0, 0, 0, 5, 2], "splice_donor_variant": [0, 0, 1, 1, 1], "synonymous_variant": [31, 149, 41, 1, 0]}} +{"hgncId": "HGNC:5", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countLikelyBenign": 2, "countUncertainSignificance": 41}]} +{"hgncId": "HGNC:7", "impactCounts": [{"geneImpact": 
"GENE_IMPACT_INTRON_VARIANT", "countBenign": 1, "countLikelyBenign": 2, "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 7, "countLikelyBenign": 9, "countUncertainSignificance": 58}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 6, "countLikelyBenign": 8}]} +{"hgncId": "HGNC:16", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countLikelyBenign": 1}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countLikelyBenign": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 1, "countLikelyBenign": 11, "countUncertainSignificance": 26, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 8, "countLikelyBenign": 8}]} +{"hgncId": "HGNC:17", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countUncertainSignificance": 24}, {"geneImpact": "GENE_IMPACT_STOP_LOST", "countLikelyBenign": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countLikelyBenign": 1}]} +{"hgncId": "HGNC:18", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countUncertainSignificance": 15}]} +{"hgncId": "HGNC:19", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 1, "countLikelyBenign": 2, "countUncertainSignificance": 18}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 2, "countLikelyBenign": 2}]} +{"hgncId": "HGNC:20", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 3, "countLikelyPathogenic": 3, "countPathogenic": 16}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 36, "countLikelyBenign": 225, "countUncertainSignificance": 35}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 2, "countLikelyBenign": 15, 
"countUncertainSignificance": 603, "countLikelyPathogenic": 9, "countPathogenic": 6}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 5, "countPathogenic": 19}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countUncertainSignificance": 3, "countLikelyPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 5, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_STOP_LOST", "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 4, "countLikelyBenign": 262, "countUncertainSignificance": 11}]} +{"hgncId": "HGNC:21", "impactCounts": [{"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countLikelyBenign": 9, "countUncertainSignificance": 115}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countLikelyBenign": 1}]} +{"hgncId": "HGNC:23", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 3, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 19, "countLikelyBenign": 127, "countUncertainSignificance": 13}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 1, "countLikelyBenign": 2, "countUncertainSignificance": 208, "countLikelyPathogenic": 1, "countPathogenic": 6}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 1, "countLikelyPathogenic": 5, "countPathogenic": 3}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countLikelyPathogenic": 6, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 1, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_STOP_LOST", "countUncertainSignificance": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 7, "countLikelyBenign": 125, "countUncertainSignificance": 4}]} +{"hgncId": "HGNC:29", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", 
"countUncertainSignificance": 1, "countPathogenic": 12}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 143, "countLikelyBenign": 146, "countUncertainSignificance": 31, "countLikelyPathogenic": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 15, "countLikelyBenign": 39, "countUncertainSignificance": 356, "countLikelyPathogenic": 3, "countPathogenic": 11}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countLikelyPathogenic": 4, "countPathogenic": 8}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 1, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countLikelyPathogenic": 5, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 14, "countLikelyBenign": 334, "countUncertainSignificance": 4}]} +{"hgncId": "HGNC:30", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countLikelyBenign": 4, "countUncertainSignificance": 85}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countLikelyBenign": 1}]} +{"hgncId": "HGNC:32", "impactCounts": [{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 1, "countPathogenic": 4}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 14, "countLikelyBenign": 11, "countUncertainSignificance": 3}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 3, "countLikelyBenign": 12, "countUncertainSignificance": 199}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countLikelyPathogenic": 2, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countLikelyPathogenic": 1, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 15, "countLikelyBenign": 70, "countUncertainSignificance": 1}]} +{"hgncId": "HGNC:33", "impactCounts": 
[{"geneImpact": "GENE_IMPACT_FRAMESHIFT_VARIANT", "countUncertainSignificance": 1, "countLikelyPathogenic": 3, "countPathogenic": 22}, {"geneImpact": "GENE_IMPACT_INTRON_VARIANT", "countBenign": 55, "countLikelyBenign": 333, "countUncertainSignificance": 8, "countLikelyPathogenic": 1, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_MISSENSE_VARIANT", "countBenign": 2, "countLikelyBenign": 11, "countUncertainSignificance": 306, "countLikelyPathogenic": 14, "countPathogenic": 11}, {"geneImpact": "GENE_IMPACT_STOP_GAINED", "countUncertainSignificance": 1, "countPathogenic": 28}, {"geneImpact": "GENE_IMPACT_NO_SEQUENCE_ALTERATION", "countBenign": 1}, {"geneImpact": "GENE_IMPACT_SPLICE_ACCEPTOR_VARIANT", "countLikelyPathogenic": 4, "countPathogenic": 2}, {"geneImpact": "GENE_IMPACT_SPLICE_DONOR_VARIANT", "countLikelyPathogenic": 6, "countPathogenic": 1}, {"geneImpact": "GENE_IMPACT_SYNONYMOUS_VARIANT", "countBenign": 7, "countLikelyBenign": 530, "countUncertainSignificance": 4, "countLikelyPathogenic": 1}]}