Skip to content

Commit

Permalink
feat: provide proto3 JSON serialization for prost structs (#301) (#314)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 21, 2023
1 parent 95ee96e commit e3e7fa1
Show file tree
Hide file tree
Showing 159 changed files with 29,062 additions and 46,981 deletions.
40 changes: 40 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ noodles-core = "0.12"
noodles-csi = "0.28"
noodles-tabix = "0.34"
noodles-vcf = "0.47"
pbjson = "0.6"
pbjson-types = "0.6"
prost = "0.12"
rayon = "1.8"
rocksdb-utils-lookup = "0.3"
Expand All @@ -57,6 +59,8 @@ noodles-gff = "0.25.0"

[build-dependencies]
prost-build = "0.12"
pbjson-build = "0.6"
anyhow = "1.0"

[dev-dependencies]
insta = { version = "1.33", features = ["yaml"] }
Expand Down
113 changes: 50 additions & 63 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,67 +1,54 @@
// The custom build script, needed as we use flatbuffers.
// The custom build script, used to (1) generate the Rust classes for the
// protobuf implementation and (2) use pbjson for proto3 JSON serialization.

use std::{env, path::PathBuf};

fn main() -> Result<(), anyhow::Error> {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");
let proto_files = vec![
"annonars/clinvar/minimal.proto",
"annonars/clinvar/per_gene.proto",
"annonars/clinvar/sv.proto",
"annonars/cons/base.proto",
"annonars/dbsnp/base.proto",
"annonars/functional/refseq.proto",
"annonars/genes/base.proto",
"annonars/gnomad/exac_cnv.proto",
"annonars/gnomad/gnomad2.proto",
"annonars/gnomad/gnomad3.proto",
"annonars/gnomad/gnomad_cnv4.proto",
"annonars/gnomad/gnomad_sv2.proto",
"annonars/gnomad/gnomad_sv4.proto",
"annonars/gnomad/mtdna.proto",
"annonars/gnomad/vep_common.proto",
"annonars/gnomad/vep_gnomad2.proto",
"annonars/gnomad/vep_gnomad3.proto",
"annonars/helixmtdb/base.proto",
]
.iter()
.map(|f| root.join(f))
.collect::<Vec<_>>();

// Tell cargo to recompile if any of these proto files are changed
for proto_file in &proto_files {
println!("cargo:rerun-if-changed={}", proto_file.display());
}

let descriptor_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("proto_descriptor.bin");

fn main() {
println!("cargo:rerun-if-changed=src/proto/annonars/clinvar/v1/minimal.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/clinvar/v1/per_gene.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/clinvar/v1/sv.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/cons/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/dbsnp/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/functional/v1/refseq.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gene/v1/base.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/exac_cnv.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad3.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_cnv4.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_sv2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/gnomad_sv4.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/mtdna.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_common.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_gnomad2.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/gnomad/v1/vep_gnomad3.proto");
println!("cargo:rerun-if-changed=src/proto/annonars/helixmtdb/v1/base.proto");
prost_build::Config::new()
.protoc_arg("-Isrc/proto")
// Add serde serialization and deserialization to the generated code.
.type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]")
// Skip serializing `None` values.
.type_attribute(".", "#[serde_with::skip_serializing_none]")
// Rename the field attributes such that we can properly decode
// ucsc-annotation TSV file with serde.
.field_attribute(
"annonars.cons.v1.base.Record.chrom",
"#[serde(rename = \"chromosome\")]",
)
.field_attribute(
"annonars.cons.v1.base.Record.begin",
"#[serde(rename = \"start\")]",
)
.field_attribute(
"annonars.cons.v1.base..Record.end",
"#[serde(rename = \"stop\")]",
)
// Save descriptors to file
.file_descriptor_set_path(&descriptor_path)
// Override prost-types with pbjson-types
.compile_well_known_types()
.extern_path(".google.protobuf", "::pbjson_types")
// Define the protobuf files to compile.
.compile_protos(
&[
"annonars/clinvar/v1/minimal.proto",
"annonars/clinvar/v1/per_gene.proto",
"annonars/clinvar/v1/sv.proto",
"annonars/cons/v1/base.proto",
"annonars/dbsnp/v1/base.proto",
"annonars/gene/v1/base.proto",
"annonars/gnomad/v1/exac_cnv.proto",
"annonars/functional/v1/refseq.proto",
"annonars/gnomad/v1/gnomad2.proto",
"annonars/gnomad/v1/gnomad3.proto",
"annonars/gnomad/v1/gnomad_cnv4.proto",
"annonars/gnomad/v1/gnomad_sv2.proto",
"annonars/gnomad/v1/gnomad_sv4.proto",
"annonars/gnomad/v1/mtdna.proto",
"annonars/gnomad/v1/vep_common.proto",
"annonars/gnomad/v1/vep_gnomad2.proto",
"annonars/gnomad/v1/vep_gnomad3.proto",
"annonars/helixmtdb/v1/base.proto",
],
&["src/"],
)
.unwrap();
.compile_protos(&proto_files, &[root])?;

let descriptor_set = std::fs::read(descriptor_path).unwrap();
pbjson_build::Builder::new()
.register_descriptors(&descriptor_set)?
.build(&[".annonars"])?;

Ok(())
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

syntax = "proto3";

package annonars.clinvar.v1.minimal;
package annonars.clinvar.minimal;

// Enumeration for variant type.
enum VariantType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

syntax = "proto3";

package annonars.clinvar.v1.per_gene;
package annonars.clinvar.per_gene;

import "annonars/clinvar/v1/minimal.proto";
import "annonars/clinvar/minimal.proto";

// Impact on gene in ClinVar.
enum Impact {
Expand Down Expand Up @@ -73,7 +73,7 @@ message GeneVariantsForRelease {
// Genome release / assembly
string genome_release = 1;
// Variants
repeated annonars.clinvar.v1.minimal.Record variants = 2;
repeated annonars.clinvar.minimal.Record variants = 2;
}

// ClinVar detailed information per gene.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

syntax = "proto3";

package annonars.clinvar.v1.sv;
package annonars.clinvar.sv;

import "annonars/clinvar/v1/minimal.proto";
import "annonars/clinvar/minimal.proto";

// Record for storing minimal information on ClinVar for Mehari.
message Record {
Expand Down Expand Up @@ -32,10 +32,10 @@ message Record {
optional uint32 outer_stop = 10;

// The variant type.
annonars.clinvar.v1.minimal.VariantType variant_type = 11;
annonars.clinvar.minimal.VariantType variant_type = 11;

// VCV accession identifier.
string vcv = 12;
// The reference assertions, sorted by (ClinicalSignificance, ReviewStatus).
repeated annonars.clinvar.v1.minimal.ReferenceAssertion reference_assertions = 13;
repeated annonars.clinvar.minimal.ReferenceAssertion reference_assertions = 13;
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
syntax = "proto3";

package annonars.cons.v1.base;
package annonars.cons.base;

// List of `Record`s.
message RecordList {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
syntax = "proto3";

package annonars.dbsnp.v1.base;
package annonars.dbsnp.base;

// Protocol buffer for the dbSNP VCF record.
message Record {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

syntax = "proto3";

package annonars.functional.v1.refseq;
package annonars.functional.refseq;

// Enumeration for `gbkey` field.
enum Category {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
syntax = "proto3";

package annonars.gene.v1.base;
package annonars.genes.base;

// Information from ACMG secondary findings list.
message AcmgSecondaryFindingRecord {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

syntax = "proto3";

package annonars.gnomad.v1.exac_cnv;
package annonars.gnomad.exac_cnv;

// Enumeration for the CNV type.
enum CnvType {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
syntax = "proto3";

package annonars.gnomad.v1.gnomad2;
package annonars.gnomad.gnomad2;

import "annonars/gnomad/v1/vep_gnomad2.proto";
import "annonars/gnomad/vep_gnomad2.proto";

// Protocol buffer enum for site-level filters.
enum Filter {
Expand Down Expand Up @@ -226,7 +226,7 @@ message Record {
// Site-level filters.
repeated Filter filters = 5;
// VEP annotation records.
repeated annonars.gnomad.v1.vep_gnomad2.Vep vep = 6;
repeated annonars.gnomad.vep_gnomad2.Vep vep = 6;

// Variant allele counts in the different cohorts and population.
//
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
syntax = "proto3";

package annonars.gnomad.v1.gnomad3;
package annonars.gnomad.gnomad3;

import "annonars/gnomad/v1/vep_gnomad3.proto";
import "annonars/gnomad/vep_gnomad3.proto";

// Protocol buffer enum for site-level filters.
enum Filter {
Expand Down Expand Up @@ -219,7 +219,7 @@ message Record {
// Site-level filters.
repeated Filter filters = 5;
// VEP annotation records.
repeated annonars.gnomad.v1.vep_gnomad3.Vep vep = 6;
repeated annonars.gnomad.vep_gnomad3.Vep vep = 6;

// Variant allele counts in the different cohorts and population.
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

syntax = "proto3";

package annonars.gnomad.v1.gnomad_cnv4;
package annonars.gnomad.gnomad_cnv4;

import "annonars/gnomad/v1/exac_cnv.proto";
import "annonars/gnomad/exac_cnv.proto";


// Carrier counts
Expand Down Expand Up @@ -80,7 +80,7 @@ message Record {
// Length of the SV.
int32 sv_len = 9;
// The type of the variant.
annonars.gnomad.v1.exac_cnv.CnvType sv_type = 10;
annonars.gnomad.exac_cnv.CnvType sv_type = 10;

// Median number of callable exons spanned by variant passing QS
// threshold.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

syntax = "proto3";

package annonars.gnomad.v1.gnomad_sv2;
package annonars.gnomad.gnomad_sv2;

// Protocol buffer enum for site-level filters.
enum Filter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

syntax = "proto3";

package annonars.gnomad.v1.gnomad_sv4;
package annonars.gnomad.gnomad_sv4;

import "annonars/gnomad/v1/gnomad_sv2.proto";
import "annonars/gnomad/gnomad_sv2.proto";

// Enumeration for filters.
enum Filter {
Expand Down Expand Up @@ -174,7 +174,7 @@ message Record {
// SV Type.
SvType sv_type = 8;
// Refined complex type.
optional annonars.gnomad.v1.gnomad_sv2.CpxType cpx_type = 9;
optional annonars.gnomad.gnomad_sv2.CpxType cpx_type = 9;

// Variant allele counts in the different cohorts and population.
repeated CohortAlleleCounts allele_counts = 10;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
syntax = "proto3";

package annonars.gnomad.v1.mtdna;
package annonars.gnomad.mtdna;

import "annonars/gnomad/v1/vep_gnomad3.proto";
import "annonars/gnomad/vep_gnomad3.proto";

// Protocol buffer enum for site-level filters.
enum Filter {
Expand Down Expand Up @@ -218,7 +218,7 @@ message Record {
optional string pon_ml_probability_of_pathogenicity = 17;

// VEP v3 annotation records.
repeated annonars.gnomad.v1.vep_gnomad3.Vep vep = 13;
repeated annonars.gnomad.vep_gnomad3.Vep vep = 13;
// Summary information for variant quality interpretation.
optional QualityInfo quality_info = 18;
// Information related to heteroplasmy levels.
Expand Down
Loading

0 comments on commit e3e7fa1

Please sign in to comment.