Skip to content

Commit

Permalink
fix: more robust import of clinvar variants (#260)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Oct 18, 2023
1 parent 8318b94 commit 72c8267
Showing 1 changed file with 49 additions and 35 deletions.
84 changes: 49 additions & 35 deletions src/clinvar_minimal/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ fn jsonl_import(

for line in reader.lines() {
let line = line?;
let record = serde_json::from_str::<clinvar_minimal::cli::reading::Record>(&line)?;
let record = match serde_json::from_str::<clinvar_minimal::cli::reading::Record>(&line) {
Ok(record) => record,
Err(e) => {
tracing::warn!("skipping line because of error: {}", e);
continue;
}
};

let clinvar_minimal::cli::reading::Record {
rcv,
Expand Down Expand Up @@ -85,42 +91,50 @@ fn jsonl_import(
);
let key: Vec<u8> = var.into();

let record = if let Some(data) = db
let data = db
.get_cf(&cf_data, key.clone())
.map_err(|e| anyhow::anyhow!("problem querying database: {}", e))?
{
let mut record = clinvar_minimal::pbs::Record::decode(&data[..])?;
record
.reference_assertions
.push(clinvar_minimal::pbs::ReferenceAssertion {
rcv,
title,
clinical_significance: clinical_significance.into(),
review_status: review_status.into(),
});
record
.reference_assertions
.sort_by_key(|a| (a.clinical_significance, a.review_status));
record
} else {
clinvar_minimal::pbs::Record {
release: assembly,
chromosome: chr,
start,
stop,
reference: reference_allele_vcf,
alternative: alternate_allele_vcf,
vcv,
reference_assertions: vec![clinvar_minimal::pbs::ReferenceAssertion {
rcv,
title,
clinical_significance: clinical_significance.into(),
review_status: review_status.into(),
}],
.map_err(|e| anyhow::anyhow!("problem querying database: {}", e));
match data {
Err(e) => {
tracing::warn!("skipping line because of error: {}", e);
continue;
}
Ok(data) => {
let record = if let Some(data) = data {
let mut record = clinvar_minimal::pbs::Record::decode(&data[..])?;
record.reference_assertions.push(
clinvar_minimal::pbs::ReferenceAssertion {
rcv,
title,
clinical_significance: clinical_significance.into(),
review_status: review_status.into(),
},
);
record
.reference_assertions
.sort_by_key(|a| (a.clinical_significance, a.review_status));
record
} else {
clinvar_minimal::pbs::Record {
release: assembly,
chromosome: chr,
start,
stop,
reference: reference_allele_vcf,
alternative: alternate_allele_vcf,
vcv,
reference_assertions: vec![clinvar_minimal::pbs::ReferenceAssertion {
rcv,
title,
clinical_significance: clinical_significance.into(),
review_status: review_status.into(),
}],
}
};
let buf = record.encode_to_vec();
db.put_cf(&cf_data, key, buf)?;
}
};
let buf = record.encode_to_vec();
db.put_cf(&cf_data, key, buf)?;
}
}
}

Expand Down

0 comments on commit 72c8267

Please sign in to comment.