Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add exliquid and organoid queries #175

Merged
merged 3 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions resources/sql/EXLIQUID_SAMPLE_3LEVELS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample).
For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167.
*/
with t as (
select
(s.resource ->> 'id')::text s_id,
(s_coding ->> 'code')::text sample_type
from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding
where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
),
t2 as (
SELECT
s_ali.resource ->> 'id' s_ali_id,
sample_type_ali.sample_type as s_ali_type,
(s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest,
s_ali_grp.resource ->> 'id' s_ali_grp_id,
sample_type_ali_grp.sample_type as s_ali_grp_type,
(s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest,
s_mother.resource ->> 'id' s_mother_id,
sample_type_mother.sample_type as s_mother_type,
(s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest,
s_mother.resource -> 'subject' ->> 'reference' as patient_id
FROM specimen s_ali
JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text
JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text
join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id
join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id
join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id
where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0
),
t3 as (
select distinct
t2.patient_id,
c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code,
c.resource -> 'code' ->> 'text' diag_desc,
t2.s_mother_type
from t2
join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference'
)
select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, count(s_mother_type) sample_count
from t3
group by icd10_code, diag_desc, patient_id, s_mother_type;
78 changes: 78 additions & 0 deletions resources/sql/SIORGP_PUBLIC_MAIN
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
SIorgP MetPredict project
The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam
=> one large query that returns the most necessary fields over multiple smaller queries
*/
with t as (
select
o.resource->'subject'->>'reference' as pat_ref,
o.resource->'code'->'coding'->0->>'code' as crf,
component->'code'->'coding'->0->>'code' AS code,
COALESCE(
component->'valueCodeableConcept'->'coding'->0->>'code',
component->>'valueDateTime',
component->'valueQuantity'->>'value',
component->>'valueString'
) AS value
FROM
observation o ,
jsonb_array_elements(o.resource->'component') AS component
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%'
),
t2 AS (
select t.value as pat_pseudonym,
-- t.crf,
p.resource->>'gender' as gender,
p.resource->>'birthDate' as birth_date,
t5.value as organoid_id,
t2.value as location_primary_tumor,
t7.value as location_primary_tumor_precise,
t3.value as therapy,
t4.value as metastases_therapy,
t6.value::integer as age_at_enrollment
from t
left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR'
left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA'
left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET'
left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM'
left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT'
left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON'
left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text
where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM'
),
t8 as (
select pat_pseudonym, count(distinct organoid_id) n_organoids
from t2
group by pat_pseudonym
)
-- patients having <= 3 organoids
select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value
union
-- patients having 4 organoids
select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value
union
-- patients having 5 organoids
select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value
union
-- patients having > 5 organoids
select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids > 5) as value
union
-- the total number of patients
select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value
union
-- the total number of organoids
select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value
union
select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value
union
select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value
union
select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value
union
select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value
union
select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value
union
select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value
union
select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value;
5 changes: 5 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NPAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test query. Number of patients that have a documented visit 1
select count(distinct p.resource)
from observation o
join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%';
4 changes: 4 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NVISIT2B
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids
select count(o)
from observation o
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%';
Loading