From e3b3a316bca97b5fdf6e3adfe3c0200e50903344 Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Mon, 1 Apr 2024 13:53:54 -0400 Subject: [PATCH 1/4] v2 cutover --- cumulus_library_covid/covid_symptom/counts.py | 4 +- .../covid_symptom/counts.sql | 1193 +++++++++++++---- .../covid_symptom/table_dx.sql | 6 +- .../covid_symptom/table_pcr.sql | 14 +- .../covid_symptom/table_study_period.sql | 16 +- .../covid_symptom/table_symptom.sql | 4 +- .../covid_symptom/version.sql | 2 +- pyproject.toml | 6 +- 8 files changed, 949 insertions(+), 296 deletions(-) diff --git a/cumulus_library_covid/covid_symptom/counts.py b/cumulus_library_covid/covid_symptom/counts.py index 78b914f..5780652 100644 --- a/cumulus_library_covid/covid_symptom/counts.py +++ b/cumulus_library_covid/covid_symptom/counts.py @@ -1,5 +1,5 @@ from pathlib import Path -from cumulus_library.schema.counts import CountsBuilder +from cumulus_library.statistics.counts import CountsBuilder class CovidCountsBuilder(CountsBuilder): @@ -98,7 +98,7 @@ def count_symptom(self, duration="week"): ] return self.count_encounter(view_name, from_table, cols) - def prepare_queries(self, cursor=None, schema=None): + def prepare_queries(self, cursor=None, schema=None, **kwargs): self.queries = [ self.count_dx("month"), self.count_dx("week"), diff --git a/cumulus_library_covid/covid_symptom/counts.sql b/cumulus_library_covid/covid_symptom/counts.sql index 0c20afe..c42dcb4 100644 --- a/cumulus_library_covid/covid_symptom/counts.sql +++ b/cumulus_library_covid/covid_symptom/counts.sql @@ -1,332 +1,862 @@ --- noqa: disable=all - CREATE TABLE covid_symptom__count_dx_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."cond_month", + s."enc_class_display", + s."age_at_visit", + s."ed_note", + s."variant_era" + --noqa: enable=RF03, AL02 + FROM covid_symptom__dx AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + subject_ref, + encounter_ref, + coalesce( + cast(cond_month AS varchar), + 'cumulus__none' + ) AS cond_month, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(ed_note AS varchar), + 'cumulus__none' + ) AS ed_note, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, "cond_month", - "enc_class_code", + "enc_class_display", + "age_at_visit", + "ed_note", + "variant_era", + concat_ws( + '-', + COALESCE("cond_month",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("ed_note",''), + COALESCE("variant_era",'') + ) AS id + FROM null_replacement + GROUP BY + cube( + "cond_month", + "enc_class_display", "age_at_visit", "ed_note", "variant_era" - FROM covid_symptom__dx + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "cond_month", + "enc_class_display", + "age_at_visit", + "ed_note", + "variant_era", + concat_ws( + '-', + COALESCE("cond_month",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("ed_note",''), + COALESCE("variant_era",'') + ) AS id + FROM null_replacement GROUP BY cube( - "cond_month", - "enc_class_code", - "age_at_visit", - "ed_note", - "variant_era" + "cond_month", + "enc_class_display", + "age_at_visit", + "ed_note", + "variant_era" ) ) SELECT - cnt_encounter AS cnt, - "cond_month", - "enc_class_code", - "age_at_visit", - "ed_note", - "variant_era" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."cond_month", + p."enc_class_display", + p."age_at_visit", + p."ed_note", + p."variant_era" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - CREATE TABLE covid_symptom__count_dx_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."cond_week", + s."enc_class_display", + s."age_at_visit", + s."ed_note", + s."variant_era" + --noqa: enable=RF03, AL02 + FROM covid_symptom__dx AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + subject_ref, + encounter_ref, + coalesce( + cast(cond_week AS varchar), + 'cumulus__none' + ) AS cond_week, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(ed_note AS varchar), + 'cumulus__none' + ) AS ed_note, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, "cond_week", - "enc_class_code", + "enc_class_display", "age_at_visit", "ed_note", - "variant_era" - FROM covid_symptom__dx + "variant_era", + concat_ws( + '-', + COALESCE("cond_week",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("ed_note",''), + COALESCE("variant_era",'') + ) AS id + FROM null_replacement GROUP BY cube( - "cond_week", - "enc_class_code", - "age_at_visit", - "ed_note", - "variant_era" + "cond_week", + "enc_class_display", + "age_at_visit", + "ed_note", + "variant_era" ) - ) - - SELECT - cnt_encounter AS cnt, - "cond_week", - "enc_class_code", - "age_at_visit", - "ed_note", - "variant_era" - FROM powerset - WHERE - cnt_subject >= 10 - -); + ), --- ########################################################### - -CREATE TABLE covid_symptom__count_pcr_month AS ( - WITH powerset AS ( + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, - "covid_pcr_month", - "covid_pcr_result_display", - "variant_era", - "ed_note", + count(DISTINCT subject_ref) AS cnt_subject_ref, + "cond_week", + "enc_class_display", "age_at_visit", - "gender", - "race_display" - FROM covid_symptom__pcr + "ed_note", + "variant_era", + concat_ws( + '-', + COALESCE("cond_week",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("ed_note",''), + COALESCE("variant_era",'') + ) AS id + FROM null_replacement GROUP BY cube( - "covid_pcr_month", - "covid_pcr_result_display", - "variant_era", - "ed_note", - "age_at_visit", - "gender", - "race_display" + "cond_week", + "enc_class_display", + "age_at_visit", + "ed_note", + "variant_era" ) ) SELECT - cnt_encounter AS cnt, - "covid_pcr_month", - "covid_pcr_result_display", - "variant_era", - "ed_note", - "age_at_visit", - "gender", - "race_display" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."cond_week", + p."enc_class_display", + p."age_at_visit", + p."ed_note", + p."variant_era" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - -CREATE TABLE covid_symptom__count_pcr_week AS ( - WITH powerset AS ( +CREATE TABLE covid_symptom__count_study_period_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."start_month", + s."variant_era", + s."ed_note", + s."gender", + s."age_group", + s."race_display" + --noqa: enable=RF03, AL02 + FROM covid_symptom__study_period AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, - "covid_pcr_week", - "covid_pcr_result_display", + subject_ref, + encounter_ref, + coalesce( + cast(start_month AS varchar), + 'cumulus__none' + ) AS start_month, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era, + coalesce( + cast(ed_note AS varchar), + 'cumulus__none' + ) AS ed_note, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(age_group AS varchar), + 'cumulus__none' + ) AS age_group, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "start_month", "variant_era", "ed_note", - "age_at_visit", "gender", - "race_display" - FROM covid_symptom__pcr + "age_group", + "race_display", + concat_ws( + '-', + COALESCE("start_month",''), + COALESCE("variant_era",''), + COALESCE("ed_note",''), + COALESCE("gender",''), + COALESCE("age_group",''), + COALESCE("race_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "covid_pcr_week", - "covid_pcr_result_display", - "variant_era", - "ed_note", - "age_at_visit", - "gender", - "race_display" + "start_month", + "variant_era", + "ed_note", + "gender", + "age_group", + "race_display" ) - ) + ), - SELECT - cnt_encounter AS cnt, - "covid_pcr_week", - "covid_pcr_result_display", - "variant_era", - "ed_note", - "age_at_visit", - "gender", - "race_display" - FROM powerset - WHERE - cnt_subject >= 10 - -); - --- ########################################################### - -CREATE TABLE covid_symptom__count_study_period_month AS ( - WITH powerset AS ( + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + count(DISTINCT subject_ref) AS cnt_subject_ref, "start_month", "variant_era", "ed_note", "gender", "age_group", - "race_display" - FROM covid_symptom__study_period + "race_display", + concat_ws( + '-', + COALESCE("start_month",''), + COALESCE("variant_era",''), + COALESCE("ed_note",''), + COALESCE("gender",''), + COALESCE("age_group",''), + COALESCE("race_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "start_month", - "variant_era", - "ed_note", - "gender", - "age_group", - "race_display" + "start_month", + "variant_era", + "ed_note", + "gender", + "age_group", + "race_display" ) ) SELECT - cnt_encounter AS cnt, - "start_month", - "variant_era", - "ed_note", - "gender", - "age_group", - "race_display" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."start_month", + p."variant_era", + p."ed_note", + p."gender", + p."age_group", + p."race_display" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - CREATE TABLE covid_symptom__count_study_period_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."start_week", + s."variant_era", + s."ed_note", + s."gender", + s."age_group", + s."race_display" + --noqa: enable=RF03, AL02 + FROM covid_symptom__study_period AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + subject_ref, + encounter_ref, + coalesce( + cast(start_week AS varchar), + 'cumulus__none' + ) AS start_week, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era, + coalesce( + cast(ed_note AS varchar), + 'cumulus__none' + ) AS ed_note, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(age_group AS varchar), + 'cumulus__none' + ) AS age_group, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "start_week", + "variant_era", + "ed_note", + "gender", + "age_group", + "race_display", + concat_ws( + '-', + COALESCE("start_week",''), + COALESCE("variant_era",''), + COALESCE("ed_note",''), + COALESCE("gender",''), + COALESCE("age_group",''), + COALESCE("race_display",'') + ) AS id + FROM null_replacement + GROUP BY + cube( "start_week", "variant_era", "ed_note", "gender", "age_group", "race_display" - FROM covid_symptom__study_period + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "start_week", + "variant_era", + "ed_note", + "gender", + "age_group", + "race_display", + concat_ws( + '-', + COALESCE("start_week",''), + COALESCE("variant_era",''), + COALESCE("ed_note",''), + COALESCE("gender",''), + COALESCE("age_group",''), + COALESCE("race_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "start_week", - "variant_era", - "ed_note", - "gender", - "age_group", - "race_display" + "start_week", + "variant_era", + "ed_note", + "gender", + "age_group", + "race_display" ) ) SELECT - cnt_encounter AS cnt, - "start_week", - "variant_era", - "ed_note", - "gender", - "age_group", - "race_display" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."start_week", + p."variant_era", + p."ed_note", + p."gender", + p."age_group", + p."race_display" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - CREATE TABLE covid_symptom__count_symptom_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."author_week", + s."symptom_display", + s."variant_era", + s."age_group", + s."gender", + s."race_display", + s."enc_class_display", + s."ed_note" + --noqa: enable=RF03, AL02 + FROM covid_symptom__symptom_ctakes_negation AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + subject_ref, + encounter_ref, + coalesce( + cast(author_week AS varchar), + 'cumulus__none' + ) AS author_week, + coalesce( + cast(symptom_display AS varchar), + 'cumulus__none' + ) AS symptom_display, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era, + coalesce( + cast(age_group AS varchar), + 'cumulus__none' + ) AS age_group, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(ed_note AS varchar), + 'cumulus__none' + ) AS ed_note + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, "author_week", "symptom_display", "variant_era", "age_group", "gender", "race_display", - "enc_class_code", + "enc_class_display", + "ed_note", + concat_ws( + '-', + COALESCE("author_week",''), + COALESCE("symptom_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("enc_class_display",''), + COALESCE("ed_note",'') + ) AS id + FROM null_replacement + GROUP BY + cube( + "author_week", + "symptom_display", + "variant_era", + "age_group", + "gender", + "race_display", + "enc_class_display", "ed_note" - FROM covid_symptom__symptom_ctakes_negation + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "author_week", + "symptom_display", + "variant_era", + "age_group", + "gender", + "race_display", + "enc_class_display", + "ed_note", + concat_ws( + '-', + COALESCE("author_week",''), + COALESCE("symptom_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("enc_class_display",''), + COALESCE("ed_note",'') + ) AS id + FROM null_replacement GROUP BY cube( - "author_week", - "symptom_display", - "variant_era", - "age_group", - "gender", - "race_display", - "enc_class_code", - "ed_note" + "author_week", + "symptom_display", + "variant_era", + "age_group", + "gender", + "race_display", + "enc_class_display", + "ed_note" ) ) SELECT - cnt_encounter AS cnt, - "author_week", - "symptom_display", - "variant_era", - "age_group", - "gender", - "race_display", - "enc_class_code", - "ed_note" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."author_week", + p."symptom_display", + p."variant_era", + p."age_group", + p."gender", + p."race_display", + p."enc_class_display", + p."ed_note" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - CREATE TABLE covid_symptom__count_symptom_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."author_month", + s."symptom_display", + s."variant_era", + s."age_group", + s."gender", + s."race_display", + s."enc_class_display", + s."ed_note" + --noqa: enable=RF03, AL02 + FROM covid_symptom__symptom_ctakes_negation AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(author_month AS varchar), + 'cumulus__none' + ) AS author_month, + coalesce( + cast(symptom_display AS varchar), + 'cumulus__none' + ) AS symptom_display, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era, + coalesce( + cast(age_group AS varchar), + 'cumulus__none' + ) AS age_group, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(ed_note AS varchar), + 'cumulus__none' + ) AS ed_note + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "author_month", + "symptom_display", + "variant_era", + "age_group", + "gender", + "race_display", + "enc_class_display", + "ed_note", + concat_ws( + '-', + COALESCE("author_month",''), + COALESCE("symptom_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("enc_class_display",''), + COALESCE("ed_note",'') + ) AS id + FROM null_replacement + GROUP BY + cube( "author_month", "symptom_display", "variant_era", "age_group", "gender", "race_display", - "enc_class_code", + "enc_class_display", "ed_note" - FROM covid_symptom__symptom_ctakes_negation + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "author_month", + "symptom_display", + "variant_era", + "age_group", + "gender", + "race_display", + "enc_class_display", + "ed_note", + concat_ws( + '-', + COALESCE("author_month",''), + COALESCE("symptom_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("enc_class_display",''), + COALESCE("ed_note",'') + ) AS id + FROM null_replacement GROUP BY cube( - "author_month", - "symptom_display", - "variant_era", - "age_group", - "gender", - "race_display", - "enc_class_code", - "ed_note" + "author_month", + "symptom_display", + "variant_era", + "age_group", + "gender", + "race_display", + "enc_class_display", + "ed_note" ) ) SELECT - cnt_encounter AS cnt, - "author_month", - "symptom_display", - "variant_era", - "age_group", - "gender", - "race_display", - "enc_class_code", - "ed_note" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."author_month", + p."symptom_display", + p."variant_era", + p."age_group", + p."gender", + p."race_display", + p."enc_class_display", + p."ed_note" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - CREATE TABLE covid_symptom__count_prevalence_ed_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."author_month", + s."covid_dx", + s."covid_icd10", + s."covid_pcr_result", + s."covid_symptom", + s."symptom_icd10_display", + s."variant_era", + s."age_group", + s."enc_class_display" + --noqa: enable=RF03, AL02 + FROM covid_symptom__prevalence_ed AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(author_month AS varchar), + 'cumulus__none' + ) AS author_month, + coalesce( + cast(covid_dx AS varchar), + 'cumulus__none' + ) AS covid_dx, + coalesce( + cast(covid_icd10 AS varchar), + 'cumulus__none' + ) AS covid_icd10, + coalesce( + cast(covid_pcr_result AS varchar), + 'cumulus__none' + ) AS covid_pcr_result, + coalesce( + cast(covid_symptom AS varchar), + 'cumulus__none' + ) AS covid_symptom, + coalesce( + cast(symptom_icd10_display AS varchar), + 'cumulus__none' + ) AS symptom_icd10_display, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era, + coalesce( + cast(age_group AS varchar), + 'cumulus__none' + ) AS age_group, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "author_month", + "covid_dx", + "covid_icd10", + "covid_pcr_result", + "covid_symptom", + "symptom_icd10_display", + "variant_era", + "age_group", + "enc_class_display", + concat_ws( + '-', + COALESCE("author_month",''), + COALESCE("covid_dx",''), + COALESCE("covid_icd10",''), + COALESCE("covid_pcr_result",''), + COALESCE("covid_symptom",''), + COALESCE("symptom_icd10_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("enc_class_display",'') + ) AS id + FROM null_replacement + GROUP BY + cube( + "author_month", + "covid_dx", + "covid_icd10", + "covid_pcr_result", + "covid_symptom", + "symptom_icd10_display", + "variant_era", + "age_group", + "enc_class_display" + ) + ), + + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + count(DISTINCT subject_ref) AS cnt_subject_ref, "author_month", "covid_dx", "covid_icd10", @@ -335,45 +865,116 @@ CREATE TABLE covid_symptom__count_prevalence_ed_month AS ( "symptom_icd10_display", "variant_era", "age_group", - "enc_class_code" - FROM covid_symptom__prevalence_ed + "enc_class_display", + concat_ws( + '-', + COALESCE("author_month",''), + COALESCE("covid_dx",''), + COALESCE("covid_icd10",''), + COALESCE("covid_pcr_result",''), + COALESCE("covid_symptom",''), + COALESCE("symptom_icd10_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("enc_class_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "author_month", - "covid_dx", - "covid_icd10", - "covid_pcr_result", - "covid_symptom", - "symptom_icd10_display", - "variant_era", - "age_group", - "enc_class_code" + "author_month", + "covid_dx", + "covid_icd10", + "covid_pcr_result", + "covid_symptom", + "symptom_icd10_display", + "variant_era", + "age_group", + "enc_class_display" ) ) SELECT - cnt_encounter AS cnt, - "author_month", - "covid_dx", - "covid_icd10", - "covid_pcr_result", - "covid_symptom", - "symptom_icd10_display", - "variant_era", - "age_group", - "enc_class_code" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."author_month", + p."covid_dx", + p."covid_icd10", + p."covid_pcr_result", + p."covid_symptom", + p."symptom_icd10_display", + p."variant_era", + p."age_group", + p."enc_class_display" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); - --- ########################################################### - CREATE TABLE covid_symptom__count_prevalence_ed_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."author_week", + s."covid_dx", + s."covid_icd10", + s."covid_pcr_result", + s."covid_symptom", + s."symptom_icd10_display", + s."variant_era", + s."age_group", + s."enc_class_display" + --noqa: enable=RF03, AL02 + FROM covid_symptom__prevalence_ed AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(author_week AS varchar), + 'cumulus__none' + ) AS author_week, + coalesce( + cast(covid_dx AS varchar), + 'cumulus__none' + ) AS covid_dx, + coalesce( + cast(covid_icd10 AS varchar), + 'cumulus__none' + ) AS covid_icd10, + coalesce( + cast(covid_pcr_result AS varchar), + 'cumulus__none' + ) AS covid_pcr_result, + coalesce( + cast(covid_symptom AS varchar), + 'cumulus__none' + ) AS covid_symptom, + coalesce( + cast(symptom_icd10_display AS varchar), + 'cumulus__none' + ) AS symptom_icd10_display, + coalesce( + cast(variant_era AS varchar), + 'cumulus__none' + ) AS variant_era, + coalesce( + cast(age_group AS varchar), + 'cumulus__none' + ) AS age_group, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display + FROM filtered_table + ), + secondary_powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, "author_week", "covid_dx", "covid_icd10", @@ -382,34 +983,86 @@ CREATE TABLE covid_symptom__count_prevalence_ed_week AS ( "symptom_icd10_display", "variant_era", "age_group", - "enc_class_code" - FROM covid_symptom__prevalence_ed + "enc_class_display", + concat_ws( + '-', + COALESCE("author_week",''), + COALESCE("covid_dx",''), + COALESCE("covid_icd10",''), + COALESCE("covid_pcr_result",''), + COALESCE("covid_symptom",''), + COALESCE("symptom_icd10_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("enc_class_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "author_week", - "covid_dx", - "covid_icd10", - "covid_pcr_result", - "covid_symptom", - "symptom_icd10_display", - "variant_era", - "age_group", - "enc_class_code" + "author_week", + "covid_dx", + "covid_icd10", + "covid_pcr_result", + "covid_symptom", + "symptom_icd10_display", + "variant_era", + "age_group", + "enc_class_display" + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "author_week", + "covid_dx", + "covid_icd10", + "covid_pcr_result", + "covid_symptom", + "symptom_icd10_display", + "variant_era", + "age_group", + "enc_class_display", + concat_ws( + '-', + COALESCE("author_week",''), + COALESCE("covid_dx",''), + COALESCE("covid_icd10",''), + COALESCE("covid_pcr_result",''), + COALESCE("covid_symptom",''), + COALESCE("symptom_icd10_display",''), + COALESCE("variant_era",''), + COALESCE("age_group",''), + COALESCE("enc_class_display",'') + ) AS id + FROM null_replacement + GROUP BY + cube( + "author_week", + "covid_dx", + "covid_icd10", + "covid_pcr_result", + "covid_symptom", + "symptom_icd10_display", + "variant_era", + "age_group", + "enc_class_display" ) ) SELECT - cnt_encounter AS cnt, - "author_week", - "covid_dx", - "covid_icd10", - "covid_pcr_result", - "covid_symptom", - "symptom_icd10_display", - "variant_era", - "age_group", - "enc_class_code" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."author_week", + p."covid_dx", + p."covid_icd10", + p."covid_pcr_result", + p."covid_symptom", + p."symptom_icd10_display", + p."variant_era", + p."age_group", + p."enc_class_display" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); diff --git a/cumulus_library_covid/covid_symptom/table_dx.sql b/cumulus_library_covid/covid_symptom/table_dx.sql index 4bcfa39..1b02c2e 100644 --- a/cumulus_library_covid/covid_symptom/table_dx.sql +++ b/cumulus_library_covid/covid_symptom/table_dx.sql @@ -21,9 +21,9 @@ SELECT DISTINCT c.encounter_ref, s.status, c.code AS cond_code, -- noqa: LT01,RF02 - c.recorded_week AS cond_week, - c.recorded_month AS cond_month, - c.recorded_year AS cond_year, + c.recordeddate_week AS cond_week, + c.recordeddate_month AS cond_month, + c.recordeddate_year AS cond_year, s.enc_class_display, s.age_at_visit, s.ed_note, diff --git a/cumulus_library_covid/covid_symptom/table_pcr.sql b/cumulus_library_covid/covid_symptom/table_pcr.sql index c05d296..bc3feea 100644 --- a/cumulus_library_covid/covid_symptom/table_pcr.sql +++ b/cumulus_library_covid/covid_symptom/table_pcr.sql @@ -24,10 +24,10 @@ WITH obs_interpret AS ( SELECT DISTINCT obs_interpret.display AS covid_pcr_result_display, - o.lab_code AS covid_pcr_code, - o.lab_date AS covid_pcr_date, - o.lab_week AS covid_pcr_week, - o.lab_month AS covid_pcr_month, + o.observation_code AS covid_pcr_code, + o.effectivedatetime_day AS covid_pcr_date, + o.effectivedatetime_week AS covid_pcr_week, + o.effectivedatetime_month AS covid_pcr_month, s.status, s.variant_era, s.author_date, @@ -51,9 +51,9 @@ FROM core__observation_lab AS o, WHERE (s.encounter_ref = o.encounter_ref) AND (s.variant_era = p.variant_era) - AND (o.lab_week BETWEEN p.variant_start AND p.variant_end) - AND (o.lab_code.code = pcr.code) - AND (o.lab_result.code = obs_interpret.code); + AND (o.effectivedatetime_week BETWEEN p.variant_start AND p.variant_end) + AND (o.observation_code = pcr.code) + AND (o.valuecodeableconcept_code = obs_interpret.code); -- TODO Cerner specific handling of lab RESULT -- https://github.com/smart-on-fhir/cumulus-library-covid/issues/13 diff --git a/cumulus_library_covid/covid_symptom/table_study_period.sql b/cumulus_library_covid/covid_symptom/table_study_period.sql index dc89efb..921fea0 100644 --- a/cumulus_library_covid/covid_symptom/table_study_period.sql +++ b/cumulus_library_covid/covid_symptom/table_study_period.sql @@ -1,12 +1,12 @@ CREATE TABLE covid_symptom__study_period AS SELECT DISTINCT v.variant_era, - s.start_date, - s.start_week, - s.start_month, - s.end_date, + s.period_start_day as start_date, + s.period_start_week as start_week, + s.period_start_month as start_month, + s.period_end_day as end_date, s.age_at_visit, - s.author_date, + s.author_day as author_date, s.author_week, s.author_month, s.author_year, @@ -14,7 +14,7 @@ SELECT DISTINCT s.race_display, s.subject_ref, s.encounter_ref, - s.doc_ref, + s.documentreference_ref, s.diff_enc_note_days, s.enc_class_code, s.enc_class_display, @@ -29,8 +29,8 @@ FROM core__study_period AS s, WHERE s.age_at_visit = a.age AND s.gender IN ('female', 'male') - AND s.author_date BETWEEN v.variant_start AND v.variant_end - AND s.start_date BETWEEN v.variant_start AND v.variant_end + AND s.author_day BETWEEN v.variant_start AND v.variant_end + AND s.period_start_day BETWEEN v.variant_start AND v.variant_end AND s.diff_enc_note_days BETWEEN -30 AND 30; CREATE TABLE covid_symptom__meta_date AS diff --git a/cumulus_library_covid/covid_symptom/table_symptom.sql b/cumulus_library_covid/covid_symptom/table_symptom.sql index 9d445cc..40eb677 100644 --- a/cumulus_library_covid/covid_symptom/table_symptom.sql +++ b/cumulus_library_covid/covid_symptom/table_symptom.sql @@ -43,8 +43,8 @@ SELECT DISTINCT s.encounter_ref, m.docref_id, def.pref AS symptom_display, - s.start_date AS start_date, - s.end_date AS end_date, + s.start_date, + s.end_date, s.author_week, s.author_month, s.age_group, diff --git a/cumulus_library_covid/covid_symptom/version.sql b/cumulus_library_covid/covid_symptom/version.sql index 2595533..e984ad5 100644 --- a/cumulus_library_covid/covid_symptom/version.sql +++ b/cumulus_library_covid/covid_symptom/version.sql @@ -1,2 +1,2 @@ CREATE TABLE covid_symptom__meta_version AS -SELECT 2 AS data_package_version; +SELECT 3 AS data_package_version; diff --git a/pyproject.toml b/pyproject.toml index 7e55fa3..68d37e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [project] name = "cumulus-library-covid" requires-python = ">= 3.9" -version = "0.2.2" +version = "1.0.0" dependencies = [ - "cumulus-library >= 1.4.0, <2", - "sqlfluff == 2.0.2" + "cumulus-library >= 2.0, <3", + "sqlfluff >= 3" ] description = "SQL generation for cumulus covid symptom analysis" readme = "README.md" From 4a60dc4c1f7b1aff7eaeb64266b67da7ab3090ec Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Thu, 4 Apr 2024 09:01:35 -0400 Subject: [PATCH 2/4] SqlFluff pass --- .../covid_symptom/table_study_period.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cumulus_library_covid/covid_symptom/table_study_period.sql b/cumulus_library_covid/covid_symptom/table_study_period.sql index 921fea0..15447e3 100644 --- a/cumulus_library_covid/covid_symptom/table_study_period.sql +++ b/cumulus_library_covid/covid_symptom/table_study_period.sql @@ -1,12 +1,12 @@ CREATE TABLE covid_symptom__study_period AS SELECT DISTINCT v.variant_era, - s.period_start_day as start_date, - s.period_start_week as start_week, - s.period_start_month as start_month, - s.period_end_day as end_date, + s.period_start_day AS start_date, + s.period_start_week AS start_week, + s.period_start_month AS start_month, + s.period_end_day AS end_date, s.age_at_visit, - s.author_day as author_date, + s.author_day AS author_date, s.author_week, s.author_month, s.author_year, From 32c0764714730f37a378ca72b6285c40b52dd2aa Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Thu, 4 Apr 2024 09:14:29 -0400 Subject: [PATCH 3/4] Uprev unit test python, sqlfluff pass --- .github/workflows/ci.yaml | 2 +- cumulus_library_covid/covid_symptom/define_ed_note.sql | 2 +- cumulus_library_covid/covid_symptom/define_study_period.sql | 2 +- cumulus_library_covid/covid_symptom/define_symptom.sql | 2 +- cumulus_library_covid/covid_symptom/table_prevalence_ed.sql | 2 +- cumulus_library_covid/covid_symptom/table_symptom.sql | 3 +-- 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f2e56ef..ffe4d74 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -10,7 +10,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: 3.10 lint: runs-on: ubuntu-22.04 diff --git a/cumulus_library_covid/covid_symptom/define_ed_note.sql b/cumulus_library_covid/covid_symptom/define_ed_note.sql index 9e07725..9ae287b 100644 --- a/cumulus_library_covid/covid_symptom/define_ed_note.sql +++ b/cumulus_library_covid/covid_symptom/define_ed_note.sql @@ -61,4 +61,4 @@ FROM 'Emergency department Consult note' ) ) - AS t (from_system, from_code, from_display, system, code, display); --noqa: AL05 + AS t (from_system, from_code, from_display, system, code, display); --noqa: AL05 diff --git a/cumulus_library_covid/covid_symptom/define_study_period.sql b/cumulus_library_covid/covid_symptom/define_study_period.sql index b39a406..75b2f4c 100644 --- a/cumulus_library_covid/covid_symptom/define_study_period.sql +++ b/cumulus_library_covid/covid_symptom/define_study_period.sql @@ -17,4 +17,4 @@ FROM ( ('delta', date('2021-06-21'), date('2021-12-19')), ('omicron', date('2021-12-20'), date('2022-06-01')) ) -AS t (variant_era, variant_start, variant_end); + AS t (variant_era, variant_start, variant_end); diff --git a/cumulus_library_covid/covid_symptom/define_symptom.sql b/cumulus_library_covid/covid_symptom/define_symptom.sql index 7447e00..267a577 100644 --- a/cumulus_library_covid/covid_symptom/define_symptom.sql +++ b/cumulus_library_covid/covid_symptom/define_symptom.sql @@ -528,4 +528,4 @@ FROM ( ('C0242429', 'T184', '162397003', 'SNOMEDCT_US', 'dynophagia', 'Sore throat'), ('C0242429', 'T184', 'R07.0', 'ICD10CM', 'R07.0', 'Sore throat') ) -AS t (cui, tui, code, code_system, text, pref); + AS t (cui, tui, code, code_system, text, pref); diff --git a/cumulus_library_covid/covid_symptom/table_prevalence_ed.sql b/cumulus_library_covid/covid_symptom/table_prevalence_ed.sql index ac70068..10f3b30 100644 --- a/cumulus_library_covid/covid_symptom/table_prevalence_ed.sql +++ b/cumulus_library_covid/covid_symptom/table_prevalence_ed.sql @@ -32,7 +32,7 @@ join_2020 AS ( COALESCE(pcr.covid_pcr_result_display, 'None') AS covid_pcr_result, COALESCE(dx.cond_code, 'None') AS covid_icd10, (dx.cond_code IS NOT NULL OR pcr.covid_pcr_result_display = 'POSITIVE') - AS covid_dx, + AS covid_dx, COALESCE(cn.symptom_display, 'None') AS covid_symptom, COALESCE(icd10.icd10_display, 'None') AS symptom_icd10_display FROM study_period AS p diff --git a/cumulus_library_covid/covid_symptom/table_symptom.sql b/cumulus_library_covid/covid_symptom/table_symptom.sql index 40eb677..a7bebf1 100644 --- a/cumulus_library_covid/covid_symptom/table_symptom.sql +++ b/cumulus_library_covid/covid_symptom/table_symptom.sql @@ -73,8 +73,7 @@ WITH temp_period AS ( age_group, gender, race_display, - enc_class_display - AS ed_note + enc_class_display AS ed_note FROM covid_symptom__study_period ), From 919aeb00aee071fad00d153200f27ba5d2066920 Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Thu, 4 Apr 2024 09:18:31 -0400 Subject: [PATCH 4/4] yaml int vs string quoting --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ffe4d74..9c83b7d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -10,7 +10,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: "3.10" lint: runs-on: ubuntu-22.04