From 8671ced089a5f6d48818dd2714dc08ff44fead64 Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Tue, 10 Dec 2024 06:24:37 -0500 Subject: [PATCH 1/4] remove version field from docker compose fields - deprecated --- docker-compose.cloudsql.prod.yml | 2 -- docker-compose.cloudsql.yml | 2 -- docker-compose.flu.genbank.yml | 1 - docker-compose.flu.gisaid.yml | 1 - docker-compose.rsv.yml | 1 - docker-compose.sars2.yml | 1 - docker-compose.yml | 1 - 7 files changed, 9 deletions(-) diff --git a/docker-compose.cloudsql.prod.yml b/docker-compose.cloudsql.prod.yml index 0327d2128..8292dd711 100644 --- a/docker-compose.cloudsql.prod.yml +++ b/docker-compose.cloudsql.prod.yml @@ -1,8 +1,6 @@ # docker compose --env-file .cloudsql_env -f docker-compose.cloudsql.yml -p cgcloudsql build # docker compose --env-file .cloudsql_env -f docker-compose.cloudsql.yml -p cgcloudsql up -d -version: "3.7" - services: server: build: diff --git a/docker-compose.cloudsql.yml b/docker-compose.cloudsql.yml index fb35425c2..ebd8f55b7 100644 --- a/docker-compose.cloudsql.yml +++ b/docker-compose.cloudsql.yml @@ -1,8 +1,6 @@ # docker compose --env-file .cloudsql_env -f docker-compose.cloudsql.yml -p cgcloudsql build # docker compose --env-file .cloudsql_env -f docker-compose.cloudsql.yml -p cgcloudsql up -d -version: "3.7" - services: server: build: diff --git a/docker-compose.flu.genbank.yml b/docker-compose.flu.genbank.yml index f142475fc..66887b654 100644 --- a/docker-compose.flu.genbank.yml +++ b/docker-compose.flu.genbank.yml @@ -1,6 +1,5 @@ # docker compose -f docker-compose.flu.genbank.yml up -d name: pathmut-flu-genbank -version: "3.7" services: server: diff --git a/docker-compose.flu.gisaid.yml b/docker-compose.flu.gisaid.yml index 984f20be7..e1fece175 100644 --- a/docker-compose.flu.gisaid.yml +++ b/docker-compose.flu.gisaid.yml @@ -1,6 +1,5 @@ # docker compose -f docker-compose.flu.gisaid.yml up -d name: pathmut-flu-gisaid -version: "3.7" services: server: diff --git a/docker-compose.rsv.yml b/docker-compose.rsv.yml index 5eefc195a..b949914d0 100644 --- a/docker-compose.rsv.yml +++ b/docker-compose.rsv.yml @@ -1,6 +1,5 @@ # docker compose -f docker-compose.rsv.yml up -d name: pathmut-rsv -version: "3.7" services: server: diff --git a/docker-compose.sars2.yml b/docker-compose.sars2.yml index b9a8a5162..606899808 100644 --- a/docker-compose.sars2.yml +++ b/docker-compose.sars2.yml @@ -1,6 +1,5 @@ # docker compose -f docker-compose.sars2.yml up -d name: pathmut-sars2 -version: "3.7" services: server: diff --git a/docker-compose.yml b/docker-compose.yml index 2bd7902d3..02fc7f9fa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,4 @@ name: pathmut-all -version: "3.7" services: cg-frontend: From a30a10d9bcdd9b7138b1060241585d314f9a9166 Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Tue, 10 Dec 2024 07:08:15 -0500 Subject: [PATCH 2/4] add sequence QC to download reqs --- src/stores/dataStore.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/stores/dataStore.js b/src/stores/dataStore.js index c1fa2d2dc..2d7e9e32d 100644 --- a/src/stores/dataStore.js +++ b/src/stores/dataStore.js @@ -317,6 +317,12 @@ export class DataStore { (field) => selectedFields[field] ), mutation_format: mutationFormat, + sequence_length: toJS( + rootStoreInstance.configStore.sequenceLengthRange + ), + percent_ambiguous: toJS( + rootStoreInstance.configStore.percentAmbiguousRange + ), }), }) .then((res) => { @@ -372,6 +378,12 @@ export class DataStore { end_date: toJS(rootStoreInstance.configStore.endDate), subm_start_date: toJS(rootStoreInstance.configStore.submStartDate), subm_end_date: toJS(rootStoreInstance.configStore.submEndDate), + sequence_length: toJS( + rootStoreInstance.configStore.sequenceLengthRange + ), + percent_ambiguous: toJS( + rootStoreInstance.configStore.percentAmbiguousRange + ), compress, }), }) @@ -734,6 +746,12 @@ export class DataStore { (field) => selectedFields[field] ), mutation_format: mutationFormat, + sequence_length: toJS( + rootStoreInstance.configStore.sequenceLengthRange + ), + percent_ambiguous: toJS( + rootStoreInstance.configStore.percentAmbiguousRange + ), }), }) .then((res) => { From 4dc1d63ce448bdfb76802df36dde462a4053167f Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Tue, 10 Dec 2024 07:09:16 -0500 Subject: [PATCH 3/4] Fix parameter order error with selected_reference --- services/server/cg_server/download/metadata.py | 2 +- services/server/cg_server/query/group_mutation_frequencies.py | 2 +- services/server/cg_server/query/variant_table.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/services/server/cg_server/download/metadata.py b/services/server/cg_server/download/metadata.py index 241a80160..6caf88f93 100644 --- a/services/server/cg_server/download/metadata.py +++ b/services/server/cg_server/download/metadata.py @@ -33,9 +33,9 @@ def download_metadata(conn, req): req.get("subm_end_date", None), req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), + selected_reference, req.get("sequence_length", None), req.get("percent_ambiguous", None), - selected_reference, ) # Fields that the user wants diff --git a/services/server/cg_server/query/group_mutation_frequencies.py b/services/server/cg_server/query/group_mutation_frequencies.py index fa199f664..39ad33ff4 100644 --- a/services/server/cg_server/query/group_mutation_frequencies.py +++ b/services/server/cg_server/query/group_mutation_frequencies.py @@ -114,9 +114,9 @@ def query_group_mutation_frequencies_dynamic(conn, req): req.get("subm_end_date", None), req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), + selected_reference, req.get("sequence_length", None), req.get("percent_ambiguous", None), - selected_reference, ) sequence_mutation_table = "sequence_" + mutation_table diff --git a/services/server/cg_server/query/variant_table.py b/services/server/cg_server/query/variant_table.py index 0f97a6022..4b74cfa6f 100644 --- a/services/server/cg_server/query/variant_table.py +++ b/services/server/cg_server/query/variant_table.py @@ -50,10 +50,11 @@ def build_variant_table(conn, req): req.get("subm_end_date", None), req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), + selected_reference, req.get("sequence_length", None), req.get("percent_ambiguous", None), - selected_reference, ) + (mutation_filter, mutation_table) = build_coordinate_filters( conn, From 8b78b3ecc4a176457438e36a92eb7ea1e94e7838 Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Tue, 10 Dec 2024 07:09:44 -0500 Subject: [PATCH 4/4] fix missing refactor to isolate_id --- services/server/cg_server/download/metadata.py | 6 +++--- .../query/group_mutation_frequencies.py | 6 +++--- services/server/cg_server/query/report.py | 16 ++++++++-------- services/server/cg_server/query/variant_table.py | 13 +++++++------ 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/services/server/cg_server/download/metadata.py b/services/server/cg_server/download/metadata.py index 6caf88f93..aa7786125 100644 --- a/services/server/cg_server/download/metadata.py +++ b/services/server/cg_server/download/metadata.py @@ -45,7 +45,7 @@ def download_metadata(conn, req): ) sequence_cols = [ - "Accession ID", + "isolate_id", "collection_date", "submission_date", ] @@ -122,10 +122,10 @@ def download_metadata(conn, req): sql.SQL( """ INNER JOIN ( - SELECT "sequence_id", "reference", "mutations" + SELECT "isolate_id", "reference", "mutations" FROM {mutation_table} ) {mutation_table_short} ON - {mutation_table_short}."sequence_id" = m."sequence_id" AND + {mutation_table_short}."isolate_id" = m."isolate_id" AND {mutation_table_short}."reference" = {reference_name} """ ).format( diff --git a/services/server/cg_server/query/group_mutation_frequencies.py b/services/server/cg_server/query/group_mutation_frequencies.py index 39ad33ff4..486e401a3 100644 --- a/services/server/cg_server/query/group_mutation_frequencies.py +++ b/services/server/cg_server/query/group_mutation_frequencies.py @@ -129,16 +129,16 @@ def query_group_mutation_frequencies_dynamic(conn, req): sql.SQL( """ WITH "group_counts" AS ( - SELECT {group_col}, COUNT("sequence_id") + SELECT {group_col}, COUNT("isolate_id") FROM {sequence_mutation_table} WHERE {sequence_where_filter} GROUP BY {group_col} ), "group_muts" AS ( SELECT - {group_col}, "mutation", COUNT("sequence_id") + {group_col}, "mutation", COUNT("isolate_id") FROM ( - SELECT "sequence_id", {group_col}, UNNEST("mutations") as "mutation" + SELECT "isolate_id", {group_col}, UNNEST("mutations") as "mutation" FROM {sequence_mutation_table} WHERE {sequence_where_filter} ) "group_muts" diff --git a/services/server/cg_server/query/report.py b/services/server/cg_server/query/report.py index df2770958..7bb621712 100644 --- a/services/server/cg_server/query/report.py +++ b/services/server/cg_server/query/report.py @@ -91,7 +91,7 @@ def generate_report(conn, req): GROUP BY "region", "mutation_id" ), region_counts AS ( - SELECT "region", COUNT("sequence_id") + SELECT "region", COUNT("isolate_id") FROM "metadata" m WHERE "collection_date" >= %(start_date)s AND @@ -195,7 +195,7 @@ def generate_report(conn, req): """ WITH seq_cooc AS ( SELECT - seq_mut."sequence_id", + seq_mut."isolate_id", seq_mut."region", ("mutations" & ( SELECT ARRAY_AGG("id") @@ -204,7 +204,7 @@ def generate_report(conn, req): )) as "mutations", m.{group} FROM "sequence_gene_aa_mutation" seq_mut - INNER JOIN "metadata" m ON seq_mut."sequence_id" = m."sequence_id" + INNER JOIN "metadata" m ON seq_mut."isolate_id" = m."isolate_id" WHERE seq_mut."collection_date" >= %(start_date)s AND seq_mut."collection_date" <= %(end_date)s @@ -218,7 +218,7 @@ def generate_report(conn, req): SELECT "mutations", {group}, - COUNT("sequence_id") as "count" + COUNT("isolate_id") as "count" FROM seq_cooc GROUP BY "mutations", {group} ) cooc_group @@ -228,14 +228,14 @@ def generate_report(conn, req): SELECT seq_cooc."region", "mutations", - COUNT(seq_cooc."sequence_id") AS "count" + COUNT(seq_cooc."isolate_id") AS "count" FROM seq_cooc GROUP BY seq_cooc."mutations", seq_cooc."region" ), region_counts AS ( SELECT seq_cooc."region", - COUNT(seq_cooc."sequence_id") as "count" + COUNT(seq_cooc."isolate_id") as "count" FROM seq_cooc GROUP BY seq_cooc."region" ) @@ -321,7 +321,7 @@ def generate_report(conn, req): WITH region_counts AS ( SELECT m."region", - COUNT(m."sequence_id") as "count" + COUNT(m."isolate_id") as "count" FROM "metadata" m WHERE m."collection_date" >= %(start_date)s AND @@ -332,7 +332,7 @@ def generate_report(conn, req): SELECT m."region", m.{group}, - COUNT(m."sequence_id") AS "count" + COUNT(m."isolate_id") AS "count" FROM "metadata" m WHERE m."collection_date" >= %(start_date)s AND diff --git a/services/server/cg_server/query/variant_table.py b/services/server/cg_server/query/variant_table.py index 4b74cfa6f..b13f82edb 100644 --- a/services/server/cg_server/query/variant_table.py +++ b/services/server/cg_server/query/variant_table.py @@ -41,6 +41,7 @@ def build_variant_table(conn, req): raise Exception("No reference specified") with conn.cursor() as cur: + sequence_where_filter = build_sequence_location_where_filter( constants["GROUP_MUTATION"], get_loc_level_ids(req), @@ -74,7 +75,7 @@ def build_variant_table(conn, req): # Get grouping columns, metadata columns metadata_cols = [ - "Accession ID", + "isolate_id", "collection_date", "submission_date", ] @@ -127,7 +128,7 @@ def build_variant_table(conn, req): sm."mutations" FROM ( SELECT - sst."sequence_id", + sst."isolate_id", sst."reference", (sst.mutations & ( SELECT ARRAY_AGG("id") @@ -137,9 +138,9 @@ def build_variant_table(conn, req): FROM {sequence_mutation_table} sst WHERE {sequence_where_filter} ) sm - INNER JOIN "metadata" m ON sm."sequence_id" = m."sequence_id" + INNER JOIN "metadata" m ON sm."isolate_id" = m."isolate_id" {joins} - ORDER BY m."Accession ID" ASC + ORDER BY m."isolate_id" ASC """ ).format( metadata_cols_expr=sql.SQL(",").join(metadata_cols_expr), @@ -156,7 +157,7 @@ def build_variant_table(conn, req): # cur.fetchall(), columns=metadata_cols + ["mutation_name", "pos"] cur.fetchall(), columns=metadata_cols + ["reference", "mutations"], - ).set_index("Accession ID") + ).set_index("isolate_id") mutation_name_field = "mutation_str" if mutation_format == constants["MUTATION_FORMAT"]["POS_REF_ALT"]: @@ -209,7 +210,7 @@ def build_variant_table(conn, req): .fillna("N/A") ) ), - index=["Accession ID"], + index=["isolate_id"], columns="mutations", values="val", )