Skip to content

Commit

Permalink
Automatically update parent-child lineage info when doing a match using --with-sublineage. Disable with --no-lineage-update.
Browse files Browse the repository at this point in the history
  • Loading branch information
matthuska committed Oct 26, 2023
1 parent 96db31c commit 6812ee8
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
11 changes: 6 additions & 5 deletions lib/Lineages_UPDATER.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# We just adapt and change some parts to be used in covsonar, vocal etc.
import json
import os
import sys

import pandas as pd
import requests
Expand Down Expand Up @@ -88,12 +89,12 @@ def download_source(tmp_dir):
alias_key_url = "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/pango_designation/alias_key.json"
lineag = os.path.join(tmp_dir, "lineags.csv")
alias_key = os.path.join(tmp_dir, "alias_key.json")
print("Download lineages")
print("Download lineages", file=sys.stderr)
url_content = requests.get(lineages_url).content
csv_file = open(lineag, "wb")
csv_file.write(url_content)
csv_file.close()
print("Download alias_key")
print("Download alias_key", file=sys.stderr)
items = requests.get(alias_key_url)
data = items.json()
with open(alias_key, "w") as f:
Expand All @@ -115,7 +116,7 @@ def process_lineage(alias_key_path, lineages_path, output):
with open(alias_key_path ,'w') as nf:
json.dump(data_dict, nf)
"""
print("Create all lineages")
print("Create all lineages", file=sys.stderr)
aliasor = Aliasor(alias_key_path)
df_lineages = pd.read_csv(lineages_path)
lineages = df_lineages.lineage.unique()
Expand All @@ -127,7 +128,7 @@ def process_lineage(alias_key_path, lineages_path, output):
uncompressed_lineages.sort(key=lts)

sorted_lineages = list(map(aliasor.compress, uncompressed_lineages))
print("Calculate parent-child relationship")
print("Calculate parent-child relationship", file=sys.stderr)
# -- Fill the sub child --
# the current method works, but it does not perform well:
# the algorithm uses 3 loops
Expand Down Expand Up @@ -157,7 +158,7 @@ def process_lineage(alias_key_path, lineages_path, output):
row_dict["lineage"] = _id
row_dict["sublineage"] = "none"
_final_list.append(row_dict)
print("Write output:", output)
print("Write output:", output, file=sys.stderr)
df = pd.DataFrame.from_dict(_final_list, orient="columns")
df = df[df.lineage != ""]
df.sort_values(by=["lineage"]).to_csv(output, sep="\t", index=False)
13 changes: 10 additions & 3 deletions sonar.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ def parse_args():
help="recursively get all sublineages from a given lineage (--lineage) (only child) ",
action="store_true",
)
# parser_match.add_argument('--recursion', help="recursively get all sublineages of a given lineage (--lineage). this will work only if '--with-sublineage' is used",action="store_true")
parser_match.add_argument(
"--lineage",
metavar="STR",
Expand Down Expand Up @@ -336,6 +335,11 @@ def parse_args():
parser_match.add_argument(
"--debug", help="show database query for debugging", action="store_true"
)
parser_match.add_argument(
"--no-lineage-update",
help="do not automatically update parent-child lineage relationship information",
action="store_true",
)

# create the parser for the "restore" command
parser_restore = subparsers.add_parser(
Expand Down Expand Up @@ -975,13 +979,16 @@ def process_update_expressions(expr):
else:
debug = False
# update-lineage-info
if args.tool == "update-lineage-info":
if args.tool == "update-lineage-info" or (
args.tool == "match" and args.with_sublineage and not args.no_lineage_update
):
tmp_dirname = mkdtemp(prefix=".tmp_")
alias_key, lineage = Lineages_UPDATER.download_source(tmp_dirname)
Lineages_UPDATER.process_lineage(alias_key, lineage, "lib/lineage.all.tsv")
if os.path.isdir(tmp_dirname):
shutil.rmtree(tmp_dirname)
sys.exit("Complete!")
if args.tool == "update-lineage-info":
sys.exit("Complete!")

if not args.db is None and args.tool != "add" and not os.path.isfile(args.db):
sys.exit("input error: database does not exist.")
Expand Down

0 comments on commit 6812ee8

Please sign in to comment.