From 9758d60deea470d7a7022928038479ba0219079e Mon Sep 17 00:00:00 2001 From: akrinos Date: Mon, 4 Dec 2023 21:09:22 -0500 Subject: [PATCH] Updating EUKulele to v2.0.7 --- recipe/meta.yaml | 4 +--- src/EUKulele/EUKulele_main.py | 6 +++++- src/EUKulele/tax_placement.py | 17 +++++++++-------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 1d3e037..a63c661 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: eukulele - version: 2.0.5 + version: 2.0.7 build: noarch: python @@ -28,7 +28,6 @@ requirements: - pytest-cov - pytest-xdist - blast -# - busco==4.1.4 - diamond - transdecoder - wget @@ -52,7 +51,6 @@ requirements: - pytest-cov - pytest-xdist - blast - # - busco==4.0.6 - diamond - transdecoder - wget diff --git a/src/EUKulele/EUKulele_main.py b/src/EUKulele/EUKulele_main.py index aa6b594..fa249b5 100644 --- a/src/EUKulele/EUKulele_main.py +++ b/src/EUKulele/EUKulele_main.py @@ -236,8 +236,12 @@ def main(args_in): prot_tab = os.path.join(reference_dir, "prot-map.json") f = open(os.path.join(output_dir, "README_DB.txt"), "a") e = datetime.datetime.now() + filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "static", "VERSION") + file_read = open(filename, "r") + f.write("The version of EUKulele was "+str(file_read.read())+".\n") f.write("Time finished was " + str(e) + " for database " + \ - str(args.database.lower())) + str(args.database.lower())+"\n") ## Next, see whether there is a subdirectory of reference ## directory containing folder for our DB name diff --git a/src/EUKulele/tax_placement.py b/src/EUKulele/tax_placement.py index dba1cb0..07772d5 100644 --- a/src/EUKulele/tax_placement.py +++ b/src/EUKulele/tax_placement.py @@ -130,16 +130,12 @@ def match_maker(dd, consensus_cutoff, consensus_proportion, tax_dict, use_counts ambiguous = 0 # we assume unambiguous md = dd.bitscore.max() * consensus_proportion #0.97 - #dd.bitscore.max() #dd.pident.max() transcript_name = set(list(dd["qseqid"])) if len(transcript_name) > 1: print("More than 1 transcript name included in the group.", flush = True) transcript_name = list(transcript_name)[0] ds = list(set(dd[dd.bitscore>=md]['ssqid_TAXID'])) counts = list(set(dd[dd.bitscore>=md]['counts'])) - #maxpident = md #max(list(set(dd[dd.pident==md]['pident']))) - #ds = list(set(dd[dd.bitscore==md]['ssqid_TAXID'])) - #counts = list(set(dd[dd.bitscore==md]['counts'])) maxpident = max(list(set(dd[dd.bitscore>=md]['pident']))) if len(counts) >= 1: @@ -150,7 +146,10 @@ def match_maker(dd, consensus_cutoff, consensus_proportion, tax_dict, use_counts # most specific taxonomic level assigned if len(ds)==1: if ds[0] not in tax_dict: - return pd.DataFrame(columns=['transcript_name','classification_level', + return pd.DataFrame([[transcript_name, assignment,\ + "MissingFromTaxDict", "MissingFromTaxDict", md,\ + chosen_count, ambiguous]], + columns=['transcript_name','classification_level', 'full_classification','classification', 'max_pid','counts','ambiguous']) full_classification = str(tax_dict[ds[0]]).split(";")[0:level] @@ -163,9 +162,11 @@ def match_maker(dd, consensus_cutoff, consensus_proportion, tax_dict, use_counts full_classification_0 = [] for d in ds: if d not in tax_dict: - return(pd.DataFrame(columns=['transcript_name','classification_level', - 'full_classification','classification', - 'max_pid','counts','ambiguous'])) + classification_0.append("MissingFromTaxDict") + full_classification_0.append("MissingFromTaxDict") + #return(pd.DataFrame(columns=['transcript_name','classification_level', + # 'full_classification','classification', + # 'max_pid','counts','ambiguous'])) d_full_class = str(tax_dict[str(d)]).split(";")[0:level] classification_0.append(d_full_class[len(d_full_class) - 1]) # the most specific taxonomic level we can classify by