From ad2f3b980205f1093e855ecfdc0a5ec95f8168af Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 11:33:59 +0100
Subject: [PATCH 01/42] increase number shift to avoid overlap of number and
 dot

---
 src/visualize_snp_v4.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index ff37070..a08c33a 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -160,7 +160,7 @@ p6bis = p6 +guides(shape = guide_legend(order=1, direction="horizontal", title="
 		   
 # p7 = p6 + theme(legend.position = "bottom") # modify the legend position
 p8 = p6bis + ylim(-0.06,1.2) # modify the scale
-p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.01, label = indice, angle = 0)) # add indice to the grapĥ
+p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the grapĥ
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
 p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")

From f264245f804082622b2a8221a72c80483f65d931 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 11:34:31 +0100
Subject: [PATCH 02/42] replace f-string to improve compatibility with old
 python3

---
 src/vvv2_display.py | 227 ++++++++++++++++++++++----------------------
 1 file changed, 116 insertions(+), 111 deletions(-)

diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index b46b958..4b88a62 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -76,8 +76,8 @@ def __main__():
     #########################################
     # directories fo sub programs
     #########################################
-    PYTHON_SCRIPTS = f"{dir_path}/" # PYTHON_SCRIPTS/"
-    R_SCRIPTS      = f"{dir_path}/" # R_SCRIPTS/"
+    PYTHON_SCRIPTS = dir_path + "/" # PYTHON_SCRIPTS/"
+    R_SCRIPTS      = dir_path + "/" # R_SCRIPTS/"
     #########################################
 
     parser = argparse.ArgumentParser()
@@ -224,58 +224,58 @@ def __main__():
     # ------------------------------------------------------------------
     # TEST for vvv2_display
     # ------------------------------------------------------------------
-    test_dir = f"{dir_path}/../test_vvv2_display"
+    test_dir = dir_path + "/../test_vvv2_display"
     if b_test_vvv2_display:
         # --------------------------------------------------------------
         # COMPLETE GENOME
         # in files
-        pass_annot_f  = f"{test_dir}/res2_vadr_pass.tbl" # from vadr results
-        fail_annot_f  = f"{test_dir}/res2_vadr_fail.tbl" # from vadr results
-        seq_stat_f    = f"{test_dir}/res2_vadr.seqstat"  # from vadr results
-        vardict_vcf_f = f"{test_dir}/res2_vardict.vcf"  # from lofreq results    
+        pass_annot_f  = test_dir + "/res2_vadr_pass.tbl" # from vadr results
+        fail_annot_f  = test_dir + "/res2_vadr_fail.tbl" # from vadr results
+        seq_stat_f    = test_dir + "/res2_vadr.seqstat"  # from vadr results
+        vardict_vcf_f = test_dir + "/res2_vardict.vcf"  # from lofreq results    
         # tmp out files
-        json_annot_f  = f"{test_dir}/res2_vadr.json"     # from convert_tbl2json.py
-        contig_limits_f= f"{test_dir}/contig_limits.txt"
+        json_annot_f  = test_dir + "/res2_vadr.json"     # from convert_tbl2json.py
+        contig_limits_f= test_dir + "/contig_limits.txt"
         # final out file
-        png_var_f     = f"{test_dir}/res2_vvv2.png"     # from ...
-        cmd = ' '.join([f"{dir_path}/vvv2_display.py",
-                    f"--pass_tbl_f {pass_annot_f}",
-                    f"--fail_tbl_f {fail_annot_f}",
-                    f"--seq_stat_f {seq_stat_f}",
-                    f"--vcf_f {vardict_vcf_f}",
-                    f"--contig_limits_f {contig_limits_f}",                 
-                    f"--png_var_f {png_var_f}"
+        png_var_f     = test_dir + "/res2_vvv2.png"     # from ...
+        cmd = ' '.join([ dir_path + "/vvv2_display.py",
+                            "--pass_tbl_f", pass_annot_f,
+                            "--fail_tbl_f", fail_annot_f,
+                            "--seq_stat_f", seq_stat_f,
+                            "--vcf_f", vardict_vcf_f,
+                            "--contig_limits_f", contig_limits_f,                 
+                            "--png_var_f", png_var_f
                     ])
-        print(f"{prog_tag} START")    
-        print(f"{prog_tag} cmd:{cmd}")
+        print(prog_tag + " START")    
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)
-        print(f"{prog_tag} END")
+        print(prog_tag + " END")
         # --------------------------------------------------------------
 
         # --------------------------------------------------------------
         # CONTIGS
         # in files
-        pass_annot_f  = f"{test_dir}/res_vadr_pass.tbl" # from vadr results
-        fail_annot_f  = f"{test_dir}/res_vadr_fail.tbl" # from vadr results
-        seq_stat_f    = f"{test_dir}/res_vadr.seqstat"  # from vadr results
-        vardict_vcf_f = f"{test_dir}/res_vardict.vcf"  # from lofreq results    
+        pass_annot_f  = test_dir + "/res_vadr_pass.tbl" # from vadr results
+        fail_annot_f  = test_dir + "/res_vadr_fail.tbl" # from vadr results
+        seq_stat_f    = test_dir + "/res_vadr.seqstat"  # from vadr results
+        vardict_vcf_f = test_dir + "/res_vardict.vcf"  # from lofreq results    
         # tmp out files
-        json_annot_f    = f"{test_dir}/res_vadr.json"     # from convert_tbl2json.
-        contig_limits_f = f"{test_dir}/contig_limits.txt"  # from lofreq results    py
+        json_annot_f    = test_dir + "/res_vadr.json"     # from convert_tbl2json.
+        contig_limits_f = test_dir + "/contig_limits.txt"  # from lofreq results    py
         # final out file
-        png_var_f    = f"{test_dir}/res_vvv2.png"     # from ...
-        cmd = ' '.join([f"{dir_path}/vvv2_display.py",
-                    f"--pass_tbl_f {pass_annot_f}",
-                    f"--fail_tbl_f {fail_annot_f}",
-                    f"--seq_stat_f {seq_stat_f}",
-                    f"--vcf_f {vardict_vcf_f}",
-                    f"--contig_limits_f {contig_limits_f}",                        
-                    f"--png_var_f {png_var_f}"
+        png_var_f    = test_dir + "/res_vvv2.png"     # from ...
+        cmd = ' '.join([ dir_path + "/vvv2_display.py",
+                    "--pass_tbl_f", pass_annot_f,
+                    "--fail_tbl_f", fail_annot_f,
+                    "--seq_stat_f", seq_stat_f,
+                    "--vcf_f", vardict_vcf_f,
+                    "--contig_limits_f", contig_limits_f,                        
+                    "--png_var_f", png_var_f
                     ])
-        print(f"{prog_tag} START")    
-        print(f"{prog_tag} cmd:{cmd}")
+        print(prog_tag + " START")    
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)
-        print(f"{prog_tag} END")
+        print(prog_tag + " END")
         # --------------------------------------------------------------
 
         sys.exit()
@@ -287,19 +287,19 @@ def __main__():
     # ------------------------------------------------------------------
     if b_test_convert_tbl2json:
         # # COMPLETE GENOME
-        # pass_annot_f = f"{test_dir}/res2_vadr_pass.tbl" # from vadr results
-        # fail_annot_f = f"{test_dir}/res2_vadr_fail.tbl" # from vadr results
-        # seq_stat_f   = f"{test_dir}/res2_vadr.seqstat"  # from vadr results
-        # json_annot_f = f"{test_dir}/res2_vadr.json"
-        ## bed_annot_f  = f"{test_dir}/res2_vadr.bed"
-        # bed_vardict_annot_f  = f"{test_dir}/res2_vadr.4vardict.bed"        
+        # pass_annot_f = test_dir + "/res2_vadr_pass.tbl" # from vadr results
+        # fail_annot_f = test_dir + "/res2_vadr_fail.tbl" # from vadr results
+        # seq_stat_f   = test_dir + "/res2_vadr.seqstat"  # from vadr results
+        # json_annot_f = test_dir + "/res2_vadr.json"
+        ## bed_annot_f  = test_dir + "/res2_vadr.bed"
+        # bed_vardict_annot_f  = test_dir + "/res2_vadr.4vardict.bed"        
         # CONTIGS
-        pass_annot_f         = f"{test_dir}/res_vadr_pass.tbl" # from vadr results
-        fail_annot_f         = f"{test_dir}/res_vadr_fail.tbl" # from vadr results
-        seq_stat_f           = f"{test_dir}/res_vadr.seqstat"  # from vadr results
-        json_annot_f         = f"{test_dir}/res_vadr.json"
-        # bed_annot_f          = f"{test_dir}/res_vadr.bed"
-        bed_vardict_annot_f  = f"{test_dir}/res_vadr.4vardict.bed"        
+        pass_annot_f         = test_dir + "/res_vadr_pass.tbl" # from vadr results
+        fail_annot_f         = test_dir + "/res_vadr_fail.tbl" # from vadr results
+        seq_stat_f           = test_dir + "/res_vadr.seqstat"  # from vadr results
+        json_annot_f         = test_dir + "/res_vadr.json"
+        # bed_annot_f          = test_dir + "/res_vadr.bed"
+        bed_vardict_annot_f  = test_dir + "/res_vadr.4vardict.bed"        
 
     if(json_annot_f == ''):
        json_annot_f = vardict_vcf_f
@@ -310,22 +310,22 @@ def __main__():
 #       bed_vardict_annot_f = bed_vardict_annot_f.replace('.vcf', '.vardict.bed')
        bed_vardict_annot_f = re.sub('\.[^\.]+$', '_vardict.bed', bed_vardict_annot_f)
        
-    p_script = f"{PYTHON_SCRIPTS}convert_tbl2json.py"
-    cmd = ' '.join([f"{p_script}",
-                    f"--pass_annot_f {pass_annot_f}",
-                    f"--fail_annot_f {fail_annot_f}",
-                    f"--seq_stat_f {seq_stat_f}",
-                    f"--json_out_f {json_annot_f}",
-    #                f"--bed_out_f {bed_annot_f}",
-                    f"--bed_vardict_out_f {bed_vardict_annot_f}"                             
+    p_script = PYTHON_SCRIPTS + "convert_tbl2json.py"
+    cmd = ' '.join([ p_script,
+                    "--pass_annot_f", pass_annot_f,
+                    "--fail_annot_f", fail_annot_f,
+                    "--seq_stat_f", seq_stat_f,
+                    "--json_out_f", json_annot_f,
+    #               "--bed_out_f", bed_annot_f,
+                    "--bed_vardict_out_f", bed_vardict_annot_f                             
                     ])
-    print(f"{prog_tag} cmd:{cmd}")
+    print(prog_tag + " cmd:" + cmd)
 
     if b_test_convert_tbl2json:
-        print(f"{prog_tag} [test_convert_tbl2json] START")
-        print(f"{prog_tag} cmd:{cmd}")
+        print(prog_tag + " [test_convert_tbl2json] START")
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)    
-        print(f"{prog_tag} [test_convert_tbl2json] END")
+        print(prog_tag + " [test_convert_tbl2json] END")
         sys.exit()
     else:
         os.system(cmd)    
@@ -335,31 +335,31 @@ def __main__():
     # in the final created picture
     # ------------------------------------------------------------------
     if b_test_correct_multicontig_vardict_vcf:
-        seq_stat_f           = f"{test_dir}/res_vadr.seqstat"  # from vadr results    
-        vardict_vcf_f        = f"{test_dir}/res_vardict.vcf"  # from vardict results            
-        correct_vcf_f        = f"{test_dir}/res_correct.vcf"  # corrected out results
-        contig_limits_f      = f"{test_dir}/contig_limits.txt"  # contig limits
+        seq_stat_f           = test_dir + "/res_vadr.seqstat"  # from vadr results    
+        vardict_vcf_f        = test_dir + "/res_vardict.vcf"  # from vardict results            
+        correct_vcf_f        = test_dir + "/res_correct.vcf"  # corrected out results
+        contig_limits_f      = test_dir + "/contig_limits.txt"  # contig limits
 
     if(correct_vcf_f == ''):
        correct_vcf_f = vardict_vcf_f
 #       correct_vcf_f = correct_vcf_f.replace('vardict.vcf', '_correct.vcf')
        correct_vcf_f = re.sub('\.[^\.]+$', '_correct.vcf', correct_vcf_f)
        
-    p_script = f"{PYTHON_SCRIPTS}correct_multicontig_vardict_vcf.py"
-    print(f"p_script:{p_script}")
-    cmd = ' '.join([f"{p_script}",
-                    f"--seq_stat_f {seq_stat_f}",
-                    f"--vardict_vcf_f {vardict_vcf_f}",
-                    f"--correct_vcf_f {correct_vcf_f}",
-                    f"--contig_limits_f {contig_limits_f}"                    
+    p_script = PYTHON_SCRIPTS + "correct_multicontig_vardict_vcf.py"
+    print("p_script:" + p_script)
+    cmd = ' '.join([p_script,
+                    "--seq_stat_f", seq_stat_f,
+                    "--vardict_vcf_f", vardict_vcf_f,
+                    "--correct_vcf_f", correct_vcf_f,
+                    "--contig_limits_f", contig_limits_f                    
                     ])
-    print(f"{prog_tag} cmd:{cmd}")
+    print(prog_tag + " cmd:" + cmd)
 
     if b_test_correct_multicontig_vardict_vcf:
-        print(f"{prog_tag} [test_correct_multicontig_vardict_vcf] START")
-        print(f"{prog_tag} cmd:{cmd}")
+        print(prog_tag + " [test_correct_multicontig_vardict_vcf] START")
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)    
-        print(f"{prog_tag} [test_correct_multicontig_vardict_vcf] END")
+        print(prog_tag + " [test_correct_multicontig_vardict_vcf] END")
         sys.exit()
     else:
         os.system(cmd)
@@ -369,17 +369,17 @@ def __main__():
     # ------------------------------------------------------------------
     if b_test_convert_vcffile_to_readable:
         # # COMPLETE GENOME
-        # vardict_vcf_f = f"{test_dir}/res2_vardict.vcf"  # from vardict results
-        # correct_vcf_f = f"{test_dir}/res2_correct.vcf"  # corrected vardict results                
-        # json_annot_f      = f"{test_dir}/res2_vadr.json"
-        # snp_loc_f         =  f"{test_dir}/res2_snp.txt"
-        # snp_loc_summary_f =  f"{test_dir}/res2_snp_summary.txt"
+        # vardict_vcf_f = test_dir + "/res2_vardict.vcf"  # from vardict results
+        # correct_vcf_f = test_dir + "/res2_correct.vcf"  # corrected vardict results                
+        # json_annot_f      = test_dir + "/res2_vadr.json"
+        # snp_loc_f         =  test_dir + "/res2_snp.txt"
+        # snp_loc_summary_f =  test_dir + "/res2_snp_summary.txt"
         # CONTIGS
-        # vardict_vcf_f = f"{test_dir}/res_vardict.vcf"  # from vardict results
-        correct_vcf_f     = f"{test_dir}/res_correct.vcf"                  
-        json_annot_f      = f"{test_dir}/res_vadr.json"
-        snp_loc_f         =  f"{test_dir}/res_snp.txt"
-        snp_loc_summary_f =  f"{test_dir}/res_snp_summary.txt"
+        # vardict_vcf_f = test_dir + "/res_vardict.vcf"  # from vardict results
+        correct_vcf_f     = test_dir + "/res_correct.vcf"                  
+        json_annot_f      = test_dir + "/res_vadr.json"
+        snp_loc_f         =  test_dir + "/res_snp.txt"
+        snp_loc_summary_f =  test_dir + "/res_snp_summary.txt"
 
     if(snp_loc_f == ''):
        snp_loc_f = vardict_vcf_f
@@ -393,21 +393,21 @@ def __main__():
     # json_annot_f = f"{ech}_gene_position_viral_consensus.json" 
     # snp_loc_f = f"{ech}_snp_location"
     # p_script = f"{PYTHON_SCRIPTS}convert_vcffile_to_readablefile.py" # use pyvcf
-    p_script = f"{PYTHON_SCRIPTS}convert_vcffile_to_readablefile2.py" # use pysam
+    p_script = PYTHON_SCRIPTS + "convert_vcffile_to_readablefile2.py" # use pysam
     threshold = "0.07"
-    cmd = ' '.join([f"{p_script}",
-                    f"--vcfs {correct_vcf_f}",
-                    f"--json {json_annot_f}",
-                    f"--out {snp_loc_f}",
-                    f"--outs {snp_loc_summary_f}",
-                    f"--threshold {threshold}"])
-    print(f"{prog_tag} cmd:{cmd}")
+    cmd = ' '.join([p_script,
+                    "--vcfs", correct_vcf_f,
+                    "--json", json_annot_f,
+                    "--out", snp_loc_f,
+                    "--outs", snp_loc_summary_f,
+                    "--threshold", threshold])
+    print(prog_tag + " cmd:" + cmd)
 
     if b_test_convert_vcffile_to_readable:
-        print(f"{prog_tag} [test_convert_vcffile_to_readable] START")
-        print(f"{prog_tag} cmd:{cmd}")
+        print(prog_tag + " [test_convert_vcffile_to_readable] START")
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)    
-        print(f"{prog_tag} [test_convert_vcffile_to_readable] END")
+        print(prog_tag + " [test_convert_vcffile_to_readable] END")
         sys.exit()
     else:
         os.system(cmd)    
@@ -419,35 +419,40 @@ def __main__():
     # ------------------------------------------------------------------
     if b_test_visualize_snp_v4:
         # # COMPLETE GENOME
-        # snp_loc_f =  f"{test_dir}/res2_snp.txt"
-        # snp_loc_summary_f =  f"{test_dir}/res2_snp_summary.txt"
-        # png_var_f =  f"{test_dir}/res2_snp.png"
+        # snp_loc_f =  test_dir + "/res2_snp.txt"
+        # snp_loc_summary_f =  test_dir + "/res2_snp_summary.txt"
+        # png_var_f =  test_dir + "/res2_snp.png"
         # CONTIGS
-        snp_loc_f         =  f"{test_dir}/res_snp.txt"
-        snp_loc_summary_f =  f"{test_dir}/res_snp_summary.txt"
-        png_var_f         =  f"{test_dir}/res_snp.png"
-        contig_limits_f   =  f"{test_dir}/contig_limits.txt"
+        snp_loc_f         =  test_dir + "/res_snp.txt"
+        snp_loc_summary_f =  test_dir + "/res_snp_summary.txt"
+        png_var_f         =  test_dir + "/res_snp.png"
+        contig_limits_f   =  test_dir + "/contig_limits.txt"
         
     if(png_var_f == ''):
        png_var_f = snp_loc_f
        png_var_f = re.sub('\.[^\.]+$', '.png', png_var_f)
        
     # env r-env.yaml
-    r_script = f"{R_SCRIPTS}visualize_snp_v4.R"
+    r_script = R_SCRIPTS + "visualize_snp_v4.R"
     threshold = "0.07"
     # png_var_f = f"{ech}_graphic_variant.png"
-    cmd = f"R --vanilla --quiet --args {snp_loc_f} {contig_limits_f} {threshold} {png_var_f} < {r_script} > /dev/null"
-    print(f"{prog_tag} cmd:{cmd}")
+    cmd = " ".join(["R --vanilla --quiet --args",
+                snp_loc_f, 
+                contig_limits_f,
+                threshold,
+                png_var_f,
+                " < ", r_script, " > /dev/null"])
+    print(prog_tag + " cmd:" + cmd)
 
     if b_test_visualize_snp_v4:
-        print(f"{prog_tag} [test_visualize_snp_v4] START")
-        print(f"{prog_tag} cmd:{cmd}")
+        print(prog_tag + " [test_visualize_snp_v4] START")
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)
-        print(f"{prog_tag} [test_visualize_snp_v4] END")
+        print(prog_tag + " [test_visualize_snp_v4] END")
     else:
         os.system(cmd)    
     # ------------------------------------------------------------------
-    print(f"{png_var_f} file created")
+    print(png_var_f + " file created")
 
 ##### MAIN END
 if __name__=="__main__":__main__()

From ebfbaa2cb7fbc6a450800b6b2eb20de4d3701ef9 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 16:02:12 +0100
Subject: [PATCH 03/42] show prot as genes were, show genes as prot were. Add
 number to sort protein labels to better see legend

---
 src/visualize_snp_v4.R | 67 +++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index a08c33a..2804dfd 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -1,5 +1,6 @@
 #!/usr/bin/env Rscript
 #
+# FT; last modification December 19th 2023
 # AF; last modification February 16th 2019
 #
 # Description:  This script has been written in order to generate a graph
@@ -37,7 +38,7 @@ if(is.null(density)){ # means no snp found
 }
 
 # defined a wide color palet manually to ensure better visibility, the other colors defined later will not be so distinctive
-protcols=c('#99CC00','#CC9900','#FFCC33','#FF9900','#FF6600','#FF3300','#CC3300','#990033','#FF3366','#FF9999','#FFCCFF','#CC99CC','#996699','#993399','#660066','#990066','#660099','#CC00CC','#6600CC','#9966FF','#6633FF','#330099','#0033CC','#3366FF','#0033FF','#00CCFF','#006699','#00CC99','#009966','#33FFCC','#33CC99','#66FFCC','#33CCCC','#99FFFF','#66CC99','#339966','#99CC99','#99FF99','#66CC66','#66FF66','#669933','#336600','#00FF00','#66CC33','#CCFF66','#CCCC99','#FFFFCC','#FFCC99','#FFCCCC','#FF99CC','#CC99FF','#9999CC','#CCCCFF','#333333','#999999','#666666','#CCCCCC','#000000','#FF0000','#33D033','#CCD066','#CC00FF','#9900FF','#0099FF','#003333','#0099CC','#00FF99','#CCFF00')
+genecols=c('#99CC00','#CC9900','#FFCC33','#FF9900','#FF6600','#FF3300','#CC3300','#990033','#FF3366','#FF9999','#FFCCFF','#CC99CC','#996699','#993399','#660066','#990066','#660099','#CC00CC','#6600CC','#9966FF','#6633FF','#330099','#0033CC','#3366FF','#0033FF','#00CCFF','#006699','#00CC99','#009966','#33FFCC','#33CC99','#66FFCC','#33CCCC','#99FFFF','#66CC99','#339966','#99CC99','#99FF99','#66CC66','#66FF66','#669933','#336600','#00FF00','#66CC33','#CCFF66','#CCCC99','#FFFFCC','#FFCC99','#FFCCCC','#FF99CC','#CC99FF','#9999CC','#CCCCFF','#333333','#999999','#666666','#CCCCCC','#000000','#FF0000','#33D033','#CCD066','#CC00FF','#9900FF','#0099FF','#003333','#0099CC','#00FF99','#CCFF00')
 
 # ,'#','#','#','#','#','#','#','#','#','#','#','#',
 # '#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#',
@@ -45,44 +46,44 @@ protcols=c('#99CC00','#CC9900','#FFCC33','#FF9900','#FF6600','#FF3300','#CC3300'
 # '#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#','#')
 
 # check color redundancy
-protcols_unique = sort(unique(protcols))
+genecols_unique = sort(unique(genecols))
 print("max_col_number_unique:")
-print(length(protcols_unique))
+print(length(genecols_unique))
 
-if( length(protcols) != length(protcols_unique) ){
-    protcols = sort(protcols)
-    for( icol in 1:length(protcols_unique) ){
-       print("protcols_unique de ")
+if( length(genecols) != length(genecols_unique) ){
+    genecols = sort(genecols)
+    for( icol in 1:length(genecols_unique) ){
+       print("genecols_unique de ")
        print(icol)
        print(":")
-       print(protcols_unique[icol])
-       if( protcols_unique[icol] != protcols[icol] ){
-           redund_vals = protcols[icol]
+       print(genecols_unique[icol])
+       if( genecols_unique[icol] != genecols[icol] ){
+           redund_vals = genecols[icol]
     	   print("Color found several times:")
     	   print(redund_vals)
     	   stop()
 	}
     }
-    print("first values identical, added values in protcols:")
-    for( icol in length(protcols_unique)+1:length(protcols) ){
+    print("first values identical, added values in genecols:")
+    for( icol in length(genecols_unique)+1:length(genecols) ){
     	   print("Color found several times:")
-    	   print(protcols[icol])
+    	   print(genecols[icol])
     }
     stop()
 }
 
-# add missing colors of rainbow 4000 to protcols, to be sure to always have enough colors (mimmivirus =~ 3000 genes)
+# add missing colors of rainbow 4000 to genecols, to be sure to always have enough colors (mimmivirus =~ 3000 genes)
 complete_col_set=rainbow(n=10000)
-indexes_to_rm = match(protcols, complete_col_set)
+indexes_to_rm = match(genecols, complete_col_set)
 for(i in indexes_to_rm){
       complete_col_set[ -i ]
       }
-protcols = append(protcols, complete_col_set)
+genecols = append(genecols, complete_col_set)
 
 
-# check how many color we can manage
-print("max_col_number:")
-print(length(protcols))
+# # check how many color we can manage
+# print("max_col_number:")
+# print(length(genecols))
 
 
 # print("genes:")
@@ -111,6 +112,9 @@ print(length(protcols))
 # }
 
 # needed to have color labels NOT ORDERED and to choose colors
+gene_id_labels = unique(density$gene_id)
+density$gene_id_num = paste(  sprintf("%03d", match(density$gene_id, gene_id_labels) ), density$gene_id) 
+
 protein_id_labels = unique(density$protein_id)
 density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, protein_id_labels) ), density$protein_id) 
 
@@ -118,21 +122,24 @@ density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, prote
 # stem_loops_labels = unique(density$stem_loops)
 # density$stem_loops_num = paste(  sprintf("%03d", match(density$stem_loops, stem_loops_labels) ), density$stem_loops) 
 
-# print("protein_id_num:")
-# print(density$protein_id_num)
+# print("gene_id_num:")
+# print(density$gene_id_num)
 
-# print("protein_id_labels:")
-# print(protein_id_labels)
+print("gene_id_labels:")
+print(gene_id_labels)
 
-p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$protein_id_num, shape = density$gene_id), size = 1, show.legend = F, shape = 15) # add the consensus points, and remove the legend "size" for proteins
+p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) # add the consensus points, and remove the legend "size" for proteins
 
 # needed to have more than 6 symbols for gene_id legend
-p1bis = p1 + scale_shape_manual(values=c(1:25))
+p1bis = p1 + scale_shape_manual(values=c(1:25,1:25))
 
 # use color more easily distinguishable
-p1ter = p1bis + scale_color_manual(values=protcols[1:length(protein_id_labels)])
+p1ter = p1bis + scale_color_manual(values=genecols[1:length(gene_id_labels)])
 
-p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$protein_id_num, shape = density$gene_id)) # add the variant points
+print("length of density protein_id:")
+print(length(density$protein_id_num))
+p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$gene_id_num, shape = density$protein_id_num)) # add the variant points
+# p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$gene_id_num, shape = density$gene_id)) # add the variant points
 
 p3 = p2 + geom_line(aes(x=position, y=threshold)) # add the threshold line
 
@@ -155,15 +162,15 @@ p4 = p3bis + labs(title = t) # add graph title
 p5 = p4 + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides(shape = guide_legend(order=1, direction="horizontal", title="gene"),
-                   color = guide_legend(order=2, direction="vertical", title="protein"))
+p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="protein"),
+                   color = guide_legend(order=2, direction="horizontal", title="gene"))
 		   
 # p7 = p6 + theme(legend.position = "bottom") # modify the legend position
 p8 = p6bis + ylim(-0.06,1.2) # modify the scale
 p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the grapĥ
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
-p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
+p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red", legend.position="top")
 p11 = p10 + theme(plot.title = element_text(hjust=0.5))
 
 

From 2946b6e8abb2fd2136bb788ab534e51766df671b Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 16:41:20 +0100
Subject: [PATCH 04/42] change 'non coding RNA' to 'non translated RNA'

---
 src/convert_vcffile_to_readablefile2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/convert_vcffile_to_readablefile2.py b/src/convert_vcffile_to_readablefile2.py
index 5979ca9..1ff8f2a 100755
--- a/src/convert_vcffile_to_readablefile2.py
+++ b/src/convert_vcffile_to_readablefile2.py
@@ -96,7 +96,7 @@ def find_key_proteins(dico_json, genomepos, gene_res):
     """ This function retrieves the protein name and the start and end position of
 this protein.
     """
-    protein = 'non coding RNA'
+    protein = 'non translated RNA'
     base_inf_allproteins = ''
     base_sup_allproteins = ''
     # print("find_key_proteins call pos ("+str(genomepos)+", "+gene_res+")")    
@@ -112,7 +112,7 @@ def find_key_proteins(dico_json, genomepos, gene_res):
                         protein = 'intergene'
                         base_inf_allproteins = str(base_inf)
                         base_sup_allproteins = str(base_sup)
-                    elif protein == 'non coding RNA':
+                    elif protein == 'non translated RNA':
                         protein = key
                         base_inf_allproteins = str(base_inf)
                         base_sup_allproteins = str(base_sup)

From c6d6e8d9d62ddb59dcac75b9e21c2864005cafeb Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 16:42:01 +0100
Subject: [PATCH 05/42] add ':' after numbers used to sort genes and prot and
 keep order of apparition in the graph

---
 src/visualize_snp_v4.R | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 2804dfd..a32ac88 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -113,10 +113,10 @@ genecols = append(genecols, complete_col_set)
 
 # needed to have color labels NOT ORDERED and to choose colors
 gene_id_labels = unique(density$gene_id)
-density$gene_id_num = paste(  sprintf("%03d", match(density$gene_id, gene_id_labels) ), density$gene_id) 
+density$gene_id_num = paste(  sprintf("%03d", match(density$gene_id, gene_id_labels) ), ":", density$gene_id) 
 
 protein_id_labels = unique(density$protein_id)
-density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, protein_id_labels) ), density$protein_id) 
+density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, protein_id_labels) ), ":",  density$protein_id) 
 
 # # TODO stem_loop
 # stem_loops_labels = unique(density$stem_loops)
@@ -136,8 +136,6 @@ p1bis = p1 + scale_shape_manual(values=c(1:25,1:25))
 # use color more easily distinguishable
 p1ter = p1bis + scale_color_manual(values=genecols[1:length(gene_id_labels)])
 
-print("length of density protein_id:")
-print(length(density$protein_id_num))
 p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$gene_id_num, shape = density$protein_id_num)) # add the variant points
 # p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$gene_id_num, shape = density$gene_id)) # add the variant points
 
@@ -162,15 +160,15 @@ p4 = p3bis + labs(title = t) # add graph title
 p5 = p4 + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="protein"),
-                   color = guide_legend(order=2, direction="horizontal", title="gene"))
+p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="protein", legend.position="top"),
+                   color = guide_legend(order=2, direction="horizontal", title="gene", legend.position="top"))
 		   
 # p7 = p6 + theme(legend.position = "bottom") # modify the legend position
 p8 = p6bis + ylim(-0.06,1.2) # modify the scale
 p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the grapĥ
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
-p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red", legend.position="top")
+p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
 p11 = p10 + theme(plot.title = element_text(hjust=0.5))
 
 

From 61da32f2e494b78de5711cc6641dc22f39145e17 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 19:29:58 +0100
Subject: [PATCH 06/42] modified to add parameter for cov display file to allow
 cov depth display in graph above variants/annotations

---
 src/visualize_snp_v4.R | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index a32ac88..a555087 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -28,6 +28,13 @@ if(inherits(contig_limits,"try-error"))
   
 threshold = as.numeric(args[3]) # define threshold
 
+outfile = args[4]
+
+# to prepare coverage depth graph above variant/annotation graph
+coverage_depth <- try( read.table(args[5], h=F, sep = "\t") ) # read the dataframe
+if(inherits(coverage_depth,"try-error"))
+  coverage_depth <- NULL
+
 t1 = as.character(threshold) # define threshold as a character
 t = paste("Nucleotide Variation - threshold = ", t1, sep = ' ')
 
@@ -113,11 +120,13 @@ genecols = append(genecols, complete_col_set)
 
 # needed to have color labels NOT ORDERED and to choose colors
 gene_id_labels = unique(density$gene_id)
-density$gene_id_num = paste(  sprintf("%03d", match(density$gene_id, gene_id_labels) ), ":", density$gene_id) 
+density$gene_id_num = paste(  sprintf("%02d", match(density$gene_id, gene_id_labels) ), ":", density$gene_id) 
 
 protein_id_labels = unique(density$protein_id)
 density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, protein_id_labels) ), ":",  density$protein_id) 
 
+# protein_id_ordered = fct_reorder(protein_id_labels, density$protein_id_num)
+
 # # TODO stem_loop
 # stem_loops_labels = unique(density$stem_loops)
 # density$stem_loops_num = paste(  sprintf("%03d", match(density$stem_loops, stem_loops_labels) ), density$stem_loops) 
@@ -160,8 +169,8 @@ p4 = p3bis + labs(title = t) # add graph title
 p5 = p4 + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="protein", legend.position="top"),
-                   color = guide_legend(order=2, direction="horizontal", title="gene", legend.position="top"))
+p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)"),
+                   color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"))
 		   
 # p7 = p6 + theme(legend.position = "bottom") # modify the legend position
 p8 = p6bis + ylim(-0.06,1.2) # modify the scale
@@ -172,6 +181,6 @@ p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
 p11 = p10 + theme(plot.title = element_text(hjust=0.5))
 
 
-ggsave(args[4], device = "png", plot = last_plot(), width = 20, dpi = 600) # save the graph
+ggsave(outfile, device = "png", plot = last_plot(), width = 20, dpi = 600) # save the graph
 
 # ~ end of script ~

From 2a3939dff0bc74ea35de17330bef51e7dc8ec8eb Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 19:30:29 +0100
Subject: [PATCH 07/42] add call to correct_covdepth_f.py, TODO:test

---
 src/vvv2_display.py | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index 4b88a62..7d95a07 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -44,6 +44,9 @@ def __main__():
     
     prog_tag = '[' + os.path.basename(__file__) + ']'
 
+    # to record if we display cov depth in graph or not (depends on provided intputs)
+    b_cov_depth_display = False
+
     # --------------------------------------
     # input files
     # --------------------------------------
@@ -96,6 +99,12 @@ def __main__():
     parser.add_argument("-r", "--png_var_f", dest='png_var_f',
                         help="out: png file with variant proportions and annotations",
                         metavar="FILE")
+    parser.add_argument("-o", "--cov_depth_f", dest='cov_depth_f',
+                        help="[optional] in: text file of coverage depths (given by samtools depth)",
+                        metavar="FILE")     
+    parser.add_argument("-e", "--cov_depth_corr_f", dest='cov_depth_corr_f',
+                        help="[optional] out: text file of coverage depths with cumulated position in case of several contigs, for display (tmp file, for galaxy compatibility)",
+                        metavar="FILE")                       
     parser.add_argument("-t", "--snp_loc_f", dest='snp_loc_f',
                         help="[optional] out: variant description for relevant positions, txt file (if not provided, file name deduced from png name)",
                         metavar="FILE")  
@@ -205,6 +214,9 @@ def __main__():
         snp_loc_f = os.path.abspath(args.snp_loc_f)
     if args.snp_loc_summary_f is not None:
         snp_loc_summary_f = os.path.abspath(args.snp_loc_summary_f)
+    if args.cov_depth_f is not None:
+        cov_depth_f = os.path.abspath(args.cov_depth_f)
+        b_cov_depth_display = True
     # ----------------------------------------------------------------
     # optional arguments only for Galaxy compatibility
     if args.json_annot_f is not None:
@@ -215,6 +227,8 @@ def __main__():
         correct_vcf_f = os.path.abspath(args.correct_vcf_f)
     if args.contig_limits_f is not None:
         contig_limits_f = os.path.abspath(args.contig_limits_f)
+    if args.cov_depth_corr_f is not None:
+        cov_depth_corr_f = os.path.abspath(args.cov_depth_corr_f)
     # ----------------------------------------------------------------
     
     if args.b_verbose is not None:
@@ -431,7 +445,17 @@ def __main__():
     if(png_var_f == ''):
        png_var_f = snp_loc_f
        png_var_f = re.sub('\.[^\.]+$', '.png', png_var_f)
-       
+    
+    # creates corrected cov depth file
+    if b_cov_depth_display:
+        cmd = cmd + " ".join([
+                " --cov_depth_f", cov_depth_f,
+                " --cov_depth_corr_f", cov_depth_corr_f,
+                " "
+        ]) 
+        print(prog_tag + " cmd:" + cmd)
+        os.system(cmd)
+
     # env r-env.yaml
     r_script = R_SCRIPTS + "visualize_snp_v4.R"
     threshold = "0.07"
@@ -440,8 +464,13 @@ def __main__():
                 snp_loc_f, 
                 contig_limits_f,
                 threshold,
-                png_var_f,
-                " < ", r_script, " > /dev/null"])
+                png_var_f])
+
+    # parameter to allow coverage depth display above variants/annotations
+    if b_cov_depth_display:
+        cmd = cmd + cov_depth_corr_f + " "
+    
+    cmd = cmd + " ".join([" < ", r_script, " > /dev/null"])
     print(prog_tag + " cmd:" + cmd)
 
     if b_test_visualize_snp_v4:

From 51ee90d35690cd087b00b536ec2edb97b88904f4 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 19 Dec 2023 19:30:56 +0100
Subject: [PATCH 08/42] script to correct cov depth position if several contigs

---
 src/correct_covdepth_f.py | 151 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100755 src/correct_covdepth_f.py

diff --git a/src/correct_covdepth_f.py b/src/correct_covdepth_f.py
new file mode 100755
index 0000000..ea6afd1
--- /dev/null
+++ b/src/correct_covdepth_f.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of the vvv2_display distribution (https://github.com/ANSES-Ploufragan/vvv2_display).
+# Copyright (c) 2023 Fabrice Touzain.
+# 
+# This program is free software: you can redistribute it and/or modify  
+# it under the terms of the GNU General Public License as published by  
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful, but 
+# WITHOUT ANY WARRANTY; without even the implied warranty of 
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License 
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+###
+# USE PYTHON3
+# from vcf file of vardict and bed file with contig positions, creates a vcf file with
+# contigs shifted position (to create a picture of variants and annotations)
+###
+import argparse, os, sys, csv, re, warnings
+from os import path
+import subprocess
+
+# to be able to report line number in error messages
+import inspect
+frame = inspect.currentframe()
+
+# debug
+b_test_correct_covdepth_f = False #  2023 12 19
+b_test = False
+
+prog_tag = '[' + os.path.basename(__file__) + ']'
+
+covdepth_f    = '' # in, covdepth file provided by samtools depth -aa file.bam
+correct_covdepth_f = '' # out same as previous but positions on contigs are shifted according to previous contigs
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-s", "--covdepth_f", dest='covdepth_f',
+                    help="in: coverage depth file provided by samtools -aa in.bam (all contigs)",
+                    metavar="FILE")
+parser.add_argument("-c", "--correct_covdepth_f", dest='correct_covdepth_f',
+                    help="out: coverage depth file corrected for position (cumulative position in case of several contigs)",
+                    metavar="FILE")
+parser.add_argument("-z", "--test_all", dest='b_test_all',
+                    help="[Optional] run all tests",
+                    action='store_true')
+parser.add_argument("-v", "--verbose", dest='b_verbose',
+                    help="[Optional] To have details on records when running",
+                    action='store_true')
+parser.set_defaults(b_test_all=False)
+parser.set_defaults(b_verbose=False)
+
+# get absolute path in case of files
+args = parser.parse_args()
+
+# -------------------------------------------
+# check arguments
+b_test_all = args.b_test_all
+
+if b_test_all:
+    b_test_correct_covdepth_f = True
+    b_test = True
+else:
+    b_test = b_test_correct_covdepth_f
+
+if ((not b_test)and
+    ((len(sys.argv) < 2) or (len(sys.argv) > 10))):
+    parser.print_help()
+    print(prog_tag + "[Error] we found "+str(len(sys.argv)) +
+          " arguments, exit line "+str(frame.f_lineno))
+    sys.exit(0)
+
+# print('args:', args)
+if args.covdepth_f is not None:
+    covdepth_f = os.path.abspath(args.covdepth_f)
+elif(not b_test):
+    sys.exit(prog_tag + "[Error] You must provide covdepth_f")
+if args.correct_covdepth_f is not None:
+    correct_covdepth_f = os.path.abspath(args.correct_covdepth_f)
+elif(not b_test):
+    sys.exit(prog_tag + "[Error] You must provide correct_covdepth_f")
+
+if args.b_verbose is not None:
+    b_verbose = args.b_verbose
+
+if b_test_correct_covdepth_f:
+    test_dir = '../test_vvv2_display/'
+    test_name = [
+        'res'
+        # 'res2'
+    ]
+    for resn in test_name:
+        covdepth_f         = test_dir + "res_vvv2_covdepth.txt"
+        correct_covdepth_f = test_dir + "res_vvv2_covdepth_corrected.txt"
+        cmd = ' '.join(['./correct_covdepth_f.py',
+                        "-s", covdepth_f,
+                        "-c", correct_covdepth_f
+                        ])
+        if b_verbose:
+            cmd = cmd + " -v"
+        print(prog_tag + " cmd:"+cmd)
+        print(prog_tag + " START")
+        os.system(cmd)
+        print(prog_tag + " END")
+    sys.exit()
+
+# ----------------------------------------------------------------------
+# reads seqstat file to get info on genome_length and on contigs lengths
+# ----------------------------------------------------------------------
+print(prog_tag + " Reads "+covdepth_f+" file")
+print(prog_tag + " Creates "+correct_covdepth_f+" file")
+o = open(correct_covdepth_f, 'w+')
+
+# init var
+prev_contig_nr = 1
+contig_num   = 0
+
+pos = 0
+curr_shift = 0
+with open(covdepth_f) as cdf:
+    for line in cdf:
+        line_fields = line.split("\t")
+        # print("line_fields:"+','.join(line_fields))
+        contig_nr = line_fields[0]
+        contig_pos = line_fields[1]
+        covdepth = line_fields[2]
+        # print("line_fields:"+','.join(line_fields))
+        if b_verbose:
+            print(prog_tag + " contig:"+contig_name+" length:"+str(contig_length))
+
+        # write in output file
+        # if contig change, record shift to apply
+        if contig_nr != prev_contig_nr:
+            curr_shift = pos
+        # get position WITH SHIFT
+        pos = curr_shift + int(contig_pos)
+        pos_str = str(pos)
+        # write pos and related coverage depth
+        o.write("\t".join([pos_str, covdepth]))
+        if b_verbose:            
+            print("record pos:",pos_str,", covdepth:", covdepth)
+
+        prev_contig_nr = contig_nr
+cdf.close()
+o.close()
+
+print(prog_tag + ' '+ correct_covdepth_f + " file created")

From f37ecab50cc82457077838dd61b20ec0b2a6a587 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 20 Dec 2023 14:03:37 +0100
Subject: [PATCH 09/42] var name changed for homogeneity, file names too

---
 src/correct_covdepth_f.py | 52 +++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/correct_covdepth_f.py b/src/correct_covdepth_f.py
index ea6afd1..7ce917e 100755
--- a/src/correct_covdepth_f.py
+++ b/src/correct_covdepth_f.py
@@ -30,19 +30,19 @@
 frame = inspect.currentframe()
 
 # debug
-b_test_correct_covdepth_f = False #  2023 12 19
+b_test_cov_depth_corr_f = False #  2023 12 19
 b_test = False
 
 prog_tag = '[' + os.path.basename(__file__) + ']'
 
-covdepth_f    = '' # in, covdepth file provided by samtools depth -aa file.bam
-correct_covdepth_f = '' # out same as previous but positions on contigs are shifted according to previous contigs
+cov_depth_f    = '' # in, cov_depth file provided by samtools depth -aa file.bam
+cov_depth_corr_f = '' # out same as previous but positions on contigs are shifted according to previous contigs
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-s", "--covdepth_f", dest='covdepth_f',
+parser.add_argument("-s", "--cov_depth_f", dest='cov_depth_f',
                     help="in: coverage depth file provided by samtools -aa in.bam (all contigs)",
                     metavar="FILE")
-parser.add_argument("-c", "--correct_covdepth_f", dest='correct_covdepth_f',
+parser.add_argument("-c", "--cov_depth_corr_f", dest='cov_depth_corr_f',
                     help="out: coverage depth file corrected for position (cumulative position in case of several contigs)",
                     metavar="FILE")
 parser.add_argument("-z", "--test_all", dest='b_test_all',
@@ -62,10 +62,10 @@
 b_test_all = args.b_test_all
 
 if b_test_all:
-    b_test_correct_covdepth_f = True
+    b_test_cov_depth_corr_f = True
     b_test = True
 else:
-    b_test = b_test_correct_covdepth_f
+    b_test = b_test_cov_depth_corr_f
 
 if ((not b_test)and
     ((len(sys.argv) < 2) or (len(sys.argv) > 10))):
@@ -75,30 +75,30 @@
     sys.exit(0)
 
 # print('args:', args)
-if args.covdepth_f is not None:
-    covdepth_f = os.path.abspath(args.covdepth_f)
+if args.cov_depth_f is not None:
+    cov_depth_f = os.path.abspath(args.cov_depth_f)
 elif(not b_test):
-    sys.exit(prog_tag + "[Error] You must provide covdepth_f")
-if args.correct_covdepth_f is not None:
-    correct_covdepth_f = os.path.abspath(args.correct_covdepth_f)
+    sys.exit(prog_tag + "[Error] You must provide cov_depth_f")
+if args.cov_depth_corr_f is not None:
+    cov_depth_corr_f = os.path.abspath(args.cov_depth_corr_f)
 elif(not b_test):
-    sys.exit(prog_tag + "[Error] You must provide correct_covdepth_f")
+    sys.exit(prog_tag + "[Error] You must provide cov_depth_corr_f")
 
 if args.b_verbose is not None:
     b_verbose = args.b_verbose
 
-if b_test_correct_covdepth_f:
+if b_test_cov_depth_corr_f:
     test_dir = '../test_vvv2_display/'
     test_name = [
         'res'
         # 'res2'
     ]
     for resn in test_name:
-        covdepth_f         = test_dir + "res_vvv2_covdepth.txt"
-        correct_covdepth_f = test_dir + "res_vvv2_covdepth_corrected.txt"
+        cov_depth_f         = test_dir + "res2_covdepth.txt"
+        cov_depth_corr_f = test_dir + "res2_covdepth_corrected.txt"
         cmd = ' '.join(['./correct_covdepth_f.py',
-                        "-s", covdepth_f,
-                        "-c", correct_covdepth_f
+                        "-s", cov_depth_f,
+                        "-c", cov_depth_corr_f
                         ])
         if b_verbose:
             cmd = cmd + " -v"
@@ -111,9 +111,9 @@
 # ----------------------------------------------------------------------
 # reads seqstat file to get info on genome_length and on contigs lengths
 # ----------------------------------------------------------------------
-print(prog_tag + " Reads "+covdepth_f+" file")
-print(prog_tag + " Creates "+correct_covdepth_f+" file")
-o = open(correct_covdepth_f, 'w+')
+print(prog_tag + " Reads "+cov_depth_f+" file")
+print(prog_tag + " Creates "+cov_depth_corr_f+" file")
+o = open(cov_depth_corr_f, 'w+')
 
 # init var
 prev_contig_nr = 1
@@ -121,13 +121,13 @@
 
 pos = 0
 curr_shift = 0
-with open(covdepth_f) as cdf:
+with open(cov_depth_f) as cdf:
     for line in cdf:
         line_fields = line.split("\t")
         # print("line_fields:"+','.join(line_fields))
         contig_nr = line_fields[0]
         contig_pos = line_fields[1]
-        covdepth = line_fields[2]
+        cov_depth = line_fields[2]
         # print("line_fields:"+','.join(line_fields))
         if b_verbose:
             print(prog_tag + " contig:"+contig_name+" length:"+str(contig_length))
@@ -140,12 +140,12 @@
         pos = curr_shift + int(contig_pos)
         pos_str = str(pos)
         # write pos and related coverage depth
-        o.write("\t".join([pos_str, covdepth]))
+        o.write("\t".join([pos_str, cov_depth]))
         if b_verbose:            
-            print("record pos:",pos_str,", covdepth:", covdepth)
+            print("record pos:",pos_str,", cov_depth:", cov_depth)
 
         prev_contig_nr = contig_nr
 cdf.close()
 o.close()
 
-print(prog_tag + ' '+ correct_covdepth_f + " file created")
+print(prog_tag + ' '+ cov_depth_corr_f + " file created")

From b4d0b989274d9b37977a03ba3d1d0853d03d4ac3 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 20 Dec 2023 14:04:38 +0100
Subject: [PATCH 10/42] add test for src/correct_covdepth_f.py

---
 src/vvv2_display.py | 54 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index 7d95a07..e2a6b5c 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -37,6 +37,7 @@ def __main__():
     b_test_convert_tbl2json                = False # ok 2022 04 26 complete tc,
     b_test_correct_multicontig_vardict_vcf = False # ok 2022 04 29 partial tc
     b_test_convert_vcffile_to_readable     = False # ok 2022 04 28 complete tc,
+    b_test_correct_covdepth_f              = True # 2023 12 20 tc,
     b_test_visualize_snp_v4                = False # ok 2022 04 28 complete tc,
     b_test = False
     dir_path = os.path.dirname(os.path.abspath(__file__)) # dir of current script
@@ -55,6 +56,8 @@ def __main__():
     seq_stat_f      = '' # in, seq stat file from vadr
     vardict_vcf_f   = '' # in, vcf file from VarDict
     correct_vcf_f   = '' # in, vcf file corrected for pos (when multicontigs)
+    cov_depth_f     = '' # in, cov depth file provided by samtools depth -aa
+    cov_depth_corr_f= '' # in, cov depth corrected for pos (when pulticontigs)
     # --------------------------------------
 
     # --------------------------------------
@@ -138,6 +141,9 @@ def __main__():
     parser.add_argument("-d", "--test_visualize_snp_v4", dest='b_test_visualize_snp_v4',
                         help="[Optional] run test to visualize snp in a png",
                         action='store_true')
+    parser.add_argument("-g", "--test_correct_covdepth_f", dest='b_test_correct_covdepth_f',
+                        help="[Optional] run test to correct position in cov depth file",
+                        action='store_true')
     parser.add_argument("-v", "--verbose", dest='b_verbose',
                         help="[Optional] To have details on records when running",
                         action='store_true')
@@ -158,24 +164,27 @@ def __main__():
     b_test_correct_multicontig_vardict_vcf = args.b_test_correct_multicontig_vardict_vcf
     b_test_convert_vcffile_to_readable     = args.b_test_convert_vcffile_to_readable
     b_test_visualize_snp_v4                = args.b_test_visualize_snp_v4
+    b_test_correct_covdepth_f              = args.b_test_correct_covdepth_f
 
     if b_test_vvv2_display:
         b_test_convert_tbl2json                = True
         b_test_correct_multicontig_vardict_vcf = True
         b_test_convert_vcffile_to_readable     = True
         b_test_visualize_snp_v4                = True
+        b_test_correct_covdepth_f              = True
         b_test                                 = True
     else:
         b_test = (b_test_vvv2_display                    or
                   b_test_convert_tbl2json                or
                   b_test_correct_multicontig_vardict_vcf or
                   b_test_convert_vcffile_to_readable     or
+                  b_test_correct_covdepth_f              or
                   b_test_visualize_snp_v4)
         # print(f"b_test:{b_test}")
         # print(f"b_test_convert_tbl2json:{b_test_convert_tbl2json}")    
 
     if ((not b_test)and
-        ((len(sys.argv) < 9) or (len(sys.argv) > 23))):
+        ((len(sys.argv) < 9) or (len(sys.argv) > 25))):
         print("\n".join([prog_tag,
                          "Aim: Display of SNP proportions, annotations, for an assembly",
                          "in:", 
@@ -246,10 +255,12 @@ def __main__():
         pass_annot_f  = test_dir + "/res2_vadr_pass.tbl" # from vadr results
         fail_annot_f  = test_dir + "/res2_vadr_fail.tbl" # from vadr results
         seq_stat_f    = test_dir + "/res2_vadr.seqstat"  # from vadr results
-        vardict_vcf_f = test_dir + "/res2_vardict.vcf"  # from lofreq results    
+        vardict_vcf_f = test_dir + "/res2_vardict.vcf"   # from lofreq results  
+        cov_depth_f   = test_dir + "/res2_covdepth.txt"  # from samtools results  
         # tmp out files
         json_annot_f  = test_dir + "/res2_vadr.json"     # from convert_tbl2json.py
         contig_limits_f= test_dir + "/contig_limits.txt"
+        cov_depth_corr_f= test_dir + "/res2_covdepth_corr.txt"
         # final out file
         png_var_f     = test_dir + "/res2_vvv2.png"     # from ...
         cmd = ' '.join([ dir_path + "/vvv2_display.py",
@@ -257,7 +268,9 @@ def __main__():
                             "--fail_tbl_f", fail_annot_f,
                             "--seq_stat_f", seq_stat_f,
                             "--vcf_f", vardict_vcf_f,
-                            "--contig_limits_f", contig_limits_f,                 
+                            "--contig_limits_f", contig_limits_f,   
+                            "--cov_depth_f", cov_depth_f,  
+                            "--cov_depth_corr_f", cov_depth_corr_f,              
                             "--png_var_f", png_var_f
                     ])
         print(prog_tag + " START")    
@@ -283,7 +296,9 @@ def __main__():
                     "--fail_tbl_f", fail_annot_f,
                     "--seq_stat_f", seq_stat_f,
                     "--vcf_f", vardict_vcf_f,
-                    "--contig_limits_f", contig_limits_f,                        
+                    "--contig_limits_f", contig_limits_f,   
+                    "--cov_depth_f", cov_depth_f,                     
+                    "--cov_depth_corr_f", cov_depth_corr_f,              
                     "--png_var_f", png_var_f
                     ])
         print(prog_tag + " START")    
@@ -427,6 +442,25 @@ def __main__():
         os.system(cmd)    
     # ------------------------------------------------------------------
 
+    # creates corrected cov depth file
+    if b_cov_depth_display:
+        if b_test_correct_covdepth_f and (not b_test_vvv2_display):
+            cov_depth_f = test_dir + "/res_vvv2_covdepth.txt"
+            cov_depth_corr_f = test_dir + "/res_vvv2_covdepth_corrected.txt"
+        cmd = " ".join([
+                    PYTHON_SCRIPTS + "correct_covdepth_f.py",
+                    "--cov_depth_f", cov_depth_f,
+                    "--cov_depth_corr_f", cov_depth_corr_f
+                ]) 
+        if b_test_correct_covdepth_f and (not b_test_vvv2_display):
+            print(prog_tag + " [test_correct_covdepth_f] START")
+            print(prog_tag + " cmd:" + cmd)
+            os.system(cmd)    
+            print(prog_tag + " [test_correct_covdepth_f] END")
+            sys.exit()
+        else:
+            print(prog_tag + " cmd:" + cmd)
+            os.system(cmd)
 
     # ------------------------------------------------------------------
     # creates png graphic of variants from snp file and threshold
@@ -446,16 +480,6 @@ def __main__():
        png_var_f = snp_loc_f
        png_var_f = re.sub('\.[^\.]+$', '.png', png_var_f)
     
-    # creates corrected cov depth file
-    if b_cov_depth_display:
-        cmd = cmd + " ".join([
-                " --cov_depth_f", cov_depth_f,
-                " --cov_depth_corr_f", cov_depth_corr_f,
-                " "
-        ]) 
-        print(prog_tag + " cmd:" + cmd)
-        os.system(cmd)
-
     # env r-env.yaml
     r_script = R_SCRIPTS + "visualize_snp_v4.R"
     threshold = "0.07"
@@ -468,7 +492,7 @@ def __main__():
 
     # parameter to allow coverage depth display above variants/annotations
     if b_cov_depth_display:
-        cmd = cmd + cov_depth_corr_f + " "
+        cmd = cmd + " " + cov_depth_corr_f + " "
     
     cmd = cmd + " ".join([" < ", r_script, " > /dev/null"])
     print(prog_tag + " cmd:" + cmd)

From eb0a9edf0f26115e55beef73b30ecf57340ea4b6 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 20 Dec 2023 18:14:53 +0100
Subject: [PATCH 11/42] add R dependencies

---
 src/vvv2_display.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index f60f383..b9d51e0 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -10,4 +10,4 @@ dependencies:
   - ca-certificates
   - openssl
   - r-ggplot2=3.3.6
-
+  - r-gridextra

From 0a5d8672e02c0a07f980bc54babd24842ad99a19 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 20 Dec 2023 18:15:04 +0100
Subject: [PATCH 12/42] add R dependencies

---
 setup.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1eef2b7..94fb47f 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
 
 setuptools.setup(
     name="vvv2_display",  # Required
-    version="0.1.10",  # Required
+    version="0.1.11",  # Required
     description="Viral Variant Visualizer 2 display",  # Optional
     long_description=long_description,
     long_description_content_type="text/markdown",  # Optional (see note above)
@@ -59,6 +59,7 @@
                   "convert_tbl2json.py",
                   "convert_vcffile_to_readablefile2.py",
                   "correct_multicontig_vardict_vcf.py",
+                  "correct_covdepth_f.py",
                   "visualize_coverage_depth.R",
                   "visualize_snp_v4.R"
          ]
@@ -69,6 +70,7 @@
     install_requires=[
         "python>=3.9",
         "r-ggplot2==3.6.6",
+        "r-gridextra=2.3"
         "pysam==0.19.1",
         "numpy==1.23.1"],  # Optional
     scripts=[
@@ -76,6 +78,7 @@
         "src/convert_tbl2json.py",
         "src/convert_vcffile_to_readablefile2.py",
         "src/correct_multicontig_vardict_vcf.py",
+        "correct_covdepth_f.py",
         "src/visualize_coverage_depth.R",
         "src/visualize_snp_v4.R"
         ],

From 4c60a209a75e5a777911d64e254449fe898799e5 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 20 Dec 2023 18:15:29 +0100
Subject: [PATCH 13/42] cov depth chgmt

---
 CHANGELOG | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index e554c43..c5d5840 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,11 @@
-0.1.10-beta:
+0.1.11:
+        - display prot instead of genes and genes instead of prot
+        - add num in front of genes to ensure to keep apparition order
+        - replace fstrings in vvv2_display.py to allow better compatibility
+        - add src/correct_covdepth_f.py script to convert cov depth files of samtools depth so as to have cumulative positions for display
+        - prepare vvv2_display to coverage depth addition in input
+        - add new script to setup.py file and change version to 0.1.11
+0.1.10:
         - now horizontal line colors are for proteins, dot shape is for genes. Added vertical dotted lines to show contig limits
 	- correction in vadr parsing using different virus models
 	- tested on calicivirus, dengue, flavivirus, coronavirus, sarscov2, hcv, norovirus

From 74dd436f38855542448aed351d25d06504ed7bfa Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 20 Dec 2023 18:15:50 +0100
Subject: [PATCH 14/42] add coverage depth

---
 src/visualize_snp_v4.R | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index a555087..06b6b50 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -16,6 +16,8 @@
 # install.packages("viridis")
 # library("viridis")
 library(ggplot2) # import the ggplot2 library
+library(gridExtra) # multi graph on the same figure
+
 args <- commandArgs(TRUE) # all arguments are character types
 
 density <- try( read.table(args[1], h=T, sep = "\t") ) # read the dataframe
@@ -38,10 +40,22 @@ if(inherits(coverage_depth,"try-error"))
 t1 = as.character(threshold) # define threshold as a character
 t = paste("Nucleotide Variation - threshold = ", t1, sep = ' ')
 
+if(! is.null(coverage_depth)){ # means covdepth provided, we prepare var for graph
+  maxi = max(coverage_depth$V2)
+  mini = min(coverage_depth$V2)
+  med = median(coverage_depth$V2)
+  m = paste("Coverage - median_coverage =", med, "[", mini, ":", maxi, "]", sep = " ")
+  cd = ggplot(coverage_depth, aes(x = V1, y = V2)) + geom_line(color = "black", size = 0.5)
+  #  + labs(list(title = m, x = "Base Position", y = "Number of Reads")
+  cd1 = cd + labs(title = m) # add graph title
+  cd2 = cd1 + xlab("Base Position") # add x axe title
+  cd3 = cd2 + ylab("Number of reads") # add axes and graph titles
+}
+
 if(is.null(density)){ # means no snp found
-  p = ggplot() # plot initialization
+    p = ggplot() # plot initialization
 }else{
-  p = ggplot(density) # plot initialization
+    p = ggplot(density) # plot initialization
 }
 
 # defined a wide color palet manually to ensure better visibility, the other colors defined later will not be so distinctive
@@ -169,8 +183,7 @@ p4 = p3bis + labs(title = t) # add graph title
 p5 = p4 + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)"),
-                   color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"))
+p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)"), color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"))
 		   
 # p7 = p6 + theme(legend.position = "bottom") # modify the legend position
 p8 = p6bis + ylim(-0.06,1.2) # modify the scale
@@ -178,9 +191,13 @@ p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
 p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
-p11 = p10 + theme(plot.title = element_text(hjust=0.5))
-
+p11 = p10 + theme(plot.title = element_text(hjust=0.5),legend.position="bottom") # 
 
-ggsave(outfile, device = "png", plot = last_plot(), width = 20, dpi = 600) # save the graph
+grid.arrange(cd, p11, nrow=2
+ , widths = c(2, 2), heights = c(1.0, 3.0)
+)
+g <- arrangeGrob(cd3, p11)
 
+ggsave(outfile, device = "png", plot = g, width = 21, units="cm", height=30, dpi = 600) # save the graph
+# 1 inch = 2.54cm
 # ~ end of script ~

From 4a29163fa15c0e7347236d9efef6e695bb062b82 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Thu, 21 Dec 2023 21:55:34 +0100
Subject: [PATCH 15/42] improve covdepth and variant display. Still
 problematic: legend of variants not in the width of the page

---
 src/visualize_snp_v4.R | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 06b6b50..0ba06f5 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -17,6 +17,7 @@
 # library("viridis")
 library(ggplot2) # import the ggplot2 library
 library(gridExtra) # multi graph on the same figure
+library(cowplot)
 
 args <- commandArgs(TRUE) # all arguments are character types
 
@@ -38,20 +39,22 @@ if(inherits(coverage_depth,"try-error"))
   coverage_depth <- NULL
 
 t1 = as.character(threshold) # define threshold as a character
-t = paste("Nucleotide Variation - threshold = ", t1, sep = ' ')
+# t = paste("Nucleotide Variation - threshold = ", t1, sep = ' ')
 
 if(! is.null(coverage_depth)){ # means covdepth provided, we prepare var for graph
   maxi = max(coverage_depth$V2)
   mini = min(coverage_depth$V2)
   med = median(coverage_depth$V2)
   m = paste("Coverage - median_coverage =", med, "[", mini, ":", maxi, "]", sep = " ")
-  cd = ggplot(coverage_depth, aes(x = V1, y = V2)) + geom_line(color = "black", size = 0.5)
+  options(repr.plot.width = 5, repr.plot.height =1)
+  cd = ggplot(coverage_depth, aes(x = V1, y = V2)) + geom_line(color = "black", linewidth = 0.5)
   #  + labs(list(title = m, x = "Base Position", y = "Number of Reads")
   cd1 = cd + labs(title = m) # add graph title
-  cd2 = cd1 + xlab("Base Position") # add x axe title
-  cd3 = cd2 + ylab("Number of reads") # add axes and graph titles
+  cd2 = cd1 + xlab("") # add x axe title
+  cd3 = cd2 + ylab("Number of reads") # add axes and graph titles 
 }
 
+options(repr.plot.width = 5, repr.plot.height =5) 
 if(is.null(density)){ # means no snp found
     p = ggplot() # plot initialization
 }else{
@@ -151,7 +154,7 @@ density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, prote
 print("gene_id_labels:")
 print(gene_id_labels)
 
-p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) # add the consensus points, and remove the legend "size" for proteins
+p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) # add the consensus points, and remove the legend "linewidth" for proteins
 
 # needed to have more than 6 symbols for gene_id legend
 p1bis = p1 + scale_shape_manual(values=c(1:25,1:25))
@@ -179,25 +182,23 @@ if(! is.null(contig_limits)){ # means more than 1 contig
 # }
 
 
-p4 = p3bis + labs(title = t) # add graph title
-p5 = p4 + xlab("Base Position") # add x axe title
+# p4 = p3bis + labs(title = t) # add graph title
+p5 = p3bis + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)"), color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"))
+p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)", nrow=5), color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"),nrow=3)
 		   
 # p7 = p6 + theme(legend.position = "bottom") # modify the legend position
 p8 = p6bis + ylim(-0.06,1.2) # modify the scale
-p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the grapĥ
+p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the graph
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
 p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
-p11 = p10 + theme(plot.title = element_text(hjust=0.5),legend.position="bottom") # 
+minus_maxlength_over6 = - max(contig_limits) / 50
+p10bis = p10 + geom_text(aes(x = minus_maxlength_over6, y = 0.07, label = t1, angle = 0)) # add indice to the graph
+p11 = p10bis + theme(plot.title = element_text(hjust=0.5),legend.position="bottom") # 
 
-grid.arrange(cd, p11, nrow=2
- , widths = c(2, 2), heights = c(1.0, 3.0)
-)
-g <- arrangeGrob(cd3, p11)
+g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
 
-ggsave(outfile, device = "png", plot = g, width = 21, units="cm", height=30, dpi = 600) # save the graph
-# 1 inch = 2.54cm
+ggsave(outfile, device = "png", plot = g, width = 21, units="cm", height=29.7, dpi = 600) # save the graph
 # ~ end of script ~

From 21cddf777f637c333e2728301963a27ff5ceca89 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Mon, 8 Jan 2024 17:08:41 +0100
Subject: [PATCH 16/42] add R packages dependencies

---
 setup.py              | 3 ++-
 src/vvv2_display.yaml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 94fb47f..cb41480 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,8 @@
     install_requires=[
         "python>=3.9",
         "r-ggplot2==3.6.6",
-        "r-gridextra=2.3"
+        "r-gridextra==2.3",
+        "r-cowplot==1.1.1",
         "pysam==0.19.1",
         "numpy==1.23.1"],  # Optional
     scripts=[
diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index b9d51e0..851959e 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -11,3 +11,4 @@ dependencies:
   - openssl
   - r-ggplot2=3.3.6
   - r-gridextra
+  - r-cowplot

From 9cd863715a63c96fd66afe37f40ad5d98e9542ea Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Mon, 8 Jan 2024 17:18:25 +0100
Subject: [PATCH 17/42] update versions

---
 src/vvv2_display.xml  | 4 ++--
 src/vvv2_display.yaml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/vvv2_display.xml b/src/vvv2_display.xml
index 3368b72..3bd7f9c 100644
--- a/src/vvv2_display.xml
+++ b/src/vvv2_display.xml
@@ -1,10 +1,10 @@
-<tool id="vvv2_display" name="vvv2_display: Display SNP proportions and CDS of an assembly in png image" version="0.1.10" python_template_version="3.9">
+<tool id="vvv2_display" name="vvv2_display: Display SNP proportions and CDS of an assembly in png image" version="0.1.11" python_template_version="3.9">
     <requirements><!--
         only for tools used before, not for vvv2_display itself
         <requirement type="package" version="1.4.1">vadr</requirement>
         <requirement type="package" version="1.8.3">vardict-java</requirement>
       <requirement type="package" version="3.5.0">r-base</requirement> -->
-      <requirement type="package" version="0.1.10">vvv2_display</requirement>	      
+      <requirement type="package" version="0.1.11">vvv2_display</requirement>	      
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
        vvv2_display.py -f '$vadr_fail_annotation' -p '$vadr_pass_annotation' -s '$seqstat' -n '$vardict_vcf' -r '$snp_img' -t '$snp_loc' -u '$snp_loc_summary' -j '$json_annot' -k '$bed_annot' -l '$correct_vcf' -m '$contig_limits'
diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index 851959e..a145ef4 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -10,5 +10,5 @@ dependencies:
   - ca-certificates
   - openssl
   - r-ggplot2=3.3.6
-  - r-gridextra
-  - r-cowplot
+  - r-gridextra=2.3
+  - r-cowplot=1.1.1

From bed3964a60bc8df3e08499c214c04d0e24db97ac Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Mon, 8 Jan 2024 17:47:13 +0100
Subject: [PATCH 18/42] adjust text size relatively

---
 src/visualize_snp_v4.R | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 0ba06f5..a1d1984 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -187,16 +187,20 @@ p5 = p3bis + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
 p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)", nrow=5), color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"),nrow=3)
-		   
-# p7 = p6 + theme(legend.position = "bottom") # modify the legend position
-p8 = p6bis + ylim(-0.06,1.2) # modify the scale
+
+# for legend text size
+# p7 = p6bis + theme(legend.position = "bottom") # modify the legend position
+p7 = p6bis + theme(legend.text = element_text(size=rel(0.5)), legend.title=element_text(size=rel(0.8))) # modify the legend position
+
+
+p8 = p7 + ylim(-0.06,1.2) # modify the scale
 p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the graph
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
 p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
 minus_maxlength_over6 = - max(contig_limits) / 50
 p10bis = p10 + geom_text(aes(x = minus_maxlength_over6, y = 0.07, label = t1, angle = 0)) # add indice to the graph
-p11 = p10bis + theme(plot.title = element_text(hjust=0.5),legend.position="bottom") # 
+p11 = p10bis + theme(plot.title = element_text(hjust=0.5),legend.position="bottom") 
 
 g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
 

From 0677297cdf64396fabcd8ca8a00fd540243ee232 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 9 Jan 2024 22:04:27 +0100
Subject: [PATCH 19/42] add dependencies to r-ggh4x

---
 setup.py              | 1 +
 src/vvv2_display.yaml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index cb41480..86cf694 100644
--- a/setup.py
+++ b/setup.py
@@ -72,6 +72,7 @@
         "r-ggplot2==3.6.6",
         "r-gridextra==2.3",
         "r-cowplot==1.1.1",
+        "conda-forge::r-ggh4x",
         "pysam==0.19.1",
         "numpy==1.23.1"],  # Optional
     scripts=[
diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index a145ef4..1500d2c 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -12,3 +12,4 @@ dependencies:
   - r-ggplot2=3.3.6
   - r-gridextra=2.3
   - r-cowplot=1.1.1
+  - conda-forge::r-ggh4x

From 6bc413bf755a9b7820b8cce77ce2bc3e4d806959 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 9 Jan 2024 22:05:04 +0100
Subject: [PATCH 20/42] light cleaning of code, start to use ggh4x, to confirm
 on not

---
 src/visualize_snp_v4.R | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index a1d1984..583054a 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -18,6 +18,7 @@
 library(ggplot2) # import the ggplot2 library
 library(gridExtra) # multi graph on the same figure
 library(cowplot)
+library(ggh4x) # to adjust legend width to the entire plot (and not go further hopefully)
 
 args <- commandArgs(TRUE) # all arguments are character types
 
@@ -186,11 +187,25 @@ if(! is.null(contig_limits)){ # means more than 1 contig
 p5 = p3bis + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides(shape = guide_legend(order=1, direction="vertical", title="proteins (order: protein names)", nrow=5), color = guide_legend(order=2, direction="horizontal", title="genes (order: gene names)"),nrow=3)
-
-# for legend text size
-# p7 = p6bis + theme(legend.position = "bottom") # modify the legend position
-p7 = p6bis + theme(legend.text = element_text(size=rel(0.5)), legend.title=element_text(size=rel(0.8))) # modify the legend position
+p6bis = p6 +guides( shape = guide_legend(order=1, 
+                                        direction="vertical", 
+                                        title="proteins (order: protein names)", 
+                                        nrow=5
+                                        ), 
+                    color = guide_legend(order=2, 
+                                          direction="horizontal", 
+                                          title="genes (order: gene names)"),
+                                          nrow=3
+                                          )
+
+# modify the legend text sizes end position
+p7 = p6bis + theme( plot.title = element_text(hjust=0.5),
+                    legend.position="bottom",
+                    # legend.spacing.x=unit(1,"cm"),
+                    legend.text = element_text(size=rel(0.5)), 
+                    legend.title=element_text(size=rel(0.8)),
+                    
+                    ) # modify the legend position
 
 
 p8 = p7 + ylim(-0.06,1.2) # modify the scale
@@ -199,8 +214,7 @@ p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
 p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
 minus_maxlength_over6 = - max(contig_limits) / 50
-p10bis = p10 + geom_text(aes(x = minus_maxlength_over6, y = 0.07, label = t1, angle = 0)) # add indice to the graph
-p11 = p10bis + theme(plot.title = element_text(hjust=0.5),legend.position="bottom") 
+p11 = p10 + geom_text(aes(x = minus_maxlength_over6, y = 0.07, label = t1, angle = 0)) # add indice to the graph
 
 g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
 

From 37c8691589880b456e4db97ff984af3b510dd89e Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 10 Jan 2024 15:07:52 +0100
Subject: [PATCH 21/42] update R dependencies

---
 setup.py              | 2 +-
 src/vvv2_display.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 86cf694..c1dd236 100644
--- a/setup.py
+++ b/setup.py
@@ -72,7 +72,7 @@
         "r-ggplot2==3.6.6",
         "r-gridextra==2.3",
         "r-cowplot==1.1.1",
-        "conda-forge::r-ggh4x",
+        "conda-forge::r-stringr=1.5.1",
         "pysam==0.19.1",
         "numpy==1.23.1"],  # Optional
     scripts=[
diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index 1500d2c..2cfd83f 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -12,4 +12,4 @@ dependencies:
   - r-ggplot2=3.3.6
   - r-gridextra=2.3
   - r-cowplot=1.1.1
-  - conda-forge::r-ggh4x
+  - conda-forge::r-stringr=1.5.1

From 1eeee767fc80267c72e274ed6f76441d382e14c7 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 10 Jan 2024 15:08:59 +0100
Subject: [PATCH 22/42] suppress similarity description in protein legends,
 better placement of legends

---
 src/visualize_snp_v4.R | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 583054a..bb4ebff 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -18,7 +18,8 @@
 library(ggplot2) # import the ggplot2 library
 library(gridExtra) # multi graph on the same figure
 library(cowplot)
-library(ggh4x) # to adjust legend width to the entire plot (and not go further hopefully)
+library(stringr)
+# library(ggh4x) # to adjust legend width to the entire plot (and not go further hopefully)
 
 args <- commandArgs(TRUE) # all arguments are character types
 
@@ -39,6 +40,8 @@ coverage_depth <- try( read.table(args[5], h=F, sep = "\t") ) # read the datafra
 if(inherits(coverage_depth,"try-error"))
   coverage_depth <- NULL
 
+width <- unit(21, "cm")
+
 t1 = as.character(threshold) # define threshold as a character
 # t = paste("Nucleotide Variation - threshold = ", t1, sep = ' ')
 
@@ -141,6 +144,25 @@ gene_id_labels = unique(density$gene_id)
 density$gene_id_num = paste(  sprintf("%02d", match(density$gene_id, gene_id_labels) ), ":", density$gene_id) 
 
 protein_id_labels = unique(density$protein_id)
+
+# truncate long legend
+replacement <- function(x){
+  replaced = str_replace_all(x, ":[[:alnum:] \\-]+,","")
+  replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+:",":")
+  replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+$","")
+  return( replaced )
+}
+
+# protein_id_labels = lapply(protein_id_labels, FUN = function(x) str_replace_all(x, ":[A-Za-z0-9 ]+,",""))
+protein_id_labels = lapply(protein_id_labels, replacement)
+print("protein_id_labels shortened:")
+print(protein_id_labels)
+
+density$protein_id = lapply(density$protein_id, replacement)
+print("protein_id shortened:")
+print(density$protein_id)
+
+
 density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, protein_id_labels) ), ":",  density$protein_id) 
 
 # protein_id_ordered = fct_reorder(protein_id_labels, density$protein_id_num)
@@ -170,7 +192,7 @@ p3 = p2 + geom_line(aes(x=position, y=threshold)) # add the threshold line
 
 p3bis = p3			   
 if(! is.null(contig_limits)){ # means more than 1 contig
-    for(i in contig_limits){
+  for(i in contig_limits){
         p3bis = p3bis + geom_vline(xintercept=i,linetype="dotted") # add the contig limits (vertical dotted line)
 	}
 }
@@ -187,13 +209,13 @@ if(! is.null(contig_limits)){ # means more than 1 contig
 p5 = p3bis + xlab("Base Position") # add x axe title
 p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
-p6bis = p6 +guides( shape = guide_legend(order=1, 
+p6bis = p6 +guides( shape = guide_legend(order=2, 
                                         direction="vertical", 
                                         title="proteins (order: protein names)", 
-                                        nrow=5
+                                        nrow=15
                                         ), 
-                    color = guide_legend(order=2, 
-                                          direction="horizontal", 
+                    color = guide_legend(order=1, 
+                                          direction="vertical", 
                                           title="genes (order: gene names)"),
                                           nrow=3
                                           )
@@ -201,12 +223,10 @@ p6bis = p6 +guides( shape = guide_legend(order=1,
 # modify the legend text sizes end position
 p7 = p6bis + theme( plot.title = element_text(hjust=0.5),
                     legend.position="bottom",
-                    # legend.spacing.x=unit(1,"cm"),
                     legend.text = element_text(size=rel(0.5)), 
                     legend.title=element_text(size=rel(0.8)),
                     
-                    ) # modify the legend position
-
+                    ) # modify the legend position 
 
 p8 = p7 + ylim(-0.06,1.2) # modify the scale
 p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the graph
@@ -218,5 +238,5 @@ p11 = p10 + geom_text(aes(x = minus_maxlength_over6, y = 0.07, label = t1, angle
 
 g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
 
-ggsave(outfile, device = "png", plot = g, width = 21, units="cm", height=29.7, dpi = 600) # save the graph
+ggsave(outfile, device = "png", plot = g, width = width, units="cm", height=29.7, dpi = 600) # save the graph
 # ~ end of script ~

From 5fa688b1b82f35b1fce5374b7482e9b957172bcd Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 10 Jan 2024 17:54:17 +0100
Subject: [PATCH 23/42] correct graph alignment, remove useless annot, increase
 slighly legend text size, set percentage for variants, better display
 treshold on y axis

---
 src/visualize_snp_v4.R | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index bb4ebff..8c00113 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -1,6 +1,6 @@
 #!/usr/bin/env Rscript
 #
-# FT; last modification December 19th 2023
+# FT; last modification January 10th 2024
 # AF; last modification February 16th 2019
 #
 # Description:  This script has been written in order to generate a graph
@@ -147,9 +147,11 @@ protein_id_labels = unique(density$protein_id)
 
 # truncate long legend
 replacement <- function(x){
-  replaced = str_replace_all(x, ":[[:alnum:] \\-]+,","")
-  replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+:",":")
-  replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+$","")
+  replaced = str_replace_all(x, ":[[:alnum:] \\-]+,",",")            # replace :...., by ,
+  replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+:",":")     # replace :....: by :
+  replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+$","")      # remove last :....
+  replaced = str_replace_all(replaced, "\\[[[:alnum:] \\-]+\\]","")  # remove [...]
+  replaced = str_replace_all(replaced, " (?:putative|growth) [[:alnum:] \\-]+","")  # remove useless annotations
   return( replaced )
 }
 
@@ -212,7 +214,7 @@ p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 p6bis = p6 +guides( shape = guide_legend(order=2, 
                                         direction="vertical", 
                                         title="proteins (order: protein names)", 
-                                        nrow=15
+                                        nrow=16
                                         ), 
                     color = guide_legend(order=1, 
                                           direction="vertical", 
@@ -223,19 +225,21 @@ p6bis = p6 +guides( shape = guide_legend(order=2,
 # modify the legend text sizes end position
 p7 = p6bis + theme( plot.title = element_text(hjust=0.5),
                     legend.position="bottom",
-                    legend.text = element_text(size=rel(0.5)), 
+                    legend.text = element_text(size=rel(0.6)), 
                     legend.title=element_text(size=rel(0.8)),
                     
                     ) # modify the legend position 
 
-p8 = p7 + ylim(-0.06,1.2) # modify the scale
-p9 = p8 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the graph
+p9 = p7 + geom_text(aes(x = position, y = variant_percent + 0.03, label = indice, angle = 0)) # add indice to the graph
 #p10 = p9 + geom_text(x = 0, y = threshold + 0.01, label = t) # add the threshold text
 #p11 = p10 + geom_line(aes(x = position, y = 0.5), color = "red") 
 p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
 minus_maxlength_over6 = - max(contig_limits) / 50
-p11 = p10 + geom_text(aes(x = minus_maxlength_over6, y = 0.07, label = t1, angle = 0)) # add indice to the graph
 
+# give scale and breaks of the y axis
+p11 = p10 + scale_y_continuous(limits=c(-0.06,1.1), labels = scales::percent, breaks=c(0.0, 0.07, 0.2, 0.4, 0.6, 0.8, 1.0))
+
+# gives proportions for covdepth graph (cd3) and variant graph (p11) in the grid plot
 g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
 
 ggsave(outfile, device = "png", plot = g, width = width, units="cm", height=29.7, dpi = 600) # save the graph

From f4d9827130c2b039830784d48b16dfffb5918c48 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 10 Jan 2024 17:58:10 +0100
Subject: [PATCH 24/42] remove space after 'growth' when removing useless
 annotation

---
 src/visualize_snp_v4.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 8c00113..0cc8ed6 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -151,7 +151,7 @@ replacement <- function(x){
   replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+:",":")     # replace :....: by :
   replaced = str_replace_all(replaced, ":[[:alnum:] \\-]+$","")      # remove last :....
   replaced = str_replace_all(replaced, "\\[[[:alnum:] \\-]+\\]","")  # remove [...]
-  replaced = str_replace_all(replaced, " (?:putative|growth) [[:alnum:] \\-]+","")  # remove useless annotations
+  replaced = str_replace_all(replaced, " (?:putative|growth)[[:alnum:] \\-]+","")  # remove useless annotations
   return( replaced )
 }
 

From 39ef2df45a9f67c3ad2b78fa0e137fac9cf59fb7 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Thu, 11 Jan 2024 14:49:46 +0100
Subject: [PATCH 25/42] changes 'non translated RNA' by 'untranslated RNA'

---
 src/convert_vcffile_to_readablefile2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/convert_vcffile_to_readablefile2.py b/src/convert_vcffile_to_readablefile2.py
index 1ff8f2a..1539acd 100755
--- a/src/convert_vcffile_to_readablefile2.py
+++ b/src/convert_vcffile_to_readablefile2.py
@@ -96,7 +96,7 @@ def find_key_proteins(dico_json, genomepos, gene_res):
     """ This function retrieves the protein name and the start and end position of
 this protein.
     """
-    protein = 'non translated RNA'
+    protein = 'untranslated RNA'
     base_inf_allproteins = ''
     base_sup_allproteins = ''
     # print("find_key_proteins call pos ("+str(genomepos)+", "+gene_res+")")    
@@ -112,7 +112,7 @@ def find_key_proteins(dico_json, genomepos, gene_res):
                         protein = 'intergene'
                         base_inf_allproteins = str(base_inf)
                         base_sup_allproteins = str(base_sup)
-                    elif protein == 'non translated RNA':
+                    elif protein == 'untranslated RNA':
                         protein = key
                         base_inf_allproteins = str(base_inf)
                         base_sup_allproteins = str(base_sup)

From cb573930cc036d127ef38cd8c075c0243a1f247a Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Thu, 11 Jan 2024 14:50:24 +0100
Subject: [PATCH 26/42] minor correction in legend titles

---
 src/visualize_snp_v4.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 0cc8ed6..3959b4b 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -213,12 +213,12 @@ p6 = p5 + ylab("Variant Frequency") # add axes and graph titles
 
 p6bis = p6 +guides( shape = guide_legend(order=2, 
                                         direction="vertical", 
-                                        title="proteins (order: protein names)", 
+                                        title="proteins (order: protein positions)", 
                                         nrow=16
                                         ), 
                     color = guide_legend(order=1, 
                                           direction="vertical", 
-                                          title="genes (order: gene names)"),
+                                          title="genes (order: gene positions)"),
                                           nrow=3
                                           )
 

From 2ba73d447c7d6543df4836750dabf7423742e7cf Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Mon, 29 Jan 2024 16:51:37 +0100
Subject: [PATCH 27/42] correct argument usage depending on using covdepth or
 not. Adapt dsplay to using coveragedepth graph or not

---
 src/visualize_snp_v4.R | 55 +++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 3959b4b..64d3a73 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -1,6 +1,6 @@
 #!/usr/bin/env Rscript
 #
-# FT; last modification January 10th 2024
+# FT; last modification January 29th 2024
 # AF; last modification February 16th 2019
 #
 # Description:  This script has been written in order to generate a graph
@@ -35,33 +35,44 @@ threshold = as.numeric(args[3]) # define threshold
 
 outfile = args[4]
 
-# to prepare coverage depth graph above variant/annotation graph
-coverage_depth <- try( read.table(args[5], h=F, sep = "\t") ) # read the dataframe
-if(inherits(coverage_depth,"try-error"))
-  coverage_depth <- NULL
+if( length(args) > 4 )
+{
+  b_covdepth <- TRUE
+  # to prepare coverage depth graph above variant/annotation graph
+  coverage_depth <- try( read.table(args[5], h=F, sep = "\t") ) # read the dataframe
+  if(inherits(coverage_depth,"try-error"))
+    coverage_depth <- NULL
+}else
+{ 
+  b_covdepth <- FALSE 
+}
 
 width <- unit(21, "cm")
 
 t1 = as.character(threshold) # define threshold as a character
 # t = paste("Nucleotide Variation - threshold = ", t1, sep = ' ')
 
-if(! is.null(coverage_depth)){ # means covdepth provided, we prepare var for graph
-  maxi = max(coverage_depth$V2)
-  mini = min(coverage_depth$V2)
-  med = median(coverage_depth$V2)
-  m = paste("Coverage - median_coverage =", med, "[", mini, ":", maxi, "]", sep = " ")
-  options(repr.plot.width = 5, repr.plot.height =1)
-  cd = ggplot(coverage_depth, aes(x = V1, y = V2)) + geom_line(color = "black", linewidth = 0.5)
-  #  + labs(list(title = m, x = "Base Position", y = "Number of Reads")
-  cd1 = cd + labs(title = m) # add graph title
-  cd2 = cd1 + xlab("") # add x axe title
-  cd3 = cd2 + ylab("Number of reads") # add axes and graph titles 
+if(b_covdepth){
+
+  if(! is.null(coverage_depth)){ # means covdepth provided, we prepare var for graph
+    maxi = max(coverage_depth$V2)
+    mini = min(coverage_depth$V2)
+    med = median(coverage_depth$V2)
+    m = paste("Coverage - median_coverage =", med, "[", mini, ":", maxi, "]", sep = " ")
+    options(repr.plot.width = 5, repr.plot.height =1)
+    cd = ggplot(coverage_depth, aes(x = V1, y = V2)) + geom_line(color = "black", linewidth = 0.5)
+    #  + labs(list(title = m, x = "Base Position", y = "Number of Reads")
+    cd1 = cd + labs(title = m) # add graph title
+    cd2 = cd1 + xlab("") # add x axe title
+    cd3 = cd2 + ylab("Number of reads") # add axes and graph titles 
+  }
 }
 
 options(repr.plot.width = 5, repr.plot.height =5) 
 if(is.null(density)){ # means no snp found
     p = ggplot() # plot initialization
-}else{
+}else
+{
     p = ggplot(density) # plot initialization
 }
 
@@ -239,8 +250,14 @@ minus_maxlength_over6 = - max(contig_limits) / 50
 # give scale and breaks of the y axis
 p11 = p10 + scale_y_continuous(limits=c(-0.06,1.1), labels = scales::percent, breaks=c(0.0, 0.07, 0.2, 0.4, 0.6, 0.8, 1.0))
 
-# gives proportions for covdepth graph (cd3) and variant graph (p11) in the grid plot
-g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
+if( b_covdepth ){
+  # gives proportions for covdepth graph (cd3) and variant graph (p11) in the grid plot
+  g <- plot_grid(cd3, p11, align = "v", nrow = 2, rel_heights = c(1/6, 5/6))
+}else
+{
+  # gives proportions for covdepth graph (cd3) and variant graph (p11) in the grid plot
+  g <- plot(p11)
+}
 
 ggsave(outfile, device = "png", plot = g, width = width, units="cm", height=29.7, dpi = 600) # save the graph
 # ~ end of script ~

From 56a8b06980c64f860c2d27438864e47d847ef0e1 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Mon, 29 Jan 2024 16:52:48 +0100
Subject: [PATCH 28/42] inactivate option parsing if test activated, adapt
 script launch depending on using coervage depth info or not

---
 src/vvv2_display.py | 120 +++++++++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 47 deletions(-)

diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index e2a6b5c..2ee6d53 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -21,7 +21,9 @@
 # vvv2_display script: from vardict (variant calling) and vadr (annotator) results,
 # creates a picture of variants alongside the detected viral genome
 ###
-import argparse, os, sys, warnings, re
+import argparse, os, sys, warnings, re, subprocess
+# hashlib: no, needs python 3.11 to have sha256 on files, trigger installation problème
+# we use ssytem sha256sum function instead
 from os import path
 import subprocess
 
@@ -37,7 +39,7 @@ def __main__():
     b_test_convert_tbl2json                = False # ok 2022 04 26 complete tc,
     b_test_correct_multicontig_vardict_vcf = False # ok 2022 04 29 partial tc
     b_test_convert_vcffile_to_readable     = False # ok 2022 04 28 complete tc,
-    b_test_correct_covdepth_f              = True # 2023 12 20 tc,
+    b_test_correct_covdepth_f              = False # ok 2024 01 20 tc,
     b_test_visualize_snp_v4                = False # ok 2022 04 28 complete tc,
     b_test = False
     dir_path = os.path.dirname(os.path.abspath(__file__)) # dir of current script
@@ -156,6 +158,7 @@ def __main__():
 
     # get absolute path in case of files
     args = parser.parse_args()
+    print(prog_tag + " arguments obtained, checking... line "+str(frame.f_lineno))
 
     # -------------------------------------------
     # check arguments
@@ -198,50 +201,67 @@ def __main__():
               " arguments, exit line "+str(frame.f_lineno))
         sys.exit(0)
 
-    # print('args:', args)
-    if args.pass_annot_f is not None:
-        pass_annot_f = os.path.abspath(args.pass_annot_f)
-    elif(not b_test):
-        sys.exit("[Error] You must provide pass_annot_f")
-    if args.fail_annot_f is not None:
-        fail_annot_f = os.path.abspath(args.fail_annot_f)
-    elif(not b_test):
-        sys.exit("[Error] You must provide fail_annot_f")
-    if args.seq_stat_f is not None:
-        seq_stat_f = os.path.abspath(args.seq_stat_f)
-    elif(not b_test):
-        sys.exit("[Error] You must provide seq_stat_f")
-    if args.vardict_vcf_f is not None:
-        vardict_vcf_f = os.path.abspath(args.vardict_vcf_f)
-    elif(not b_test):
-        sys.exit("[Error] You must provide vcf_f")
-    if args.png_var_f is not None:
-        png_var_f = os.path.abspath(args.png_var_f)
-    elif(not b_test):
-        sys.exit("[Error] You must provide png_var_f name for output")
-    if args.snp_loc_f is not None:
-        snp_loc_f = os.path.abspath(args.snp_loc_f)
-    if args.snp_loc_summary_f is not None:
-        snp_loc_summary_f = os.path.abspath(args.snp_loc_summary_f)
-    if args.cov_depth_f is not None:
-        cov_depth_f = os.path.abspath(args.cov_depth_f)
-        b_cov_depth_display = True
-    # ----------------------------------------------------------------
-    # optional arguments only for Galaxy compatibility
-    if args.json_annot_f is not None:
-        json_annot_f = os.path.abspath(args.json_annot_f)
-    if args.bed_vardict_annot_f is not None:
-        bed_vardict_annot_f = os.path.abspath(args.bed_vardict_annot_f)
-    if args.correct_vcf_f is not None:
-        correct_vcf_f = os.path.abspath(args.correct_vcf_f)
-    if args.contig_limits_f is not None:
-        contig_limits_f = os.path.abspath(args.contig_limits_f)
-    if args.cov_depth_corr_f is not None:
-        cov_depth_corr_f = os.path.abspath(args.cov_depth_corr_f)
-    # ----------------------------------------------------------------
-    
-    if args.b_verbose is not None:
-        b_verbose = args.b_verbose
+    if not b_test_vvv2_display:
+        # print('args:', args)
+        if args.pass_annot_f is not None:
+            pass_annot_f = os.path.abspath(args.pass_annot_f)
+        elif(not b_test):
+            sys.exit("[Error] You must provide pass_annot_f")
+        if args.fail_annot_f is not None:
+            fail_annot_f = os.path.abspath(args.fail_annot_f)
+        elif(not b_test):
+            sys.exit("[Error] You must provide fail_annot_f")
+        if args.seq_stat_f is not None:
+            seq_stat_f = os.path.abspath(args.seq_stat_f)
+        elif(not b_test):
+            sys.exit("[Error] You must provide seq_stat_f")
+        if args.vardict_vcf_f is not None:
+            vardict_vcf_f = os.path.abspath(args.vardict_vcf_f)
+        elif(not b_test):
+            sys.exit("[Error] You must provide vcf_f")
+        if args.png_var_f is not None:
+            png_var_f = os.path.abspath(args.png_var_f)
+        elif(not b_test):
+            sys.exit("[Error] You must provide png_var_f name for output")
+        if args.snp_loc_f is not None:
+            snp_loc_f = os.path.abspath(args.snp_loc_f)
+        if args.snp_loc_summary_f is not None:
+            snp_loc_summary_f = os.path.abspath(args.snp_loc_summary_f)
+                   
+        if( (args.cov_depth_f is not None) and (os.path.isfile(args.cov_depth_f)) ):
+            cov_depth_f = os.path.abspath(args.cov_depth_f)
+            b_cov_depth_display = True
+        else:
+            b_cov_depth_display = False
+        # ----------------------------------------------------------------
+        # optional arguments only for Galaxy compatibility
+        if args.json_annot_f is not None:
+            json_annot_f = os.path.abspath(args.json_annot_f)
+        if args.bed_vardict_annot_f is not None:
+            bed_vardict_annot_f = os.path.abspath(args.bed_vardict_annot_f)
+        if args.correct_vcf_f is not None:
+            correct_vcf_f = os.path.abspath(args.correct_vcf_f)
+
+
+        if args.contig_limits_f is None:
+            # if name of contig_limits file is not provided by user, deduce a name file with a part
+            # deduced from pass_annot_f using sha256 checksum
+            # to avoid bad interferences of serveral vvv2_display runs results  
+            
+            cmd = "/usr/bin/sha256sum "+pass_annot_f
+            # print(prog_tag + " cmd:"+cmd)
+            sha256_f = subprocess.getoutput(cmd).split()[0]
+            contig_limits_f = str(sha256_f) + '_contig_limits.txt'
+            # print(prog_tag + " contig_limits file name created:"+ contig_limits_f)
+        else:
+            contig_limits_f = os.path.abspath(args.contig_limits_f)
+        if args.cov_depth_corr_f is not None:
+            cov_depth_corr_f = os.path.abspath(args.cov_depth_corr_f)
+        # ----------------------------------------------------------------
+        
+        if args.b_verbose is not None:
+            b_verbose = args.b_verbose
+        print(prog_tag + " arguments checked... line "+str(frame.f_lineno))
 
 
     # ------------------------------------------------------------------
@@ -373,7 +393,7 @@ def __main__():
        correct_vcf_f = vardict_vcf_f
 #       correct_vcf_f = correct_vcf_f.replace('vardict.vcf', '_correct.vcf')
        correct_vcf_f = re.sub('\.[^\.]+$', '_correct.vcf', correct_vcf_f)
-       
+         
     p_script = PYTHON_SCRIPTS + "correct_multicontig_vardict_vcf.py"
     print("p_script:" + p_script)
     cmd = ' '.join([p_script,
@@ -391,6 +411,7 @@ def __main__():
         print(prog_tag + " [test_correct_multicontig_vardict_vcf] END")
         sys.exit()
     else:
+        print(prog_tag + " cmd:" + cmd)
         os.system(cmd)
 
     # ------------------------------------------------------------------
@@ -444,6 +465,7 @@ def __main__():
 
     # creates corrected cov depth file
     if b_cov_depth_display:
+        print("b_cov_depth_display:"+str(b_cov_depth_display)+ ", line " + str(frame.f_lineno))
         if b_test_correct_covdepth_f and (not b_test_vvv2_display):
             cov_depth_f = test_dir + "/res_vvv2_covdepth.txt"
             cov_depth_corr_f = test_dir + "/res_vvv2_covdepth_corrected.txt"
@@ -506,6 +528,10 @@ def __main__():
         os.system(cmd)    
     # ------------------------------------------------------------------
     print(png_var_f + " file created")
+    
+    # remove useless file
+    if os.path.isfile(contig_limits_f):
+        os.unlink(contig_limits_f)
 
 ##### MAIN END
 if __name__=="__main__":__main__()

From b9d9bc713cd5b1cd4a2b096f50ff51b07ba107f9 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Mon, 29 Jan 2024 17:40:48 +0100
Subject: [PATCH 29/42] change note for homopolymers by mentioning now
 Iont-Torrent AND Nanopore, not only Ion-Torrent

---
 src/convert_vcffile_to_readablefile2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/convert_vcffile_to_readablefile2.py b/src/convert_vcffile_to_readablefile2.py
index 1539acd..de77118 100755
--- a/src/convert_vcffile_to_readablefile2.py
+++ b/src/convert_vcffile_to_readablefile2.py
@@ -318,7 +318,7 @@ def is_homopolymer(lseq, rseq, ref, alt):
             filout.write(line)
         filout.write("""
 *NB: an homopolymer region is set to 'yes' if there is a succession of at least 3 identical nucleotides.
-     it looks like a restrictive measure, but Ion Torrent sequencing is very bad on such region, so make sure you verify these variants.""")
+     it looks like a restrictive measure, but Ion Torrent and Nanopore sequencing are very bad on such region, so make sure you verify these variants.""")
     print(prog_tag + ' '+ args.out +" file created")
     print(prog_tag + ' '+ args.outs +" file created")
 ## ~ end of script ~ ##

From eb1d1116ae3b641911cbe978cb0c69c85bea9845 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Fri, 9 Feb 2024 11:06:10 +0100
Subject: [PATCH 30/42] correct bug when providing snp_loc_summary_f but not
 snp_loc_f

---
 src/vvv2_display.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index 2ee6d53..d22c09d 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -433,9 +433,9 @@ def __main__():
 
     if(snp_loc_f == ''):
        snp_loc_f = vardict_vcf_f
-       snp_loc_summary_f = vardict_vcf_f
-       # snp_loc_f = snp_loc_f.replace('.vcf', '_snp.txt')
        snp_loc_f = re.sub('\.[^\.]+$', '_snp.txt', snp_loc_f)
+    if(snp_loc_summary_f == ''):   
+       snp_loc_summary_f = vardict_vcf_f
        snp_loc_summary_f = re.sub('\.[^\.]+$', '_snp_summary.txt', snp_loc_summary_f)
       
     # vcf file from vardict

From 382d469850e023c41e5fd34c29fb37fec2ca4264 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 27 Feb 2024 22:19:56 +0100
Subject: [PATCH 31/42] dvpmt to display genes as boxes

---
 src/visualize_snp_v4.R | 112 ++++++++++++++++++++++++++++++-----------
 src/vvv2_display.py    |  17 +++++--
 src/vvv2_display.yaml  |   2 +
 3 files changed, 98 insertions(+), 33 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 64d3a73..0cb71cf 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -13,13 +13,12 @@
 ################################################################################
 # ~ start of script ~
 
-# install.packages("viridis")
-# library("viridis")
 library(ggplot2) # import the ggplot2 library
 library(gridExtra) # multi graph on the same figure
 library(cowplot)
 library(stringr)
-# library(ggh4x) # to adjust legend width to the entire plot (and not go further hopefully)
+library(jsonlite) # to read json
+b_verbose <- FALSE
 
 args <- commandArgs(TRUE) # all arguments are character types
 
@@ -33,13 +32,18 @@ if(inherits(contig_limits,"try-error"))
   
 threshold = as.numeric(args[3]) # define threshold
 
-outfile = args[4]
+# added 2024 02 27
+json_genes = read_json(args[4]) # json file with genes limits in "genes"->"name" [start,end]
+#str(json_genes$genes)
 
-if( length(args) > 4 )
+
+outfile = args[5]
+
+if( length(args) > 5 )
 {
   b_covdepth <- TRUE
   # to prepare coverage depth graph above variant/annotation graph
-  coverage_depth <- try( read.table(args[5], h=F, sep = "\t") ) # read the dataframe
+  coverage_depth <- try( read.table(args[6], h=F, sep = "\t") ) # read the dataframe
   if(inherits(coverage_depth,"try-error"))
     coverage_depth <- NULL
 }else
@@ -86,21 +90,26 @@ genecols=c('#99CC00','#CC9900','#FFCC33','#FF9900','#FF6600','#FF3300','#CC3300'
 
 # check color redundancy
 genecols_unique = sort(unique(genecols))
-print("max_col_number_unique:")
-print(length(genecols_unique))
+
+if( b_verbose ){
+  print("max_col_number_unique:")
+  print(length(genecols_unique))
+}
 
 if( length(genecols) != length(genecols_unique) ){
     genecols = sort(genecols)
     for( icol in 1:length(genecols_unique) ){
-       print("genecols_unique de ")
-       print(icol)
-       print(":")
-       print(genecols_unique[icol])
-       if( genecols_unique[icol] != genecols[icol] ){
-           redund_vals = genecols[icol]
-    	   print("Color found several times:")
-    	   print(redund_vals)
-    	   stop()
+      if( b_verbose ){
+        print("genecols_unique de ")
+        print(icol)
+        print(":")
+        print(genecols_unique[icol])
+      }
+      if( genecols_unique[icol] != genecols[icol] ){
+        redund_vals = genecols[icol]
+    	  print("Color found several times:")
+    	  print(redund_vals)
+    	  stop()
 	}
     }
     print("first values identical, added values in genecols:")
@@ -166,15 +175,18 @@ replacement <- function(x){
   return( replaced )
 }
 
+
 # protein_id_labels = lapply(protein_id_labels, FUN = function(x) str_replace_all(x, ":[A-Za-z0-9 ]+,",""))
 protein_id_labels = lapply(protein_id_labels, replacement)
-print("protein_id_labels shortened:")
-print(protein_id_labels)
-
+if( b_verbose ){
+  print("protein_id_labels shortened:")
+  print(protein_id_labels)
+}
 density$protein_id = lapply(density$protein_id, replacement)
-print("protein_id shortened:")
-print(density$protein_id)
-
+if( b_verbose ){
+  print("protein_id shortened:")
+  print(density$protein_id)
+}
 
 density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, protein_id_labels) ), ":",  density$protein_id) 
 
@@ -187,16 +199,60 @@ density$protein_id_num = paste(  sprintf("%03d", match(density$protein_id, prote
 # print("gene_id_num:")
 # print(density$gene_id_num)
 
-print("gene_id_labels:")
-print(gene_id_labels)
+if( b_verbose ){
+  print("gene_id_labels:")
+  print(gene_id_labels)
+}
+
+# add the consensus points, and remove the legend "linewidth" for proteins
+p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) 
+# add consensus gene boxes 
+p1bis = p1
+
+gnames = names(json_genes$genes)
+y1 = -0.04
+y2 = -0.02
+
+nrange = c(1:length(names))
+df <- data.frame(
+  xmin = lapply(nrange, FUN=function(x) json_genes$genes[[x]][[1]] ), # gene_start
+  xmax = lapply(nrange, FUN=function(x) json_genes$genes[[x]][[2]] ),       # gene_end
+  ymin = lapply(nrange, FUN=function(x) -0.02 * (x %% 2) ),                 # lower box line
+  ymax = lapply(nrange, FUN=function(x) -0.02 * (1 + (x %% 2)) ),           # upper box line
+  color = rep("black", length(names)),
+  fill = rep("white", length(names)))
+
+# p1bis = p1 + geom_rect(data=df,
+#		                    mapping = aes(xmin = df$xmin, xmax = df$xmax,
+#			                  ymin = df$ymin, ymax = df$ymax, color=df$color, fill = df$fill))
+
+# ggplot(df) +
+  # sapply(df, FUN=function(xmin, xmax, ymin, ymax, color, fill) geom_rect(X, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill))) 
+      
+
+# for( i in 1:length(json_genes$genes) ){
+
+#   # get the names of the lists in genes, here genes names
+#   gname = gnames[i]
+#   gstart = json_genes$genes[[i]][[1]]
+#   gend = json_genes$genes[[i]][[2]]
+#   if((i %% 2) == 0){ 
+#     y1 = -0.04
+#     y2 = -0.02  
+#   }else{
+#     y1 = -0.02
+#     y2 = 0.0
+#   }
+#   print(c('gene no:', i, gname, gstart, gend, y1, y2))
+#   p1bis = p1bis + geom_rect(aes(xmin = gstart, xmax = gend, ymin = y1, ymax = y2), color = "black", fill = "white", show.legend = F) 
+# } 
 
-p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) # add the consensus points, and remove the legend "linewidth" for proteins
 
 # needed to have more than 6 symbols for gene_id legend
-p1bis = p1 + scale_shape_manual(values=c(1:25,1:25))
+p1ter = p1bis + scale_shape_manual(values=c(1:25,1:25))
 
 # use color more easily distinguishable
-p1ter = p1bis + scale_color_manual(values=genecols[1:length(gene_id_labels)])
+p1quad = p1ter + scale_color_manual(values=genecols[1:length(gene_id_labels)])
 
 p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$gene_id_num, shape = density$protein_id_num)) # add the variant points
 # p2 = p1ter + geom_point(aes(x = position, y = as.numeric(variant_percent), color = density$gene_id_num, shape = density$gene_id)) # add the variant points
diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index d22c09d..93f1bda 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -43,6 +43,8 @@ def __main__():
     b_test_visualize_snp_v4                = False # ok 2022 04 28 complete tc,
     b_test = False
     dir_path = os.path.dirname(os.path.abspath(__file__)) # dir of current script
+
+    b_verbose = True
     # allow to run tests from everywhere
     
     prog_tag = '[' + os.path.basename(__file__) + ']'
@@ -493,10 +495,11 @@ def __main__():
         # snp_loc_summary_f =  test_dir + "/res2_snp_summary.txt"
         # png_var_f =  test_dir + "/res2_snp.png"
         # CONTIGS
-        snp_loc_f         =  test_dir + "/res_snp.txt"
-        snp_loc_summary_f =  test_dir + "/res_snp_summary.txt"
-        png_var_f         =  test_dir + "/res_snp.png"
-        contig_limits_f   =  test_dir + "/contig_limits.txt"
+        snp_loc_f         = test_dir + "/res_snp.txt"
+        snp_loc_summary_f = test_dir + "/res_snp_summary.txt"
+        json_annot_f      = test_dir + "/res_vadr.json"
+        png_var_f         = test_dir + "/res_snp.png"
+        contig_limits_f   = test_dir + "/contig_limits.txt"
         
     if(png_var_f == ''):
        png_var_f = snp_loc_f
@@ -510,13 +513,17 @@ def __main__():
                 snp_loc_f, 
                 contig_limits_f,
                 threshold,
+                json_annot_f,
                 png_var_f])
 
     # parameter to allow coverage depth display above variants/annotations
     if b_cov_depth_display:
         cmd = cmd + " " + cov_depth_corr_f + " "
     
-    cmd = cmd + " ".join([" < ", r_script, " > /dev/null"])
+    if b_verbose:
+        cmd = cmd + " ".join([" < ", r_script, " > /dev/null"])
+    else:
+        cmd = cmd + " ".join([" < ", r_script])
     print(prog_tag + " cmd:" + cmd)
 
     if b_test_visualize_snp_v4:
diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index 2cfd83f..167a05a 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -13,3 +13,5 @@ dependencies:
   - r-gridextra=2.3
   - r-cowplot=1.1.1
   - conda-forge::r-stringr=1.5.1
+  - conda-forge::r-jsonlite=1.8.8
+  

From f206f9ecf16c78e699d21cf4b8a5b39e7cf17b06 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Thu, 29 Feb 2024 14:37:00 +0100
Subject: [PATCH 32/42] add gene boxes bellow abscissia, add gene names in box
 when box space is long enough

---
 src/visualize_snp_v4.R | 116 +++++++++++++++++++++++++++--------------
 1 file changed, 76 insertions(+), 40 deletions(-)

diff --git a/src/visualize_snp_v4.R b/src/visualize_snp_v4.R
index 0cb71cf..5f62d06 100755
--- a/src/visualize_snp_v4.R
+++ b/src/visualize_snp_v4.R
@@ -1,6 +1,6 @@
 #!/usr/bin/env Rscript
 #
-# FT; last modification January 29th 2024
+# FT; last modification February 29th 2024
 # AF; last modification February 16th 2019
 #
 # Description:  This script has been written in order to generate a graph
@@ -205,48 +205,84 @@ if( b_verbose ){
 }
 
 # add the consensus points, and remove the legend "linewidth" for proteins
-p1 = p + geom_point(aes(x = position, y = -0.05, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) 
+p1 = p + geom_point(aes(x = position, y = -0.1, colour = density$gene_id_num, shape = density$protein_id_num), size = 1, show.legend = F, shape = 15) 
 # add consensus gene boxes 
 p1bis = p1
 
 gnames = names(json_genes$genes)
-y1 = -0.04
-y2 = -0.02
-
-nrange = c(1:length(names))
-df <- data.frame(
-  xmin = lapply(nrange, FUN=function(x) json_genes$genes[[x]][[1]] ), # gene_start
-  xmax = lapply(nrange, FUN=function(x) json_genes$genes[[x]][[2]] ),       # gene_end
-  ymin = lapply(nrange, FUN=function(x) -0.02 * (x %% 2) ),                 # lower box line
-  ymax = lapply(nrange, FUN=function(x) -0.02 * (1 + (x %% 2)) ),           # upper box line
-  color = rep("black", length(names)),
-  fill = rep("white", length(names)))
-
-# p1bis = p1 + geom_rect(data=df,
-#		                    mapping = aes(xmin = df$xmin, xmax = df$xmax,
-#			                  ymin = df$ymin, ymax = df$ymax, color=df$color, fill = df$fill))
-
-# ggplot(df) +
-  # sapply(df, FUN=function(xmin, xmax, ymin, ymax, color, fill) geom_rect(X, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill))) 
-      
-
-# for( i in 1:length(json_genes$genes) ){
-
-#   # get the names of the lists in genes, here genes names
-#   gname = gnames[i]
-#   gstart = json_genes$genes[[i]][[1]]
-#   gend = json_genes$genes[[i]][[2]]
-#   if((i %% 2) == 0){ 
-#     y1 = -0.04
-#     y2 = -0.02  
-#   }else{
-#     y1 = -0.02
-#     y2 = 0.0
-#   }
-#   print(c('gene no:', i, gname, gstart, gend, y1, y2))
-#   p1bis = p1bis + geom_rect(aes(xmin = gstart, xmax = gend, ymin = y1, ymax = y2), color = "black", fill = "white", show.legend = F) 
-# } 
-
+ybase  = -0.04
+yshift = -0.00
+xshift = -0.03
+# minimal length of gene to consider to display name in gene boxes in the graph (bellow the absissia)
+min_glength = 600
+
+nrange = 1:length(gnames)
+
+xmi=lapply(nrange, FUN=function(x){ json_genes$genes[[x]][[1]] } )
+xmi=as.numeric(unlist(xmi))
+
+xma=lapply(nrange, FUN=function(x){ json_genes$genes[[x]][[2]] } )
+xma=as.numeric(unlist(xma))
+
+# average x to write text into
+xmm=lapply(nrange, FUN=function(x){ (xma[x] + xmi[x]) / 2 + xshift } )
+xmm=as.numeric(unlist(xmm))
+
+ymi=lapply(nrange, FUN=function(x){ ybase * (x %% 2) + yshift  } )
+ymi=as.numeric(unlist(ymi)) # c(-0.04, -0.04,-0.04, -0.04,-0.04, -0.04,-0.04, -0.04,-0.04, -0.04,-0.04, -0.04,-0.04)
+
+yma=lapply(nrange, FUN=function(x){ ybase * (1 + (x %% 2)) + yshift } ) 
+yma=as.numeric(unlist(yma)) # c(-0.02, -0.02,-0.02, -0.02,-0.02, -0.02,-0.02, -0.02,-0.02, -0.02,-0.02, -0.02,-0.02)
+
+# average y to write text into
+ymm=lapply(nrange, FUN=function(x){ (yma[x] + ymi[x]) / 2 + yshift } )
+ymm=as.numeric(unlist(ymm))
+
+gnames_label=lapply(nrange, FUN=function(x){ 
+                                    glabel = ""
+                                    if( (xma[x] - xmi[x]) > min_glength){
+                                      glabel=gnames[x]
+                                    }
+                                    glabel
+                              } 
+                      )
+# colrect=rep("black", length(gnames))
+# filrect=rep("white", length(gnames))
+
+df=data.frame(xmi,xma,ymi,yma) # ,colrect,filrect)
+
+print(gnames)
+print(gnames_label)
+print(json_genes$genes[[1]][[1]])
+class(json_genes$genes[[1]][[1]])
+print(nrange)
+print(xmi)
+print(xma)
+print(xmm)
+print(ymi)
+print(yma)
+print(ymm)
+# print(colrect)
+# print(filrect)
+
+class(xmi)
+class(ymi)
+# class(colrect)
+
+p1bis = p1 + geom_rect(data=df, mapping=aes(xmin=xmi, 
+                                xmax=xma, 
+                                ymin=ymi, 
+                                ymax=yma), 
+                                # color=colrect, 
+                                # fill=filrect), 
+                        color="black",
+                        fill="white",
+                        show.legend = FALSE,
+                        inherit.aes = FALSE) + geom_text(
+                                                  data = df,
+                                                  aes(x = xmm, y = ymm, label = gnames_label),
+                                                  size = 3, check_overlap = TRUE # vjust = 0, hjust = 0,
+       )
 
 # needed to have more than 6 symbols for gene_id legend
 p1ter = p1bis + scale_shape_manual(values=c(1:25,1:25))
@@ -304,7 +340,7 @@ p10 = p9 + geom_line(aes(x = position, y = 0.5), color = "red")
 minus_maxlength_over6 = - max(contig_limits) / 50
 
 # give scale and breaks of the y axis
-p11 = p10 + scale_y_continuous(limits=c(-0.06,1.1), labels = scales::percent, breaks=c(0.0, 0.07, 0.2, 0.4, 0.6, 0.8, 1.0))
+p11 = p10 + scale_y_continuous(limits=c(-0.1,1.1), labels = scales::percent, breaks=c(0.0, 0.07, 0.2, 0.4, 0.6, 0.8, 1.0))
 
 if( b_covdepth ){
   # gives proportions for covdepth graph (cd3) and variant graph (p11) in the grid plot

From 28267ae8aa5f2ad7a4128260a31dc5737a700374 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 12 Mar 2024 08:56:46 +0100
Subject: [PATCH 33/42] add option to provide cov depth file for using cov
 depth display at the top of the graphical output

---
 src/vvv2_display.xml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/vvv2_display.xml b/src/vvv2_display.xml
index 3bd7f9c..59f3e1f 100644
--- a/src/vvv2_display.xml
+++ b/src/vvv2_display.xml
@@ -1,19 +1,16 @@
-<tool id="vvv2_display" name="vvv2_display: Display SNP proportions and CDS of an assembly in png image" version="0.1.11" python_template_version="3.9">
-    <requirements><!--
-        only for tools used before, not for vvv2_display itself
-        <requirement type="package" version="1.4.1">vadr</requirement>
-        <requirement type="package" version="1.8.3">vardict-java</requirement>
-      <requirement type="package" version="3.5.0">r-base</requirement> -->
-      <requirement type="package" version="0.1.11">vvv2_display</requirement>	      
+<tool id="vvv2_display" name="vvv2_display: Display SNP proportions and CDS of an assembly in png image" version="0.2.0" python_template_version="3.9">
+    <requirements>
+      <requirement type="package" version="0.2.0">vvv2_display</requirement>	      
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-       vvv2_display.py -f '$vadr_fail_annotation' -p '$vadr_pass_annotation' -s '$seqstat' -n '$vardict_vcf' -r '$snp_img' -t '$snp_loc' -u '$snp_loc_summary' -j '$json_annot' -k '$bed_annot' -l '$correct_vcf' -m '$contig_limits'
+       vvv2_display.py -f '$vadr_fail_annotation' -p '$vadr_pass_annotation' -s '$seqstat' -n '$vardict_vcf' -r '$snp_img' -o '$cov_depth' -e '$cov_depth_corr' -t '$snp_loc' -u '$snp_loc_summary' -j '$json_annot' -k '$bed_annot' -l '$correct_vcf' -m '$contig_limits'
     ]]></command>
     <inputs>
         <param type="data" name="vadr_fail_annotation" format="tabular" />
         <param type="data" name="vadr_pass_annotation" format="tabular" />
         <param type="data" name="seqstat" format="txt" />
         <param type="data" name="vardict_vcf" format="vcf" />
+        <param type="data" name="cov_depth" format="txt" />
     </inputs>
     <outputs>
 	<!-- <data name="json_annot" format="txt">
@@ -34,6 +31,7 @@
         <data name="correct_vcf" format="vcf" />
         <data name="contig_limits" format="txt" />
 	<!-- end intermediate output files added for Galaxy compatibility -->		
+        <data name="cov_depth_corr" format="txt" />
     </outputs>
     <tests>
       <test>
@@ -76,6 +74,8 @@ optional arguments:
                         in: vcf variant file provided by vardict
   -r FILE, --png_var_f FILE
                         out: png file with variant proportions and annotations
+  -o FILE, --cov_depth_f FILE
+                        [optional] in: text file of coverage depths (given by samtools depth)
     ]]></help>
     <citations>
         <citation type="bibtex">

From ec70fe5568b1a7cb62a57f36c696dd4fd42bb1c9 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Tue, 12 Mar 2024 08:57:30 +0100
Subject: [PATCH 34/42] update version to 0.2.0, add dependency for json
 parsing in R

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c1dd236..083c7c6 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
 
 setuptools.setup(
     name="vvv2_display",  # Required
-    version="0.1.11",  # Required
+    version="0.2.0",  # Required
     description="Viral Variant Visualizer 2 display",  # Optional
     long_description=long_description,
     long_description_content_type="text/markdown",  # Optional (see note above)
@@ -73,6 +73,7 @@
         "r-gridextra==2.3",
         "r-cowplot==1.1.1",
         "conda-forge::r-stringr=1.5.1",
+        "conda-forge::r-jsonlite=1.8.8",
         "pysam==0.19.1",
         "numpy==1.23.1"],  # Optional
     scripts=[

From 0ee1bbe6ef674fd79aef021251091a97c155156f Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 12:05:04 +0100
Subject: [PATCH 35/42] correct bug missing variants in summary.txt: simplify
 regexp for comment (avoid to fill all possible characters in descriptions)

---
 src/convert_vcffile_to_readablefile2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/convert_vcffile_to_readablefile2.py b/src/convert_vcffile_to_readablefile2.py
index de77118..12b5c71 100755
--- a/src/convert_vcffile_to_readablefile2.py
+++ b/src/convert_vcffile_to_readablefile2.py
@@ -284,7 +284,7 @@ def is_homopolymer(lseq, rseq, ref, alt):
     A = 0 # this flag is used in the write_line function in order to add indices to line where variants are upper than threshold
     with open(args.out, "w") as filout:
         summary_list = []
-        regex = '([0-9]+)\t1\t([A-Z\>\<]+)\t([A-Z\>\<]+)\t([0-9\.]+)\t[A-Z\>\<]+\t[A-Z\>\<]+\t([A-Z0-9a-z\-_\, /]+)\t([A-Z0-9a-z\-_\, /\]\[]*)\t1\t([0-9]+)\t([A-Z]+\t[A-Z]+\t(no|yes))\n'
+        regex = '([0-9]+)\t1\t([A-Z\>\<]+)\t([A-Z\>\<]+)\t([0-9\.]+)\t[A-Z\>\<]+\t[A-Z\>\<]+\t([A-Z0-9a-z\-_\, /]+)\t([^\t]*)\t1\t([0-9]+)\t([A-Z]+\t[A-Z]+\t(no|yes))\n'
         REGEX = re.compile(regex)    
         filout.write("position\tSNP\tref\talt\tvariant_percent\tadd_ref\tadd_alt\tgene_id\tprotein_id\tsize_point\tindice\tlseq\trseq\tisHomo\n")
         a = 0 # flag to find the first genomic region after initialization of i

From 0be1619c44e7cdd48a1a15fe8863a9c6428b1b71 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 13:54:43 +0100
Subject: [PATCH 36/42] verbose mode set to false, update test dates info

---
 src/vvv2_display.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/vvv2_display.py b/src/vvv2_display.py
index 93f1bda..8b8016d 100755
--- a/src/vvv2_display.py
+++ b/src/vvv2_display.py
@@ -38,13 +38,13 @@ def __main__():
     b_test_vvv2_display                    = False # ok 2022 05 05 complet, partial tc
     b_test_convert_tbl2json                = False # ok 2022 04 26 complete tc,
     b_test_correct_multicontig_vardict_vcf = False # ok 2022 04 29 partial tc
-    b_test_convert_vcffile_to_readable     = False # ok 2022 04 28 complete tc,
+    b_test_convert_vcffile_to_readable     = False # ok 2024 03 27 complete tc,
     b_test_correct_covdepth_f              = False # ok 2024 01 20 tc,
-    b_test_visualize_snp_v4                = False # ok 2022 04 28 complete tc,
+    b_test_visualize_snp_v4                = False # ok 2024 03 27 complete tc,
     b_test = False
     dir_path = os.path.dirname(os.path.abspath(__file__)) # dir of current script
 
-    b_verbose = True
+    b_verbose = False
     # allow to run tests from everywhere
     
     prog_tag = '[' + os.path.basename(__file__) + ']'

From 6c16da833f99961b160e737842bf98e85635cfa8 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 14:15:24 +0100
Subject: [PATCH 37/42] update modif log and date

---
 src/convert_vcffile_to_readablefile2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/convert_vcffile_to_readablefile2.py b/src/convert_vcffile_to_readablefile2.py
index 12b5c71..a53a0cc 100755
--- a/src/convert_vcffile_to_readablefile2.py
+++ b/src/convert_vcffile_to_readablefile2.py
@@ -16,6 +16,7 @@
 # You should have received a copy of the GNU General Public License 
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
+# FT - correct regexp part for description parsing: simplified, now do not miss variant: March 27th 2024
 # FT - correct gene identif/name (position provided instead sometimes): June 15th 2023
 # FT - last modification: September 19th 2022 to replace pyvcf by pysam
 # AF - last modification: August 21st 2018

From 102a4c54ac225f835d07bd946c5e70b08b5b6f5a Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 14:16:12 +0100
Subject: [PATCH 38/42] update dependencies according to version observed in
 current env

---
 setup.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index 083c7c6..7d536f5 100644
--- a/setup.py
+++ b/setup.py
@@ -69,13 +69,13 @@
     include_package_data=True,
     install_requires=[
         "python>=3.9",
-        "r-ggplot2==3.6.6",
-        "r-gridextra==2.3",
-        "r-cowplot==1.1.1",
-        "conda-forge::r-stringr=1.5.1",
-        "conda-forge::r-jsonlite=1.8.8",
+        "r-ggplot2>=3.4.4",
+        "r-gridextra>=2.3",
+        "r-cowplot>=1.1.1",
+        "conda-forge::r-stringr>=1.5.1",
+        "conda-forge::r-jsonlite>=1.8.8",
         "pysam==0.19.1",
-        "numpy==1.23.1"],  # Optional
+        "numpy>=1.23.1"],  # Optional
     scripts=[
         "src/vvv2_display.py",
         "src/convert_tbl2json.py",

From cc7bde58af0a9c35980ecb69b2b12485b96f9307 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 14:22:23 +0100
Subject: [PATCH 39/42] record modifications for version 0.2.0

---
 CHANGELOG | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index c5d5840..1609ca0 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,12 +1,18 @@
+0.2.0:
+  - update dependencies versions
+  - correct bug in PYTHON_SCRIPTS/convert_vcffile_to_readablefile2.py that missed some variants in text summary file
+	- add boxes for genes under graph (to fit traditional display of viral genomes)
+	- add [optional] coverage depth display in final png (if no cov depth provided, part disabled) aligned of variant graph
+	  for positions
 0.1.11:
-        - display prot instead of genes and genes instead of prot
-        - add num in front of genes to ensure to keep apparition order
-        - replace fstrings in vvv2_display.py to allow better compatibility
-        - add src/correct_covdepth_f.py script to convert cov depth files of samtools depth so as to have cumulative positions for display
-        - prepare vvv2_display to coverage depth addition in input
-        - add new script to setup.py file and change version to 0.1.11
+  - display prot instead of genes and genes instead of prot
+  - add num in front of genes to ensure to keep apparition order
+  - replace fstrings in vvv2_display.py to allow better compatibility
+  - add src/correct_covdepth_f.py script to convert cov depth files of samtools depth so as to have cumulative positions for display
+  - prepare vvv2_display to coverage depth addition in input
+  - add new script to setup.py file and change version to 0.1.11
 0.1.10:
-        - now horizontal line colors are for proteins, dot shape is for genes. Added vertical dotted lines to show contig limits
+  - now horizontal line colors are for proteins, dot shape is for genes. Added vertical dotted lines to show contig limits
 	- correction in vadr parsing using different virus models
 	- tested on calicivirus, dengue, flavivirus, coronavirus, sarscov2, hcv, norovirus
 	- better parse stemp_loops

From 622fdbb1a60c1ac7feaa0a8cb9f1a65a087d508e Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 14:27:49 +0100
Subject: [PATCH 40/42] update version requirement, replacing = by >= for most
 of them

---
 src/vvv2_display.yaml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index 167a05a..29005e8 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -6,12 +6,12 @@ channels:
   - r
 dependencies:
   - pysam=0.19.1
-  - numpy=1.23.3
+  - numpy>=1.23.1
   - ca-certificates
   - openssl
-  - r-ggplot2=3.3.6
-  - r-gridextra=2.3
-  - r-cowplot=1.1.1
-  - conda-forge::r-stringr=1.5.1
-  - conda-forge::r-jsonlite=1.8.8
+  - r-ggplot2>=3.4.4
+  - r-gridextra>=2.3
+  - r-cowplot>=1.1.1
+  - conda-forge::r-stringr>=1.5.1
+  - conda-forge::r-jsonlite>=1.8.8
   

From fe4789bebd5fc88e801909477a0014fb5b99ee5c Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 16:30:33 +0100
Subject: [PATCH 41/42] pysam version requirement relax

---
 src/vvv2_display.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vvv2_display.yaml b/src/vvv2_display.yaml
index 29005e8..34ec02a 100644
--- a/src/vvv2_display.yaml
+++ b/src/vvv2_display.yaml
@@ -5,7 +5,7 @@ channels:
   - defaults
   - r
 dependencies:
-  - pysam=0.19.1
+  - pysam>=0.19.1
   - numpy>=1.23.1
   - ca-certificates
   - openssl

From f46189528b868d1887f0d18395e07d4f2d8dfda7 Mon Sep 17 00:00:00 2001
From: "f.touzain" <fabrice.touzain@anses.fr>
Date: Wed, 27 Mar 2024 16:53:46 +0100
Subject: [PATCH 42/42] update documentation

---
 README.md | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 75420c5..bcf3092 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 # Description
 
 Tools to create:
-- a .png image file describing all variants (obtained from vardict-java variant caller) alongside a genome/assembly (to provide) with their proportion (ordinates), with CDS descriptions (obtained from vadr annotator).
+- a .png image file describing all variants (obtained from vardict-java variant caller) alongside a genome/assembly (to provide) with their proportion (ordinates), with CDS descriptions (obtained from vadr annotator). At the top of the figure can be displayed the coverage depth repartition (if `-o cov_depth_f` option is provided).
 
 Python/R scripts and Galaxy wrapper to use them.
 
@@ -26,12 +26,13 @@ Convert ```vardict-java``` variant calling vcf file to human readable txt file
 - ```PYTHON_SCRIPTS/correct_multicontig_vardict_vcf.py```: 
 Correct ```vadr``` annotation output .tbl file for contigs positions when the assembly provided is composed of more than one contig.
 
-
 <!-- - ```R_SCRIPTS/visualize_coverage_depth.R```: -->
 <!-- Create a .png file showing coverage depth alongside the genome, from a bam alignment file. -->
 
 - ```R_SCRIPTS/visualize_snp_v4.R```:
-Create a .png file showing variant proportions alongside the genome/assembly and CDS positions.
+Create a .png file showing on the same png figure:
+  - coverage depth repartition alongside the genome/assembly (if `-o cov_depth_d` option provided)
+  - variant proportions alongside the genome/assembly and CDS positions.
 
 # Installation
 
@@ -50,8 +51,17 @@ vvv2_display.py -h
 
 Typical usage:
 ```
-vvv2_display.py -p res_vadr_pass.tsv -f res_vadr_fail.tsv -s res_vadr_seqstat.txt -n res_vardict_all.vcf -m contig_limits.txt -r res_vvv2_display.png 
+vvv2_display.py -p res_vadr_pass.tsv -f res_vadr_fail.tsv -s res_vadr_seqstat.txt -n res_vardict_all.vcf -r res_vvv2_display.png -o cov_depth_f.txt
 ```
+where:
+  - `res_vadr_pass.tsv` is the 'pass' file of vadr annotation program run on the genome/assembly
+  - `res_vadr_fail.tsv` is the 'fail' file of vadr annotation program
+  - `res_vadr_seqstat.txt` is the 'seqstat' file of vadr annotation program
+  - `res_vardict_all.vcf` is the result of vardict-java variant caller
+  - `res_vvv2_display.png` is the name of the main output file (will be created)
+  - `cov_depth_f.txt` is the coverage depth by position, provided by `samtools depth` run on the bam alignement file.
+
+> All other options are for Galaxy wrapper compatibility (these are intermediate temporary files that must appear as parameter for Galaxy wrapper but are not used in a usual command line call)
 
 # Galaxy wrapper