From 4e8258e014ab0c25ceb8d64a4df610c7a723cbb3 Mon Sep 17 00:00:00 2001
From: Gerry Tonkin-Hill <g.tonkinhill@gmail.com>
Date: Mon, 7 Oct 2024 10:02:01 +0200
Subject: [PATCH] update docs

---
 docs/gettingstarted/params.md | 36 +++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/docs/gettingstarted/params.md b/docs/gettingstarted/params.md
index 1fa8a6e..419bd93 100755
--- a/docs/gettingstarted/params.md
+++ b/docs/gettingstarted/params.md
@@ -15,6 +15,8 @@ Thus to align all genes present in at least 98% of isolates using clustal and 10
 panaroo -i *.gff -o ./results/ --clean-mode strict -a core --aligner clustal --core_threshold 0.98 -t 10
 ```
 
+You can also output unaligned gene sequences by specifying `--aligner none`. In addition, GitHub user @revinci has contributed a separate script for generating alignments after Panaroo has finished running; it is described in [issue #306](https://github.com/gtonkinhill/panaroo/issues/306).
+
 #### Cluster Thresholds
 
 The Panaroo algorithm initially performs a conservative clustering step before collapsing genes into possible families. It is usually best to use the dafault parameters for this initial clustering stage.
@@ -54,9 +56,11 @@ panaroo -i *.gff -o ./results/ --clean-mode strict --refind_prop_match 0.5 --sea
 usage: panaroo [-h] -i INPUT_FILES [INPUT_FILES ...] -o OUTPUT_DIR
                --clean-mode {strict,moderate,sensitive}
                [--remove-invalid-genes] [-c ID] [-f FAMILY_THRESHOLD]
-               [--len_dif_percent LEN_DIF_PERCENT] [--merge_paralogs]
-               [--search_radius SEARCH_RADIUS]
+               [--len_dif_percent LEN_DIF_PERCENT]
+               [--family_len_dif_percent FAMILY_LEN_DIF_PERCENT]
+               [--merge_paralogs] [--search_radius SEARCH_RADIUS]
                [--refind_prop_match REFIND_PROP_MATCH]
+               [--refind-mode {default,strict,off}]
                [--min_trailing_support MIN_TRAILING_SUPPORT]
                [--trailing_recursive TRAILING_RECURSIVE]
                [--edge_support_threshold EDGE_SUPPORT_THRESHOLD]
@@ -65,9 +69,10 @@ usage: panaroo [-h] -i INPUT_FILES [INPUT_FILES ...] -o OUTPUT_DIR
                [--high_var_flag CYCLE_THRESHOLD_MIN]
                [--min_edge_support_sv MIN_EDGE_SUPPORT_SV]
                [--all_seq_in_graph] [--no_clean_edges] [-a {core,pan}]
-               [--aligner {prank,clustal,mafft}] [--codons]
-               [--core_threshold CORE] [--core_entropy_filter HC_THRESHOLD]
-               [-t N_CPU] [--codon-table TABLE] [--quiet] [--version]
+               [--aligner {prank,clustal,mafft,none}] [--codons]
+               [--core_threshold CORE] [--core_subset SUBSET]
+               [--core_entropy_filter HC_THRESHOLD] [-t N_CPU]
+               [--codon-table TABLE] [--quiet] [--version]
 
 panaroo: an updated pipeline for pangenome investigation
 
@@ -125,6 +130,9 @@ Matching:
                         (default=0.7)
   --len_dif_percent LEN_DIF_PERCENT
                         length difference cutoff (default=0.98)
+  --family_len_dif_percent FAMILY_LEN_DIF_PERCENT
+                        length difference cutoff at the gene family level
+                        (default=0.0)
   --merge_paralogs      don't split paralogs
 
 Refind:
@@ -134,6 +142,20 @@ Refind:
   --refind_prop_match REFIND_PROP_MATCH
                         the proportion of an accessory gene that must be found
                         in order to consider it a match
+  --refind-mode {default,strict,off}
+                        The stringency mode at which to re-find genes.
+
+                        default:
+                        Will re-find similar gene sequences. Allows for
+                        premature stop codons and incorrect lengths to account
+                        for misassemblies.
+
+                        strict:
+                        Prevents fragmented, misassembled, or potential
+                        pseudogene sequences from being re-found.
+
+                        off:
+                        Turns off all re-finding steps.
 
 Graph correction:
   --min_trailing_support MIN_TRAILING_SUPPORT
@@ -170,13 +192,15 @@ Gene alignment:
   -a {core,pan}, --alignment {core,pan}
                         Output alignments of core genes or all genes. Options
                         are 'core' and 'pan'. Default: 'None'
-  --aligner {prank,clustal,mafft}
+  --aligner {prank,clustal,mafft,none}
                         Specify an aligner. Options:'prank', 'clustal', and
                         default: 'mafft'
   --codons              Generate codon alignments by aligning sequences at the
                         protein level
   --core_threshold CORE
                         Core-genome sample threshold (default=0.95)
+  --core_subset SUBSET  Randomly subset the core genome to these many genes
+                        (default=all)
   --core_entropy_filter HC_THRESHOLD
                         Manually set the Block Mapping and Gathering with
                         Entropy (BMGE) filter. Can be between 0.0 and 1.0. By