diff --git a/README.md b/README.md index 41d8caa..a04ca9d 100644 --- a/README.md +++ b/README.md @@ -10,43 +10,74 @@ letter in a novel “mega-alphabet” of 85,899,345,920 (∼1011) dis Method sensitivity was measured on the SCOP40 benchmark using superfamily as the truth standard, focusing on the regime with false-positive error rates <10 per query, corresponding to E<10 for an ideal E-value. -This is a preview beta release, new features and improved documentation will hopefully follow soon. -Feedback is welcome via github Issues. -
-All-vs-all alignment (excluding self-hits)
-    reseek -search STRUCTS -output hits.tsv 
+Commands
+  -search        # Alignment (e.g. DB search, pairwise, all-vs-all)
+  -convert       # Convert file formats (e.g. create DB)
+
+Search against database
+    reseek -search STRUCTS -db STRUCTS -output hits.txt
+                 # STRUCTS specifies structure(s), see below
 
-Search query structures against database
-    reseek -search Q_STRUCTS -db DB_STRUCTS -output hits.tsv
+Recommended format for large databases is .bca, e.g.
+    reseek -convert /data/PDB_mirror/ -bca PDB.bca
 
 Align two structures
-    reseek -search NAME1.pdb -db NAME2.pdb -aln aln.txt
+    reseek -search 1XYZ.pdb -db 2ABC.pdb -aln aln.txt
+
+All-vs-all alignment (excluding self-hits)
+    reseek -search STRUCTS -output hits.txt
 
 Output options for -search
    -aln FILE     # Alignments in human-readable format
-   -output FILE  # Hits in tabbed text format with 3 fields:
-                 #   1. Evalue 2. Query 3. Target
-                 # (More output formats coming soon)
+   -output FILE  # Hits in tabbed text format
+   -columns name1+name2+name3...
+                 # Output columns, names are
+                 #   query   Query label
+                 #   target  Target label
+                 #   qlo     Start of alignment in query
+                 #   qhi     End of alignment in query
+                 #   tlo     Start of alignment in target
+                 #   thi     End of alignment in target
+                 #   ql      Query length
+                 #   tl      Target length
+                 #   pctid   Percent identity of alignment
+                 #   cigar   CIGAR string
+                 #   evalue  E-value of the alignment
+                 #   qrow    Aligned query sequence with gaps (local)
+                 #   trow    Aligned target sequence with gaps (local)
+                 #   qrowg   Aligned query sequence with gaps (global)
+                 #   trowg   Aligned target sequence with gaps (global)
+                 #   std     query+target+qlo+qhi+ql+tlo+thi+tl+pctid+evalue
+                 # default evalue+query+target
 
 Search and alignment options
-  -evalue E      # Max E-value (default report all alignments)
+  -sensitive     # Try harder (~3x slower, not much better)
+  -evalue E      # Max E-value (default 10)
   -omega X       # Omega accelerator (floating-point)
   -minu U        # K-mer accelerator (integer)
-  -gapopen X     # Gap-open penalty (floating-point >= 0, default 1.1)
-  -gapext X      # Gap-extend penalty (floating-point >= 0, default 0.14)
-  -dbsize D      # Effective database size for E-value (default actual size)
-  -usort         # U-sort accelerator (default off)
-  -maxaccepts N  # If U-sort, max hits <= E-value (default 1)
-  -maxrejects N  # If U-sort, max hits > E-value (default 32)
+  -gapopen X     # Gap-open penalty (floating-point >= 0)
+  -gapext X      # Gap-extend penalty (floating-point >= 0)
+  -dbsize D      # DB size (number of chains) for E-value (default actual size)
 
-Convert PDB file(s) to .cal (C-alpha) format
-    reseek -pdb2cal STRUCTS -output structs.cal
+Convert between file formats
+    reseek -convert STRUCTS [one or more output options]
+           -cal FILENAME    # .cal format, text with a.a. and C-alpha x,y,z
+           -bca FILENAME    # .bca format, binary .cal, recommended for DBs
+           -fasta FILENAME  # FASTA format
 
 STRUCTS argument is one of:
-   NAME.pdb      # PDB file (mmCIF support will be added soon)
-   NAME.files    # Text file with PDB file/pathnames, one per line
-   NAME.cal      # C-alpha (.cal) file, recommended for databases
+   NAME.cif or NAME.mmcif     # PDBx/mmCIF file
+   NAME.pdb                   # Legacy format PDB file
+   NAME.cal                   # C-alpha tabbed text format with chain(s)
+   NAME.bca                   # Binary C-alpha, recommended for larger DBs
+   NAME.files                 # Text file with one STRUCT per line,
+                              #   may be filename, directory or .files
+   DIRECTORYNAME              # Directory (and its sub-directories) is searched
+                              #   for known file types including .pdb, .files etc.
+Other options:
+   -log FILENAME              # Log file with errors, warnings, time and memory.
+   -threads N                 # Number of threads, default number of CPU cores.
 
diff --git a/src/usage.h b/src/usage.h index a44fb03..1b53f8f 100644 --- a/src/usage.h +++ b/src/usage.h @@ -66,6 +66,7 @@ const char *usage_txt[] = " DIRECTORYNAME # Directory (and its sub-directories) is searched\n" " # for known file types including .pdb, .files etc.\n" "\n" +"Other options:\n" " -log FILENAME # Log file with errors, warnings, time and memory.\n" " -threads N # Number of threads, default number of CPU cores.\n" };