diff --git a/README.md b/README.md index 41d8caa..a04ca9d 100644 --- a/README.md +++ b/README.md @@ -10,43 +10,74 @@ letter in a novel “mega-alphabet” of 85,899,345,920 (∼1011) dis Method sensitivity was measured on the SCOP40 benchmark using superfamily as the truth standard, focusing on the regime with false-positive error rates <10 per query, corresponding to E<10 for an ideal E-value. -This is a preview beta release, new features and improved documentation will hopefully follow soon. -Feedback is welcome via github Issues. -
-All-vs-all alignment (excluding self-hits) - reseek -search STRUCTS -output hits.tsv +Commands + -search # Alignment (e.g. DB search, pairwise, all-vs-all) + -convert # Convert file formats (e.g. create DB) + +Search against database + reseek -search STRUCTS -db STRUCTS -output hits.txt + # STRUCTS specifies structure(s), see below -Search query structures against database - reseek -search Q_STRUCTS -db DB_STRUCTS -output hits.tsv +Recommended format for large databases is .bca, e.g. + reseek -convert /data/PDB_mirror/ -bca PDB.bca Align two structures - reseek -search NAME1.pdb -db NAME2.pdb -aln aln.txt + reseek -search 1XYZ.pdb -db 2ABC.pdb -aln aln.txt + +All-vs-all alignment (excluding self-hits) + reseek -search STRUCTS -output hits.txt Output options for -search -aln FILE # Alignments in human-readable format - -output FILE # Hits in tabbed text format with 3 fields: - # 1. Evalue 2. Query 3. Target - # (More output formats coming soon) + -output FILE # Hits in tabbed text format + -columns name1+name2+name3... 
+ # Output columns, names are + # query Query label + # target Target label + # qlo Start of alignment in query + # qhi End of alignment in query + # tlo Start of alignment in target + # thi End of alignment in target + # ql Query length + # tl Target length + # pctid Percent identity of alignment + # cigar CIGAR string + # evalue E-value of alignment + # qrow Aligned query sequence with gaps (local) + # trow Aligned target sequence with gaps (local) + # qrowg Aligned query sequence with gaps (global) + # trowg Aligned target sequence with gaps (global) + # std query+target+qlo+qhi+ql+tlo+thi+tl+pctid+evalue + # default evalue+query+target Search and alignment options - -evalue E # Max E-value (default report all alignments) + -sensitive # Try harder (~3x slower, not much better) + -evalue E # Max E-value (default 10) -omega X # Omega accelerator (floating-point) -minu U # K-mer accelerator (integer) - -gapopen X # Gap-open penalty (floating-point >= 0, default 1.1) - -gapext X # Gap-extend penalty (floating-point >= 0, default 0.14) - -dbsize D # Effective database size for E-value (default actual size) - -usort # U-sort accelerator (default off) - -maxaccepts N # If U-sort, max hits <= E-value (default 1) - -maxrejects N # If U-sort, max hits > E-value (default 32) + -gapopen X # Gap-open penalty (floating-point >= 0) + -gapext X # Gap-extend penalty (floating-point >= 0) + -dbsize D # DB size (nr. chains) for E-value (default actual size) -Convert PDB file(s) to .cal (C-alpha) format - reseek -pdb2cal STRUCTS -output structs.cal +Convert between file formats + reseek -convert STRUCTS [one or more output options] + -cal FILENAME # .cal format, text with a.a. 
and C-alpha x,y,z + -bca FILENAME # .bca format, binary .cal, recommended for DBs + -fasta FILENAME # FASTA format STRUCTS argument is one of: - NAME.pdb # PDB file (mmCIF support will be added soon) - NAME.files # Text file with PDB file/pathnames, one per line - NAME.cal # C-alpha (.cal) file, recommended for databases + NAME.cif or NAME.mmcif # PDBx/mmCIF file + NAME.pdb # Legacy format PDB file + NAME.cal # C-alpha tabbed text format with chain(s) + NAME.bca # Binary C-alpha, recommended for larger DBs + NAME.files # Text file with one STRUCT per line, + # may be filename, directory or .files + DIRECTORYNAME # Directory (and its sub-directories) is searched + # for known file types including .pdb, .files etc. +Other options: + -log FILENAME # Log file with errors, warnings, time and memory. + -threads N # Number of threads, default number of CPU cores.diff --git a/src/usage.h b/src/usage.h index a44fb03..1b53f8f 100644 --- a/src/usage.h +++ b/src/usage.h @@ -66,6 +66,7 @@ const char *usage_txt[] = " DIRECTORYNAME # Directory (and its sub-directories) is searched\n" " # for known file types including .pdb, .files etc.\n" "\n" +"Other options:\n" " -log FILENAME # Log file with errors, warnings, time and memory.\n" " -threads N # Number of threads, default number of CPU cores.\n" };