Skip to content

Commit

Permalink
[translate] improve output arg checking
Browse files Browse the repository at this point in the history
If the input is a VCF and alignment output is requested, then we now
require --vcf-reference-output. This was not the case in augur 23.1.1 and
earlier, where we would automatically create a filename. This is in
line with a general philosophy of "files only created when requested".
  • Loading branch information
jameshadfield committed Dec 30, 2023
1 parent 84fd76f commit 4e89c2b
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 11 deletions.
35 changes: 24 additions & 11 deletions augur/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,7 @@ def sequences_vcf(reference_fasta, vcf):
[0] The sequences as a dict of dicts. sequences → <NODE_NAME> → <POS> → <ALT_NUC> where <POS> is a 0-based int
[1] The sequence of the provided `reference_fasta` (string)
"""
if not reference_fasta:
raise AugurError("A reference Fasta is required with VCF-format input")
assert reference_fasta is not None
compress_seq = read_vcf(vcf, reference_fasta)
sequences = compress_seq['sequences']
ref = compress_seq['reference']
Expand Down Expand Up @@ -385,8 +384,25 @@ def check_arg_combinations(args, is_vcf):
This checking shouldn't be used by downstream code to assume arguments exist, however by checking for
invalid combinations up-front we can exit quickly.
"""
if not is_vcf and (args.vcf_reference or args.vcf_reference_output):
raise AugurError("Arguments '--vcf-reference' and/or '--vcf-reference-output' are only applicable if the input ('--ancestral-sequences') is VCF")

if is_vcf:
if not args.vcf_reference:
raise AugurError("A reference FASTA (--vcf-reference) is required with VCF-format input")
else:
if args.vcf_reference or args.vcf_reference_output:
raise AugurError("Arguments '--vcf-reference' and/or '--vcf-reference-output' are only applicable if the input ('--ancestral-sequences') is VCF")

if args.alignment_output:
if is_vcf:
if not is_filename_vcf(args.alignment_output):
raise AugurError("When using a VCF input the --alignment-output filename must also be a VCF file")
if not args.vcf_reference_output:
raise AugurError("When using a VCF input and --alignment-output, we now require you to specify the --vcf-reference-output as well")
else:
if is_filename_vcf(args.alignment_output):
raise AugurError("When using a non-VCF input the --alignment-output filename must not be a VCF file")
if args.vcf_reference_output and not args.alignment_output:
raise AugurError("The VCF reference output (--vcf-reference-output) needs --alignment-output")


def run(args):
Expand Down Expand Up @@ -474,15 +490,12 @@ def run(args):
write_json(output_data, out_name)
print("amino acid mutations written to", out_name, file=sys.stdout)

## write alignments to file is requested
## write alignments to file if requested
if args.alignment_output:
if is_vcf:
## write VCF-style output if requested
fileEndings = -1
if args.alignment_output.lower().endswith('.gz'):
fileEndings = -2
vcf_out_ref = args.vcf_reference_output or '.'.join(args.alignment_output.split('.')[:fileEndings]) + '_reference.fasta'
write_VCF_translation(translations, args.alignment_output, vcf_out_ref)
assert is_filename_vcf(args.alignment_output)
assert args.vcf_reference_output is not None
write_VCF_translation(translations, args.alignment_output, args.vcf_reference_output)
else:
## write fasta-style output if requested
if '%GENE' in args.alignment_output:
Expand Down
57 changes: 57 additions & 0 deletions tests/functional/translate/cram/invalid-args.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
Setup

$ export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
$ export SCRIPTS="$TESTDIR/../../../../scripts"
$ export ANC_DATA="$TESTDIR/../../ancestral/data/simple-genome"
$ export DATA="$TESTDIR/../data/simple-genome"


Input JSON + VCF output is not possible (and vice-versa)

$ ${AUGUR} translate \
> --tree $ANC_DATA/tree.nwk \
> --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
> --reference-sequence $DATA/reference.gff \
> --alignment-output "translations.vcf"
ERROR: When using a non-VCF input the --alignment-output filename must not be a VCF file
[2]

$ ${AUGUR} translate \
> --tree $ANC_DATA/tree.nwk \
> --ancestral-sequences input.vcf \
> --vcf-reference reference_in.fasta \
> --reference-sequence $DATA/reference.gff \
> --alignment-output "translations.fasta"
ERROR: When using a VCF input the --alignment-output filename must also be a VCF file
[2]

The arg --vcf-reference-output needs --alignment-output

$ ${AUGUR} translate \
> --tree $ANC_DATA/tree.nwk \
> --ancestral-sequences input.vcf \
> --reference-sequence $DATA/reference.gff \
> --vcf-reference reference_in.fasta \
> --vcf-reference-output reference_out.fasta
ERROR: The VCF reference output (--vcf-reference-output) needs --alignment-output
[2]

If the input is a VCF and alignment output is requested, then --vcf-reference-output is required.
(This was not the case in augur 23.1.1 and earlier, where a filename would be created automatically.)
$ ${AUGUR} translate \
> --tree $ANC_DATA/tree.nwk \
> --ancestral-sequences input.vcf \
> --vcf-reference reference_in.fasta \
> --reference-sequence $DATA/reference.gff \
> --alignment-output "translations.vcf"
ERROR: When using a VCF input and --alignment-output, we now require you to specify the --vcf-reference-output as well
[2]

VCF input must have a FASTA reference provided
$ ${AUGUR} translate \
> --tree $ANC_DATA/tree.nwk \
> --ancestral-sequences input.vcf \
> --reference-sequence $DATA/reference.gff \
> --alignment-output "translations.vcf"
ERROR: A reference FASTA (--vcf-reference) is required with VCF-format input
[2]

0 comments on commit 4e89c2b

Please sign in to comment.