Skip to content

Commit

Permalink
Merge pull request #21 from blab/csv-error-check
Browse files Browse the repository at this point in the history
Csv error check
  • Loading branch information
huddlej authored Apr 26, 2024
2 parents a25de47 + fd541e5 commit bd0556b
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 4 deletions.
19 changes: 15 additions & 4 deletions src/pathogen_embed/pathogen_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,14 @@ def embed(args):
if args.alignment is None and args.command == "pca":
print("You must specify an alignment for pca, not a distance matrix", file=sys.stderr)
sys.exit(1)

# getting or creating the distance matrix
distance_matrix = None
if args.distance_matrix is not None:
distance_matrix = pd.read_csv(args.distance_matrix, index_col=0)
if not args.distance_matrix.endswith('.csv'):
print("You must supply a CSV file for distance_matrix.", file=sys.stderr)
sys.exit(1)
else:
distance_matrix = pd.read_csv(args.distance_matrix, index_col=0)

if args.alignment is not None:
sequences_by_name = OrderedDict()
Expand All @@ -221,7 +224,11 @@ def embed(args):
# Load embedding parameters from an external CSV file, if possible.
external_embedding_parameters = None
if args.embedding_parameters is not None:
external_embedding_parameters_df = pd.read_csv(args.embedding_parameters)
if not args.embedding_parameters.endswith('.csv'):
print("You must supply a CSV file for embedding parameters.", file=sys.stderr)
sys.exit(1)
else:
external_embedding_parameters_df = pd.read_csv(args.embedding_parameters)

# Get a dictionary of additional parameters provided by the external
# file to override defaults for the current method.
Expand Down Expand Up @@ -421,7 +428,11 @@ def embed(args):

def cluster(args):

embedding_df = pd.read_csv(args.embedding, index_col=0)
if not args.embedding.endswith('.csv'):
print("You must supply a CSV file for the embedding.", file=sys.stderr)
sys.exit(1)
else:
embedding_df = pd.read_csv(args.embedding, index_col=0)

clustering_parameters = {
**({"min_cluster_size": args.min_size} if args.min_size is not None else {}),
Expand Down
9 changes: 9 additions & 0 deletions tests/pathogen-cluster-csv-check-embedding-fail.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Run pathogen-cluster and check that an error message is printed when a non-CSV file is given for the embedding.

$ pathogen-cluster \
> --embedding embed_pca.tsv \
> --label-attribute pca_label \
> --distance-threshold 0.5 \
> --output-dataframe cluster_embed_pca.csv
You must supply a CSV file for the embedding.
[1]
9 changes: 9 additions & 0 deletions tests/pathogen-embed-csv-check-dist-matrix-fail.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Run pathogen-embed with a TSV distance matrix instead of a CSV and check that it exits with an error.
The file extension is checked before the distance matrix is read, so the TSV file does not need to exist.

$ pathogen-embed \
> --alignment $TESTDIR/data/h3n2_ha_alignment.fasta \
> --distance-matrix distance_matrix.tsv \
> --output-dataframe embed_t-sne.tsv \
> t-sne \
> --perplexity 25
You must supply a CSV file for distance_matrix.
[1]
11 changes: 11 additions & 0 deletions tests/pathogen-embed-csv-check-embedding-params-fail.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Run pathogen-embed and check that it fails when the embedding parameters file is not a CSV.

$ pathogen-embed \
> --alignment $TESTDIR/data/h3n2_ha_alignment.fasta \
> --output-dataframe embed_t-sne.tsv \
> --output-figure figure.png \
> --embedding-parameters value.tsv \
> t-sne \
> --perplexity 25
You must supply a CSV file for embedding parameters.
[1]

0 comments on commit bd0556b

Please sign in to comment.