From b1da4adca2417cce55b83e832a87713bbc89b306 Mon Sep 17 00:00:00 2001 From: Sravani Nanduri Date: Fri, 29 Mar 2024 09:40:33 -0700 Subject: [PATCH 1/3] adding error check if not supplied with CSV --- src/pathogen_embed/pathogen_embed.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/pathogen_embed/pathogen_embed.py b/src/pathogen_embed/pathogen_embed.py index e004af4..68af5fa 100644 --- a/src/pathogen_embed/pathogen_embed.py +++ b/src/pathogen_embed/pathogen_embed.py @@ -196,11 +196,13 @@ def embed(args): if args.alignment is None and args.command == "pca": print("You must specify an alignment for pca, not a distance matrix", file=sys.stderr) sys.exit(1) - # getting or creating the distance matrix distance_matrix = None if args.distance_matrix is not None: - distance_matrix = pd.read_csv(args.distance_matrix, index_col=0) + if not args.distance_matrix.endswith('.csv'): + print("You must supply a CSV file for distance_matrix.", file=sys.stderr) + else: + distance_matrix = pd.read_csv(args.distance_matrix, index_col=0) if args.alignment is not None: sequences_by_name = OrderedDict() @@ -221,7 +223,10 @@ def embed(args): # Load embedding parameters from an external CSV file, if possible. external_embedding_parameters = None if args.embedding_parameters is not None: - external_embedding_parameters_df = pd.read_csv(args.embedding_parameters) + if not args.embedding_parameters.endswith('.csv'): + print("You must supply a CSV file for embedding parameters.", file=sys.stderr) + else: + external_embedding_parameters_df = pd.read_csv(args.embedding_parameters) # Get a dictionary of additional parameters provided by the external # file to override defaults for the current method. @@ -419,7 +424,10 @@ def embed(args): def cluster(args): - embedding_df = pd.read_csv(args.embedding, index_col=0) + if not args.embedding.endswith('.csv'): + print("You must supply a CSV file for the embedding.", file=sys.stderr) + else: + embedding_df = pd.read_csv(args.embedding, index_col=0) clustering_parameters = { **({"min_cluster_size": args.min_size} if args.min_size is not None else {}), From 0e66933ae09aa90adbeefb09f044c65adf0175f9 Mon Sep 17 00:00:00 2001 From: Sravani Nanduri Date: Mon, 8 Apr 2024 17:40:25 -0700 Subject: [PATCH 2/3] adding tests --- src/pathogen_embed/pathogen_embed.py | 3 +++ tests/pathogen-cluster-csv-check-embedding-fail.t | 10 ++++++++++ tests/pathogen-embed-csv-check-dist-matrix-fail.t | 10 ++++++++++ .../pathogen-embed-csv-check-embedding-params-fail.t | 12 ++++++++++++ 4 files changed, 35 insertions(+) create mode 100644 tests/pathogen-cluster-csv-check-embedding-fail.t create mode 100644 tests/pathogen-embed-csv-check-dist-matrix-fail.t create mode 100644 tests/pathogen-embed-csv-check-embedding-params-fail.t diff --git a/src/pathogen_embed/pathogen_embed.py b/src/pathogen_embed/pathogen_embed.py index 68af5fa..ff95ffd 100644 --- a/src/pathogen_embed/pathogen_embed.py +++ b/src/pathogen_embed/pathogen_embed.py @@ -201,6 +201,7 @@ def embed(args): if args.distance_matrix is not None: if not args.distance_matrix.endswith('.csv'): print("You must supply a CSV file for distance_matrix.", file=sys.stderr) + sys.exit(1) else: distance_matrix = pd.read_csv(args.distance_matrix, index_col=0) @@ -225,6 +226,7 @@ def embed(args): if args.embedding_parameters is not None: if not args.embedding_parameters.endswith('.csv'): print("You must supply a CSV file for embedding parameters.", file=sys.stderr) + sys.exit(1) else: external_embedding_parameters_df = pd.read_csv(args.embedding_parameters) @@ -426,6 +428,7 @@ def cluster(args): if not args.embedding.endswith('.csv'): print("You must supply a CSV file for the embedding.", file=sys.stderr) + sys.exit(1) else: embedding_df = pd.read_csv(args.embedding, index_col=0) diff --git a/tests/pathogen-cluster-csv-check-embedding-fail.t b/tests/pathogen-cluster-csv-check-embedding-fail.t new file mode 100644 index 0000000..668d083 --- /dev/null +++ b/tests/pathogen-cluster-csv-check-embedding-fail.t @@ -0,0 +1,10 @@ +Run pathogen cluster and check that error message is thrown when non csv input is given for embedding + + $ pathogen-cluster \ + > --embedding embed_pca.tsv \ + > --label-attribute pca_label \ + > --distance-threshold 0.5 \ + > --output-dataframe cluster_embed_pca.csv + +== stderr +You must supply a CSV file for the embedding. \ No newline at end of file diff --git a/tests/pathogen-embed-csv-check-dist-matrix-fail.t b/tests/pathogen-embed-csv-check-dist-matrix-fail.t new file mode 100644 index 0000000..887baee --- /dev/null +++ b/tests/pathogen-embed-csv-check-dist-matrix-fail.t @@ -0,0 +1,10 @@ +Run pathogen-embed with a tsv instead of a csv to see if it throws an error. + + $ pathogen-embed \ + > --alignment $TESTDIR/data/h3n2_ha_alignment.fasta \ + > --output-dataframe embed_t-sne.tsv \ + > t-sne \ + > --perplexity 25 + +== stderr +You must supply a CSV file for distance_matrix. diff --git a/tests/pathogen-embed-csv-check-embedding-params-fail.t b/tests/pathogen-embed-csv-check-embedding-params-fail.t new file mode 100644 index 0000000..babaf9c --- /dev/null +++ b/tests/pathogen-embed-csv-check-embedding-params-fail.t @@ -0,0 +1,12 @@ +Run pathogen embed and check that embedding parameters with not csv input fails + + $ pathogen-embed \ + > --alignment $TESTDIR/data/h3n2_ha_alignment.fasta \ + > --output-dataframe embed_t-sne.tsv \ + > --output-figure figure.png \ + > --embedding-parameters value.tsv \ + > t-sne \ + > --perplexity 25 + +== stderr +You must supply a CSV file for embedding parameters. \ No newline at end of file From fd541e5e7210d9d2a152a28d8c055a1eb9f12368 Mon Sep 17 00:00:00 2001 From: Sravani Nanduri Date: Thu, 11 Apr 2024 15:53:02 -0700 Subject: [PATCH 3/3] fixing cram tests that were failing --- tests/pathogen-cluster-csv-check-embedding-fail.t | 5 ++--- tests/pathogen-embed-csv-check-dist-matrix-fail.t | 3 +-- tests/pathogen-embed-csv-check-embedding-params-fail.t | 5 ++--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/pathogen-cluster-csv-check-embedding-fail.t b/tests/pathogen-cluster-csv-check-embedding-fail.t index 668d083..8caffc8 100644 --- a/tests/pathogen-cluster-csv-check-embedding-fail.t +++ b/tests/pathogen-cluster-csv-check-embedding-fail.t @@ -5,6 +5,5 @@ Run pathogen cluster and check that error message is thrown when non csv input i > --label-attribute pca_label \ > --distance-threshold 0.5 \ > --output-dataframe cluster_embed_pca.csv - -== stderr -You must supply a CSV file for the embedding. \ No newline at end of file + You must supply a CSV file for the embedding. + [1] \ No newline at end of file diff --git a/tests/pathogen-embed-csv-check-dist-matrix-fail.t b/tests/pathogen-embed-csv-check-dist-matrix-fail.t index 887baee..8886f82 100644 --- a/tests/pathogen-embed-csv-check-dist-matrix-fail.t +++ b/tests/pathogen-embed-csv-check-dist-matrix-fail.t @@ -5,6 +5,5 @@ Run pathogen-embed with a tsv instead of a csv to see if it throws an error. > --output-dataframe embed_t-sne.tsv \ > t-sne \ > --perplexity 25 - -== stderr You must supply a CSV file for distance_matrix. +[1] diff --git a/tests/pathogen-embed-csv-check-embedding-params-fail.t b/tests/pathogen-embed-csv-check-embedding-params-fail.t index babaf9c..c9ef634 100644 --- a/tests/pathogen-embed-csv-check-embedding-params-fail.t +++ b/tests/pathogen-embed-csv-check-embedding-params-fail.t @@ -7,6 +7,5 @@ Run pathogen embed and check that embedding parameters with not csv input fails > --embedding-parameters value.tsv \ > t-sne \ > --perplexity 25 - -== stderr -You must supply a CSV file for embedding parameters. \ No newline at end of file + You must supply a CSV file for embedding parameters. + [1] \ No newline at end of file