iomega · niekdejonge · Nov 23, 2023 · Nov 22, 2023 · Nov 22, 2023 · Nov 22, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - environment.yml and CI_build test for building a conda env from this file
 ### fixed
 - Allow for using upper_case additional_metadata columns
+- Removed pickled files from tests to pave the way for pandas 2.0 and new matchms
 ## 1.2.2
 ### fixed
 - Set version of matchmsextras to 0.4.0, to fix dependency issue

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,53 +1,47 @@
 import os
 import numpy as np
 import pytest
+import pandas as pd
 from matchms import Spectrum
 from matchms.importing.load_from_mgf import load_from_mgf
 from ms2query.ms2library import MS2Library
 from ms2query.query_from_sqlite_database import SqliteLibrary
+from ms2query.utils import load_pickled_file
 
 
 @pytest.fixture(scope="package")
 def path_to_general_test_files() -> str:
     return os.path.join(
         os.path.split(os.path.dirname(__file__))[0],
-        'tests/test_files/general_test_files')
+        'tests/test_files')
 
 
 @pytest.fixture(scope="package")
-def path_to_test_files():
-    return os.path.join(os.path.split(os.path.dirname(__file__))[0], 'tests/test_files')
-
-
-@pytest.fixture(scope="package")
-def sqlite_library(path_to_test_files):
-    path_to_library = os.path.join(path_to_test_files, "general_test_files", "100_test_spectra.sqlite")
+def sqlite_library(path_to_general_test_files):
+    path_to_library = os.path.join(path_to_general_test_files, "100_test_spectra.sqlite")
     return SqliteLibrary(path_to_library)
 
 
 @pytest.fixture(scope="package")
-def ms2library() -> MS2Library:
+def ms2library(path_to_general_test_files) -> MS2Library:
     """Returns file names of the files needed to create MS2Library object"""
-    path_to_tests_dir = os.path.join(
-        os.path.split(os.path.dirname(__file__))[0],
-        'tests/test_files/')
     sqlite_file_loc = os.path.join(
-        path_to_tests_dir,
-        "general_test_files/100_test_spectra.sqlite")
+        path_to_general_test_files,
+        "100_test_spectra.sqlite")
     spec2vec_model_file_loc = os.path.join(
-        path_to_tests_dir,
-        "general_test_files/100_test_spectra_s2v_model.model")
+        path_to_general_test_files,
+        "100_test_spectra_s2v_model.model")
     s2v_pickled_embeddings_file = os.path.join(
-        path_to_tests_dir,
-        "general_test_files/100_test_spectra_s2v_embeddings.pickle")
+        path_to_general_test_files,
+        "100_test_spectra_s2v_embeddings.pickle")
     ms2ds_model_file_name = os.path.join(
-        path_to_tests_dir,
-        "general_test_files/ms2ds_siamese_210301_5000_500_400.hdf5")
+        path_to_general_test_files,
+        "ms2ds_siamese_210301_5000_500_400.hdf5")
     ms2ds_embeddings_file_name = os.path.join(
-        path_to_tests_dir,
-        "general_test_files/100_test_spectra_ms2ds_embeddings.pickle")
-    ms2q_model_file_name = os.path.join(path_to_tests_dir,
-        "general_test_files", "test_ms2q_rf_model.onnx")
+        path_to_general_test_files,
+        "100_test_spectra_ms2ds_embeddings.pickle")
+    ms2q_model_file_name = os.path.join(path_to_general_test_files,
+        "test_ms2q_rf_model.onnx")
     ms2library = MS2Library(sqlite_file_loc, spec2vec_model_file_loc, ms2ds_model_file_name,
                             s2v_pickled_embeddings_file, ms2ds_embeddings_file_name, ms2q_model_file_name)
     return ms2library
@@ -104,3 +98,25 @@ def test_spectra():
 def hundred_test_spectra(path_to_general_test_files):
     return list(load_from_mgf(os.path.join(path_to_general_test_files, "100_test_spectra.mgf"),
                 metadata_harmonization=True))
+
+
+@pytest.fixture(scope="package")
+def expected_tanimoto_scores_df(path_to_general_test_files):
+    return pd.read_csv(os.path.join(path_to_general_test_files,
+                                    "tanimoto_scores_100_test_spectra.csv"), index_col=0)
+
+
+@pytest.fixture(scope="package")
+def expected_ms2ds_embeddings(path_to_general_test_files):
+    expected_embeddings = load_pickled_file(os.path.join(
+        path_to_general_test_files,
+        "100_test_spectra_ms2ds_embeddings.pickle"))
+    return expected_embeddings
+
+
+@pytest.fixture(scope="package")
+def expected_s2v_embeddings(path_to_general_test_files):
+    expected_embeddings = load_pickled_file(os.path.join(
+        path_to_general_test_files,
+        "100_test_spectra_s2v_embeddings.pickle"))
+    return expected_embeddings
diff --git a/tests/test_calculate_tanimoto_scores.py b/tests/test_calculate_tanimoto_scores.py
@@ -9,24 +9,23 @@
                             load_pickled_file)
 
 
-def test_calculate_tanimoto_scores_unique_inchikey(path_to_general_test_files, hundred_test_spectra):
+def test_calculate_tanimoto_scores_unique_inchikey(path_to_general_test_files, hundred_test_spectra,
+                                                   expected_tanimoto_scores_df):
     tanimoto_df = calculate_tanimoto_scores_unique_inchikey(hundred_test_spectra, hundred_test_spectra)
-    expected_tanimoto_df = load_pickled_file(os.path.join(path_to_general_test_files,
-                                                          "100_test_spectra_tanimoto_scores.pickle"))
     assert isinstance(tanimoto_df, pd.DataFrame), "Expected a pandas dataframe"
-    pd.testing.assert_frame_equal(tanimoto_df, expected_tanimoto_df, check_exact=False, atol=1e-5)
+    pd.testing.assert_frame_equal(tanimoto_df, expected_tanimoto_scores_df, check_exact=False, atol=1e-5)
 
 
-def test_calculate_tanimoto_scores_unique_inchikey_not_symmetric(path_to_general_test_files, hundred_test_spectra):
+def test_calculate_tanimoto_scores_unique_inchikey_not_symmetric(path_to_general_test_files,
+                                                                 hundred_test_spectra,
+                                                                 expected_tanimoto_scores_df):
     spectra_2 = hundred_test_spectra[:10]
     tanimoto_df = calculate_tanimoto_scores_unique_inchikey(hundred_test_spectra, spectra_2)
 
     unique_inchikey_2 = set([spectrum.get("inchikey")[:14] for spectrum in spectra_2])
-    expected_tanimoto_df = load_pickled_file(os.path.join(path_to_general_test_files,
-                                                          "100_test_spectra_tanimoto_scores.pickle")
-                                             ).loc[:, sorted(unique_inchikey_2)]
+    expected_tanimoto_df_inchikey_2 = expected_tanimoto_scores_df.loc[:, sorted(unique_inchikey_2)]
     assert isinstance(tanimoto_df, pd.DataFrame), "Expected a pandas dataframe"
-    pd.testing.assert_frame_equal(tanimoto_df, expected_tanimoto_df,
+    pd.testing.assert_frame_equal(tanimoto_df, expected_tanimoto_df_inchikey_2,
                                   check_exact=False, atol=1e-5)
 
 

diff --git a/...s/general_test_files/100_test_spectra.mgf → tests/test_files/100_test_spectra.mgf b/...s/general_test_files/100_test_spectra.mgf → tests/test_files/100_test_spectra.mgf
diff --git a/...eneral_test_files/100_test_spectra.sqlite → tests/test_files/100_test_spectra.sqlite b/...eneral_test_files/100_test_spectra.sqlite → tests/test_files/100_test_spectra.sqlite
diff --git a/.../100_test_spectra_ms2ds_embeddings.pickle → .../100_test_spectra_ms2ds_embeddings.pickle b/.../100_test_spectra_ms2ds_embeddings.pickle → .../100_test_spectra_ms2ds_embeddings.pickle
diff --git a/...es/100_test_spectra_s2v_embeddings.pickle → ...es/100_test_spectra_s2v_embeddings.pickle b/...es/100_test_spectra_s2v_embeddings.pickle → ...es/100_test_spectra_s2v_embeddings.pickle
diff --git a/...st_files/100_test_spectra_s2v_model.model → ...st_files/100_test_spectra_s2v_model.model b/...st_files/100_test_spectra_s2v_model.model → ...st_files/100_test_spectra_s2v_model.model
diff --git a/...test_files/2000_negative_test_spectra.mgf → ...test_files/2000_negative_test_spectra.mgf b/...test_files/2000_negative_test_spectra.mgf → ...test_files/2000_negative_test_spectra.mgf
diff --git a/tests/test_files/general_test_files/100_test_spectra_tanimoto_scores.pickle b/tests/test_files/general_test_files/100_test_spectra_tanimoto_scores.pickle
diff --git a/...es/ms2ds_siamese_210301_5000_500_400.hdf5 → ...es/ms2ds_siamese_210301_5000_500_400.hdf5 b/...es/ms2ds_siamese_210301_5000_500_400.hdf5 → ...es/ms2ds_siamese_210301_5000_500_400.hdf5