yurymalkov · yurymalkov · Apr 24, 2016 · Apr 24, 2016 · Apr 24, 2016 · Apr 24, 2016
diff --git a/README.rst b/README.rst
@@ -15,7 +15,7 @@ Evaluated
 * `PANNS <https://github.com/ryanrhymes/panns>`__
 * `NearPy <http://nearpy.io>`__
 * `KGraph <https://github.com/aaalgo/kgraph>`__
-* `NonMetricSpaceLib <https://github.com/searchivarius/NonMetricSpaceLib>`__
+* `NMSLIB (Non-Metric Space Library) <https://github.com/searchivarius/nmslib>`__
 * `RPForest <https://github.com/lyst/rpforest>`__
 * `FALCONN <http://falconn-lib.org/>`__
 
@@ -70,7 +70,12 @@ This is very much a work in progress... more results coming later!
    :align: center
 
 Note that KGraph has a substantial performance regression in the latest version.
-Once the author has confirmed and fixed, I will rerun the KGraph benchmarks.
+Once the author has confirmed and fixed it, I will rerun the KGraph benchmarks.
+
+Also note that NMSLIB saves its indices in the ``indices`` directory.
+If the tests are re-run using a different seed and/or a different number of queries, the
+contents of this directory should be deleted.
+
 
 Testing
 -------

diff --git a/ann_benchmarks/__init__.py b/ann_benchmarks/__init__.py
@@ -354,7 +354,11 @@ def get_dataset(which='glove', limit=-1):
     X = numpy.vstack(X)
     import sklearn.cross_validation
 
-    X_train, X_test = sklearn.cross_validation.train_test_split(X, test_size=1000, random_state=42)
+    # Here Eric is most welcome to use any other random_state
+    # Last time, Leo was testing using random_state==1
+    # However, it is best to use a new random seed for each major re-evaluation,
+    # so that we test on truly blind data.
+    X_train, X_test = sklearn.cross_validation.train_test_split(X, test_size=10000, random_state=2016)
     print(X_train.shape, X_test.shape)
     return X_train, X_test