Skip to content

Commit

Permalink
fixing stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
Erik Bernhardsson committed Jun 20, 2015
1 parent 74c397d commit 292436c
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 28 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
language: python
python:
- "2.7"
- "2.7_with_system_site_packages"
- "3.4"

install: bash install.sh
install:
- sudo bash install.sh

script: nosetests
29 changes: 16 additions & 13 deletions ann_benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import gzip, numpy, time, os, multiprocessing, argparse, pickle, resource
import gzip, numpy, time, os, multiprocessing, argparse, pickle, resource, random
try:
from urllib import urlretrieve
except ImportError:
from urllib.request import urlretrieve # Python 3
import sklearn.cross_validation, sklearn.preprocessing, random
import sklearn.preprocessing

# Set resource limits to prevent memory bombs
memory_limit = 12 * 2**30
soft, hard = resource.getrlimit(resource.RLIMIT_DATA)
if soft == resource.RLIM_INFINITY or soft >= memory_limit:
print 'resetting memory limit from', soft, 'to', memory_limit
print('resetting memory limit from', soft, 'to', memory_limit)
resource.setrlimit(resource.RLIMIT_DATA, (memory_limit, hard))


Expand Down Expand Up @@ -203,6 +203,7 @@ def __init__(self, metric):
self.name = 'BruteForce()'

def fit(self, X):
import sklearn.neighbors
metric = {'angular': 'cosine', 'euclidean': 'l2'}[self._metric]
self._nbrs = sklearn.neighbors.NearestNeighbors(algorithm='brute', metric=metric)
self._nbrs.fit(X)
Expand All @@ -223,8 +224,10 @@ def get_dataset(which='glove', limit=-1):
break

X = numpy.vstack(X)
import sklearn.cross_validation

X_train, X_test = sklearn.cross_validation.train_test_split(X, test_size=1000, random_state=42)
print X_train.shape, X_test.shape
print(X_train.shape, X_test.shape)
return X_train, X_test


Expand All @@ -235,7 +238,7 @@ def run_algo(args, library, algo, results_fn):
if algo != 'bf':
algo.fit(X_train)
build_time = time.time() - t0
print 'Built index in', build_time
print('Built index in', build_time)

best_search_time = float('inf')
best_precision = 0.0 # should be deterministic but paranoid
Expand All @@ -249,18 +252,18 @@ def run_algo(args, library, algo, results_fn):
precision = k / (len(queries) * 10)
best_search_time = min(best_search_time, search_time)
best_precision = max(best_precision, precision)
print search_time, precision
print(search_time, precision)

output = [library, algo.name, build_time, best_search_time, best_precision]
print output
print(output)

f = open(results_fn, 'a')
f.write('\t'.join(map(str, output)) + '\n')
f.close()


def get_queries(args):
print 'computing queries with correct results...'
print('computing queries with correct results...')

bf = BruteForce(args.distance)
X_train, X_test = get_dataset(which=args.dataset, limit=args.limit)
Expand All @@ -272,7 +275,7 @@ def get_queries(args):
correct = bf.query(x, 10)
queries.append((x, correct))
if len(queries) % 100 == 0:
print len(queries), '...'
print(len(queries), '...')

return queries

Expand Down Expand Up @@ -369,7 +372,7 @@ def get_fn(base, args):
results_fn = get_fn('results', args)
queries_fn = get_fn('queries', args)

print 'storing queries in', queries_fn, 'and results in', results_fn
print('storing queries in', queries_fn, 'and results in', results_fn)

if not os.path.exists(queries_fn):
queries = get_queries(args)
Expand All @@ -379,7 +382,7 @@ def get_fn(base, args):
else:
queries = pickle.load(open(queries_fn))

print 'got', len(queries), 'queries'
print('got', len(queries), 'queries')

algos_already_ran = set()
if os.path.exists(results_fn):
Expand All @@ -396,10 +399,10 @@ def get_fn(base, args):

random.shuffle(algos_flat)

print 'order:', algos_flat
print('order:', algos_flat)

for library, algo in algos_flat:
print algo.name, '...'
print(algo.name, '...')
# Spawn a subprocess to force the memory to be reclaimed at the end
p = multiprocessing.Process(target=run_algo, args=(args, library, algo, results_fn))
p.start()
Expand Down
11 changes: 10 additions & 1 deletion install.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
apt-get install -y python-numpy python-scipy python-sklearn
apt-get update
apt-get install -y python-numpy python-scipy python-pip
pip install scikit-learn

# Install GCC 4.8
add-apt-repository ppa:ubuntu-toolchain-r/test -y
apt-get update -qq
apt-get install -y libboost1.48-all-dev g++-4.8
export CXX="g++-4.8" CC="gcc-4.8"

cd install
for fn in annoy.sh panns.sh nearpy.sh sklearn.sh flann.sh kgraph.sh nmslib.sh
do
Expand Down
1 change: 1 addition & 0 deletions install/kgraph.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
git clone https://github.com/aaalgo/kgraph
pushd kgraph
apt-get install -y libboost-timer-dev libboost-chrono-dev
sudo make deps-ubuntu
make
make release
Expand Down
4 changes: 2 additions & 2 deletions install/nearpy.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
sudo apt-get install -y python-pip
sudo pip install nearpy bitarray redis
apt-get install -y python-pip
pip install nearpy bitarray redis h5py
9 changes: 4 additions & 5 deletions install/nmslib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ rm -rf NonMetricSpaceLib
# Note that we use the develop branch here:
git clone https://github.com/searchivarius/NonMetricSpaceLib.git
cd NonMetricSpaceLib/similarity_search
git checkout ann-benchmark
sudo apt-get install -y cmake libeigen3-dev libgsl0-dev libboost-all-dev g++-4.8
# Actually let's make g++ an alias
alias g++=g++-4.8
git checkout ann-benchmark
apt-get install -y cmake libeigen3-dev libgsl0-dev libboost-all-dev
echo "CC: $CC, CXX: $CXX"
cmake .
make -j 4
cd ../python_binding
make
sudo make install
make install
cd ../..
14 changes: 9 additions & 5 deletions test/test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
import inspect
import ann_benchmarks
from sklearn.datasets.samples_generator import make_blobs
Expand All @@ -10,13 +11,16 @@
def check_algo(algo_name, algo):
algo.fit(X)
result = algo.query(X[42], 10)
assert result[0] == 42
assert len(result) == 10
assert len(set(result)) == 10
if result[0] != 42:
raise AssertionError('Expected first item to be 42: Result: %s' % result)
if len(result) != 10:
raise AssertionError('Expected results to have length 10: Result: %s' % result)
if len(set(result)) != 10:
raise AssertionError('Expected results to be unique: Result: %s' % result)

def test_all_algos():
for metric in ['angular', 'euclidean']:
algos = ann_benchmarks.get_algos(metric)
for algo_key in algos.keys():
for algo in algos[algo_key]:
yield check_algo, algo.name, algo # pass name just so unittest can capture it
algo = random.choice(algos[algo_key]) # Just pick one of each
yield check_algo, algo.name, algo # pass name just so unittest can capture it

0 comments on commit 292436c

Please sign in to comment.