diff --git a/.travis.yml b/.travis.yml
index 213bd6f67..10ec93969 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,30 +8,30 @@ services:
   - docker

 env:
-  - LIBRARY=annoy
-  - LIBRARY=dolphinn
-  - LIBRARY=faiss
-  - LIBRARY=flann
-  - LIBRARY=hdidx
-  - LIBRARY=kgraph
-  - LIBRARY=mrpt
-  - LIBRARY=nearpy
-  - LIBRARY=ngt
-  - LIBRARY=nmslib
-  - LIBRARY=hnswlib
-  - LIBRARY=panns
-  - LIBRARY=pynndescent
-  - LIBRARY=rpforest
-  - LIBRARY=sklearn
-  - LIBRARY=mih
+  - LIBRARY=annoy DATASET=random-xs-20-angular
+  - LIBRARY=dolphinn DATASET=random-xs-20-angular
+  - LIBRARY=faiss DATASET=random-xs-20-angular
+  - LIBRARY=flann DATASET=random-xs-20-angular
+  - LIBRARY=hdidx DATASET=random-xs-20-angular
+  - LIBRARY=kgraph DATASET=random-xs-20-angular
+  - LIBRARY=mrpt DATASET=random-xs-20-angular
+  - LIBRARY=nearpy DATASET=random-xs-20-angular
+  - LIBRARY=ngt DATASET=random-xs-20-angular
+  - LIBRARY=nmslib DATASET=random-xs-20-angular
+  - LIBRARY=hnswlib DATASET=random-xs-20-angular
+  - LIBRARY=panns DATASET=random-xs-20-angular
+  - LIBRARY=pynndescent DATASET=random-xs-20-angular
+  - LIBRARY=rpforest DATASET=random-xs-20-angular
+  - LIBRARY=sklearn DATASET=random-xs-20-angular
+  - LIBRARY=mih DATASET=random-xs-16-hamming

 before_install:
   - pip install -r requirements.txt
   - python install.py

 script:
-  - python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset random-xs-20-angular --run-disabled
-  - python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset random-xs-20-angular --run-disabled --batch
+  - python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled
+  - python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled --batch
   - python plot.py --dataset random-xs-20-angular --output plot.png
   - python plot.py --dataset random-xs-20-angular --output plot-batch.png --batch
   - python -m unittest test/test-metrics.py
diff --git a/algos.yaml b/algos.yaml
index 691cb247b..8dccae11e 100644
--- a/algos.yaml
+++ b/algos.yaml
@@ -468,7 +468,7 @@ bit:
         # takes places. Otherwise, n / r points from the dataset are used for a
         # re-ording process. MIH requires either r = 0 or r >= 2.
         args: { "d" : "@dimension", "r" : [0, 2, 3],
-                "chunk-factor" : [0, 0.5, 0.8] }
+                "chunk-factor" : [0.2, 0.5, 0.8] }
   kgraph:
     docker-tag: ann-benchmarks-kgraph
     module: ann_benchmarks.algorithms.kgraph
diff --git a/ann_benchmarks/datasets.py b/ann_benchmarks/datasets.py
index 5e1852316..08dfba9cc 100644
--- a/ann_benchmarks/datasets.py
+++ b/ann_benchmarks/datasets.py
@@ -214,6 +214,17 @@ def random(out_fn, n_dims, n_samples, centers, distance):
     X_train, X_test = train_test_split(X, test_size=0.1)
     write_output(X_train, X_test, out_fn, distance)

+def random_bitstring(out_fn, n_dims, n_samples, n_queries):
+    import sklearn.datasets
+
+    Y, _ = sklearn.datasets.make_blobs(n_samples=n_samples, n_features=n_dims, centers=n_queries, random_state=1)
+    X = numpy.zeros((n_samples, n_dims), dtype=numpy.bool)
+    for i, vec in enumerate(Y):
+        X[i] = numpy.array([v > 0 for v in vec], dtype=numpy.bool)
+
+    X_train, X_test = train_test_split(X, test_size=n_queries)
+    write_output(X_train, X_test, out_fn, 'hamming', 'bit')
+
 def word2bits(out_fn, path, fn):
     import tarfile

@@ -297,6 +308,9 @@ def lastfm(out_fn, n_dimensions, test_size=50000):
     'random-s-100-euclidean': lambda out_fn: random(out_fn, 100, 100000, 1000, 'euclidean'),
     'random-xs-20-angular': lambda out_fn: random(out_fn, 20, 10000, 100, 'angular'),
     'random-s-100-angular': lambda out_fn: random(out_fn, 100, 100000, 1000, 'angular'),
+    'random-xs-16-hamming': lambda out_fn: random_bitstring(out_fn, 16, 10000, 100),
+    'random-s-128-hamming': lambda out_fn: random_bitstring(out_fn, 128, 50000, 1000),
+    'random-l-256-hamming': lambda out_fn: random_bitstring(out_fn, 256, 100000, 1000),
     'sift-128-euclidean': sift,
     'nytimes-256-angular': lambda out_fn: nytimes(out_fn, 256),
     'nytimes-16-angular': lambda out_fn: nytimes(out_fn, 16),