Skip to content

Commit

Permalink
Merge pull request #6 from maumueller/small-bitstring-dataset
Browse files Browse the repository at this point in the history
Random bitstring dataset
  • Loading branch information
ale-f authored Dec 6, 2018
2 parents 7f33226 + 69a3706 commit df6acbc
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 19 deletions.
36 changes: 18 additions & 18 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,30 @@ services:
- docker

env:
- LIBRARY=annoy
- LIBRARY=dolphinn
- LIBRARY=faiss
- LIBRARY=flann
- LIBRARY=hdidx
- LIBRARY=kgraph
- LIBRARY=mrpt
- LIBRARY=nearpy
- LIBRARY=ngt
- LIBRARY=nmslib
- LIBRARY=hnswlib
- LIBRARY=panns
- LIBRARY=pynndescent
- LIBRARY=rpforest
- LIBRARY=sklearn
- LIBRARY=mih
- LIBRARY=annoy DATASET=random-xs-20-angular
- LIBRARY=dolphinn DATASET=random-xs-20-angular
- LIBRARY=faiss DATASET=random-xs-20-angular
- LIBRARY=flann DATASET=random-xs-20-angular
- LIBRARY=hdidx DATASET=random-xs-20-angular
- LIBRARY=kgraph DATASET=random-xs-20-angular
- LIBRARY=mrpt DATASET=random-xs-20-angular
- LIBRARY=nearpy DATASET=random-xs-20-angular
- LIBRARY=ngt DATASET=random-xs-20-angular
- LIBRARY=nmslib DATASET=random-xs-20-angular
- LIBRARY=hnswlib DATASET=random-xs-20-angular
- LIBRARY=panns DATASET=random-xs-20-angular
- LIBRARY=pynndescent DATASET=random-xs-20-angular
- LIBRARY=rpforest DATASET=random-xs-20-angular
- LIBRARY=sklearn DATASET=random-xs-20-angular
- LIBRARY=mih DATASET=random-xs-16-hamming

before_install:
- pip install -r requirements.txt
- python install.py

script:
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset random-xs-20-angular --run-disabled
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset random-xs-20-angular --run-disabled --batch
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled --batch
# Plot the dataset selected by the job's DATASET variable, i.e. the same one
# passed to run.py, instead of a hard-coded dataset name.
- python plot.py --dataset $DATASET --output plot.png
- python plot.py --dataset $DATASET --output plot-batch.png --batch
- python -m unittest test/test-metrics.py
Expand Down
2 changes: 1 addition & 1 deletion algos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ bit:
# takes place. Otherwise, n / r points from the dataset are used for a
# re-ordering process. MIH requires either r = 0 or r >= 2.
args: { "d" : "@dimension", "r" : [0, 2, 3],
"chunk-factor" : [0, 0.5, 0.8] }
"chunk-factor" : [0.2, 0.5, 0.8] }
kgraph:
docker-tag: ann-benchmarks-kgraph
module: ann_benchmarks.algorithms.kgraph
Expand Down
14 changes: 14 additions & 0 deletions ann_benchmarks/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,17 @@ def random(out_fn, n_dims, n_samples, centers, distance):
X_train, X_test = train_test_split(X, test_size=0.1)
write_output(X_train, X_test, out_fn, distance)

def random_bitstring(out_fn, n_dims, n_samples, n_queries):
    """Generate a random boolean dataset and write it out for Hamming search.

    Real-valued points are drawn with ``sklearn.datasets.make_blobs`` (using
    ``n_queries`` as the number of blob centers and a fixed ``random_state``
    so the output is reproducible), then each coordinate is turned into one
    bit: True where the coordinate is strictly positive, False otherwise.

    Args:
        out_fn: Output destination, forwarded unchanged to ``write_output``.
        n_dims: Number of features (bits) per generated point.
        n_samples: Total number of points generated before splitting.
        n_queries: Used both as the number of blob centers and as the
            ``test_size`` handed to ``train_test_split``.
    """
    import sklearn.datasets

    Y, _ = sklearn.datasets.make_blobs(
        n_samples=n_samples,
        n_features=n_dims,
        centers=n_queries,
        random_state=1,
    )
    # Vectorized threshold: yields an (n_samples, n_dims) boolean array with
    # the same contents as comparing every coordinate against 0 one by one.
    # This also avoids the ``numpy.bool`` alias, which was removed in
    # NumPy 1.24.
    X = Y > 0

    X_train, X_test = train_test_split(X, test_size=n_queries)
    write_output(X_train, X_test, out_fn, 'hamming', 'bit')


def word2bits(out_fn, path, fn):
import tarfile
Expand Down Expand Up @@ -297,6 +308,9 @@ def lastfm(out_fn, n_dimensions, test_size=50000):
'random-s-100-euclidean': lambda out_fn: random(out_fn, 100, 100000, 1000, 'euclidean'),
'random-xs-20-angular': lambda out_fn: random(out_fn, 20, 10000, 100, 'angular'),
'random-s-100-angular': lambda out_fn: random(out_fn, 100, 100000, 1000, 'angular'),
'random-xs-16-hamming': lambda out_fn: random_bitstring(out_fn, 16, 10000, 100),
'random-s-128-hamming': lambda out_fn: random_bitstring(out_fn, 128, 50000, 1000),
'random-l-256-hamming': lambda out_fn: random_bitstring(out_fn, 256, 100000, 1000),
'sift-128-euclidean': sift,
'nytimes-256-angular': lambda out_fn: nytimes(out_fn, 256),
'nytimes-16-angular': lambda out_fn: nytimes(out_fn, 16),
Expand Down

0 comments on commit df6acbc

Please sign in to comment.