Skip to content
This repository has been archived by the owner on Jul 15, 2021. It is now read-only.

Commit

Permalink
Merge branch 'master' into dicarlo.BashivanKar2019
Browse files Browse the repository at this point in the history
Also fix the tests for dicarlo.BashivanKar2019, as there are now only two assemblies

* master:
  Rust305 (#51)
  Fix Kuzovkin 2018 (#50)
  Inplace (#47)
  dicarlo.Seibert2019 (#48)
  add ImageNet stimulus set (#45)
  Created exceptions in fetch and packaging for PropertyAssembly class that do not merge responses with stimulus_set (#42)
  Update lookup.csv (#44)
  Update Rajalingham2020 lookup (#43)
  Update lookup.csv (#41)
  use image_file_name without .png if present (#40)

# Conflicts:
#	tests/test_assemblies.py
#	tests/test_stimuli.py
  • Loading branch information
jjpr-mit committed Jan 20, 2021
2 parents c34ee73 + b43c13f commit 56ab1ff
Show file tree
Hide file tree
Showing 7 changed files with 344 additions and 89 deletions.
7 changes: 5 additions & 2 deletions brainio_collection/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,12 @@ def __init__(self, local_path, stimulus_set_identifier, cls):
def load(self):
    """Open the packaged assembly and wrap it in its BrainIO assembly class.

    The netCDF file at ``self.local_path`` is opened with xarray, and the
    stimulus set named by ``self.stimulus_set_identifier`` is fetched.  For
    every assembly class except ``PropertyAssembly`` the stimulus-set
    metadata is merged into the data before wrapping; a ``PropertyAssembly``
    is wrapped unmerged.

    :return: an instance of the class named by ``self.assembly_class`` (looked
        up on ``assemblies_base``) whose ``attrs`` carry both the
        ``stimulus_set_identifier`` and the loaded ``stimulus_set``.
    """
    data_array = xr.open_dataarray(self.local_path)
    stimulus_set = get_stimulus_set(self.stimulus_set_identifier)
    class_object = getattr(assemblies_base, self.assembly_class)
    if self.assembly_class == 'PropertyAssembly':
        # PropertyAssembly data is deliberately not merged with the stimulus-set metadata
        data = data_array
    else:
        data = self.merge_stimulus_set_meta(data_array, stimulus_set)
    # wrap exactly once, after the merge decision has been made
    result = class_object(data=data)
    result.attrs["stimulus_set_identifier"] = self.stimulus_set_identifier
    result.attrs["stimulus_set"] = stimulus_set
    return result
Expand Down
203 changes: 179 additions & 24 deletions brainio_collection/lookup.csv

Large diffs are not rendered by default.

15 changes: 7 additions & 8 deletions brainio_collection/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

import boto3
from tqdm import tqdm
from xarray import DataArray

import brainio_base.assemblies
from brainio_base.assemblies import get_levels
from brainio_collection import lookup, list_stimulus_sets
from brainio_collection.lookup import TYPE_ASSEMBLY, TYPE_STIMULUS_SET, sha1_hash

Expand Down Expand Up @@ -111,19 +111,18 @@ def package_stimulus_set(proto_stimulus_set, stimulus_set_identifier, bucket_nam

def write_netcdf(assembly, target_netcdf_file):
    """Write ``assembly`` to a netCDF file and return the SHA-1 hash of that file.

    :param assembly: the assembly to serialize; it must provide ``indexes``,
        ``reset_index`` and ``to_netcdf``.
    :param target_netcdf_file: path of the netCDF file to create.
    :return: the value of ``sha1_hash(target_netcdf_file)`` for the written file.
    """
    _logger.debug(f"Writing assembly to {target_netcdf_file}")
    # Reset every index in one non-mutating call: ``reset_index`` returns a new object,
    # and passing ``inplace=True`` raises TypeError in current xarray.
    # NOTE(review): presumably the indexes are reset because multi-level indexes
    # cannot be written to netCDF directly -- confirm.
    assembly = assembly.reset_index(list(assembly.indexes))
    assembly.to_netcdf(target_netcdf_file)
    return sha1_hash(target_netcdf_file)


def verify_assembly(assembly, assembly_class):
    """Check that ``assembly`` has the dimensions required for ``assembly_class``.

    :param assembly: object exposing a ``dims`` sequence of dimension names.
    :param assembly_class: name of the assembly class, e.g. ``"PropertyAssembly"``.
    :raises AssertionError: if a required dimension is missing or, for classes
        whose name starts with ``"Neur"``, the dimensions are not exactly
        ``presentation`` x ``neuroid`` (optionally x ``time_bin``).
    """
    # Compare by value: an identity test (``is not``) against a string literal
    # only works when both strings happen to be interned.
    if assembly_class == "PropertyAssembly":
        # property assemblies are exempt from the dimension requirements
        return
    assert 'presentation' in assembly.dims
    if assembly_class.startswith('Neur'):  # neural/neuron assemblies need to follow this format
        dims = set(assembly.dims)
        assert dims == {'presentation', 'neuroid'} or \
            dims == {'presentation', 'neuroid', 'time_bin'}


def package_data_assembly(proto_data_assembly, assembly_identifier, stimulus_set_identifier,
Expand Down
5 changes: 3 additions & 2 deletions migrations/convert_db_to_csv_20200728.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
def repackage_stimulus_sets():
for identifier in stimuli.list_stimulus_sets():
stimulus_set = brainio_collection.get_stimulus_set(identifier)
if identifier.startswith('movshon'):
stimulus_set['image_path_within_store'] = stimulus_set['image_file_name']
if identifier.startswith('movshon') or identifier.startswith('dicarlo.hvm'):
filenames = stimulus_set['image_file_name'].apply(lambda s: s.replace('.png', ''))
stimulus_set['image_path_within_store'] = filenames
# re-assign bucket
stimulus_set_model = stimuli.StimulusSetModel.get(stimuli.StimulusSetModel.name == identifier)
location = stimulus_set_model.stimulus_set_image_maps[0].image.image_image_store_maps[0].image_store.location
Expand Down
138 changes: 85 additions & 53 deletions tests/test_assemblies.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,11 @@
'dicarlo.SanghaviMurty2020THINGS1',
'dicarlo.SanghaviMurty2020THINGS2',
'aru.Kuzovkin2018',
'dicarlo.BashivanKar2019.m_ohp_session_1_nat',
'dicarlo.BashivanKar2019.m_ohp_session_2_nat',
'dicarlo.BashivanKar2019.m_stretch_session_1_nat',
'dicarlo.BashivanKar2019.m_stretch_session_2_nat',
'dicarlo.BashivanKar2019.n_stretch_session_1_nat',
'dicarlo.BashivanKar2019.n_stretch_session_2_nat',
'dicarlo.BashivanKar2019.s_ohp_session_1_nat',
'dicarlo.BashivanKar2019.s_stretch_session_1_nat',
'dicarlo.BashivanKar2019.m_ohp_session_1_synth',
'dicarlo.BashivanKar2019.m_ohp_session_2_synth',
'dicarlo.BashivanKar2019.m_stretch_session_1_synth',
'dicarlo.BashivanKar2019.m_stretch_session_2_synth',
'dicarlo.BashivanKar2019.n_stretch_session_1_synth',
'dicarlo.BashivanKar2019.n_stretch_session_2_synth',
'dicarlo.BashivanKar2019.s_ohp_session_1_synth',
'dicarlo.BashivanKar2019.s_stretch_session_1_synth',
'dicarlo.Seibert2019',
'dicarlo.Rust2012.single',
'dicarlo.Rust2012.array',
'dicarlo.BashivanKar2019.naturalistic',
'dicarlo.BashivanKar2019.synthetic',
))
def test_list_assembly(assembly):
l = brainio_collection.list_assemblies()
Expand All @@ -80,29 +69,18 @@ def test_list_assembly(assembly):
pytest.param('dicarlo.Kar2018hvm', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.Kar2018cocogray', marks=[pytest.mark.private_access]),
pytest.param('klab.Zhang2018search_obj_array', marks=[pytest.mark.private_access]),
pytest.param('aru.Kuzovkin2018', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.Rajalingham2020', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.SanghaviMurty2020', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.SanghaviJozwik2020', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.Sanghavi2020', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.SanghaviMurty2020THINGS1', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.SanghaviMurty2020THINGS2', marks=[pytest.mark.private_access]),
pytest.param('aru.Kuzovkin2018', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_1_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_2_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_1_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_2_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_1_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_2_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_ohp_session_1_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_stretch_session_1_nat', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_1_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_2_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_1_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_2_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_1_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_2_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_ohp_session_1_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_stretch_session_1_synth', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.Seibert2019', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.Rust2012.single', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.Rust2012.array', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.naturalistic', marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.synthetic', marks=[pytest.mark.private_access]),
])
def test_existence(assembly_identifier):
assert brainio_collection.get_assembly(assembly_identifier) is not None
Expand Down Expand Up @@ -269,27 +247,81 @@ def test_aperture(self, identifier, image_id, expected_amount_gray, ratio_gray):
assert amount_gray == expected_amount_gray


def test_inplace():
    """``reset_index`` must reject the ``inplace`` keyword with a TypeError."""
    array = xr.DataArray(0, None, None, None, None, None, False)
    with pytest.raises(TypeError) as excinfo:
        array = array.reset_index(None, inplace=True)
    message = str(excinfo.value)
    # the error text is expected to name the offending keyword
    assert "inplace" in message


class TestSeibert:
    """Checks on the packaged 'dicarlo.Seibert2019' assembly."""

    @pytest.mark.private_access
    def test_dims(self):
        # expected layout -- neuroid: 258, presentation: 286080, time_bin: 1
        seibert = brainio_collection.get_assembly('dicarlo.Seibert2019')
        assert seibert.dims == ("neuroid", "presentation", "time_bin")
        expected_sizes = (("neuroid", 258), ("presentation", 286080), ("time_bin", 1))
        for dim, size in expected_sizes:
            assert len(seibert[dim]) == size

    @pytest.mark.private_access
    def test_coords(self):
        # number of distinct values expected for each coordinate
        seibert = brainio_collection.get_assembly('dicarlo.Seibert2019')
        expected_unique = (
            ('image_id', 5760),
            ('neuroid_id', 258),
            ('animal', 3),
            ('region', 2),
            ('variation', 3),
        )
        for coord, count in expected_unique:
            assert len(set(seibert[coord].values)) == count

    @pytest.mark.private_access
    def test_content(self):
        # NaN count and the attached stimulus set
        seibert = brainio_collection.get_assembly('dicarlo.Seibert2019')
        nan_count = np.count_nonzero(np.isnan(seibert))
        assert nan_count == 19118720
        assert seibert.stimulus_set_identifier == "dicarlo.hvm"
        stimulus_set = seibert.stimulus_set
        assert stimulus_set.shape == (5760, 18)


class TestRustSingle:
    """Checks on the packaged 'dicarlo.Rust2012.single' assembly."""

    @pytest.mark.private_access
    def test_dims(self):
        # expected layout -- (neuroid: 285, presentation: 1500, time_bin: 1)
        rust_single = brainio_collection.get_assembly('dicarlo.Rust2012.single')
        assert rust_single.dims == ("neuroid", "presentation", "time_bin")
        expected_sizes = (("neuroid", 285), ("presentation", 1500), ("time_bin", 1))
        for dim, size in expected_sizes:
            assert len(rust_single[dim]) == size

    @pytest.mark.private_access
    def test_coords(self):
        # number of distinct values expected for each coordinate
        rust_single = brainio_collection.get_assembly('dicarlo.Rust2012.single')
        expected_unique = (('image_id', 300), ('neuroid_id', 285), ('region', 2))
        for coord, count in expected_unique:
            assert len(set(rust_single[coord].values)) == count


class TestRustArray:
    """Checks on the packaged 'dicarlo.Rust2012.array' assembly."""

    @pytest.mark.private_access
    def test_dims(self):
        # expected layout -- (neuroid: 296, presentation: 53700, time_bin: 6)
        rust_array = brainio_collection.get_assembly('dicarlo.Rust2012.array')
        assert rust_array.dims == ("neuroid", "presentation", "time_bin")
        expected_sizes = (("neuroid", 296), ("presentation", 53700), ("time_bin", 6))
        for dim, size in expected_sizes:
            assert len(rust_array[dim]) == size

    @pytest.mark.private_access
    def test_coords(self):
        # number of distinct values expected for each coordinate
        rust_array = brainio_collection.get_assembly('dicarlo.Rust2012.array')
        expected_unique = (
            ('image_id', 300),
            ('neuroid_id', 296),
            ('animal', 2),
            ('region', 2),
        )
        for coord, count in expected_unique:
            assert len(set(rust_array[coord].values)) == count


@pytest.mark.parametrize('assembly,shape', [
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_1_nat', (24320, 39, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_2_nat', (23040, 38, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_1_nat', (24320, 39, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_2_nat', (23040, 52, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_1_nat', (21760, 20, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_2_nat', (21760, 1, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_ohp_session_1_nat', (21120, 22, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_stretch_session_1_nat', (21120, 22, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_1_synth', (2550, 39, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_ohp_session_2_synth', (1800, 38, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_1_synth', (2550, 39, 1),
marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.m_stretch_session_2_synth', (2640, 52, 1),
marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_1_synth', (2790, 20, 1),
marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.n_stretch_session_2_synth', (100, 1, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_ohp_session_1_synth', (4465, 22, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.s_stretch_session_1_synth', (4465, 22, 1),
marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.naturalistic', (24320, 233, 1), marks=[pytest.mark.private_access]),
pytest.param('dicarlo.BashivanKar2019.synthetic', (21360, 233, 1), marks=[pytest.mark.private_access]),
])
def test_synthetic(assembly, shape):
assy = brainio_collection.get_assembly(assembly)
Expand Down
57 changes: 57 additions & 0 deletions tests/test_packaging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pytest
from pathlib import Path

from brainio_base.assemblies import DataAssembly, get_levels
from brainio_collection.packaging import write_netcdf


def test_write_netcdf():
    """``write_netcdf`` should create the requested netCDF file.

    The file is written into a throw-away directory so that the test leaves no
    artifact behind and cannot pass because of a file left by an earlier run.
    """
    import tempfile  # local import: only this test needs a scratch directory

    assy = DataAssembly(
        data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]],
        coords={
            'up': ("a", ['alpha', 'alpha', 'beta', 'beta', 'beta', 'beta']),
            'down': ("a", [1, 1, 1, 1, 2, 2]),
            'sideways': ('b', ['x', 'y', 'z'])
        },
        dims=['a', 'b']
    )
    with tempfile.TemporaryDirectory() as scratch_dir:
        netcdf_path = Path(scratch_dir) / "test.nc"
        write_netcdf(assy, str(netcdf_path))
        assert netcdf_path.exists()


def test_reset_index():
    """Resetting every index leaves the assembly without levels or indexes."""
    values = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
        [13, 14, 15],
        [16, 17, 18],
    ]
    coordinates = {
        'up': ("a", ['alpha', 'alpha', 'beta', 'beta', 'beta', 'beta']),
        'down': ("a", [1, 1, 1, 1, 2, 2]),
        'sideways': ('b', ['x', 'y', 'z'])
    }
    assy = DataAssembly(data=values, coords=coordinates, dims=['a', 'b'])

    # before: "a" is a multi-level index built from "up" and "down"
    assert assy["a"].variable.level_names == ["up", "down"]
    assert list(assy.indexes) == ["a", "b"]

    flattened = assy.reset_index(list(assy.indexes))

    # after: no levels and no indexes remain
    assert flattened["a"].variable.level_names is None
    assert get_levels(flattened) == []
    assert list(flattened.indexes) == []



def test_reset_index_levels():
    """Resetting the individual levels leaves the assembly without levels."""
    values = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
        [13, 14, 15],
        [16, 17, 18],
    ]
    coordinates = {
        'up': ("a", ['alpha', 'alpha', 'beta', 'beta', 'beta', 'beta']),
        'down': ("a", [1, 1, 1, 1, 2, 2]),
        'sideways': ('b', ['x', 'y', 'z'])
    }
    assy = DataAssembly(data=values, coords=coordinates, dims=['a', 'b'])

    # before: "a" is a multi-level index built from "up" and "down"
    assert assy["a"].variable.level_names == ["up", "down"]

    without_levels = assy.reset_index(["up", "down"])
    assert get_levels(without_levels) == []



8 changes: 8 additions & 0 deletions tests/test_stimuli.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def test_dicarlohvm(self):
'dicarlo.THINGS1',
'dicarlo.THINGS2',
'aru.Kuzovkin2018',
'fei-fei.Deng2009',
'dicarlo.BashivanKar2019.naturalistic',
'dicarlo.BashivanKar2019.synthetic'
))
Expand All @@ -74,3 +75,10 @@ def test_klab_Zhang2018search():
# Therefore, a total of 300 * 2 + 6 images are there in the stimulus set.
assert len(stimulus_set) == 606
assert len(set(stimulus_set['image_id'])) == 606


@pytest.mark.private_access
def test_feifei_Deng2009():
    """The 'fei-fei.Deng2009' stimulus set has 50,000 rows covering 1,000 labels."""
    deng2009 = brainio_collection.get_stimulus_set('fei-fei.Deng2009')
    row_count = len(deng2009)
    assert row_count == 50_000
    distinct_labels = set(deng2009['label'])
    assert len(distinct_labels) == 1_000

0 comments on commit 56ab1ff

Please sign in to comment.