Skip to content

Commit

Permalink
added tests for summarize status method
Browse files Browse the repository at this point in the history
  • Loading branch information
rlizzo committed Oct 23, 2019
1 parent eac6bc6 commit 55db942
Show file tree
Hide file tree
Showing 2 changed files with 305 additions and 5 deletions.
10 changes: 5 additions & 5 deletions tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,9 @@ def test_status_and_staged_samples(self, written_repo):
co = repo.checkout(write=True)
co.arraysets['writtenaset']['45'] = dummyData
assert co.diff.status() == 'DIRTY'
diff = co.diff.staged().diff
diff = co.diff.staged()
calledWithAset = False
for record in diff.added.samples:
for record in diff.diff.added.samples:
if record.aset_name == 'writtenaset':
calledWithAset = True
assert record.data_name in '45'
Expand All @@ -390,8 +390,8 @@ def test_status_and_staged_aset(self, written_repo):
co = repo.checkout(write=True)
co.arraysets.init_arrayset(name='sampleaset', shape=(3, 5), dtype=np.float32)
assert co.diff.status() == 'DIRTY'
diff = co.diff.staged().diff
assert 'sampleaset' in diff.added.schema
diff = co.diff.staged()
assert 'sampleaset' in diff.diff.added.schema
co.commit('init aset')
assert co.diff.status() == 'CLEAN'
co.close()
co.close()
300 changes: 300 additions & 0 deletions tests/test_diff_staged_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
import pytest
import numpy as np


@pytest.fixture()
def dummy_repo(repo):
    """Return ``repo`` after seeding it with a single commit on ``master``.

    The commit contains a named-sample arrayset ``'dummy'`` holding ten
    samples (keys 0 through 9, each a 50-element array filled with its own
    key) and two metadata entries.
    """
    buffer = np.arange(50)
    writer = repo.checkout(write=True, branch='master')
    writer.arraysets.init_arrayset(
        name='dummy',
        prototype=buffer,
        named_samples=True,
    )
    for idx in range(10):
        # The same buffer is refilled and re-stored for every sample key.
        buffer.fill(idx)
        writer.arraysets['dummy'][idx] = buffer
    seed_metadata = (
        ('hello', 'world'),
        ('somemetadatakey', 'somemetadatavalue'),
    )
    for meta_key, meta_value in seed_metadata:
        writer.metadata[meta_key] = meta_value
    writer.commit('first commit adding dummy data and hello meta')
    writer.close()
    return repo


def test_add_metadata_and_samples_to_existing_aset(dummy_repo):
    """Staged additions to an existing arrayset are summarized as ADDED.

    Twenty samples are written to ``'dummy'`` (string and integer keys for
    10 through 19) along with one new metadata key; nothing should be
    reported as deleted or mutated.
    """
    from hangar.records.summarize import status

    want = (
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 20 \n'
        '| - "dummy": 20 \n'
        '|---------- \n'
        '| Metadata: 1 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    buffer = np.arange(50)
    writer = dummy_repo.checkout(write=True)
    for idx in range(10, 20):
        buffer.fill(idx)
        # Each index is stored twice: once under a str key, once under an int key.
        writer.arraysets['dummy'][str(idx)] = buffer
        writer.arraysets['dummy'][idx] = buffer
    writer.metadata['foo'] = 'bar'
    staged = writer.diff.staged()
    writer.close()

    assert status(staged.diff).getvalue() == want


def test_mutate_metadata_and_sample_values(dummy_repo):
    """Overwriting committed samples and metadata is summarized as MUTATED.

    Samples 5 through 9 of ``'dummy'`` are rewritten with new values and the
    ``'hello'`` metadata key is given a new value; nothing should be reported
    as added or deleted.
    """
    from hangar.records.summarize import status

    want = (
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "dummy": 5 \n'
        '|---------- \n'
        '| Metadata: 1 \n'
        ' \n'
    )

    buffer = np.arange(50)
    writer = dummy_repo.checkout(write=True)
    for idx in range(5, 10):
        # Offset by 10 so the stored values differ from the committed ones.
        buffer.fill(idx + 10)
        writer.arraysets['dummy'][idx] = buffer
    writer.metadata['hello'] = 'bar'
    staged = writer.diff.staged()
    writer.close()

    assert status(staged.diff).getvalue() == want


def test_delete_metadata_and_samples(dummy_repo):
    """Removing committed samples and metadata is summarized as DELETED.

    Samples 5 through 9 of ``'dummy'`` and the ``'hello'`` metadata key are
    deleted; nothing should be reported as added or mutated.
    """
    from hangar.records.summarize import status

    want = (
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "dummy": 5 \n'
        '|---------- \n'
        '| Metadata: 1 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    writer = dummy_repo.checkout(write=True)
    for idx in range(5, 10):
        del writer.arraysets['dummy'][idx]
    del writer.metadata['hello']
    staged = writer.diff.staged()
    writer.close()

    assert status(staged.diff).getvalue() == want


def test_add_new_aset_schema_and_samples(dummy_repo):
    """A newly initialized arrayset and its samples are summarized as ADDED.

    ``'new_aset'`` is created with shape ``(10, 10)`` / ``float32`` and five
    samples are written to it; the summary must list the new schema with its
    details and report nothing as deleted or mutated.
    """
    from hangar.records.summarize import status

    want = (
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "new_aset": \n'
        '| named: True \n'
        '| dtype: float32 \n'
        '| (max) shape: (10, 10) \n'
        '| variable shape: False \n'
        '| backend: 00 \n'
        "| backend opts: {'shuffle': None, 'complib': 'blosc:zstd', 'complevel': 3} \n"
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "new_aset": 5 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    writer = dummy_repo.checkout(write=True)
    writer.arraysets.init_arrayset('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        # Sample contents are random; only the counts appear in the summary.
        writer.arraysets['new_aset'][idx] = np.random.randn(10, 10).astype(np.float32)
    staged = writer.diff.staged()
    writer.close()

    assert status(staged.diff).getvalue() == want


def test_add_new_aset_schema_and_sample_and_delete_old_aset(dummy_repo):
    """Adding one arrayset while deleting another is summarized in both sections.

    ``'new_aset'`` is created and given five samples, then the committed
    ``'dummy'`` arrayset is deleted. The summary must list ``'new_aset'``
    (schema + 5 samples) under ADDED and ``'dummy'`` (schema + its 10
    committed samples) under DELETED, with nothing under MUTATED.
    """
    from hangar.records.summarize import status

    expected = (
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "new_aset": \n'
        '| named: True \n'
        '| dtype: float32 \n'
        '| (max) shape: (10, 10) \n'
        '| variable shape: False \n'
        '| backend: 00 \n'
        "| backend opts: {'shuffle': None, 'complib': 'blosc:zstd', 'complevel': 3} \n"
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "new_aset": 5 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "dummy": \n'
        '| named: True \n'
        '| dtype: int64 \n'
        '| (max) shape: (50,) \n'
        '| variable shape: False \n'
        '| backend: 10 \n'
        '| backend opts: {} \n'
        '|---------- \n'
        '| Samples: 10 \n'
        '| - "dummy": 10 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    co2 = dummy_repo.checkout(write=True)
    # The return value of init_arrayset is not needed here; the arrayset is
    # accessed through co2.arraysets below.
    co2.arraysets.init_arrayset('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        # Sample contents are random; only the counts appear in the summary.
        dummyData = np.random.randn(10, 10).astype(np.float32)
        co2.arraysets['new_aset'][idx] = dummyData
    del co2.arraysets['dummy']
    df = co2.diff.staged()
    co2.close()

    assert status(df.diff).getvalue() == expected


def test_add_new_schema_and_samples_and_change_old_backend(dummy_repo):
    """A backend change on an existing arrayset is summarized as a MUTATED schema.

    The backend of ``'dummy'`` is switched to ``'00'`` and its samples 0
    through 4 are overwritten, while ``'new_aset'`` is created and given five
    samples. The summary must list ``'new_aset'`` under ADDED and ``'dummy'``
    (schema + 5 samples) under MUTATED, with nothing under DELETED.
    """
    from hangar.records.summarize import status

    want = (
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "new_aset": \n'
        '| named: True \n'
        '| dtype: float32 \n'
        '| (max) shape: (10, 10) \n'
        '| variable shape: False \n'
        '| backend: 00 \n'
        "| backend opts: {'shuffle': None, 'complib': 'blosc:zstd', 'complevel': 3} \n"
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "new_aset": 5 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "dummy": \n'
        '| named: True \n'
        '| dtype: int64 \n'
        '| (max) shape: (50,) \n'
        '| variable shape: False \n'
        '| backend: 00 \n'
        "| backend opts: {'shuffle': None, 'complib': 'blosc:zstd', 'complevel': 3} \n"
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "dummy": 5 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    writer = dummy_repo.checkout(write=True)
    writer.arraysets['dummy'].change_backend('00')
    writer.arraysets.init_arrayset('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        writer.arraysets['new_aset'][idx] = np.random.randn(10, 10).astype(np.float32)
        # Overwrite the committed sample with different values so it counts as mutated.
        writer.arraysets['dummy'][idx] = np.arange(50) + idx
    staged = writer.diff.staged()
    writer.close()

    assert status(staged.diff).getvalue() == want

0 comments on commit 55db942

Please sign in to comment.