Skip to content

Commit

Permalink
Chunkstore Improvements + fixes (#743)
Browse files Browse the repository at this point in the history
* Add a better (faster) list copy for the column list, and add a test to guard against future regressions

* Fix get_info issues with empty DF

* Fix version issues (pin pytest-xdist to <=1.26.1 in setup.py)
  • Loading branch information
bmoscon authored Apr 11, 2019
1 parent d0f58ab commit 138a8ec
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 4 deletions.
4 changes: 2 additions & 2 deletions arctic/chunkstore/chunkstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,9 @@ def get_info(self, symbol):
ret['chunk_count'] = sym[CHUNK_COUNT]
ret['len'] = sym[LEN]
ret['appended_rows'] = sym[APPEND_COUNT]
ret['metadata'] = sym[METADATA]
ret['metadata'] = sym[METADATA] if METADATA in sym else None
ret['chunker'] = sym[CHUNKER]
ret['chunk_size'] = sym[CHUNK_SIZE]
ret['chunk_size'] = sym[CHUNK_SIZE] if CHUNK_SIZE in sym else 0
ret['serializer'] = sym[SERIALIZER]
return ret

Expand Down
2 changes: 1 addition & 1 deletion arctic/serialization/numpy_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def deserialize(self, data, columns=None):

if columns:
if index:
columns = list(columns)
columns = columns[:]
columns.extend(meta[INDEX])
if len(columns) > len(set(columns)):
raise Exception("Duplicate columns specified, cannot de-serialize")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def run_tests(self):
"pytest-cov",
"pytest-server-fixtures",
"pytest-timeout",
"pytest-xdist",
"pytest-xdist<=1.26.1",
"lz4"
],
entry_points={'console_scripts': [
Expand Down
21 changes: 21 additions & 0 deletions tests/integration/chunkstore/test_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,24 @@ def test_missing_cols(chunkstore_lib):
assert_frame_equal(chunkstore_lib.read('test'), expected_df)
df = chunkstore_lib.read('test', columns=['B'])
assert_frame_equal(df, expected_df['B'].to_frame())


def test_column_copy(chunkstore_lib):
    """Reading with a ``columns`` list must leave the caller's list unchanged."""
    dates = pd.date_range('2019-01-01', periods=3, freq='D')
    frame = pd.DataFrame(
        {'A': [1, 2, 3], 'B': [5, 6, 7]},
        index=DatetimeIndex(dates, name='date'),
    )

    # Keep a handle on the list passed to read() so it can be checked afterwards.
    requested = ['A']
    chunkstore_lib.write('test', frame)
    chunkstore_lib.read('test', columns=requested)

    assert requested == ['A']


def test_get_info_empty(chunkstore_lib):
    """get_info on a symbol written from an empty DataFrame reports zero counts and no metadata."""
    empty_frame = pd.DataFrame(data={'date': [], 'data': []})
    chunkstore_lib.write('test', empty_frame)

    expected_info = {
        'appended_rows': 0,
        'chunker': u'date',
        'len': 0,
        'chunk_size': 0,
        'chunk_count': 0,
        'serializer': u'FrameToArray',
        'metadata': None,
    }
    assert chunkstore_lib.get_info('test') == expected_info

1 comment on commit 138a8ec

@jasonlocal
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome. Thanks for the update

Please sign in to comment.