Skip to content

Commit

Permalink
Ensure we don't falsely claim to have cached something
Browse files Browse the repository at this point in the history
Possibly related to #9
  • Loading branch information
sebbacon committed Mar 5, 2019
1 parent c552960 commit 4e16fea
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
2 changes: 1 addition & 1 deletion ebmdatalab/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Package for ebmdatalab jupyter notebook stuff
"""
__version__ = '0.0.7'
__version__ = '0.0.8'
6 changes: 6 additions & 0 deletions ebmdatalab/bq.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import glob
import os
import re
from hashlib import md5
Expand Down Expand Up @@ -43,6 +44,11 @@ def cached_read(sql,
if use_cache and already_cached:
df = pd.read_csv(csv_path)
else:
old_fingerprint_files = glob.glob(
os.path.join(
csv_dir, "." + csv_filename + ".*.tmp"))
for f in old_fingerprint_files:
os.remove(f)
with open(fingerprint_path, "w") as f:
f.write("File created by {}".format(__file__))
df = pd.read_gbq(sql, **defaults)
Expand Down
25 changes: 25 additions & 0 deletions ebmdatalab/test_bq.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,28 @@ def test_cached_read(mock_read_gbq):
# and now with `use_cache` param
df = bq.cached_read(sql, csv_path=csv_file.name, use_cache=False)
assert mock_read_gbq.call_count == 2


def _check_cached_read(csv_file, mock_read, sql, expected):
    """Run `sql` through `bq.cached_read` and check the returned frame.

    `mock_read` is primed to return `expected` before the call, and
    `csv_file.name` is passed as the `csv_path` argument.  The check
    passes when the returned frame prints identically to `expected`.
    """
    mock_read.return_value = expected
    target_path = csv_file.name
    result = bq.cached_read(sql, csv_path=target_path)
    # The comparison is on the printed form of each frame.
    rendered, wanted = str(result), str(expected)
    assert rendered == wanted


@patch('ebmdatalab.bq.pd.read_gbq')
def test_old_cache_markers_removed(mock_read_gbq):
    """Alternate two queries against one CSV path and check each result.

    The first query is issued, then a second, then the first again, all
    against the same temporary file.  Every call must hand back the
    frame the mocked `read_gbq` was primed with for that query.
    """
    plain_query = (
        "select * from foobar",
        DataFrame([{'a': 1}]),
    )
    ordered_query = (
        "select * from foobar order by id",
        DataFrame([{'a': 2}]),
    )
    # Repeating the first query last is the point of the test: by then
    # the shared file has been used for a different query in between.
    call_sequence = (plain_query, ordered_query, plain_query)
    with tempfile.NamedTemporaryFile() as csv_file:
        for sql, expected in call_sequence:
            _check_cached_read(csv_file, mock_read_gbq, sql, expected)

0 comments on commit 4e16fea

Please sign in to comment.