Skip to content

Commit

Permalink
Refs #108 - Test and improve format autodetection
Browse files Browse the repository at this point in the history
Autodetection was added for the odf format.
  • Loading branch information
claudep committed Oct 4, 2019
1 parent 4418535 commit ca8dbcf
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 16 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Fixed a regression for xlsx exports where non-string values were forced to
strings (#314).
- Fixed xlsx format detection (which was often detected as `xls` format).
- Improved format autodetection and added autodetection for the `ods` format.
- Added search to all documentation pages
- Open xlsx workbooks in read-only mode (#316)
- Unpin requirements
Expand Down
2 changes: 1 addition & 1 deletion tablib/formats/_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
try:
csv.Sniffer().sniff(stream, delimiters=delimiter)
return True
except (csv.Error, TypeError):
except Exception:
return False
6 changes: 1 addition & 5 deletions tablib/formats/_dbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,5 @@ def detect(stream):
else:
_dbf = dbf.Dbf(StringIO(stream), readOnly=True)
return True
except (ValueError, struct.error):
# When we try to open up a file that's not a DBF, dbfpy raises a
# ValueError.
# When unpacking a string argument with less than 8 chars, struct.error is
# raised.
except Exception:
return False
11 changes: 11 additions & 0 deletions tablib/formats/_ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
cell = table.TableCell()
cell.addElement(text.P(text=col))
odf_row.addElement(cell)


def detect(stream):
    """Return True if ``stream`` can be loaded as an OpenDocument file.

    ``stream`` may be raw ``bytes``, which are wrapped in a ``BytesIO``
    because ``opendocument.load`` is given a file-like object here, or any
    other object that is passed to ``opendocument.load`` unchanged.

    Detection is best-effort: any exception raised while loading is taken
    to mean "not this format" and yields False rather than propagating.

    When ``stream`` is a file-like object exposing ``tell``/``seek``, its
    read position is put back where it was before probing, so the caller
    (or the next format detector) does not receive a consumed stream.
    """
    if isinstance(stream, bytes):
        # load expects a file-like object.
        stream = BytesIO(stream)

    # Remember where a file-like input currently stands; plain strings and
    # other objects without a usable tell() are left alone.
    start = None
    tell = getattr(stream, 'tell', None)
    if callable(tell):
        try:
            start = tell()
        except (OSError, ValueError):
            start = None

    try:
        opendocument.load(stream)
        return True
    except Exception:
        # Deliberately broad: the loader can fail in many different ways on
        # input that is not an OpenDocument archive, and every one of them
        # simply means the format does not match.
        return False
    finally:
        if start is not None:
            try:
                stream.seek(start)
            except (AttributeError, OSError, ValueError):
                # Unseekable or already-closed stream: nothing to restore.
                pass
6 changes: 3 additions & 3 deletions tablib/formats/_xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ def detect(stream):
try:
xlrd.open_workbook(file_contents=stream)
return True
except (TypeError, XLRDError):
except Exception:
pass
try:
xlrd.open_workbook(file_contents=stream.read())
return True
except (AttributeError, XLRDError):
except Exception:
pass
try:
xlrd.open_workbook(filename=stream)
return True
except:
except Exception:
return False


Expand Down
4 changes: 2 additions & 2 deletions tablib/formats/_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def detect(stream):
try:
openpyxl.reader.excel.load_workbook(stream, read_only=True)
return True
except openpyxl.shared.exc.InvalidFileException:
pass
except Exception:
return False

def export_set(dataset, freeze_panes=True):
"""Returns XLSX representation of Dataset."""
Expand Down
26 changes: 21 additions & 5 deletions test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,17 +288,33 @@ def test_book_export_no_exceptions(self):

def test_auto_format_detect(self):
"""Test auto format detection."""
# html, jira, latex, rst are export only.

_xls = self.founders.export('xls')
self.assertEqual(tablib.detect_format(_xls), 'xls')

_xlsx = self.founders.export('xlsx')
self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')

_ods = self.founders.export('ods')
self.assertEqual(tablib.detect_format(_ods), 'ods')

_df = self.founders.export('df')
self.assertEqual(tablib.detect_format(_df), 'df')

_yaml = '- {age: 90, first_name: John, last_name: Adams}'
self.assertEqual(tablib.detect_format(_yaml), 'yaml')

_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
_csv = '1,2,3\n4,5,6\n7,8,9\n'
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
self.assertEqual(tablib.detect_format(_json), 'json')

self.assertEqual(tablib.detect_format(_yaml), 'yaml')
_csv = '1,2,3\n4,5,6\n7,8,9\n'
self.assertEqual(tablib.detect_format(_csv), 'csv')

_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
self.assertEqual(tablib.detect_format(_json), 'json')

_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
self.assertEqual(tablib.detect_format(_bunk), None)

def test_transpose(self):
Expand Down

0 comments on commit ca8dbcf

Please sign in to comment.