From 1a5daf643980ce7599cba4c802d447ab14ccda42 Mon Sep 17 00:00:00 2001
From: Claude Paroz
Date: Fri, 4 Oct 2019 23:23:43 +0200
Subject: [PATCH] Refs #108 - Test and improve format autodetection

Autodetection was added for the odf format.
---
 tablib/formats/_dbf.py  |  6 +-----
 tablib/formats/_ods.py  | 11 +++++++++++
 tablib/formats/_xls.py  |  6 +++---
 tablib/formats/_xlsx.py |  4 ++--
 test_tablib.py          | 26 +++++++++++++++++++++-----
 5 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
index 710797db..0d1c87bd 100644
--- a/tablib/formats/_dbf.py
+++ b/tablib/formats/_dbf.py
@@ -83,9 +83,5 @@ def detect(stream):
         else:
             _dbf = dbf.Dbf(StringIO(stream), readOnly=True)
         return True
-    except (ValueError, struct.error):
-        # When we try to open up a file that's not a DBF, dbfpy raises a
-        # ValueError.
-        # When unpacking a string argument with less than 8 chars, struct.error is
-        # raised.
+    except Exception:
         return False
diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py
index 5b900b5e..dbf57c4b 100644
--- a/tablib/formats/_ods.py
+++ b/tablib/formats/_ods.py
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
                     cell = table.TableCell()
                     cell.addElement(text.P(text=col))
                     odf_row.addElement(cell)
+
+
+def detect(stream):
+    if isinstance(stream, bytes):
+        # load expects a file-like object.
+        stream = BytesIO(stream)
+    try:
+        opendocument.load(stream)
+        return True
+    except Exception:
+        return False
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index baa7904d..88e8636d 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -25,17 +25,17 @@ def detect(stream):
     try:
         xlrd.open_workbook(file_contents=stream)
         return True
-    except (TypeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(file_contents=stream.read())
         return True
-    except (AttributeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(filename=stream)
         return True
-    except:
+    except Exception:
         return False
 
 
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 516191c3..f8f21c2a 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -28,8 +28,8 @@ def detect(stream):
     try:
         openpyxl.reader.excel.load_workbook(stream, read_only=True)
         return True
-    except openpyxl.shared.exc.InvalidFileException:
-        pass
+    except Exception:
+        return False
 
 def export_set(dataset, freeze_panes=True):
     """Returns XLSX representation of Dataset."""
diff --git a/test_tablib.py b/test_tablib.py
index f15724d6..216ca880 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -288,17 +288,33 @@ def test_book_export_no_exceptions(self):
 
     def test_auto_format_detect(self):
         """Test auto format detection."""
+        # html, jira, latex, rst are export only.
+
+        _xls = self.founders.export('xls')
+        self.assertEqual(tablib.detect_format(_xls), 'xls')
+
+        _xlsx = self.founders.export('xlsx')
+        self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
+
+        _ods = self.founders.export('ods')
+        self.assertEqual(tablib.detect_format(_ods), 'ods')
+
+        _df = self.founders.export('df')
+        self.assertEqual(tablib.detect_format(_df), 'df')
 
         _yaml = '- {age: 90, first_name: John, last_name: Adams}'
+        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+
         _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
-        _csv = '1,2,3\n4,5,6\n7,8,9\n'
-        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
-        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
+        self.assertEqual(tablib.detect_format(_json), 'json')
 
-        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+        _csv = '1,2,3\n4,5,6\n7,8,9\n'
         self.assertEqual(tablib.detect_format(_csv), 'csv')
+
+        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
         self.assertEqual(tablib.detect_format(_tsv), 'tsv')
-        self.assertEqual(tablib.detect_format(_json), 'json')
+
+        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
         self.assertEqual(tablib.detect_format(_bunk), None)
 
     def test_transpose(self):