Refs #108 - Test and improve format autodetection

Autodetection was added for the ods (OpenDocument Spreadsheet, ODF) format.
jazzband · Oct 4, 2019 · ca8dbcf
1 parent 4418535
commit ca8dbcf
Show file tree

Hide file tree

Showing 7 changed files with 40 additions and 16 deletions.
diff --git a/HISTORY.md b/HISTORY.md
@@ -7,6 +7,7 @@
 - Fixed a regression for xlsx exports where non-string values were forced to
   strings (#314).
 - Fixed xlsx format detection (which was often detected as `xls` format).
+- Improved format autodetection and added autodetection for the odf format.
 - Added search to all documentation pages
 - Open xlsx workbooks in read-only mode (#316)
 - Unpin requirements

diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
     try:
         csv.Sniffer().sniff(stream, delimiters=delimiter)
         return True
-    except (csv.Error, TypeError):
+    except Exception:
         return False
diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
@@ -83,9 +83,5 @@ def detect(stream):
         else:
             _dbf = dbf.Dbf(StringIO(stream), readOnly=True)
         return True
-    except (ValueError, struct.error):
-        # When we try to open up a file that's not a DBF, dbfpy raises a
-        # ValueError.
-        # When unpacking a string argument with less than 8 chars, struct.error is
-        # raised.
+    except Exception:
         return False
diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
                     cell = table.TableCell()
                     cell.addElement(text.P(text=col))
                     odf_row.addElement(cell)
+
+
+def detect(stream):
+    if isinstance(stream, bytes):
+        # load expects a file-like object.
+        stream = BytesIO(stream)
+    try:
+        opendocument.load(stream)
+        return True
+    except Exception:
+        return False
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
@@ -25,17 +25,17 @@ def detect(stream):
     try:
         xlrd.open_workbook(file_contents=stream)
         return True
-    except (TypeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(file_contents=stream.read())
         return True
-    except (AttributeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(filename=stream)
         return True
-    except:
+    except Exception:
         return False
 
 

diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
@@ -28,8 +28,8 @@ def detect(stream):
     try:
         openpyxl.reader.excel.load_workbook(stream, read_only=True)
         return True
-    except openpyxl.shared.exc.InvalidFileException:
-        pass
+    except Exception:
+        return False
 
 def export_set(dataset, freeze_panes=True):
     """Returns XLSX representation of Dataset."""

diff --git a/test_tablib.py b/test_tablib.py
@@ -288,17 +288,33 @@ def test_book_export_no_exceptions(self):
 
     def test_auto_format_detect(self):
         """Test auto format detection."""
+        # html, jira, latex, rst are export only.
+
+        _xls = self.founders.export('xls')
+        self.assertEqual(tablib.detect_format(_xls), 'xls')
+
+        _xlsx = self.founders.export('xlsx')
+        self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
+
+        _ods = self.founders.export('ods')
+        self.assertEqual(tablib.detect_format(_ods), 'ods')
+
+        _df = self.founders.export('df')
+        self.assertEqual(tablib.detect_format(_df), 'df')
 
         _yaml = '- {age: 90, first_name: John, last_name: Adams}'
+        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+
         _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
-        _csv = '1,2,3\n4,5,6\n7,8,9\n'
-        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
-        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
+        self.assertEqual(tablib.detect_format(_json), 'json')
 
-        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+        _csv = '1,2,3\n4,5,6\n7,8,9\n'
         self.assertEqual(tablib.detect_format(_csv), 'csv')
+
+        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
         self.assertEqual(tablib.detect_format(_tsv), 'tsv')
-        self.assertEqual(tablib.detect_format(_json), 'json')
+
+        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
         self.assertEqual(tablib.detect_format(_bunk), None)
 
     def test_transpose(self):