Skip to content

Commit

Permalink
Allow importing 'ragged' .xlsx files through dataset (#547)
Browse files Browse the repository at this point in the history
  • Loading branch information
ClemenceAlain authored Apr 11, 2023
1 parent 7f47b33 commit cb21395
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 22 deletions.
37 changes: 17 additions & 20 deletions src/tablib/formats/_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,8 @@ def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape
return stream.getvalue()

@classmethod
def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0):
"""Returns databook from XLS stream."""

dset.wipe()

xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
sheet = xls_book.active
def import_sheet(cls, dset, sheet, headers=True, skip_lines=0):
"""Populates dataset with sheet."""

dset.title = sheet.title

Expand All @@ -102,8 +97,20 @@ def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0)
if i == skip_lines and headers:
dset.headers = row_vals
else:
if i > skip_lines and len(row_vals) < dset.width:
row_vals += [''] * (dset.width - len(row_vals))
dset.append(row_vals)

@classmethod
def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0):
"""Populate ``dset`` from the active sheet of an XLSX stream.

Wipes ``dset``, loads the workbook from ``in_stream`` and hands its active
sheet to ``import_sheet`` together with ``headers`` and ``skip_lines``.
Nothing is returned; ``dset`` is filled in place.
"""

dset.wipe()

# NOTE(review): data_only=True presumably yields cached cell values rather
# than formulas -- confirm against the openpyxl documentation.
xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
sheet = xls_book.active
cls.import_sheet(dset, sheet, headers, skip_lines)

@classmethod
def import_book(cls, dbook, in_stream, headers=True, read_only=True):
"""Returns databook from XLS stream."""
Expand All @@ -113,19 +120,9 @@ def import_book(cls, dbook, in_stream, headers=True, read_only=True):
xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)

for sheet in xls_book.worksheets:
data = tablib.Dataset()
data.title = sheet.title

for i, row in enumerate(sheet.rows):
row_vals = [c.value for c in row]
if (i == 0) and (headers):
data.headers = row_vals
else:
if i > 0 and len(row_vals) < data.width:
row_vals += [''] * (data.width - len(row_vals))
data.append(row_vals)

dbook.add_sheet(data)
dset = tablib.Dataset()
cls.import_sheet(dset, sheet, headers)
dbook.add_sheet(dset)

@classmethod
def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False):
Expand Down
11 changes: 9 additions & 2 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1096,13 +1096,20 @@ def test_xlsx_bad_chars_sheet_name(self):
new_data = tablib.Databook().load(_xlsx, 'xlsx')
self.assertEqual(new_data.sheets()[0].title, 'bad name -------qwertyuiopasdfg')

def test_xlsx_import_set_ragged(self):
"""Import XLSX file when not all rows have the same length."""
def test_xlsx_import_book_ragged(self):
"""Import XLSX file through databook when not all rows have the same length."""
xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
with open(str(xlsx_source), mode='rb') as fh:
book = tablib.Databook().load(fh, 'xlsx')
# The row popped from the first sheet is expected to carry '' in its second
# cell -- presumably the padding added for a row shorter than the headers.
self.assertEqual(book.sheets()[0].pop(), (1.0, ''))

def test_xlsx_import_set_ragged(self):
"""Import XLSX file through dataset when not all rows have the same length."""
xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
with open(str(xlsx_source), mode='rb') as fh:
dataset = tablib.Dataset().load(fh, 'xlsx')
# The popped row is expected to carry '' in its second cell -- presumably
# the padding added for a row shorter than the headers.
self.assertEqual(dataset.pop(), (1.0, ''))

def test_xlsx_wrong_char(self):
"""Bad characters are not silently ignored. We let the exception bubble up."""
from openpyxl.utils.exceptions import IllegalCharacterError
Expand Down

0 comments on commit cb21395

Please sign in to comment.