Skip to content

Commit

Permalink
Replace MarkupPy by ElementTree for html conversion (#554)
Browse files Browse the repository at this point in the history
  • Loading branch information
claudep authored Jul 2, 2023
1 parent d48407c commit f3ef2e9
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 84 deletions.
7 changes: 7 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# History

## Unreleased

### Changes

- The html export format no longer depends on MarkupPy, so the
`tablib[html]` install target has also been removed.

## 3.5.0 (2023-06-11)

### Improvements
Expand Down
3 changes: 0 additions & 3 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ The ``html`` format is currently export-only. The exports produce an HTML page
with the data in a ``<table>``. If headers have been set, they will be used as
table headers.

This format is optional, install Tablib with ``pip install "tablib[html]"`` to
make the format available.

jira
====

Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ dynamic = ["version"]

[project.optional-dependencies]
all = [
"markuppy",
"odfpy",
"openpyxl>=2.6.0",
"pandas",
Expand All @@ -44,7 +43,6 @@ all = [
"xlwt",
]
cli = ["tabulate"]
html = ["markuppy"]
ods = ["odfpy"]
pandas = ["pandas"]
xls = ["xlrd", "xlwt"]
Expand Down
4 changes: 1 addition & 3 deletions src/tablib/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
uninstalled_format_messages = {
"cli": {"package_name": "tabulate package", "extras_name": "cli"},
"df": {"package_name": "pandas package", "extras_name": "pandas"},
"html": {"package_name": "MarkupPy package", "extras_name": "html"},
"ods": {"package_name": "odfpy package", "extras_name": "ods"},
"xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"},
"xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"},
Expand Down Expand Up @@ -101,8 +100,7 @@ def register_builtins(self):
if find_spec('odf'):
self.register('ods', 'tablib.formats._ods.ODSFormat')
self.register('dbf', 'tablib.formats._dbf.DBFFormat')
if find_spec('MarkupPy'):
self.register('html', 'tablib.formats._html.HTMLFormat')
self.register('html', 'tablib.formats._html.HTMLFormat')
self.register('jira', 'tablib.formats._jira.JIRAFormat')
self.register('latex', 'tablib.formats._latex.LATEXFormat')
if find_spec('pandas'):
Expand Down
66 changes: 26 additions & 40 deletions src/tablib/formats/_html.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
""" Tablib - HTML export support.
"""

import codecs
from io import BytesIO

from MarkupPy import markup
from xml.etree import ElementTree as ET


class HTMLFormat:
Expand All @@ -17,48 +13,38 @@ class HTMLFormat:
def export_set(cls, dataset):
"""HTML representation of a Dataset."""

stream = BytesIO()

page = markup.page()
page.table.open()

table = ET.Element('table')
if dataset.headers is not None:
new_header = [item if item is not None else '' for item in dataset.headers]

page.thead.open()
headers = markup.oneliner.th(new_header)
page.tr(headers)
page.thead.close()

page.tbody.open()
head = ET.Element('thead')
tr = ET.Element('tr')
for header in dataset.headers:
th = ET.Element('th')
th.text = str(header) if header is not None else ''
tr.append(th)
head.append(tr)
table.append(head)

body = ET.Element('tbody')
for row in dataset:
new_row = [item if item is not None else '' for item in row]
tr = ET.Element('tr')
for item in row:
td = ET.Element('td')
td.text = str(item) if item is not None else ''
tr.append(td)
body.append(tr)
table.append(body)

html_row = markup.oneliner.td(new_row)
page.tr(html_row)
page.tbody.close()

page.table.close()

# Allow unicode characters in output
wrapper = codecs.getwriter("utf8")(stream)
wrapper.writelines(str(page))

return stream.getvalue().decode('utf-8')
return ET.tostring(table, method='html', encoding='unicode')

@classmethod
def export_book(cls, databook):
"""HTML representation of a Databook."""

stream = BytesIO()

# Allow unicode characters in output
wrapper = codecs.getwriter("utf8")(stream)

result = ''
for i, dset in enumerate(databook._datasets):
title = (dset.title if dset.title else 'Set %s' % (i))
wrapper.write(f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n')
wrapper.write(dset.html)
wrapper.write('\n')
title = dset.title if dset.title else f'Set {i}'
result += f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n'
result += dset.html
result += '\n'

return stream.getvalue().decode('utf-8')
return result
1 change: 0 additions & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pytest
pytest-cov
MarkupPy
odfpy
openpyxl>=2.6.0
pyyaml
Expand Down
71 changes: 36 additions & 35 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from uuid import uuid4

import pytest
from MarkupPy import markup
from openpyxl.reader.excel import load_workbook

import tablib
Expand Down Expand Up @@ -624,47 +623,49 @@ def test_row_has_tags(self):


class HTMLTests(BaseTestCase):
def test_html_export(self):
founders_html = (
"<table>"
"<thead>"
"<tr><th>first_name</th><th>last_name</th><th>gpa</th></tr>"
"</thead>"
"<tbody>"
"<tr><td>John</td><td>Adams</td><td>90</td></tr>"
"<tr><td>George</td><td>Washington</td><td>67</td></tr>"
"<tr><td>Thomas</td><td>Jefferson</td><td>50</td></tr>"
"</tbody>"
"</table>"
)

def test_html_dataset_export(self):
"""HTML export"""

html = markup.page()
html.table.open()
html.thead.open()

html.tr(markup.oneliner.th(self.founders.headers))
html.thead.close()

html.tbody.open()
for founder in self.founders:
html.tr(markup.oneliner.td(founder))
html.tbody.close()

html.table.close()
html = str(html)

self.assertEqual(html, self.founders.html)
self.assertEqual(self.founders_html, self.founders.html.replace('\n', ''))

def test_html_export_none_value(self):
"""HTML export"""

html = markup.page()
html.table.open()
html.thead.open()

html.tr(markup.oneliner.th(['foo', '', 'bar']))
html.thead.close()

html.tbody.open()
html.tr(markup.oneliner.td(['foo', '', 'bar']))
html.tbody.close()

html.table.close()
html = str(html)

headers = ['foo', None, 'bar']
d = tablib.Dataset(['foo', None, 'bar'], headers=headers)
d = tablib.Dataset(['foø', None, 'bar'], headers=headers)
expected = (
"<table>"
"<thead>"
"<tr><th>foo</th><th></th><th>bar</th></tr>"
"</thead>"
"<tbody>"
"<tr><td>foø</td><td></td><td>bar</td></tr>"
"</tbody>"
"</table>"
)
self.assertEqual(expected, d.html.replace('\n', ''))

self.assertEqual(html, d.html)
def test_html_databook_export(self):
book = tablib.Databook()
book.add_sheet(self.founders)
book.add_sheet(self.founders)
self.maxDiff = None
self.assertEqual(
book.html.replace('\n', ''),
f"<h3>Founders</h3>{self.founders_html}<h3>Founders</h3>{self.founders_html}"
)


class RSTTests(BaseTestCase):
Expand Down

0 comments on commit f3ef2e9

Please sign in to comment.