diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0d53812..39e9d1f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,26 +4,49 @@ workflows:
version: 2
workflow:
jobs:
- - test-2.7
- - test-3.5
+ - test-3.7
+ - test-3.8
+ - test-3.9
+ - test-3.10
+ - static-code-analysis
defaults: &defaults
working_directory: ~/code
steps:
- checkout
- run:
- name: Install dependencies
- command: sudo pip install -r requirements_tests.txt
+ name: Install Python dependencies
+ command: CFLAGS="-O0" pip install -r requirements_tests.txt
- run:
name: Test
- command: python setup.py test
+ command: pytest
jobs:
- test-2.7:
+ test-3.7:
<<: *defaults
docker:
- - image: circleci/python:2.7
- test-3.5:
+ - image: cimg/python:3.7
+ test-3.8:
<<: *defaults
docker:
- - image: circleci/python:3.5
+ - image: cimg/python:3.8
+ test-3.9:
+ <<: *defaults
+ docker:
+ - image: cimg/python:3.9
+ test-3.10:
+ <<: *defaults
+ docker:
+ - image: cimg/python:3.10
+ static-code-analysis:
+ working_directory: ~/code
+ docker:
+ - image: cimg/python:3.8
+ steps:
+ - checkout
+ - run:
+ name: Install dependencies
+ command: pip install lintlizard==0.18.0 "click<8.1"
+ - run:
+ name: LintLizard
+ command: lintlizard
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..d421296
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,13 @@
+# Docs: https://help.github.com/en/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+ # Enable version updates for python
+ - package-ecosystem: "pip"
+ schedule:
+ interval: "daily"
+ open-pull-requests-limit: 8 # note that this is _per-file_
+ directory: "/"
+ pull-request-branch-name:
+ # so it's compatible with docker tags
+ separator: "-"
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index f24cd99..6da7bef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,6 @@ pip-log.txt
#Mr Developer
.mr.developer.cfg
+
+# pycharm
+.idea
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 72d0f2b..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-# http://travis-ci.org/closeio/quotequail
-language: python
-python:
- - 2.7
- - 3.4
-script:
- - python setup.py test
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..96235fb
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,10 @@
+[tool.black]
+target-version = ['py37']
+exclude = '''
+/(
+ \.git
+ | \.venv
+ | venv
+ | src
+)/
+'''
\ No newline at end of file
diff --git a/quotequail/__init__.py b/quotequail/__init__.py
index df80bfd..05dd241 100644
--- a/quotequail/__init__.py
+++ b/quotequail/__init__.py
@@ -2,16 +2,14 @@
# quotequail
# a library that identifies quoted text in email messages
-import re
+from . import _internal, _patterns
-from . import _internal
-
-__all__ = ['quote', 'quote_html', 'unwrap', 'unwrap_html']
+__all__ = ["quote", "quote_html", "unwrap", "unwrap_html"]
def quote(text, limit=1000):
"""
- Takes a plain text message as an argument, returns a list of tuples. The
+ Take a plain text message as an argument, return a list of tuples. The
first argument of the tuple denotes whether the text should be expanded by
default. The second argument is the unmodified corresponding text.
@@ -20,16 +18,19 @@ def quote(text, limit=1000):
Unless the limit param is set to None, the text will automatically be quoted
starting at the line where the limit is reached.
"""
-
- lines = text.split('\n')
+ lines = text.split("\n")
found = _internal.find_quote_position(lines, _patterns.MAX_WRAP_LINES, limit)
- if found != None:
- return [(True, '\n'.join(lines[:found+1])), (False, '\n'.join(lines[found+1:]))]
+ if found is not None:
+ return [
+ (True, "\n".join(lines[: found + 1])),
+ (False, "\n".join(lines[found + 1 :])),
+ ]
return [(True, text)]
+
def quote_html(html, limit=1000):
"""
Like quote(), but takes an HTML message as an argument. The limit param
@@ -40,24 +41,25 @@ def quote_html(html, limit=1000):
tree = _html.get_html_tree(html)
- start_refs, end_refs, lines = _html.get_line_info(tree, limit+1)
+ start_refs, end_refs, lines = _html.get_line_info(tree, limit + 1)
found = _internal.find_quote_position(lines, 1, limit)
- if found == None:
+ if found is None:
# No quoting found and we're below limit. We're done.
return [(True, _html.render_html_tree(tree))]
else:
- start_tree = _html.slice_tree(tree, start_refs, end_refs,
- (0, found+1), html_copy=html)
- end_tree = _html.slice_tree(tree, start_refs, end_refs,
- (found+1, None))
+ start_tree = _html.slice_tree(
+ tree, start_refs, end_refs, (0, found + 1), html_copy=html
+ )
+ end_tree = _html.slice_tree(tree, start_refs, end_refs, (found + 1, None))
return [
(True, _html.render_html_tree(start_tree)),
(False, _html.render_html_tree(end_tree)),
]
+
def unwrap(text):
"""
If the passed text is the text body of a forwarded message, a reply, or
@@ -72,41 +74,45 @@ def unwrap(text):
Otherwise, this function returns None.
"""
-
- lines = text.split('\n')
-
- result = _internal.unwrap(lines, _patterns.MAX_WRAP_LINES,
- _patterns.MIN_HEADER_LINES,_patterns.MIN_QUOTED_LINES)
+ lines = text.split("\n")
+
+ result = _internal.unwrap(
+ lines,
+ _patterns.MAX_WRAP_LINES,
+ _patterns.MIN_HEADER_LINES,
+ _patterns.MIN_QUOTED_LINES,
+ )
if result:
typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result
- text_top = lines[slice(*top_range)] if top_range else ''
- text = lines[slice(*main_range)] if main_range else ''
- text_bottom = lines[slice(*bottom_range)] if bottom_range else ''
+ text_top = lines[slice(*top_range)] if top_range else ""
+ text = lines[slice(*main_range)] if main_range else ""
+ text_bottom = lines[slice(*bottom_range)] if bottom_range else ""
if needs_unindent:
text = _internal.unindent_lines(text)
result = {
- 'type': typ,
+ "type": typ,
}
- text = '\n'.join(text).strip()
- text_top = '\n'.join(text_top).strip()
- text_bottom = '\n'.join(text_bottom).strip()
+ text = "\n".join(text).strip()
+ text_top = "\n".join(text_top).strip()
+ text_bottom = "\n".join(text_bottom).strip()
if text:
- result['text'] = text
+ result["text"] = text
if text_top:
- result['text_top'] = text_top
+ result["text_top"] = text_top
if text_bottom:
- result['text_bottom'] = text_bottom
+ result["text_bottom"] = text_bottom
if hdrs:
result.update(hdrs)
return result
+
def unwrap_html(html):
"""
If the passed HTML is the HTML body of a forwarded message, a dictionary
@@ -133,7 +139,7 @@ def unwrap_html(html):
typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result
result = {
- 'type': typ,
+ "type": typ,
}
top_range = _html.trim_slice(lines, top_range)
@@ -141,18 +147,20 @@ def unwrap_html(html):
bottom_range = _html.trim_slice(lines, bottom_range)
if top_range:
- top_tree = _html.slice_tree(tree, start_refs, end_refs, top_range,
- html_copy=html)
+ top_tree = _html.slice_tree(
+ tree, start_refs, end_refs, top_range, html_copy=html
+ )
html_top = _html.render_html_tree(top_tree)
if html_top:
- result['html_top'] = html_top
+ result["html_top"] = html_top
if bottom_range:
- bottom_tree = _html.slice_tree(tree, start_refs, end_refs,
- bottom_range, html_copy=html)
+ bottom_tree = _html.slice_tree(
+ tree, start_refs, end_refs, bottom_range, html_copy=html
+ )
html_bottom = _html.render_html_tree(bottom_tree)
if html_bottom:
- result['html_bottom'] = html_bottom
+ result["html_bottom"] = html_bottom
if main_range:
main_tree = _html.slice_tree(tree, start_refs, end_refs, main_range)
@@ -160,7 +168,7 @@ def unwrap_html(html):
_html.unindent_tree(main_tree)
html = _html.render_html_tree(main_tree)
if html:
- result['html'] = html
+ result["html"] = html
if hdrs:
result.update(hdrs)
diff --git a/quotequail/_html.py b/quotequail/_html.py
index f40a595..1bc19d7 100644
--- a/quotequail/_html.py
+++ b/quotequail/_html.py
@@ -1,24 +1,35 @@
# HTML utils
-import lxml.html
import lxml.etree
+import lxml.html
from ._patterns import FORWARD_LINE, FORWARD_STYLES, MULTIPLE_WHITESPACE_RE
-INLINE_TAGS = ['a', 'b', 'em', 'i', 'strong', 'span', 'font', 'q',
- 'object', 'bdo', 'sub', 'sup', 'center', 'td', 'th']
-
-BEGIN = 'begin'
-END = 'end'
+INLINE_TAGS = [
+ "a",
+ "b",
+ "em",
+ "i",
+ "strong",
+ "span",
+ "font",
+ "q",
+ "object",
+ "bdo",
+ "sub",
+ "sup",
+ "center",
+ "td",
+ "th",
+]
+
+BEGIN = "begin"
+END = "end"
-try:
- string_class = basestring # Python 2.7
-except NameError:
- string_class = str # Python 3.x
def trim_tree_after(element, include_element=True):
"""
- Removes the document tree following the given element. If include_element
+ Remove the document tree following the given element. If include_element
is True, the given element is kept in the tree, otherwise it is removed.
"""
el = element
@@ -32,9 +43,10 @@ def trim_tree_after(element, include_element=True):
parent_el.remove(remove_el)
el = parent_el
+
def trim_tree_before(element, include_element=True, keep_head=True):
"""
- Removes the document tree preceding the given element. If include_element
+ Remove the document tree preceding the given element. If include_element
is True, the given element is kept in the tree, otherwise it is removed.
"""
el = element
@@ -48,19 +60,21 @@ def trim_tree_before(element, include_element=True, keep_head=True):
remove_el = el
el = el.getprevious()
tag = remove_el.tag
- is_head = isinstance(tag, string_class) and tag.lower() == 'head'
+ is_head = isinstance(tag, str) and tag.lower() == "head"
if not keep_head or not is_head:
parent_el.remove(remove_el)
el = parent_el
+
def trim_slice(lines, slice_tuple):
"""
Trim a slice tuple (begin, end) so it starts at the first non-empty line
(obtained via indented_tree_line_generator / get_line_info) and ends at the
last non-empty line within the slice. Returns the new slice.
"""
+
def _empty(line):
- return not line or line.strip() == '>'
+ return not line or line.strip() == ">"
if not slice_tuple:
return None
@@ -77,14 +91,15 @@ def _empty(line):
slice_start += 1
# Trim from end
- while slice_end > slice_start and _empty(lines[slice_end-1]):
+ while slice_end > slice_start and _empty(lines[slice_end - 1]):
slice_end -= 1
return (slice_start, slice_end)
+
def unindent_tree(element):
"""
- Removes the outermost indent. For example, the tree
+ Remove the outermost indent. For example, the tree
"
"
is transformed to
""
@@ -92,12 +107,13 @@ def unindent_tree(element):
for el in element.iter():
if is_indentation_element(el):
el.attrib.clear()
- el.tag = 'div'
+ el.tag = "div"
return
+
def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
"""
- Slices the HTML tree with the given start_refs and end_refs (obtained via
+ Slice the HTML tree with the given start_refs and end_refs (obtained via
get_line_info) at the given slice_tuple, a tuple (start, end) containing
the start and end of the slice (or None, to start from the start / end at
the end of the tree). If html_copy is specified, a new tree is constructed
@@ -108,21 +124,21 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
construct a copy of the tree using copy.copy() (see bug
https://bugs.launchpad.net/lxml/+bug/1562550).
"""
-
start_ref = None
end_ref = None
if slice_tuple:
slice_start, slice_end = slice_tuple
- if ((slice_start is not None and slice_start >= len(start_refs)) or
- (slice_end is not None and slice_end <= 0)):
- return get_html_tree('')
+ if (slice_start is not None and slice_start >= len(start_refs)) or (
+ slice_end is not None and slice_end <= 0
+ ):
+ return get_html_tree("")
- if slice_start != None and slice_start <= 0:
+ if slice_start is not None and slice_start <= 0:
slice_start = None
- if slice_end != None and slice_end >= len(start_refs):
+ if slice_end is not None and slice_end >= len(start_refs):
slice_end = None
else:
slice_start, slice_end = None, None
@@ -130,9 +146,8 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
if slice_start is not None:
start_ref = start_refs[slice_start]
- if slice_end is not None:
- if slice_end < len(end_refs):
- end_ref = end_refs[slice_end-1]
+ if slice_end is not None and slice_end < len(end_refs):
+ end_ref = end_refs[slice_end - 1]
if html_copy is not None:
et = lxml.etree.ElementTree(tree)
@@ -151,17 +166,21 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
new_tree = tree
if start_ref:
- include_start = (start_ref[1] == BEGIN)
+ include_start = start_ref[1] == BEGIN
if end_ref:
- include_end = (end_ref[1] == END)
+ include_end = end_ref[1] == END
# If start_ref is the same as end_ref, and we don't include the element,
# we are removing the entire tree. We need to handle this separately,
# otherwise trim_tree_after won't work because it can't find the already
# removed reference.
- if start_ref and end_ref and start_ref[0] == end_ref[0]:
- if not include_start or not include_end:
- return get_html_tree('')
+ if (
+ start_ref
+ and end_ref
+ and start_ref[0] == end_ref[0]
+ and (not include_start or not include_end)
+ ):
+ return get_html_tree("")
if start_ref:
trim_tree_before(start_ref[0], include_element=include_start)
@@ -170,6 +189,7 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
return new_tree
+
def get_html_tree(html):
"""
Given the HTML string, returns a LXML tree object. The tree is wrapped in
@@ -177,20 +197,19 @@ def get_html_tree(html):
otherwise result in an error. The wrapping can be later removed with
strip_wrapping().
"""
-
- parser = lxml.html.HTMLParser(encoding='utf-8')
- html = html.encode('utf8')
+ parser = lxml.html.HTMLParser(encoding="utf-8")
+ html = html.encode("utf8")
try:
tree = lxml.html.fromstring(html, parser=parser)
except lxml.etree.Error:
# E.g. empty document. Use dummy <div>
- tree = lxml.html.fromstring('<div></div>')
+ tree = lxml.html.fromstring("<div></div>")
# If the document doesn't start with a top level tag, wrap it with a <div>
# that will be later stripped out for consistent behavior.
if tree.tag not in lxml.html.defs.top_level_tags:
- html = b'<div>%s</div>' % html
+ html = b"<div>" + html + b"</div>"
tree = lxml.html.fromstring(html, parser=parser)
# HACK for Outlook emails, where tags like <o:p> are rendered as <p>. We
@@ -199,52 +218,54 @@ def get_html_tree(html):
# tags that contain colons. When rendering the tree, we will restore the
# tag name.
for el in tree.iter():
- if el.nsmap or (isinstance(el.tag, string_class) and ':' in el.tag):
+ if el.nsmap or (isinstance(el.tag, str) and ":" in el.tag):
if el.nsmap:
- actual_tag_name = '{}:{}'.format(list(el.nsmap.keys())[0], el.tag)
+ actual_tag_name = "{}:{}".format(list(el.nsmap.keys())[0], el.tag)
else:
actual_tag_name = el.tag
- el.tag = 'span'
- el.attrib['__tag_name'] = actual_tag_name
+ el.tag = "span"
+ el.attrib["__tag_name"] = actual_tag_name
return tree
+
def strip_wrapping(html):
"""
- Removes the wrapping that might have resulted when using get_html_tree().
+ Remove the wrapping that might have resulted when using get_html_tree().
"""
- if html.startswith('<div>') and html.endswith('</div>'):
+ if html.startswith("<div>") and html.endswith("</div>"):
html = html[5:-6]
return html.strip()
+
def render_html_tree(tree):
"""
- Renders the given HTML tree, and strips any wrapping that was applied in
+ Render the given HTML tree, and strip any wrapping that was applied in
get_html_tree().
You should avoid further processing of the given tree after calling this
method because we modify namespaced tags here.
"""
-
# Restore any tag names that were changed in get_html_tree()
for el in tree.iter():
- if '__tag_name' in el.attrib:
- actual_tag_name = el.attrib.pop('__tag_name')
+ if "__tag_name" in el.attrib:
+ actual_tag_name = el.attrib.pop("__tag_name")
el.tag = actual_tag_name
- html = lxml.html.tostring(tree, encoding='utf8').decode('utf8')
+ html = lxml.html.tostring(tree, encoding="utf8").decode("utf8")
return strip_wrapping(html)
+
def is_indentation_element(element):
- if isinstance(element.tag, string_class):
- return element.tag.lower() == 'blockquote'
+ if isinstance(element.tag, str):
+ return element.tag.lower() == "blockquote"
return False
+
def tree_token_generator(el, indentation_level=0):
"""
- Internal generator that yields tokens for the given HTML element as
- follows:
+ Yield tokens for the given HTML element as follows:
- A tuple (LXML element, BEGIN, indentation_level)
- Text right after the start of the tag, or None.
@@ -252,12 +273,9 @@ def tree_token_generator(el, indentation_level=0):
- A tuple (LXML element, END, indentation_level)
- Text right after the end of the tag, or None.
"""
-
- if not isinstance(el.tag, string_class):
+ if not isinstance(el.tag, str):
return
- tag_name = el.tag.lower()
-
is_indentation = is_indentation_element(el)
if is_indentation:
@@ -268,8 +286,7 @@ def tree_token_generator(el, indentation_level=0):
yield el.text
for child in el.iterchildren():
- for token in tree_token_generator(child, indentation_level):
- yield token
+ yield from tree_token_generator(child, indentation_level)
if is_indentation:
indentation_level -= 1
@@ -278,10 +295,12 @@ def tree_token_generator(el, indentation_level=0):
yield el.tail
+
def tree_line_generator(el, max_lines=None):
"""
- Internal generator that iterates through an LXML tree and yields a tuple
- per line. In this context, lines are blocks of text separated by <br> tags
+ Iterate through an LXML tree and yield a tuple per line.
+
+ In this context, lines are blocks of text separated by <br> tags
or by block elements. The tuples contain the following elements:
- A tuple with the element reference (element, position) for the start
@@ -309,14 +328,14 @@ def tree_line_generator(el, max_lines=None):
"""
def _trim_spaces(text):
- return MULTIPLE_WHITESPACE_RE.sub(' ', text).strip()
+ return MULTIPLE_WHITESPACE_RE.sub(" ", text).strip()
counter = 1
- if max_lines != None and counter > max_lines:
+ if max_lines is not None and counter > max_lines:
return
# Buffer for the current line.
- line = ''
+ line = ""
# The reference tuple (element, position) for the start of the line.
start_ref = None
@@ -333,10 +352,11 @@ def _trim_spaces(text):
tag_name = el.tag.lower()
- line_break = (tag_name == 'br' and state == BEGIN)
- is_block = (tag_name not in INLINE_TAGS)
- is_forward = (is_block and state == BEGIN and
- el.attrib.get('style') in FORWARD_STYLES)
+ line_break = tag_name == "br" and state == BEGIN
+ is_block = tag_name not in INLINE_TAGS
+ is_forward = (
+ is_block and state == BEGIN and el.attrib.get("style") in FORWARD_STYLES
+ )
if is_block or line_break:
line = _trim_spaces(line)
@@ -345,34 +365,34 @@ def _trim_spaces(text):
end_ref = (el, state)
yield start_ref, end_ref, start_indentation_level, line
counter += 1
- if max_lines != None and counter > max_lines:
+ if max_lines is not None and counter > max_lines:
return
- line = ''
+ line = ""
if is_forward:
# Simulate forward
- yield (end_ref, end_ref, start_indentation_level,
- FORWARD_LINE)
+ yield (end_ref, end_ref, start_indentation_level, FORWARD_LINE)
counter += 1
- if max_lines != None and counter > max_lines:
+ if max_lines is not None and counter > max_lines:
return
if not line:
start_ref = (el, state)
start_indentation_level = indentation_level
- elif isinstance(token, string_class):
+ elif isinstance(token, str):
line += token
else:
- raise RuntimeError('invalid token: {}'.format(token))
+ raise RuntimeError("invalid token: {}".format(token))
line = _trim_spaces(line)
if line:
yield line
+
def indented_tree_line_generator(el, max_lines=None):
- """
+ r"""
Like tree_line_generator, but yields tuples (start_ref, end_ref, line),
where the line already takes the indentation into account by having "> "
prepended. If a line already starts with ">", it is escaped ("\\>"). This
@@ -382,9 +402,10 @@ def indented_tree_line_generator(el, max_lines=None):
gen = tree_line_generator(el, max_lines)
for start_ref, end_ref, indentation_level, line in gen:
# Escape line
- if line.startswith('>'):
- line = '\\' + line
- yield start_ref, end_ref, '> '*indentation_level + line
+ if line.startswith(">"):
+ line = "\\" + line
+ yield start_ref, end_ref, "> " * indentation_level + line
+
def get_line_info(tree, max_lines=None):
"""
diff --git a/quotequail/_internal.py b/quotequail/_internal.py
index cd7f537..389713d 100644
--- a/quotequail/_internal.py
+++ b/quotequail/_internal.py
@@ -1,15 +1,21 @@
-import re
-from ._patterns import COMPILED_PATTERNS, COMPILED_PATTERN_MAP, HEADER_RE, HEADER_MAP, REPLY_DATE_SPLIT_REGEX, STRIP_SPACE_CHARS
+from ._patterns import (
+ COMPILED_PATTERN_MAP,
+ HEADER_MAP,
+ HEADER_RE,
+ REPLY_DATE_SPLIT_REGEX,
+ STRIP_SPACE_CHARS,
+)
"""
Internal methods. For max_wrap_lines, min_header_lines, min_quoted_lines
documentation see the corresponding constants in _patterns.py.
"""
+
def find_pattern_on_line(lines, n, max_wrap_lines):
"""
- Finds a forward/reply pattern within the given lines on text on the given
- line number and returns a tuple with the type ('reply' or 'forward') and
+ Find a forward/reply pattern within the given lines of text on the given
+ line number and return a tuple with the type ('reply' or 'forward') and
line number of where the pattern ends. The returned line number may be
different from the given line number in case the pattern wraps over
multiple lines.
@@ -19,28 +25,29 @@ def find_pattern_on_line(lines, n, max_wrap_lines):
for typ, regexes in COMPILED_PATTERN_MAP.items():
for regex in regexes:
for m in range(max_wrap_lines):
- match_line = join_wrapped_lines(lines[n:n+1+m])
- if match_line.startswith('>'):
+ match_line = join_wrapped_lines(lines[n : n + 1 + m])
+ if match_line.startswith(">"):
match_line = match_line[1:].strip()
if regex.match(match_line.strip()):
- return n+m, typ
+ return n + m, typ
return None, None
+
def find_quote_position(lines, max_wrap_lines, limit=None):
"""
- Returns the (ending) line number of a quoting pattern. If a limit is given
+ Return the (ending) line number of a quoting pattern. If a limit is given
and the limit is reached, the limit is returned.
"""
-
for n in range(len(lines)):
end, typ = find_pattern_on_line(lines, n, max_wrap_lines)
if typ:
return end
- if limit != None and n >= limit-1:
+ if limit is not None and n >= limit - 1:
return n
return None
+
def join_wrapped_lines(lines):
"""
Join one or multiple lines that wrapped. Returns the reconstructed line.
@@ -55,14 +62,15 @@ def join_wrapped_lines(lines):
if joined and joined[-1] in STRIP_SPACE_CHARS:
joined += line
else:
- joined += ' '
+ joined += " "
joined += line
return joined
+
def extract_headers(lines, max_wrap_lines):
"""
- Extracts email headers from the given lines. Returns a dict with the
+ Extract email headers from the given lines. Returns a dict with the
detected headers and the amount of lines that were processed.
"""
hdrs = {}
@@ -86,30 +94,32 @@ def extract_headers(lines, max_wrap_lines):
if header_name in HEADER_MAP:
hdrs[HEADER_MAP[header_name]] = header_value.strip()
- lines_processed = n+1
+ lines_processed = n + 1
else:
extend_lines += 1
if extend_lines < max_wrap_lines and header_name in HEADER_MAP:
hdrs[HEADER_MAP[header_name]] = join_wrapped_lines(
- [hdrs[HEADER_MAP[header_name]], line.strip()])
- lines_processed = n+1
+ [hdrs[HEADER_MAP[header_name]], line.strip()]
+ )
+ lines_processed = n + 1
else:
# no more headers found
break
return hdrs, lines_processed
+
def parse_reply(line):
"""
- Parses the given reply line ("On DATE, USER wrote:") and returns a
+ Parse the given reply line ("On DATE, USER wrote:") and return a
dictionary with the "Date" and "From" keys, or None, if couldn't parse.
"""
- if line.startswith('>'):
+ if line.startswith(">"):
line = line[1:].strip()
date = user = None
- for pattern in COMPILED_PATTERN_MAP['reply']:
+ for pattern in COMPILED_PATTERN_MAP["reply"]:
match = pattern.match(line)
if match:
groups = match.groups()
@@ -124,7 +134,7 @@ def parse_reply(line):
user = split_groups[-1]
else:
# Try a simple comma split
- split = groups[0].rsplit(',', 1)
+ split = groups[0].rsplit(",", 1)
if len(split) == 2:
date, user = split
@@ -136,13 +146,14 @@ def parse_reply(line):
if date and user:
return {
- 'date': date.strip(),
- 'from': user.strip(),
+ "date": date.strip(),
+ "from": user.strip(),
}
+
def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines):
"""
- Finds the starting point of a wrapped email. Returns a tuple containing
+ Find the starting point of a wrapped email. Returns a tuple containing
(start_line_number, end_line_number, type), where type can be one of the
following:
@@ -159,7 +170,6 @@ def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines)
Returns (None, None, None) if nothing was found.
"""
-
for n, line in enumerate(lines):
if not line.strip():
continue
@@ -171,47 +181,50 @@ def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines)
return n, end, typ
# Find a quote
- if line.startswith('>'):
+ if line.startswith(">"):
# Check if there are at least min_quoted_lines lines that match
matched_lines = 1
if matched_lines >= min_quoted_lines:
- return n, n, 'quoted'
+ return n, n, "quoted"
- for peek_line in lines[n+1:]:
+ for peek_line in lines[n + 1 :]:
if not peek_line.strip():
continue
- if not peek_line.startswith('>'):
+ if not peek_line.startswith(">"):
break
else:
matched_lines += 1
if matched_lines >= min_quoted_lines:
- return n, n, 'quoted'
+ return n, n, "quoted"
# Find a header
match = HEADER_RE.match(line)
- if match:
- if len(extract_headers(lines[n:], max_wrap_lines)[0]) >= min_header_lines:
- return n, n, 'headers'
+ if (
+ match
+ and len(extract_headers(lines[n:], max_wrap_lines)[0]) >= min_header_lines
+ ):
+ return n, n, "headers"
return None, None, None
def unindent_lines(lines):
unquoted = []
- for n, line in enumerate(lines):
- if line.startswith('> '):
+ for line in lines:
+ if line.startswith("> "):
unquoted.append(line[2:])
- elif line.startswith('>'):
+ elif line.startswith(">"):
unquoted.append(line[1:])
else:
break
return unquoted
+
def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines):
"""
- Returns a tuple of:
+ Return a tuple of:
- Type ('forward', 'reply', 'headers', 'quoted')
- Range of the text at the top of the wrapped message (or None)
- Headers dict (or None)
@@ -219,42 +232,61 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines):
- Range of the text below the wrapped message (or None)
- Whether the wrapped text needs to be unindented
"""
-
headers = {}
# Get line number and wrapping type.
- start, end, typ = find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines)
+ start, end, typ = find_unwrap_start(
+ lines, max_wrap_lines, min_header_lines, min_quoted_lines
+ )
# We found a line indicating that it's a forward/reply.
- if typ in ('forward', 'reply'):
+ if typ in ("forward", "reply"):
main_type = typ
- if typ == 'reply':
- reply_headers = parse_reply(join_wrapped_lines(lines[start:end+1]))
+ if typ == "reply":
+ reply_headers = parse_reply(join_wrapped_lines(lines[start : end + 1]))
if reply_headers:
headers.update(reply_headers)
# Find where the headers or the quoted section starts.
# We can set min_quoted_lines to 1 because we expect a quoted section.
- start2, end2, typ = find_unwrap_start(lines[end+1:], max_wrap_lines, min_header_lines, 1)
+ start2, end2, typ = find_unwrap_start(
+ lines[end + 1 :], max_wrap_lines, min_header_lines, 1
+ )
- if typ == 'quoted':
+ if typ == "quoted":
# Quoted section starts. Unindent and check if there are headers.
- quoted_start = end+1+start2
+ quoted_start = end + 1 + start2
unquoted = unindent_lines(lines[quoted_start:])
rest_start = quoted_start + len(unquoted)
- start3, end3, typ = find_unwrap_start(unquoted, max_wrap_lines, min_header_lines, min_quoted_lines)
- if typ == 'headers':
+ start3, end3, typ = find_unwrap_start(
+ unquoted, max_wrap_lines, min_header_lines, min_quoted_lines
+ )
+ if typ == "headers":
hdrs, hdrs_length = extract_headers(unquoted[start3:], max_wrap_lines)
if hdrs:
headers.update(hdrs)
- rest2_start = quoted_start+start3+hdrs_length
- return main_type, (0, start), headers, (rest2_start, rest_start), (rest_start, None), True
+ rest2_start = quoted_start + start3 + hdrs_length
+ return (
+ main_type,
+ (0, start),
+ headers,
+ (rest2_start, rest_start),
+ (rest_start, None),
+ True,
+ )
else:
- return main_type, (0, start), headers, (quoted_start, rest_start), (rest_start, None), True
-
- elif typ == 'headers':
- hdrs, hdrs_length = extract_headers(lines[start+1:], max_wrap_lines)
+ return (
+ main_type,
+ (0, start),
+ headers,
+ (quoted_start, rest_start),
+ (rest_start, None),
+ True,
+ )
+
+ elif typ == "headers":
+ hdrs, hdrs_length = extract_headers(lines[start + 1 :], max_wrap_lines)
if hdrs:
headers.update(hdrs)
rest_start = start + 1 + hdrs_length
@@ -262,25 +294,48 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines):
else:
# Didn't find quoted section or headers, assume that everything
# below is the qouted text.
- return main_type, (0, start), headers, (start+(start2 or 0)+1, None), None, False
+ return (
+ main_type,
+ (0, start),
+ headers,
+ (start + (start2 or 0) + 1, None),
+ None,
+ False,
+ )
# We just found headers, which usually indicates a forwarding.
- elif typ == 'headers':
- main_type = 'forward'
+ elif typ == "headers":
+ main_type = "forward"
hdrs, hdrs_length = extract_headers(lines[start:], max_wrap_lines)
rest_start = start + hdrs_length
return main_type, (0, start), hdrs, (rest_start, None), None, False
# We found quoted text. Headers may be within the quoted text.
- elif typ == 'quoted':
+ elif typ == "quoted":
unquoted = unindent_lines(lines[start:])
rest_start = start + len(unquoted)
- start2, end2, typ = find_unwrap_start(unquoted, max_wrap_lines, min_header_lines, min_quoted_lines)
- if typ == 'headers':
- main_type = 'forward'
+ start2, end2, typ = find_unwrap_start(
+ unquoted, max_wrap_lines, min_header_lines, min_quoted_lines
+ )
+ if typ == "headers":
+ main_type = "forward"
hdrs, hdrs_length = extract_headers(unquoted[start2:], max_wrap_lines)
rest2_start = start + hdrs_length
- return main_type, (0, start), hdrs, (rest2_start, rest_start), (rest_start, None), True
+ return (
+ main_type,
+ (0, start),
+ hdrs,
+ (rest2_start, rest_start),
+ (rest_start, None),
+ True,
+ )
else:
- main_type = 'quote'
- return main_type, (None, start), None, (start, rest_start), (rest_start, None), True
+ main_type = "quote"
+ return (
+ main_type,
+ (None, start),
+ None,
+ (start, rest_start),
+ (rest_start, None),
+ True,
+ )
diff --git a/quotequail/_patterns.py b/quotequail/_patterns.py
index a0c1108..2cae2e0 100644
--- a/quotequail/_patterns.py
+++ b/quotequail/_patterns.py
@@ -1,111 +1,110 @@
# -*- coding: utf-8 -*-
import re
+from typing import List
REPLY_PATTERNS = [
- u'^On (.*) wrote:$', # apple mail/gmail reply
- u'^Am (.*) schrieb (.*):$', # German
- u'^Le (.*) a écrit :$', # French
- u'El (.*) escribió:$', # Spanish
- u'^(.*) написал\(а\):$', # Russian
- u'^(.*) skrev (.*):$',
- u'^Den (.*) skrev (.*):$', # Swedish
- u'^Em (.*) escreveu:$', # Brazillian portuguese
- u'([0-9]{4}/[0-9]{1,2}/[0-9]{1,2}) (.* <.*@.*>)$', # gmail (?) reply
+ "^On (.*) wrote:$", # apple mail/gmail reply
+ "^Am (.*) schrieb (.*):$", # German
+ "^Le (.*) a écrit :$", # French
+ "El (.*) escribió:$", # Spanish
+ r"^(.*) написал\(а\):$", # Russian
+ "^(.*) skrev (.*):$", # Norwegian
+ "^Den (.*) skrev (.*):$", # Swedish
+ "^Em (.*) escreveu:$", # Brazilian Portuguese
+ "([0-9]{4}/[0-9]{1,2}/[0-9]{1,2}) (.* <.*@.*>)$", # gmail (?) reply
]
-REPLY_DATE_SPLIT_REGEX = re.compile(r'^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$')
+REPLY_DATE_SPLIT_REGEX = re.compile(r"^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$")
FORWARD_MESSAGES = [
# apple mail forward
- 'Begin forwarded message', 'Anfang der weitergeleiteten E-Mail',
- u'Début du message réexpédié', 'Inicio del mensaje reenviado',
-
+ "Begin forwarded message",
+ "Anfang der weitergeleiteten E-Mail",
+ "Début du message réexpédié",
+ "Inicio del mensaje reenviado",
# gmail/evolution forward
- 'Forwarded [mM]essage', 'Mensaje reenviado', 'Vidarebefordrat meddelande',
-
+ "Forwarded [mM]essage",
+ "Mensaje reenviado",
+ "Vidarebefordrat meddelande",
# outlook
- 'Original [mM]essage', 'Ursprüngliche Nachricht', 'Mensaje [oO]riginal',
-
+ "Original [mM]essage",
+ "Ursprüngliche Nachricht",
+ "Mensaje [oO]riginal",
# Thunderbird forward
- u'Message transféré',
-
+ "Message transféré",
# mail.ru forward (Russian)
- u'Пересылаемое сообщение',
+ "Пересылаемое сообщение",
]
# We yield this pattern to simulate Outlook forward styles. It is also used for
# some emails forwarded by Yahoo.
-FORWARD_LINE = '________________________________'
-
-FORWARD_PATTERNS = [
- '^{}$'.format(FORWARD_LINE),
+FORWARD_LINE = "________________________________"
-] + ['^---+ ?%s ?---+$' % p for p in FORWARD_MESSAGES] \
- + ['^%s:$' % p for p in FORWARD_MESSAGES]
+FORWARD_PATTERNS = (
+ [
+ "^{}$".format(FORWARD_LINE),
+ ]
+ + [f"^---+ ?{p} ?---+$" for p in FORWARD_MESSAGES]
+ + [f"^{p}:$" for p in FORWARD_MESSAGES]
+)
FORWARD_STYLES = [
# Outlook
- 'border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in',
+ "border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in",
]
-HEADER_RE = re.compile(r'\*?([-\w ]+):\*?(.*)$', re.UNICODE)
+HEADER_RE = re.compile(r"\*?([-\w ]+):\*?(.*)$", re.UNICODE)
HEADER_MAP = {
- 'from': 'from',
- 'von': 'from',
- 'de': 'from',
- u'от кого': 'from',
- u'från': 'from',
-
- 'to': 'to',
- 'an': 'to',
- 'para': 'to',
- u'à': 'to',
- u'pour': 'to',
- u'кому': 'to',
- u'till': 'to',
-
- 'cc': 'cc',
- 'kopie': 'cc',
- 'kopia': 'cc',
-
- 'bcc': 'bcc',
- 'cco': 'bcc',
- 'blindkopie': 'bcc',
-
- 'reply-to': 'reply-to',
- 'antwort an': 'reply-to',
- u'répondre à': 'reply-to',
- 'responder a': 'reply-to',
-
- 'date': 'date',
- 'sent': 'date',
- 'received': 'date',
- 'datum': 'date',
- 'gesendet': 'date',
- 'enviado el': 'date',
- 'enviados': 'date',
- 'fecha': 'date',
- u'дата': 'date',
-
- 'subject': 'subject',
- 'betreff': 'subject',
- 'asunto': 'subject',
- 'objet': 'subject',
- 'sujet': 'subject',
- u'тема': 'subject',
- u'ämne': 'subject',
+ "from": "from",
+ "von": "from",
+ "de": "from",
+ "от кого": "from",
+ "från": "from",
+ "to": "to",
+ "an": "to",
+ "para": "to",
+ "à": "to",
+ "pour": "to",
+ "кому": "to",
+ "till": "to",
+ "cc": "cc",
+ "kopie": "cc",
+ "kopia": "cc",
+ "bcc": "bcc",
+ "cco": "bcc",
+ "blindkopie": "bcc",
+ "reply-to": "reply-to",
+ "antwort an": "reply-to",
+ "répondre à": "reply-to",
+ "responder a": "reply-to",
+ "date": "date",
+ "sent": "date",
+ "received": "date",
+ "datum": "date",
+ "gesendet": "date",
+ "enviado el": "date",
+ "enviados": "date",
+ "fecha": "date",
+ "дата": "date",
+ "subject": "subject",
+ "betreff": "subject",
+ "asunto": "subject",
+ "objet": "subject",
+ "sujet": "subject",
+ "тема": "subject",
+ "ämne": "subject",
}
COMPILED_PATTERN_MAP = {
- 'reply': [re.compile(regex) for regex in REPLY_PATTERNS],
- 'forward': [re.compile(regex) for regex in FORWARD_PATTERNS],
+ "reply": [re.compile(regex) for regex in REPLY_PATTERNS],
+ "forward": [re.compile(regex) for regex in FORWARD_PATTERNS],
}
-COMPILED_PATTERNS = sum(COMPILED_PATTERN_MAP.values(), [])
+COMPILED_PATTERNS: List[re.Pattern] = sum(COMPILED_PATTERN_MAP.values(), [])
-MULTIPLE_WHITESPACE_RE = re.compile('\s+')
+MULTIPLE_WHITESPACE_RE = re.compile(r"\s+")
# Amount to lines to join to check for potential wrapped patterns in plain text
# messages.
@@ -120,4 +119,4 @@
# Characters at the end of line where we join lines without adding a space.
# For example, "John <\njohn@example>" becomes "John ", but
# "John\nDoe" becomes "John Doe".
-STRIP_SPACE_CHARS = '<([{"\''
+STRIP_SPACE_CHARS = "<([{\"'"  # deliberately NOT a raw string: r"...\"..." would keep the backslash as an extra character
diff --git a/requirements_tests.txt b/requirements_tests.txt
index 80bc88c..a5e19eb 100644
--- a/requirements_tests.txt
+++ b/requirements_tests.txt
@@ -1 +1,3 @@
-lxml==3.6.0
+--no-binary lxml
+lxml==4.9.1
+pytest==7.1.3
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..5332636
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,64 @@
+[flake8]
+ignore=
+ # !!! make sure you have a comma at the end of each line EXCEPT the LAST one
+ # line length, already enforced by black
+ E501,
+ # https://pypi.org/project/flake8-future-import/
+ FI1,
+ # Missing docstrings
+ D1,
+ # One-line docstring should fit on one line with quotes.
+ # We ignore this because it's OK to buy yourself a few extra characters
+ # for the summary line even if the summary line is *the only* line.
+ D200,
+ # 1 blank line required between summary line and description
+ D205,
+ # Multi-line docstring summary should start at the first line.
+ # We ignore this because we agreed in #20553 that we want to put the
+ # summary line below """ for multi-line docstrings.
+ D212,
+ # First line should end with a period
+ D400,
+ # This is not PEP8-compliant and conflicts with black
+ W503,
+ W504,
+ # This is not PEP8-compliant and conflicts with black
+ E203,
+ # Too intrusive, sometimes makes code less readable
+ SIM106,
+ # Allow f-strings
+ SFS301,
+ # Allow .format
+ SFS201
+exclude=venv
+#max-complexity=2
+banned-modules=
+ typing.Text = use str
+require-code=True
+
+[isort]
+skip=venv,src
+known_first_party=quotequail
+known_tests=tests
+sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,TESTS,LOCALFOLDER
+default_section=THIRDPARTY
+use_parentheses=true
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+combine_as_imports=True
+line_length=87
+
+[mypy]
+python_version = 3.7
+ignore_missing_imports = True
+no_implicit_optional = True
+strict_equality = True
+follow_imports = normal
+warn_unreachable = True
+show_error_context = True
+pretty = True
+files = quotequail
+
+[tool:pytest]
+norecursedirs=venv
diff --git a/setup.py b/setup.py
index edd5842..478f89a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,33 +1,30 @@
from setuptools import setup
setup(
- name='quotequail',
- version='0.2.3',
- url='http://github.com/closeio/quotequail',
- license='MIT',
- author='Thomas Steinacher',
- author_email='engineering@close.io',
- maintainer='Thomas Steinacher',
- maintainer_email='engineering@close.io',
- description='A library that identifies quoted text in plain text and HTML email messages.',
+ name="quotequail",
+ version="0.2.3",
+ url="http://github.com/closeio/quotequail",
+ license="MIT",
+ author="Thomas Steinacher",
+ author_email="engineering@close.io",
+ maintainer="Thomas Steinacher",
+ maintainer_email="engineering@close.io",
+ description="A library that identifies quoted text in plain text and HTML email messages.",
long_description=__doc__,
packages=[
- 'quotequail',
+ "quotequail",
],
- test_suite='tests',
- tests_require=['lxml'],
- platforms='any',
+ test_suite="tests",
+ tests_require=["lxml"],
+ platforms="any",
classifiers=[
- 'Environment :: Web Environment',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: MIT License',
- 'Operating System :: OS Independent',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.5',
- 'Topic :: Communications :: Email',
- 'Topic :: Software Development :: Libraries :: Python Modules'
- ]
+ "Environment :: Web Environment",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Topic :: Communications :: Email",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+ ],
)
diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py
index 5fba67f..29ada32 100644
--- a/tests/test_quotequail.py
+++ b/tests/test_quotequail.py
@@ -2,63 +2,84 @@
import os
import unittest
-from quotequail import *
-class FileMixin(object):
+from quotequail import quote, quote_html, unwrap, unwrap_html
+
+
+class FileMixin:
def read_file(self, name):
- with open(os.path.join(os.path.dirname(__file__), 'files', name), 'rb') as f:
- return f.read().decode('utf8')
+ with open(os.path.join(os.path.dirname(__file__), "files", name), "rb") as f:
+ return f.read().decode("utf8")
def assert_equal_to_file(self, string, name):
expected = self.read_file(name)
self.assertEqual(string, expected)
+
class QuoteTestCase(unittest.TestCase):
def test_quote_reply_1(self):
self.assertEqual(
quote(
-"""Hello world.
+ """Hello world.
On 2012-10-16 at 17:02 , Someone wrote:
> Some quoted text
-"""),
- [(True, 'Hello world.\n\nOn 2012-10-16 at 17:02 , Someone wrote:'),
- (False, '\n> Some quoted text\n')]
+"""
+ ),
+ [
+ (
+ True,
+ "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone wrote:",
+ ),
+ (False, "\n> Some quoted text\n"),
+ ],
)
def test_quote_reply_2(self):
self.assertEqual(
quote(
-"""Hello world.
+ """Hello world.
On 2012-10-16 at 17:02 , Someone <
someone@example.com> wrote:
> Some quoted text
-"""),
- [(True, 'Hello world.\n\nOn 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:'),
- (False, '\n> Some quoted text\n')]
+"""
+ ),
+ [
+ (
+ True,
+ "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:",
+ ),
+ (False, "\n> Some quoted text\n"),
+ ],
)
def test_quote_reply_3(self):
self.assertEqual(
quote(
-"""Hello world.
+ """Hello world.
On 2012-10-16 at 17:02 , Someone
wrote:
> Some quoted text
-"""),
- [(True, 'Hello world.\n\nOn 2012-10-16 at 17:02 , Someone \nwrote:'),
- (False, '\n> Some quoted text\n')]
+"""
+ ),
+ [
+ (
+ True,
+ "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone \nwrote:",
+ ),
+ (False, "\n> Some quoted text\n"),
+ ],
)
def test_quote_forward_1(self):
self.assertEqual(
quote(
-"""Hello world.
+ """Hello world.
Begin forwarded message:
@@ -66,30 +87,42 @@ def test_quote_forward_1(self):
> Subject: The email
>
> Some quoted text.
-"""),
- [(True, 'Hello world.\n\nBegin forwarded message:'),
- (False, '\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n')]
+"""
+ ),
+ [
+ (True, "Hello world.\n\nBegin forwarded message:"),
+ (
+ False,
+ "\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n",
+ ),
+ ],
)
def test_quote_forward_2(self):
self.assertEqual(
quote(
-"""Hello world.
+ """Hello world.
---------- Forwarded message ----------
From: Someone
Subject: The email
Some quoted text.
-"""),
- [(True, 'Hello world.\n\n---------- Forwarded message ----------'),
- (False, 'From: Someone \nSubject: The email\n\nSome quoted text.\n')]
+"""
+ ),
+ [
+ (True, "Hello world.\n\n---------- Forwarded message ----------"),
+ (
+ False,
+ "From: Someone \nSubject: The email\n\nSome quoted text.\n",
+ ),
+ ],
)
def test_quote_forward_3(self):
self.assertEqual(
quote(
-"""Hello world.
+ """Hello world.
> Begin forwarded message:
>
@@ -97,150 +130,205 @@ def test_quote_forward_3(self):
> Subject: The email
>
> Some quoted text.
-"""),
- [(True, 'Hello world.\n\n> Begin forwarded message:'),
- (False, '>\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n')]
+"""
+ ),
+ [
+ (True, "Hello world.\n\n> Begin forwarded message:"),
+ (
+ False,
+ ">\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n",
+ ),
+ ],
)
def test_limit(self):
self.assertEqual(
quote("Lorem\nIpsum\nDolor\nSit\nAmet", limit=2),
- [(True, 'Lorem\nIpsum'), (False, 'Dolor\nSit\nAmet')]
+ [(True, "Lorem\nIpsum"), (False, "Dolor\nSit\nAmet")],
)
+
class HTMLQuoteTestCase(unittest.TestCase):
def test_apple(self):
self.assertEqual(
- quote_html('''Some text
some more text
Lorem ipsum dolor sit amet.
'''),
+ quote_html(
+ """Some text
some more text
Lorem ipsum dolor sit amet.
"""
+ ),
[
# Note that lxml removes Content-Type meta tags (see
# lxml.html.tostring include_meta_content_type flag)
- (True, '''Some text
some more text
'''),
+ (
+ True,
+ """Some text
some more text
""",
+ ),
# Note we have an empty div stripped out here.
- (False, '''
Lorem ipsum dolor sit amet.
'''),
- ]
+ (
+ False,
+ """
Lorem ipsum dolor sit amet.
""",
+ ),
+ ],
)
def test_gmail(self):
self.assertEqual(
- quote_html('''---------- Forwarded message ----------
From:
Some One <someone@example.com>
+ quote_html(
+ """
'''),
+
"""
+ ),
[
- (True, '''
---------- Forwarded message ----------
'''),
- (False, '''
From:
Some One <someone@example.com>
+ (
+ True,
+ """
---------- Forwarded message ----------
""",
+ ),
+ (
+ False,
+ """
'''),
- ]
+
""",
+ ),
+ ],
)
def test_gmail_2(self):
self.assertEqual(
- quote_html(u'''
looks good\xa0
\r\n'''),
+ quote_html(
+ """
looks good\xa0
\r\n"""
+ ),
[
- (True, u'''
looks good\xa0
'''),
- (False, u''''''),
- ]
+ (
+ True,
+ """
looks good\xa0
""",
+ ),
+ (
+ False,
+ """""",
+ ),
+ ],
)
def test_outlook(self):
self.assertEqual(
- quote_html(u'''
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
'''),
+ quote_html(
+ """
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
"""
+ ),
[
- (True, u'
'),
- (False, u'
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
\xa0
Hey,
')
- ]
+ (
+ True,
+ '
',
+ ),
+ (
+ False,
+ '
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
\xa0
Hey,
',
+ ),
+ ],
)
def test_no_wrap_tag(self):
self.assertEqual(
- quote_html(u'''On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
'''),
+ quote_html(
+ """On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
"""
+ ),
[
- (True, 'On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:'),
- (False, '
some stuff
'),
- ]
+ (
+ True,
+ "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:",
+ ),
+ (False, "
some stuff
"),
+ ],
)
def test_images(self):
self.assertEqual(
- quote_html('''
Well hello there Sir!!!
On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there
~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
'''),
+ quote_html(
+ """
Well hello there Sir!!!
On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there
~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
"""
+ ),
[
- (True, u'''
Well hello there Sir!!!
On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
'''),
- (False, u'''
''')
- ]
+ (
+ True,
+ """
Well hello there Sir!!!
On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
""",
+ ),
+ (
+ False,
+ """
""",
+ ),
+ ],
)
def test_no_quote(self):
self.assertEqual(
- quote_html(u'''
One
Two
Three
'''),
+ quote_html("""
One
Two
Three
"""),
[
- (True, '
One
Two
Three
'),
- ]
+ (True, "
One
Two
Three
"),
+ ],
)
def test_limit(self):
self.assertEqual(
- quote_html(u'''
One
Two
Three
Four
''', limit=3),
+ quote_html("""
One
Two
Three
Four
""", limit=3),
[
- (True, '
One
Two
Three
'),
- (False, '
Four
'),
- ]
+ (True, "
One
Two
Three
"),
+ (False, "
Four
"),
+ ],
)
def test_empty(self):
self.assertEqual(
- quote_html(u''),
+ quote_html(""),
[
- (True, ''),
- ]
+ (True, ""),
+ ],
)
def test_comment(self):
self.assertEqual(
- quote_html(u''''''),
+ quote_html(""""""),
[
- (True, ''),
- ]
+ (True, ""),
+ ],
)
def test_comment_2(self):
self.assertEqual(
- quote_html(u'''AB'''),
+ quote_html("""AB"""),
[
- (True, 'AB'),
- ]
+ (True, "AB"),
+ ],
)
def test_comment_3(self):
self.assertEqual(
- quote_html(u'''
Begin forwarded message:
'''),
+ quote_html(
+ """
Begin forwarded message:
"""
+ ),
[
- (True, '
Begin forwarded message:'),
- (False, '
'),
- ]
+ (True, "
Begin forwarded message:"),
+ (False, "
"),
+ ],
)
def test_prefix_tag(self):
self.assertEqual(
- quote_html(u'''A
Begin forwarded message:
B'''),
+ quote_html("""A
Begin forwarded message:
B"""),
[
- (True, 'A
Begin forwarded message:
B'),
- ]
+ (True, "A
Begin forwarded message:
B"),
+ ],
)
def test_prefix_tag_2(self):
# We can't preserve the exact markup due to lxml's parsing here.
self.assertEqual(
- quote_html(u'''A
Begin forwarded message:
B'''),
+ quote_html("""A
Begin forwarded message:B"""),
[
- (True, 'A
Begin forwarded message:B'),
- ]
+ (True, "A
Begin forwarded message:B"),
+ ],
)
def test_encoding(self):
# We assume everything is UTF-8
self.assertEqual(
- quote_html(u'''
+ quote_html(
+ """
@@ -250,8 +338,12 @@ def test_encoding(self):
test ä
-'''), [
- (True, u'''
+"""
+ ),
+ [
+ (
+ True,
+ """
@@ -259,21 +351,36 @@ def test_encoding(self):
test ä
-'''),
- ])
+""",
+ ),
+ ],
+ )
def test_newline(self):
# Newline in "Am\r\n26. Mai" should not change the way we match.
self.assertEqual(
- quote_html(u'''\r\n\r\n\r\n\r\n\r\n\r\n
\r\n
Here is spam.
\r\nHam
\r\n
\r\n
\r\n
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
\r\n
Hey\r\nHam,
I like spam.
\r\n
\r\n
\r\n\r\n\r\n'''), [
- (True, '\r\n\r\n\r\n\r\n\r\n\r\n
\r\n
Here is spam.
\r\nHam
\r\n
\r\n
\r\n
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
'),
- (False, '\r\n\r\n\r\nHey\r\nHam,
I like spam.
\r\n
\r\n
\r\n\r\n')
- ])
+ quote_html(
+ """\r\n\r\n\r\n\r\n\r\n\r\n
\r\n
Here is spam.
\r\nHam
\r\n
\r\n
\r\n
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
\r\n
Hey\r\nHam,
I like spam.
\r\n
\r\n
\r\n\r\n\r\n"""
+ ),
+ [
+ (
+ True,
+ '\r\n\r\n\r\n\r\n\r\n\r\n
\r\n
Here is spam.
\r\nHam
\r\n
\r\n
\r\n
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
',
+ ),
+ (
+ False,
+ '\r\n\r\n\r\nHey\r\nHam,
I like spam.
\r\n
\r\n
\r\n\r\n',
+ ),
+ ],
+ )
+
class UnwrapTestCase(unittest.TestCase):
def test_gmail_forward(self):
# Gmail forward
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
---------- Forwarded message ----------
From: Someone
@@ -283,19 +390,24 @@ def test_gmail_forward(self):
Spanish Classes
Learn Spanish
-"""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': 'Someone ',
- 'date': 'Fri, Apr 26, 2013 at 8:13 PM',
- 'subject': 'Weekend Spanish classes',
- 'to': 'recipient@example.com',
- 'text': 'Spanish Classes\nLearn Spanish',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": "Someone ",
+ "date": "Fri, Apr 26, 2013 at 8:13 PM",
+ "subject": "Weekend Spanish classes",
+ "to": "recipient@example.com",
+ "text": "Spanish Classes\nLearn Spanish",
+ },
+ )
def test_apple_forward(self):
# Apple Mail (10.9 and earlier) forward
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
Begin forwarded message:
@@ -307,20 +419,25 @@ def test_apple_forward(self):
> Original text
Text bottom
-"""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': '"Some One" ',
- 'date': '1. August 2011 23:28:15 GMT-07:00',
- 'subject': 'AW: AW: Some subject',
- 'to': '"Other Person" ',
- 'text': 'Original text',
- 'text_bottom': 'Text bottom',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": '"Some One" ',
+ "date": "1. August 2011 23:28:15 GMT-07:00",
+ "subject": "AW: AW: Some subject",
+ "to": '"Other Person" ',
+ "text": "Original text",
+ "text_bottom": "Text bottom",
+ },
+ )
def test_apple_forward_2(self):
# Apple Mail (10.10) forward
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
> Begin forwarded message:
>
@@ -332,20 +449,25 @@ def test_apple_forward_2(self):
> Original text
Text bottom
-"""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': '"Some One" ',
- 'date': '1. August 2011 23:28:15 GMT-07:00',
- 'subject': 'AW: AW: Some subject',
- 'to': '"Other Person" ',
- 'text': 'Original text',
- 'text_bottom': 'Text bottom',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": '"Some One" ',
+ "date": "1. August 2011 23:28:15 GMT-07:00",
+ "subject": "AW: AW: Some subject",
+ "to": '"Other Person" ',
+ "text": "Original text",
+ "text_bottom": "Text bottom",
+ },
+ )
def test_sparrow_forward(self):
# Sparrow forward
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
Forwarded message:
@@ -359,20 +481,25 @@ def test_sparrow_forward(self):
> Great news!
Text bottom
-"""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': 'Some One ',
- 'date': 'Thursday, March 7, 2013 7:09:41 PM',
- 'subject': 'Re: Syncing Up',
- 'to': 'Other person ',
- 'text': 'OHAI\n\nGreat news!',
- 'text_bottom': 'Text bottom',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": "Some One ",
+ "date": "Thursday, March 7, 2013 7:09:41 PM",
+ "subject": "Re: Syncing Up",
+ "to": "Other person ",
+ "text": "OHAI\n\nGreat news!",
+ "text_bottom": "Text bottom",
+ },
+ )
def test_bold_headers(self):
# Forwrad with *bold* text
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
Forwarded message:
@@ -381,19 +508,24 @@ def test_bold_headers(self):
*Date:* Wednesday, February 6, 2013 7:46:53 AM
*Subject:* Fwd: Hottest Startups
-This is interesting."""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': 'Some One ',
- 'date': 'Wednesday, February 6, 2013 7:46:53 AM',
- 'subject': 'Fwd: Hottest Startups',
- 'to': 'Other Person ',
- 'text': 'This is interesting.',
- })
+This is interesting."""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": "Some One ",
+ "date": "Wednesday, February 6, 2013 7:46:53 AM",
+ "subject": "Fwd: Hottest Startups",
+ "to": "Other Person ",
+ "text": "This is interesting.",
+ },
+ )
def test_no_forward_text(self):
# No forwarding message text
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
From: "Some One"
Date: 1. August 2011 23:28:15 GMT-07:00
@@ -401,19 +533,24 @@ def test_no_forward_text(self):
Subject: AW: AW: Some subject
Original text
-"""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': '"Some One" ',
- 'date': '1. August 2011 23:28:15 GMT-07:00',
- 'subject': 'AW: AW: Some subject',
- 'to': '"Other Person" ',
- 'text': 'Original text',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": '"Some One" ',
+ "date": "1. August 2011 23:28:15 GMT-07:00",
+ "subject": "AW: AW: Some subject",
+ "to": '"Other Person" ',
+ "text": "Original text",
+ },
+ )
def test_no_forward_text_quoted(self):
# No forwarding message text
- self.assertEqual(unwrap("""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
> From: "Some One"
> Date: 1. August 2011 23:28:15 GMT-07:00
@@ -421,40 +558,49 @@ def test_no_forward_text_quoted(self):
> Subject: AW: AW: Some subject
>
> Original text
-"""), {
- 'text_top': 'Hello',
- 'type': 'forward',
- 'from': '"Some One" ',
- 'date': '1. August 2011 23:28:15 GMT-07:00',
- 'subject': 'AW: AW: Some subject',
- 'to': '"Other Person" ',
- 'text': 'Original text',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": '"Some One" ',
+ "date": "1. August 2011 23:28:15 GMT-07:00",
+ "subject": "AW: AW: Some subject",
+ "to": '"Other Person" ',
+ "text": "Original text",
+ },
+ )
def test_outlook_forward(self):
# Outlook?
- self.assertEqual(unwrap("""-------- Original Message --------
+ self.assertEqual(
+ unwrap(
+ """-------- Original Message --------
Subject: \tSome Newsletter
Date: \tFri, 19 Jun 2009 19:16:04 +0200
From: \tfrom
Reply-To: \treply
To: \tto@example.com
-OHAI"""), {
- 'type': 'forward',
- 'from': 'from ',
- 'reply-to': 'reply ',
- 'date': 'Fri, 19 Jun 2009 19:16:04 +0200',
- 'subject': 'Some Newsletter',
- 'to': 'to@example.com',
- 'reply-to': 'reply ',
- 'text': 'OHAI',
- })
-
+OHAI"""
+ ),
+ {
+ "type": "forward",
+ "from": "from ",
+ "reply-to": "reply ",
+ "date": "Fri, 19 Jun 2009 19:16:04 +0200",
+ "subject": "Some Newsletter",
+ "to": "to@example.com",
+ "reply-to": "reply ",
+ "text": "OHAI",
+ },
+ )
def test_spacing(self):
# Some clients (Blackberry?) have weird whitespace rules
- self.assertEqual(unwrap("""hello world
+ self.assertEqual(
+ unwrap(
+ """hello world
-----Original Message-----
From: "Some One"
@@ -467,19 +613,24 @@ def test_spacing(self):
OHAI...
-"""), {
- 'text_top': 'hello world',
- 'type': 'forward',
- 'from': '"Some One" ',
- 'date': 'Sat, 22 Mar 2008 12:16:06',
- 'subject': 'Antw: FW: html',
- 'to': '',
- 'text': 'OHAI...',
- })
+"""
+ ),
+ {
+ "text_top": "hello world",
+ "type": "forward",
+ "from": '"Some One" ',
+ "date": "Sat, 22 Mar 2008 12:16:06",
+ "subject": "Antw: FW: html",
+ "to": "",
+ "text": "OHAI...",
+ },
+ )
def test_quote(self):
# Just a quote
- self.assertEqual(unwrap("""hello world
+ self.assertEqual(
+ unwrap(
+ """hello world
Hey: This is very important
@@ -489,35 +640,48 @@ def test_quote(self):
--
kthxbye
-"""), {
- 'type': 'quote',
- 'text_top': 'hello world\n\nHey: This is very important',
- 'text': 'Lorem ipsum\ndolor sit amet\nadipiscing elit.',
- 'text_bottom': '--\nkthxbye',
- })
-
+"""
+ ),
+ {
+ "type": "quote",
+ "text_top": "hello world\n\nHey: This is very important",
+ "text": "Lorem ipsum\ndolor sit amet\nadipiscing elit.",
+ "text_bottom": "--\nkthxbye",
+ },
+ )
def test_no_message(self):
# No message
- self.assertEqual(unwrap("""hello world
+ self.assertEqual(
+ unwrap(
+ """hello world
Hey: This is very important
> No quoted message (just one line).
-"""), None)
-
+"""
+ ),
+ None,
+ )
def test_forward_no_headers(self):
# No quote / headers in forwarded message
- self.assertEqual(unwrap("""Begin forwarded message:
+ self.assertEqual(
+ unwrap(
+ """Begin forwarded message:
Hello
-"""), {
- 'type': 'forward',
- 'text': 'Hello',
- })
+"""
+ ),
+ {
+ "type": "forward",
+ "text": "Hello",
+ },
+ )
def test_confusing_email_signature(self):
- self.assertEqual(unwrap("""Phone: 12345
+ self.assertEqual(
+ unwrap(
+ """Phone: 12345
Fax: 67890
Skype: foobar
@@ -526,63 +690,83 @@ def test_confusing_email_signature(self):
Subject: The email
Email text.
-"""), {
- 'text_top': 'Phone: 12345\nFax: 67890\nSkype: foobar',
- 'type': 'forward',
- 'from': 'Someone ',
- 'subject': 'The email',
- 'text': 'Email text.',
- })
+"""
+ ),
+ {
+ "text_top": "Phone: 12345\nFax: 67890\nSkype: foobar",
+ "type": "forward",
+ "from": "Someone ",
+ "subject": "The email",
+ "text": "Email text.",
+ },
+ )
def test_long_subject(self):
- self.assertEqual(unwrap("""---------- Forwarded message ----------
+ self.assertEqual(
+ unwrap(
+ """---------- Forwarded message ----------
From: Someone
Subject: The email has a very long and confusing subject with spans over
multiple lines.
To: Destination
Email text.
-"""), {
- 'type': 'forward',
- 'from': 'Someone ',
- 'to': 'Destination ',
- 'subject': 'The email has a very long and confusing subject with spans over multiple lines.',
- 'text': 'Email text.',
- })
+"""
+ ),
+ {
+ "type": "forward",
+ "from": "Someone ",
+ "to": "Destination ",
+ "subject": "The email has a very long and confusing subject with spans over multiple lines.",
+ "text": "Email text.",
+ },
+ )
def test_reply_1(self):
- data = unwrap("""Hello world.
+ data = unwrap(
+ """Hello world.
On 2012-10-16 at 17:02 , Someone wrote:
> Some quoted text
-""")
- self.assertEqual(data, {
- 'type': 'reply',
- 'date': '2012-10-16 at 17:02',
- 'from': 'Someone ',
- 'text_top': 'Hello world.',
- 'text': 'Some quoted text',
- })
+"""
+ )
+ self.assertEqual(
+ data,
+ {
+ "type": "reply",
+ "date": "2012-10-16 at 17:02",
+ "from": "Someone ",
+ "text_top": "Hello world.",
+ "text": "Some quoted text",
+ },
+ )
def test_reply_2(self):
- data = unwrap("""Hello world.
+ data = unwrap(
+ """Hello world.
On 2012-10-16 at 17:02 , Someone <
someone@example.com> wrote:
> Some quoted text
-""")
- self.assertEqual(data, {
- 'type': 'reply',
- 'date': '2012-10-16 at 17:02',
- 'from': 'Someone ',
- 'text_top': 'Hello world.',
- 'text': 'Some quoted text',
- })
+"""
+ )
+ self.assertEqual(
+ data,
+ {
+ "type": "reply",
+ "date": "2012-10-16 at 17:02",
+ "from": "Someone ",
+ "text_top": "Hello world.",
+ "text": "Some quoted text",
+ },
+ )
def test_french(self):
- self.assertEqual(unwrap(u"""
+ self.assertEqual(
+ unwrap(
+ """
De : Someone
Répondre à : Reply
Date : Wednesday, 17 September 2014 4:24 pm
@@ -590,18 +774,23 @@ def test_french(self):
Objet : Re: test subject
Hello, thanks for your reply
- """), {
- 'type': 'forward',
- 'date': u'Wednesday, 17 September 2014 4:24 pm',
- 'from': u'Someone ',
- 'reply-to': 'Reply ',
- 'to': u'"Someone Else" ',
- 'subject': u'Re: test subject',
- 'text': u'Hello, thanks for your reply',
- })
+ """
+ ),
+ {
+ "type": "forward",
+ "date": "Wednesday, 17 September 2014 4:24 pm",
+ "from": "Someone ",
+ "reply-to": "Reply ",
+ "to": '"Someone Else" ',
+ "subject": "Re: test subject",
+ "text": "Hello, thanks for your reply",
+ },
+ )
def test_forward_french_apple_mail(self):
- self.assertEqual(unwrap(u'''
+ self.assertEqual(
+ unwrap(
+ """
Text before
Début du message réexpédié :
@@ -612,18 +801,23 @@ def test_forward_french_apple_mail(self):
Objet: RE: The subject
Text after
-'''), {
- 'date': u'14 novembre 2015 15:14:53 UTC+1',
- 'from': u'"Foo Bar" ',
- 'subject': 'RE: The subject',
- 'text': u'Text after',
- 'text_top': u'Text before',
- 'to': u'"\'Ham Spam\'" ',
- 'type': 'forward'
- })
+"""
+ ),
+ {
+ "date": "14 novembre 2015 15:14:53 UTC+1",
+ "from": '"Foo Bar" ',
+ "subject": "RE: The subject",
+ "text": "Text after",
+ "text_top": "Text before",
+ "to": "\"'Ham Spam'\" ",
+ "type": "forward",
+ },
+ )
def test_forward_french_thunderbird(self):
- self.assertEqual(unwrap(u'''
+ self.assertEqual(
+ unwrap(
+ """
Text before
-------- Message transféré --------
@@ -633,19 +827,24 @@ def test_forward_french_thunderbird(self):
Pour : Ham Spam
Text after
-'''), {
- 'date': u'Wed, 11 Nov 2015 12:31:25 +0100',
- 'from': u'Foo Bar ',
- 'subject': 'Re: Some subject',
- 'text': u'Text after',
- 'text_top': u'Text before',
- 'to': u'Ham Spam ',
- 'type': 'forward'
- })
+"""
+ ),
+ {
+ "date": "Wed, 11 Nov 2015 12:31:25 +0100",
+ "from": "Foo Bar ",
+ "subject": "Re: Some subject",
+ "text": "Text after",
+ "text_top": "Text before",
+ "to": "Ham Spam ",
+ "type": "forward",
+ },
+ )
def test_gmail_forward_swedish(self):
# Gmail forward
- self.assertEqual(unwrap(u"""Hello
+ self.assertEqual(
+ unwrap(
+ """Hello
---------- Vidarebefordrat meddelande ----------
Från: Someone
@@ -655,66 +854,81 @@ def test_gmail_forward_swedish(self):
Spanish Classes
Learn Spanish
-"""), {
- 'text_top': u'Hello',
- 'type': u'forward',
- 'from': u'Someone ',
- 'date': u'26 april 2013 20:13',
- 'subject': u'Weekend Spanish classes',
- 'to': u'recipient@example.com',
- 'text': u'Spanish Classes\nLearn Spanish',
- })
+"""
+ ),
+ {
+ "text_top": "Hello",
+ "type": "forward",
+ "from": "Someone ",
+ "date": "26 april 2013 20:13",
+ "subject": "Weekend Spanish classes",
+ "to": "recipient@example.com",
+ "text": "Spanish Classes\nLearn Spanish",
+ },
+ )
+
class HTMLUnwrapTestCase(FileMixin, unittest.TestCase):
def test_simple_forward(self):
- html = u'Begin forwarded message:
\n
\nFrom: someone@example.com
\nTo: anyone@example.com
\nSubject: You won
\n'
- self.assertEqual(unwrap_html(html), {
- 'type': 'forward',
- 'from': 'someone@example.com',
- 'to': 'anyone@example.com',
- 'subject': 'You won',
- })
+ html = "Begin forwarded message:
\n
\nFrom: someone@example.com
\nTo: anyone@example.com
\nSubject: You won
\n"
+ self.assertEqual(
+ unwrap_html(html),
+ {
+ "type": "forward",
+ "from": "someone@example.com",
+ "to": "anyone@example.com",
+ "subject": "You won",
+ },
+ )
def test_apple_forward(self):
html = 'test
blah
Begin forwarded message:
Subject: The Subject
Date: March 24, 2016 at 20:16:25 GMT+1
Text of the original email
'
- self.assertEqual(unwrap_html(html), {
- 'type': 'forward',
- 'subject': 'The Subject',
- 'date': 'March 24, 2016 at 20:16:25 GMT+1',
- 'from': 'Foo Bar
',
- 'to': 'John Doe ',
- 'html_top': 'test
blah
',
- 'html': 'Text of the original email
',
-
- })
+ self.assertEqual(
+ unwrap_html(html),
+ {
+ "type": "forward",
+ "subject": "The Subject",
+ "date": "March 24, 2016 at 20:16:25 GMT+1",
+ "from": "Foo Bar ",
+ "to": "John Doe ",
+ "html_top": 'test
blah
',
+ "html": 'Text of the original email
',
+ },
+ )
def test_gmail_forward(self):
- html = 'test
blah
---------- Forwarded message ----------
From:
Foo Bar <foo@bar.example>Date: Thu, Mar 24, 2016 at 5:17 PM
Subject: The Subject
To: John Doe <
john@doe.example>
'
-
- self.assertEqual(unwrap_html(html), {
- 'type': 'forward',
- 'subject': 'The Subject',
- 'date': 'Thu, Mar 24, 2016 at 5:17 PM',
- 'from': 'Foo Bar ',
- 'to': 'John Doe ',
- 'html_top': '',
- 'html': '',
- })
+ html = 'test
blah
---------- Forwarded message ----------
From:
Foo Bar <foo@bar.example>Date: Thu, Mar 24, 2016 at 5:17 PM
Subject: The Subject
To: John Doe <
john@doe.example>
'
+
+ self.assertEqual(
+ unwrap_html(html),
+ {
+ "type": "forward",
+ "subject": "The Subject",
+ "date": "Thu, Mar 24, 2016 at 5:17 PM",
+ "from": "Foo Bar ",
+ "to": "John Doe ",
+ "html_top": '',
+ "html": '',
+ },
+ )
def test_apple_reply(self):
html = 'Foo
Bar
'
- self.assertEqual(unwrap_html(html), {
- 'type': 'reply',
- 'from': 'John Doe ',
- 'date': '2016-03-25, at 23:01',
- 'html': '',
- 'html_top': 'Foo
Bar
',
- })
+ self.assertEqual(
+ unwrap_html(html),
+ {
+ "type": "reply",
+ "from": "John Doe ",
+ "date": "2016-03-25, at 23:01",
+ "html": '',
+ "html_top": 'Foo
Bar
',
+ },
+ )
def test_gmail_reply(self):
- html = '''