diff --git a/.circleci/config.yml b/.circleci/config.yml index 0d53812..39e9d1f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,26 +4,49 @@ workflows: version: 2 workflow: jobs: - - test-2.7 - - test-3.5 + - test-3.7 + - test-3.8 + - test-3.9 + - test-3.10 + - static-code-analysis defaults: &defaults working_directory: ~/code steps: - checkout - run: - name: Install dependencies - command: sudo pip install -r requirements_tests.txt + name: Install Python dependencies + command: CFLAGS="-O0" pip install -r requirements_tests.txt - run: name: Test - command: python setup.py test + command: pytest jobs: - test-2.7: + test-3.7: <<: *defaults docker: - - image: circleci/python:2.7 - test-3.5: + - image: cimg/python:3.7 + test-3.8: <<: *defaults docker: - - image: circleci/python:3.5 + - image: cimg/python:3.8 + test-3.9: + <<: *defaults + docker: + - image: cimg/python:3.9 + test-3.10: + <<: *defaults + docker: + - image: cimg/python:3.10 + static-code-analysis: + working_directory: ~/code + docker: + - image: cimg/python:3.8 + steps: + - checkout + - run: + name: Install dependencies + command: pip install lintlizard==0.18.0 "click<8.1" + - run: + name: LintLizard + command: lintlizard diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..d421296 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# Docs: https://help.github.com/en/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + # Enable version updates for python + - package-ecosystem: "pip" + schedule: + interval: "daily" + open-pull-requests-limit: 8 # note that this is _per-file_ + directory: "/" + pull-request-branch-name: + # so it's compatible with docker tags + separator: "-" \ No newline at end of file diff --git a/.gitignore b/.gitignore index f24cd99..6da7bef 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,6 @@ pip-log.txt #Mr Developer .mr.developer.cfg + +# pycharm +.idea diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 72d0f2b..0000000 --- a/.travis.yml +++ /dev/null @@ -1,7 +0,0 @@ -# http://travis-ci.org/closeio/quotequail -language: python -python: - - 2.7 - - 3.4 -script: - - python setup.py test diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..96235fb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[tool.black] +target-version = ['py37'] +exclude = ''' +/( + \.git + | \.venv + | venv + | src +)/ +''' \ No newline at end of file diff --git a/quotequail/__init__.py b/quotequail/__init__.py index df80bfd..05dd241 100644 --- a/quotequail/__init__.py +++ b/quotequail/__init__.py @@ -2,16 +2,14 @@ # quotequail # a library that identifies quoted text in email messages -import re +from . import _internal, _patterns -from . import _internal - -__all__ = ['quote', 'quote_html', 'unwrap', 'unwrap_html'] +__all__ = ["quote", "quote_html", "unwrap", "unwrap_html"] def quote(text, limit=1000): """ - Takes a plain text message as an argument, returns a list of tuples. The + Take a plain text message as an argument, return a list of tuples. The first argument of the tuple denotes whether the text should be expanded by default. The second argument is the unmodified corresponding text. @@ -20,16 +18,19 @@ def quote(text, limit=1000): Unless the limit param is set to None, the text will automatically be quoted starting at the line where the limit is reached. """ - - lines = text.split('\n') + lines = text.split("\n") found = _internal.find_quote_position(lines, _patterns.MAX_WRAP_LINES, limit) - if found != None: - return [(True, '\n'.join(lines[:found+1])), (False, '\n'.join(lines[found+1:]))] + if found is not None: + return [ + (True, "\n".join(lines[: found + 1])), + (False, "\n".join(lines[found + 1 :])), + ] return [(True, text)] + def quote_html(html, limit=1000): """ Like quote(), but takes an HTML message as an argument. The limit param @@ -40,24 +41,25 @@ def quote_html(html, limit=1000): tree = _html.get_html_tree(html) - start_refs, end_refs, lines = _html.get_line_info(tree, limit+1) + start_refs, end_refs, lines = _html.get_line_info(tree, limit + 1) found = _internal.find_quote_position(lines, 1, limit) - if found == None: + if found is None: # No quoting found and we're below limit. We're done. return [(True, _html.render_html_tree(tree))] else: - start_tree = _html.slice_tree(tree, start_refs, end_refs, - (0, found+1), html_copy=html) - end_tree = _html.slice_tree(tree, start_refs, end_refs, - (found+1, None)) + start_tree = _html.slice_tree( + tree, start_refs, end_refs, (0, found + 1), html_copy=html + ) + end_tree = _html.slice_tree(tree, start_refs, end_refs, (found + 1, None)) return [ (True, _html.render_html_tree(start_tree)), (False, _html.render_html_tree(end_tree)), ] + def unwrap(text): """ If the passed text is the text body of a forwarded message, a reply, or @@ -72,41 +74,45 @@ def unwrap(text): Otherwise, this function returns None. """ - - lines = text.split('\n') - - result = _internal.unwrap(lines, _patterns.MAX_WRAP_LINES, - _patterns.MIN_HEADER_LINES,_patterns.MIN_QUOTED_LINES) + lines = text.split("\n") + + result = _internal.unwrap( + lines, + _patterns.MAX_WRAP_LINES, + _patterns.MIN_HEADER_LINES, + _patterns.MIN_QUOTED_LINES, + ) if result: typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result - text_top = lines[slice(*top_range)] if top_range else '' - text = lines[slice(*main_range)] if main_range else '' - text_bottom = lines[slice(*bottom_range)] if bottom_range else '' + text_top = lines[slice(*top_range)] if top_range else "" + text = lines[slice(*main_range)] if main_range else "" + text_bottom = lines[slice(*bottom_range)] if bottom_range else "" if needs_unindent: text = _internal.unindent_lines(text) result = { - 'type': typ, + "type": typ, } - text = '\n'.join(text).strip() - text_top = '\n'.join(text_top).strip() - text_bottom = '\n'.join(text_bottom).strip() + text = "\n".join(text).strip() + text_top = "\n".join(text_top).strip() + text_bottom = "\n".join(text_bottom).strip() if text: - result['text'] = text + result["text"] = text if text_top: - result['text_top'] = text_top + result["text_top"] = text_top if text_bottom: - result['text_bottom'] = text_bottom + result["text_bottom"] = text_bottom if hdrs: result.update(hdrs) return result + def unwrap_html(html): """ If the passed HTML is the HTML body of a forwarded message, a dictionary @@ -133,7 +139,7 @@ def unwrap_html(html): typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result result = { - 'type': typ, + "type": typ, } top_range = _html.trim_slice(lines, top_range) @@ -141,18 +147,20 @@ def unwrap_html(html): bottom_range = _html.trim_slice(lines, bottom_range) if top_range: - top_tree = _html.slice_tree(tree, start_refs, end_refs, top_range, - html_copy=html) + top_tree = _html.slice_tree( + tree, start_refs, end_refs, top_range, html_copy=html + ) html_top = _html.render_html_tree(top_tree) if html_top: - result['html_top'] = html_top + result["html_top"] = html_top if bottom_range: - bottom_tree = _html.slice_tree(tree, start_refs, end_refs, - bottom_range, html_copy=html) + bottom_tree = _html.slice_tree( + tree, start_refs, end_refs, bottom_range, html_copy=html + ) html_bottom = _html.render_html_tree(bottom_tree) if html_bottom: - result['html_bottom'] = html_bottom + result["html_bottom"] = html_bottom if main_range: main_tree = _html.slice_tree(tree, start_refs, end_refs, main_range) @@ -160,7 +168,7 @@ def unwrap_html(html): _html.unindent_tree(main_tree) html = _html.render_html_tree(main_tree) if html: - result['html'] = html + result["html"] = html if hdrs: result.update(hdrs) diff --git a/quotequail/_html.py b/quotequail/_html.py index f40a595..1bc19d7 100644 --- a/quotequail/_html.py +++ b/quotequail/_html.py @@ -1,24 +1,35 @@ # HTML utils -import lxml.html import lxml.etree +import lxml.html from ._patterns import FORWARD_LINE, FORWARD_STYLES, MULTIPLE_WHITESPACE_RE -INLINE_TAGS = ['a', 'b', 'em', 'i', 'strong', 'span', 'font', 'q', - 'object', 'bdo', 'sub', 'sup', 'center', 'td', 'th'] - -BEGIN = 'begin' -END = 'end' +INLINE_TAGS = [ + "a", + "b", + "em", + "i", + "strong", + "span", + "font", + "q", + "object", + "bdo", + "sub", + "sup", + "center", + "td", + "th", +] + +BEGIN = "begin" +END = "end" -try: - string_class = basestring # Python 2.7 -except NameError: - string_class = str # Python 3.x def trim_tree_after(element, include_element=True): """ - Removes the document tree following the given element. If include_element + Remove the document tree following the given element. If include_element is True, the given element is kept in the tree, otherwise it is removed. """ el = element @@ -32,9 +43,10 @@ def trim_tree_after(element, include_element=True): parent_el.remove(remove_el) el = parent_el + def trim_tree_before(element, include_element=True, keep_head=True): """ - Removes the document tree preceding the given element. If include_element + Remove the document tree preceding the given element. If include_element is True, the given element is kept in the tree, otherwise it is removed. """ el = element @@ -48,19 +60,21 @@ def trim_tree_before(element, include_element=True, keep_head=True): remove_el = el el = el.getprevious() tag = remove_el.tag - is_head = isinstance(tag, string_class) and tag.lower() == 'head' + is_head = isinstance(tag, str) and tag.lower() == "head" if not keep_head or not is_head: parent_el.remove(remove_el) el = parent_el + def trim_slice(lines, slice_tuple): """ Trim a slice tuple (begin, end) so it starts at the first non-empty line (obtained via indented_tree_line_generator / get_line_info) and ends at the last non-empty line within the slice. Returns the new slice. """ + def _empty(line): - return not line or line.strip() == '>' + return not line or line.strip() == ">" if not slice_tuple: return None @@ -77,14 +91,15 @@ def _empty(line): slice_start += 1 # Trim from end - while slice_end > slice_start and _empty(lines[slice_end-1]): + while slice_end > slice_start and _empty(lines[slice_end - 1]): slice_end -= 1 return (slice_start, slice_end) + def unindent_tree(element): """ - Removes the outermost indent. For example, the tree + Remove the outermost indent. For example, the tree "
AB
C
D
E
FG
" is transformed to "
A
B
C
D
E
F
G
" @@ -92,12 +107,13 @@ def unindent_tree(element): for el in element.iter(): if is_indentation_element(el): el.attrib.clear() - el.tag = 'div' + el.tag = "div" return + def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None): """ - Slices the HTML tree with the given start_refs and end_refs (obtained via + Slice the HTML tree with the given start_refs and end_refs (obtained via get_line_info) at the given slice_tuple, a tuple (start, end) containing the start and end of the slice (or None, to start from the start / end at the end of the tree). If html_copy is specified, a new tree is constructed @@ -108,21 +124,21 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None): construct a copy of the tree using copy.copy() (see bug https://bugs.launchpad.net/lxml/+bug/1562550). """ - start_ref = None end_ref = None if slice_tuple: slice_start, slice_end = slice_tuple - if ((slice_start is not None and slice_start >= len(start_refs)) or - (slice_end is not None and slice_end <= 0)): - return get_html_tree('') + if (slice_start is not None and slice_start >= len(start_refs)) or ( + slice_end is not None and slice_end <= 0 + ): + return get_html_tree("") - if slice_start != None and slice_start <= 0: + if slice_start is not None and slice_start <= 0: slice_start = None - if slice_end != None and slice_end >= len(start_refs): + if slice_end is not None and slice_end >= len(start_refs): slice_end = None else: slice_start, slice_end = None, None @@ -130,9 +146,8 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None): if slice_start is not None: start_ref = start_refs[slice_start] - if slice_end is not None: - if slice_end < len(end_refs): - end_ref = end_refs[slice_end-1] + if slice_end is not None and slice_end < len(end_refs): + end_ref = end_refs[slice_end - 1] if html_copy is not None: et = lxml.etree.ElementTree(tree) @@ -151,17 +166,21 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None): new_tree = tree if start_ref: - include_start = (start_ref[1] == BEGIN) + include_start = start_ref[1] == BEGIN if end_ref: - include_end = (end_ref[1] == END) + include_end = end_ref[1] == END # If start_ref is the same as end_ref, and we don't include the element, # we are removing the entire tree. We need to handle this separately, # otherwise trim_tree_after won't work because it can't find the already # removed reference. - if start_ref and end_ref and start_ref[0] == end_ref[0]: - if not include_start or not include_end: - return get_html_tree('') + if ( + start_ref + and end_ref + and start_ref[0] == end_ref[0] + and (not include_start or not include_end) + ): + return get_html_tree("") if start_ref: trim_tree_before(start_ref[0], include_element=include_start) @@ -170,6 +189,7 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None): return new_tree + def get_html_tree(html): """ Given the HTML string, returns a LXML tree object. The tree is wrapped in @@ -177,20 +197,19 @@ def get_html_tree(html): otherwise result in an error. The wrapping can be later removed with strip_wrapping(). """ - - parser = lxml.html.HTMLParser(encoding='utf-8') - html = html.encode('utf8') + parser = lxml.html.HTMLParser(encoding="utf-8") + html = html.encode("utf8") try: tree = lxml.html.fromstring(html, parser=parser) except lxml.etree.Error: # E.g. empty document. Use dummy
- tree = lxml.html.fromstring('
') + tree = lxml.html.fromstring("
") # If the document doesn't start with a top level tag, wrap it with a
# that will be later stripped out for consistent behavior. if tree.tag not in lxml.html.defs.top_level_tags: - html = b'
%s
' % html + html = b"
" + html + b"
" tree = lxml.html.fromstring(html, parser=parser) # HACK for Outlook emails, where tags like are rendered as

. We @@ -199,52 +218,54 @@ def get_html_tree(html): # tags that contain colons. When rendering the tree, we will restore the # tag name. for el in tree.iter(): - if el.nsmap or (isinstance(el.tag, string_class) and ':' in el.tag): + if el.nsmap or (isinstance(el.tag, str) and ":" in el.tag): if el.nsmap: - actual_tag_name = '{}:{}'.format(list(el.nsmap.keys())[0], el.tag) + actual_tag_name = "{}:{}".format(list(el.nsmap.keys())[0], el.tag) else: actual_tag_name = el.tag - el.tag = 'span' - el.attrib['__tag_name'] = actual_tag_name + el.tag = "span" + el.attrib["__tag_name"] = actual_tag_name return tree + def strip_wrapping(html): """ - Removes the wrapping that might have resulted when using get_html_tree(). + Remove the wrapping that might have resulted when using get_html_tree(). """ - if html.startswith('

') and html.endswith('
'): + if html.startswith("
") and html.endswith("
"): html = html[5:-6] return html.strip() + def render_html_tree(tree): """ - Renders the given HTML tree, and strips any wrapping that was applied in + Render the given HTML tree, and strip any wrapping that was applied in get_html_tree(). You should avoid further processing of the given tree after calling this method because we modify namespaced tags here. """ - # Restore any tag names that were changed in get_html_tree() for el in tree.iter(): - if '__tag_name' in el.attrib: - actual_tag_name = el.attrib.pop('__tag_name') + if "__tag_name" in el.attrib: + actual_tag_name = el.attrib.pop("__tag_name") el.tag = actual_tag_name - html = lxml.html.tostring(tree, encoding='utf8').decode('utf8') + html = lxml.html.tostring(tree, encoding="utf8").decode("utf8") return strip_wrapping(html) + def is_indentation_element(element): - if isinstance(element.tag, string_class): - return element.tag.lower() == 'blockquote' + if isinstance(element.tag, str): + return element.tag.lower() == "blockquote" return False + def tree_token_generator(el, indentation_level=0): """ - Internal generator that yields tokens for the given HTML element as - follows: + Yield tokens for the given HTML element as follows: - A tuple (LXML element, BEGIN, indentation_level) - Text right after the start of the tag, or None. @@ -252,12 +273,9 @@ def tree_token_generator(el, indentation_level=0): - A tuple (LXML element, END, indentation_level) - Text right after the end of the tag, or None. """ - - if not isinstance(el.tag, string_class): + if not isinstance(el.tag, str): return - tag_name = el.tag.lower() - is_indentation = is_indentation_element(el) if is_indentation: @@ -268,8 +286,7 @@ def tree_token_generator(el, indentation_level=0): yield el.text for child in el.iterchildren(): - for token in tree_token_generator(child, indentation_level): - yield token + yield from tree_token_generator(child, indentation_level) if is_indentation: indentation_level -= 1 @@ -278,10 +295,12 @@ def tree_token_generator(el, indentation_level=0): yield el.tail + def tree_line_generator(el, max_lines=None): """ - Internal generator that iterates through an LXML tree and yields a tuple - per line. In this context, lines are blocks of text separated by
tags + Iterate through an LXML tree and yield a tuple per line. + + In this context, lines are blocks of text separated by
tags or by block elements. The tuples contain the following elements: - A tuple with the element reference (element, position) for the start @@ -309,14 +328,14 @@ def tree_line_generator(el, max_lines=None): """ def _trim_spaces(text): - return MULTIPLE_WHITESPACE_RE.sub(' ', text).strip() + return MULTIPLE_WHITESPACE_RE.sub(" ", text).strip() counter = 1 - if max_lines != None and counter > max_lines: + if max_lines is not None and counter > max_lines: return # Buffer for the current line. - line = '' + line = "" # The reference tuple (element, position) for the start of the line. start_ref = None @@ -333,10 +352,11 @@ def _trim_spaces(text): tag_name = el.tag.lower() - line_break = (tag_name == 'br' and state == BEGIN) - is_block = (tag_name not in INLINE_TAGS) - is_forward = (is_block and state == BEGIN and - el.attrib.get('style') in FORWARD_STYLES) + line_break = tag_name == "br" and state == BEGIN + is_block = tag_name not in INLINE_TAGS + is_forward = ( + is_block and state == BEGIN and el.attrib.get("style") in FORWARD_STYLES + ) if is_block or line_break: line = _trim_spaces(line) @@ -345,34 +365,34 @@ def _trim_spaces(text): end_ref = (el, state) yield start_ref, end_ref, start_indentation_level, line counter += 1 - if max_lines != None and counter > max_lines: + if max_lines is not None and counter > max_lines: return - line = '' + line = "" if is_forward: # Simulate forward - yield (end_ref, end_ref, start_indentation_level, - FORWARD_LINE) + yield (end_ref, end_ref, start_indentation_level, FORWARD_LINE) counter += 1 - if max_lines != None and counter > max_lines: + if max_lines is not None and counter > max_lines: return if not line: start_ref = (el, state) start_indentation_level = indentation_level - elif isinstance(token, string_class): + elif isinstance(token, str): line += token else: - raise RuntimeError('invalid token: {}'.format(token)) + raise RuntimeError("invalid token: {}".format(token)) line = _trim_spaces(line) if line: yield line + def indented_tree_line_generator(el, max_lines=None): - """ + r""" Like tree_line_generator, but yields tuples (start_ref, end_ref, line), where the line already takes the indentation into account by having "> " prepended. If a line already starts with ">", it is escaped ("\\>"). This @@ -382,9 +402,10 @@ def indented_tree_line_generator(el, max_lines=None): gen = tree_line_generator(el, max_lines) for start_ref, end_ref, indentation_level, line in gen: # Escape line - if line.startswith('>'): - line = '\\' + line - yield start_ref, end_ref, '> '*indentation_level + line + if line.startswith(">"): + line = "\\" + line + yield start_ref, end_ref, "> " * indentation_level + line + def get_line_info(tree, max_lines=None): """ diff --git a/quotequail/_internal.py b/quotequail/_internal.py index cd7f537..389713d 100644 --- a/quotequail/_internal.py +++ b/quotequail/_internal.py @@ -1,15 +1,21 @@ -import re -from ._patterns import COMPILED_PATTERNS, COMPILED_PATTERN_MAP, HEADER_RE, HEADER_MAP, REPLY_DATE_SPLIT_REGEX, STRIP_SPACE_CHARS +from ._patterns import ( + COMPILED_PATTERN_MAP, + HEADER_MAP, + HEADER_RE, + REPLY_DATE_SPLIT_REGEX, + STRIP_SPACE_CHARS, +) """ Internal methods. For max_wrap_lines, min_header_lines, min_quoted_lines documentation see the corresponding constants in _patterns.py. """ + def find_pattern_on_line(lines, n, max_wrap_lines): """ - Finds a forward/reply pattern within the given lines on text on the given - line number and returns a tuple with the type ('reply' or 'forward') and + Find a forward/reply pattern within the given lines on text on the given + line number and return a tuple with the type ('reply' or 'forward') and line number of where the pattern ends. The returned line number may be different from the given line number in case the pattern wraps over multiple lines. @@ -19,28 +25,29 @@ def find_pattern_on_line(lines, n, max_wrap_lines): for typ, regexes in COMPILED_PATTERN_MAP.items(): for regex in regexes: for m in range(max_wrap_lines): - match_line = join_wrapped_lines(lines[n:n+1+m]) - if match_line.startswith('>'): + match_line = join_wrapped_lines(lines[n : n + 1 + m]) + if match_line.startswith(">"): match_line = match_line[1:].strip() if regex.match(match_line.strip()): - return n+m, typ + return n + m, typ return None, None + def find_quote_position(lines, max_wrap_lines, limit=None): """ - Returns the (ending) line number of a quoting pattern. If a limit is given + Return the (ending) line number of a quoting pattern. If a limit is given and the limit is reached, the limit is returned. """ - for n in range(len(lines)): end, typ = find_pattern_on_line(lines, n, max_wrap_lines) if typ: return end - if limit != None and n >= limit-1: + if limit is not None and n >= limit - 1: return n return None + def join_wrapped_lines(lines): """ Join one or multiple lines that wrapped. Returns the reconstructed line. @@ -55,14 +62,15 @@ def join_wrapped_lines(lines): if joined and joined[-1] in STRIP_SPACE_CHARS: joined += line else: - joined += ' ' + joined += " " joined += line return joined + def extract_headers(lines, max_wrap_lines): """ - Extracts email headers from the given lines. Returns a dict with the + Extract email headers from the given lines. Returns a dict with the detected headers and the amount of lines that were processed. """ hdrs = {} @@ -86,30 +94,32 @@ def extract_headers(lines, max_wrap_lines): if header_name in HEADER_MAP: hdrs[HEADER_MAP[header_name]] = header_value.strip() - lines_processed = n+1 + lines_processed = n + 1 else: extend_lines += 1 if extend_lines < max_wrap_lines and header_name in HEADER_MAP: hdrs[HEADER_MAP[header_name]] = join_wrapped_lines( - [hdrs[HEADER_MAP[header_name]], line.strip()]) - lines_processed = n+1 + [hdrs[HEADER_MAP[header_name]], line.strip()] + ) + lines_processed = n + 1 else: # no more headers found break return hdrs, lines_processed + def parse_reply(line): """ - Parses the given reply line ("On DATE, USER wrote:") and returns a + Parse the given reply line ("On DATE, USER wrote:") and returns a dictionary with the "Date" and "From" keys, or None, if couldn't parse. """ - if line.startswith('>'): + if line.startswith(">"): line = line[1:].strip() date = user = None - for pattern in COMPILED_PATTERN_MAP['reply']: + for pattern in COMPILED_PATTERN_MAP["reply"]: match = pattern.match(line) if match: groups = match.groups() @@ -124,7 +134,7 @@ def parse_reply(line): user = split_groups[-1] else: # Try a simple comma split - split = groups[0].rsplit(',', 1) + split = groups[0].rsplit(",", 1) if len(split) == 2: date, user = split @@ -136,13 +146,14 @@ def parse_reply(line): if date and user: return { - 'date': date.strip(), - 'from': user.strip(), + "date": date.strip(), + "from": user.strip(), } + def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines): """ - Finds the starting point of a wrapped email. Returns a tuple containing + Find the starting point of a wrapped email. Returns a tuple containing (start_line_number, end_line_number, type), where type can be one of the following: @@ -159,7 +170,6 @@ def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines) Returns (None, None, None) if nothing was found. """ - for n, line in enumerate(lines): if not line.strip(): continue @@ -171,47 +181,50 @@ def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines) return n, end, typ # Find a quote - if line.startswith('>'): + if line.startswith(">"): # Check if there are at least min_quoted_lines lines that match matched_lines = 1 if matched_lines >= min_quoted_lines: - return n, n, 'quoted' + return n, n, "quoted" - for peek_line in lines[n+1:]: + for peek_line in lines[n + 1 :]: if not peek_line.strip(): continue - if not peek_line.startswith('>'): + if not peek_line.startswith(">"): break else: matched_lines += 1 if matched_lines >= min_quoted_lines: - return n, n, 'quoted' + return n, n, "quoted" # Find a header match = HEADER_RE.match(line) - if match: - if len(extract_headers(lines[n:], max_wrap_lines)[0]) >= min_header_lines: - return n, n, 'headers' + if ( + match + and len(extract_headers(lines[n:], max_wrap_lines)[0]) >= min_header_lines + ): + return n, n, "headers" return None, None, None def unindent_lines(lines): unquoted = [] - for n, line in enumerate(lines): - if line.startswith('> '): + for line in lines: + if line.startswith("> "): unquoted.append(line[2:]) - elif line.startswith('>'): + elif line.startswith(">"): unquoted.append(line[1:]) else: break return unquoted + def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): """ - Returns a tuple of: + Return a tuple of: - Type ('forward', 'reply', 'headers', 'quoted') - Range of the text at the top of the wrapped message (or None) - Headers dict (or None) @@ -219,42 +232,61 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): - Range of the text below the wrapped message (or None) - Whether the wrapped text needs to be unindented """ - headers = {} # Get line number and wrapping type. - start, end, typ = find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines) + start, end, typ = find_unwrap_start( + lines, max_wrap_lines, min_header_lines, min_quoted_lines + ) # We found a line indicating that it's a forward/reply. - if typ in ('forward', 'reply'): + if typ in ("forward", "reply"): main_type = typ - if typ == 'reply': - reply_headers = parse_reply(join_wrapped_lines(lines[start:end+1])) + if typ == "reply": + reply_headers = parse_reply(join_wrapped_lines(lines[start : end + 1])) if reply_headers: headers.update(reply_headers) # Find where the headers or the quoted section starts. # We can set min_quoted_lines to 1 because we expect a quoted section. - start2, end2, typ = find_unwrap_start(lines[end+1:], max_wrap_lines, min_header_lines, 1) + start2, end2, typ = find_unwrap_start( + lines[end + 1 :], max_wrap_lines, min_header_lines, 1 + ) - if typ == 'quoted': + if typ == "quoted": # Quoted section starts. Unindent and check if there are headers. - quoted_start = end+1+start2 + quoted_start = end + 1 + start2 unquoted = unindent_lines(lines[quoted_start:]) rest_start = quoted_start + len(unquoted) - start3, end3, typ = find_unwrap_start(unquoted, max_wrap_lines, min_header_lines, min_quoted_lines) - if typ == 'headers': + start3, end3, typ = find_unwrap_start( + unquoted, max_wrap_lines, min_header_lines, min_quoted_lines + ) + if typ == "headers": hdrs, hdrs_length = extract_headers(unquoted[start3:], max_wrap_lines) if hdrs: headers.update(hdrs) - rest2_start = quoted_start+start3+hdrs_length - return main_type, (0, start), headers, (rest2_start, rest_start), (rest_start, None), True + rest2_start = quoted_start + start3 + hdrs_length + return ( + main_type, + (0, start), + headers, + (rest2_start, rest_start), + (rest_start, None), + True, + ) else: - return main_type, (0, start), headers, (quoted_start, rest_start), (rest_start, None), True - - elif typ == 'headers': - hdrs, hdrs_length = extract_headers(lines[start+1:], max_wrap_lines) + return ( + main_type, + (0, start), + headers, + (quoted_start, rest_start), + (rest_start, None), + True, + ) + + elif typ == "headers": + hdrs, hdrs_length = extract_headers(lines[start + 1 :], max_wrap_lines) if hdrs: headers.update(hdrs) rest_start = start + 1 + hdrs_length @@ -262,25 +294,48 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): else: # Didn't find quoted section or headers, assume that everything # below is the qouted text. - return main_type, (0, start), headers, (start+(start2 or 0)+1, None), None, False + return ( + main_type, + (0, start), + headers, + (start + (start2 or 0) + 1, None), + None, + False, + ) # We just found headers, which usually indicates a forwarding. - elif typ == 'headers': - main_type = 'forward' + elif typ == "headers": + main_type = "forward" hdrs, hdrs_length = extract_headers(lines[start:], max_wrap_lines) rest_start = start + hdrs_length return main_type, (0, start), hdrs, (rest_start, None), None, False # We found quoted text. Headers may be within the quoted text. - elif typ == 'quoted': + elif typ == "quoted": unquoted = unindent_lines(lines[start:]) rest_start = start + len(unquoted) - start2, end2, typ = find_unwrap_start(unquoted, max_wrap_lines, min_header_lines, min_quoted_lines) - if typ == 'headers': - main_type = 'forward' + start2, end2, typ = find_unwrap_start( + unquoted, max_wrap_lines, min_header_lines, min_quoted_lines + ) + if typ == "headers": + main_type = "forward" hdrs, hdrs_length = extract_headers(unquoted[start2:], max_wrap_lines) rest2_start = start + hdrs_length - return main_type, (0, start), hdrs, (rest2_start, rest_start), (rest_start, None), True + return ( + main_type, + (0, start), + hdrs, + (rest2_start, rest_start), + (rest_start, None), + True, + ) else: - main_type = 'quote' - return main_type, (None, start), None, (start, rest_start), (rest_start, None), True + main_type = "quote" + return ( + main_type, + (None, start), + None, + (start, rest_start), + (rest_start, None), + True, + ) diff --git a/quotequail/_patterns.py b/quotequail/_patterns.py index a0c1108..2cae2e0 100644 --- a/quotequail/_patterns.py +++ b/quotequail/_patterns.py @@ -1,111 +1,110 @@ # -*- coding: utf-8 -*- import re +from typing import List REPLY_PATTERNS = [ - u'^On (.*) wrote:$', # apple mail/gmail reply - u'^Am (.*) schrieb (.*):$', # German - u'^Le (.*) a écrit :$', # French - u'El (.*) escribió:$', # Spanish - u'^(.*) написал\(а\):$', # Russian - u'^(.*) skrev (.*):$', - u'^Den (.*) skrev (.*):$', # Swedish - u'^Em (.*) escreveu:$', # Brazillian portuguese - u'([0-9]{4}/[0-9]{1,2}/[0-9]{1,2}) (.* <.*@.*>)$', # gmail (?) reply + "^On (.*) wrote:$", # apple mail/gmail reply + "^Am (.*) schrieb (.*):$", # German + "^Le (.*) a écrit :$", # French + "El (.*) escribió:$", # Spanish + r"^(.*) написал\(а\):$", # Russian + "^(.*) skrev (.*):$", # Norwegian + "^Den (.*) skrev (.*):$", # Swedish + "^Em (.*) escreveu:$", # Brazillian portuguese + "([0-9]{4}/[0-9]{1,2}/[0-9]{1,2}) (.* <.*@.*>)$", # gmail (?) reply ] -REPLY_DATE_SPLIT_REGEX = re.compile(r'^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$') +REPLY_DATE_SPLIT_REGEX = re.compile(r"^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$") FORWARD_MESSAGES = [ # apple mail forward - 'Begin forwarded message', 'Anfang der weitergeleiteten E-Mail', - u'Début du message réexpédié', 'Inicio del mensaje reenviado', - + "Begin forwarded message", + "Anfang der weitergeleiteten E-Mail", + "Début du message réexpédié", + "Inicio del mensaje reenviado", # gmail/evolution forward - 'Forwarded [mM]essage', 'Mensaje reenviado', 'Vidarebefordrat meddelande', - + "Forwarded [mM]essage", + "Mensaje reenviado", + "Vidarebefordrat meddelande", # outlook - 'Original [mM]essage', 'Ursprüngliche Nachricht', 'Mensaje [oO]riginal', - + "Original [mM]essage", + "Ursprüngliche Nachricht", + "Mensaje [oO]riginal", # Thunderbird forward - u'Message transféré', - + "Message transféré", # mail.ru forward (Russian) - u'Пересылаемое сообщение', + "Пересылаемое сообщение", ] # We yield this pattern to simulate Outlook forward styles. It is also used for # some emails forwarded by Yahoo. -FORWARD_LINE = '________________________________' - -FORWARD_PATTERNS = [ - '^{}$'.format(FORWARD_LINE), +FORWARD_LINE = "________________________________" -] + ['^---+ ?%s ?---+$' % p for p in FORWARD_MESSAGES] \ - + ['^%s:$' % p for p in FORWARD_MESSAGES] +FORWARD_PATTERNS = ( + [ + "^{}$".format(FORWARD_LINE), + ] + + [f"^---+ ?{p} ?---+$" for p in FORWARD_MESSAGES] + + [f"^{p}:$" for p in FORWARD_MESSAGES] +) FORWARD_STYLES = [ # Outlook - 'border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in', + "border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in", ] -HEADER_RE = re.compile(r'\*?([-\w ]+):\*?(.*)$', re.UNICODE) +HEADER_RE = re.compile(r"\*?([-\w ]+):\*?(.*)$", re.UNICODE) HEADER_MAP = { - 'from': 'from', - 'von': 'from', - 'de': 'from', - u'от кого': 'from', - u'från': 'from', - - 'to': 'to', - 'an': 'to', - 'para': 'to', - u'à': 'to', - u'pour': 'to', - u'кому': 'to', - u'till': 'to', - - 'cc': 'cc', - 'kopie': 'cc', - 'kopia': 'cc', - - 'bcc': 'bcc', - 'cco': 'bcc', - 'blindkopie': 'bcc', - - 'reply-to': 'reply-to', - 'antwort an': 'reply-to', - u'répondre à': 'reply-to', - 'responder a': 'reply-to', - - 'date': 'date', - 'sent': 'date', - 'received': 'date', - 'datum': 'date', - 'gesendet': 'date', - 'enviado el': 'date', - 'enviados': 'date', - 'fecha': 'date', - u'дата': 'date', - - 'subject': 'subject', - 'betreff': 'subject', - 'asunto': 'subject', - 'objet': 'subject', - 'sujet': 'subject', - u'тема': 'subject', - u'ämne': 'subject', + "from": "from", + "von": "from", + "de": "from", + "от кого": "from", + "från": "from", + "to": "to", + "an": "to", + "para": "to", + "à": "to", + "pour": "to", + "кому": "to", + "till": "to", + "cc": "cc", + "kopie": "cc", + "kopia": "cc", + "bcc": "bcc", + "cco": "bcc", + "blindkopie": "bcc", + "reply-to": "reply-to", + "antwort an": "reply-to", + "répondre à": "reply-to", + "responder a": "reply-to", + "date": "date", + "sent": "date", + "received": "date", + "datum": "date", + "gesendet": "date", + "enviado el": "date", + "enviados": "date", + "fecha": "date", + "дата": "date", + "subject": "subject", + "betreff": "subject", + "asunto": "subject", + "objet": "subject", + "sujet": "subject", + "тема": "subject", + "ämne": "subject", } COMPILED_PATTERN_MAP = { - 'reply': [re.compile(regex) for regex in REPLY_PATTERNS], - 'forward': [re.compile(regex) for regex in FORWARD_PATTERNS], + "reply": [re.compile(regex) for regex in REPLY_PATTERNS], + "forward": [re.compile(regex) for regex in FORWARD_PATTERNS], } -COMPILED_PATTERNS = sum(COMPILED_PATTERN_MAP.values(), []) +COMPILED_PATTERNS: List[re.Pattern] = sum(COMPILED_PATTERN_MAP.values(), []) -MULTIPLE_WHITESPACE_RE = re.compile('\s+') +MULTIPLE_WHITESPACE_RE = re.compile(r"\s+") # Amount to lines to join to check for potential wrapped patterns in plain text # messages. @@ -120,4 +119,4 @@ # Characters at the end of line where we join lines without adding a space. # For example, "John <\njohn@example>" becomes "John ", but # "John\nDoe" becomes "John Doe". -STRIP_SPACE_CHARS = '<([{"\'' +STRIP_SPACE_CHARS = r"<([{\"'" diff --git a/requirements_tests.txt b/requirements_tests.txt index 80bc88c..a5e19eb 100644 --- a/requirements_tests.txt +++ b/requirements_tests.txt @@ -1 +1,3 @@ -lxml==3.6.0 +--no-binary lxml +lxml==4.9.1 +pytest==7.1.3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..5332636 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,64 @@ +[flake8] +ignore= + # !!! make sure you have a comma at the end of each line EXCEPT the LAST one + # line length, already enforced by black + E501, + # https://pypi.org/project/flake8-future-import/ + FI1 + # Missing docstrings + D1, + # One-line docstring should fit on one line with quotes. + # We ignore this because it's OK to buy yourself a few extra characters + # for the summary line even if the summary line is *the only* line. + D200, + # 1 blank line required between summary line and description + D205, + # Multi-line docstring summary should start at the first line. + # We ignore this because we agreed in #20553 that we we want to put the + # summary line below """ for multi-line docstrings. + D212, + # First line should end with a period + D400, + # This is not PEP8-compliant and conflicts with black + W503, + W504, + # This is not PEP8-compliant and conflicts with black + E203, + # Too intrusive, sometimes makes code less readable + SIM106 + # Allow f-strings + SFS301, + # Allow .format + SFS201 +exclude=venv +#max-complexity=2 +banned-modules= + typing.Text = use str +require-code=True + +[isort] +skip=venv,src +known_first_party=quotequail +known_tests=tests +sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,TESTS,LOCALFOLDER +default_section=THIRDPARTY +use_parentheses=true +multi_line_output=3 +include_trailing_comma=True +force_grid_wrap=0 +combine_as_imports=True +line_length=87 + +[mypy] +python_version = 3.7 +ignore_missing_imports = True +no_implicit_optional = True +strict_equality = True +follow_imports = normal +warn_unreachable = True +show_error_context = True +pretty = True +files = quotequail + +[tool:pytest] +norecursedirs=venv diff --git a/setup.py b/setup.py index edd5842..478f89a 100644 --- a/setup.py +++ b/setup.py @@ -1,33 +1,30 @@ from setuptools import setup setup( - name='quotequail', - version='0.2.3', - url='http://github.com/closeio/quotequail', - license='MIT', - author='Thomas Steinacher', - author_email='engineering@close.io', - maintainer='Thomas Steinacher', - maintainer_email='engineering@close.io', - description='A library that identifies quoted text in plain text and HTML email messages.', + name="quotequail", + version="0.2.3", + url="http://github.com/closeio/quotequail", + license="MIT", + author="Thomas Steinacher", + author_email="engineering@close.io", + maintainer="Thomas Steinacher", + maintainer_email="engineering@close.io", + description="A library that identifies quoted text in plain text and HTML email messages.", long_description=__doc__, packages=[ - 'quotequail', + "quotequail", ], - test_suite='tests', - tests_require=['lxml'], - platforms='any', + test_suite="tests", + tests_require=["lxml"], + platforms="any", classifiers=[ - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Topic :: Communications :: Email', - 'Topic :: Software Development :: Libraries :: Python Modules' - ] + "Environment :: Web Environment", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Topic :: Communications :: Email", + "Topic :: Software Development :: Libraries :: Python Modules", + ], ) diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py index 5fba67f..29ada32 100644 --- a/tests/test_quotequail.py +++ b/tests/test_quotequail.py @@ -2,63 +2,84 @@ import os import unittest -from quotequail import * -class FileMixin(object): +from quotequail import quote, quote_html, unwrap, unwrap_html + + +class FileMixin: def read_file(self, name): - with open(os.path.join(os.path.dirname(__file__), 'files', name), 'rb') as f: - return f.read().decode('utf8') + with open(os.path.join(os.path.dirname(__file__), "files", name), "rb") as f: + return f.read().decode("utf8") def assert_equal_to_file(self, string, name): expected = self.read_file(name) self.assertEqual(string, expected) + class QuoteTestCase(unittest.TestCase): def test_quote_reply_1(self): self.assertEqual( quote( -"""Hello world. + """Hello world. On 2012-10-16 at 17:02 , Someone wrote: > Some quoted text -"""), - [(True, 'Hello world.\n\nOn 2012-10-16 at 17:02 , Someone wrote:'), - (False, '\n> Some quoted text\n')] +""" + ), + [ + ( + True, + "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone wrote:", + ), + (False, "\n> Some quoted text\n"), + ], ) def test_quote_reply_2(self): self.assertEqual( quote( -"""Hello world. + """Hello world. On 2012-10-16 at 17:02 , Someone < someone@example.com> wrote: > Some quoted text -"""), - [(True, 'Hello world.\n\nOn 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:'), - (False, '\n> Some quoted text\n')] +""" + ), + [ + ( + True, + "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:", + ), + (False, "\n> Some quoted text\n"), + ], ) def test_quote_reply_3(self): self.assertEqual( quote( -"""Hello world. + """Hello world. On 2012-10-16 at 17:02 , Someone wrote: > Some quoted text -"""), - [(True, 'Hello world.\n\nOn 2012-10-16 at 17:02 , Someone \nwrote:'), - (False, '\n> Some quoted text\n')] +""" + ), + [ + ( + True, + "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone \nwrote:", + ), + (False, "\n> Some quoted text\n"), + ], ) def test_quote_forward_1(self): self.assertEqual( quote( -"""Hello world. + """Hello world. Begin forwarded message: @@ -66,30 +87,42 @@ def test_quote_forward_1(self): > Subject: The email > > Some quoted text. -"""), - [(True, 'Hello world.\n\nBegin forwarded message:'), - (False, '\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n')] +""" + ), + [ + (True, "Hello world.\n\nBegin forwarded message:"), + ( + False, + "\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", + ), + ], ) def test_quote_forward_2(self): self.assertEqual( quote( -"""Hello world. + """Hello world. ---------- Forwarded message ---------- From: Someone Subject: The email Some quoted text. -"""), - [(True, 'Hello world.\n\n---------- Forwarded message ----------'), - (False, 'From: Someone \nSubject: The email\n\nSome quoted text.\n')] +""" + ), + [ + (True, "Hello world.\n\n---------- Forwarded message ----------"), + ( + False, + "From: Someone \nSubject: The email\n\nSome quoted text.\n", + ), + ], ) def test_quote_forward_3(self): self.assertEqual( quote( -"""Hello world. + """Hello world. > Begin forwarded message: > @@ -97,150 +130,205 @@ def test_quote_forward_3(self): > Subject: The email > > Some quoted text. -"""), - [(True, 'Hello world.\n\n> Begin forwarded message:'), - (False, '>\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n')] +""" + ), + [ + (True, "Hello world.\n\n> Begin forwarded message:"), + ( + False, + ">\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", + ), + ], ) def test_limit(self): self.assertEqual( quote("Lorem\nIpsum\nDolor\nSit\nAmet", limit=2), - [(True, 'Lorem\nIpsum'), (False, 'Dolor\nSit\nAmet')] + [(True, "Lorem\nIpsum"), (False, "Dolor\nSit\nAmet")], ) + class HTMLQuoteTestCase(unittest.TestCase): def test_apple(self): self.assertEqual( - quote_html('''Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:

Lorem ipsum dolor sit amet.


'''), + quote_html( + """Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:

Lorem ipsum dolor sit amet.


""" + ), [ # Note that lxml removes Content-Type meta tags (see # lxml.html.tostring include_meta_content_type flag) - (True, '''Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:
'''), + ( + True, + """Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:
""", + ), # Note we have an empty div stripped out here. - (False, '''

Lorem ipsum dolor sit amet.


'''), - ] + ( + False, + """

Lorem ipsum dolor sit amet.


""", + ), + ], ) def test_gmail(self): self.assertEqual( - quote_html('''

---------- Forwarded message ----------
From: Some One <someone@example.com> + quote_html( + """

---------- Forwarded message ----------
From: Some One <someone@example.com>



--
Some One
-
'''), +
""" + ), [ - (True, '''

---------- Forwarded message ----------
'''), - (False, '''
From: Some One <someone@example.com> + ( + True, + """

---------- Forwarded message ----------
""", + ), + ( + False, + """
From: Some One <someone@example.com>



--
Some One
-
'''), - ] +
""", + ), + ], ) def test_gmail_2(self): self.assertEqual( - quote_html(u'''
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
\r\n'''), + quote_html( + """
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
\r\n""" + ), [ - (True, u'''
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
'''), - (False, u'''
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
'''), - ] + ( + True, + """
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
""", + ), + ( + False, + """
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
""", + ), + ], ) def test_outlook(self): self.assertEqual( - quote_html(u'''

Thanks,

 

 

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

 

Hey,

'''), + quote_html( + """

Thanks,

 

 

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

 

Hey,

""" + ), [ - (True, u'

Thanks,

\xa0

\xa0

'), - (False, u'

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

\xa0

Hey,

') - ] + ( + True, + '

Thanks,

\xa0

\xa0

', + ), + ( + False, + '

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

\xa0

Hey,

', + ), + ], ) def test_no_wrap_tag(self): self.assertEqual( - quote_html(u'''On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
'''), + quote_html( + """On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
""" + ), [ - (True, 'On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:'), - (False, '
some stuff
'), - ] + ( + True, + "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:", + ), + (False, "
some stuff
"), + ], ) def test_images(self): self.assertEqual( - quote_html('''
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there \"*B-)\"*:P\"*:-~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
'''), + quote_html( + """
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there \"*B-)\"*:P\"*:-~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
""" + ), [ - (True, u'''
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
'''), - (False, u'''
Hi there\xa0\"*B-)\"*:P\"*:->~~
''') - ] + ( + True, + """
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
""", + ), + ( + False, + """
Hi there\xa0\"*B-)\"*:P\"*:->~~
""", + ), + ], ) def test_no_quote(self): self.assertEqual( - quote_html(u'''

One

Two

Three

'''), + quote_html("""

One

Two

Three

"""), [ - (True, '

One

Two

Three

'), - ] + (True, "

One

Two

Three

"), + ], ) def test_limit(self): self.assertEqual( - quote_html(u'''

One

Two

Three

Four

''', limit=3), + quote_html("""

One

Two

Three

Four

""", limit=3), [ - (True, '

One

Two

Three

'), - (False, '

Four

'), - ] + (True, "

One

Two

Three

"), + (False, "

Four

"), + ], ) def test_empty(self): self.assertEqual( - quote_html(u''), + quote_html(""), [ - (True, ''), - ] + (True, ""), + ], ) def test_comment(self): self.assertEqual( - quote_html(u''''''), + quote_html(""""""), [ - (True, ''), - ] + (True, ""), + ], ) def test_comment_2(self): self.assertEqual( - quote_html(u'''AB'''), + quote_html("""AB"""), [ - (True, 'AB'), - ] + (True, "AB"), + ], ) def test_comment_3(self): self.assertEqual( - quote_html(u'''

Begin forwarded message:

'''), + quote_html( + """

Begin forwarded message:

""" + ), [ - (True, '

Begin forwarded message:'), - (False, '
'), - ] + (True, "

Begin forwarded message:"), + (False, "
"), + ], ) def test_prefix_tag(self): self.assertEqual( - quote_html(u'''A
Begin forwarded message:B'''), + quote_html("""A
Begin forwarded message:B"""), [ - (True, 'A
Begin forwarded message:B'), - ] + (True, "A
Begin forwarded message:B"), + ], ) def test_prefix_tag_2(self): # We can't preserve the exact markup due to lxml's parsing here. self.assertEqual( - quote_html(u'''A
Begin forwarded message:B'''), + quote_html("""A
Begin forwarded message:B"""), [ - (True, 'A
Begin forwarded message:B'), - ] + (True, "A
Begin forwarded message:B"), + ], ) def test_encoding(self): # We assume everything is UTF-8 self.assertEqual( - quote_html(u''' + quote_html( + """ @@ -250,8 +338,12 @@ def test_encoding(self): test ä -'''), [ - (True, u''' +""" + ), + [ + ( + True, + """ @@ -259,21 +351,36 @@ def test_encoding(self): test ä -'''), - ]) +""", + ), + ], + ) def test_newline(self): # Newline in "Am\r\n26. Mai" should not change the way we match. self.assertEqual( - quote_html(u'''\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\nAm\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n\r\n'''), [ - (True, '\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\n

Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

'), - (False, '\r\n\r\n\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n') - ]) + quote_html( + """\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\nAm\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n\r\n""" + ), + [ + ( + True, + '\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\n

Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

', + ), + ( + False, + '\r\n\r\n\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n', + ), + ], + ) + class UnwrapTestCase(unittest.TestCase): def test_gmail_forward(self): # Gmail forward - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello ---------- Forwarded message ---------- From: Someone @@ -283,19 +390,24 @@ def test_gmail_forward(self): Spanish Classes Learn Spanish -"""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': 'Someone ', - 'date': 'Fri, Apr 26, 2013 at 8:13 PM', - 'subject': 'Weekend Spanish classes', - 'to': 'recipient@example.com', - 'text': 'Spanish Classes\nLearn Spanish', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": "Someone ", + "date": "Fri, Apr 26, 2013 at 8:13 PM", + "subject": "Weekend Spanish classes", + "to": "recipient@example.com", + "text": "Spanish Classes\nLearn Spanish", + }, + ) def test_apple_forward(self): # Apple Mail (10.9 and earlier) forward - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello Begin forwarded message: @@ -307,20 +419,25 @@ def test_apple_forward(self): > Original text Text bottom -"""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': '"Some One" ', - 'date': '1. August 2011 23:28:15 GMT-07:00', - 'subject': 'AW: AW: Some subject', - 'to': '"Other Person" ', - 'text': 'Original text', - 'text_bottom': 'Text bottom', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + "text_bottom": "Text bottom", + }, + ) def test_apple_forward_2(self): # Apple Mail (10.10) forward - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello > Begin forwarded message: > @@ -332,20 +449,25 @@ def test_apple_forward_2(self): > Original text Text bottom -"""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': '"Some One" ', - 'date': '1. August 2011 23:28:15 GMT-07:00', - 'subject': 'AW: AW: Some subject', - 'to': '"Other Person" ', - 'text': 'Original text', - 'text_bottom': 'Text bottom', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + "text_bottom": "Text bottom", + }, + ) def test_sparrow_forward(self): # Sparrow forward - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello Forwarded message: @@ -359,20 +481,25 @@ def test_sparrow_forward(self): > Great news! Text bottom -"""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': 'Some One ', - 'date': 'Thursday, March 7, 2013 7:09:41 PM', - 'subject': 'Re: Syncing Up', - 'to': 'Other person ', - 'text': 'OHAI\n\nGreat news!', - 'text_bottom': 'Text bottom', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": "Some One ", + "date": "Thursday, March 7, 2013 7:09:41 PM", + "subject": "Re: Syncing Up", + "to": "Other person ", + "text": "OHAI\n\nGreat news!", + "text_bottom": "Text bottom", + }, + ) def test_bold_headers(self): # Forwrad with *bold* text - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello Forwarded message: @@ -381,19 +508,24 @@ def test_bold_headers(self): *Date:* Wednesday, February 6, 2013 7:46:53 AM *Subject:* Fwd: Hottest Startups -This is interesting."""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': 'Some One ', - 'date': 'Wednesday, February 6, 2013 7:46:53 AM', - 'subject': 'Fwd: Hottest Startups', - 'to': 'Other Person ', - 'text': 'This is interesting.', - }) +This is interesting.""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": "Some One ", + "date": "Wednesday, February 6, 2013 7:46:53 AM", + "subject": "Fwd: Hottest Startups", + "to": "Other Person ", + "text": "This is interesting.", + }, + ) def test_no_forward_text(self): # No forwarding message text - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello From: "Some One" Date: 1. August 2011 23:28:15 GMT-07:00 @@ -401,19 +533,24 @@ def test_no_forward_text(self): Subject: AW: AW: Some subject Original text -"""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': '"Some One" ', - 'date': '1. August 2011 23:28:15 GMT-07:00', - 'subject': 'AW: AW: Some subject', - 'to': '"Other Person" ', - 'text': 'Original text', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + }, + ) def test_no_forward_text_quoted(self): # No forwarding message text - self.assertEqual(unwrap("""Hello + self.assertEqual( + unwrap( + """Hello > From: "Some One" > Date: 1. August 2011 23:28:15 GMT-07:00 @@ -421,40 +558,49 @@ def test_no_forward_text_quoted(self): > Subject: AW: AW: Some subject > > Original text -"""), { - 'text_top': 'Hello', - 'type': 'forward', - 'from': '"Some One" ', - 'date': '1. August 2011 23:28:15 GMT-07:00', - 'subject': 'AW: AW: Some subject', - 'to': '"Other Person" ', - 'text': 'Original text', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + }, + ) def test_outlook_forward(self): # Outlook? - self.assertEqual(unwrap("""-------- Original Message -------- + self.assertEqual( + unwrap( + """-------- Original Message -------- Subject: \tSome Newsletter Date: \tFri, 19 Jun 2009 19:16:04 +0200 From: \tfrom Reply-To: \treply To: \tto@example.com -OHAI"""), { - 'type': 'forward', - 'from': 'from ', - 'reply-to': 'reply ', - 'date': 'Fri, 19 Jun 2009 19:16:04 +0200', - 'subject': 'Some Newsletter', - 'to': 'to@example.com', - 'reply-to': 'reply ', - 'text': 'OHAI', - }) - +OHAI""" + ), + { + "type": "forward", + "from": "from ", + "reply-to": "reply ", + "date": "Fri, 19 Jun 2009 19:16:04 +0200", + "subject": "Some Newsletter", + "to": "to@example.com", + "reply-to": "reply ", + "text": "OHAI", + }, + ) def test_spacing(self): # Some clients (Blackberry?) have weird whitespace rules - self.assertEqual(unwrap("""hello world + self.assertEqual( + unwrap( + """hello world -----Original Message----- From: "Some One" @@ -467,19 +613,24 @@ def test_spacing(self): OHAI... -"""), { - 'text_top': 'hello world', - 'type': 'forward', - 'from': '"Some One" ', - 'date': 'Sat, 22 Mar 2008 12:16:06', - 'subject': 'Antw: FW: html', - 'to': '', - 'text': 'OHAI...', - }) +""" + ), + { + "text_top": "hello world", + "type": "forward", + "from": '"Some One" ', + "date": "Sat, 22 Mar 2008 12:16:06", + "subject": "Antw: FW: html", + "to": "", + "text": "OHAI...", + }, + ) def test_quote(self): # Just a quote - self.assertEqual(unwrap("""hello world + self.assertEqual( + unwrap( + """hello world Hey: This is very important @@ -489,35 +640,48 @@ def test_quote(self): -- kthxbye -"""), { - 'type': 'quote', - 'text_top': 'hello world\n\nHey: This is very important', - 'text': 'Lorem ipsum\ndolor sit amet\nadipiscing elit.', - 'text_bottom': '--\nkthxbye', - }) - +""" + ), + { + "type": "quote", + "text_top": "hello world\n\nHey: This is very important", + "text": "Lorem ipsum\ndolor sit amet\nadipiscing elit.", + "text_bottom": "--\nkthxbye", + }, + ) def test_no_message(self): # No message - self.assertEqual(unwrap("""hello world + self.assertEqual( + unwrap( + """hello world Hey: This is very important > No quoted message (just one line). -"""), None) - +""" + ), + None, + ) def test_forward_no_headers(self): # No quote / headers in forwarded message - self.assertEqual(unwrap("""Begin forwarded message: + self.assertEqual( + unwrap( + """Begin forwarded message: Hello -"""), { - 'type': 'forward', - 'text': 'Hello', - }) +""" + ), + { + "type": "forward", + "text": "Hello", + }, + ) def test_confusing_email_signature(self): - self.assertEqual(unwrap("""Phone: 12345 + self.assertEqual( + unwrap( + """Phone: 12345 Fax: 67890 Skype: foobar @@ -526,63 +690,83 @@ def test_confusing_email_signature(self): Subject: The email Email text. -"""), { - 'text_top': 'Phone: 12345\nFax: 67890\nSkype: foobar', - 'type': 'forward', - 'from': 'Someone ', - 'subject': 'The email', - 'text': 'Email text.', - }) +""" + ), + { + "text_top": "Phone: 12345\nFax: 67890\nSkype: foobar", + "type": "forward", + "from": "Someone ", + "subject": "The email", + "text": "Email text.", + }, + ) def test_long_subject(self): - self.assertEqual(unwrap("""---------- Forwarded message ---------- + self.assertEqual( + unwrap( + """---------- Forwarded message ---------- From: Someone Subject: The email has a very long and confusing subject with spans over multiple lines. To: Destination Email text. -"""), { - 'type': 'forward', - 'from': 'Someone ', - 'to': 'Destination ', - 'subject': 'The email has a very long and confusing subject with spans over multiple lines.', - 'text': 'Email text.', - }) +""" + ), + { + "type": "forward", + "from": "Someone ", + "to": "Destination ", + "subject": "The email has a very long and confusing subject with spans over multiple lines.", + "text": "Email text.", + }, + ) def test_reply_1(self): - data = unwrap("""Hello world. + data = unwrap( + """Hello world. On 2012-10-16 at 17:02 , Someone wrote: > Some quoted text -""") - self.assertEqual(data, { - 'type': 'reply', - 'date': '2012-10-16 at 17:02', - 'from': 'Someone ', - 'text_top': 'Hello world.', - 'text': 'Some quoted text', - }) +""" + ) + self.assertEqual( + data, + { + "type": "reply", + "date": "2012-10-16 at 17:02", + "from": "Someone ", + "text_top": "Hello world.", + "text": "Some quoted text", + }, + ) def test_reply_2(self): - data = unwrap("""Hello world. + data = unwrap( + """Hello world. On 2012-10-16 at 17:02 , Someone < someone@example.com> wrote: > Some quoted text -""") - self.assertEqual(data, { - 'type': 'reply', - 'date': '2012-10-16 at 17:02', - 'from': 'Someone ', - 'text_top': 'Hello world.', - 'text': 'Some quoted text', - }) +""" + ) + self.assertEqual( + data, + { + "type": "reply", + "date": "2012-10-16 at 17:02", + "from": "Someone ", + "text_top": "Hello world.", + "text": "Some quoted text", + }, + ) def test_french(self): - self.assertEqual(unwrap(u""" + self.assertEqual( + unwrap( + """ De : Someone Répondre à : Reply Date : Wednesday, 17 September 2014 4:24 pm @@ -590,18 +774,23 @@ def test_french(self): Objet : Re: test subject Hello, thanks for your reply - """), { - 'type': 'forward', - 'date': u'Wednesday, 17 September 2014 4:24 pm', - 'from': u'Someone ', - 'reply-to': 'Reply ', - 'to': u'"Someone Else" ', - 'subject': u'Re: test subject', - 'text': u'Hello, thanks for your reply', - }) + """ + ), + { + "type": "forward", + "date": "Wednesday, 17 September 2014 4:24 pm", + "from": "Someone ", + "reply-to": "Reply ", + "to": '"Someone Else" ', + "subject": "Re: test subject", + "text": "Hello, thanks for your reply", + }, + ) def test_forward_french_apple_mail(self): - self.assertEqual(unwrap(u''' + self.assertEqual( + unwrap( + """ Text before Début du message réexpédié : @@ -612,18 +801,23 @@ def test_forward_french_apple_mail(self): Objet: RE: The subject Text after -'''), { - 'date': u'14 novembre 2015 15:14:53 UTC+1', - 'from': u'"Foo Bar" ', - 'subject': 'RE: The subject', - 'text': u'Text after', - 'text_top': u'Text before', - 'to': u'"\'Ham Spam\'" ', - 'type': 'forward' - }) +""" + ), + { + "date": "14 novembre 2015 15:14:53 UTC+1", + "from": '"Foo Bar" ', + "subject": "RE: The subject", + "text": "Text after", + "text_top": "Text before", + "to": "\"'Ham Spam'\" ", + "type": "forward", + }, + ) def test_forward_french_thunderbird(self): - self.assertEqual(unwrap(u''' + self.assertEqual( + unwrap( + """ Text before -------- Message transféré -------- @@ -633,19 +827,24 @@ def test_forward_french_thunderbird(self): Pour : Ham Spam Text after -'''), { - 'date': u'Wed, 11 Nov 2015 12:31:25 +0100', - 'from': u'Foo Bar ', - 'subject': 'Re: Some subject', - 'text': u'Text after', - 'text_top': u'Text before', - 'to': u'Ham Spam ', - 'type': 'forward' - }) +""" + ), + { + "date": "Wed, 11 Nov 2015 12:31:25 +0100", + "from": "Foo Bar ", + "subject": "Re: Some subject", + "text": "Text after", + "text_top": "Text before", + "to": "Ham Spam ", + "type": "forward", + }, + ) def test_gmail_forward_swedish(self): # Gmail forward - self.assertEqual(unwrap(u"""Hello + self.assertEqual( + unwrap( + """Hello ---------- Vidarebefordrat meddelande ---------- Från: Someone @@ -655,66 +854,81 @@ def test_gmail_forward_swedish(self): Spanish Classes Learn Spanish -"""), { - 'text_top': u'Hello', - 'type': u'forward', - 'from': u'Someone ', - 'date': u'26 april 2013 20:13', - 'subject': u'Weekend Spanish classes', - 'to': u'recipient@example.com', - 'text': u'Spanish Classes\nLearn Spanish', - }) +""" + ), + { + "text_top": "Hello", + "type": "forward", + "from": "Someone ", + "date": "26 april 2013 20:13", + "subject": "Weekend Spanish classes", + "to": "recipient@example.com", + "text": "Spanish Classes\nLearn Spanish", + }, + ) + class HTMLUnwrapTestCase(FileMixin, unittest.TestCase): def test_simple_forward(self): - html = u'Begin forwarded message:
\n
\nFrom: someone@example.com
\nTo: anyone@example.com
\nSubject: You won
\n' - self.assertEqual(unwrap_html(html), { - 'type': 'forward', - 'from': 'someone@example.com', - 'to': 'anyone@example.com', - 'subject': 'You won', - }) + html = "Begin forwarded message:
\n
\nFrom: someone@example.com
\nTo: anyone@example.com
\nSubject: You won
\n" + self.assertEqual( + unwrap_html(html), + { + "type": "forward", + "from": "someone@example.com", + "to": "anyone@example.com", + "subject": "You won", + }, + ) def test_apple_forward(self): html = 'test

blah


Begin forwarded message:

From: Foo Bar <foo@bar.example>
Subject: The Subject
Date: March 24, 2016 at 20:16:25 GMT+1
To: John Doe <john@doe.example>

Text of the original email
' - self.assertEqual(unwrap_html(html), { - 'type': 'forward', - 'subject': 'The Subject', - 'date': 'March 24, 2016 at 20:16:25 GMT+1', - 'from': 'Foo Bar ', - 'to': 'John Doe ', - 'html_top': 'test

blah
', - 'html': '
Text of the original email
', - - }) + self.assertEqual( + unwrap_html(html), + { + "type": "forward", + "subject": "The Subject", + "date": "March 24, 2016 at 20:16:25 GMT+1", + "from": "Foo Bar ", + "to": "John Doe ", + "html_top": 'test

blah
', + "html": '
Text of the original email
', + }, + ) def test_gmail_forward(self): - html = '
test

blah

---------- Forwarded message ----------
From: Foo Bar <foo@bar.example>
Date: Thu, Mar 24, 2016 at 5:17 PM
Subject: The Subject
To: John Doe <john@doe.example>


Some text



' - - self.assertEqual(unwrap_html(html), { - 'type': 'forward', - 'subject': 'The Subject', - 'date': 'Thu, Mar 24, 2016 at 5:17 PM', - 'from': 'Foo Bar ', - 'to': 'John Doe ', - 'html_top': '
test

blah
', - 'html': '
Some text
', - }) + html = '
test

blah

---------- Forwarded message ----------
From: Foo Bar <foo@bar.example>
Date: Thu, Mar 24, 2016 at 5:17 PM
Subject: The Subject
To: John Doe <john@doe.example>


Some text



' + + self.assertEqual( + unwrap_html(html), + { + "type": "forward", + "subject": "The Subject", + "date": "Thu, Mar 24, 2016 at 5:17 PM", + "from": "Foo Bar ", + "to": "John Doe ", + "html_top": '
test

blah
', + "html": '
Some text
', + }, + ) def test_apple_reply(self): html = 'Foo

Bar

On 2016-03-25, at 23:01, John Doe <john@doe.example> wrote:

Some important email

' - self.assertEqual(unwrap_html(html), { - 'type': 'reply', - 'from': 'John Doe ', - 'date': '2016-03-25, at 23:01', - 'html': '
Some important email
', - 'html_top': 'Foo

Bar
', - }) + self.assertEqual( + unwrap_html(html), + { + "type": "reply", + "from": "John Doe ", + "date": "2016-03-25, at 23:01", + "html": '
Some important email
', + "html_top": 'Foo

Bar
', + }, + ) def test_gmail_reply(self): - html = '''
foo

bar

On Wed, Mar 16, 2016 at 12:49 AM, Foo Bar <foo@bar.example> wrote:
Hi,
+ html = """
foo

bar

On Wed, Mar 16, 2016 at 12:49 AM, Foo Bar <foo@bar.example> wrote:
Hi,

This is the reply

Thanks a lot!
@@ -722,272 +936,315 @@ def test_gmail_reply(self):



--
John Doe
Senior Director
Some Company
-''' +""" - self.assertEqual(unwrap_html(html), { - 'type': 'reply', - 'from': 'Foo Bar ', - 'date': 'Wed, Mar 16, 2016 at 12:49 AM', - 'html_top': '
foo

bar
', - 'html': '
Hi,
\n
This is the reply
\n
\nThanks a lot!
\nFoo
', - 'html_bottom': '
--
John Doe
Senior Director
Some Company
\n
\n', - }) + self.assertEqual( + unwrap_html(html), + { + "type": "reply", + "from": "Foo Bar ", + "date": "Wed, Mar 16, 2016 at 12:49 AM", + "html_top": '
foo

bar
', + "html": '
Hi,
\n
This is the reply
\n
\nThanks a lot!
\nFoo
', + "html_bottom": '
--
John Doe
Senior Director
Some Company
\n
\n', + }, + ) def test_outlook_forward(self): - data = self.read_file('outlook_forward.html') + data = self.read_file("outlook_forward.html") result = unwrap_html(data) - self.assertEqual(result['type'], 'forward') - self.assertEqual(result['from'], 'John Doe') - self.assertEqual(result['to'], 'Foo Bar (foo@bar.example)') - self.assertEqual(result['date'], 'Wednesday, July 09, 2014 10:27 AM') - self.assertEqual(result['subject'], 'The subject!') - self.assert_equal_to_file(result['html'], - 'outlook_forward_unwrapped.html') - self.assert_equal_to_file(result['html_top'], - 'outlook_forward_unwrapped_top.html') - self.assertNotIn('html_bottom', result) + self.assertEqual(result["type"], "forward") + self.assertEqual(result["from"], "John Doe") + self.assertEqual(result["to"], "Foo Bar (foo@bar.example)") + self.assertEqual(result["date"], "Wednesday, July 09, 2014 10:27 AM") + self.assertEqual(result["subject"], "The subject!") + self.assert_equal_to_file(result["html"], "outlook_forward_unwrapped.html") + self.assert_equal_to_file( + result["html_top"], "outlook_forward_unwrapped_top.html" + ) + self.assertNotIn("html_bottom", result) def test_thunderbird_forward(self): - data = self.read_file('thunderbird_forward.html') + data = self.read_file("thunderbird_forward.html") result = unwrap_html(data) - self.assertEqual(result['type'], 'forward') - self.assertEqual(result['from'], 'John Doe ') - self.assertEqual(result['to'], 'Foo Bar ') - self.assertEqual(result['date'], 'Tue, 3 May 2016 14:54:27 +0200 (CEST)') - self.assertEqual(result['subject'], 'Re: Example subject') - self.assertNotIn('html_top', result) - self.assert_equal_to_file(result['html'], - 'thunderbird_forward_unwrapped.html') - self.assertNotIn('html_bottom', result) + self.assertEqual(result["type"], "forward") + self.assertEqual(result["from"], "John Doe ") + self.assertEqual(result["to"], "Foo Bar ") + self.assertEqual(result["date"], "Tue, 3 May 2016 14:54:27 +0200 (CEST)") + self.assertEqual(result["subject"], "Re: Example subject") + self.assertNotIn("html_top", result) + self.assert_equal_to_file(result["html"], "thunderbird_forward_unwrapped.html") + self.assertNotIn("html_bottom", result) def test_mailru_forward(self): - data = self.read_file('mailru_forward.html') + data = self.read_file("mailru_forward.html") result = unwrap_html(data) - self.assertEqual(result['type'], 'forward') - self.assertEqual(result['from'], u'Иван Иванов ') - self.assertEqual(result['to'], u'Петр Петров ') - self.assertEqual(result['date'], u'Среда, 14 июня 2017, 15:19 +03:00') - self.assertEqual(result['subject'], u'Тестовая тема') - self.assertNotIn('html_top', result) - self.assert_equal_to_file(result['html'], - 'mailru_forward_unwrapped.html') - self.assertNotIn('html_bottom', result) + self.assertEqual(result["type"], "forward") + self.assertEqual(result["from"], "Иван Иванов ") + self.assertEqual(result["to"], "Петр Петров ") + self.assertEqual(result["date"], "Среда, 14 июня 2017, 15:19 +03:00") + self.assertEqual(result["subject"], "Тестовая тема") + self.assertNotIn("html_top", result) + self.assert_equal_to_file(result["html"], "mailru_forward_unwrapped.html") + self.assertNotIn("html_bottom", result) + class InternalTestCase(unittest.TestCase): def test_parse_reply(self): from quotequail._internal import parse_reply - data = parse_reply(u'Am 24.02.2015 um 22:48 schrieb John Doe :') - self.assertEqual(data, { - 'date': u'24.02.2015 um 22:48', - 'from': u'John Doe ' - }) - - data = parse_reply(u'On Monday, March 7, 2016 10:19 AM, John Doe wrote:') - self.assertEqual(data, { - 'date': u'Monday, March 7, 2016 10:19 AM', - 'from': u'John Doe ' - }) - - data = parse_reply(u'On Feb 22, 2015, at 9:19 PM, John Doe wrote:') - self.assertEqual(data, { - 'date': u'Feb 22, 2015, at 9:19 PM', - 'from': u'John Doe ' - }) - - data = parse_reply(u'On 2016-03-14, at 20:26, John Doe wrote:') - self.assertEqual(data, { - 'date': u'2016-03-14, at 20:26', - 'from': u'John Doe ' - }) - - data = parse_reply(u'Le 6 janv. 2014 à 19:50, John Doe a écrit :') - self.assertEqual(data, { - 'date': u'6 janv. 2014 \xe0 19:50', - 'from': u'John Doe ' - }) - - data = parse_reply(u'Le 02.10.2013 à 11:13, John Doe a écrit :') - self.assertEqual(data, { - 'date': u'02.10.2013 \xe0 11:13', - 'from': u'John Doe ' - }) - - data = parse_reply(u'El 11/07/2012 06:13 p.m., John Doe escribió:') - self.assertEqual(data, { - 'date': u'11/07/2012 06:13 p.m.', - 'from': u'John Doe' - }) - - data = parse_reply(u'El 06/04/2010, a las 13:13, John Doe escribió:') - self.assertEqual(data, { - 'date': u'06/04/2010, a las 13:13', - 'from': u'John Doe' - }) - - data = parse_reply(u'2009/5/12 John Doe ') - self.assertEqual(data, { - 'date': u'2009/5/12', - 'from': u'John Doe ' - }) - - data = parse_reply(u"On 8 o'clock, John Doe wrote:") - self.assertEqual(data, { - 'date': u"8 o'clock", - 'from': u'John Doe' - }) + data = parse_reply( + "Am 24.02.2015 um 22:48 schrieb John Doe :" + ) + self.assertEqual( + data, + {"date": "24.02.2015 um 22:48", "from": "John Doe "}, + ) + + data = parse_reply( + "On Monday, March 7, 2016 10:19 AM, John Doe wrote:" + ) + self.assertEqual( + data, + { + "date": "Monday, March 7, 2016 10:19 AM", + "from": "John Doe ", + }, + ) + + data = parse_reply( + "On Feb 22, 2015, at 9:19 PM, John Doe wrote:" + ) + self.assertEqual( + data, + { + "date": "Feb 22, 2015, at 9:19 PM", + "from": "John Doe ", + }, + ) + + data = parse_reply( + "On 2016-03-14, at 20:26, John Doe wrote:" + ) + self.assertEqual( + data, + {"date": "2016-03-14, at 20:26", "from": "John Doe "}, + ) + + data = parse_reply( + "Le 6 janv. 2014 à 19:50, John Doe a écrit :" + ) + self.assertEqual( + data, + { + "date": "6 janv. 2014 \xe0 19:50", + "from": "John Doe ", + }, + ) + + data = parse_reply( + "Le 02.10.2013 à 11:13, John Doe a écrit :" + ) + self.assertEqual( + data, + {"date": "02.10.2013 \xe0 11:13", "from": "John Doe "}, + ) + + data = parse_reply("El 11/07/2012 06:13 p.m., John Doe escribió:") + self.assertEqual(data, {"date": "11/07/2012 06:13 p.m.", "from": "John Doe"}) + + data = parse_reply("El 06/04/2010, a las 13:13, John Doe escribió:") + self.assertEqual(data, {"date": "06/04/2010, a las 13:13", "from": "John Doe"}) + + data = parse_reply("2009/5/12 John Doe ") + self.assertEqual( + data, {"date": "2009/5/12", "from": "John Doe "} + ) + + data = parse_reply("On 8 o'clock, John Doe wrote:") + self.assertEqual(data, {"date": "8 o'clock", "from": "John Doe"}) # Swedish - data = parse_reply(u'Den 24 februari 2015 22:48 skrev John Doe :') - self.assertEqual(data, { - 'date': u'24 februari 2015 22:48', - 'from': u'John Doe ' - }) + data = parse_reply( + "Den 24 februari 2015 22:48 skrev John Doe :" + ) + self.assertEqual( + data, + {"date": "24 februari 2015 22:48", "from": "John Doe "}, + ) # Brazillian portuguese - data = parse_reply(u'Em qui, 24 de jan de 2019 às 14:31, John Doe escreveu:') - self.assertEqual(data, { - 'date': u'qui, 24 de jan de 2019 às 14:31', - 'from': u'John Doe ' - }) + data = parse_reply( + "Em qui, 24 de jan de 2019 às 14:31, John Doe escreveu:" + ) + self.assertEqual( + data, + { + "date": "qui, 24 de jan de 2019 às 14:31", + "from": "John Doe ", + }, + ) class InternalHTMLTestCase(unittest.TestCase): def test_extract_headers(self): from quotequail._internal import extract_headers + + self.assertEqual(extract_headers([], 2), ({}, 0)) + self.assertEqual(extract_headers(["test"], 2), ({}, 0)) self.assertEqual( - extract_headers([], 2), - ({}, 0) + extract_headers(["From: b", "To: c"], 2), ({"from": "b", "to": "c"}, 2) ) + self.assertEqual(extract_headers(["From: b", "foo"], 2), ({"from": "b foo"}, 2)) + self.assertEqual(extract_headers(["From: b", "foo"], 1), ({"from": "b"}, 1)) self.assertEqual( - extract_headers(['test'], 2), - ({}, 0) + extract_headers(["From: b", "To: c", "", "other line"], 2), + ({"from": "b", "to": "c"}, 2), ) self.assertEqual( - extract_headers(['From: b', 'To: c'], 2), - ({'from': 'b', 'to': 'c'}, 2) + extract_headers( + [ + "From: some very very very long name <", + "verylong@example.com>", + "Subject: this is a very very very very long", + "subject", + "", + "other line", + ], + 2, + ), + ( + { + "from": "some very very very long name ", + "subject": "this is a very very very very long subject", + }, + 4, + ), ) self.assertEqual( - extract_headers(['From: b', 'foo'], 2), - ({'from': 'b foo'}, 2) + extract_headers( + ["From: some very very very long name <", "verylong@example.com>"], 1 + ), + ( + { + "from": "some very very very long name <", + }, + 1, + ), ) + + def test_tree_line_generator(self): + from quotequail import _html + + tree = _html.get_html_tree("
foo bar
baz
") + data = list(_html.tree_line_generator(tree)) + div = tree.xpath("div")[0] + br = tree.xpath("div/br")[0] self.assertEqual( - extract_headers(['From: b', 'foo'], 1), - ({'from': 'b'}, 1) + data, + [ + ((div, "begin"), (br, "begin"), 0, "foo bar"), + ((br, "end"), (div, "end"), 0, "baz"), + ], ) + data = list(_html.tree_line_generator(tree, max_lines=1)) + div = tree.xpath("div")[0] + br = tree.xpath("div/br")[0] self.assertEqual( - extract_headers(['From: b', 'To: c', '', 'other line'], 2), - ({'from': 'b', 'to': 'c'}, 2) + data, + [ + ((div, "begin"), (br, "begin"), 0, "foo bar"), + ], ) + + tree = _html.get_html_tree("

foo

bar
") + data = list(_html.tree_line_generator(tree)) + div = tree.xpath("div")[0] + h1 = tree.xpath("div/h1")[0] self.assertEqual( - extract_headers(['From: some very very very long name <', - 'verylong@example.com>', - 'Subject: this is a very very very very long', - 'subject', - '', - 'other line'], 2), - ({'from': 'some very very very long name ', - 'subject': 'this is a very very very very long subject'}, 4) + data, + [ + ((h1, "begin"), (h1, "end"), 0, "foo"), + ((h1, "end"), (div, "end"), 0, "bar"), + ], ) + + tree = _html.get_html_tree("
hi
world
") + data = list(_html.tree_line_generator(tree)) + div = tree.xpath("div")[0] + blockquote = tree.xpath("div/blockquote")[0] self.assertEqual( - extract_headers(['From: some very very very long name <', - 'verylong@example.com>'], 1), - ({'from': 'some very very very long name <',}, 1) + data, + [ + ((blockquote, "begin"), (blockquote, "end"), 1, "hi"), + ((blockquote, "end"), (div, "end"), 0, "world"), + ], ) - def test_tree_line_generator(self): - from quotequail import _html - - tree = _html.get_html_tree('
foo bar
baz
') - data = [result for result in _html.tree_line_generator(tree)] - div = tree.xpath('div')[0] - br = tree.xpath('div/br')[0] - span = tree.xpath('div/span')[0] - self.assertEqual(data, [ - ((div, 'begin'), (br, 'begin'), 0, 'foo bar'), - ((br, 'end'), (div, 'end'), 0, 'baz'), - ]) - data = [result for result in _html.tree_line_generator(tree, max_lines=1)] - div = tree.xpath('div')[0] - br = tree.xpath('div/br')[0] - self.assertEqual(data, [ - ((div, 'begin'), (br, 'begin'), 0, 'foo bar'), - ]) - - tree = _html.get_html_tree('

foo

bar
') - data = [result for result in _html.tree_line_generator(tree)] - div = tree.xpath('div')[0] - h1 = tree.xpath('div/h1')[0] - self.assertEqual(data, [ - ((h1, 'begin'), (h1, 'end'), 0, 'foo'), - ((h1, 'end'), (div, 'end'), 0, 'bar'), - ]) - tree = _html.get_html_tree( - '
hi
world
') - data = [result for result in _html.tree_line_generator(tree)] - div = tree.xpath('div')[0] - blockquote = tree.xpath('div/blockquote')[0] - self.assertEqual(data, [ - ((blockquote, 'begin'), (blockquote, 'end'), 1, 'hi'), - ((blockquote, 'end'), (div, 'end'), 0, 'world'), - ]) - - tree = _html.get_html_tree(''' + """ -
Subject: the subject
From: from line
''') - data = [result for result in _html.tree_line_generator(tree)] - tr1, tr2 = tree.xpath('table/tr') - self.assertEqual(data, [ - ((tr1, 'begin'), (tr1, 'end'), 0, 'Subject: the subject'), - ((tr2, 'begin'), (tr2, 'end'), 0, 'From: from line'), - ]) + """ + ) + data = list(_html.tree_line_generator(tree)) + tr1, tr2 = tree.xpath("table/tr") + self.assertEqual( + data, + [ + ((tr1, "begin"), (tr1, "end"), 0, "Subject: the subject"), + ((tr2, "begin"), (tr2, "end"), 0, "From: from line"), + ], + ) def test_trim_after(self): from quotequail import _html - html = '
ABCDE
' + html = "
ABCDE
" tree = _html.get_html_tree(html) - _html.trim_tree_after(tree.find('div/span')) - self.assertEqual(_html.render_html_tree(tree), '
AB
') + _html.trim_tree_after(tree.find("div/span")) + self.assertEqual(_html.render_html_tree(tree), "
AB
") tree = _html.get_html_tree(html) - _html.trim_tree_after(tree.find('div/span[2]')) - self.assertEqual(_html.render_html_tree(tree), '
ABCD
') + _html.trim_tree_after(tree.find("div/span[2]")) + self.assertEqual( + _html.render_html_tree(tree), "
ABCD
" + ) tree = _html.get_html_tree(html) - _html.trim_tree_after(tree.find('div/span'), include_element=False) - self.assertEqual(_html.render_html_tree(tree), '
A
') + _html.trim_tree_after(tree.find("div/span"), include_element=False) + self.assertEqual(_html.render_html_tree(tree), "
A
") tree = _html.get_html_tree(html) - _html.trim_tree_after(tree.find('div/span[2]'), include_element=False) - self.assertEqual(_html.render_html_tree(tree), '
ABC
') + _html.trim_tree_after(tree.find("div/span[2]"), include_element=False) + self.assertEqual(_html.render_html_tree(tree), "
ABC
") def test_trim_before(self): from quotequail import _html - html = '
ABCDE
' + html = "
ABCDE
" tree = _html.get_html_tree(html) - _html.trim_tree_before(tree.find('div/span')) - self.assertEqual(_html.render_html_tree(tree), '
BCDE
') + _html.trim_tree_before(tree.find("div/span")) + self.assertEqual( + _html.render_html_tree(tree), "
BCDE
" + ) tree = _html.get_html_tree(html) - _html.trim_tree_before(tree.find('div/span[2]')) - self.assertEqual(_html.render_html_tree(tree), '
DE
') + _html.trim_tree_before(tree.find("div/span[2]")) + self.assertEqual(_html.render_html_tree(tree), "
DE
") tree = _html.get_html_tree(html) - _html.trim_tree_before(tree.find('div/span'), include_element=False) - self.assertEqual(_html.render_html_tree(tree), '
CDE
') + _html.trim_tree_before(tree.find("div/span"), include_element=False) + self.assertEqual(_html.render_html_tree(tree), "
CDE
") tree = _html.get_html_tree(html) - _html.trim_tree_before(tree.find('div/span[2]'), include_element=False) - self.assertEqual(_html.render_html_tree(tree), '
E
') + _html.trim_tree_before(tree.find("div/span[2]"), include_element=False) + self.assertEqual(_html.render_html_tree(tree), "
E
") -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 7e1e1f2..0000000 --- a/tox.ini +++ /dev/null @@ -1,6 +0,0 @@ -[tox] -envlist = py27,py35 - -[testenv] -deps = -r{toxinidir}/requirements_tests.txt -commands = python setup.py test