From f66ae6c6387bc45c95cd59afc2f99f55bb5a1562 Mon Sep 17 00:00:00 2001 From: Georges Toth Date: Sat, 27 Apr 2024 23:57:39 +0200 Subject: [PATCH] linter fixes --- eml_parser/__init__.py | 4 +--- eml_parser/decode.py | 7 ++----- eml_parser/parser.py | 36 ++++++++++++++++++------------------ eml_parser/routing.py | 3 +-- 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/eml_parser/__init__.py b/eml_parser/__init__.py index 714a82f..ce8d828 100644 --- a/eml_parser/__init__.py +++ b/eml_parser/__init__.py @@ -1,6 +1,4 @@ -"""eml_parser serves as a python module for parsing eml files and returning various \ -information found in the e-mail as well as computed information. -""" +"""eml_parser serves as a python module for parsing eml files and returning various information found in the e-mail as well as computed information.""" from .parser import EmlParser diff --git a/eml_parser/decode.py b/eml_parser/decode.py index 3d09a85..ef83e77 100644 --- a/eml_parser/decode.py +++ b/eml_parser/decode.py @@ -127,9 +127,7 @@ def decode_string(string: bytes, encoding: typing.Optional[str] = None) -> str: def workaround_bug_27257(msg: email.message.Message, header: str) -> typing.List[str]: - """Function to work around bug 27257 and just tries its best using \ - the compat32 policy to extract any meaningful information, i.e. \ - e-mail addresses. + """Function to work around bug 27257 and just tries its best using the compat32 policy to extract any meaningful information, i.e. e-mail addresses. Args: msg (email.message.Message): An e-mail message object. @@ -150,8 +148,7 @@ def workaround_bug_27257(msg: email.message.Message, header: str) -> typing.List def workaround_field_value_parsing_errors(msg: email.message.Message, header: str) -> typing.List[str]: - """Function to work around field value parsing errors by trying a best effort parsing using \ - the compat32 policy to extract any meaningful information. + """Function to work around field value parsing errors by trying a best effort parsing using the compat32 policy to extract any meaningful information. Args: msg (email.message.Message): An e-mail message object. diff --git a/eml_parser/parser.py b/eml_parser/parser.py index 857edb9..5b17c33 100644 --- a/eml_parser/parser.py +++ b/eml_parser/parser.py @@ -1,6 +1,4 @@ -"""eml_parser serves as a python module for parsing eml files and returning various\ -information found in the e-mail as well as computed information. -""" +"""eml_parser serves as a python module for parsing eml files and returning various information found in the e-mail as well as computed information.""" import base64 import binascii @@ -199,7 +197,9 @@ def decode_email(self, eml_file: os.PathLike, ignore_bad_start: bool = False) -> dict: A dictionary with the content of the EML parsed and broken down into key-value pairs. """ - with open(eml_file, 'rb') as fp: + eml_file_path = pathlib.Path(eml_file) + + with eml_file_path.open('rb') as fp: raw_email = fp.read() return self.decode_email_bytes(raw_email, ignore_bad_start=ignore_bad_start) @@ -244,8 +244,7 @@ def decode_email_bytes(self, eml_file: bytes, ignore_bad_start: bool = False) -> return self.parse_email() def parse_email(self) -> dict: - """Parse an e-mail and return a dictionary containing the various parts of\ - the e-mail broken down into key-value pairs. + """Parse an e-mail and return a dictionary containing the various parts of the e-mail broken down into key-value pairs. Returns: dict: A dictionary with the content of the EML parsed and broken down into @@ -307,7 +306,7 @@ def parse_email(self) -> dict: headers_struc['from'] = msg_header_field else: - headers_struc['from'] = typing.cast(typing.Tuple[str, str], from_)[1] + headers_struc['from'] = from_[1] # parse and decode "to" headers_struc['to'] = self.headeremail2list('to') @@ -681,8 +680,8 @@ def string_sliding_window_loop(body: str, slice_step: int = 500, max_distance: i ptr_start = 0 for ptr_end in range(slice_step, body_length + slice_step, slice_step): - if ' ' in body[ptr_end - 1 : ptr_end]: - while not (eml_parser.regexes.window_slice_regex.match(body[ptr_end - 1 : ptr_end]) or ptr_end > body_length): + if ' ' in body[ptr_end - 1: ptr_end]: + while not (eml_parser.regexes.window_slice_regex.match(body[ptr_end - 1: ptr_end]) or ptr_end > body_length): if ptr_end > body_length: ptr_end = body_length break @@ -690,20 +689,20 @@ def string_sliding_window_loop(body: str, slice_step: int = 500, max_distance: i ptr_end += 1 # Found a :// near the start of the slice, rewind - if ptr_start > 16 and '://' in body[ptr_start - 8 : ptr_start + 8]: + if ptr_start > 16 and '://' in body[ptr_start - 8: ptr_start + 8]: ptr_start -= 16 # Found a :// near the end of the slice, rewind from that location - if ptr_end < body_length and '://' in body[ptr_end - 8 : ptr_end + 8]: + if ptr_end < body_length and '://' in body[ptr_end - 8: ptr_end + 8]: pos = body.rfind('://', ptr_end - 8, ptr_end + 8) ptr_end = pos - 8 # Found a :// within the slice; try to expand the slice until we find an invalid # URL character in order to avoid cutting off URLs - if '://' in body[ptr_start:ptr_end] and not body[ptr_end - 1 : ptr_end] == ' ': + if '://' in body[ptr_start:ptr_end] and not body[ptr_end - 1: ptr_end] == ' ': distance = 1 - while body[ptr_end - 1 : ptr_end] not in (' ', '>') and distance < max_distance and ptr_end <= body_length: + while body[ptr_end - 1: ptr_end] not in (' ', '>') and distance < max_distance and ptr_end <= body_length: distance += 1 ptr_end += 1 @@ -877,7 +876,7 @@ def get_raw_body_text( if msg.is_multipart(): boundary = msg.get_boundary(failobj=None) for part in msg.get_payload(): - raw_body.extend(self.get_raw_body_text(part, boundary=boundary)) + raw_body.extend(self.get_raw_body_text(typing.cast(email.message.Message, part), boundary=boundary)) else: # Treat text document attachments as belonging to the body of the mail. # Attachments with a file-extension of .htm/.html are implicitly treated @@ -921,8 +920,7 @@ def get_raw_body_text( @staticmethod def get_file_hash(data: bytes) -> typing.Dict[str, str]: - """Generate hashes of various types (``MD5``, ``SHA-1``, ``SHA-256``, ``SHA-512``)\ - for the provided data. + """Generate hashes of various types (``MD5``, ``SHA-1``, ``SHA-256``, ``SHA-512``) for the provided data. Args: data (bytes): The data to calculate the hashes on. @@ -1140,7 +1138,9 @@ def decode_email( """ warnings.warn('You are using a deprecated method, please use the EmlParser class instead.', DeprecationWarning) - with open(eml_file, 'rb') as fp: + eml_file_path = pathlib.Path(eml_file) + + with eml_file_path.open('rb') as fp: raw_email = fp.read() return decode_email_b( @@ -1194,7 +1194,7 @@ def decode_email_b( email_force_tld (bool, optional): Only match e-mail addresses with a TLD. I.e exclude something like john@doe. By default this is disabled. - parse_attachments (bool, optional): Set this to false if you want to disable the parsing of attachments. + parse_attachments (bool, optional): Set this to false if you want to disable the parsing of attachments. Please note that HTML attachments as well as other text data marked to be in-lined, will always be parsed. diff --git a/eml_parser/routing.py b/eml_parser/routing.py index 48515df..410d996 100644 --- a/eml_parser/routing.py +++ b/eml_parser/routing.py @@ -63,8 +63,7 @@ def get_domain_ip(line: str) -> typing.List[str]: def parserouting(line: str) -> typing.Dict[str, typing.Any]: - """This method tries to parsed a e-mail header received line\ - and extract machine readable information. + """This method tries to parsed a e-mail header received line and extract machine readable information. Note that there are a large number of formats for these lines and a lot of weird ones which are not commonly used.