Skip to content

Commit

Permalink
linter fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
sim0nx committed Apr 27, 2024
1 parent 5ed8e04 commit f66ae6c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 28 deletions.
4 changes: 1 addition & 3 deletions eml_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""eml_parser serves as a python module for parsing eml files and returning various \
information found in the e-mail as well as computed information.
"""
"""eml_parser serves as a python module for parsing eml files and returning various information found in the e-mail as well as computed information."""

from .parser import EmlParser

Expand Down
7 changes: 2 additions & 5 deletions eml_parser/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ def decode_string(string: bytes, encoding: typing.Optional[str] = None) -> str:


def workaround_bug_27257(msg: email.message.Message, header: str) -> typing.List[str]:
"""Function to work around bug 27257 and just tries its best using \
the compat32 policy to extract any meaningful information, i.e. \
e-mail addresses.
"""Function to work around bug 27257 and just tries its best using the compat32 policy to extract any meaningful information, i.e. e-mail addresses.
Args:
msg (email.message.Message): An e-mail message object.
Expand All @@ -150,8 +148,7 @@ def workaround_bug_27257(msg: email.message.Message, header: str) -> typing.List


def workaround_field_value_parsing_errors(msg: email.message.Message, header: str) -> typing.List[str]:
"""Function to work around field value parsing errors by trying a best effort parsing using \
the compat32 policy to extract any meaningful information.
"""Function to work around field value parsing errors by trying a best effort parsing using the compat32 policy to extract any meaningful information.
Args:
msg (email.message.Message): An e-mail message object.
Expand Down
36 changes: 18 additions & 18 deletions eml_parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""eml_parser serves as a python module for parsing eml files and returning various\
information found in the e-mail as well as computed information.
"""
"""eml_parser serves as a python module for parsing eml files and returning various information found in the e-mail as well as computed information."""

import base64
import binascii
Expand Down Expand Up @@ -199,7 +197,9 @@ def decode_email(self, eml_file: os.PathLike, ignore_bad_start: bool = False) ->
dict: A dictionary with the content of the EML parsed and broken down into
key-value pairs.
"""
with open(eml_file, 'rb') as fp:
eml_file_path = pathlib.Path(eml_file)

with eml_file_path.open('rb') as fp:
raw_email = fp.read()

return self.decode_email_bytes(raw_email, ignore_bad_start=ignore_bad_start)
Expand Down Expand Up @@ -244,8 +244,7 @@ def decode_email_bytes(self, eml_file: bytes, ignore_bad_start: bool = False) ->
return self.parse_email()

def parse_email(self) -> dict:
"""Parse an e-mail and return a dictionary containing the various parts of\
the e-mail broken down into key-value pairs.
"""Parse an e-mail and return a dictionary containing the various parts of the e-mail broken down into key-value pairs.
Returns:
dict: A dictionary with the content of the EML parsed and broken down into
Expand Down Expand Up @@ -307,7 +306,7 @@ def parse_email(self) -> dict:
headers_struc['from'] = msg_header_field

else:
headers_struc['from'] = typing.cast(typing.Tuple[str, str], from_)[1]
headers_struc['from'] = from_[1]

# parse and decode "to"
headers_struc['to'] = self.headeremail2list('to')
Expand Down Expand Up @@ -681,29 +680,29 @@ def string_sliding_window_loop(body: str, slice_step: int = 500, max_distance: i
ptr_start = 0

for ptr_end in range(slice_step, body_length + slice_step, slice_step):
if ' ' in body[ptr_end - 1 : ptr_end]:
while not (eml_parser.regexes.window_slice_regex.match(body[ptr_end - 1 : ptr_end]) or ptr_end > body_length):
if ' ' in body[ptr_end - 1: ptr_end]:
while not (eml_parser.regexes.window_slice_regex.match(body[ptr_end - 1: ptr_end]) or ptr_end > body_length):
if ptr_end > body_length:
ptr_end = body_length
break

ptr_end += 1

# Found a :// near the start of the slice, rewind
if ptr_start > 16 and '://' in body[ptr_start - 8 : ptr_start + 8]:
if ptr_start > 16 and '://' in body[ptr_start - 8: ptr_start + 8]:
ptr_start -= 16

# Found a :// near the end of the slice, rewind from that location
if ptr_end < body_length and '://' in body[ptr_end - 8 : ptr_end + 8]:
if ptr_end < body_length and '://' in body[ptr_end - 8: ptr_end + 8]:
pos = body.rfind('://', ptr_end - 8, ptr_end + 8)
ptr_end = pos - 8

# Found a :// within the slice; try to expand the slice until we find an invalid
# URL character in order to avoid cutting off URLs
if '://' in body[ptr_start:ptr_end] and not body[ptr_end - 1 : ptr_end] == ' ':
if '://' in body[ptr_start:ptr_end] and not body[ptr_end - 1: ptr_end] == ' ':
distance = 1

while body[ptr_end - 1 : ptr_end] not in (' ', '>') and distance < max_distance and ptr_end <= body_length:
while body[ptr_end - 1: ptr_end] not in (' ', '>') and distance < max_distance and ptr_end <= body_length:
distance += 1
ptr_end += 1

Expand Down Expand Up @@ -877,7 +876,7 @@ def get_raw_body_text(
if msg.is_multipart():
boundary = msg.get_boundary(failobj=None)
for part in msg.get_payload():
raw_body.extend(self.get_raw_body_text(part, boundary=boundary))
raw_body.extend(self.get_raw_body_text(typing.cast(email.message.Message, part), boundary=boundary))
else:
# Treat text document attachments as belonging to the body of the mail.
# Attachments with a file-extension of .htm/.html are implicitly treated
Expand Down Expand Up @@ -921,8 +920,7 @@ def get_raw_body_text(

@staticmethod
def get_file_hash(data: bytes) -> typing.Dict[str, str]:
"""Generate hashes of various types (``MD5``, ``SHA-1``, ``SHA-256``, ``SHA-512``)\
for the provided data.
"""Generate hashes of various types (``MD5``, ``SHA-1``, ``SHA-256``, ``SHA-512``) for the provided data.
Args:
data (bytes): The data to calculate the hashes on.
Expand Down Expand Up @@ -1140,7 +1138,9 @@ def decode_email(
"""
warnings.warn('You are using a deprecated method, please use the EmlParser class instead.', DeprecationWarning)

with open(eml_file, 'rb') as fp:
eml_file_path = pathlib.Path(eml_file)

with eml_file_path.open('rb') as fp:
raw_email = fp.read()

return decode_email_b(
Expand Down Expand Up @@ -1194,7 +1194,7 @@ def decode_email_b(
email_force_tld (bool, optional): Only match e-mail addresses with a TLD. I.e exclude something like
john@doe. By default this is disabled.
parse_attachments (bool, optional): Set this to false if you want to disable the parsing of attachments.
parse_attachments (bool, optional): Set this to false if you want to disable the parsing of attachments.
Please note that HTML attachments as well as other text data marked to be
in-lined, will always be parsed.
Expand Down
3 changes: 1 addition & 2 deletions eml_parser/routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ def get_domain_ip(line: str) -> typing.List[str]:


def parserouting(line: str) -> typing.Dict[str, typing.Any]:
"""This method tries to parse an e-mail header received line\
and extract machine readable information.
"""This method tries to parse an e-mail header received line and extract machine readable information.
Note that there are a large number of formats for these lines
and a lot of weird ones which are not commonly used.
Expand Down

0 comments on commit f66ae6c

Please sign in to comment.