-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
77,767 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
attachments/ | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Use an official Ubuntu image as a parent image
FROM ubuntu:20.04

# Keep apt from waiting on interactive configuration prompts during the build
ARG DEBIAN_FRONTEND=noninteractive

# Set the working directory in the container
WORKDIR /app

# Install Python 3, pip and pst-utils (provides "readpst") in the same layer as
# `apt-get update`, so the installs never run against a stale cached package
# index; remove the index afterwards to keep the image smaller.
RUN apt-get update \
    && apt-get install -y python3 python3-pip pst-utils \
    && rm -rf /var/lib/apt/lists/*

# Copy the application sources and the dependency list into the container at /app
COPY main.py mbox_parser.py requirements.txt /app/

# Directory that main.py scans for .mbox files (mounted as a volume by docker-compose)
RUN mkdir /app/target_files

# Install any needed Python packages specified in requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

# Run the Python script (replace "main.py" with the actual entry point of your Python application)
CMD ["python3", "main.py"]
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
version: '3'
services:
  myapp:
    image: my-python-app:latest
    volumes:
      # Named volume mounted on the directory main.py scans for .mbox files
      - target_files:/app/target_files

# A named volume referenced by a service has to be declared at the top level
volumes:
  target_files:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import os | ||
from mbox_parser import parse_mbox_file | ||
|
||
|
||
def list_mbox_files(target_dir):
    """Recursively collect the ``.mbox`` files found under *target_dir*.

    Args:
        target_dir: Directory to scan; its sub-directories are walked as well.

    Returns:
        A list of paths relative to *target_dir*. Files located directly in
        *target_dir* are returned as bare file names, while files found in
        sub-directories keep their sub-directory prefix, so joining any entry
        onto *target_dir* yields the path of the file that was found.
    """
    mbox_files = []
    for root, _, files in os.walk(target_dir):
        for file_name in files:
            if file_name.endswith(".mbox"):
                # Keep the sub-directory part: the bare name alone cannot be
                # resolved by the caller when the file is nested.
                full_path = os.path.join(root, file_name)
                mbox_files.append(os.path.relpath(full_path, target_dir))
    return mbox_files
|
||
|
||
if __name__ == '__main__':
    # Directory containing the .mbox files.
    # This should match the volume mount point in Docker.
    target_dir = "target_files"

    mbox_file_list = list_mbox_files(target_dir)
    print("List of .mbox files:", mbox_file_list)
    for mbox_file in mbox_file_list:
        print("\n#################################################################")
        print(f'working on {mbox_file}')
        # Build the path from target_dir instead of repeating the directory
        # name, so changing target_dir above is enough to scan another folder.
        data = parse_mbox_file(os.path.join(target_dir, mbox_file))
        print("#################################################################\n")
        print(data)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import mailbox | ||
import email.utils | ||
import os | ||
|
||
|
||
def _parse_address(header_value):
    """Split a raw address header into ``(display_name, email_address)``.

    A missing header (``None``) yields ``('', '')`` rather than being handed to
    ``email.utils.parseaddr``.
    """
    if header_value is None:
        return '', ''
    return email.utils.parseaddr(str(header_value))


def _safe_attachment_name(raw_name):
    """Reduce an attachment file name to its final path component.

    Returns ``None`` when no usable name remains (no name at all, or a name
    that is only a directory reference such as ``..``).
    """
    if not raw_name:
        return None
    # Treat both separator styles as path separators so a name taken from the
    # message can never point outside the attachment directory.
    name = os.path.basename(str(raw_name).replace('\\', '/'))
    if name in ('', '.', '..'):
        return None
    return name


def parse_mbox_file(mbox_file):
    """Extract the attachments of every message in an mbox file.

    Each part that carries a ``Content-Disposition`` header, a non-empty
    decoded payload and a usable file name is written to the ``attachments``
    directory (relative to the current working directory, created on demand)
    and described by one record in the returned list. Parts that have a
    disposition but no file name are skipped.

    Args:
        mbox_file: Path of the mbox file to read.

    Returns:
        A list of dicts, one per saved attachment, with the keys ``subject``,
        ``sender_name``, ``sender_email``, ``receiver_name``,
        ``receiver_email``, ``date``, ``attachment name`` and ``content type``.
        The misspelled keys ``seder_email`` and ``receiver_email,`` are also
        present for backward compatibility.
    """
    data = []
    save_dir = 'attachments'
    mbox = mailbox.mbox(mbox_file)
    try:
        for message in mbox:
            # Extract email details
            subject = message['subject']
            date = message['date']
            sender_name, sender_email = _parse_address(message['from'])
            receiver_name, receiver_email = _parse_address(message['to'])

            for part in message.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                attachment_data = part.get_payload(decode=True)
                if not attachment_data:
                    continue

                # Only the base name is used on disk; see _safe_attachment_name.
                attachment_name = _safe_attachment_name(part.get_filename())
                if attachment_name is None:
                    continue
                content_type = part.get_content_type()

                os.makedirs(save_dir, exist_ok=True)
                attachment_path = os.path.join(save_dir, attachment_name)
                with open(attachment_path, 'wb') as file:
                    file.write(attachment_data)

                data.append({
                    'subject': subject,
                    'sender_name': sender_name,
                    'sender_email': sender_email,
                    'receiver_name': receiver_name,
                    'receiver_email': receiver_email,
                    'date': date,
                    'attachment name': attachment_name,
                    'content type': content_type,
                    # Legacy misspelled keys, kept so existing consumers of the
                    # records keep working.
                    'seder_email': sender_email,
                    'receiver_email,': receiver_email,
                })
    finally:
        # Release the file handle held by the mailbox object.
        mbox.close()
    return data
|
||
|
||
# print(parse_mbox_file('D:\Languages\Python\Email Attachments\cindyloh3333_gmail.com.mbox')) | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
�� |
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
helooooooooooooooooo |