Skip to content

Commit

Permalink
commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mrmazuu committed Sep 12, 2023
1 parent d5ba3d6 commit cd55ff1
Show file tree
Hide file tree
Showing 12 changed files with 77,767 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
attachments/
__pycache__/
23 changes: 23 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Use an official Ubuntu image as a parent image
FROM ubuntu:20.04

# Set the working directory in the container
WORKDIR /app

# Install Python 3, pip and pst-utils (which provides "readpst") in one layer.
# "apt-get update" has to run in the same RUN instruction as the installs:
# as a separate layer it can be served from the build cache while the install
# steps re-run, leaving them with a stale package index.
# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
# during the build; the apt lists are removed to keep the layer small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-pip pst-utils \
    && rm -rf /var/lib/apt/lists/*

# Install the Python dependencies before copying the source so this layer is
# rebuilt only when requirements.txt changes
COPY requirements.txt /app/
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the application source into the container at /app
COPY main.py mbox_parser.py /app/

# Directory that main.py scans for .mbox files (relative to WORKDIR)
RUN mkdir /app/target_files

# Run the application
CMD ["python3", "main.py"]
Binary file added albert_meyers_000_1_1.pst
Binary file not shown.
38,827 changes: 38,827 additions & 0 deletions cindyloh3333_gmail.com.mbox

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: '3'
services:
  myapp:
    # Build the image from the Dockerfile in this directory and tag it with
    # the name given in "image".
    build: .
    image: my-python-app:latest
    volumes:
      # Bind-mount the local target_files directory. A bare "target_files:"
      # source is a named volume, which Compose rejects unless it is declared
      # in a top-level "volumes:" section, and which would not contain the
      # .mbox files kept next to this file.
      - ./target_files:/app/target_files
27 changes: 27 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import os
from mbox_parser import parse_mbox_file


def list_mbox_files(target_dir):
    """Return every ``.mbox`` file found under *target_dir*.

    The directory tree is walked recursively. Each entry is the file's path
    relative to *target_dir*: a file directly inside *target_dir* is returned
    as its bare name, while a file in a sub-directory keeps that
    sub-directory prefix so it can be located by joining the entry onto
    *target_dir*.

    Args:
        target_dir: Directory to search.

    Returns:
        A list of relative paths. It is empty when no ``.mbox`` file is
        found, including when *target_dir* does not exist.
    """
    mbox_files = []
    for root, _, files in os.walk(target_dir):
        for file in files:
            if file.endswith(".mbox"):
                # Keep the sub-directory part: the bare name of a nested file
                # cannot be resolved against target_dir by the caller.
                full_path = os.path.join(root, file)
                mbox_files.append(os.path.relpath(full_path, target_dir))
    return mbox_files


if __name__ == '__main__':
    # Directory containing the .mbox files to process.
    # This should match the volume mount point in Docker.
    target_dir = "target_files"

    mbox_file_list = list_mbox_files(target_dir)
    print("List of .mbox files:", mbox_file_list)
    for mbox_file in mbox_file_list:
        print("\n#################################################################")
        print(f'working on {mbox_file}')
        # Build the path from target_dir instead of repeating the directory
        # name as a literal, so changing target_dir above is sufficient.
        data = parse_mbox_file(os.path.join(target_dir, mbox_file))
        print("#################################################################\n")
        print(data)


53 changes: 53 additions & 0 deletions mbox_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import mailbox
import email.utils
import os


def _safe_attachment_name(raw_name, fallback='unnamed_attachment'):
    """Reduce a filename taken from an email to a bare, non-empty file name."""
    if not raw_name:
        return fallback
    # Treat both separator styles as path separators so that neither
    # "../x" nor "..\\x" can point outside the save directory.
    name = os.path.basename(str(raw_name).replace('\\', '/'))
    if name in ('', '.', '..'):
        return fallback
    return name


def parse_mbox_file(mbox_file):
    """Extract attachments and their metadata from an mbox file.

    Every non-multipart message part that carries a ``Content-Disposition``
    header produces one record. When the part has a non-empty decoded
    payload, that payload is also written to the ``attachments`` directory
    (relative to the current working directory, created on demand). Parts
    that share a file name overwrite each other on disk.

    Args:
        mbox_file: Path of the mbox file to read.

    Returns:
        A list of dicts, one per part described above, with the keys
        ``subject``, ``sender_name``, ``sender_email``, ``receiver_name``,
        ``receiver_email``, ``date``, ``attachment name`` and
        ``content type``. The misspelled keys ``seder_email`` and
        ``receiver_email,`` are kept as duplicates for existing callers.
        ``attachment name`` is the sanitised name used on disk.
    """
    save_dir = 'attachments'
    data = []
    mbox = mailbox.mbox(mbox_file)
    try:
        for message in mbox:
            # Extract email details
            subject = message['subject']
            date = message['date']
            # A header may be absent (None) or a Header object instead of a
            # str; parseaddr is only given a plain string so it cannot fail
            # on either.
            sender_name, sender_email = email.utils.parseaddr(
                str(message['from'] or ''))
            receiver_name, receiver_email = email.utils.parseaddr(
                str(message['to'] or ''))

            for part in message.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue

                # The filename comes from the (untrusted) email: it may be
                # missing or contain directory components, so it is reduced
                # to a bare name before being used in a path.
                attachment_name = _safe_attachment_name(part.get_filename())
                content_type = part.get_content_type()
                attachment_data = part.get_payload(decode=True)

                if attachment_data:
                    os.makedirs(save_dir, exist_ok=True)
                    attachment_path = os.path.join(save_dir, attachment_name)
                    with open(attachment_path, 'wb') as file:
                        file.write(attachment_data)

                data.append({
                    'subject': subject,
                    'sender_name': sender_name,
                    'sender_email': sender_email,
                    # Legacy misspelled key, kept for backward compatibility.
                    'seder_email': sender_email,
                    'receiver_name': receiver_name,
                    'receiver_email': receiver_email,
                    # Legacy key with a stray trailing comma, kept for
                    # backward compatibility.
                    'receiver_email,': receiver_email,
                    'date': date,
                    'attachment name': attachment_name,
                    'content type': content_type,
                })
    finally:
        # Release the file handle held by the mailbox object.
        mbox.close()
    return data


# print(parse_mbox_file('D:\Languages\Python\Email Attachments\cindyloh3333_gmail.com.mbox'))



1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# No third-party packages are required; the application uses only the Python standard library.
38,827 changes: 38,827 additions & 0 deletions target_files/cindyloh3333_gmail.com.mbox

Large diffs are not rendered by default.

Empty file added target_files/hbcvjdsbvjhvs.mbox
Empty file.
Empty file added target_files/okfkkf.mbox
Empty file.
1 change: 1 addition & 0 deletions target_files/testttttt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
helooooooooooooooooo

0 comments on commit cd55ff1

Please sign in to comment.