Skip to content

Commit

Permalink
Fix MFT record storage and CSV writer initialization
Browse files Browse the repository at this point in the history
- Changed mft_records from list to dictionary to prevent index out of range errors
- Added initialization method for CSV writer
- Implemented checks to ensure CSV writer is available before writing
- Updated related methods to work with the new mft_records dictionary
  • Loading branch information
rowingdude committed Sep 4, 2024
1 parent a965cf9 commit fbe96e1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 42 deletions.
1 change: 1 addition & 0 deletions src/analyzeMFT/file_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import asyncio
from typing import List, Dict, Any
from .mft_record import MftRecord
from .constants import *

class FileWriters:
@staticmethod
Expand Down
87 changes: 46 additions & 41 deletions src/analyzeMFT/mft_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Dict, Set, List, Optional, Any
from .constants import *
from .mft_record import MftRecord
from .file_writers import FileWriters

class MftAnalyzer:

Expand All @@ -15,10 +16,12 @@ def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute
self.debug = debug
self.compute_hashes = compute_hashes
self.export_format = export_format
self.mft_records = []
self.mft_records = {}
self.interrupt_flag = asyncio.Event()
self.csv_writer = None

self.csvfile = None
self.csv_writer = None

self.stats = {
'total_records': 0,
'active_records': 0,
Expand All @@ -33,20 +36,23 @@ def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute
'unique_crc32': set(),
})


async def analyze(self) -> None:
try:
self.initialize_csv_writer()
await self.process_mft()
await self.write_output()
except Exception as e:
print(f"An unexpected error occurred: {e}")
if self.debug:
traceback.print_exc()
finally:
if self.csvfile:
self.csvfile.close()
self.print_statistics()


async def process_mft(self) -> None:
print(f"Processing MFT file: {self.mft_file}")
try:
with open(self.mft_file, 'rb') as f:
while not self.interrupt_flag.is_set():
Expand All @@ -56,40 +62,28 @@ async def process_mft(self) -> None:

try:
record = MftRecord(raw_record, self.compute_hashes)

self.stats['total_records'] += 1
if record.flags & FILE_RECORD_IN_USE:
self.stats['active_records'] += 1
if record.flags & FILE_RECORD_IS_DIRECTORY:
self.stats['directories'] += 1
else:
self.stats['files'] += 1

if self.compute_hashes:
self.stats['unique_md5'].add(record.md5)
self.stats['unique_sha256'].add(record.sha256)
self.stats['unique_sha512'].add(record.sha512)
self.stats['unique_crc32'].add(record.crc32)

if self.debug:
print(f"Processing record {self.stats['total_records']}: {record.filename}")


# Store the record (no change needed here, it will work with a dict)
self.mft_records[record.recordnum] = record

if self.stats['total_records'] % 10000 == 0:
print(f"Processed {self.stats['total_records']} records...")

# Write to CSV in blocks of 1000 records
if self.stats['total_records'] % 1000 == 0:
await self.write_csv_block()
self.mft_records.clear() # Clear processed records to save memory

except Exception as e:
if self.debug:
print(f"Error processing record {self.stats['total_records']}: {str(e)}")
print(f"Error processing record {self.stats['total_records']}: {str(e)}")
continue

except Exception as e:
print(f"Error reading MFT file: {str(e)}")
if self.debug:
traceback.print_exc()
traceback.print_exc()

print(f"MFT processing complete. Total records processed: {self.stats['total_records']}")

def handle_interrupt(self) -> None:
if sys.platform == "win32":
Expand All @@ -111,27 +105,37 @@ def unix_handler():
getattr(signal, signame),
unix_handler)

def initialize_csv_writer(self):
if self.csvfile is None:
self.csvfile = open(self.output_file, 'w', newline='', encoding='utf-8')
self.csv_writer = csv.writer(self.csvfile)
self.csv_writer.writerow(CSV_HEADER)

async def write_csv_block(self) -> None:
print(f"Writing CSV block. Records in block: {len(self.mft_records)}")
try:
if self.csv_writer is None:
self.initialize_csv_writer()

for record in self.mft_records.values():
filepath = self.build_filepath(record)
csv_row = record.to_csv()
csv_row[-1] = filepath # Replace the filepath placeholder

csv_row = [str(item) for item in csv_row]

try:
filepath = self.build_filepath(record)
csv_row = record.to_csv()
csv_row[-1] = filepath # Replace the filepath placeholder

csv_row = [str(item) for item in csv_row]

self.csv_writer.writerow(csv_row)
except UnicodeEncodeError as e:
except Exception as e:
print(f"Error writing record {record.recordnum}: {str(e)}")
self.csv_writer.writerow([item.encode('utf-8', errors='replace').decode('utf-8') for item in csv_row])

await asyncio.sleep(0) # Yield control to allow other tasks to run
traceback.print_exc()

if self.csvfile:
self.csvfile.flush() # Ensure data is written to disk
print(f"CSV block written. Current file size: {self.csvfile.tell() if self.csvfile else 0} bytes")
except Exception as e:
print(f"Error writing CSV block: {str(e)}")
if self.debug:
traceback.print_exc()
print(f"Error in write_csv_block: {str(e)}")
traceback.print_exc()


async def write_remaining_records(self) -> None:
Expand Down Expand Up @@ -184,13 +188,14 @@ def print_statistics(self) -> None:


async def write_output(self) -> None:
print(f"Writing output in {self.export_format} format to {self.output_file}")
if self.export_format == "csv":
await FileWriters.write_csv(self.mft_records, self.output_file)
await self.write_remaining_records()
elif self.export_format == "json":
await FileWriters.write_json(self.mft_records, self.output_file)
await FileWriters.write_json(list(self.mft_records.values()), self.output_file)
elif self.export_format == "xml":
await FileWriters.write_xml(self.mft_records, self.output_file)
await FileWriters.write_xml(list(self.mft_records.values()), self.output_file)
elif self.export_format == "excel":
await FileWriters.write_excel(self.mft_records, self.output_file)
await FileWriters.write_excel(list(self.mft_records.values()), self.output_file)
else:
print(f"Unsupported export format: {self.export_format}")
2 changes: 1 addition & 1 deletion src/analyzeMFT/mft_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import zlib
from .constants import *
from .windows_time import WindowsTime
from typing import Dict, Set, List, Optional, Any
from typing import Dict, Set, List, Optional, Any, Union


class MftRecord:
Expand Down

0 comments on commit fbe96e1

Please sign in to comment.