diff --git a/src/analyzeMFT/file_writers.py b/src/analyzeMFT/file_writers.py index fc840a8..99d177a 100644 --- a/src/analyzeMFT/file_writers.py +++ b/src/analyzeMFT/file_writers.py @@ -4,6 +4,7 @@ import asyncio from typing import List, Dict, Any from .mft_record import MftRecord +from .constants import * class FileWriters: @staticmethod diff --git a/src/analyzeMFT/mft_analyzer.py b/src/analyzeMFT/mft_analyzer.py index 55dc136..3409e25 100644 --- a/src/analyzeMFT/mft_analyzer.py +++ b/src/analyzeMFT/mft_analyzer.py @@ -6,6 +6,7 @@ from typing import Dict, Set, List, Optional, Any from .constants import * from .mft_record import MftRecord +from .file_writers import FileWriters class MftAnalyzer: @@ -15,10 +16,12 @@ def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute self.debug = debug self.compute_hashes = compute_hashes self.export_format = export_format - self.mft_records = [] + self.mft_records = {} self.interrupt_flag = asyncio.Event() - self.csv_writer = None + self.csvfile = None + self.csv_writer = None + self.stats = { 'total_records': 0, 'active_records': 0, @@ -33,9 +36,9 @@ def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute 'unique_crc32': set(), }) - async def analyze(self) -> None: try: + self.initialize_csv_writer() await self.process_mft() await self.write_output() except Exception as e: @@ -43,10 +46,13 @@ async def analyze(self) -> None: if self.debug: traceback.print_exc() finally: + if self.csvfile: + self.csvfile.close() self.print_statistics() async def process_mft(self) -> None: + print(f"Processing MFT file: {self.mft_file}") try: with open(self.mft_file, 'rb') as f: while not self.interrupt_flag.is_set(): @@ -56,40 +62,28 @@ async def process_mft(self) -> None: try: record = MftRecord(raw_record, self.compute_hashes) - self.stats['total_records'] += 1 - if record.flags & FILE_RECORD_IN_USE: - self.stats['active_records'] += 1 - if record.flags & FILE_RECORD_IS_DIRECTORY: - self.stats['directories'] += 1 - else: - self.stats['files'] += 1 - - if self.compute_hashes: - self.stats['unique_md5'].add(record.md5) - self.stats['unique_sha256'].add(record.sha256) - self.stats['unique_sha512'].add(record.sha512) - self.stats['unique_crc32'].add(record.crc32) - - if self.debug: - print(f"Processing record {self.stats['total_records']}: {record.filename}") - + + # Store the record (no change needed here, it will work with a dict) self.mft_records[record.recordnum] = record + if self.stats['total_records'] % 10000 == 0: + print(f"Processed {self.stats['total_records']} records...") + # Write to CSV in blocks of 1000 records if self.stats['total_records'] % 1000 == 0: await self.write_csv_block() self.mft_records.clear() # Clear processed records to save memory except Exception as e: - if self.debug: - print(f"Error processing record {self.stats['total_records']}: {str(e)}") + print(f"Error processing record {self.stats['total_records']}: {str(e)}") continue except Exception as e: print(f"Error reading MFT file: {str(e)}") - if self.debug: - traceback.print_exc() + traceback.print_exc() + + print(f"MFT processing complete. Total records processed: {self.stats['total_records']}") def handle_interrupt(self) -> None: if sys.platform == "win32": @@ -111,27 +105,37 @@ def unix_handler(): getattr(signal, signame), unix_handler) + def initialize_csv_writer(self): + if self.csvfile is None: + self.csvfile = open(self.output_file, 'w', newline='', encoding='utf-8') + self.csv_writer = csv.writer(self.csvfile) + self.csv_writer.writerow(CSV_HEADER) + async def write_csv_block(self) -> None: + print(f"Writing CSV block. Records in block: {len(self.mft_records)}") try: + if self.csv_writer is None: + self.initialize_csv_writer() + for record in self.mft_records.values(): - filepath = self.build_filepath(record) - csv_row = record.to_csv() - csv_row[-1] = filepath # Replace the filepath placeholder - - csv_row = [str(item) for item in csv_row] - try: + filepath = self.build_filepath(record) + csv_row = record.to_csv() + csv_row[-1] = filepath # Replace the filepath placeholder + + csv_row = [str(item) for item in csv_row] + self.csv_writer.writerow(csv_row) - except UnicodeEncodeError as e: + except Exception as e: print(f"Error writing record {record.recordnum}: {str(e)}") - self.csv_writer.writerow([item.encode('utf-8', errors='replace').decode('utf-8') for item in csv_row]) - - await asyncio.sleep(0) # Yield control to allow other tasks to run + traceback.print_exc() + if self.csvfile: + self.csvfile.flush() # Ensure data is written to disk + print(f"CSV block written. Current file size: {self.csvfile.tell() if self.csvfile else 0} bytes") except Exception as e: - print(f"Error writing CSV block: {str(e)}") - if self.debug: - traceback.print_exc() + print(f"Error in write_csv_block: {str(e)}") + traceback.print_exc() async def write_remaining_records(self) -> None: @@ -184,13 +188,14 @@ def print_statistics(self) -> None: async def write_output(self) -> None: + print(f"Writing output in {self.export_format} format to {self.output_file}") if self.export_format == "csv": - await FileWriters.write_csv(self.mft_records, self.output_file) + await self.write_remaining_records() elif self.export_format == "json": - await FileWriters.write_json(self.mft_records, self.output_file) + await FileWriters.write_json(list(self.mft_records.values()), self.output_file) elif self.export_format == "xml": - await FileWriters.write_xml(self.mft_records, self.output_file) + await FileWriters.write_xml(list(self.mft_records.values()), self.output_file) elif self.export_format == "excel": - await FileWriters.write_excel(self.mft_records, self.output_file) + await FileWriters.write_excel(list(self.mft_records.values()), self.output_file) else: print(f"Unsupported export format: {self.export_format}") \ No newline at end of file diff --git a/src/analyzeMFT/mft_record.py b/src/analyzeMFT/mft_record.py index 0f1370c..1df7d0e 100644 --- a/src/analyzeMFT/mft_record.py +++ b/src/analyzeMFT/mft_record.py @@ -4,7 +4,7 @@ import zlib from .constants import * from .windows_time import WindowsTime -from typing import Dict, Set, List, Optional, Any +from typing import Dict, Set, List, Optional, Any, Union class MftRecord: