From 8a692c7e2b6d6d767b37394ff22ccb5517f039a4 Mon Sep 17 00:00:00 2001 From: emielvdveen Date: Wed, 13 Jul 2022 16:20:19 +0200 Subject: [PATCH] Fixed support for filename matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Filename was a constant: ‘user-data’ - Filename is now copied from the original file - Parameter in the process method is no longer a BufferedReader but a string (file path). --- data_extractor/pyprocess.js | 3 ++- data_extractor/pyworker.js | 15 +++++++++++++-- data_extractor/whatsapp_chat/__init__.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/data_extractor/pyprocess.js b/data_extractor/pyprocess.js index 5c04e24..833ef0f 100644 --- a/data_extractor/pyprocess.js +++ b/data_extractor/pyprocess.js @@ -48,6 +48,7 @@ function process() { resultElement().style.display = "none"; const file = fileInput().files[0]; + const filename = file.name const reader = file.stream().getReader(); const sendToWorker = ({ done, value }) => { if (done) { @@ -60,7 +61,7 @@ function process() { reader.read().then(sendToWorker); }; console.log("process: send event: initData"); - pyWorker.postMessage({ eventType: "initData", size: file.size }); + pyWorker.postMessage({ eventType: "initData", filename: filename, size: file.size }); reader.read().then(sendToWorker); } diff --git a/data_extractor/pyworker.js b/data_extractor/pyworker.js index 15815de..5fb65f5 100644 --- a/data_extractor/pyworker.js +++ b/data_extractor/pyworker.js @@ -10,13 +10,15 @@ loadPyodide({ indexURL: "https://cdn.jsdelivr.net/pyodide/v0.19.0/full/" }).then }); let file = undefined +var filename = undefined onmessage = (event) => { const { eventType } = event.data; if (eventType === "loadScript") { self.pyodide.runPython(event.data.script) } else if (eventType === "initData") { - file = self.pyodide.FS.open("user-data", "w") + filename = event.data.filename + file = self.pyodide.FS.open(filename, "w") } else if (eventType === "data") { self.pyodide.FS.write(file, event.data.chunk, 0, event.data.chunk.length) } else if (eventType === "processData") { @@ -24,7 +26,15 @@ onmessage = (event) => { def _process_data(): import json import html - result = process(open("user-data", "rb")) + import pandas as pd + + result = process("${filename}") + + if not result: + data_frame = pd.DataFrame() + data_frame["Messages"] = pd.Series(["Unfortunately, no data could be extracted from the selected file."], name="Messages") + result = [{"id": "important_feedback", "title": "Important feedback", "data_frame": data_frame}] + data_output = [] html_output = [] for data in result: @@ -32,6 +42,7 @@ onmessage = (event) => { df = data['data_frame'] html_output.append(df.to_html(classes=["data-donation-extraction-results"], justify="left")) data_output.append({"id": data["id"], "data_frame": df.to_json()}) + return { "html": "\\n".join(html_output), "data": json.dumps(data_output), diff --git a/data_extractor/whatsapp_chat/__init__.py b/data_extractor/whatsapp_chat/__init__.py index b193d5b..058b1f9 100644 --- a/data_extractor/whatsapp_chat/__init__.py +++ b/data_extractor/whatsapp_chat/__init__.py @@ -693,7 +693,7 @@ def process(file_data): try: zfile = zipfile.ZipFile(file_data) except: - if FILE_RE.match(file_data.name): + if FILE_RE.match(file_data): tfile = open(file_data, encoding="utf8") chat = parse_chat(log_error, tfile.read())