diff --git a/doc/source/standard_scripts/index.rst b/doc/source/standard_scripts/index.rst index f38e98c..1069b8c 100644 --- a/doc/source/standard_scripts/index.rst +++ b/doc/source/standard_scripts/index.rst @@ -21,12 +21,19 @@ to: from port.platforms.instagram import process Available platforms -------------------- +=================== -.. automodule:: port.platforms.chatgpt +ChatGPT +------- +.. automodule:: port.platforms.chatgpt Instagram --------- .. automodule:: port.platforms.instagram + +TikTok +--------- + +.. automodule:: port.platforms.tiktok diff --git a/src/framework/processing/py/port/helpers/extraction_helpers.py b/src/framework/processing/py/port/helpers/extraction_helpers.py index 8a9df8c..af458b7 100644 --- a/src/framework/processing/py/port/helpers/extraction_helpers.py +++ b/src/framework/processing/py/port/helpers/extraction_helpers.py @@ -249,7 +249,7 @@ def epoch_to_iso(epoch_timestamp: str | int | float) -> str: epoch_timestamp = int(float(epoch_timestamp)) out = datetime.fromtimestamp(epoch_timestamp, tz=timezone.utc).isoformat() except (OverflowError, OSError, ValueError, TypeError) as e: - logger.error("Could not convert epoch time timestamp, %s", e) + logger.debug("Could not convert epoch time timestamp, %s", e) return out diff --git a/src/framework/processing/py/port/platforms/chatgpt.py b/src/framework/processing/py/port/platforms/chatgpt.py index 7fb23d7..c30d58c 100644 --- a/src/framework/processing/py/port/platforms/chatgpt.py +++ b/src/framework/processing/py/port/platforms/chatgpt.py @@ -2,6 +2,8 @@ ChatGPT This module contains an example flow of a ChatGPT data donation study + +To see what type of DDPs from ChatGPT it is designed for check DDP_CATEGORIES """ import logging @@ -9,9 +11,7 @@ import port.api.props as props import port.helpers.extraction_helpers as eh -import port.helpers.port_helpers as ph -import port.helpers.validate as validate - +from port.platforms.flow_builder import DataDonationFlow from port.helpers.validate import ( DDPCategory, DDPFiletype, @@ -104,73 +104,8 @@ def extraction_fun(chatgpt_zip: str) -> list[props.PropsUIPromptConsentFormTable -# TEXTS -SUBMIT_FILE_HEADER = props.Translatable({ - "en": "Select your ChatGPT file", - "nl": "Selecteer uw ChatGPT bestand" -}) - -REVIEW_DATA_HEADER = props.Translatable({ - "en": "Your ChatGPT data", - "nl": "Uw ChatGPT gegevens" -}) - -RETRY_HEADER = props.Translatable({ - "en": "Try again", - "nl": "Probeer opnieuw" -}) - -REVIEW_DATA_DESCRIPTION = props.Translatable({ - "en": "Below you will find a currated selection of ChatGPT data. In this case only the conversations you had with ChatGPT are show on screen. The data represented in this way are much more insightfull because you can actually read back the conversations you had with ChatGPT", - "nl": "Below you will find a currated selection of ChatGPT data. In this case only the conversations you had with ChatGPT are show on screen. The data represented in this way are much more insightfull because you can actually read back the conversations you had with ChatGPT", -}) - - -#def process(session_id: int): -# platform_name = "ChatGPT" -# -# table_list = None -# while True: -# logger.info("Prompt for file for %s", platform_name) -# -# file_prompt = ph.generate_file_prompt("application/zip") -# file_result = yield ph.render_page(SUBMIT_FILE_HEADER, file_prompt) -# -# if file_result.__type__ == "PayloadString": -# validation = validate.validate_zip(DDP_CATEGORIES, file_result.value) -# -# # Happy flow: Valid DDP -# if validation.get_status_code_id() == 0: -# logger.info("Payload for %s", platform_name) -# extraction_result = extraction(file_result.value) -# table_list = extraction_result -# break -# -# # Enter retry flow, reason: if DDP was not a ChatGPT DDP -# if validation.get_status_code_id() != 0: -# logger.info("Not a valid %s zip; No payload; prompt retry_confirmation", platform_name) -# retry_prompt = ph.generate_retry_prompt(platform_name) -# retry_result = yield ph.render_page(RETRY_HEADER, retry_prompt) -# -# if retry_result.__type__ == "PayloadTrue": -# continue -# else: -# logger.info("Skipped during retry flow") -# break -# -# else: -# logger.info("Skipped at file selection ending flow") -# break -# -# if table_list is not None: -# logger.info("Prompt consent; %s", platform_name) -# review_data_prompt = ph.generate_review_data_prompt(f"{session_id}-chatgpt", REVIEW_DATA_DESCRIPTION, table_list) -# yield ph.render_page(REVIEW_DATA_HEADER, review_data_prompt) -# -# yield ph.exit(0, "Success") -# yield ph.render_end_page() - -texts = { + +TEXTS = { "submit_file_header": props.Translatable({ "en": "Select your ChatGPT file", "nl": "Selecteer uw ChatGPT bestand" @@ -189,16 +124,20 @@ def extraction_fun(chatgpt_zip: str) -> list[props.PropsUIPromptConsentFormTable }), } +FUNCTIONS = { + "extraction": extraction_fun +} + -from port.platforms.flow_builder import DataDonationFlow def process(session_id: int): flow = DataDonationFlow( - platform_name="ChatGPT", + platform_name="ChatGPT", ddp_categories=DDP_CATEGORIES, - texts=texts, - extraction_fun=extraction_fun, + texts=TEXTS, + functions=FUNCTIONS, session_id=session_id, + is_donate_logs=False, ) yield from flow.initialize_default_flow().run() diff --git a/src/framework/processing/py/port/platforms/flow_builder.py b/src/framework/processing/py/port/platforms/flow_builder.py index cfda282..1c20b43 100644 --- a/src/framework/processing/py/port/platforms/flow_builder.py +++ b/src/framework/processing/py/port/platforms/flow_builder.py @@ -1,27 +1,67 @@ """ Flow Builder -This module contains an example flow of a ChatGPT data donation study +This module contains tools to create data donation flows """ + import logging +import json +import io import port.helpers.port_helpers as ph import port.helpers.validate as validate logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s --- %(name)s --- %(levelname)s --- %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S%z", +) + + +def should_yield(func): + func.is_yieldable = True + return func + + +def is_yieldable(func): + return getattr(func, 'is_yieldable', False) class DataDonationFlow: - def __init__(self, platform_name, ddp_categories, texts, extraction_fun, session_id): - self.name = platform_name, + def __init__(self, platform_name, ddp_categories, texts, functions, session_id, is_donate_logs): + self.name = platform_name self.ddp_categories = ddp_categories self.texts = texts - self.extraction = extraction_fun + self.functions = functions self.session_id = session_id + self.is_donate_logs = is_donate_logs + self.log_stream = io.StringIO() self.steps = [] - - def set_session_id(self, session_id): - self.session_id = session_id + self._configure_logger() + + def _configure_logger(self): + if self.is_donate_logs: + handler_stream = self.log_stream + logger.handlers = [] # clear handler + handler = logging.StreamHandler(handler_stream) + handler.setLevel(logging.INFO) + handler.setFormatter( + logging.Formatter( + fmt="%(asctime)s --- %(name)s --- %(levelname)s --- %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S%z" + ) + ) + logger.addHandler(handler) + + def donate_logs(self): + log_string = self.log_stream.getvalue() + if log_string: + log_data = log_string.split("\n") + else: + log_data = ["no logs"] + + return ph.donate(f"{self.session_id}-tracking.json", json.dumps(log_data)) def add_step(self, step_function): self.steps.append(step_function) @@ -29,21 +69,29 @@ def add_step(self, step_function): def initialize_default_flow(self): self.add_step(prompt_file_and_validate_input) - self.add_step(extract_and_review_data) + self.add_step(extract_data) + self.add_step(review_data) self.add_step(exit_flow) return self def run(self): logger.info("Starting data donation flow for %s", self.name) - + print(self.name) + if self.is_donate_logs: + yield self.donate_logs() + data = None for step in self.steps: - data = yield from step(self, data) - - logger.info("Flow completed %s", self.name) - return data + if is_yieldable(step): + data = yield from step(self, data) + else: + data = step(self, data) + + if self.is_donate_logs: + yield self.donate_logs() +@should_yield def prompt_file_and_validate_input(flow, _): logger.info("Prompt for file step for %s", flow.name) ddp_zip = None @@ -76,8 +124,13 @@ def prompt_file_and_validate_input(flow, _): return ddp_zip -def extract_and_review_data(flow, zip): - table_list = flow.extraction(zip) +def extract_data(flow, zip): + table_list = flow.functions["extraction"](zip) + return table_list + + +@should_yield +def review_data(flow, table_list): if table_list != None: logger.info("Ask participant to review data; %s", flow.name) review_data_prompt = ph.generate_review_data_prompt(f"{flow.session_id}-chatgpt", flow.texts["review_data_description"], table_list) @@ -86,7 +139,7 @@ def extract_and_review_data(flow, zip): logger.info("No data got extracted %s", flow.name) +@should_yield def exit_flow(_, __): yield ph.exit(0, "Success") yield ph.render_end_page() - diff --git a/src/framework/processing/py/port/platforms/instagram.py b/src/framework/processing/py/port/platforms/instagram.py index f2a9251..e8202bb 100644 --- a/src/framework/processing/py/port/platforms/instagram.py +++ b/src/framework/processing/py/port/platforms/instagram.py @@ -2,16 +2,17 @@ Instagram This module contains an example flow of a Instagram data donation study + +To see what type of DDPs from Instagram it is designed for check DDP_CATEGORIES """ + import logging import pandas as pd import port.api.props as props import port.helpers.extraction_helpers as eh -import port.helpers.port_helpers as ph -import port.helpers.validate as validate - +from port.platforms.flow_builder import DataDonationFlow from port.helpers.validate import ( DDPCategory, DDPFiletype, @@ -341,7 +342,7 @@ def liked_posts_to_df(instagram_zip: str) -> pd.DataFrame: -def extraction(instagram_zip: str) -> list[props.PropsUIPromptConsentFormTable]: +def extraction_fun(instagram_zip: str) -> list[props.PropsUIPromptConsentFormTable]: tables_to_render = [] df = posts_viewed_to_df(instagram_zip) @@ -546,68 +547,39 @@ def extraction(instagram_zip: str) -> list[props.PropsUIPromptConsentFormTable]: return tables_to_render -# TEXTS -SUBMIT_FILE_HEADER = props.Translatable({ - "en": "Select your Instagram file", - "nl": "Selecteer uw Instagram bestand" -}) - -REVIEW_DATA_HEADER = props.Translatable({ - "en": "Your Instagram data", - "nl": "Uw Instagram gegevens" -}) - -RETRY_HEADER = props.Translatable({ - "en": "Try again", - "nl": "Probeer opnieuw" -}) - -REVIEW_DATA_DESCRIPTION = props.Translatable({ - "en": "Below you will find a currated selection of Instagram data.", - "nl": "Below you will find a currated selection of Instagram data.", -}) - +# Configurables for the data donation flow + +TEXTS = { + "submit_file_header": props.Translatable({ + "en": "Select your Instagram file", + "nl": "Selecteer uw Instagram bestand" + }), + "review_data_header": props.Translatable({ + "en": "Your Instagram data", + "nl": "Uw Instagram gegevens" + }), + "retry_header": props.Translatable({ + "en": "Try again", + "nl": "Probeer opnieuw" + }), + "review_data_description": props.Translatable({ + "en": "Below you will find a currated selection of Instagram data.", + "nl": "Below you will find a currated selection of Instagram data.", + }), +} + +FUNCTIONS = { + "extraction": extraction_fun +} def process(session_id: int): - platform_name = "Instagram" - - table_list = None - while True: - logger.info("Prompt for file for %s", platform_name) - - file_prompt = ph.generate_file_prompt("application/zip") - file_result = yield ph.render_page(SUBMIT_FILE_HEADER, file_prompt) - - if file_result.__type__ == "PayloadString": - validation = validate.validate_zip(DDP_CATEGORIES, file_result.value) - - # Happy flow: Valid DDP - if validation.get_status_code_id() == 0: - logger.info("Payload for %s", platform_name) - extraction_result = extraction(file_result.value) - table_list = extraction_result - break - - # Enter retry flow, reason: if DDP was not a Instagram DDP - if validation.get_status_code_id() != 0: - logger.info("Not a valid %s zip; No payload; prompt retry_confirmation", platform_name) - retry_prompt = ph.generate_retry_prompt(platform_name) - retry_result = yield ph.render_page(RETRY_HEADER, retry_prompt) - - if retry_result.__type__ == "PayloadTrue": - continue - else: - logger.info("Skipped during retry flow") - break - - else: - logger.info("Skipped at file selection ending flow") - break - - if table_list is not None: - logger.info("Prompt consent; %s", platform_name) - review_data_prompt = ph.generate_review_data_prompt(f"{session_id}-instagram", REVIEW_DATA_DESCRIPTION, table_list) - yield ph.render_page(REVIEW_DATA_HEADER, review_data_prompt) + flow = DataDonationFlow( + platform_name="Instagram", + ddp_categories=DDP_CATEGORIES, + texts=TEXTS, + functions=FUNCTIONS, + session_id=session_id, + is_donate_logs=False, + ) - yield ph.exit(0, "Success") - yield ph.render_end_page() + yield from flow.initialize_default_flow().run() diff --git a/src/framework/processing/py/port/platforms/tiktok.py b/src/framework/processing/py/port/platforms/tiktok.py index 53df8f6..a9b55a0 100644 --- a/src/framework/processing/py/port/platforms/tiktok.py +++ b/src/framework/processing/py/port/platforms/tiktok.py @@ -1,22 +1,21 @@ """ TikTok -This module contains an example flow of a TikTok data donation study +This module contains an example flow of a TikTok data donation study. + +To see what type of DDPs from TikTok it is designed for check DDP_CATEGORIES """ from typing import Dict import logging import io import re -import re import pandas as pd import port.api.props as props import port.helpers.extraction_helpers as eh -import port.helpers.port_helpers as ph -import port.helpers.validate as validate - +from port.platforms.flow_builder import DataDonationFlow from port.helpers.validate import ( DDPCategory, DDPFiletype, @@ -280,7 +279,7 @@ def settings_to_df(tiktok_zip: str): -def extraction(tiktok_zip: str) -> list[props.PropsUIPromptConsentFormTable]: +def extraction_fun(tiktok_zip: str) -> list[props.PropsUIPromptConsentFormTable]: tables_to_render = [] data = browsing_history_to_df(tiktok_zip) @@ -409,68 +408,38 @@ def extraction(tiktok_zip: str) -> list[props.PropsUIPromptConsentFormTable]: return tables_to_render -# TEXTS -SUBMIT_FILE_HEADER = props.Translatable({ - "en": "Select your TikTok file", - "nl": "Selecteer uw TikTok bestand" -}) - -REVIEW_DATA_HEADER = props.Translatable({ - "en": "Your TikTok data", - "nl": "Uw TikTok gegevens" -}) - -RETRY_HEADER = props.Translatable({ - "en": "Try again", - "nl": "Probeer opnieuw" -}) - -REVIEW_DATA_DESCRIPTION = props.Translatable({ - "en": "Below you will find a selection of your TikTok data.", - "nl": "Hieronder vindt u een geselecteerde weergave van uw TikTok-gegevens.", -}) +TEXTS = { + "submit_file_header": props.Translatable({ + "en": "Select your TikTok file", + "nl": "Selecteer uw TikTok bestand" + }), + "review_data_header": props.Translatable({ + "en": "Your TikTok data", + "nl": "Uw TikTok gegevens" + }), + "retry_header": props.Translatable({ + "en": "Try again", + "nl": "Probeer opnieuw" + }), + "review_data_description": props.Translatable({ + "en": "Below you will find a selection of your TikTok data.", + "nl": "Hieronder vindt u een geselecteerde weergave van uw TikTok-gegevens.", + }), +} + +FUNCTIONS = { + "extraction": extraction_fun +} def process(session_id: int): - platform_name = "TikTok" - - table_list = None - while True: - logger.info("Prompt for file for %s", platform_name) - - file_prompt = ph.generate_file_prompt("application/zip") - file_result = yield ph.render_page(SUBMIT_FILE_HEADER, file_prompt) - - if file_result.__type__ == "PayloadString": - validation = validate.validate_zip(DDP_CATEGORIES, file_result.value) - - # Happy flow: Valid DDP - if validation.get_status_code_id() == 0: - logger.info("Payload for %s", platform_name) - extraction_result = extraction(file_result.value) - table_list = extraction_result - break - - # Enter retry flow, reason: if DDP was not a Valid DDP - if validation.get_status_code_id() != 0: - logger.info("Not a valid %s zip; No payload; prompt retry_confirmation", platform_name) - retry_prompt = ph.generate_retry_prompt(platform_name) - retry_result = yield ph.render_page(RETRY_HEADER, retry_prompt) - - if retry_result.__type__ == "PayloadTrue": - continue - else: - logger.info("Skipped during retry flow") - break - - else: - logger.info("Skipped at file selection ending flow") - break - - if table_list is not None: - logger.info("Prompt consent; %s", platform_name) - review_data_prompt = ph.generate_review_data_prompt(f"{session_id}-tiktok", REVIEW_DATA_DESCRIPTION, table_list) - yield ph.render_page(REVIEW_DATA_HEADER, review_data_prompt) - - yield ph.exit(0, "Success") - yield ph.render_end_page() + flow = DataDonationFlow( + platform_name="TikTok", + ddp_categories=DDP_CATEGORIES, + texts=TEXTS, + functions=FUNCTIONS, + session_id=session_id, + is_donate_logs=False, + ) + + yield from flow.initialize_default_flow().run()