diff --git a/.gitignore b/.gitignore
index 730dd86..c907b87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,10 @@ release.zip
 
 .vscode/
 *.pyc
+.vscode/
+
+__pycache__
+
 # dependencies
 /node_modules
 /.pnp
diff --git a/public/port-0.0.0-py3-none-any.whl b/public/port-0.0.0-py3-none-any.whl
index e48400f..6032cad 100644
Binary files a/public/port-0.0.0-py3-none-any.whl and b/public/port-0.0.0-py3-none-any.whl differ
diff --git a/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl b/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
index e48400f..6032cad 100644
Binary files a/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl and b/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl differ
diff --git a/src/framework/processing/py/port/__init__.py b/src/framework/processing/py/port/__init__.py
index 9dd0e93..d067edc 100644
--- a/src/framework/processing/py/port/__init__.py
+++ b/src/framework/processing/py/port/__init__.py
@@ -1,5 +1,3 @@
 from port.main import start
 
-__all__ = [
-  "start"
-]
+__all__ = ["start"]
diff --git a/src/framework/processing/py/port/api/props.py b/src/framework/processing/py/port/api/props.py
index adcfde2..1c73192 100644
--- a/src/framework/processing/py/port/api/props.py
+++ b/src/framework/processing/py/port/api/props.py
@@ -11,12 +11,14 @@ class Translations(TypedDict):
         en: English string to display
         nl: Dutch string to display
     """
+
     en: str
     nl: str
 
 @dataclass
 class Translatable:
-    """Wrapper class for Translations""" 
+    """Wrapper class for Translations"""
+
     translations: Translations
 
     def toDict(self):
@@ -30,6 +32,7 @@ class PropsUIHeader:
     Attributes:
         title: title of the page
     """
+
     title: Translatable
 
     def toDict(self):
@@ -46,6 +49,7 @@ class PropsUIFooter:
     Attributes:
         progressPercentage: float indicating the progress in the flow
     """
+
     progressPercentage: float
 
     def toDict(self):
@@ -59,14 +63,15 @@ def toDict(self):
 class PropsUIPromptConfirm:
     """Retry submitting a file page
 
-    Prompt the user if they want to submit a new file. 
-    This can be used in case a file could not be processed. 
+    Prompt the user if they want to submit a new file.
+    This can be used in case a file could not be processed.
 
     Attributes:
         text: message to display
         ok: message to display if the user wants to try again
         cancel: message to display if the user wants to continue regardless
     """
+
     text: Translatable
     ok: Translatable
     cancel: Translatable
@@ -173,7 +178,7 @@ def toDict(self):
 
 @dataclass
 class PropsUIPromptConsentFormTable:
-    """Table to be shown to the participant prior to donation 
+    """Table to be shown to the participant prior to donation
 
     Attributes:
         id: a unique string to itentify the table after donation
@@ -182,6 +187,7 @@ class PropsUIPromptConsentFormTable:
         editable: determines whether the table has an editable mode that can be toggled with a button
         visualizations: optional list of visualizations to be shown
     """
+
     id: str
     title: Translatable
     data_frame: pd.DataFrame
@@ -214,6 +220,7 @@ class PropsUIPromptConsentForm:
         tables: a list of tables
         meta_tables: a list of optional tables, for example for logging data
     """
+
     tables: list[PropsUIPromptConsentFormTable]
     meta_tables: list[PropsUIPromptConsentFormTable]
 
@@ -245,6 +252,7 @@ class PropsUIPromptFileInput:
         description: text with an explanation
         extensions: accepted mime types, example: "application/zip, text/plain"
     """
+
     description: Translatable
     extensions: str
 
@@ -263,6 +271,7 @@ class RadioItem(TypedDict):
         id: id of radio button
         value: text to be displayed
     """
+
     id: int
     value: str
 
@@ -278,6 +287,7 @@ class PropsUIPromptRadioInput:
         description: short description of the radio group
         items: a list of radio buttons
     """
+
     title: Translatable
     description: Translatable
     items: list[RadioItem]
@@ -398,6 +408,7 @@ class PropsUIPageDonation:
         body: main body of the page, see the individual classes for an explanation
         footer: page footer
     """
+
     platform: str
     header: PropsUIHeader
     body: PropsUIPromptRadioInput | PropsUIPromptConsentForm | PropsUIPromptFileInput | PropsUIPromptConfirm | PropsUIPromptQuestionnaire
@@ -416,6 +427,7 @@ def toDict(self):
 
 class PropsUIPageEnd:
     """An ending page to show the user they are done"""
+
     def toDict(self):
         dict = {}
         dict["__type__"] = "PropsUIPageEnd"
diff --git a/src/framework/processing/py/port/script.py b/src/framework/processing/py/port/script.py
index c8fa219..684e3a3 100644
--- a/src/framework/processing/py/port/script.py
+++ b/src/framework/processing/py/port/script.py
@@ -1,21 +1,29 @@
 import itertools
 import port.api.props as props
-from port.api.commands import (CommandSystemDonate, CommandUIRender)
+from port.api.commands import CommandSystemDonate, CommandUIRender
 
 import pandas as pd
 import zipfile
 import json
 import datetime
-from collections import defaultdict
+import fnmatch
+from collections import defaultdict, namedtuple
+from contextlib import suppress
+
 ##########################
 # TikTok file processing #
 ##########################
 
-filter_start = datetime.datetime(2021, 1, 1)
+filter_start = datetime.datetime(1990, 1, 1)
 filter_end = datetime.datetime(2025, 1, 1)
 
 datetime_format = "%Y-%m-%d %H:%M:%S"
 
+
+def parse_datetime(value):
+    return datetime.datetime.fromtimestamp(value)
+
+
 def get_in(data_dict, *key_path):
     for k in key_path:
         data_dict = data_dict.get(k, None)
@@ -23,21 +31,52 @@ def get_in(data_dict, *key_path):
             return None
     return data_dict
 
-def get_video_list_data(data):
-    return get_in(data, "Activity", "Video Browsing History", "VideoList")
+
+def get_list(data_dict, *key_path):
+    result = get_in(data_dict, *key_path)
+    if result is None:
+        return []
+    return result
+
+
+def get_dict(data_dict, *key_path):
+    result = get_in(data_dict, *key_path)
+    if result is None:
+        return {}
+    return result
+
+
+def get_string(data_dict, *key_path):
+    result = get_in(data_dict, *key_path)
+    if result is None:
+        return ""
+    return result
+
+
+def cast_number(data_dict, *key_path):
+    value = get_in(data_dict, *key_path)
+    if value is None or value == "None":
+        return 0
+    return value
+
+
+def get_activity_video_browsing_list_data(data):
+    return get_list(data, "Activity", "Video Browsing History", "VideoList")
+
 
 def get_comment_list_data(data):
     return get_in(data, "Comment", "Comments", "CommentsList")
 
-def get_date_filtered_items(items):
-    for item in items:
-        timestamp =datetime.datetime.strptime(item["Date"], datetime_format)
+
+def filter_timestamps(timestamps):
+    for timestamp in timestamps:
         if timestamp < filter_start or timestamp > filter_end:
             continue
-        yield (timestamp, item)
+        yield timestamp
+
 
 def get_count_by_date_key(timestamps, key_func):
-    """ Returns a list of tuples of the form (key, count)
+    """Returns a dict of the form (key, count)
 
     The key is determined by the key_func, which takes a datetime object and
     returns an object suitable for sorting and usage as a dictionary key.
@@ -49,17 +88,35 @@ def get_count_by_date_key(timestamps, key_func):
         item_count[key_func(timestamp)] += 1
     return sorted(item_count.items())
 
+
 def get_all_first(items):
     return (i[0] for i in items)
 
+
 def hourly_key(date):
-    return date.strftime("%Y-%m-%d %H" )
+    return date.replace(minute=0, second=0, microsecond=0)
+
 
 def daily_key(date):
-    return date.strftime("%Y-%m-%d")
+    return date.date()
+
+
+# =====================
+def glob(zipfile, pattern):
+    return fnmatch.filter(zipfile.namelist(), pattern)
+
+
+def glob_json(zipfile, pattern):
+    for name in glob(zipfile, pattern):
+        with zipfile.open(name) as f:
+            yield json.load(f)
+
+
+# =====================
+
 
 def get_sessions(timestamps):
-    """ Returns a list of tuples of the form (start, end, duration)
+    """Returns a list of tuples of the form (start, end, duration)
 
     The start and end are datetime objects, and the duration is a timedelta
     object.
@@ -74,147 +131,485 @@ def get_sessions(timestamps):
     start = timestamps[0]
     end = timestamps[0]
     for prev, cur in zip(timestamps, timestamps[1:]):
-        if cur - prev > datetime.timedelta(hours=1):
-            sessions.append((start, end, end-start))
+        if cur - prev > datetime.timedelta(minutes=5):
+            sessions.append((start, end, end - start))
             start = cur
         end = cur
-    sessions.append((start, end, end-start))
+    sessions.append((start, end, end - start))
     return sessions
 
-def get_json_data(zip_file):
-    with zipfile.ZipFile(zip_file, "r") as zip:
-        for name in zip.namelist():
-            if not name.endswith(".json"):
-                continue
-            with zip.open(name) as json_file:
-                yield json.load(json_file)
 
+def filtered_count(data, *key_path):
+    items = get_list(data, *key_path)
+    filtered_items = get_date_filtered_items(items)
+    return len(list(filtered_items))
 
-def extract_tiktok_data(zip_file):
-    for data in get_json_data(zip_file):
-        videos = list(get_all_first(get_date_filtered_items(get_video_list_data(data))))
-        video_counts=  get_count_by_date_key(videos, hourly_key)
-        table_title = props.Translatable({
-            "en": "TikTok video browsing history",
-            "nl": "TikTok video geschiedenis"
-        })
-        print(video_counts)
-        data_frame = pd.DataFrame(video_counts, columns=["Hour", "View Count"])
-        return [props.PropsUIPromptConsentFormTable("tiktok_video_counts", table_title, data_frame)]
 
+def get_chat_history(data):
+    return get_dict(data, "Direct Messages", "Chat History", "ChatHistory")
 
-        # comment_list_dates = list(get_all_first(get_date_filtered_items(get_comment_list_data(data))))
-        # sessions = get_sessions(itertools.chain(video_dates, comment_list_dates))
-        # yield sessions
 
-# data = json.load(open(sys.argv[1]))
+def flatten_chat_history(history):
+    return itertools.chain(*history.values())
 
-# from pprint import pprint
-# video_dates = list(get_all_first(get_date_filtered_items(get_video_list_data(data))))
-# pprint(get_count_by_date_key(video_dates, hourly_key))
-# pprint(get_count_by_date_key(video_dates, daily_key))
-# print("#"*80)
-# comment_list_dates = list(get_all_first(get_date_filtered_items(get_comment_list_data(data))))
-# pprint(get_count_by_date_key(comment_list_dates, hourly_key))
-# pprint(get_count_by_date_key(comment_list_dates, daily_key))
 
-# sessions = get_sessions(itertools.chain(video_dates, comment_list_dates))
-# pprint(sessions)
+def filter_by_key(items, key, value):
+    return filter(lambda item: item[key] == value, items)
+
+
+def exclude_by_key(items, key, value):
+    """
+    Return a filtered list where items that match key & value are excluded.
+    """
+    return filter(lambda item: item[key] != value, items)
+
+
+def map_to_timeslot(series):
+    return series.map(lambda hour: f"{hour}-{hour+1}")
+
+
+def count_items(zipfile, pattern, key):
+    return sum(len(data[key]) for data in glob_json(zipfile, pattern))
+
+
+def count_posts(zipfile):
+    return sum(len(data) for data in glob_json(zipfile, "content/posts_*.json"))
+
+
+def count_messages(zipfile):
+    counts = {"sent": 0, "received": 0}
+    for data in glob_json(zipfile, "messages/inbox/**/message_*.json"):
+        donating_user = data["participants"][1]["name"]
+        for message in data["messages"]:
+            key = "sent" if message["sender_name"] == donating_user else "received"
+            counts[key] += 1
+    return counts
+
+
+def extract_summary_data(zipfile):
+    message_counts = count_messages(zipfile)
+    summary_data = {
+        "Description": [
+            "Followers",
+            "Following",
+            "Posts",
+            "Comments posted",
+            "Videos watched",
+            "Posts viewed",
+            "Messages sent",
+            "Messages received",
+            "Ads viewed",
+        ],
+        "Number": [
+            count_items(
+                zipfile, "followers_and_following/followers_*.json", "string_list_data"
+            ),
+            count_items(
+                zipfile,
+                "followers_and_following/following.json",
+                "relationships_following",
+            ),
+            count_posts(zipfile),
+            count_items(
+                zipfile, "comments/post_comments.json", "comments_media_comments"
+            ),
+            count_items(
+                zipfile,
+                "ads_and_topics/videos_watched.json",
+                "impressions_history_videos_watched",
+            ),
+            count_items(
+                zipfile,
+                "ads_and_topics/posts_viewed.json",
+                "impressions_history_posts_seen",
+            ),
+            message_counts["sent"],
+            message_counts["received"],
+            count_items(
+                zipfile,
+                "ads_and_topics/ads_viewed.json",
+                "impressions_history_ads_seen",
+            ),
+        ],
+    }
+
+    return ExtractionResult(
+        "instagram_summary",
+        props.Translatable(
+            {"en": "Summary information", "nl": "Samenvatting gegevens"}
+        ),
+        pd.DataFrame(summary_data),
+    )
+
+
+def extract_direct_message_activity(zipfile):
+    counter = itertools.count()
+    person_ids = defaultdict(lambda: next(counter))
+    sender_ids = []
+    timestamps = []
+    for data in glob_json(zipfile, "messages/inbox/**/message_*.json"):
+        # Ensure the donating user is the first to get an ID
+        donating_user = data["participants"][1]["name"]
+        person_ids[donating_user]
+        for message in data["messages"]:
+            sender_ids.append(person_ids[message["sender_name"]])
+            timestamps.append(parse_datetime(message["timestamp_ms"] / 1000))
+    df = pd.DataFrame({"Anonymous ID": sender_ids, "Sent": timestamps})
+    df["Sent"] = df["Sent"].dt.strftime("%Y-%m-%d %H:%M")
+    return ExtractionResult(
+        "instagram_direct_message_activity",
+        props.Translatable(
+            {"en": "Direct message activity", "nl": "Bericht activiteit"}
+        ),
+        df,
+    )
+
+
+def extract_comment_activity(zipfile):
+    timestamps = []
+    for data in glob_json(zipfile, "comments/post_comments.json"):
+        for item in data["comments_media_comments"]:
+            timestamps.append(
+                parse_datetime(item["string_map_data"]["Time"]["timestamp"])
+            )
+    df = pd.DataFrame({"Posted": timestamps})
+    df = df.sort_values("Posted")
+    df["Posted"] = df["Posted"].dt.strftime("%Y-%m-%d %H:%M")
+    return ExtractionResult(
+        "instagram_comment_activity",
+        props.Translatable({"en": "Comment activity", "nl": "Commentaar activiteit"}),
+        df,
+    )
+
+
+def extract_posts_liked(zipfile):
+    urls = []
+    timestamps = []
+    for data in glob_json(zipfile, "likes/liked_posts.json"):
+        for item in data["likes_media_likes"]:
+            info = item["string_list_data"][0]
+            timestamps.append(parse_datetime(info["timestamp"]))
+            urls.append(info["href"])
+    df = pd.DataFrame({"Liked": timestamps, "Link": urls})
+    df["Liked"] = df["Liked"].dt.strftime("%Y-%m-%d %H:%M")
+    df = df.sort_values("Liked")
+    return ExtractionResult(
+        "instagram_posts_liked",
+        props.Translatable({"en": "Posts Liked", "nl": "Geliked"}),
+        df,
+    )
+
+
+def flatten_media(items):
+    for item in items:
+        yield from item["media"]
+
+
+def get_creation_timestamps(items):
+    for item in items:
+        yield parse_datetime(item["creation_timestamp"])
+
+
+def get_media_creation_timestamps(items):
+    return get_creation_timestamps(flatten_media(items))
+
+
+def get_content_posts_timestamps(zipfile):
+    for data in glob_json(zipfile, "content/posts_*.json"):
+        yield from get_media_creation_timestamps(data)
+
+
+def get_media_timestamps(zipfile, pattern, key):
+    for data in glob_json(zipfile, pattern):
+        yield from get_media_creation_timestamps(data[key])
+
+
+def df_from_timestamps(timestamps, column):
+    df = pd.DataFrame({"timestamps": timestamps})
+    counts = df.groupby(lambda x: hourly_key(df["timestamps"][x])).size()
+
+    df = counts.reset_index()
+    df.columns = ["timestamp", column]
+    return df
+
+
+def stories_timestamps(zipfile):
+    for data in glob_json(zipfile, "content/stories.json"):
+        for item in data["ig_stories"]:
+            yield parse_datetime(item["creation_timestamp"])
+
+
+def df_from_timestamp_columns(a, b):
+    data_frames = [
+        df_from_timestamps(timestamps, column) for timestamps, column in [a, b]
+    ]
+
+    df = pd.merge(
+        data_frames[0],
+        data_frames[1],
+        left_on="timestamp",
+        right_on="timestamp",
+        how="outer",
+    ).sort_index()
+    df["Date"] = df["timestamp"].dt.strftime("%Y-%m-%d")
+    df["Timeslot"] = map_to_timeslot(df["timestamp"].dt.hour)
+    df = df.reset_index(drop=True)
+    df = (
+        df.reindex(columns=["Date", "Timeslot", a[1], b[1]])
+        .reset_index(drop=True)
+        .fillna(0)
+    )
+    df[a[1]] = df[a[1]].astype(int)
+    df[b[1]] = df[b[1]].astype(int)
+    return df
+
+
+def get_video_posts_timestamps(zipfile):
+    return itertools.chain(
+        get_content_posts_timestamps(zipfile),
+        get_media_timestamps(zipfile, "content/igtv_videos.json", "ig_igtv_media"),
+        get_media_timestamps(zipfile, "content/reels.json", "ig_reels_media"),
+    )
+
+
+def extract_video_posts(zipfile):
+    video_timestamps = get_video_posts_timestamps(zipfile)
+    df = df_from_timestamp_columns(
+        (video_timestamps, "Videos"), (stories_timestamps(zipfile), "Stories")
+    )
+    return ExtractionResult(
+        "instagram_video_posts",
+        props.Translatable({"en": "Posts", "nl": "Posts"}),
+        df,
+    )
+
+
+def get_post_comments_timestamps(zipfile):
+    return get_string_map_timestamps(
+        zipfile, "comments/post_comments.json", "comments_media_comments"
+    )
+
+
+def get_string_list_timestamps(zipfile, pattern, key):
+    for data in glob_json(zipfile, pattern):
+        for item in data[key]:
+            yield parse_datetime(item["string_list_data"][0]["timestamp"])
+
+
+def get_string_map_timestamps(zipfile, pattern, key):
+    for data in glob_json(zipfile, pattern):
+        for item in data[key]:
+            yield parse_datetime(item["string_map_data"]["Time"]["timestamp"])
+
+
+def get_likes_timestamps(zipfile):
+    return itertools.chain(
+        get_string_list_timestamps(
+            zipfile, "likes/liked_comments.json", "likes_comment_likes"
+        ),
+        get_string_list_timestamps(
+            zipfile, "likes/liked_posts.json", "likes_media_likes"
+        ),
+    )
+
+
+def extract_comments_and_likes(zipfile):
+    comment_timestamps = get_post_comments_timestamps(zipfile)
+    likes_timestamps = get_likes_timestamps(zipfile)
+    df = df_from_timestamp_columns(
+        (comment_timestamps, "Comments"), (likes_timestamps, "Likes")
+    )
+    return ExtractionResult(
+        "instagram_comments_and_likes",
+        props.Translatable({"en": "Comments and likes", "nl": "Comments en likes"}),
+        df,
+    )
+
+
+def extract_viewed(zipfile):
+    df = df_from_timestamp_columns(
+        (
+            get_string_map_timestamps(
+                zipfile,
+                "ads_and_topics/videos_watched.json",
+                "impressions_history_videos_watched",
+            ),
+            "Videos",
+        ),
+        (
+            get_string_map_timestamps(
+                zipfile,
+                "ads_and_topics/posts_viewed.json",
+                "impressions_history_posts_seen",
+            ),
+            "Posts",
+        ),
+    )
+    return ExtractionResult(
+        "instagram_viewed",
+        props.Translatable({"en": "Viewed", "nl": "Viewed"}),
+        df,
+    )
+
+
+def extract_session_info(zipfile):
+    timestamps = list(
+        itertools.chain(
+            list(get_video_posts_timestamps(zipfile)),
+            list(stories_timestamps(zipfile)),
+            list(get_post_comments_timestamps(zipfile)),
+            list(get_likes_timestamps(zipfile)),
+        )
+    )
+    print(timestamps)
+    sessions = get_sessions(timestamps)
+    print(sessions)
+    df = pd.DataFrame(sessions, columns=["Start", "End", "Duration"])
+    df["Start"] = df["Start"].dt.strftime("%Y-%m-%d %H:%M")
+    df["Duration (in minutes)"] = (df["Duration"].dt.total_seconds() / 60).round(2)
+    df = df.drop("End", axis=1)
+    df = df.drop("Duration", axis=1)
+
+    return ExtractionResult(
+        "instagram_session_info",
+        props.Translatable({"en": "Session information", "nl": "Sessie informatie"}),
+        df,
+    )
+
+
+def extract_data(path):
+    extractors = [
+        extract_summary_data,
+        extract_video_posts,
+        extract_comments_and_likes,
+        extract_viewed,
+        extract_session_info,
+        extract_direct_message_activity,
+        extract_comment_activity,
+        extract_posts_liked,
+    ]
+    zfile = zipfile.ZipFile(path)
+    print(zfile.namelist())
+    return [extractor(zfile) for extractor in extractors]
 
 
 ######################
 # Data donation flow #
 ######################
 
-def process_tiktok(sessionId):
-    progress = 0
-    platform = "TikTok"
-    meta_data = []
-    data = None
-    while True:
-        promptFile = prompt_file(platform, "application/zip, text/plain")
-        fileResult = yield render_donation_page(platform, promptFile, progress)
-        if fileResult.__type__ != 'PayloadString':
-            meta_data.append(("debug", f"{platform}: skip to next step"))
-            break
-
-        meta_data.append(("debug", f"{platform}: extracting file"))
-        extractionResult = extract_tiktok_data(fileResult.value)
-        if extractionResult != 'invalid':
-            meta_data.append(("debug", f"{platform}: extraction successful, go to consent form"))
-            data = extractionResult
-            break
-
-        meta_data.append(("debug", f"{platform}: prompt confirmation to retry file selection"))
-        retry_result = yield render_donation_page(platform, retry_confirmation(platform), progress)
-        if retry_result.__type__ == 'PayloadTrue':
-            meta_data.append(("debug", f"{platform}: skip due to invalid file"))
-            continue
 
-        meta_data.append(("debug", f"{platform}: retry prompt file"))
-        break
-    if data:
-        meta_data.append(("debug", f"{platform}: prompt consent"))
-        consent_result = yield render_donation_page(platform, props.PropsUIPromptConsentForm(data, []), progress)
+ExtractionResult = namedtuple("ExtractionResult", ["id", "title", "data_frame"])
+
+
+class SkipToNextStep(Exception):
+    pass
+
+
+class DataDonationProcessor:
+    def __init__(self, platform, mime_types, extractor, session_id):
+        self.platform = platform
+        self.mime_types = mime_types
+        self.extractor = extractor
+        self.session_id = session_id
+        self.progress = 0
+        self.meta_data = []
+
+    def process(self):
+        print("START")
+        with suppress(SkipToNextStep):
+            while True:
+                file_result = yield from self.prompt_file()
+
+                self.log(f"extracting file")
+                try:
+                    print(file_result)
+                    extraction_result = self.extract_data(file_result.value)
+                except IOError as e:
+                    print("IOERROR")
+                    self.log(f"prompt confirmation to retry file selection")
+                    yield from self.prompt_retry()
+                    return
+                else:
+                    if extraction_result is None:
+                        try_again = yield from self.prompt_retry()
+                        if try_again:
+                            continue
+                        else:
+                            return
+                    self.log(f"extraction successful, go to consent form")
+                    yield from self.prompt_consent(extraction_result)
+
+    def prompt_retry(self):
+        retry_result = yield render_donation_page(
+            self.platform, retry_confirmation(self.platform), self.progress
+        )
+        return retry_result.__type__ == "PayloadTrue"
+
+    def prompt_file(self):
+        description = props.Translatable(
+            {
+                "en": f"Please follow the download instructions and choose the file that you stored on your device. Click “Skip” at the right bottom, if you do not have a {self.platform} file. ",
+                "nl": f"Volg de download instructies en kies het bestand dat u opgeslagen heeft op uw apparaat. Als u geen {self.platform} bestand heeft klik dan op “Overslaan” rechts onder.",
+            }
+        )
+        prompt_file = props.PropsUIPromptFileInput(description, self.mime_types)
+        file_result = yield render_donation_page(
+            self.platform, prompt_file, self.progress
+        )
+        if file_result.__type__ != "PayloadString":
+            self.log(f"skip to next step")
+            raise SkipToNextStep()
+        return file_result
+
+    def log(self, message):
+        self.meta_data.append(("debug", f"{self.platform}: {message}"))
+
+    def extract_data(self, file):
+        return self.extractor(file)
+
+    def prompt_consent(self, data):
+        log_title = props.Translatable({"en": "Log messages", "nl": "Log berichten"})
+
+        tables = [
+            props.PropsUIPromptConsentFormTable(table.id, table.title, table.data_frame)
+            for table in data
+        ]
+        meta_frame = pd.DataFrame(self.meta_data, columns=["type", "message"])
+        meta_table = props.PropsUIPromptConsentFormTable(
+            "log_messages", log_title, meta_frame
+        )
+        self.log(f"prompt consent")
+        consent_result = yield render_donation_page(
+            self.platform,
+            props.PropsUIPromptConsentForm(tables, [meta_table]),
+            self.progress,
+        )
 
         if consent_result.__type__ == "PayloadJSON":
-            meta_data.append(("debug", f"{platform}: donate consent data"))
-            yield donate(f"{sessionId}-{platform}", consent_result.value)
+            self.log(f"donate consent data")
+            yield donate(f"{self.sessionId}-{self.platform}", consent_result.value)
 
 
-def process(sessionId):
-    progress = 0
-    yield donate(f"{sessionId}-tracking", '[{ "message": "user entered script" }]')
-    yield from process_tiktok(sessionId)
-
-    # subflows = len(platforms)
-    # steps = 2
-    # step_percentage = (100/subflows)/steps
-
-    # # progress in %
-    # progress = 0
-
-    # for index, platform in enumerate(platforms):
-    #     meta_data = []
-    #     meta_data.append(("debug", f"{platform}: start"))
-
-    #     # STEP 1: select the file
-    #     progress += step_percentage
-    #     data = None
-    #     while True:
-    #         meta_data.append(("debug", f"{platform}: prompt file"))
-    #         promptFile = prompt_file(platform, "application/zip, text/plain")
-    #         fileResult = yield render_donation_page(platform, promptFile, progress)
-    #         if fileResult.__type__ == 'PayloadString':
-    #             meta_data.append(("debug", f"{platform}: extracting file"))
-    #             extractionResult = doSomethingWithTheFile(platform, fileResult.value)
-    #             if extractionResult != 'invalid':
-    #                 meta_data.append(("debug", f"{platform}: extraction successful, go to consent form"))
-    #                 data = extractionResult
-    #                 break
-    #             else:
-    #                 meta_data.append(("debug", f"{platform}: prompt confirmation to retry file selection"))
-    #                 retry_result = yield render_donation_page(platform, retry_confirmation(platform), progress)
-    #                 if retry_result.__type__ == 'PayloadTrue':
-    #                     meta_data.append(("debug", f"{platform}: skip due to invalid file"))
-    #                     continue
-    #                 else:
-    #                     meta_data.append(("debug", f"{platform}: retry prompt file"))
-    #                     break
-    #         else:
-    #             meta_data.append(("debug", f"{platform}: skip to next step"))
-    #             break
-
-    #     # STEP 2: ask for consent
-    #     progress += step_percentage
-    #     if data is not None:
-    #         meta_data.append(("debug", f"{platform}: prompt consent"))
-    #         prompt = prompt_consent(platform, data, meta_data)
-    #         consent_result = yield render_donation_page(platform, prompt, progress)
-    #         if consent_result.__type__ == "PayloadJSON":
-    #             meta_data.append(("debug", f"{platform}: donate consent data"))
-    #             yield donate(f"{sessionId}-{platform}", consent_result.value)
+class DataDonation:
+    def __init__(self, platform, mime_types, extractor):
+        self.platform = platform
+        self.mime_types = mime_types
+        self.extractor = extractor
+
+    def __call__(self, session_id):
+        processor = DataDonationProcessor(
+            self.platform, self.mime_types, self.extractor, session_id
+        )
+        yield from processor.process()
 
+
+data_donation = DataDonation("Instagram", "application/zip", extract_data)
+
+
+def process(session_id):
+    progress = 0
+    yield donate(f"{session_id}-tracking", '[{ "message": "user entered script" }]')
+    yield from data_donation(session_id)
     yield render_end_page()
 
 
@@ -224,10 +619,7 @@ def render_end_page():
 
 
 def render_donation_page(platform, body, progress):
-    header = props.PropsUIHeader(props.Translatable({
-        "en": platform,
-        "nl": platform
-    }))
+    header = props.PropsUIHeader(props.Translatable({"en": platform, "nl": platform}))
 
     footer = props.PropsUIFooter(progress)
     page = props.PropsUIPageDonation(platform, header, body, footer)
@@ -235,66 +627,41 @@ def render_donation_page(platform, body, progress):
 
 
 def retry_confirmation(platform):
-    text = props.Translatable({
-        "en": f"Unfortunately, we cannot process your {platform} file. Continue, if you are sure that you selected the right file. Try again to select a different file.",
-        "nl": f"Helaas, kunnen we uw {platform} bestand niet verwerken. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen."
-    })
-    ok = props.Translatable({
-        "en": "Try again",
-        "nl": "Probeer opnieuw"
-    })
-    cancel = props.Translatable({
-        "en": "Continue",
-        "nl": "Verder"
-    })
+    text = props.Translatable(
+        {
+            "en": f"Unfortunately, we cannot process your {platform} file. Continue, if you are sure that you selected the right file. Try again to select a different file.",
+            "nl": f"Helaas, kunnen we uw {platform} bestand niet verwerken. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen.",
+        }
+    )
+    ok = props.Translatable({"en": "Try again", "nl": "Probeer opnieuw"})
+    cancel = props.Translatable({"en": "Continue", "nl": "Verder"})
     return props.PropsUIPromptConfirm(text, ok, cancel)
 
 
-def prompt_file(platform, extensions):
-    description = props.Translatable({
-        "en": f"Please follow the download instructions and choose the file that you stored on your device. Click “Skip” at the right bottom, if you do not have a {platform} file. ",
-        "nl": f"Volg de download instructies en kies het bestand dat u opgeslagen heeft op uw apparaat. Als u geen {platform} bestand heeft klik dan op “Overslaan” rechts onder."
-    })
-
-    return props.PropsUIPromptFileInput(description, extensions)
-
-
-def doSomethingWithTheFile(platform, filename):
-    return extract_zip_contents(filename)
-
-
-def extract_zip_contents(filename):
-    names = []
-    try:
-        file = zipfile.ZipFile(filename)
-        data = []
-        for name in file.namelist():
-            names.append(name)
-            info = file.getinfo(name)
-            data.append((name, info.compress_size, info.file_size))
-        return data
-    except zipfile.error:
-        return "invalid"
-
-
 def prompt_consent(id, data, meta_data):
+    table_title = props.Translatable(
+        {"en": "Zip file contents", "nl": "Inhoud zip bestand"}
+    )
 
-    table_title = props.Translatable({
-        "en": "Zip file contents",
-        "nl": "Inhoud zip bestand"
-    })
-
-    log_title = props.Translatable({
-        "en": "Log messages",
-        "nl": "Log berichten"
-    })
+    log_title = props.Translatable({"en": "Log messages", "nl": "Log berichten"})
 
     data_frame = pd.DataFrame(data, columns=["filename", "compressed size", "size"])
     table = props.PropsUIPromptConsentFormTable("zip_content", table_title, data_frame)
     meta_frame = pd.DataFrame(meta_data, columns=["type", "message"])
-    meta_table = props.PropsUIPromptConsentFormTable("log_messages", log_title, meta_frame)
+    meta_table = props.PropsUIPromptConsentFormTable(
+        "log_messages", log_title, meta_frame
+    )
     return props.PropsUIPromptConsentForm([table], [meta_table])
 
 
 def donate(key, json_string):
     return CommandSystemDonate(key, json_string)
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) > 1:
+        print(extract_data(sys.argv[1]))
+    else:
+        print("please provide a zip file as argument")
diff --git a/src/framework/processing/py/tests/__init__.py b/src/framework/processing/py/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/framework/processing/py/tests/script_test.py b/src/framework/processing/py/tests/script_test.py
new file mode 100644
index 0000000..35612f8
--- /dev/null
+++ b/src/framework/processing/py/tests/script_test.py
@@ -0,0 +1,351 @@
+"""
+- test file type error
+- test data not found error
+- grouping by hour
+    - multiple columns
+
+
+"""
+
+import json
+import io
+from pathlib import Path
+from dataclasses import dataclass
+from inspect import cleandoc
+import pandas as pd
+from pandas.testing import assert_frame_equal
+from port.api import commands
+from port import script
+
+
+class FakeZip:
+    def __init__(self, files):
+        self._files = files
+
+    def namelist(self):
+        return self._files.keys()
+
+    def open(self, name):
+        data = self._files[name]
+        f = io.StringIO()
+        json.dump(data, f)
+        f.seek(0)
+        return f
+
+
+def assert_frame_str_equal(df1, df2):
+    assert cleandoc(df1) == str(df2)
+
+
+def test_summary_table():
+    data = FakeZip(
+        {
+            "followers_and_following/followers_1.json": {"string_list_data": [{}, {}]},
+            "followers_and_following/followers_2.json": {"string_list_data": [{}]},
+            "followers_and_following/following.json": {
+                "relationships_following": [
+                    {},
+                    {},
+                    {},
+                    {},
+                ]
+            },
+            "content/posts_1.json": [
+                {},
+                {},
+                {},
+                {},
+                {},
+            ],
+            "comments/post_comments.json": {"comments_media_comments": [{}]},
+            "ads_and_topics/videos_watched.json": {
+                "impressions_history_videos_watched": [{}, {}]
+            },
+            "ads_and_topics/posts_viewed.json": {
+                "impressions_history_posts_seen": [{}, {}]
+            },
+            "ads_and_topics/ads_viewed.json": {
+                "impressions_history_ads_seen": [{}, {}]
+            },
+            "messages/inbox/some_person/message_1.json": {
+                "participants": [{"name": "Some"}, {"name": "Me"}],
+                "messages": [
+                    {"sender_name": "Me"},
+                    {"sender_name": "Some"},
+                    {"sender_name": "Me"},
+                ],
+            },
+        }
+    )
+    result = script.extract_summary_data(data)
+    assert "instagram_summary" == result.id
+    assert "Summary information" == result.title.translations["en"]
+
+    reference = """
+             Description  Number
+    0          Followers       3
+    1          Following       4
+    2              Posts       5
+    3    Comments posted       1
+    4     Videos watched       2
+    5       Posts viewed       2
+    6      Messages sent       2
+    7  Messages received       1
+    8         Ads viewed       2
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+video_posts = {
+    "content/posts_1.json": [
+        {"media": [{"creation_timestamp": 1678743234}]},
+        {"media": [{"creation_timestamp": 1678752349}]},
+    ],
+    "content/igtv_videos.json": {
+        "ig_igtv_media": [
+            {"media": [{"creation_timestamp": 1678743235}]},
+            {"media": [{"creation_timestamp": 1678752319}]},
+            {"media": [{"creation_timestamp": 1678769988}]},
+        ]
+    },
+    "content/reels.json": {
+        "ig_reels_media": [
+            {"media": [{"creation_timestamp": 1678752377}]},
+            {"media": [{"creation_timestamp": 1678793248}]},
+        ]
+    },
+    "content/stories.json": {
+        "ig_stories": [
+            {"creation_timestamp": 1678743234},
+        ]
+    },
+}
+
+
+def test_video_posts_table():
+    data = FakeZip(video_posts)
+    result = script.extract_video_posts(data)
+    assert "instagram_video_posts" == result.id
+    assert "Posts" == result.title.translations["en"]
+
+    reference = """
+             Date Timeslot  Videos  Stories
+    0  2023-03-13    22-23       2        1
+    1  2023-03-14      1-2       3        0
+    2  2023-03-14      5-6       1        0
+    3  2023-03-14    12-13       1        0
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+comments_data = {
+    "comments/post_comments.json": {
+        "comments_media_comments": [
+            {"string_map_data": {"Time": {"timestamp": 1678743234}}},
+            {"string_map_data": {"Time": {"timestamp": 1678752349}}},
+        ]
+    },
+    "likes/liked_comments.json": {
+        "likes_comment_likes": [{"string_list_data": [{"timestamp": 1678743446}]}],
+    },
+    "likes/liked_posts.json": {
+        "likes_media_likes": [{"string_list_data": [{"timestamp": 1678743446}]}]
+    },
+}
+
+
+def test_comments_and_likes_table():
+    data = FakeZip(comments_data)
+    result = script.extract_comments_and_likes(data)
+    assert "instagram_comments_and_likes" == result.id
+    assert "Comments and likes" == result.title.translations["en"]
+
+    reference = """
+             Date Timeslot  Comments  Likes
+    0  2023-03-13    22-23         1      2
+    1  2023-03-14      1-2         1      0
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+def test_viewed_table():
+    data = FakeZip(
+        {
+            "ads_and_topics/videos_watched.json": {
+                "impressions_history_videos_watched": [
+                    {"string_map_data": {"Time": {"timestamp": 1678741258}}},
+                    {"string_map_data": {"Time": {"timestamp": 1678741258}}},
+                ]
+            },
+            "ads_and_topics/posts_viewed.json": {
+                "impressions_history_posts_seen": [
+                    {"string_map_data": {"Time": {"timestamp": 1678741258}}},
+                    {"string_map_data": {"Time": {"timestamp": 1678798788}}},
+                ]
+            },
+        },
+    )
+    result = script.extract_viewed(data)
+    assert "instagram_viewed" == result.id
+    assert "Viewed" == result.title.translations["en"]
+
+    reference = """
+             Date Timeslot  Videos  Posts
+    0  2023-03-13    22-23       2      1
+    1  2023-03-14    13-14       0      1
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+def test_session_info_table():
+    data = FakeZip({**video_posts, **comments_data})
+    result = script.extract_session_info(data)
+    assert "instagram_session_info" == result.id
+    assert "Session information" == result.title.translations["en"]
+
+    reference = """
+                  Start  Duration (in minutes)
+    0  2023-03-13 22:33                   3.53
+    1  2023-03-14 01:05                   0.97
+    2  2023-03-14 05:59                   0.00
+    3  2023-03-14 12:27                   0.00
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+def test_direct_message_activity_table():
+    data = FakeZip(
+        {
+            "messages/inbox/some_person/message_1.json": {
+                "participants": [{"name": "Some"}, {"name": "Me"}],
+                "messages": [
+                    {
+                        "sender_name": "Me",
+                        "timestamp_ms": 1677493123321,
+                    },
+                    {
+                        "sender_name": "Some",
+                        "timestamp_ms": 1677493127655,
+                    },
+                    {
+                        "sender_name": "Me",
+                        "timestamp_ms": 1677493187671,
+                    },
+                ],
+            },
+            "messages/inbox/some_other/message_1.json": {
+                "participants": [{"name": "Other"}, {"name": "Me"}],
+                "messages": [
+                    {
+                        "sender_name": "Other",
+                        "timestamp_ms": 1677493295441,
+                    },
+                    {
+                        "sender_name": "Me",
+                        "timestamp_ms": 1677493299999,
+                    },
+                    {
+                        "sender_name": "Other",
+                        "timestamp_ms": 1677493299999,
+                    },
+                ],
+            },
+        },
+    )
+    result = script.extract_direct_message_activity(data)
+    assert "instagram_direct_message_activity" == result.id
+    assert "Direct message activity" == result.title.translations["en"]
+
+    reference = """
+       Anonymous ID              Sent
+    0             0  2023-02-27 11:18
+    1             1  2023-02-27 11:18
+    2             0  2023-02-27 11:19
+    3             2  2023-02-27 11:21
+    4             0  2023-02-27 11:21
+    5             2  2023-02-27 11:21
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+def test_comment_activity_table():
+    data = FakeZip(
+        {
+            "comments/post_comments.json": {
+                "comments_media_comments": [
+                    {"string_map_data": {"Time": {"timestamp": 1678743434}}},
+                    {"string_map_data": {"Time": {"timestamp": 1678743478}}},
+                    {"string_map_data": {"Time": {"timestamp": 1678747777}}},
+                    {"string_map_data": {"Time": {"timestamp": 1678749999}}},
+                    {"string_map_data": {"Time": {"timestamp": 1678999999}}},
+                ]
+            },
+        },
+    )
+    result = script.extract_comment_activity(data)
+    assert "instagram_comment_activity" == result.id
+    assert "Comment activity" == result.title.translations["en"]
+
+    reference = """
+                 Posted
+    0  2023-03-13 22:37
+    1  2023-03-13 22:37
+    2  2023-03-13 23:49
+    3  2023-03-14 00:26
+    4  2023-03-16 21:53
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)
+
+
+def test_posts_liked_table():
+    data = FakeZip(
+        {
+            "likes/liked_posts.json": {
+                "likes_media_likes": [
+                    {
+                        "string_list_data": [
+                            {
+                                "timestamp": 1678743446,
+                                "href": "https://example.org/test1",
+                            }
+                        ]
+                    },
+                    {
+                        "string_list_data": [
+                            {
+                                "timestamp": 1678743467,
+                                "href": "https://example.org/test2",
+                            }
+                        ]
+                    },
+                    {
+                        "string_list_data": [
+                            {
+                                "timestamp": 1678747777,
+                                "href": "https://example.org/test3",
+                            }
+                        ]
+                    },
+                ]
+            },
+        },
+    )
+    result = script.extract_posts_liked(data)
+    assert "instagram_posts_liked" == result.id
+    assert "Posts Liked" == result.title.translations["en"]
+
+    reference = """
+                  Liked                       Link
+    0  2023-03-13 22:37  https://example.org/test1
+    1  2023-03-13 22:37  https://example.org/test2
+    2  2023-03-13 23:49  https://example.org/test3
+    """
+    print(result.data_frame)
+    assert_frame_str_equal(reference, result.data_frame)