Skip to content

Commit

Permalink
Cambridge POC
Browse files Browse the repository at this point in the history
  • Loading branch information
vloothuis committed Sep 12, 2023
1 parent 7bf1eeb commit 3a5238a
Show file tree
Hide file tree
Showing 11 changed files with 270 additions and 64 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/gh-pages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Builds the project on every push to master and publishes ./build to GitHub Pages.
name: Build and Deploy to GitHub Pages

on:
  push:
    branches:
      - master

# Write access to repository contents is granted for the job's GITHUB_TOKEN,
# which is handed to the deploy step below.
permissions:
  contents: write

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # GITHUB_REPOSITORY is "<owner>/<repo>"; the build is given the matching
      # https://<owner>.github.io/<repo>/ base URL through .env.
      - name: Set PUBLIC_URL in .env
        run: |
          REPO_NAME=$(echo $GITHUB_REPOSITORY | cut -d'/' -f2)
          echo "PUBLIC_URL=https://$(echo $GITHUB_REPOSITORY | cut -d'/' -f1).github.io/$REPO_NAME/" >> .env

      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version-file: ".tool-versions"

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps it a string; an unquoted version such as 3.10
          # would be parsed as the float 3.1.
          python-version: "3.11"

      - name: Install dependencies and build
        run: |
          npm install
          python -m pip install --upgrade pip
          python -m pip install poetry
          npm run dev:build

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./build
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

.vscode/
*.pyc
# dependencies
/node_modules
/.pnp
Expand Down
2 changes: 1 addition & 1 deletion .tool-versions
Original file line number Diff line number Diff line change
@@ -1 +1 @@
nodejs 16.17.0
nodejs 16.16.0
2 changes: 1 addition & 1 deletion dist/framework/processing/py_worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ function loadPackages() {
}
/**
 * Installs the bundled `port` wheel into the worker's Pyodide runtime via
 * micropip and imports the package.
 *
 * @returns {Promise<*>} Whatever `self.pyodide.runPythonAsync` resolves to.
 */
function installPortPackage() {
    console.log('[ProcessingWorker] load port package');
    // The wheel is referenced by a relative URL ("./") rather than a
    // root-absolute one, presumably so it still resolves when the app is
    // served from a sub-path (e.g. a GitHub Pages project site).
    return self.pyodide.runPythonAsync("\n import micropip\n await micropip.install(\"./port-0.0.0-py3-none-any.whl\", deps=False)\n import port\n ");
}
14 changes: 9 additions & 5 deletions dist/framework/visualisation/react/ui/pages/donation_page.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,11 @@ import { ConsentForm } from '../prompts/consent_form';
import { FileInput } from '../prompts/file_input';
import { RadioInput } from '../prompts/radio_input';
import { Footer } from './templates/footer';
import { Sidebar } from './templates/sidebar';
import LogoSvg from '../../../../../assets/images/logo.svg';
import { Page } from './templates/page';
import { Progress } from '../elements/progress';
import { Instructions } from '../elements/instructions';
export var DonationPage = function (props) {
var _a = prepareCopy(props), title = _a.title, forwardButton = _a.forwardButton;
var platform = props.platform, locale = props.locale, resolve = props.resolve;
var locale = props.locale, resolve = props.resolve;
function renderBody(props) {
var context = { locale: locale, resolve: props.resolve };
var body = props.body;
Expand All @@ -49,7 +46,14 @@ export var DonationPage = function (props) {
resolve === null || resolve === void 0 ? void 0 : resolve({ __type__: 'PayloadFalse', value: false });
}
var footer = (_jsx(Footer, { middle: _jsx(Progress, { percentage: props.footer.progressPercentage }), right: _jsxs("div", __assign({ className: 'flex flex-row' }, { children: [_jsx("div", { className: 'flex-grow' }), _jsx(ForwardButton, { label: forwardButton, onClick: handleSkip })] })) }));
var sidebar = (_jsx(Sidebar, { logo: LogoSvg, content: _jsx(Instructions, { platform: platform, locale: locale }) }));
var sidebar = (_jsx("div", {})
// <Sidebar
// logo={LogoSvg}
// content={
// <Instructions platform={platform} locale={locale} />
// }
// />
);
var body = (_jsxs(_Fragment, { children: [_jsx(Title1, { text: title }), renderBody(props)] }));
return (_jsx(Page, { body: body, sidebar: sidebar, footer: footer }));
};
Expand Down
Binary file modified dist/port-0.0.0-py3-none-any.whl
Binary file not shown.
Binary file modified public/port-0.0.0-py3-none-any.whl
Binary file not shown.
Binary file modified src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
Binary file not shown.
249 changes: 203 additions & 46 deletions src/framework/processing/py/port/script.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,219 @@
import itertools
import port.api.props as props
from port.api.commands import (CommandSystemDonate, CommandUIRender)

import pandas as pd
import zipfile
import json
import datetime
from collections import defaultdict
##########################
# TikTok file processing #
##########################

# Inclusive time window applied by get_date_filtered_items: items dated
# before filter_start or after filter_end are dropped.
filter_start = datetime.datetime(year=2021, month=1, day=1)
filter_end = datetime.datetime(year=2025, month=1, day=1)

# strptime layout used to parse each item's "Date" value.
datetime_format = "%Y-%m-%d %H:%M:%S"

def get_in(data_dict, *key_path):
    """Walk nested dictionaries along ``key_path`` and return the value found.

    Args:
        data_dict: The outermost dictionary to start from.
        *key_path: Keys to follow, one nesting level per key.

    Returns:
        The value at the end of the path, or ``None`` when a key is missing,
        a value along the path is ``None``, or a value that still has to be
        descended into is not a dictionary. With an empty ``key_path`` the
        input is returned unchanged.
    """
    node = data_dict
    for key in key_path:
        # A list/str/number in the middle of the path has no ``.get``;
        # treat it as "path not present" instead of raising AttributeError.
        if not isinstance(node, dict):
            return None
        node = node.get(key)
        if node is None:
            return None
    return node

def get_video_list_data(data):
    """Return the value stored under Activity > Video Browsing History > VideoList.

    Delegates to ``get_in`` and returns whatever it returns for that path.
    """
    video_list_path = ("Activity", "Video Browsing History", "VideoList")
    return get_in(data, *video_list_path)

def get_comment_list_data(data):
    """Return the value stored under Comment > Comments > CommentsList.

    Delegates to ``get_in`` and returns whatever it returns for that path.
    """
    comment_list_path = ("Comment", "Comments", "CommentsList")
    return get_in(data, *comment_list_path)

def get_date_filtered_items(items, start=None, end=None, date_format=None):
    """Yield ``(timestamp, item)`` for every item dated inside a time window.

    Args:
        items: Iterable of dictionaries that each carry a ``"Date"`` string.
            ``None`` or an empty collection yields nothing, so the result of
            a failed lookup can be passed in directly.
        start: Inclusive lower bound; defaults to the module-level
            ``filter_start``.
        end: Inclusive upper bound; defaults to the module-level
            ``filter_end``.
        date_format: ``strptime`` layout of the ``"Date"`` value; defaults to
            the module-level ``datetime_format``.

    Yields:
        Tuples of the parsed ``datetime`` and the original item, in input
        order.

    Raises:
        KeyError: If an item has no ``"Date"`` key.
        ValueError: If a ``"Date"`` value does not match the format.
    """
    if not items:
        return
    # Module-level defaults are resolved at call time, and only when the
    # caller did not supply a value.
    lower = filter_start if start is None else start
    upper = filter_end if end is None else end
    layout = datetime_format if date_format is None else date_format
    for item in items:
        timestamp = datetime.datetime.strptime(item["Date"], layout)
        if lower <= timestamp <= upper:
            yield (timestamp, item)

def get_count_by_date_key(timestamps, key_func):
    """Count timestamps per bucket and return the counts sorted by bucket.

    Args:
        timestamps: Iterable of values accepted by ``key_func``.
        key_func: Maps one timestamp to its bucket key. The key must be
            usable as a dictionary key and sortable.

    Returns:
        A list of ``(key, count)`` tuples, sorted by key.
    """
    tally = {}
    for moment in timestamps:
        bucket = key_func(moment)
        tally[bucket] = tally.get(bucket, 0) + 1
    return sorted(tally.items())

def get_all_first(items):
    """Return a lazy iterator over element 0 of every entry in ``items``."""
    firsts = (entry[0] for entry in items)
    return firsts

def hourly_key(date):
    """Format ``date`` as ``YYYY-MM-DD HH``, i.e. truncated to the hour."""
    pattern = "%Y-%m-%d %H"
    return date.strftime(pattern)

def daily_key(date):
    """Format ``date`` as ``YYYY-MM-DD``, i.e. truncated to the day."""
    pattern = "%Y-%m-%d"
    return date.strftime(pattern)

def get_sessions(timestamps, max_gap=datetime.timedelta(hours=1)):
    """Group timestamps into sessions separated by gaps longer than ``max_gap``.

    The timestamps are sorted first. Walking through them in order, a new
    session starts whenever the distance to the previous timestamp is
    strictly greater than ``max_gap``.

    Args:
        timestamps: Iterable of ``datetime`` objects, in any order.
        max_gap: Largest allowed distance between two consecutive timestamps
            of the same session. Defaults to one hour.

    Returns:
        A list of ``(start, end, duration)`` tuples in chronological order,
        where ``start`` and ``end`` are ``datetime`` objects and ``duration``
        is the ``timedelta`` ``end - start``. Empty input gives an empty
        list; a lone timestamp gives one zero-length session.
    """
    ordered = sorted(timestamps)
    if not ordered:
        return []

    sessions = []
    start = end = ordered[0]
    for current in ordered[1:]:
        # ``end`` is always the previous timestamp at this point.
        if current - end > max_gap:
            sessions.append((start, end, end - start))
            start = current
        end = current
    # Close the session that was still open when the input ran out.
    sessions.append((start, end, end - start))
    return sessions

def get_json_data(zip_file):
    """Yield the parsed content of every ``.json`` member of a zip archive.

    Args:
        zip_file: Path or file-like object accepted by ``zipfile.ZipFile``.

    Yields:
        The decoded JSON value of each member whose name ends in ``.json``,
        in archive order. Members that carry the suffix but do not contain
        valid JSON are skipped so that one stray file does not abort the
        scan (NOTE(review): e.g. resource-fork entries added by some
        archivers — confirm this is the desired policy).

    Raises:
        zipfile.BadZipFile: If ``zip_file`` is not a zip archive.
    """
    # Named ``archive`` rather than ``zip`` to avoid shadowing the builtin.
    with zipfile.ZipFile(zip_file, "r") as archive:
        for name in archive.namelist():
            if not name.endswith(".json"):
                continue
            with archive.open(name) as json_file:
                try:
                    payload = json.load(json_file)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    continue
            yield payload


def extract_tiktok_data(zip_file):
    """Build the consent-form table of hourly TikTok video views from an export.

    The JSON members of the archive are scanned in order. The first one that
    contains a video browsing history is turned into a table with one row per
    hour and the number of videos viewed in that hour.

    Args:
        zip_file: Path or file-like object of the uploaded export.

    Returns:
        A one-element list holding a ``PropsUIPromptConsentFormTable``, or
        the string ``'invalid'`` when the upload is not a zip archive or no
        JSON member contains a video browsing history. ``'invalid'`` is the
        sentinel ``process_tiktok`` checks for before offering a retry.
    """
    table_title = props.Translatable({
        "en": "TikTok video browsing history",
        "nl": "TikTok video geschiedenis"
    })
    try:
        for data in get_json_data(zip_file):
            # Other JSON files in the export (or a top-level JSON list) do
            # not carry the browsing history; keep looking.
            video_items = get_video_list_data(data) if isinstance(data, dict) else None
            if video_items is None:
                continue
            videos = get_all_first(get_date_filtered_items(video_items))
            video_counts = get_count_by_date_key(videos, hourly_key)
            data_frame = pd.DataFrame(video_counts, columns=["Hour", "View Count"])
            return [props.PropsUIPromptConsentFormTable("tiktok_video_counts", table_title, data_frame)]
    except zipfile.BadZipFile:
        # The file prompt also accepts text/plain, so a non-zip upload is an
        # expected user error rather than a crash.
        return 'invalid'
    return 'invalid'


# comment_list_dates = list(get_all_first(get_date_filtered_items(get_comment_list_data(data))))
# sessions = get_sessions(itertools.chain(video_dates, comment_list_dates))
# yield sessions

# data = json.load(open(sys.argv[1]))

# from pprint import pprint
# video_dates = list(get_all_first(get_date_filtered_items(get_video_list_data(data))))
# pprint(get_count_by_date_key(video_dates, hourly_key))
# pprint(get_count_by_date_key(video_dates, daily_key))
# print("#"*80)
# comment_list_dates = list(get_all_first(get_date_filtered_items(get_comment_list_data(data))))
# pprint(get_count_by_date_key(comment_list_dates, hourly_key))
# pprint(get_count_by_date_key(comment_list_dates, daily_key))

# sessions = get_sessions(itertools.chain(video_dates, comment_list_dates))
# pprint(sessions)


######################
# Data donation flow #
######################

def process_tiktok(sessionId):
    """Run the TikTok donation sub-flow: pick a file, extract, ask consent, donate.

    This is a generator: every ``yield`` hands a command to the caller and
    receives that command's result payload back.

    Flow:
      1. Prompt for a file. Any answer other than ``PayloadString`` skips the
         sub-flow.
      2. Extract the data. On ``'invalid'`` ask whether to retry;
         ``PayloadTrue`` prompts for a file again, anything else skips.
      3. If data was extracted, show the consent form and, on
         ``PayloadJSON``, donate the consented value under
         ``"<sessionId>-TikTok"``.

    Args:
        sessionId: Identifier used as prefix of the donation key.
    """
    progress = 0
    platform = "TikTok"
    # NOTE(review): the debug trail is collected but not passed on anywhere
    # in this function — confirm whether it should reach the consent form.
    meta_data = []
    data = None
    while True:
        promptFile = prompt_file(platform, "application/zip, text/plain")
        fileResult = yield render_donation_page(platform, promptFile, progress)
        if fileResult.__type__ != 'PayloadString':
            meta_data.append(("debug", f"{platform}: skip to next step"))
            break

        meta_data.append(("debug", f"{platform}: extracting file"))
        extractionResult = extract_tiktok_data(fileResult.value)
        if extractionResult != 'invalid':
            meta_data.append(("debug", f"{platform}: extraction successful, go to consent form"))
            data = extractionResult
            break

        meta_data.append(("debug", f"{platform}: prompt confirmation to retry file selection"))
        retry_result = yield render_donation_page(platform, retry_confirmation(platform), progress)
        if retry_result.__type__ == 'PayloadTrue':
            # Loop again, i.e. prompt for a file once more.
            meta_data.append(("debug", f"{platform}: retry prompt file"))
            continue

        # Leave the loop without data: the platform is skipped.
        meta_data.append(("debug", f"{platform}: skip due to invalid file"))
        break

    if data:
        meta_data.append(("debug", f"{platform}: prompt consent"))
        consent_result = yield render_donation_page(platform, props.PropsUIPromptConsentForm(data, []), progress)

        if consent_result.__type__ == "PayloadJSON":
            meta_data.append(("debug", f"{platform}: donate consent data"))
            yield donate(f"{sessionId}-{platform}", consent_result.value)


def process(sessionId):
    """Entry point of the donation script: track entry, run TikTok, show end page.

    This is a generator: each yielded value is a command for the caller, and
    the results of the commands issued by the TikTok sub-flow are routed to
    it through ``yield from``.

    Args:
        sessionId: Identifier used as prefix of every donation key.
    """
    # Record that the participant reached the script at all.
    yield donate(f"{sessionId}-tracking", '[{ "message": "user entered script" }]')

    # TikTok is the only platform handled by this flow.
    yield from process_tiktok(sessionId)

    yield render_end_page()

Expand Down
2 changes: 1 addition & 1 deletion src/framework/processing/py_worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ function installPortPackage() {
console.log('[ProcessingWorker] load port package')
return self.pyodide.runPythonAsync(`
import micropip
await micropip.install("/port-0.0.0-py3-none-any.whl", deps=False)
await micropip.install("../../port-0.0.0-py3-none-any.whl", deps=False)
import port
`);
}
Loading

0 comments on commit 3a5238a

Please sign in to comment.