-
Notifications
You must be signed in to change notification settings - Fork 0
/
util_funcs.py
155 lines (118 loc) · 5.47 KB
/
util_funcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from tqdm import tqdm
from rich.console import Console
from rich import inspect
from pygal.style import NeonStyle, Style
import requests as rq
import pygal
from datetime import datetime
import json
# ------------------------------------------------------------ PARSING REDDIT POSTS ------------------------------------------------------------
def parse_image_post(post_data: dict) -> dict:
    """Build the media record for a single-image Reddit post.

    The file name and extension are derived from the last path segment of
    ``post_data['url']`` with any query string removed, so a URL such as
    ``.../abc.jpg?width=1.5`` yields ``abc.jpg`` / ``jpg``.

    Args:
        post_data: Post payload providing the keys ``id``, ``created_utc``,
            ``domain``, ``url``, ``author``, ``permalink``, ``score``,
            ``subreddit`` and ``title``.

    Returns:
        A one-entry dict mapping the post id to its record.

    Raises:
        KeyError: If one of the required keys is missing from ``post_data``.
    """
    url = post_data['url']
    # Strip the query string *before* looking for the extension: a dot inside
    # the query (e.g. "?width=1.5") must not be mistaken for the extension
    # separator, and the query is not part of the file name either.
    filename = url.split('?', 1)[0].split('/')[-1]
    # When the name contains no dot this is the whole name, as before.
    extension = filename.split('.')[-1]
    return {
        post_data['id']: {
            'created_utc': post_data['created_utc'],
            'domain': post_data['domain'],
            'filename': filename,
            'media_url': url,
            'op': post_data['author'],
            'post_id': post_data['id'],
            'post_url': f'https://reddit.com{post_data["permalink"]}',
            'score': post_data['score'],
            'subreddit': post_data['subreddit'],
            'title': post_data['title'],
            'type': 'image',
            'extension': extension,
        }
    }
def parse_video_post(post_data: dict):
    """Build the media record for a Reddit-hosted video post.

    The last path segment of ``post_data['url']`` is used as the video id
    (it carries no extension); the record points at the DASH playlist
    for that id on ``v.redd.it``.

    Returns a one-entry dict mapping the post id to its record.
    """
    # Everything after the final "/" of the URL is the video id.
    video_id = post_data['url'].rsplit('/', 1)[-1]
    post_id = post_data['id']
    record = dict(
        created_utc=post_data['created_utc'],
        domain=post_data['domain'],
        filename=f'{video_id}.mpd',
        media_url=f'https://v.redd.it/{video_id}/DASHPlaylist.mpd',
        op=post_data['author'],
        post_id=post_id,
        post_url=f'https://reddit.com{post_data["permalink"]}',
        score=post_data['score'],
        subreddit=post_data['subreddit'],
        title=post_data['title'],
        type='video',
        extension='mpd',
    )
    return {post_id: record}
def parse_gallery_post(post_data: dict, image_url: str) -> dict:
    """Build one media record per image of a Reddit gallery post.

    Args:
        post_data: Post payload providing ``id``, ``created_utc``, ``domain``,
            ``author``, ``permalink``, ``score``, ``subreddit`` and ``title``.
            ``media_metadata`` is expected to map image ids (file names
            without extension) to dicts whose ``m`` entry is a mime type such
            as ``image/jpg``.
        image_url: Template containing a ``{filename}`` placeholder, filled in
            with ``<image id>.<extension>`` to produce each ``media_url``.

    Returns:
        A dict keyed by ``"<post id>+<image id>"``. It is empty when the post
        has no usable ``media_metadata``.

    Raises:
        KeyError: If one of the required post keys is missing.
    """
    post_id = post_data['id']
    # `media_metadata` can be null (seen on
    # https://reddit.com/r/CatsWhoYawn/comments/z4rllw/beeper_yawns_still_hungover_from_tryptophan_ig/);
    # a missing key is treated the same way instead of raising.
    images = post_data.get('media_metadata')
    if not images:
        return {}

    records = {}
    for img, meta in images.items():
        # Entries without a mime type cannot be turned into a file name;
        # skip them rather than abort the whole gallery.
        mime = meta.get('m') if isinstance(meta, dict) else None
        if not mime:
            continue
        # Not always jpg, e.g.
        # https://www.reddit.com/r/blurrypicturesofcats/comments/10ane0m/blurry_picture_of_a_cat/
        ext = mime.split('image/')[-1]
        filename = f'{img}.{ext}'
        records[f'{post_id}+{img}'] = {
            'created_utc': post_data['created_utc'],
            'domain': post_data['domain'],
            'filename': filename,
            'media_url': image_url.format(filename=filename),
            'op': post_data['author'],
            'post_id': post_id,
            'post_url': f'https://reddit.com{post_data["permalink"]}',
            'score': post_data['score'],
            'subreddit': post_data['subreddit'],
            'title': post_data['title'],
            'type': 'image',
            'extension': ext,
        }
    return records
# ------------------------------------------------------------ GENERATING CHARTS ------------------------------------------------------------
def upvote_chart(raw_data, style: Style):
    """Render the per-subreddit upvote bar chart and return the totals.

    Sums the ``score`` of every record in ``raw_data`` per ``subreddit``,
    adds one bar per subreddit ordered from the highest to the lowest total,
    and writes the chart to ``docs/stats/charts/1-upvotes.svg``.

    Returns the totals as a dict ordered by descending score.
    """
    bar_chart = pygal.Bar(human_readable=True, style=style, show_legend=False)
    bar_chart.title = 'Distribution of upvotes per subreddit'
    bar_chart.value_formatter = lambda x: f'{x:,}'  # thousands separators

    # Accumulate the score of each subreddit.
    totals = {}
    for post in raw_data:
        name = post['subreddit']
        points = post['score']
        totals[name] = totals.get(name, 0) + points

    # Highest total first; ties keep their first-seen order.
    ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
    for name, total in ranked:
        bar_chart.add(name, total)

    bar_chart.render_to_file('docs/stats/charts/1-upvotes.svg')
    return dict(ranked)
def extension_chart(raw_data, style: Style):
    """Render the file-extension pie chart and return the counts.

    Counts how many records in ``raw_data`` carry each ``extension`` value,
    adds one slice per extension in first-seen order, and writes the chart
    to ``docs/stats/charts/2-extensions.svg``.

    Returns the ``{extension: count}`` dict.
    """
    pie = pygal.Pie(human_readable=True, style=style)
    pie.title = 'Distribution of extensions'

    # Tally the records per extension.
    counts = {}
    for record in raw_data:
        ext = record['extension']
        counts[ext] = counts.get(ext, 0) + 1

    for ext, amount in counts.items():
        pie.add(ext, amount)

    pie.render_to_file('docs/stats/charts/2-extensions.svg')
    return counts
def domain_chart(raw_data, style: Style):
    """Render the media-domain pie chart and return the counts.

    Counts how many records in ``raw_data`` carry each ``domain`` value,
    adds one slice per domain in first-seen order, and writes the chart to
    ``docs/stats/charts/3-domains.svg``.

    Returns the ``{domain: count}`` dict.
    """
    pie = pygal.Pie(human_readable=True, style=style)
    pie.title = 'Distribution of domains'

    # Tally the records per domain.
    tally = {}
    for record in raw_data:
        host = record['domain']
        tally.setdefault(host, 0)
        tally[host] += 1

    for host, amount in tally.items():
        pie.add(host, amount)

    pie.render_to_file('docs/stats/charts/3-domains.svg')
    return tally