-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitter_time_series.py
46 lines (37 loc) · 1.33 KB
/
twitter_time_series.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import sys
import json
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import pickle
def read_created_at(lines):
    """Collect the ``created_at`` value of every JSON record in *lines*.

    Args:
        lines: Iterable of strings, each expected to hold one JSON object
            (for example an open text file with one tweet per line).

    Returns:
        A list with the ``created_at`` value of each record, in input order.

    Raises:
        ValueError: If a non-blank line is not valid JSON (raised by
            ``json.loads``).
    """
    timestamps = []
    for line in lines:
        # A blank line (e.g. a trailing newline) is not a record.
        if not line.strip():
            continue
        created_at = json.loads(line).get('created_at')
        # Records without a timestamp would otherwise turn into NaT entries
        # in the datetime index, so they are left out.
        if created_at is not None:
            timestamps.append(created_at)
    return timestamps


def make_event_series(timestamps):
    """Return a Series of 1s indexed by the parsed *timestamps*.

    Each entry stands for one tweet, so summing over a time bucket yields
    the number of tweets in that bucket.
    """
    idx = pd.DatetimeIndex(timestamps)
    return pd.Series(np.ones(len(idx)), index=idx)


def per_minute_counts(series):
    """Bucket *series* into 1-minute bins and sum the values in each bin."""
    # 'min' is the canonical pandas alias for a one-minute frequency.
    return series.resample('1min').sum().fillna(0)


def plot_frequencies(per_minute, out_path='tweet_time_series.png'):
    """Plot the per-minute counts as a line chart and save it to *out_path*.

    Args:
        per_minute: Series of counts indexed by minute.
        out_path: Destination image file.
    """
    fig, ax = plt.subplots()
    ax.grid(True)
    ax.set_title("Tweet Frequencies")
    # One major tick every 20 minutes, labelled as hour:minute.
    ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=20))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.set_ylim(0, per_minute.max())
    ax.plot(per_minute.index, per_minute)
    fig.savefig(out_path)
    # Release the figure once it has been written.
    plt.close(fig)


def main(argv=None):
    """Read a file of JSON tweets and plot the tweet frequency per minute.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv``. The first
            positional argument is the path of the input file, which holds
            one JSON object per line.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("Usage: python twitter_time_series.py <tweets.jsonl>")
    fname = argv[1]

    with open(fname, 'r', encoding='utf-8') as f:
        all_dates = read_created_at(f)
    if not all_dates:
        # Without data the y-axis limit would be NaN and plotting would fail.
        sys.exit("No records with a 'created_at' field found in " + fname)

    # The actual series (a series of 1s for the moment)
    my_series = make_event_series(all_dates)
    # Resampling / bucketing into 1-minute buckets
    per_minute = per_minute_counts(my_series)
    print(my_series.head())
    print(per_minute.head())

    plot_frequencies(per_minute)


if __name__ == '__main__':
    main()