-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwitter_hashtag_stats.py
45 lines (41 loc) · 2.04 KB
/
twitter_hashtag_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from __future__ import division
import sys
from collections import defaultdict
import json
def get_hashtags(tweet):
    """Return the lower-cased text of each hashtag listed in a tweet.

    The hashtags are read from ``tweet['entities']['hashtags']``; each
    entry is expected to be a mapping with a ``'text'`` key.  When the
    tweet has no ``'entities'`` key, or the entities have no
    ``'hashtags'`` key, an empty list is returned.
    """
    tag_entries = tweet.get('entities', {}).get('hashtags', [])
    lowered = []
    for entry in tag_entries:
        lowered.append(entry['text'].lower())
    return lowered
def usage():
    """Print a two-line command-line usage message to standard output.

    The second line shows how to invoke the script, using the name it was
    started with (``sys.argv[0]``).
    """
    script_name = sys.argv[0]
    message_lines = (
        "Usage:",
        "python {} <filename.jsonl>".format(script_name),
    )
    for message_line in message_lines:
        print(message_line)
def _percent(part, whole):
    """Format ``part / whole`` as a percentage string with two decimals.

    Returns ``"0.00"`` when ``whole`` is zero, so that an input file
    containing no tweets does not raise ZeroDivisionError.  Relies on true
    division (the file imports ``division`` from ``__future__``).
    """
    if not whole:
        return "0.00"
    return "%.2f" % (part / whole * 100)


if __name__ == '__main__':
    # Script entry point: read a JSON-lines file of tweets (one JSON object
    # per line) and report how many tweets carry 0, 1, 2, ... hashtags.
    if len(sys.argv) != 2:
        usage()
        sys.exit(1)
    fname = sys.argv[1]

    # Maps "number of hashtags in a tweet" -> "number of tweets with that
    # many hashtags".
    hashtag_count = defaultdict(int)
    with open(fname, 'r') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing empty line), which
            # json.loads would reject.
            if not line.strip():
                continue
            tweet = json.loads(line)
            hashtag_count[len(get_hashtags(tweet))] += 1

    tweets_no_hashtags = hashtag_count.get(0, 0)
    tweets_total = sum(hashtag_count.values())
    tweets_with_hashtags = tweets_total - tweets_no_hashtags

    print("==" * 30)
    print("Hashtag stats")
    # Single-argument print: the output is the same whether ``print`` is a
    # function or a statement (a two-argument call would print a tuple
    # when ``print`` is a statement).
    print("==" * 30 + " \n")
    print("{} tweets without hashtags ({}%)".format(
        tweets_no_hashtags, _percent(tweets_no_hashtags, tweets_total)))
    print("{} tweets with at least one hashtag ({}%)".format(
        tweets_with_hashtags, _percent(tweets_with_hashtags, tweets_total)))

    # Per-count breakdown, in ascending order of hashtag count.  "elite"
    # is the share among tweets that have at least one hashtag.
    for tag_count in sorted(hashtag_count):
        if tag_count <= 0:
            continue
        tweet_count = hashtag_count[tag_count]
        print("{} tweets with {} hashtags ({}% total, {}% elite)".format(
            tweet_count,
            tag_count,
            _percent(tweet_count, tweets_total),
            _percent(tweet_count, tweets_with_hashtags)))