-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwe.py
134 lines (105 loc) · 4.16 KB
/
twe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/python
import os
import sys
import json
import time
import math
import subprocess as sp
from tweepy import Cursor
from twitter_client import get_twitter_client
from colorama import init
from colorama import Fore, Back, Style
# Initialise colorama; autoreset=True is passed so colour codes are reset
# after each print instead of leaking into later output.
init(autoreset=True)

# Start from an empty terminal. `clear` only exists on POSIX shells, so use
# the Windows equivalent when running there. The command is a fixed literal,
# so shell=True carries no injection risk here.
sp.call('cls' if os.name == 'nt' else 'clear', shell=True)

# NOTE(review): not referenced anywhere in the visible part of this file;
# kept because other modules may import it.
MAX_FRIENDS = 15000
def GetCluster(username, k=5):
    """Cluster a user's followers by profile description and print the result.

    Reads ``users/<username>/followers.jsonl`` (one JSON object per line),
    builds a TF-IDF matrix from each profile's ``description`` field, groups
    the descriptions with k-means, and prints up to 50 descriptions for every
    cluster.

    Args:
        username: Screen name selecting the directory under ``users/``.
        k: Number of clusters to request. Defaults to 5 (the value that used
            to be hard-coded). It is lowered automatically when the file
            holds fewer profiles than ``k``.

    Raises:
        OSError: If the followers file cannot be opened.
        ValueError: Propagated from ``json`` / scikit-learn, e.g. for a
            malformed line or when no usable terms remain after filtering.
    """
    from collections import defaultdict
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.cluster import KMeans

    filename = "users/" + username + "/followers.jsonl"

    # Load the descriptions first so the file is closed before the
    # (potentially slow) vectorising and clustering steps run.
    users = []
    with open(filename) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            profile = json.loads(line)
            # A null or missing description becomes an empty document
            # instead of crashing the vectorizer.
            users.append(profile.get('description') or '')

    # create vectorizer
    vectorizer = TfidfVectorizer(max_df=0.8,
                                 min_df=2,
                                 max_features=None,
                                 stop_words='english',
                                 ngram_range=(1, 3),
                                 use_idf=True)
    # fit data
    X = vectorizer.fit_transform(users)
    print("Data dimensions: {}".format(X.shape))

    # perform clustering; KMeans rejects n_clusters > n_samples, so clamp.
    km = KMeans(n_clusters=min(k, X.shape[0]))
    km.fit(X)

    clusters = defaultdict(list)
    for label, description in zip(km.labels_, users):
        clusters[label].append(description)

    # print at most 50 user descriptions for each cluster
    for label, descriptions in clusters.items():
        print("\n" + Fore.BLUE + "-"*80)
        print(Fore.BLUE + 'Follower Cluster {}'.format(label + 1))
        print(Fore.BLUE + "-"*80)
        for desc in descriptions[:50]:
            print(desc)
def EagleAUser(username):
    """Run the full analysis pipeline for one Twitter user.

    Launches the helper scripts one after another as child processes
    (timeline download, hashtag / mention / term statistics, user and
    follower download) and finally clusters the followers with
    ``GetCluster``.

    Args:
        username: Twitter screen name. It is passed to the helper scripts as
            a single argv element and is never interpreted by a shell, so
            characters such as ``;`` or ``$`` in user input cannot inject
            commands.
    """
    timeline = 'users/' + username + '/user_timeline_' + username + '.jsonl'

    print(Fore.GREEN + "\nRunning Module 1 : Get User Timeline")
    sp.call(['python', 'twitter_get_user_timeline.py', username])

    # Modules that consume the timeline file produced by module 1.
    timeline_modules = (
        ("\nRunning Module 2 : Hashtag Frequency",
         'twitter_hashtag_frequency.py'),
        ("\nRunning Module 3 : Hashtag Stats",
         'twitter_hashtag_stats.py'),
        ("\nRunning Module 4 : Mention Frequency",
         'twitter_mention_frequency.py'),
        ("\nRunning Module 5 : Term Frequency",
         'twitter_term_frequency.py'),
        ("\nRunning Module 6 : Exporting Term Frequency Graph",
         'twitter_term_frequency_graph.py'),
    )
    for banner, script in timeline_modules:
        print(Fore.GREEN + banner)
        time.sleep(2)
        sp.call(['python', script, timeline])
    print(Fore.GREEN + "\nDone...")

    # Modules that take the screen name itself.
    user_modules = (
        ("\nRunning Module 7 : Dowloading User Data ...",
         'twitter_get_user.py'),
        ("\nRunning Module 7 : Getting User Follower Data",
         'twitter_followers_stats.py'),
    )
    for banner, script in user_modules:
        print(Fore.GREEN + banner)
        time.sleep(2)
        sp.call(['python', script, username])

    print(Fore.GREEN + "\nRunning Module 8 : Minning the followers' data")
    GetCluster(username)
def EagleStream(tags):
    """Start the streaming helper script for the given hashtags.

    Args:
        tags: Space-separated hashtags in the shell-escaped form built by the
            menu code in this file (each tag written as ``\\#tag``).

    Raises:
        ValueError: If ``tags`` contains an unbalanced quote character.
    """
    # Local import, matching the function-scope imports used in GetCluster.
    import shlex

    print(Fore.GREEN + "\nRunning Module 1 : Streaming Twitter Data for event - {}".format(tags))
    # shlex.split removes the backslash escaping and splits on whitespace the
    # way a POSIX shell would, but nothing is executed by a shell, so
    # metacharacters typed by the user cannot inject commands.
    sp.call(['python', 'twitter_streaming.py'] + shlex.split(tags))
# ---------------------------------------------------------------------------
# Interactive entry point: show the banner, list the tasks, dispatch on the
# user's choice.
# ---------------------------------------------------------------------------
from pyfiglet import Figlet

f = Figlet(font='nancyj')
print("\n" + Fore.MAGENTA + f.renderText('Tweagle'))
print(Fore.MAGENTA + "~ by Mohammad Shahebaz")
print("-"*80)

task_list = ("Analyze a twitter user/page", "Stream an event")
for i, c in enumerate(task_list):
    print("{}.{}".format(i+1, c))

# int() raises ValueError on non-numeric input, the same exception type used
# below for an out-of-range option.
choice = int(input("Enter an option:"))

if choice == 1:
    screen_name = input("Enter twitter username: ")
    EagleAUser(screen_name)
elif choice == 2:
    tags = input("Enter event Hashtag. Seperate by spaces: ")
    # split() without an argument ignores repeated/leading/trailing spaces,
    # so no empty "#" tags are produced. Each tag is prefixed with an escaped
    # '#', the form EagleStream expects.
    tag_list = ["\\" + "#" + tag for tag in tags.split()]
    tags_proccssed = " ".join(tag_list)
    EagleStream(tags_proccssed)
else:
    # The exception was previously constructed but never raised, so an
    # invalid option exited silently.
    raise ValueError("Invalid Input")