-
Notifications
You must be signed in to change notification settings - Fork 0
/
term_sentiment.py
131 lines (97 loc) · 3.92 KB
/
term_sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import sys
import json
def computeSentiment(afinnfile, tweetfile):
    """Score tweets against an AFINN sentiment lexicon and rate unknown terms.

    Reads tab-delimited ``term<TAB>score`` lines from ``afinnfile`` into a
    lookup table, then, for each JSON tweet line in ``tweetfile``, sums the
    scores of the known words in the tweet text.  Every word NOT found in
    the lexicon is assigned the tweet's total score clamped to the AFINN
    range [-3, 3] and printed as ``term score`` (one pair per line).

    Args:
        afinnfile: iterable of ``"term\\tscore"`` strings (e.g. an open file).
        tweetfile: iterable of JSON-encoded tweet strings, one per line.

    Returns:
        List of ``(term, score)`` tuples in the order they were printed.
        (Backward-compatible addition: the original returned None and all
        callers ignored the result.)
    """
    scores = {}
    for line in afinnfile:
        # The lexicon is tab-delimited; int() tolerates the trailing newline.
        term, score = line.split("\t")
        scores[term] = int(score)

    results = []
    for raw in tweetfile:
        tweet = json.loads(raw)
        # Only the 'text' key carries the tweet body; all other keys are
        # metadata and were skipped by the original as well.
        text = tweet.get('text')
        if text is None:
            continue

        total = 0
        newterms = {}  # words not present in the lexicon, rated afterwards
        for word in text.split(" "):
            # One strip() replaces the original's rstrip()+strip()+lstrip().
            word = word.strip()
            if not word:
                continue
            if word in scores:  # dict.has_key() was removed in Python 3
                total += scores[word]
            else:
                newterms[word] = 0  # placeholder; rated once the tweet is scored
        # The original's nine-branch if-chain (>3, ==3, ==2, ..., <-3) assigned
        # in every branch the tweet score clamped to the AFINN bounds [-3, 3].
        rating = max(-3, min(3, total))
        for term in newterms:
            newterms[term] = rating
            print(term, rating)
            results.append((term, rating))
    return results
def main():
    """Command-line entry point.

    Usage: term_sentiment.py <sentiment_lexicon_file> <tweet_file>

    Opens both input files and delegates to computeSentiment().  The ``with``
    statement guarantees both handles are closed even if scoring raises —
    the original opened them and never closed either.
    """
    with open(sys.argv[1]) as sent_file, open(sys.argv[2]) as tweet_file:
        computeSentiment(sent_file, tweet_file)
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()