forked from rubenIzquierdo/opinion_miner_deluxePP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mpqa_lexicon.py
executable file
·96 lines (72 loc) · 2.99 KB
/
mpqa_lexicon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
import os
import re
# Disabled import of the same constant from ``__init__``; the path is
# hard-coded below instead.
##from __init__ import PATH_MPQA_LEXICON
# Location of the MPQA subjectivity clues file read by
# MPQA_subjectivity_lexicon. The path is relative, so it is resolved against
# the current working directory of the process, not this module's directory.
PATH_MPQA_LEXICON = './data/subjclueslen1-HLTEMNLP05.tff'
def normalize_pos(pos):
    """Map a part-of-speech label onto the one-letter tags used as lexicon keys.

    The label is lower-cased and then matched, in this order, against:

    * ``adj`` / ``a`` or anything starting with ``jj``      -> ``'a'``
    * ``adverb`` / ``r`` or anything starting with ``rb``   -> ``'r'``
    * ``anypos``                                            -> ``'*'``
    * ``noun`` / ``n`` or anything starting with ``nn``/``np`` -> ``'n'``
    * ``verb`` / ``v`` or anything starting with ``v``      -> ``'v'``

    Args:
        pos: part-of-speech label as a string (any letter case).

    Returns:
        The normalized tag, or the lower-cased input unchanged when no rule
        matches (this includes the empty string).
    """
    pos = pos.lower()
    if pos in ('adj', 'a') or pos.startswith('jj'):
        return 'a'
    if pos in ('adverb', 'r') or pos.startswith('rb'):
        return 'r'
    if pos == 'anypos':
        return '*'
    if pos in ('noun', 'n') or pos.startswith(('nn', 'np')):
        return 'n'
    # startswith() instead of pos[0] so an empty label falls through to the
    # default instead of raising IndexError. It also covers 'verb' and 'v'.
    if pos.startswith('v'):
        return 'v'
    return pos
class MPQA_subjectivity_lexicon:
    """In-memory lookup tables built from the MPQA subjectivity lexicon file.

    Every entry is stored as a ``(type, prior_polarity)`` tuple in four
    dictionaries:

    * ``stemmed`` / ``no_stemmed`` -- keyed by ``(word, pos)``, for lines
      whose ``stemmed1`` field is ``y`` / ``n`` respectively.
    * ``stemmed_anypos`` / ``no_stemmed_anypos`` -- keyed by ``word`` alone.
      Every entry is recorded here whatever its part of speech, so when a
      word appears on several lines the last line read wins.
    """

    # One pre-compiled pattern per field, in the order the fields are
    # unpacked in __load. A field value runs up to the next space.
    _FIELD_PATTERNS = tuple(
        re.compile(field + '=([^ ]+)')
        for field in ('type', 'word1', 'pos1', 'stemmed1', 'priorpolarity')
    )

    def __init__(self, path=None):
        """Create the lexicon and load it from disk.

        Args:
            path: lexicon file to read. When omitted, the module-level
                ``PATH_MPQA_LEXICON`` is used.
        """
        self.stemmed = {}
        self.stemmed_anypos = {}
        self.no_stemmed = {}
        self.no_stemmed_anypos = {}
        self.__load(PATH_MPQA_LEXICON if path is None else path)

    def __load(self, path):
        """Fill the lookup tables from ``path``.

        Expected line format::

            type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative

        A missing file leaves the tables empty. Lines that lack any of the
        required fields (for instance blank lines) are skipped.
        """
        if not os.path.exists(path):
            return

        # The context manager closes the file even if parsing raises.
        with open(path) as lexicon_file:
            for raw_line in lexicon_file:
                line = raw_line.strip()
                matches = [pattern.search(line)
                           for pattern in self._FIELD_PATTERNS]
                if any(match is None for match in matches):
                    continue
                this_type, word, pos, stemmed, prior_polarity = [
                    match.group(1) for match in matches
                ]

                pos = normalize_pos(pos)
                entry = (this_type, prior_polarity)
                # The word-only tables are filled for every entry, not just
                # for pos == '*', so they act as a fallback for any word.
                if stemmed == 'y':
                    self.stemmed[(word, pos)] = entry
                    self.stemmed_anypos[word] = entry
                elif stemmed == 'n':
                    self.no_stemmed[(word, pos)] = entry
                    self.no_stemmed_anypos[word] = entry

    def print_all(self):
        """Print the ``stemmed`` table as ``word;pos;POLARITY`` lines.

        Only entries from the ``stemmed`` table are printed, and only those
        whose polarity is positive, negative or neutral.
        """
        for (word, pos), (_, this_polarity) in self.stemmed.items():
            if this_polarity in ('positive', 'negative', 'neutral'):
                # Single-argument print() behaves the same on Python 2 and 3.
                print('%s;%s;%s' % (word, pos, this_polarity.upper()))

    def get_type_and_polarity(self, word, pos=None):
        """Look up ``word`` and return its ``(type, prior_polarity)`` tuple.

        Lookup order: non-stemmed with the given pos, stemmed with the given
        pos, non-stemmed ignoring pos, stemmed ignoring pos.

        Args:
            word: the word form to look up.
            pos: optional part-of-speech label, normalized with
                ``normalize_pos``. ``None`` or an empty string skips the
                pos-specific lookups.

        Returns:
            The ``(type, prior_polarity)`` tuple, or ``None`` if the word is
            not in any table.
        """
        res = None
        if pos:
            key = (word, normalize_pos(pos))
            res = self.no_stemmed.get(key)
            if res is None:
                res = self.stemmed.get(key)
        if res is None:
            res = self.no_stemmed_anypos.get(word)
        if res is None:
            res = self.stemmed_anypos.get(word)
        return res
if __name__ == '__main__':
    # Script mode: build the lexicon and dump it via print_all().
    lexicon = MPQA_subjectivity_lexicon()
    lexicon.print_all()
    # Example single lookup (disabled):
    #   lexicon.get_type_and_polarity('abidance', 'adj')