-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathutils.py
52 lines (40 loc) · 1.75 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import xml.etree.ElementTree as ET
def semeval2014term_to_aspectsentiment_hr(filename, remove_conflicting=True):
    """Read a SemEval-2014 style aspect-term XML file into readable labels.

    Every ``<sentence>`` element is scanned for ``<aspectTerm>`` elements.
    Each aspect term contributes a ``(term, sentiment)`` pair, where the
    sentiment is the short code mapped from the element's ``polarity``
    attribute (``positive`` -> ``POS``, ``negative`` -> ``NEG``,
    ``neutral`` -> ``NEU``, ``conflict`` -> ``CONF``).

    Args:
        filename: Path of the XML file. It is handed directly to
            ``xml.etree.ElementTree.parse``, so an already opened file
            object works as well.
        remove_conflicting: When true (the default), aspect terms whose
            polarity is ``conflict`` are left out of the per-sentence
            pairs. When false they are kept with the ``CONF`` code.

    Returns:
        A tuple ``(sentences, aspect_term_sentiments, (idx2aspectlabel,
        idx2sentilabel))``:

        * ``sentences`` -- text of each sentence that kept at least one
          aspect term.
        * ``aspect_term_sentiments`` -- for each of those sentences, in the
          same order, its list of ``(term, sentiment)`` pairs.
        * ``idx2aspectlabel`` -- dict from 0-based index to aspect term,
          in sorted order. It covers every aspect term seen in the file,
          including terms whose conflicting annotations were dropped.
        * ``idx2sentilabel`` -- ``{1: 'NEG', 2: 'NEU', 3: 'POS', 4: 'CONF'}``.

    Raises:
        KeyError: An ``<aspectTerm>`` has a ``polarity`` that is missing or
            not one of the four known values.
        AttributeError: A ``<sentence>`` has no ``<text>`` child.
        xml.etree.ElementTree.ParseError: The file is not well-formed XML.
        OSError: The file cannot be opened.
    """
    sentimap = {
        'positive': 'POS',
        'negative': 'NEG',
        'neutral': 'NEU',
        'conflict': 'CONF',
    }

    # Let ElementTree open the file itself: it reads bytes and decodes them
    # according to the document's own XML encoding declaration. Opening the
    # file in text mode first would decode it with the platform's locale
    # encoding, which corrupts or rejects files in any other encoding.
    root = ET.parse(filename).getroot()

    sentences = []
    aspect_term_sentiments = []
    classes = set()

    for sentence in root.iter('sentence'):
        sentence_text = sentence.find('text').text
        aspect_term_sentiment = []
        for term_element in sentence.iter('aspectTerm'):
            aspect_term = term_element.get('term')
            # Registered before the conflict filter below, so a term whose
            # only annotations get dropped still appears in the label index.
            classes.add(aspect_term)
            sentiment = sentimap[term_element.get('polarity')]
            if sentiment == 'CONF' and remove_conflicting:
                continue
            aspect_term_sentiment.append((aspect_term, sentiment))

        # Sentences left without any aspect term are skipped entirely, which
        # keeps the two output lists index-aligned.
        if aspect_term_sentiment:
            aspect_term_sentiments.append(aspect_term_sentiment)
            sentences.append(sentence_text)

    idx2aspectlabel = dict(enumerate(sorted(classes)))
    sentilabel2idx = {"NEG": 1, "NEU": 2, "POS": 3, "CONF": 4}
    idx2sentilabel = {idx: label for label, idx in sentilabel2idx.items()}

    return sentences, aspect_term_sentiments, (idx2aspectlabel, idx2sentilabel)