-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculate_liwc.py
144 lines (128 loc) · 3.63 KB
/
calculate_liwc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import csv
import sys
# Usage: $ python calculate_liwc.py <score> <range>
# The 4 valid combinations:
# $ python calculate_liwc.py minmax minmax
# $ python calculate_liwc.py minmax 100
# $ python calculate_liwc.py subtraction minmax
# $ python calculate_liwc.py subtraction 100
# Command-line options:
#   LIWC_SCORE_OPT selects how the positive and negative values are combined
#   into one score ('minmax' or 'subtraction').
#   LIWC_RANGE_OPT selects the range used to normalize that score
#   ('minmax' or '100').
_USAGE = 'usage: python calculate_liwc.py {minmax|subtraction} {minmax|100}'
if len(sys.argv) < 3:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit(_USAGE)
LIWC_SCORE_OPT = sys.argv[1]
LIWC_RANGE_OPT = sys.argv[2]
# Explicit checks rather than assert: assert statements are removed when the
# interpreter runs with -O, which would let invalid options through.
if LIWC_SCORE_OPT not in ('minmax', 'subtraction'):
    sys.exit('invalid score option %r\n%s' % (LIWC_SCORE_OPT, _USAGE))
if LIWC_RANGE_OPT not in ('minmax', '100'):
    sys.exit('invalid range option %r\n%s' % (LIWC_RANGE_OPT, _USAGE))
# Parenthesized form prints the same text under Python 2 and Python 3.
print(sys.argv)
# Normalization
def scale(a, b, c, d):
    """Build the linear map that sends the interval [a, b] onto [c, d].

    Returns a one-argument function.  Calling it with a number gives the
    point on the straight line through (a, c) and (b, d).  When ``a == b``
    the returned function raises ZeroDivisionError on every call.
    """
    def mapper(value):
        # float() forces true division even when every bound is an int.
        slope = float(d - c) / (b - a)
        return slope * (value - a) + c
    return mapper
def normalize(a, b):
    """Return a function mapping the interval [a, b] linearly onto [-1, +1]."""
    target_low, target_high = -1, 1
    return scale(a, b, target_low, target_high)
# Auxiliary functions
def calculate_score(pos, neg, method):
    """Combine a positive and a negative value into one signed score.

    With ``method == 'subtraction'`` the score is ``pos - neg``.
    Any other method is handled as 'minmax': the score is 0 when both
    values are equal, otherwise it is the larger of the two, negated when
    that larger value is ``neg``.
    """
    if method == 'subtraction':
        return pos - neg
    # Everything below is the 'minmax' rule.
    if pos == neg:
        return 0
    dominant = max(pos, neg)
    return -dominant if dominant == neg else dominant
# Convert sc_output.txt into sc_output.csv
# Each line of sc_output.txt is stripped of surrounding whitespace, split on
# tabs, and written as one row of sc_output.csv.
# The input is opened first so a missing sc_output.txt does not truncate an
# existing sc_output.csv, and the with-statement closes both files even when
# a read or write fails.  Mode 'w' already starts from an empty file, so no
# separate truncate-then-append step is needed.
with open('sc_output.txt') as output_txt, \
        open('sc_output.csv', 'w') as output_csv:
    csv_writer = csv.writer(
        output_csv,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL
    )
    for line in output_txt:
        csv_writer.writerow(line.strip().split('\t'))
# Calculate MINs and MAXs
# First pass over sc_output.csv: compute the LIWC score of every data row so
# that the observed extremes are known before anything is normalized.
outs = []
with open('sc_output.csv') as output:
    out_reader = csv.reader(
        output,
        delimiter=',',
        quotechar='"'
    )
    # Skip the header row; next() with a default tolerates an empty file.
    next(out_reader, None)
    for out_row in out_reader:
        posemo = float(out_row[5])
        negemo = float(out_row[6])
        outs.append(calculate_score(posemo, negemo, LIWC_SCORE_OPT))
if not outs:
    # min()/max() on an empty list would raise an unhelpful ValueError.
    sys.exit('sc_output.csv contains no data rows')
print(min(outs))
print(max(outs))
# Range of the LIWC score that gets mapped onto [-1, +1].
if LIWC_RANGE_OPT == '100':
    MIN_LIWC = -100
    MAX_LIWC = 100
else:  # LIWC_RANGE_OPT == 'minmax'
    # Symmetric range around zero, wide enough for the largest magnitude.
    max_abs = max(abs(min(outs)), abs(max(outs)))
    if max_abs == 0:
        # Every score is zero: a [0, 0] range would make the normalization
        # divide by zero.  A unit range maps 0 to 0 instead.
        max_abs = 1
    MIN_LIWC = -max_abs
    MAX_LIWC = max_abs
# Range of the values read from scores.csv that gets mapped onto [-1, +1].
MIN_SCORE = -2
MAX_SCORE = 2
# Join files
# Second pass: pair each data row of scores.csv with the data row at the same
# position in sc_output.csv, then write raw and normalized values side by side.
#
# The output name encodes the two command-line options, giving one of:
#   results_minmax_minmax.csv       results_minmax_100.csv
#   results_subtraction_minmax.csv  results_subtraction_100.csv
RESULT_FILE = 'results_%s_%s.csv' % (LIWC_SCORE_OPT, LIWC_RANGE_OPT)
# Build both normalizers once; their ranges do not change from row to row.
normalize_score = normalize(MIN_SCORE, MAX_SCORE)
normalize_liwc = normalize(MIN_LIWC, MAX_LIWC)
# The with-statement closes all three files even if a row fails to parse.
with open('scores.csv') as scores, \
        open('sc_output.csv') as output, \
        open(RESULT_FILE, 'w') as result:
    scr_reader = csv.reader(
        scores,
        delimiter=',',
        quotechar='"'
    )
    out_reader = csv.reader(
        output,
        delimiter=',',
        quotechar='"'
    )
    res_writer = csv.writer(
        result,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL
    )
    res_writer.writerow(['#s', 'sentence', 'score', 'liwc', 'norm_score', 'norm_liwc'])
    # Skip the header row of each input.
    next(scr_reader, None)
    next(out_reader, None)
    # zip stops at the shorter input, so unmatched trailing rows are dropped.
    for scr_row, out_row in zip(scr_reader, out_reader):
        ns = out_row[2]
        sentence = scr_row[0]
        score = float(scr_row[1])
        posemo = float(out_row[5])
        negemo = float(out_row[6])
        liwc = calculate_score(posemo, negemo, LIWC_SCORE_OPT)
        norm_score = normalize_score(score)
        norm_liwc = normalize_liwc(liwc)
        res_writer.writerow([ns, sentence, score, liwc, norm_score, norm_liwc])