-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmaplight_convert2.py
155 lines (126 loc) · 5.13 KB
/
maplight_convert2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import re
import json
import encode
from pprint import pprint
import legislators_current as leg
# Legislator data, loaded once at import time through the project-local
# ``legislators_current`` module (imported above as ``leg``).
legs = leg.load()

# Names of the per-person fields in the MapLight data.
# NOTE(review): presumably the full record schema; parse() reads only a few of
# these keys and nothing in the visible code references ``fields`` -- confirm
# before removing.
fields = [
    'district_holding',
    'district_running',
    'first_name',
    'full_name',
    'gender',
    'ico',
    'last_name',
    'office_holding',
    'office_running',
    'party',
    'person_id',
    'state',
    'status',
    'status_date',
    'url_photo',
]
def parse(path="maplight-convert/all_split.json"):
    """Build a nested lookup of office holders from a line-split JSON dump.

    The input is expected to contain one JSON object per line, optionally
    wrapped in a JSON array: a line may start with ``[`` and may end with
    ``,`` or ``]``.  Only lines containing the text ``person_id`` are parsed;
    every other line is skipped.

    Args:
        path: File to read.  Defaults to the location that used to be
            hard-coded, so existing ``parse()`` calls behave as before.

    Returns:
        dict: ``index[office_holding][state][district_holding][party]`` mapped
        to the ``full_name`` of the first record seen for that combination.
        Later records with the same four keys are ignored.

    Raises:
        IOError / OSError: If ``path`` cannot be opened.
        ValueError: If a ``person_id`` line is not a JSON object once the
            array punctuation has been stripped.
        KeyError: If a record lacks one of the fields read below.
    """
    # Local import keeps this block self-contained; io.open accepts an
    # explicit encoding on both Python 2 and Python 3.
    import io

    index = {}
    with io.open(path, encoding="utf-8") as infile:
        for line in infile:
            if 'person_id' not in line:
                continue
            # Strip surrounding whitespace and array punctuation rather than
            # slicing a fixed number of characters: this copes with CRLF line
            # endings, a leading "[", and a record without a trailing comma.
            # A JSON object starts with "{" and ends with "}", so the strip
            # can never eat into the record itself.
            data = json.loads(line.strip('[], \t\r\n'))
            full_name = data['full_name']
            office = data['office_holding']
            state = data['state']
            dist = data['district_holding']
            party = data['party']
            by_party = (
                index.setdefault(office, {})
                .setdefault(state, {})
                .setdefault(dist, {})
            )
            # setdefault keeps the first name seen for a given party slot.
            by_party.setdefault(party, full_name)
    return index
# Build the office -> state -> district -> party -> name index once, at import
# time; the matching loop at the bottom of the file reads it as ``data``.
data =parse()
# NOTE: this rebinds the module-level name ``pprint``.  The earlier
# ``from pprint import pprint`` bound it to the function; from here on it
# refers to the ``pprint`` module.
import pprint
# Pretty-printer instance, only used by the commented-out debug dump below.
pp = pprint.PrettyPrinter(indent=4)
#pp.pprint( data)
def person_match(full_name):
    """Report a successful name match on standard output.

    Args:
        full_name: The candidate name string that matched.

    Returns:
        None.  The only effect is printing ``match <full_name>``.
    """
    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function; the old
    # ``print "match", name`` form is a SyntaxError on Python 3.
    print("match %s" % (full_name,))
def check_suffix(old_name, last_obj, nameobj):
    """Retry a failed comparison with the legislator's name suffix appended.

    Args:
        old_name: Candidate name that did not match on its own.
        last_obj: Name to compare against.
        nameobj: Mapping describing the legislator's name; only the optional
            ``'suffix'`` entry is read here.

    Returns:
        bool: True when ``old_name + " " + suffix.lower()`` equals
        ``last_obj`` (the match is also reported through ``person_match``);
        False when there is no suffix or the suffixed name differs.
    """
    # Every path returns an explicit bool; previously one path fell off the
    # end of the function and returned None.
    if 'suffix' not in nameobj:
        return False
    full_name = old_name + " " + nameobj['suffix'].lower()
    if full_name != last_obj:
        return False
    person_match(full_name)
    return True
def check_middle_initial(last_obj, nameobj):
    """Compare ``last_obj`` with the "first m. last" form of the name.

    The candidate is the lower-cased first name, the lower-cased first
    character of the middle name followed by a period, and the lower-cased
    last name, passed through ``encode.decodeuc`` (project helper).

    Args:
        last_obj: Name to compare against.
        nameobj: Mapping with ``'first'``, ``'middle'`` and ``'last'`` entries
            (``'suffix'`` is consulted by the fallback).

    Returns:
        True after reporting the match via ``person_match``; otherwise
        whatever ``check_suffix`` returns for the same candidate.
    """
    first = nameobj['first'].lower()
    initial = nameobj['middle'][0].lower()
    last = nameobj['last'].lower()
    candidate = encode.decodeuc("%s %s. %s" % (first, initial, last))

    if candidate != last_obj:
        # No direct hit: try once more with the suffix appended.
        return check_suffix(candidate, last_obj, nameobj)

    person_match(candidate)
    return True
def check_nick(last_obj, nameobj):
    """Compare ``last_obj`` with the "nickname last" form of the name.

    The candidate is the lower-cased nickname and last name joined by a
    space, passed through ``encode.decodeuc`` (project helper).

    Args:
        last_obj: Name to compare against.
        nameobj: Mapping with ``'nick'`` and ``'last'`` entries (``'suffix'``
            is consulted by the fallback).

    Returns:
        True after reporting the match via ``person_match``; otherwise
        whatever ``check_suffix`` returns for the same candidate.
    """
    nickname = nameobj['nick'].lower()
    surname = nameobj['last'].lower()
    candidate = encode.decodeuc("%s %s" % (nickname, surname))

    if candidate != last_obj:
        # No direct hit: try once more with the suffix appended.
        return check_suffix(candidate, last_obj, nameobj)

    person_match(candidate)
    return True
def check_middle(last_obj, nameobj):
    """Compare ``last_obj`` with the name forms that include a middle name.

    Tried in order:
      1. "first middle last" (lower-cased, passed through
         ``encode.decodeuc``),
      2. the middle-initial form, via ``check_middle_initial``,
      3. "first middle last" with the suffix appended, via ``check_suffix``.

    Args:
        last_obj: Name to compare against.
        nameobj: Mapping with ``'first'``, ``'middle'`` and ``'last'`` entries.

    Returns:
        bool: True as soon as one form matches, False otherwise.
    """
    full_name = encode.decodeuc(
        nameobj['first'].lower() + " "
        + nameobj['middle'].lower() + " "
        + nameobj['last'].lower()
    )
    if full_name == last_obj:
        person_match(full_name)
        return True
    # Previously a successful middle-initial match fell off the end of the
    # function and returned None, so callers treated the hit as a miss.
    if check_middle_initial(last_obj, nameobj):
        return True
    return bool(check_suffix(full_name, last_obj, nameobj))
## TODO:
# 1. handle name prefixes (e.g. "Dr.")
# 2. handle middle names written without a "."
# 3. handle alternate names (double last names: try both)
# 4. remove '"' characters from names
# 5. handle "last, first" ordering
def check_simple(last_obj, nameobj):
    """Decide whether ``last_obj`` names the legislator described by ``nameobj``.

    Candidate spellings are tried in this order, stopping at the first hit:
      1. ``official_full`` (when present),
      2. "first last",
      3. the nickname forms (``check_nick``) when a ``'nick'`` entry exists,
      4. the middle-name forms (``check_middle``) when a ``'middle'`` entry
         exists.
    Candidates 1 and 2 are lower-cased and passed through
    ``encode.decodeuc`` (project helper) before comparison.

    Args:
        last_obj: Name to compare against; callers pass it lower-cased.
        nameobj: Mapping with at least ``'first'`` and ``'last'``; the entries
            ``'official_full'``, ``'nick'``, ``'middle'`` and ``'suffix'`` are
            optional.

    Returns:
        bool: True when any candidate matched (each match is reported through
        ``person_match``), False otherwise.
    """
    candidates = []
    # Tolerate a missing official_full instead of raising KeyError.
    if 'official_full' in nameobj:
        candidates.append(encode.decodeuc(nameobj['official_full'].lower()))
    candidates.append(
        encode.decodeuc(nameobj['first'].lower() + " " + nameobj['last'].lower())
    )
    for candidate in candidates:
        if candidate == last_obj:
            # Report the spelling that actually matched; the first/last
            # branch used to report official_full instead.
            person_match(candidate)
            return True

    # Independent checks rather than an elif chain: a nickname that fails to
    # match must not prevent the middle-name forms from being tried.
    if 'nick' in nameobj and check_nick(last_obj, nameobj):
        return True
    if 'middle' in nameobj and check_middle(last_obj, nameobj):
        return True
    return False
def scan_district(state_obj, nameobj):
    """Look for the legislator among every name recorded for a state.

    Args:
        state_obj: Mapping ``district -> {party: full_name}`` for one state,
            as built by ``parse``.
        nameobj: Mapping describing the legislator's name, handed to
            ``check_simple``.

    Returns:
        bool: True as soon as ``check_simple`` accepts one of the lower-cased
        names, False when none matches (previously an implicit None).
    """
    # NOTE: neither the district nor the party is used to narrow the search;
    # every name stored for the state is compared.
    for district_obj in state_obj.values():
        for holder in district_obj.values():
            if check_simple(holder.lower(), nameobj):
                return True
    return False
# Walk the legislators in sorted key order and look for each one's name among
# the MapLight entries of the matching chamber and state.  Matches are
# reported by person_match(); the result of scan_district() is not used.
for x in sorted(legs['wp']):
    last_term = legs['wp'][x]['terms'][-1]
    nameobj = legs['wp'][x]['name']

    # 'rep' terms are looked up under "House", everything else under "Senate".
    chamber = data['House'] if last_term['type'] == 'rep' else data['Senate']

    state = last_term['state']
    if state not in chamber:
        continue
    state_obj = chamber[state]

    # NOTE(review): terms without a 'district' key are never scanned.  If
    # Senate terms carry no such key, the "Senate" lookup above has no
    # effect -- confirm whether that is intended.
    if 'district' not in last_term:
        continue
    scan_district(state_obj, nameobj)

# Example of a ``terms`` entry:
#OrderedDict([('type', 'rep'), ('start', '2013-01-03'), ('end', '2015-01-03'), ('state', 'CA'), ('party', 'Democrat'), ('district', 19), ('url', 'http://www.house.gov/lofgren'), ('address', '1401 Longworth HOB; Washington DC 20515-0516'), ('phone', '202-225-3072'), ('fax', '202-225-3336'), ('contact_form', 'http://lofgren.house.gov/emailform.shtml'), ('office', '1401 Longworth House Office Building'), ('rss_url', 'http://lofgren.house.gov/index.php?format=feed&type=rss')])