Skip to content

Commit

Permalink
2024 election import updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
dracos committed Jul 2, 2024
1 parent 4e9037b commit 717878e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 44 deletions.
22 changes: 0 additions & 22 deletions scripts/ynmp/fix-names

This file was deleted.

47 changes: 25 additions & 22 deletions scripts/ynmp/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
import unicodedata
import urllib.request

sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

CSV_URL = 'https://candidates.democracyclub.org.uk/results/csv/parl.2019-12-12/'
CSV_URL = 'https://candidates.democracyclub.org.uk/data/export_csv/?election_id=parl.2024-07-04&extra_fields=elected&extra_fields=tied_vote_winner&extra_fields=results_source&extra_fields=mnis_id&extra_fields=twfy_id&format=csv'
JSON = os.path.join(os.path.dirname(__file__), '..', '..', 'members', 'people.json')


Expand All @@ -25,7 +23,7 @@ def main():

def update_from(csv_url, data):
changed = False
for ynmp_id, name, party, cons, person_id, elected in ynmp_csv_reader(csv_url):
for ynmp_id, name, party, cons, person_id, mnis_id in ynmp_csv_reader(csv_url):
# Add a new party if it's not one we know
if party not in data['orgs']:
data['orgs'][party] = slugify(party)
Expand All @@ -51,10 +49,13 @@ def update_from(csv_url, data):
data['max_person_id'] += 1
person_id = 'uk.org.publicwhip/person/%d' % data['max_person_id']
name['note'] = 'Main'
identifiers = [identifier]
if mnis_id:
identifiers.append({'scheme': 'datadotparl_id', 'identifier': mnis_id})
new_person = {
'id': person_id,
"other_names": [ name ],
'identifiers': [identifier],
'identifiers': identifiers,
'shortcuts': {
'current_party': party,
'current_constituency': data['posts_by_name'][cons]['area']['name'],
Expand All @@ -67,7 +68,7 @@ def update_from(csv_url, data):
new_mship = {
'on_behalf_of_id': data['orgs'][party],
'person_id': person_id,
'start_date': '2019-12-13',
'start_date': '2024-07-05',
'start_reason': 'general_election',
}
if cons in data['existing']:
Expand Down Expand Up @@ -111,8 +112,8 @@ def slugify(value):
Also strips leading and trailing whitespace.
"""
value = unicodedata.normalize('NFKD', str(value)).encode('ascii', 'ignore').decode('ascii')
value = re.sub('[^\w\s-]', '', value).strip().lower()
return re.sub('[-\s]+', '-', value)
value = re.sub(r'[^\w\s-]', '', value).strip().lower()
return re.sub(r'[-\s]+', '-', value)


def load_data():
Expand Down Expand Up @@ -149,7 +150,6 @@ def load_data():

PARTY_YNMP_TO_TWFY = {
'Labour Party': 'Labour',
'Conservative Party': 'Conservative',
'Conservative and Unionist Party': 'Conservative',
'Liberal Democrats': 'Liberal Democrat',
'Ulster Unionist Party': 'UUP',
Expand All @@ -158,37 +158,40 @@ def load_data():
'Plaid Cymru - The Party of Wales': 'Plaid Cymru',
"Labour and Co-operative Party": 'Labour/Co-operative',
'Democratic Unionist Party - D.U.P.': 'DUP',
'The Respect Party': 'Respect',
"SDLP (Social Democratic & Labour Party)": "Social Democratic and Labour Party",
"UK Independence Party (UKIP)": "UKIP",
"UK Independence Party (UK I P)": "UKIP",
"Alliance - Alliance Party of Northern Ireland": 'Alliance',
'Green Party': 'Green',
'Scottish Green Party': 'Green',
'Traditional Unionist Voice - TUV': 'Traditional Unionist Voice',
'The Brexit Party': 'Brexit',
# 'The Independent Group for Change'
'Alba Party': 'Alba',
'Workers Party of Britain': 'Workers Party',
}


def ynmp_csv_reader(fn):
    """Yield one tuple per elected candidate found in the candidates CSV.

    ``fn`` is either a URL string, which is fetched with
    ``urllib.request.urlopen``, or an already-open stream.  The stream is
    wrapped in a UTF-8 decoding reader before being handed to
    ``csv.DictReader``.
    NOTE(review): this presumes a caller-supplied stream is binary -- confirm
    against any caller that does not pass a URL.

    Each yielded tuple is
    ``(ynmp_id, name, party, cons, person_id, mnis_id)`` where:

    * ``ynmp_id`` is ``int(row['person_id'])``;
    * ``name`` is ``{'given_name': ..., 'family_name': ...}`` split from
      ``row['person_name']``;
    * ``party`` is ``row['party_name']`` mapped through
      ``PARTY_YNMP_TO_TWFY`` (unmapped names pass through unchanged);
    * ``cons`` is the middle part of ``row['ballot_paper_id']``;
    * ``person_id`` is ``'uk.org.publicwhip/person/<digits>'`` built from the
      first run of digits in ``row['twfy_id']``, or ``None`` if it has none;
    * ``mnis_id`` is ``row['mnis_id']`` as read from the CSV.

    Rows whose ``elected`` field is empty are skipped.

    Raises ``AssertionError`` if a row's ``election_id`` is not
    ``parl.2024-07-04``.
    """
    opened_here = isinstance(fn, str)
    if opened_here:
        fn = urllib.request.urlopen(fn)
    raw = fn
    try:
        reader = codecs.getreader("utf-8")(raw)  # Stream in as Unicode
        for row in csv.DictReader(reader):
            assert row['election_id'] == 'parl.2024-07-04'
            name = row['person_name'].strip()
            # TWFY has separate first/last name fields. This should catch most.
            if name_match := re.match('(?i)(.*?) ((?:van |de |der |den |von |st |duncan |lloyd )*[^ ]*)$', name):
                given, family = name_match.groups()
            else:
                # No space to split on (e.g. a single-word name): treat the
                # whole string as the family name.  `given` must be assigned
                # here, otherwise it is unbound or left over from the
                # previous row.
                given, family = '', name
            party = row['party_name']
            party = PARTY_YNMP_TO_TWFY.get(party, party)
            ballot_match = re.match(r'parl\.(.*)\.2024-07-04', row['ballot_paper_id'])
            cons = ballot_match.group(1)
            twfy_match = re.search(r'(\d+)', row['twfy_id'])
            person_id = ('uk.org.publicwhip/person/' + twfy_match.group(1)) if twfy_match else None
            ynmp_id = int(row['person_id'])
            # NOTE(review): any non-empty string counts as elected here --
            # confirm the export leaves this field blank for other candidates.
            if not row['elected']:
                continue
            yield ynmp_id, {'given_name': given, 'family_name': family}, party, cons, person_id, row['mnis_id']
    finally:
        # Only close what this function opened; a caller-supplied stream
        # remains the caller's responsibility.
        if opened_here:
            raw.close()


def mship_has_changed(old, new):
Expand Down

0 comments on commit 717878e

Please sign in to comment.