Skip to content

Commit

Permalink
2024 election import updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
dracos committed Jul 2, 2024
1 parent 4e9037b commit b57a373
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 65 deletions.
22 changes: 0 additions & 22 deletions scripts/ynmp/fix-names

This file was deleted.

45 changes: 23 additions & 22 deletions scripts/ynmp/tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,27 @@ First, let's get the functions and load in the existing people.json data:

And we want a function to alter the fixture with different information:

>>> import StringIO, csv
>>> source = ['election_slug,ballot_paper_id,gss,person_id,person_name,party_id,party_name,theyworkforyou_url,wikidata_id']
>>> import csv
>>> from io import StringIO
>>> source = ['person_id,person_name,election_id,ballot_paper_id,election_date,election_current,party_name,party_id,post_label,cancelled_poll,seats_contested,elected,tied_vote_winner,results_source,mnis_id,twfy_id']
>>> cols = source[0].split(',')
>>> def alter(cmd, cons_id, dcid=None, name=None, party=None, pid=None):
... global source
... reader = csv.DictReader(source)
... s = StringIO.StringIO()
... s = StringIO()
... fn = csv.DictWriter(s, cols)
... fn.writeheader()
... for line in reader:
... if cmd == 'add' or line['ballot_paper_id'] != cons_id:
... fn.writerow(line)
... if cmd == 'add':
... fn.writerow({'election_slug':'parl.2019-12-12', 'ballot_paper_id': cons_id, 'person_id': dcid, 'person_name': name, 'party_name': party, 'theyworkforyou_url': pid})
... fn.writerow({'election_id':'parl.2024-07-04', 'ballot_paper_id': cons_id, 'person_id': dcid, 'person_name': name, 'party_name': party, 'twfy_id': pid, 'elected': 't'})
... s.seek(0)
... source = s.readlines()

>>> def process():
... data['dealt_with'] = []
... s = StringIO.StringIO(''.join(source))
... s = StringIO(''.join(source))
... return update_from(s, data)

Now we want to call it with the CSV file before anything has happened:
Expand All @@ -40,54 +41,54 @@ Now we want to call it with the CSV file before anything has happened:

Diane Abbott wins:

>>> alter('add', 'parl.hackney-north-and-stoke-newington.2019-12-12', 2567, 'Diane Abbott', 'Labour', '10001')
>>> alter('add', 'parl.hackney-north-and-stoke-newington.2024-07-04', 2567, 'Diane Abbott', 'Labour', '10001')
>>> process()
NEW result 42077, Diane Abbott, Labour, hackney-north-and-stoke-newington, uk.org.publicwhip/person/10001
NEW result 42782, Diane Abbott, Labour, hackney-north-and-stoke-newington, uk.org.publicwhip/person/10001
True

Boris Johnson loses:

>>> alter('add', 'parl.uxbridge-and-south-ruislip.2019-12-12', 72363, 'Count Binface', 'Independent')
>>> alter('add', 'parl.uxbridge-and-south-ruislip.2024-07-04', 72363, 'Count Binface', 'Independent')
>>> process()
NEW result 42078, Count Binface, Independent, uxbridge-and-south-ruislip, uk.org.publicwhip/person/25791
NEW result 42783, Count Binface, Independent, uxbridge-and-south-ruislip, uk.org.publicwhip/person/26307
True

Actually, that was a mistake at the YNMP end, revert until we know the result:

>>> alter('remove', 'parl.uxbridge-and-south-ruislip.2019-12-12')
>>> alter('remove', 'parl.uxbridge-and-south-ruislip.2024-07-04')
>>> process()
Removing result from uk.org.publicwhip/member/42078 (was uk.org.publicwhip/cons/2241, independent, uk.org.publicwhip/person/25791)
Removing result from uk.org.publicwhip/member/42783 (was uk.org.publicwhip/cons/2847, independent, uk.org.publicwhip/person/26307)
True

We then think someone *else* wins:

>>> alter('add', 'parl.uxbridge-and-south-ruislip.2019-12-12', 72364, 'Lord Buckethead', 'Monster Raving Loony Party')
>>> alter('add', 'parl.uxbridge-and-south-ruislip.2024-07-04', 72364, 'Lord Buckethead', 'Monster Raving Loony Party')
>>> process()
Updating uk.org.publicwhip/member/42078 with Lord Buckethead, Monster Raving Loony Party, uxbridge-and-south-ruislip, uk.org.publicwhip/person/25792
Updating uk.org.publicwhip/member/42783 with Lord Buckethead, Monster Raving Loony Party, uxbridge-and-south-ruislip, uk.org.publicwhip/person/26308
True

It turns out Boris Johnson actually won; we get the removal and the new result in the same update:

>>> alter('remove', 'parl.uxbridge-and-south-ruislip.2019-12-12')
>>> alter('add', 'parl.uxbridge-and-south-ruislip.2019-12-12', 4546, 'Boris Johnson', 'Conservative and Unionist Party', 10999)
>>> alter('remove', 'parl.uxbridge-and-south-ruislip.2024-07-04')
>>> alter('add', 'parl.uxbridge-and-south-ruislip.2024-07-04', 4546, 'Boris Johnson', 'Conservative and Unionist Party', 10999)
>>> process()
Updating uk.org.publicwhip/member/42078 with Boris Johnson, Conservative, uxbridge-and-south-ruislip, uk.org.publicwhip/person/10999
Updating uk.org.publicwhip/member/42783 with Boris Johnson, Conservative, uxbridge-and-south-ruislip, uk.org.publicwhip/person/10999
True

Two David Jones both win, one with a person ID, one without:

>>> alter('add', 'parl.clwyd-west.2019-12-12', 3151, 'David Jones', 'Conservative and Unionist Party', 11506)
>>> alter('add', 'parl.blackley-and-broughton.2019-12-12', 5750, 'David Jones', 'Green Party')
>>> alter('add', 'parl.clwyd-north.2024-07-04', 3151, 'David Jones', 'Conservative and Unionist Party', 11506)
>>> alter('add', 'parl.blackley-and-middleton-south.2024-07-04', 5750, 'David Jones', 'Green Party')
>>> process()
NEW result 42079, David Jones, Conservative, clwyd-west, uk.org.publicwhip/person/11506
NEW result 42080, David Jones, Green, blackley-and-broughton, uk.org.publicwhip/person/25793
NEW result 42784, David Jones, Conservative, clwyd-north, uk.org.publicwhip/person/11506
NEW result 42785, David Jones, Green, blackley-and-middleton-south, uk.org.publicwhip/person/26309
True

Tessa Jane Munt wins, and YNMP have not supplied her parlparse ID (she was an MP until 2015):

>>> alter('add', 'parl.wells.2019-12-12', 1699, 'Tessa Munt', 'Liberal Democrats')
>>> alter('add', 'parl.wells-and-mendip-hills.2024-07-04', 1699, 'Tessa Munt', 'Liberal Democrats')
>>> process()
NEW result 42081, Tessa Munt, Liberal Democrat, wells, uk.org.publicwhip/person/25794
NEW result 42786, Tessa Munt, Liberal Democrat, wells-and-mendip-hills, uk.org.publicwhip/person/26310
True

If we then just run again with no changes, nothing should happen:
Expand Down
44 changes: 23 additions & 21 deletions scripts/ynmp/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
import unicodedata
import urllib.request

sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

CSV_URL = 'https://candidates.democracyclub.org.uk/results/csv/parl.2019-12-12/'
DATE = '2024-07-04'
CSV_URL = f'https://candidates.democracyclub.org.uk/data/export_csv/?election_id=parl.{DATE}&extra_fields=elected&extra_fields=tied_vote_winner&extra_fields=results_source&extra_fields=mnis_id&extra_fields=twfy_id&format=csv'
JSON = os.path.join(os.path.dirname(__file__), '..', '..', 'members', 'people.json')


Expand All @@ -25,7 +24,7 @@ def main():

def update_from(csv_url, data):
changed = False
for ynmp_id, name, party, cons, person_id, elected in ynmp_csv_reader(csv_url):
for ynmp_id, name, party, cons, person_id, mnis_id in ynmp_csv_reader(csv_url):
# Add a new party if it's not one we know
if party not in data['orgs']:
data['orgs'][party] = slugify(party)
Expand All @@ -51,10 +50,13 @@ def update_from(csv_url, data):
data['max_person_id'] += 1
person_id = 'uk.org.publicwhip/person/%d' % data['max_person_id']
name['note'] = 'Main'
identifiers = [identifier]
if mnis_id:
identifiers.append({'scheme': 'datadotparl_id', 'identifier': mnis_id})
new_person = {
'id': person_id,
"other_names": [ name ],
'identifiers': [identifier],
'identifiers': identifiers,
'shortcuts': {
'current_party': party,
'current_constituency': data['posts_by_name'][cons]['area']['name'],
Expand All @@ -67,7 +69,7 @@ def update_from(csv_url, data):
new_mship = {
'on_behalf_of_id': data['orgs'][party],
'person_id': person_id,
'start_date': '2019-12-13',
'start_date': '2024-07-05',
'start_reason': 'general_election',
}
if cons in data['existing']:
Expand Down Expand Up @@ -111,8 +113,8 @@ def slugify(value):
Also strips leading and trailing whitespace.
"""
value = unicodedata.normalize('NFKD', str(value)).encode('ascii', 'ignore').decode('ascii')
value = re.sub('[^\w\s-]', '', value).strip().lower()
return re.sub('[-\s]+', '-', value)
value = re.sub(r'[^\w\s-]', '', value).strip().lower()
return re.sub(r'[-\s]+', '-', value)


def load_data():
Expand Down Expand Up @@ -149,7 +151,6 @@ def load_data():

PARTY_YNMP_TO_TWFY = {
'Labour Party': 'Labour',
'Conservative Party': 'Conservative',
'Conservative and Unionist Party': 'Conservative',
'Liberal Democrats': 'Liberal Democrat',
'Ulster Unionist Party': 'UUP',
Expand All @@ -158,37 +159,38 @@ def load_data():
'Plaid Cymru - The Party of Wales': 'Plaid Cymru',
"Labour and Co-operative Party": 'Labour/Co-operative',
'Democratic Unionist Party - D.U.P.': 'DUP',
'The Respect Party': 'Respect',
"SDLP (Social Democratic & Labour Party)": "Social Democratic and Labour Party",
"UK Independence Party (UKIP)": "UKIP",
"UK Independence Party (UK I P)": "UKIP",
"Alliance - Alliance Party of Northern Ireland": 'Alliance',
'Green Party': 'Green',
'Scottish Green Party': 'Green',
'Traditional Unionist Voice - TUV': 'Traditional Unionist Voice',
'The Brexit Party': 'Brexit',
# 'The Independent Group for Change'
'Alba Party': 'Alba',
'Workers Party of Britain': 'Workers Party',
}


def ynmp_csv_reader(fn):
    """Yield one tuple per *elected* candidate in a YNMP results CSV.

    ``fn`` is either a URL string, which is fetched and decoded as UTF-8,
    or an already-open text stream / iterable of CSV lines (the doctests
    pass a ``StringIO``).

    Each yielded item is
    ``(ynmp_id, name, party, cons, person_id, mnis_id)`` where:

    * ``ynmp_id`` is the integer ``person_id`` column;
    * ``name`` is ``{'given_name': ..., 'family_name': ...}``;
    * ``party`` is ``party_name`` mapped through ``PARTY_YNMP_TO_TWFY``
      (unknown parties pass through unchanged);
    * ``cons`` is the constituency slug taken from ``ballot_paper_id``;
    * ``person_id`` is ``'uk.org.publicwhip/person/<n>'`` built from the
      first run of digits in ``twfy_id``, or ``None`` if there is none;
    * ``mnis_id`` is the raw ``mnis_id`` column.

    Raises ``ValueError`` if a row belongs to a different election or an
    elected row's ``ballot_paper_id`` is not of the expected form.
    """
    if isinstance(fn, str):
        # Use the response as a context manager so the HTTP connection is
        # released when the rows are exhausted (or the generator is closed).
        with urllib.request.urlopen(fn) as response:
            # Stream in as Unicode
            yield from _ynmp_elected_rows(codecs.getreader("utf-8")(response))
    else:
        yield from _ynmp_elected_rows(fn)


def _ynmp_elected_rows(stream):
    """Parse CSV rows from a text stream, yielding only elected candidates."""
    election_id = f'parl.{DATE}'
    # TWFY has separate first/last name fields. This should catch most:
    # first word -> given name; last word, plus any directly preceding
    # particles (van, de, ...), -> family name. Words in between are dropped.
    name_re = re.compile('(?i)(.*?) (?:.*? )*?((?:van |de |der |den |von |st |duncan |lloyd )*[^ ]*)$|^[^ ]*$')
    ballot_re = re.compile(rf'parl\.(.*)\.{DATE}')
    # Spellings of "not elected"; an empty/missing value counts too.
    not_elected = ('', 'f', 'false', '0', 'n', 'no')

    for row in csv.DictReader(stream):
        # Explicit raise rather than assert, so the check survives `python -O`.
        if row['election_id'] != election_id:
            raise ValueError(
                'Unexpected election_id %r (expected %r)' % (row['election_id'], election_id))

        # Filter first: most rows are losing candidates and need no parsing.
        if (row['elected'] or '').strip().lower() in not_elected:
            continue

        name = row['person_name'].strip()
        m = name_re.match(name)
        if m is None or m.group(1) is None:
            # Single-word (or otherwise unsplittable) name: the pattern's
            # fallback branch captures nothing, so keep the whole name as the
            # family name instead of emitting None for both parts.
            given, family = '', name
        else:
            given, family = m.groups()

        party = row['party_name']
        party = PARTY_YNMP_TO_TWFY.get(party, party)

        m = ballot_re.match(row['ballot_paper_id'])
        if m is None:
            raise ValueError('Unexpected ballot_paper_id %r' % (row['ballot_paper_id'],))
        cons = m.group(1)

        m = re.search(r'(\d+)', row['twfy_id'] or '')
        person_id = 'uk.org.publicwhip/person/' + m.group(1) if m else None

        ynmp_id = int(row['person_id'])
        yield ynmp_id, {'given_name': given, 'family_name': family}, party, cons, person_id, row['mnis_id']


def mship_has_changed(old, new):
Expand Down

0 comments on commit b57a373

Please sign in to comment.