diff --git a/scripts/ynmp/fix-names b/scripts/ynmp/fix-names deleted file mode 100644 index 288480dd..00000000 --- a/scripts/ynmp/fix-names +++ /dev/null @@ -1,22 +0,0 @@ - -import json - -data = json.load(open('../../members/people.json')) -people = {} -for p in data['persons']: - people[p['id']] = p - if 'other_names' in p and isinstance(p['other_names'], dict): - p['other_names'] = [ p['other_names'] ] - -for m in data['memberships']: - if 'redirect' in m: continue - person = people[m['person_id']] - if 'name' in m: - if 'other_names' in person: - pass # print m['name'], person['other_names'] - else: - m['name']['note'] = 'Main' - person['other_names'] = [ m['name'] ] - del m['name'] - -json.dump(data, open('../../members/people.json', 'w'), indent=2, sort_keys=True) diff --git a/scripts/ynmp/tests.txt b/scripts/ynmp/tests.txt index aea92e9e..d1bca66a 100644 --- a/scripts/ynmp/tests.txt +++ b/scripts/ynmp/tests.txt @@ -11,26 +11,27 @@ First, let's get the functions and load in the existing people.json data: And we want a function to alter the fixture with different information: - >>> import StringIO, csv - >>> source = ['election_slug,ballot_paper_id,gss,person_id,person_name,party_id,party_name,theyworkforyou_url,wikidata_id'] + >>> import csv + >>> from io import StringIO + >>> source = ['person_id,person_name,election_id,ballot_paper_id,election_date,election_current,party_name,party_id,post_label,cancelled_poll,seats_contested,elected,tied_vote_winner,results_source,mnis_id,twfy_id'] >>> cols = source[0].split(',') >>> def alter(cmd, cons_id, dcid=None, name=None, party=None, pid=None): ... global source ... reader = csv.DictReader(source) - ... s = StringIO.StringIO() + ... s = StringIO() ... fn = csv.DictWriter(s, cols) ... fn.writeheader() ... for line in reader: ... if cmd == 'add' or line['ballot_paper_id'] != cons_id: ... fn.writerow(line) ... if cmd == 'add': - ... fn.writerow({'election_slug':'parl.2019-12-12', 'ballot_paper_id': cons_id, 'person_id': dcid, 'person_name': name, 'party_name': party, 'theyworkforyou_url': pid}) + ... fn.writerow({'election_id':'parl.2024-07-04', 'ballot_paper_id': cons_id, 'person_id': dcid, 'person_name': name, 'party_name': party, 'twfy_id': pid, 'elected': 't'}) ... s.seek(0) ... source = s.readlines() >>> def process(): ... data['dealt_with'] = [] - ... s = StringIO.StringIO(''.join(source)) + ... s = StringIO(''.join(source)) ... return update_from(s, data) Now we want to call it with the CSV file before anything has happened: @@ -40,54 +41,54 @@ Now we want to call it with the CSV file before anything has happened: Diane Abbott wins: - >>> alter('add', 'parl.hackney-north-and-stoke-newington.2019-12-12', 2567, 'Diane Abbott', 'Labour', '10001') + >>> alter('add', 'parl.hackney-north-and-stoke-newington.2024-07-04', 2567, 'Diane Abbott', 'Labour', '10001') >>> process() - NEW result 42077, Diane Abbott, Labour, hackney-north-and-stoke-newington, uk.org.publicwhip/person/10001 + NEW result 42782, Diane Abbott, Labour, hackney-north-and-stoke-newington, uk.org.publicwhip/person/10001 True Boris Johnson loses: - >>> alter('add', 'parl.uxbridge-and-south-ruislip.2019-12-12', 72363, 'Count Binface', 'Independent') + >>> alter('add', 'parl.uxbridge-and-south-ruislip.2024-07-04', 72363, 'Count Binface', 'Independent') >>> process() - NEW result 42078, Count Binface, Independent, uxbridge-and-south-ruislip, uk.org.publicwhip/person/25791 + NEW result 42783, Count Binface, Independent, uxbridge-and-south-ruislip, uk.org.publicwhip/person/26307 True Actually, that was a mistake at the YNMP end, revert until we know the result: - >>> alter('remove', 'parl.uxbridge-and-south-ruislip.2019-12-12') + >>> alter('remove', 'parl.uxbridge-and-south-ruislip.2024-07-04') >>> process() - Removing result from uk.org.publicwhip/member/42078 (was uk.org.publicwhip/cons/2241, independent, uk.org.publicwhip/person/25791) + Removing result from uk.org.publicwhip/member/42783 (was uk.org.publicwhip/cons/2847, independent, uk.org.publicwhip/person/26307) True We then think someone *else* wins: - >>> alter('add', 'parl.uxbridge-and-south-ruislip.2019-12-12', 72364, 'Lord Buckethead', 'Monster Raving Loony Party') + >>> alter('add', 'parl.uxbridge-and-south-ruislip.2024-07-04', 72364, 'Lord Buckethead', 'Monster Raving Loony Party') >>> process() - Updating uk.org.publicwhip/member/42078 with Lord Buckethead, Monster Raving Loony Party, uxbridge-and-south-ruislip, uk.org.publicwhip/person/25792 + Updating uk.org.publicwhip/member/42783 with Lord Buckethead, Monster Raving Loony Party, uxbridge-and-south-ruislip, uk.org.publicwhip/person/26308 True It turns out Boris Johnson actually won, we get that change at once: - >>> alter('remove', 'parl.uxbridge-and-south-ruislip.2019-12-12') - >>> alter('add', 'parl.uxbridge-and-south-ruislip.2019-12-12', 4546, 'Boris Johnson', 'Conservative and Unionist Party', 10999) + >>> alter('remove', 'parl.uxbridge-and-south-ruislip.2024-07-04') + >>> alter('add', 'parl.uxbridge-and-south-ruislip.2024-07-04', 4546, 'Boris Johnson', 'Conservative and Unionist Party', 10999) >>> process() - Updating uk.org.publicwhip/member/42078 with Boris Johnson, Conservative, uxbridge-and-south-ruislip, uk.org.publicwhip/person/10999 + Updating uk.org.publicwhip/member/42783 with Boris Johnson, Conservative, uxbridge-and-south-ruislip, uk.org.publicwhip/person/10999 True Two David Jones both win, one with a person ID, one without: - >>> alter('add', 'parl.clwyd-west.2019-12-12', 3151, 'David Jones', 'Conservative and Unionist Party', 11506) - >>> alter('add', 'parl.blackley-and-broughton.2019-12-12', 5750, 'David Jones', 'Green Party') + >>> alter('add', 'parl.clwyd-north.2024-07-04', 3151, 'David Jones', 'Conservative and Unionist Party', 11506) + >>> alter('add', 'parl.blackley-and-middleton-south.2024-07-04', 5750, 'David Jones', 'Green Party') >>> process() - NEW result 42079, David Jones, Conservative, clwyd-west, uk.org.publicwhip/person/11506 - NEW result 42080, David Jones, Green, blackley-and-broughton, uk.org.publicwhip/person/25793 + NEW result 42784, David Jones, Conservative, clwyd-north, uk.org.publicwhip/person/11506 + NEW result 42785, David Jones, Green, blackley-and-middleton-south, uk.org.publicwhip/person/26309 True Tessa Jane Munt wins, and YNMP have not supplied her parlparse ID (she was an MP until 2015): - >>> alter('add', 'parl.wells.2019-12-12', 1699, 'Tessa Munt', 'Liberal Democrats') + >>> alter('add', 'parl.wells-and-mendip-hills.2024-07-04', 1699, 'Tessa Munt', 'Liberal Democrats') >>> process() - NEW result 42081, Tessa Munt, Liberal Democrat, wells, uk.org.publicwhip/person/25794 + NEW result 42786, Tessa Munt, Liberal Democrat, wells-and-mendip-hills, uk.org.publicwhip/person/26310 True If we then just run again with no changes, nothing should happen: diff --git a/scripts/ynmp/update.py b/scripts/ynmp/update.py index 77df1021..94953107 100644 --- a/scripts/ynmp/update.py +++ b/scripts/ynmp/update.py @@ -9,9 +9,8 @@ import unicodedata import urllib.request -sys.stdout = codecs.getwriter('utf-8')(sys.stdout) - -CSV_URL = 'https://candidates.democracyclub.org.uk/results/csv/parl.2019-12-12/' +DATE = '2024-07-04' +CSV_URL = f'https://candidates.democracyclub.org.uk/data/export_csv/?election_id=parl.{DATE}&extra_fields=elected&extra_fields=tied_vote_winner&extra_fields=results_source&extra_fields=mnis_id&extra_fields=twfy_id&format=csv' JSON = os.path.join(os.path.dirname(__file__), '..', '..', 'members', 'people.json') @@ -25,7 +24,7 @@ def main(): def update_from(csv_url, data): changed = False - for ynmp_id, name, party, cons, person_id, elected in ynmp_csv_reader(csv_url): + for ynmp_id, name, party, cons, person_id, mnis_id in ynmp_csv_reader(csv_url): # Add a new party if it's not one we know if party not in data['orgs']: data['orgs'][party] = slugify(party) @@ -51,10 +50,13 @@ def update_from(csv_url, data): data['max_person_id'] += 1 person_id = 'uk.org.publicwhip/person/%d' % data['max_person_id'] name['note'] = 'Main' + identifiers = [identifier] + if mnis_id: + identifiers.append({'scheme': 'datadotparl_id', 'identifier': mnis_id}) new_person = { 'id': person_id, "other_names": [ name ], - 'identifiers': [identifier], + 'identifiers': identifiers, 'shortcuts': { 'current_party': party, 'current_constituency': data['posts_by_name'][cons]['area']['name'], @@ -67,7 +69,7 @@ def update_from(csv_url, data): new_mship = { 'on_behalf_of_id': data['orgs'][party], 'person_id': person_id, - 'start_date': '2019-12-13', + 'start_date': '2024-07-05', 'start_reason': 'general_election', } if cons in data['existing']: @@ -111,8 +113,8 @@ def slugify(value): Also strips leading and trailing whitespace. """ value = unicodedata.normalize('NFKD', str(value)).encode('ascii', 'ignore').decode('ascii') - value = re.sub('[^\w\s-]', '', value).strip().lower() - return re.sub('[-\s]+', '-', value) + value = re.sub(r'[^\w\s-]', '', value).strip().lower() + return re.sub(r'[-\s]+', '-', value) def load_data(): @@ -149,7 +151,6 @@ def load_data(): PARTY_YNMP_TO_TWFY = { 'Labour Party': 'Labour', - 'Conservative Party': 'Conservative', 'Conservative and Unionist Party': 'Conservative', 'Liberal Democrats': 'Liberal Democrat', 'Ulster Unionist Party': 'UUP', @@ -158,37 +159,38 @@ def load_data(): 'Plaid Cymru - The Party of Wales': 'Plaid Cymru', "Labour and Co-operative Party": 'Labour/Co-operative', 'Democratic Unionist Party - D.U.P.': 'DUP', - 'The Respect Party': 'Respect', "SDLP (Social Democratic & Labour Party)": "Social Democratic and Labour Party", "UK Independence Party (UKIP)": "UKIP", - "UK Independence Party (UK I P)": "UKIP", "Alliance - Alliance Party of Northern Ireland": 'Alliance', 'Green Party': 'Green', 'Scottish Green Party': 'Green', 'Traditional Unionist Voice - TUV': 'Traditional Unionist Voice', - 'The Brexit Party': 'Brexit', - # 'The Independent Group for Change' + 'Alba Party': 'Alba', + 'Workers Party of Britain': 'Workers Party', } def ynmp_csv_reader(fn): if isinstance(fn, str): fn = urllib.request.urlopen(fn) + fn = codecs.getreader("utf-8")(fn) # Stream in as Unicode for row in csv.DictReader(fn): - assert row['election_slug'] == 'parl.2019-12-12' - name = row['person_name'].decode('utf-8').strip() + assert row['election_id'] == f'parl.{DATE}' + name = row['person_name'].strip() # TWFY has separate first/last name fields. This should catch most. - m = re.match('(.*?) ((?:ap |van |de |di |von |st |duncan |lloyd |\u00d3 )*[^ ]*(?: Jnr)?)$(?i)', name) + m = re.match('(?i)(.*?) (?:.*? )*?((?:van |de |der |den |von |st |duncan |lloyd )*[^ ]*)$|^[^ ]*$', name) given, family = m.groups() - party = row['party_name'].decode('utf-8') + party = row['party_name'] party = PARTY_YNMP_TO_TWFY.get(party, party) - m = re.match('parl\.(.*)\.2019-12-12', row['ballot_paper_id']) + m = re.match(rf'parl\.(.*)\.{DATE}', row['ballot_paper_id']) cons = m.group(1) - m = re.search('(\d+)', row['theyworkforyou_url']) + m = re.search(r'(\d+)', row['twfy_id']) person_id = 'uk.org.publicwhip/person/' + m.group(1) if m else None ynmp_id = int(row['person_id']) - elected = True - yield ynmp_id, {'given_name': given, 'family_name': family}, party, cons, person_id, elected + #print(ynmp_id, {'given_name': given, 'family_name': family}, party, cons, person_id, row['mnis_id']) + if not row['elected'] or row['elected'] in ('f', 'false', 'False', 0, '0', 'n', 'N', 'No', 'no'): + continue + yield ynmp_id, {'given_name': given, 'family_name': family}, party, cons, person_id, row['mnis_id'] def mship_has_changed(old, new):