Skip to content

Commit

Permalink
[NI] Update wikipedia spotter.
Browse files Browse the repository at this point in the history
TheyWorkForYou Live CVS User committed Jul 23, 2024
1 parent 80e9522 commit 66c8a00
Showing 2 changed files with 8 additions and 7 deletions.
13 changes: 7 additions & 6 deletions members/wikipedia-mla.xml
Original file line number Diff line number Diff line change
@@ -112,7 +112,7 @@
<personinfo id="uk.org.publicwhip/person/13872" wikipedia_url="https://en.wikipedia.org/wiki/Jim_Wilson_(Northern_Ireland_politician)" />
<personinfo id="uk.org.publicwhip/person/13874" wikipedia_url="https://en.wikipedia.org/wiki/Bairbre_de_Br%C3%BAn" />
<personinfo id="uk.org.publicwhip/person/13878" wikipedia_url="https://en.wikipedia.org/wiki/Wallace_Browne" />
<personinfo id="uk.org.publicwhip/person/13887" wikipedia_url="https://en.wikipedia.org/wiki/Roy_Beggs_Jr" />
<personinfo id="uk.org.publicwhip/person/13887" wikipedia_url="https://en.wikipedia.org/wiki/Roy_Beggs,_Jr." />
<personinfo id="uk.org.publicwhip/person/13888" wikipedia_url="https://en.wikipedia.org/wiki/Marietta_Farrell" />
<personinfo id="uk.org.publicwhip/person/13889" wikipedia_url="https://en.wikipedia.org/wiki/Dawn_Purvis" />
<personinfo id="uk.org.publicwhip/person/13891" wikipedia_url="https://en.wikipedia.org/wiki/Martina_Anderson" />
@@ -124,7 +124,7 @@
<personinfo id="uk.org.publicwhip/person/13898" wikipedia_url="https://en.wikipedia.org/wiki/Jonathan_Craig" />
<personinfo id="uk.org.publicwhip/person/13899" wikipedia_url="https://en.wikipedia.org/wiki/Stephen_Farry" />
<personinfo id="uk.org.publicwhip/person/13900" wikipedia_url="https://en.wikipedia.org/wiki/Simon_Hamilton" />
<personinfo id="uk.org.publicwhip/person/13901" wikipedia_url="https://en.wikipedia.org/wiki/William_Irwin_(Northern_Ireland_politician)" />
<personinfo id="uk.org.publicwhip/person/13901" wikipedia_url="https://en.wikipedia.org/wiki/William_Irwin_(Unionist_politician)" />
<personinfo id="uk.org.publicwhip/person/13902" wikipedia_url="https://en.wikipedia.org/wiki/Anna_Lo" />
<personinfo id="uk.org.publicwhip/person/13903" wikipedia_url="https://en.wikipedia.org/wiki/Trevor_Lunn" />
<personinfo id="uk.org.publicwhip/person/13904" wikipedia_url="https://en.wikipedia.org/wiki/Paul_Maskey" />
@@ -139,7 +139,7 @@
<personinfo id="uk.org.publicwhip/person/13913" wikipedia_url="https://en.wikipedia.org/wiki/Car%C3%A1l_N%C3%AD_Chuil%C3%ADn" />
<personinfo id="uk.org.publicwhip/person/13914" wikipedia_url="https://en.wikipedia.org/wiki/Declan_O%27Loan" />
<personinfo id="uk.org.publicwhip/person/13915" wikipedia_url="https://en.wikipedia.org/wiki/Michelle_O%27Neill" />
<personinfo id="uk.org.publicwhip/person/13916" wikipedia_url="https://en.wikipedia.org/wiki/Jimmy_Spratt" />
<personinfo id="uk.org.publicwhip/person/13916" wikipedia_url="https://en.wikipedia.org/wiki/Jimmy_Spratt_(politician)" />
<personinfo id="uk.org.publicwhip/person/13917" wikipedia_url="https://en.wikipedia.org/wiki/Brian_Wilson" />
<personinfo id="uk.org.publicwhip/person/13918" wikipedia_url="https://en.wikipedia.org/wiki/Francie_Brolly" />
<personinfo id="uk.org.publicwhip/person/13927" wikipedia_url="https://en.wikipedia.org/wiki/Alastair_Ross" />
@@ -168,8 +168,8 @@
<personinfo id="uk.org.publicwhip/person/25131" wikipedia_url="https://en.wikipedia.org/wiki/Phil_Flanagan" />
<personinfo id="uk.org.publicwhip/person/25132" wikipedia_url="https://en.wikipedia.org/wiki/Brenda_Hale_(Northern_Ireland_politician)" />
<personinfo id="uk.org.publicwhip/person/25133" wikipedia_url="https://en.wikipedia.org/wiki/Ross_Hussey" />
<personinfo id="uk.org.publicwhip/person/25134" wikipedia_url="https://en.wikipedia.org/wiki/Pam_Cameron" />
<personinfo id="uk.org.publicwhip/person/25135" wikipedia_url="https://en.wikipedia.org/wiki/Se%C3%A1n_Lynch_(politician)" />
<personinfo id="uk.org.publicwhip/person/25134" wikipedia_url="https://en.wikipedia.org/wiki/Pam_Lewis" />
<personinfo id="uk.org.publicwhip/person/25135" wikipedia_url="https://en.wikipedia.org/wiki/Sean_Lynch_(politician)" />
<personinfo id="uk.org.publicwhip/person/25136" wikipedia_url="https://en.wikipedia.org/wiki/David_McIlveen" />
<personinfo id="uk.org.publicwhip/person/25137" wikipedia_url="https://en.wikipedia.org/wiki/Karen_McKevitt" />
<personinfo id="uk.org.publicwhip/person/25138" wikipedia_url="https://en.wikipedia.org/wiki/Oliver_McMullan" />
@@ -242,7 +242,7 @@
<personinfo id="uk.org.publicwhip/person/25716" wikipedia_url="https://en.wikipedia.org/wiki/Emma_Rogan" />
<personinfo id="uk.org.publicwhip/person/25741" wikipedia_url="https://en.wikipedia.org/wiki/John_Blair_(Northern_Ireland_politician)" />
<personinfo id="uk.org.publicwhip/person/25747" wikipedia_url="https://en.wikipedia.org/wiki/Emma_Sheerin" />
<personinfo id="uk.org.publicwhip/person/25773" wikipedia_url="https://en.wikipedia.org/wiki/Maol%C3%ADosa_McHugh" />
<personinfo id="uk.org.publicwhip/person/25773" wikipedia_url="https://en.wikipedia.org/wiki/Maoliosa_McHugh" />
<personinfo id="uk.org.publicwhip/person/25784" wikipedia_url="https://en.wikipedia.org/wiki/Harry_Harvey_(politician)" />
<personinfo id="uk.org.publicwhip/person/25785" wikipedia_url="https://en.wikipedia.org/wiki/Rachel_Woods" />
<personinfo id="uk.org.publicwhip/person/25930" wikipedia_url="https://en.wikipedia.org/wiki/M%C3%A1ire_Hendron" />
@@ -284,4 +284,5 @@
<personinfo id="uk.org.publicwhip/person/26638" wikipedia_url="https://en.wikipedia.org/wiki/Michelle_Guy" />
<personinfo id="uk.org.publicwhip/person/26639" wikipedia_url="https://en.wikipedia.org/wiki/Colin_Crawford_(Northern_Ireland_politician)" />
<personinfo id="uk.org.publicwhip/person/26640" wikipedia_url="https://en.wikipedia.org/wiki/Peter_Martin_(Northern_Ireland_politician)" />
<personinfo id="uk.org.publicwhip/person/26644" wikipedia_url="https://en.wikipedia.org/wiki/Timothy_Gaston" />
</publicwhip>
2 changes: 1 addition & 1 deletion pyscraper/ni/wikipedia-mla.py
Original file line number Diff line number Diff line change
@@ -36,7 +36,7 @@ def read(y):
matches.update(re.findall(matcher, content))

# 4-6th Assembly changes
changes = re.findall('(?s)<h2><span[^>]*>MLAs by constituency.*?<h2><span[^>]*>Changes(.*?)</html>', content)
changes = re.findall('(?s)<h2[^>]*>(?:<span[^>]*>)?MLAs by constituency.*?<h2[^>]*>(?:<span[^>]*>)?Changes(.*?)</html>', content)
for change in changes:
for m in re.findall('''(?x)
<td[ ]style="width:[ ]2px;[^>]*>\s*</td>\s* # Thin column of party colour

0 comments on commit 66c8a00

Please sign in to comment.