From daf097f0b9b910aafb8ab3a4c39bcc965f73d44e Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Mon, 13 Mar 2023 15:14:21 +0000 Subject: [PATCH] Upgrade python 2 to 3. --- filtersentence_xml.py | 19 +- get-historic-debates | 10 +- get-historic-person-urls | 6 +- london-mayors-questions/questions.py | 18 +- members/parl-old-check-party.py | 8 +- members/wikipedia-commons.py | 26 +- members/wikipedia-lords.py | 16 +- members/wikipedia-standingdown.py | 12 +- pyscraper/base_resolver.py | 16 +- pyscraper/contextexception.py | 7 - pyscraper/get_links_from_ep.py | 2 +- pyscraper/gettwittermps.py | 8 +- pyscraper/gidmatching.py | 22 +- pyscraper/lazyrunall.py | 8 +- pyscraper/lords/resolvenames.py | 25 +- pyscraper/miscfuncs.py | 785 +++++++++--------- pyscraper/new_hansard.py | 102 ++- pyscraper/ni/parse.py | 13 +- pyscraper/ni/resolvenames.py | 28 +- pyscraper/ni/scrape.py | 26 +- pyscraper/ni/wikipedia-mla.py | 22 +- pyscraper/parlphrases.py | 5 +- pyscraper/patchtool.py | 31 +- pyscraper/process_hansard.py | 18 +- pyscraper/pullgluepages.py | 3 +- pyscraper/regmem/filter.py | 22 +- pyscraper/regmem/pullgluepages.py | 53 +- pyscraper/resolvemembernames.py | 59 +- pyscraper/runfilters.py | 26 +- pyscraper/sp/common.py | 18 +- pyscraper/sp/fastest-msps.py | 21 +- pyscraper/sp/get-official-reports-new.py | 39 +- pyscraper/sp/parse-official-reports-new.py | 82 +- pyscraper/sp/resolvenames.py | 26 +- pyscraper/sp/wikipedia-msp.py | 26 +- pyscraper/test.py | 58 +- pyscraper/unpack_hansard_zips.py | 8 +- pyscraper/wa/parse.py | 33 +- pyscraper/xmlfilewrite.py | 11 +- scripts/2016_data_update/dadem_import_ni.py | 6 +- scripts/2016_data_update/dadem_import_sp.py | 8 +- scripts/2021-lam-update | 8 +- scripts/2021-msp-update | 6 +- scripts/add-new-lords | 16 +- scripts/datadotparl/crawl-members | 2 +- scripts/datadotparl/json-add-new-parl-ids | 2 +- scripts/datadotparl/mp-party-check | 2 +- scripts/datadotparl/one-off-add-pims-ids | 2 +- scripts/datadotparl/one-off-sync-lord-parties | 2 +- scripts/datadotparl/update-members | 2 +- scripts/fetch-mp-eu-ref-positions | 8 +- scripts/fetch-pw-json | 4 +- scripts/fetch_london_assembly.py | 30 +- scripts/fetch_scottish_ministers.py | 11 +- .../fetch_wikidata_from_everypolitician.py | 2 +- scripts/json-add-membership | 2 +- scripts/json-body-end | 2 +- scripts/json-change-party | 2 +- scripts/json-edit-person | 2 +- scripts/json-end-membership | 8 +- scripts/json-merge-people | 2 +- scripts/json-new-ids | 14 +- scripts/json-nia-2017-new | 10 +- scripts/json-nia-2022-new | 6 +- scripts/popolo/menu.py | 1 + scripts/popolo/utils.py | 2 +- scripts/quickupdate | 10 +- scripts/welsh-parliament/dual-posts.py | 2 +- scripts/welsh-parliament/memberships.py | 14 +- scripts/welsh-parliament/official-ids.py | 2 +- scripts/welsh-parliament/organizations.py | 14 +- scripts/welsh-parliament/persons.py | 14 +- scripts/welsh-parliament/posts.py | 16 +- scripts/ynmp/update.py | 18 +- wrans-2014/parse.py | 26 +- 75 files changed, 943 insertions(+), 1053 deletions(-) diff --git a/filtersentence_xml.py b/filtersentence_xml.py index 49dd08569..2e64fbb73 100644 --- a/filtersentence_xml.py +++ b/filtersentence_xml.py @@ -1,16 +1,10 @@ -#! 
/usr/bin/python - from datetime import datetime import re -import string from lxml import etree from contextexception import ContextException from parlphrases import parlPhrases - -from wrans.emblinks import rreglink, rregemail, rehtlink, ConstructHTTPlink - from resolvemembernames import memberList @@ -81,12 +75,12 @@ def TokenStandingOrder(mstandingo, phrtok): 'phrase', ' class="standing-order" code="%s"' % mstandingo.group(1) ) +rehtlink = re.compile('(?= string.atoi(qcpart.group(2)): - print ' non-following column leadoff ', qoffrep.group(0) + if int(qcpartlead) >= int(qcpart.group(2)): + print(' non-following column leadoff ', qoffrep.group(0)) # raise Exception, ' non-following column leadoff ' if qcolsuffix == 'WH': @@ -218,8 +212,7 @@ def TokenHonFriend(mhonfriend, phrtok): # remove any xml entities from the name orgname = res[1] - # if you put the .encode("latin-1") on the res[1] it doesn't work when there are strange characters. - return ('phrase', (' class="honfriend" person_id="%s" name="%s"' % (nid, orgname)).encode("latin-1")) + return ('phrase', ' class="honfriend" person_id="%s" name="%s"' % (nid, orgname)) # the array of tokens which we will detect on the way through @@ -312,4 +305,4 @@ def GetPara(self): else: res.append(tok[2]) - return string.join(res, '') + return ''.join(res) diff --git a/get-historic-debates b/get-historic-debates index ee5c5fff4..5c4d36e79 100644 --- a/get-historic-debates +++ b/get-historic-debates @@ -11,7 +11,7 @@ import requests import requests_cache from xml.sax.saxutils import escape, quoteattr from lxml import html, etree -from urlparse import urljoin +from urllib.parse import urljoin requests_cache.install_cache(cache_name='debates', allowable_codes=(200, 404)) BASE_SOURCE_URL = 'https://api.parliament.uk' @@ -183,9 +183,9 @@ def walk(ol, typ, prefix=''): title = link.text_content() url = BASE_SOURCE_URL + link.get('href') if re.match('ORALL? ANS[WN]?ERS? [Tt][Oo] [OQU]UESTIONS?[.,]?$', title): - next_prefix = u'Oral Answers to Questions — ' + next_prefix = 'Oral Answers to Questions — ' elif re.match('ORDERS OF THE\.? 
DAY[.,:]?$', title): - next_prefix = u'Orders of the Day — ' + next_prefix = 'Orders of the Day — ' else: next_prefix = '' out += output_xml('%s-heading' % typ, '%s%s' % (prefix, escape(title)), url) @@ -225,13 +225,13 @@ for year in range(1919, 1935+1): if "id='commons'" not in res.content: continue - print '\r\x1b[K%d' % year, month, day, + print('\r\x1b[K%d' % year, month, day, end=' ') date = '%d-%02d-%02d' % (year, months.index(month)+1, day) col = 0 tree = html.fromstring(res.content) ol = tree.cssselect('h3#commons + ol')[0] - out = u'\n' + out = '\n' out += walk(ol, 'major') out += '\n' diff --git a/get-historic-person-urls b/get-historic-person-urls index 7719ae1d9..23e777986 100644 --- a/get-historic-person-urls +++ b/get-historic-person-urls @@ -55,7 +55,7 @@ for l in string.ascii_lowercase: for li in lis: a = li.find('a') url = a['href'] - print '\r\x1b[K' + url, + print('\r\x1b[K' + url, end=' ') html = requests.get('https://api.parliament.uk/historic-hansard/people/%s/index.html' % url).text if html == 'Page not found': html = requests.get('https://api.parliament.uk/historic-hansard/people/%s' % url).text @@ -99,10 +99,10 @@ for l in string.ascii_lowercase: people[name] = [mm for mm in people[name] if mm['id'] != pid] found = True if not found: - print '\n', url, presence[pid]['min'], presence[pid]['max'], fr, to + print('\n', url, presence[pid]['min'], presence[pid]['max'], fr, to) raise Exception else: - print '\n', matches, url + print('\n', matches, url) raise Exception json.dump(data, open(JSON, 'w'), indent=2, sort_keys=True) diff --git a/london-mayors-questions/questions.py b/london-mayors-questions/questions.py index 596ce7e26..73db32a89 100755 --- a/london-mayors-questions/questions.py +++ b/london-mayors-questions/questions.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 import os import logging @@ -84,7 +84,7 @@ def writeScraperState(state, output_folder): with open(output_file, 'w') as state_json_file: logger.debug('Writing state file') state_json_file.write(json_string) - except TypeError, e: + except TypeError as e: logger.error('Could not serialise to valid JSON: {}'.format(str(e))) @@ -234,14 +234,14 @@ def parseQuestionPage(content): question_title = main_content.h1.text.strip() - logger.debug(u'Question title is {}'.format(question_title)) + logger.debug('Question title is {}'.format(question_title)) # Extract who asked it asked_by_name = main_content.find('div', class_='field--name-field-asked-by').find('div', class_='field__item').text.strip() asked_by_person = getSpeakerObjectFromName(asked_by_name) - logger.debug(u'Question asked by {}'.format(asked_by_person['name'])) + logger.debug('Question asked by {}'.format(asked_by_person['name'])) # Try to extract the actual question @@ -326,7 +326,7 @@ def parseAnswersFromQuestionPage(page_content): answered_by_name = answer_article.find('div', class_='field--name-field-answered-by').find('div', class_='field__item').text.strip() answered_by_person = getSpeakerObjectFromName(answered_by_name) - logger.debug(u'Question answered by {}'.format(answered_by_person['name'])) + logger.debug('Question answered by {}'.format(answered_by_person['name'])) answer_paragraphs = [] @@ -410,12 +410,12 @@ def getPersonIDFromName(name): def getSpeakerObjectFromName(name): ''' Given a name, try to find a speaker ID and return a whole object. 
''' - name = name.replace(u'\u00a0', ' ') + name = name.replace('\u00a0', ' ') name = stripPatternsFromName(name) id = getPersonIDFromName(name) if not id: if 'Liz Peace' not in name: - logger.warning(u'Could not match name {} to any assembly member'.format(name)) + logger.warning('Could not match name {} to any assembly member'.format(name)) id = 'unknown' return { @@ -427,7 +427,7 @@ def getSpeakerObjectFromName(name): def cleanParagraphText(text): # Remove non-breaking spaces followed by a space. - text = text.replace(u'\u00a0 ', ' ') + text = text.replace('\u00a0 ', ' ') # Strip trailing whitespace text = text.strip() @@ -618,7 +618,7 @@ def loadMembershipsFromFile(members_file): if name not in person_ids_by_name: person_ids_by_name[name] = membership['person_id'] - logger.debug(u'Added ID map for for {}'.format(name)) + logger.debug('Added ID map for for {}'.format(name)) else: if person_ids_by_name[name] != membership['person_id']: raise Exception('Multiple people with name {}'.format(name)) diff --git a/members/parl-old-check-party.py b/members/parl-old-check-party.py index 22864bf9b..badcef9df 100644 --- a/members/parl-old-check-party.py +++ b/members/parl-old-check-party.py @@ -6,7 +6,7 @@ # fixing (or longer term, fix it automatically). import re -import urllib +import urllib.request import lxml.objectify import sys @@ -86,10 +86,10 @@ def __init__(self, lord): self.type = TYPES.index('Elected Hereditary') # One of the 92 def __str__(self): - return u'%s (%s) - %s' % ( self.longTitle, PARTIES[self.party], self.status ) + return '%s (%s) - %s' % ( self.longTitle, PARTIES[self.party], self.status ) # Fetch the current live information -lords = urllib.urlopen('http://data.parliament.uk/resources/members/api/lords/all/').read() +lords = urllib.request.urlopen('http://data.parliament.uk/resources/members/api/lords/all/').read() lords = [ Lord(lord) for lord in lxml.objectify.fromstring(lords).peer ] for lord in lords: @@ -113,5 +113,5 @@ def __str__(self): if PARTIES[lord.party] == 'UK Independence Party' and lordsList.lords[match]['party'] == 'UKIP': continue if PARTIES[lord.party] == 'Plaid Cymru' and lordsList.lords[match]['party'] == 'PC': continue if PARTIES[lord.party] == 'Plaid Cymru' and lordsList.lords[match]['party'] == 'PC': continue - print PARTIES[lord.party], lordsList.lords[match]['party'] + print(PARTIES[lord.party], lordsList.lords[match]['party']) diff --git a/members/wikipedia-commons.py b/members/wikipedia-commons.py index 9caca7e11..511684e64 100755 --- a/members/wikipedia-commons.py +++ b/members/wikipedia-commons.py @@ -1,6 +1,4 @@ -#!/usr/bin/env python -# -*- coding: latin-1 -*- -# $Id: bbcconv.py,v 1.4 2005/03/25 23:33:35 theyworkforyou Exp $ +#!/usr/bin/env python3 # Screen scrape list of links to Lords on Wikipedia, so we can link to the articles. 
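Note: the hunk below is one instance of the most common change in this patch. Python 3 merged Python 2's urllib, urllib2 and urlparse modules into the urllib package, so both the imports and the call sites change. A minimal sketch of the before and after, not part of the patch itself; the base URL and path here are hypothetical, for illustration only:

    # Python 2:
    #   import urllib, urlparse
    #   page = urllib.urlopen(urlparse.urljoin(base, path)).read()
    # Python 3, as applied throughout this patch:
    import urllib.parse
    import urllib.request

    base = 'https://en.wikipedia.org/wiki/'  # hypothetical base URL
    path = 'Member_of_Parliament'            # hypothetical path
    page = urllib.request.urlopen(urllib.parse.urljoin(base, path)).read()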
@@ -11,8 +9,7 @@ import datetime import sys -import urllib -import urlparse +import urllib.parse import re # import sets @@ -49,25 +46,22 @@ cons = cons2 name = name2 url = url2 - cons = cons.decode('utf-8') cons = cons.replace('&', '&') - name = name.decode('utf-8') try: (id, canonname, canoncons) = memberList.matchfullnamecons(name, cons, date_parl[year]) - except Exception, e: - print >>sys.stderr, e + except Exception as e: + print(e, file=sys.stderr) if not id: continue wikimembers[id] = url -print ''' -''' -k = wikimembers.keys() -k.sort() +print(''' +''') +k = sorted(wikimembers) for id in k: - url = urlparse.urljoin(wiki_index_url, wikimembers[id]) - print '' % (id, url) -print '' + url = urllib.parse.urljoin(wiki_index_url, wikimembers[id]) + print('' % (id, url)) +print('') #wikimembers = sets.Set(wikimembers.keys()) #print "len: ", len(wikimembers) diff --git a/members/wikipedia-lords.py b/members/wikipedia-lords.py index b6962b78b..34ac829ba 100755 --- a/members/wikipedia-lords.py +++ b/members/wikipedia-lords.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Screen scrape list of links to Lords on Wikipedia, so we can link to the articles. @@ -9,7 +9,7 @@ import datetime import sys -import urlparse +import urllib.parse import re sys.path.append("../pyscraper") @@ -32,19 +32,19 @@ id = None try: id = lordsList.GetLordIDfname(name, None, date_today) - except Exception, e: + except Exception as e: continue if not id: continue wikimembers[id] = url -print ''' -''' +print(''' +''') for id, url in sorted(wikimembers.items()): - url = urlparse.urljoin(wiki_index_url, url) - print '' % (id, url) -print '' + url = urllib.parse.urljoin(wiki_index_url, url) + print('' % (id, url)) +print('') #print "len: ", len(wikimembers) diff --git a/members/wikipedia-standingdown.py b/members/wikipedia-standingdown.py index 5b952cc58..ae3f41f74 100755 --- a/members/wikipedia-standingdown.py +++ b/members/wikipedia-standingdown.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Screen scrape list of who's standing down in the 2010 general election @@ -7,9 +7,7 @@ # certain conditions. However, it comes with ABSOLUTELY NO WARRANTY. # For details see the file LICENSE.html in the top level of the source. -import datetime import sys -import urlparse import re sys.path.append("../pyscraper") @@ -19,8 +17,8 @@ page = open('../rawdata/MPs_standing_down_in_2010').read() -print ''' -''' +print(''' +''') m = re.findall('
  • ]*>([^<]*)', page) for row in m: url, name = row @@ -28,6 +26,6 @@ if name in ('Iris Robinson', 'Ashok Kumar', 'David Taylor'): continue id, canonname, canoncons = memberList.matchfullnamecons(name, None, today) pid = memberList.membertoperson(id) - print (' ' % (pid, name)).encode('iso-8859-1') -print '' + print((' ' % (pid, name)).encode('iso-8859-1')) +print('') diff --git a/pyscraper/base_resolver.py b/pyscraper/base_resolver.py index 0da4de395..dc54bbb2c 100644 --- a/pyscraper/base_resolver.py +++ b/pyscraper/base_resolver.py @@ -67,12 +67,12 @@ def import_people_membership(self, mship, posts, orgs): return if mship["id"] in self.membertopersonmap: - raise Exception, "Same member id %s appeared twice" % mship["id"] + raise Exception("Same member id %s appeared twice" % mship["id"]) self.membertopersonmap[mship["id"]] = mship['person_id'] self.persontomembermap.setdefault(mship['person_id'], []).append(mship["id"]) if self.members.get(mship["id"]): - raise Exception, "Repeated identifier %s in members JSON file" % mship["id"] + raise Exception("Repeated identifier %s in members JSON file" % mship["id"]) self.members[mship["id"]] = mship if 'end_date' not in mship: @@ -90,14 +90,14 @@ def import_people_membership(self, mship, posts, orgs): mship_start_date <= mship_end_date and mship_end_date <= cons['end_date']): if consid and consid != cons['id']: - raise Exception, "Two constituency ids %s %s overlap with MP %s" % (consid, cons['id'], mship['id']) + raise Exception("Two constituency ids %s %s overlap with MP %s" % (consid, cons['id'], mship['id'])) consid = cons['id'] if not consid: - raise Exception, "Constituency '%s' not found" % mship["constituency"] + raise Exception("Constituency '%s' not found" % mship["constituency"]) # check name in members file is same as default in cons file backformed_cons = self.considtonamemap[consid] if backformed_cons != mship["constituency"]: - raise Exception, "Constituency '%s' in members file differs from first constituency '%s' listed in cons file" % (mship["constituency"], backformed_cons) + raise Exception("Constituency '%s' in members file differs from first constituency '%s' listed in cons file" % (mship["constituency"], backformed_cons)) # check first date ranges don't overlap, MPs only # Only check modern MPs as we might have overlapping data previously @@ -108,7 +108,7 @@ def import_people_membership(self, mship, posts, orgs): or cons['start_date'] <= mship['end_date'] <= cons['end_date'] \ or mship['start_date'] <= cons['start_date'] <= mship['end_date'] \ or mship['start_date'] <= cons['end_date'] <= mship['end_date']: - raise Exception, "%s %s Two MP entries for constituency %s with overlapping dates" % (mship, cons, consid) + raise Exception("%s %s Two MP entries for constituency %s with overlapping dates" % (mship, cons, consid)) # then add in self.considtomembermap.setdefault(consid, []).append(mship) @@ -124,7 +124,7 @@ def import_people_names(self, person): if person['id'] not in self.persontomembermap: return self.persons[person['id']] = person - memberships = map(lambda x: self.members[x], self.persontomembermap[person['id']]) + memberships = [self.members[x] for x in self.persontomembermap[person['id']]] for other_name in person.get('other_names', []): if other_name.get('note') == 'Main': self.import_people_main_name(other_name, memberships) @@ -213,7 +213,7 @@ def name_on_date(self, person_id, date): if nm['lordofname']: name += ' of %s' % nm['lordofname'] return name - raise Exception, 'No found for %s on %s' % 
(person['id'], date) + raise Exception('No found for %s on %s' % (person['id'], date)) def membertoperson(self, memberid): return self.membertopersonmap[memberid] diff --git a/pyscraper/contextexception.py b/pyscraper/contextexception.py index ec52f2f85..9d1b3b7d3 100755 --- a/pyscraper/contextexception.py +++ b/pyscraper/contextexception.py @@ -1,12 +1,6 @@ #! $Id: contextexception.py,v 1.12 2004/12/23 12:27:09 goatchurch Exp $ # vim:sw=8:ts=8:et:nowrap -import os -import string -import re -import sys -import shutil - class ContextException(Exception): def __init__(self, description, stamp = None, fragment = None): @@ -22,4 +16,3 @@ def __str__(self): if self.stamp: ret = ret + repr(self.stamp) + "\n" return ret - diff --git a/pyscraper/get_links_from_ep.py b/pyscraper/get_links_from_ep.py index af331a928..30db86271 100755 --- a/pyscraper/get_links_from_ep.py +++ b/pyscraper/get_links_from_ep.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import operator from lxml import etree diff --git a/pyscraper/gettwittermps.py b/pyscraper/gettwittermps.py index 79895b9a2..3a10319dd 100755 --- a/pyscraper/gettwittermps.py +++ b/pyscraper/gettwittermps.py @@ -1,12 +1,12 @@ -#!/usr/bin/python +#!/usr/bin/env python3 -import urllib2 +import urllib.request import csv import xml.sax uri = "http://spreadsheets.google.com/tq?tqx=out:csv&key=0AjWA_TWMI4t_dFI5MWRWZkRWbFJ6MVhHQzVmVndrZnc&hl=en_GB" -f = urllib2.urlopen(uri) +f = urllib.request.urlopen(uri) csv_data = f.read() lines = csv_data.split("\n") rows = csv.reader(lines.__iter__(), delimiter=',', quotechar='"') @@ -34,7 +34,7 @@ def endElement(self,name): output_filename = "../members/twitter-commons.xml" fp = open(output_filename,"w") -fp.write(''' +fp.write(''' ''') diff --git a/pyscraper/gidmatching.py b/pyscraper/gidmatching.py index d2ab7aac1..c11ad2a50 100644 --- a/pyscraper/gidmatching.py +++ b/pyscraper/gidmatching.py @@ -1,12 +1,6 @@ -import sys import re -import os -import xml.sax -import tempfile -import string import miscfuncs import difflib -from pprint import pprint #from xmlfilewrite import PrevParsedFile class PrevParsedFile: @@ -25,7 +19,7 @@ def GetMinIndex(indx, a): assert indx[0] == 0 and a < indx[-1] i0, i1 = 0, len(indx) - 1 while i0 + 1 < i1: - im = (i0 + i1) / 2 + im = (i0 + i1) // 2 assert i0 != im and i1 != im if indx[im] <= a: i0 = im @@ -47,7 +41,7 @@ def PrepareXMLForDiff(scrapeversion): # new_chk = chk[2] new_chk = re.sub( r'(?s)(<(p|tr)\s[^>]*>)(.*?)(<\/\2>)', - lambda m: (u''.join((m.group(1), re.sub('\n', ' ', m.group(3)), m.group(4)))), + lambda m: (''.join((m.group(1), re.sub('\n', ' ', m.group(3)), m.group(4)))), chk[2] ) essxindx.append(len(essxlist)) @@ -162,12 +156,12 @@ def DoFactorDiff(essflatbindx, essflatblist, essxindx, essxlist, chks, flatb): # missing speech else: - print chks[ix] + print(chks[ix]) if lastmatchg: - print "Missing speech matched to last matched speech" + print("Missing speech matched to last matched speech") matchlist = [ lastmatchg ] else: - print "No match on first speech problem." 
+ print("No match on first speech problem.") matchlist = [] matchtype = "missing" @@ -248,7 +242,7 @@ def FactorChangesWrans(majblocks, scrapeversion): for qqnum in qqnums: if qblock: if qblock.headingqb.qGID != qnummapq[qqnum].headingqb.qGID: - print qblock.headingqb.qGID, qnummapq[qqnum].headingqb.qGID + print(qblock.headingqb.qGID, qnummapq[qqnum].headingqb.qGID) assert qblock.headingqb.qGID == qnummapq[qqnum].headingqb.qGID elif qqnum != '0' and qqnum in qnummapq: # 0 is when there is a missing qnum qblock = qnummapq[qqnum] @@ -266,7 +260,7 @@ def FactorChangesWrans(majblocks, scrapeversion): qmissblockscorebest = max(qmissblocksscore) qblock = qnummapq[qmissblockscorebest[1]] if miscfuncs.IsNotQuiet(): - print "Missing qnum; mapping %s to %s with score %f" % (qebchk[0], qblock.headingqb.qGID, qmissblockscorebest[0]) + print("Missing qnum; mapping %s to %s with score %f" % (qebchk[0], qblock.headingqb.qGID, qmissblockscorebest[0])) assert qmissblockscorebest[0] > 0.8 # otherwise it's not really a match and we need to look harder. # perhaps it's matched to a block in the new file which newly has a qnum, and we then have to scan against all of them. @@ -324,7 +318,7 @@ def FactorChangesWrans(majblocks, scrapeversion): # sometimes we get more than one question. # when we find a mismatch we'll deal with it as a special paragraph problem, or not bother. if len(qebchkquesids) != len(qblock.queses): - print len(qebchkquesids), len(qblock.queses), qblock.queses[0].qGID + print(len(qebchkquesids), len(qblock.queses), qblock.queses[0].qGID) assert len(qebchkquesids) == len(qblock.queses) for i in range(len(qebchkquesids)): res.append('\n' % (qebchkquesids[i], qblock.queses[i].qGID, matchtype)) diff --git a/pyscraper/lazyrunall.py b/pyscraper/lazyrunall.py index a2fa61b5f..1da9d4010 100755 --- a/pyscraper/lazyrunall.py +++ b/pyscraper/lazyrunall.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! 
/usr/bin/env python3 # vim:sw=8:ts=8:et:nowrap # Run the script with --help to see command line options @@ -91,18 +91,18 @@ elif arg == "ni": options.ni = True else: - print >>sys.stderr, "error: no such option %s" % arg + print("error: no such option %s" % arg, file=sys.stderr) parser.print_help() sys.exit(1) if len(args) == 0: parser.print_help() sys.exit(1) if not options.scrape and not options.parse: - print >>sys.stderr, "error: choose what to do; scrape, parse, or both" + print("error: choose what to do; scrape, parse, or both", file=sys.stderr) parser.print_help() sys.exit(1) if not options.regmem and not options.ni: - print >>sys.stderr, "error: choose what work on; regmem, several of them" + print("error: choose what work on; regmem, several of them", file=sys.stderr) parser.print_help() sys.exit(1) diff --git a/pyscraper/lords/resolvenames.py b/pyscraper/lords/resolvenames.py index a61e5b1e8..2589de6d1 100644 --- a/pyscraper/lords/resolvenames.py +++ b/pyscraper/lords/resolvenames.py @@ -1,6 +1,5 @@ import json import os.path -import string import re from contextexception import ContextException @@ -22,7 +21,7 @@ hontitles = [ 'Lord ?Bishop', 'Bishop', 'Marquess', 'Lord', 'Baroness', 'Viscount', 'Earl', 'Countess', 'Lord Archbishop', 'Archbishop', 'Duke', 'Lady' ] -hontitleso = string.join(hontitles, '|') +hontitleso = '|'.join(hontitles) honcompl = re.compile('(?:(%s)|(%s) \s*(.*?))(?:\s+of\s+(.*))?$' % (hontitleso, hontitleso)) @@ -44,12 +43,12 @@ def import_people_membership(self, mship, posts, orgs): return if mship["id"] in self.membertopersonmap: - raise Exception, "Same member id %s appeared twice" % mship["id"] + raise Exception("Same member id %s appeared twice" % mship["id"]) self.membertopersonmap[mship["id"]] = mship['person_id'] self.persontomembermap.setdefault(mship['person_id'], []).append(mship["id"]) if self.members.get(mship["id"]): - raise Exception, "Repeated identifier %s in members JSON file" % mship["id"] + raise Exception("Repeated identifier %s in members JSON file" % mship["id"]) self.members[mship["id"]] = mship if 'end_date' not in mship: @@ -62,7 +61,6 @@ def import_people_main_name(self, name, memberships): lname = re.sub("\.", "", lname) assert lname attr = { - "id": m["id"], "title": name["honorific_prefix"], "lordname": name.get("lordname", ""), "lordofname": name.get("lordofname", ""), @@ -71,6 +69,7 @@ def import_people_main_name(self, name, memberships): newattr = attr.copy() newattr['start_date'] = max(m['start_date'], name.get('start_date', '1000-01-01')) newattr['end_date'] = min(m['end_date'], name.get('end_date', '9999-12-31')) + newattr['id'] = m["id"] self.lordnames.setdefault(lname, []).append(newattr) def import_people_alternate_name(self, person, other_name, memberships): @@ -84,8 +83,8 @@ def GetLordID(self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bD if ltitle == "Lord Archbishop": ltitle = "Archbishop" - llordofname = string.replace(llordofname, ".", "") - llordname = string.replace(llordname, ".", "") + llordofname = llordofname.replace(".", "") + llordname = llordname.replace(".", "") llordname = re.sub('&#(039|146|8217);', "'", llordname) llordofname = llordofname.strip() @@ -130,7 +129,7 @@ def GetLordID(self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bD if lm["start_date"] <= sdate <= lm["end_date"]: if lm["lordname"] and llordofname: #if not IsNotQuiet(): - print "cm---", ltitle, lm["lordname"], lm["lordofname"], llordname, llordofname + print("cm---", ltitle, lm["lordname"], 
lm["lordofname"], llordname, llordofname) raise ContextException("lordofname matches lordname in lordlist", stamp=stampurl, fragment=lname) else: assert lm["lordofname"] and llordname @@ -139,7 +138,7 @@ def GetLordID(self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bD raise ContextException("lordname matches lordofname in lordlist", stamp=stampurl, fragment=lname) res.append(lm) elif ltitle != "Bishop" and ltitle != "Archbishop" and (ltitle, lname) not in (("Duke", "Norfolk"), ("Duke", "Wellington"), ('Earl', 'Kinnoull'), ('Earl', 'Selborne')): - print lm + print(lm) raise ContextException("wrong dates on lords with same name", stamp=stampurl, fragment=lname) if not res: @@ -187,19 +186,19 @@ def MatchRevName(self, fss, sdate, stampurl): assert fss lfn = re.match('(.*?)(?: of (.*?))?, {0,3}((?:L|B|Abp|Bp|V|E|D|M|C|Ly)\.?)$', fss) if not lfn: - print "$$$%s$$$" % fss + print("$$$%s$$$" % fss) raise ContextException("No match of format in MatchRevName", stamp=stampurl, fragment=fss) shorttitle = lfn.group(3) if shorttitle[-1] != '.': shorttitle += "." ltitle = titleconv[shorttitle] - llordname = string.replace(lfn.group(1), ".", "") - llordname = string.replace(llordname, "'", "'") + llordname = lfn.group(1).replace(".", "") + llordname = llordname.replace("'", "'") llordname = re.sub("^De ", "de ", llordname) fullname = '%s %s' % (ltitle, llordname) llordofname = "" if lfn.group(2): - llordofname = string.replace(lfn.group(2), ".", "") + llordofname = lfn.group(2).replace(".", "") fullname = '%s of %s' % (fullname, llordofname) if fullname in self.aliases: diff --git a/pyscraper/miscfuncs.py b/pyscraper/miscfuncs.py index 341d49b87..1cc463e33 100755 --- a/pyscraper/miscfuncs.py +++ b/pyscraper/miscfuncs.py @@ -1,6 +1,3 @@ -#! /usr/bin/python -# vim:sw=8:ts=8:et:nowrap - import re import sys import string @@ -22,7 +19,7 @@ pwpatchesdirs = os.path.abspath("patches") # made locally, relative to the lazyrunall.py module. Should be relative to toppath eventually if (not os.path.isdir(toppath)): - raise Exception, 'Data directory %s does not exist, please create' % (toppath) + raise Exception('Data directory %s does not exist, please create' % (toppath)) # print "Data directory (set in miscfuncs.py): %s" % toppath # temporary files are stored here @@ -34,15 +31,15 @@ # find raw data path rawdatapath = os.path.join(os.getcwd(), "../rawdata") if (not os.path.isdir(toppath)): - raise Exception, 'Raw data directory %s does not exist, you\'ve not got a proper checkout from CVS.' % (toppath) + raise Exception('Raw data directory %s does not exist, you\'ve not got a proper checkout from CVS.' 
% (toppath)) # quiet flag bNotQuiet = True def SetQuiet(): - global bNotQuiet - bNotQuiet = False + global bNotQuiet + bNotQuiet = False def IsNotQuiet(): - return bNotQuiet + return bNotQuiet # import lower down so we get the top-path into the contextexception file @@ -51,22 +48,22 @@ def IsNotQuiet(): # use this to generate chronological scraped files of the same page def NextAlphaString(s): - assert re.match('[a-z]*$', s) - if not s: - return 'a' - i = string.find(string.lowercase, s[-1]) + 1 - if i < len(string.lowercase): - return s[:-1] + string.lowercase[i] - return NextAlphaString(s[:-1]) + 'a' + assert re.match('[a-z]*$', s) + if not s: + return 'a' + i = string.ascii_lowercase.find(s[-1]) + 1 + if i < len(string.ascii_lowercase): + return s[:-1] + string.ascii_lowercase[i] + return NextAlphaString(s[:-1]) + 'a' def AlphaStringToOrder(s): - assert re.match('[a-z]*$', s) - res = 0 - while s: - i = string.find(string.lowercase, s[0]) + 1 - res = res * 30 + i - s = s[1:] - return res + assert re.match('[a-z]*$', s) + res = 0 + while s: + i = string.ascii_lowercase.find(s[0]) + 1 + res = res * 30 + i + s = s[1:] + return res # Impossible to do 6pm, 7.15pm, 6.30pm, 6.45pm, 7pm without future timestamps # So not caring any more about timestamp errors @@ -77,94 +74,94 @@ def AlphaStringToOrder(s): # 7 pm regparsetimeonhour = re.compile("^(\d+)()(?:\s?| )([\w\.]+)$") def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): - #print "time ", time - - previoustime = None - if previoustimearr: - previoustime = previoustimearr[-1] - prevtimeMatch = re.match("(\d+):(\d+)", previoustime) - previoustimehour = int(prevtimeMatch.group(1)) - - # This code lifted from fix_time PHP code from easyParliament - timeparts = regparsetime.match(time) - if not timeparts: - timeparts = regparsetimeonhour.match(time) - if timeparts: - hour = int(timeparts.group(1)) - if (timeparts.group(2) != ""): - mins = int(timeparts.group(2)) - else: - mins = 0 - meridien = timeparts.group(3) - if re.match("p\.?m\.?", meridien): - if hour != 12: - hour += 12 - elif meridien == "midnight": - assert hour == 12 - hour += 12 - elif meridien == "noon": - assert hour == 12 - else: - if hour == 12: - hour -= 12 - if not re.match("a\.?m\.?", meridien): - if previoustime and previoustimehour > hour: - hour += 12 - - # skipping forward by twelve hours is a good sign an am/pm has gotten mixed - # Assume it's that if it's exactly 12 hours, alert otherwise - if previoustime and previoustimehour + 12 == hour: - hour -= 12 - - if previoustime and previoustimehour + 12 <= hour: - print "TIME: time shift by 12 (from %s to %s) -- should a p.m. be an a.m.? %s" % (previoustime, time, repr(stampurl)) - - elif time == 'Midnight': - hour = 24 - mins = 0 - elif time == 'Noon': - hour = 12 - mins = 0 - else: - return None - - res = "%03d:%02d:00" % (hour, mins) - - - # day-rotate situation where they went on beyond midnight - # it's uncommon enough to handle by listing exceptional days - # (sometimes the division time is out of order because that is where it is inserted in the record -- maybe should patch to handle) - #print previoustime, res, bIsDivisionTime, stampurl.sdate - if previoustime and res < previoustime: - if stampurl.sdate in ["2005-03-10"]: - if previoustime < "024": - print "dayrotate on ", stampurl.sdate, (hour, mins), previoustime - hour += 24 - - # correction heading case -- a copy of some text that is to be inserted into a different day. 
- elif stampurl.sdate in ["2002-10-28"]: - return res - - elif hour in [0, 1, 2, 3, 4] or stampurl.sdate in ["2003-10-20", "2000-10-03", "2000-07-24", "2011-01-17"]: - hour += 24 - else: - print 'TIME: time rotation (from %s to %s %s) not close to midnight %s' % (previoustime, time, res, repr(stampurl)) - - res = "%03d:%02d:00" % (hour, mins) - - - # capture the case where we are out of order by more than a few minutes - # (divisions are often out of order slightly) - - # out of order case - if previoustime and res < previoustime: - # if it's a division type, we can tolerate a few minutes - timeminutes = int(hour) * 60 + int(mins) - previoustimeminutes = previoustimehour * 60 + int(prevtimeMatch.group(2)) - if timeminutes < previoustimeminutes: - if not bIsDivisionTime or (previoustimeminutes - timeminutes > 10): - print 'TIME: time out of order, from %s to %s (division=%s) %s' % (previoustime, res, bIsDivisionTime, repr(stampurl)) - return res + #print "time ", time + + previoustime = None + if previoustimearr: + previoustime = previoustimearr[-1] + prevtimeMatch = re.match("(\d+):(\d+)", previoustime) + previoustimehour = int(prevtimeMatch.group(1)) + + # This code lifted from fix_time PHP code from easyParliament + timeparts = regparsetime.match(time) + if not timeparts: + timeparts = regparsetimeonhour.match(time) + if timeparts: + hour = int(timeparts.group(1)) + if (timeparts.group(2) != ""): + mins = int(timeparts.group(2)) + else: + mins = 0 + meridien = timeparts.group(3) + if re.match("p\.?m\.?", meridien): + if hour != 12: + hour += 12 + elif meridien == "midnight": + assert hour == 12 + hour += 12 + elif meridien == "noon": + assert hour == 12 + else: + if hour == 12: + hour -= 12 + if not re.match("a\.?m\.?", meridien): + if previoustime and previoustimehour > hour: + hour += 12 + + # skipping forward by twelve hours is a good sign an am/pm has gotten mixed + # Assume it's that if it's exactly 12 hours, alert otherwise + if previoustime and previoustimehour + 12 == hour: + hour -= 12 + + if previoustime and previoustimehour + 12 <= hour: + print("TIME: time shift by 12 (from %s to %s) -- should a p.m. be an a.m.? %s" % (previoustime, time, repr(stampurl))) + + elif time == 'Midnight': + hour = 24 + mins = 0 + elif time == 'Noon': + hour = 12 + mins = 0 + else: + return None + + res = "%03d:%02d:00" % (hour, mins) + + + # day-rotate situation where they went on beyond midnight + # it's uncommon enough to handle by listing exceptional days + # (sometimes the division time is out of order because that is where it is inserted in the record -- maybe should patch to handle) + #print previoustime, res, bIsDivisionTime, stampurl.sdate + if previoustime and res < previoustime: + if stampurl.sdate in ["2005-03-10"]: + if previoustime < "024": + print("dayrotate on ", stampurl.sdate, (hour, mins), previoustime) + hour += 24 + + # correction heading case -- a copy of some text that is to be inserted into a different day. 
+ elif stampurl.sdate in ["2002-10-28"]: + return res + + elif hour in [0, 1, 2, 3, 4] or stampurl.sdate in ["2003-10-20", "2000-10-03", "2000-07-24", "2011-01-17"]: + hour += 24 + else: + print('TIME: time rotation (from %s to %s %s) not close to midnight %s' % (previoustime, time, res, repr(stampurl))) + + res = "%03d:%02d:00" % (hour, mins) + + + # capture the case where we are out of order by more than a few minutes + # (divisions are often out of order slightly) + + # out of order case + if previoustime and res < previoustime: + # if it's a division type, we can tolerate a few minutes + timeminutes = int(hour) * 60 + int(mins) + previoustimeminutes = previoustimehour * 60 + int(prevtimeMatch.group(2)) + if timeminutes < previoustimeminutes: + if not bIsDivisionTime or (previoustimeminutes - timeminutes > 10): + print('TIME: time out of order, from %s to %s (division=%s) %s' % (previoustime, res, bIsDivisionTime, repr(stampurl))) + return res # The names of entities and what they are are here: @@ -229,7 +226,7 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): '_':'_', # this is underscore symbol '_':'_', # this is underscore symbol - ''':"'", # possession apostrophe + ''':"'", # possession apostrophe "€":'€', # this is euro currency "™":'™', "•":'•', @@ -254,172 +251,154 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): def StripAnchorTags(text): - raise Exception, "I've never called this function, so test it" + raise Exception("I've never called this function, so test it") - abf = re.split('(<[^>]*>)', text) + abf = re.split('(<[^>]*>)', text) - ret = '' - for ab in abf: - if re.match(']*>(?i)', ab): - pass + ret = '' + for ab in abf: + if re.match(']*>(?i)', ab): + pass - elif re.match('(?i)', ab): - pass + elif re.match('(?i)', ab): + pass - else: - ret = ret + ab + else: + ret = ret + ab - return ret + return ret def WriteCleanText(fout, text, striphref=True): - text = re.sub('', '', text) - abf = re.split('(<[^>]*>)', text) - for ab in abf: - # delete comments and links - if re.match(']*?->', ab): - pass + text = re.sub('', '', text) + abf = re.split('(<[^>]*>)', text) + for ab in abf: + # delete comments and links + if re.match(']*?->', ab): + pass - # XXX Differs from pullgluepages version - elif striphref and re.match(']+>(?i)', ab): - anamem = re.match(']+>(?i)', ab): + anamem = re.match('(?i)', ab): - pass + elif striphref and re.match('(?i)', ab): + pass - # spaces only inside tags - elif re.match('<[^>]*>', ab): - fout.write(re.sub('\s', ' ', ab)) + # spaces only inside tags + elif re.match('<[^>]*>', ab): + fout.write(re.sub('\s', ' ', ab)) - # take out spurious > symbols and dos linefeeds - else: - fout.write(re.sub('>|\r', '', ab)) + # take out spurious > symbols and dos linefeeds + else: + fout.write(re.sub('>|\r', '', ab)) # Legacy patch system, use patchfilter.py and patchtool now def ApplyFixSubstitutions(text, sdate, fixsubs): - for sub in fixsubs: - if sub[3] == 'all' or sub[3] == sdate: - (text, n) = re.subn(sub[0], sub[1], text) - if (sub[2] != -1) and (n != sub[2]): - print sub - raise Exception, 'wrong number of substitutions %d on %s' % (n, sub[0]) - return text + for sub in fixsubs: + if sub[3] == 'all' or sub[3] == sdate: + (text, n) = re.subn(sub[0], sub[1], text) + if (sub[2] != -1) and (n != sub[2]): + print(sub) + raise Exception('wrong number of substitutions %d on %s' % (n, sub[0])) + return text # this only accepts and tags def StraightenHTMLrecurse(stex, stampurl): - # split the text into and and and - 
qisup = re.search(r'(<(a|i|b|s|small|sup|sub)( href="[^"]*")?>(.*?))(?i)', stex) + # split the text into and and and + qisup = re.search(r'(<(a|i|b|s|small|sup|sub)( href="[^"]*")?>(.*?))(?i)', stex) + if qisup: + qtagtype = qisup.group(2) + qhref = qisup.group(3) or '' + qtag = ('<%s%s>' % (qtagtype, qhref), '' % qtagtype) + if not qisup: + qisup = re.search('(<(a) href="([^"]*)">(.*?))(?i)', stex) if qisup: - qtagtype = qisup.group(2) - qhref = qisup.group(3) or '' - qtag = ('<%s%s>' % (qtagtype, qhref), '' % qtagtype) - if not qisup: - qisup = re.search('(<(a) href="([^"]*)">(.*?))(?i)', stex) - if qisup: - qtag = ('' % qisup.group(3), '') - - if qisup: - sres = StraightenHTMLrecurse(stex[:qisup.start(1)], stampurl) - sres.append(qtag[0]) - sres.extend(StraightenHTMLrecurse(qisup.group(4), stampurl)) - sres.append(qtag[1]) - sres.extend(StraightenHTMLrecurse(stex[qisup.end(1):], stampurl)) - return sres - - sres = re.split('(&[a-z0-9]*?;|&#\d+;|"|\xa3|&|\x01|\x0e|\x14|\x92|\xb0|\xab|\xe9|\xc3\xb8|\xc3\xb1|<[^>]*>|<|>)', stex) - for i in range(len(sres)): - #print "sresi ", sres[i], "\n" - #print "-----------------------------------------------\n" - - if not sres[i]: - pass - elif re.match('&#[0-9]+;', sres[i]) and not re.match('[345][0-9];', sres[i]): - pass - elif sres[i][0] == '&': - if sres[i] in entitymap: - sres[i] = entitymap[sres[i]] - elif sres[i] in entitymaprev: - pass - elif sres[i] == '—': # special case as entitymap maps it with spaces - pass - elif sres[i] in ('"', '&', '<', '>'): - pass - elif sres[i] in ('“', '”'): - sres[i] = '"' - else: - raise Exception, sres[i] + ' unknown ent' - sres[i] = 'UNKNOWN-ENTITY' - - elif sres[i] == '"': - sres[i] = '"' - - # junk chars sometimes get in - # NB this only works if the characters are split in the regexp above - elif sres[i] == '\x01': - sres[i] = '' - elif sres[i] == '\x0e': - sres[i] = ' ' - elif sres[i] == '\x14': - sres[i] = ' ' - elif sres[i] == '\x92': - sres[i] = "'" - elif sres[i] == '\xa3': - sres[i] = '£' - elif sres[i] == '\xb0': - sres[i] = '°' - elif sres[i] == '\xab': - sres[i] = 'é' - elif sres[i] == '\xe9': - sres[i] = 'é' - elif sres[i] == '\xc3\xb8': - sres[i] = 'ø' - elif sres[i] == '\xc3\xb1': - sres[i] = 'ñ' - - elif re.match('$(?i)', sres[i]): - sres[i] = '' # 'OPEN-i-TAG-OUT-OF-PLACE' 'CLOSE-i-TAG-OUT-OF-PLACE' - - elif re.match('$', sres[i]): # what is this? wrans 2003-05-13 has one - sres[i] = '' - - # allow brs through - elif re.match('
<br ?/?>$(?i)', sres[i]): - sres[i] = '<br/>
    ' - - # discard garbage that appears in recent today postings - elif re.match('$(?i)', sres[i]): - sres[i] = '' - - elif sres[i][0] == '<' or sres[i][0] == '>': - print "Part:", sres[i][0] - print "All:",sres[i] - print "stex:", stex - print "raising" - raise ContextException('tag %s tag out of place in %s' % (sres[i], stex), stamp=stampurl, fragment=stex) - - return sres - - -def FixHTMLEntitiesL(stex, signore='', stampurl=None): - # will formalize this into the recursion later - if signore: - stex = re.sub(signore, '', stex) - return StraightenHTMLrecurse(stex, stampurl) - -def FixHTMLEntities(stex, signore='', stampurl=None): - res = string.join(FixHTMLEntitiesL(stex, signore, stampurl), '') - try: - res = res.decode('utf-8') - return res.encode("latin-1") - except Exception, e: - print "Encoding problem with:", res - raise ContextException(str(e), stamp=stampurl, fragment=res) - - + qtag = ('' % qisup.group(3), '') + + if qisup: + sres = StraightenHTMLrecurse(stex[:qisup.start(1)], stampurl) + sres.append(qtag[0]) + sres.extend(StraightenHTMLrecurse(qisup.group(4), stampurl)) + sres.append(qtag[1]) + sres.extend(StraightenHTMLrecurse(stex[qisup.end(1):], stampurl)) + return sres + + sres = re.split('(&[a-z0-9]*?;|&#\d+;|"|\xa3|&|\x01|\x0e|\x14|\x92|\xb0|\xab|\xe9|\xc3\xb8|\xc3\xb1|<[^>]*>|<|>)', stex) + for i in range(len(sres)): + #print "sresi ", sres[i], "\n" + #print "-----------------------------------------------\n" + + if not sres[i]: + pass + elif re.match('&#[0-9]+;', sres[i]) and not re.match('[345][0-9];', sres[i]): + pass + elif sres[i][0] == '&': + if sres[i] in entitymap: + sres[i] = entitymap[sres[i]] + elif sres[i] in entitymaprev: + pass + elif sres[i] == '—': # special case as entitymap maps it with spaces + pass + elif sres[i] in ('"', '&', '<', '>'): + pass + elif sres[i] in ('“', '”'): + sres[i] = '"' + else: + raise Exception(sres[i] + ' unknown ent') + sres[i] = 'UNKNOWN-ENTITY' + + elif sres[i] == '"': + sres[i] = '"' + + # junk chars sometimes get in + # NB this only works if the characters are split in the regexp above + elif sres[i] == '\x01': + sres[i] = '' + elif sres[i] == '\x0e': + sres[i] = ' ' + elif sres[i] == '\x14': + sres[i] = ' ' + elif sres[i] == '\x92': + sres[i] = "'" + elif sres[i] == '\xa3': + sres[i] = '£' + elif sres[i] == '\xb0': + sres[i] = '°' + elif sres[i] == '\xab': + sres[i] = 'é' + elif sres[i] == '\xe9': + sres[i] = 'é' + elif sres[i] == '\xc3\xb8': + sres[i] = 'ø' + elif sres[i] == '\xc3\xb1': + sres[i] = 'ñ' + + elif re.match('$(?i)', sres[i]): + sres[i] = '' # 'OPEN-i-TAG-OUT-OF-PLACE' 'CLOSE-i-TAG-OUT-OF-PLACE' + + elif re.match('$', sres[i]): # what is this? wrans 2003-05-13 has one + sres[i] = '' + + # allow brs through + elif re.match('
<br ?/?>$(?i)', sres[i]): + sres[i] = '<br/>
    ' + + # discard garbage that appears in recent today postings + elif re.match('$(?i)', sres[i]): + sres[i] = '' + + elif sres[i][0] == '<' or sres[i][0] == '>': + print("Part:", sres[i][0]) + print("All:",sres[i]) + print("stex:", stex) + print("raising") + raise ContextException('tag %s tag out of place in %s' % (sres[i], stex), stamp=stampurl, fragment=stex) + + return sres # The lookahead assertion (?= bits without end into component parts - for nf in parts: - - # a tiny bit of extra splitting up as output - if retablestart.match(nf) and not retable.match(nf): - newparts.extend(reparts2.split(nf)) - else: - newparts.append(nf) - - # get rid of blank and boring paragraphs - if reparaempty.match(nf): - if pstring and re.search('\S', nf): - print text - print '---' + pstring - print '---' + nf - raise Exception, ' it carried across empty para ' - continue - - # list of space type objects - if reparaspace.match(nf): - spclist.append(nf) - continue - - # sometimes italics are hidden among the paragraph choss - # bring forward onto the next string - if reitalif.match(nf): - if pstring: - print text - print spclist - print pstring - raise Exception, ' double italic in paraspace ' - pstring = '' - continue - - - # we now have a string of a paragraph which we are putting into the list. - - # table type - bthisparaalone = False - if retable.match(nf): - if pstring: - print text - raise Exception, ' non-empty preceding string ' - pstring = nf - bthisparaalone = True - - else: - lnf = re.sub("\s+", " ", nf) - if pstring: - pstring = pstring + " " + string.strip(lnf) - else: - pstring = string.strip(lnf) - - - # check that paragraphs have some text - if re.match('(?:<[^>]*>|\s)*$', pstring): - print "\nspclist:", spclist - print "\npstring:", pstring - print "\nthe text:", text[:100] - print "\nnf:", nf - raise ContextException('no text in paragraph', stamp=stampurl, fragment=pstring) - - # check that paragraph spaces aren't only font text, and have something - # real in them, unless they are breaks because of tables - if not (bprevparaalone or bthisparaalone): - bnonfont = False - for sl in spclist: - if not re.match(']*>(?i)', sl): - bnonfont = True - if not bnonfont: - print "text:", text - print "spclist:", spclist - print "pstring", pstring - print "----------" - print "nf", nf - print "----------" - raise ContextException('font found in middle of paragraph should be a paragraph break or removed', stamp=stampurl, fragment=pstring) - bprevparaalone = bthisparaalone - - - # put the preceding space, then the string into output list - res.append(spclist) - res.append(pstring) - #print "???%s???" % pstring - - spclist = [ ] - pstring = '' - - # findal spaces into the output list - res.append(spclist) - - return res + res = [] + + # used to detect over breaking in spaces + bprevparaalone = True + + # list of space objects, list of string + spclist = [] + pstring = '' + parts = reparts.split(text) + newparts = [] + # split up the start bits without end
    into component parts + for nf in parts: + + # a tiny bit of extra splitting up as output + if retablestart.match(nf) and not retable.match(nf): + newparts.extend(reparts2.split(nf)) + else: + newparts.append(nf) + + # get rid of blank and boring paragraphs + if reparaempty.match(nf): + if pstring and re.search('\S', nf): + print(text) + print('---' + pstring) + print('---' + nf) + raise Exception(' it carried across empty para ') + continue + + # list of space type objects + if reparaspace.match(nf): + spclist.append(nf) + continue + + # sometimes italics are hidden among the paragraph choss + # bring forward onto the next string + if reitalif.match(nf): + if pstring: + print(text) + print(spclist) + print(pstring) + raise Exception(' double italic in paraspace ') + pstring = '' + continue + + + # we now have a string of a paragraph which we are putting into the list. + + # table type + bthisparaalone = False + if retable.match(nf): + if pstring: + print(text) + raise Exception(' non-empty preceding string ') + pstring = nf + bthisparaalone = True + + else: + lnf = re.sub("\s+", " ", nf) + if pstring: + pstring = pstring + " " + lnf.strip() + else: + pstring = lnf.strip() + + + # check that paragraphs have some text + if re.match('(?:<[^>]*>|\s)*$', pstring): + print("\nspclist:", spclist) + print("\npstring:", pstring) + print("\nthe text:", text[:100]) + print("\nnf:", nf) + raise ContextException('no text in paragraph', stamp=stampurl, fragment=pstring) + + # check that paragraph spaces aren't only font text, and have something + # real in them, unless they are breaks because of tables + if not (bprevparaalone or bthisparaalone): + bnonfont = False + for sl in spclist: + if not re.match(']*>(?i)', sl): + bnonfont = True + if not bnonfont: + print("text:", text) + print("spclist:", spclist) + print("pstring", pstring) + print("----------") + print("nf", nf) + print("----------") + raise ContextException('font found in middle of paragraph should be a paragraph break or removed', stamp=stampurl, fragment=pstring) + bprevparaalone = bthisparaalone + + + # put the preceding space, then the string into output list + res.append(spclist) + res.append(pstring) + #print "???%s???" % pstring + + spclist = [ ] + pstring = '' + + # findal spaces into the output list + res.append(spclist) + + return res # Break text into paragraphs and mark the paragraphs according to their
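Note: the hunks above apply the same small set of mechanical Python 2 to 3 conversions throughout. A compact, self-contained sketch of those idioms, using a throwaway demo function rather than any code from this repository:

    import string
    import sys

    def demo(parts, total, count):
        # print is now a function; file= replaces "print >>sys.stderr"
        print('starting', file=sys.stderr)
        try:
            if count == 0:
                # "raise ValueError, 'msg'" becomes a normal constructor call
                raise ValueError('count must be non-zero')
        except ValueError as e:  # "except ValueError, e" becomes "except ValueError as e"
            print('error:', e)
            return None
        mid = total // count                # integer floor division needs // in Python 3
        joined = ''.join(parts)             # string.join(parts, '') becomes a str method
        letters = string.ascii_lowercase    # string.lowercase was removed in Python 3
        ordered = sorted({'b': 1, 'a': 2})  # replaces "k = d.keys(); k.sort()"
        return joined, mid, letters, ordered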