From 1ccc60d0d6dab217afc389b9efce907dadd0dc60 Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Mon, 13 Mar 2023 18:26:47 +0000 Subject: [PATCH] Upgrade from python 2 to 3. --- commonlib | 2 +- scripts/future-fetch.py | 14 +++---- scripts/morningupdate | 2 +- scripts/photo-attribution-import.py | 10 ++--- scripts/wikipedia-backlinks.py | 61 ----------------------------- 5 files changed, 14 insertions(+), 75 deletions(-) delete mode 100755 scripts/wikipedia-backlinks.py diff --git a/commonlib b/commonlib index 599c8119f6..57a3dce66c 160000 --- a/commonlib +++ b/commonlib @@ -1 +1 @@ -Subproject commit 599c8119f6defec05b7f70c4c770b0587702a564 +Subproject commit 57a3dce66cb47526c630dff49264afa80d479071 diff --git a/scripts/future-fetch.py b/scripts/future-fetch.py index 1f5fb10fe9..9bc099bfa1 100644 --- a/scripts/future-fetch.py +++ b/scripts/future-fetch.py @@ -1,11 +1,11 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # encoding: utf-8 import json import os import sys import re -import urllib2 +import urllib.request import MySQLdb import datetime @@ -32,7 +32,7 @@ def fetch_url(date): data = CALENDAR_BASE % {'date': date} - data = urllib2.urlopen(data) + data = urllib.request.urlopen(data) data = json.load(data) return data @@ -115,9 +115,9 @@ def __init__(self, event): if not match: match = lordsList.match_by_mnis(id, self.event_date) if match: - self.people.append( - int(match['id'].replace('uk.org.publicwhip/person/', '')) - ) + self.people.append( + int(match['id'].replace('uk.org.publicwhip/person/', '')) + ) self.witnesses = [] witnesses_str = [] @@ -272,7 +272,7 @@ def update(self): if old_tuple != new_tuple: new_entry.update() - old_entries.discard((long(id),)) + old_entries.discard((int(id),)) else: new_entry.add() diff --git a/scripts/morningupdate b/scripts/morningupdate index f9d4b1656c..bc5880dc7c 100755 --- a/scripts/morningupdate +++ b/scripts/morningupdate @@ -48,7 +48,7 @@ system "./xml2db.pl $cronquiet --recent --scotwrans --quiet"; system "./xml2db.pl $cronquiet --recent --scotqs --quiet"; system "./xml2db.pl $cronquiet --recent --standing --quiet"; system "./xml2db.pl $cronquiet --recent --lmqs --quiet"; -system "python ./future-fetch.py"; +system "python3 ./future-fetch.py"; $cronquiet = substr($cronquiet, 2) if $cronquiet; diff --git a/scripts/photo-attribution-import.py b/scripts/photo-attribution-import.py index b1e5c2efe0..c967e06e86 100644 --- a/scripts/photo-attribution-import.py +++ b/scripts/photo-attribution-import.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # encoding: utf-8 import json @@ -31,13 +31,13 @@ ) db_cursor = db_connection.cursor() -data_blank = filter(lambda r: not r["data_value"], data) -data_blank = map(lambda r: (r["person_id"], r["data_key"]), data_blank) +data_blank = [r for r in data if not r["data_value"]] +data_blank = [(r["person_id"], r["data_key"]) for r in data_blank] db_cursor.executemany("""DELETE FROM personinfo WHERE person_id=%s AND data_key=%s""", data_blank) -data = filter(lambda r: r["data_value"], data) -data = map(lambda r: (r["person_id"], r["data_key"], r["data_value"]), data) +data = [r for r in data if r["data_value"]] +data = [(r["person_id"], r["data_key"], r["data_value"]) for r in data] db_cursor.executemany("""INSERT INTO personinfo (person_id, data_key, data_value) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE data_value = VALUES(data_value)""", data) diff --git a/scripts/wikipedia-backlinks.py b/scripts/wikipedia-backlinks.py deleted file mode 100755 index e1b1d6dd63..0000000000 --- a/scripts/wikipedia-backlinks.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/local/bin/python2.4 -# vim:sw=4:ts=4:et:nowrap - -# Reads wikipedia dump and finds links to TheyWorkForYou - -# bzcat ~/parldata/dumps/enwiki-latest-pages-articles.xml.bz2 | ./wikipedia-backlinks.py - -import xml.sax -import sys -import re - -def process_page(text): - links = re.findall("https?://(?:www.)?theyworkforyou.com/[^\s[\]]+", text) - return links - -class WikipediaReader(xml.sax.handler.ContentHandler): - def __init__(self): - pass - - def startElement(self, name, attr): - self.currtag = name - if name == 'title': - self.title = '' - if name == 'text': - self.text = '' - - def characters(self, chrs): - if self.currtag == 'title': - self.title = self.title + chrs - if self.currtag == 'text': - self.text = self.text + chrs - - def endElement(self, name): - if name == 'page': - #print self.title.encode('utf-8') - if "theyworkforyou.com" in self.text: - links = process_page(self.text) - for link in links: - print link, self.title.encode('utf-8') - self.currtag = None - -wr = WikipediaReader() -parser = xml.sax.make_parser() -parser.setContentHandler(wr) -parser.parse(sys.stdin) - - - -page = ''' - * [http://www.epolitix.com/EN/MPWebsites/Ann+Widdecombe/ ePolitix.com <80><94> Ann Widdecombe] - * [http://politics.guardian.co.uk/person/0,9290,-5516,00.html Guardian Unlimited Politics <80><94> Ask Aristotle: Ann Wid - decombe MP] - * [https://www.theyworkforyou.com/mp/ann_widdecombe/maidstone_and_the_weald TheyWorkForYou.com <80><94> Ann Widdecombe MP] - * [http://publicwhip.org.uk/mp.php?mpn=Ann_Widdecombe&mpc=Maidstone+%26amp%3B+The+Weald The Public Whip <80><94> Ann - Widdecombe MP] voting record - * [http://news.bbc.co.uk/1/shared/mpdb/html/275.stm BBC News <80><94> Ann Widdecombe] profile 10 February, 2005 - * [http://news.bbc.co.uk/1/hi/entertainment/tv_and_radio/3558378.stm BBC News <80><94> The Widdecombe Project] about her - agony aunt television programme on BBC Two''' -print process_page(page) -sys.exit() -