From 71c4c3ab0389c0fa93f483baeef0ad28581310a0 Mon Sep 17 00:00:00 2001 From: xqt Date: Sat, 23 Sep 2023 17:43:57 +0200 Subject: [PATCH] [cleanup] cleanup redirect methods - add a new redirects method to BaseSite that returns the generic redirect tag list (previously returned by BaseSite.redirect()) - add a new redirects method to APISite that returns the redirect tag list extracted from getmagicwords('redirect') - return the default redirect tag with BaseSite.redirect() instead of a list. This makes it possible to get the default tag also from APISite if APISite.redirects is defined - move redirect_regex method from APISite to BaseSite but simplify the implementation. It is not necessary to assume that getmagicwords('redirect') does not exist; it was a guess of the very first 2.0 implementation and was implemented as the redirect tags were part of the family files sometime in 2008. - remove deprecated redirectRegex Bug: T347226 Change-Id: I3647cf8cb154686f075bc53a054b937675d74765 --- ROADMAP.rst | 1 - pywikibot/site/_apisite.py | 34 +++++++----------------- pywikibot/site/_basesite.py | 52 ++++++++++++++++++++++--------------- 3 files changed, 41 insertions(+), 46 deletions(-) diff --git a/ROADMAP.rst b/ROADMAP.rst index 98146458d8..d91c5b4355 100644 --- a/ROADMAP.rst +++ b/ROADMAP.rst @@ -76,4 +76,3 @@ Will be removed in Pywikibot 9 * 6.2.0: Throttle.multiplydelay attribute is deprecated * 6.2.0: SequenceOutputter.format_list() is deprecated in favour of 'out' property * 6.0.0: config.register_family_file() is deprecated -* 5.5.0: APISite.redirectRegex() will be removed in favour of APISite.redirect_regex() diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index 33d17528cd..ba3e8ca087 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -17,7 +17,7 @@ from pywikibot import login from pywikibot.backports import DefaultDict, Dict, List, Match from pywikibot.backports import OrderedDict as OrderedDictType -from pywikibot.backports 
import Iterable, Pattern, Set, Tuple, removesuffix +from pywikibot.backports import Iterable, Set, Tuple, removesuffix from pywikibot.comms import http from pywikibot.data import api from pywikibot.exceptions import ( @@ -1036,32 +1036,18 @@ def getmagicwords(self, word: str) -> List[str]: return self._magicwords[word] return [word] - def redirect(self) -> str: - """Return the localized #REDIRECT keyword.""" - # return the magic word without the preceding '#' character - return self.getmagicwords('redirect')[0].lstrip('#') + def redirects(self) -> List[str]: + """Return a list of localized tags for the site without preceding '#'. - @deprecated('redirect_regex', since='5.5.0') - def redirectRegex(self) -> Pattern[str]: # noqa: N802 - """Return a compiled regular expression matching on redirect pages.""" - return self.redirect_regex - - @property - def redirect_regex(self) -> Pattern[str]: - """Return a compiled regular expression matching on redirect pages. - - Group 1 in the regex match object will be the target title. + .. seealso:: + :meth:`BaseSite.redirect() + <pywikibot.site._basesite.BaseSite.redirect>` and + :meth:`BaseSite.redirects() + <pywikibot.site._basesite.BaseSite.redirects>` + .. versionadded:: 8.4 """ - # NOTE: this is needed, since the API can give false positives! 
- try: - keywords = {s.lstrip('#') for s in self.getmagicwords('redirect')} - keywords.add('REDIRECT') # just in case - pattern = '(?:' + '|'.join(keywords) + ')' - except KeyError: - # no localized keyword for redirects - pattern = None - return super().redirectRegex(pattern) + return [s.lstrip('#') for s in self.getmagicwords('redirect')] def pagenamecodes(self) -> List[str]: """Return list of localized PAGENAME tags for the site.""" diff --git a/pywikibot/site/_basesite.py b/pywikibot/site/_basesite.py index 7292efc97c..ea47b94f71 100644 --- a/pywikibot/site/_basesite.py +++ b/pywikibot/site/_basesite.py @@ -11,7 +11,7 @@ from warnings import warn import pywikibot -from pywikibot.backports import Pattern +from pywikibot.backports import List, Pattern from pywikibot.exceptions import ( Error, FamilyMaintenanceWarning, @@ -242,26 +242,36 @@ def namespaces(self): """Return dict of valid namespaces on this wiki.""" return NamespacesDict(self._build_namespaces()) - def ns_normalize(self, value): - """ - Return canonical local form of namespace name. + def ns_normalize(self, value: str): + """Return canonical local form of namespace name. :param value: A namespace name - :type value: str - """ index = self.namespaces.lookup_name(value) return self.namespace(index) - def redirect(self): - """Return list of localized redirect tags for the site.""" + def redirect(self) -> str: + """Return a default redirect tag for the site. + + .. versionchanged:: 8.4 + return a single generic redirect tag instead of a list of + tags. For the list use :meth:`redirects` instead. + """ + return self.redirects()[0] + + def redirects(self) -> List[str]: + """Return list of generic redirect tags for the site. + + .. seealso:: :meth:`redirect` for the default redirect tag. + .. 
versionadded:: 8.4 + """ return ['REDIRECT'] - def pagenamecodes(self): + def pagenamecodes(self) -> List[str]: """Return list of localized PAGENAME tags for the site.""" return ['PAGENAME'] - def pagename2codes(self): + def pagename2codes(self) -> List[str]: """Return list of localized PAGENAMEE tags for the site.""" return ['PAGENAMEE'] @@ -337,22 +347,22 @@ def isInterwikiLink(self, text): # noqa: N802 linkfam, linkcode = pywikibot.Link(text, self).parse_site() return linkfam != self.family.name or linkcode != self.code - def redirectRegex( # noqa: N802 - self, - pattern: Optional[str] = None - ) -> Pattern[str]: + @property + def redirect_regex(self) -> Pattern[str]: """Return a compiled regular expression matching on redirect pages. Group 1 in the regex match object will be the target title. + A redirect starts with hash (#), followed by a keyword, then + arbitrary stuff, then a wikilink. The wikilink may contain a + label, although this is not useful. + + .. versionadded:: 8.4 + moved from :class:`APISite` """ - if pattern is None: - pattern = 'REDIRECT' - # A redirect starts with hash (#), followed by a keyword, then - # arbitrary stuff, then a wikilink. The wikilink may contain - # a label, although this is not useful. - return re.compile(r'\s*#{pattern}\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]' - .format(pattern=pattern), re.IGNORECASE | re.DOTALL) + tags = '|'.join(self.redirects()) + return re.compile(fr'\s*#(?:{tags})\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]', + re.IGNORECASE | re.DOTALL) def sametitle(self, title1: str, title2: str) -> bool: """