Skip to content

Commit

Permalink
[cleanup] cleanup redirect methods
Browse files Browse the repository at this point in the history
- add a new redirects method to BaseSite that returns the generic
  redirect tag list (previously returned by BaseSite.redirect())
- add a new redirects method to APISite that returns the
  redirect tag list extracted from getmagicwords('redirect')
- return the default redirect tag with BaseSite.redirect() instead of
  a list. This allows getting the default tag from APISite as well, if
  APISite.redirects is defined
- move redirect_regex method from APISite to BaseSite but simplify
  the implementation. It is not necessary to assume that
  getmagicwords('redirect') does not exist; it was a guess of the
  very first 2.0 implementation and was implemented as the redirect tags
  were part of the family files sometime in 2008.
- remove deprecated redirectRegex

Bug: T347226
Change-Id: I3647cf8cb154686f075bc53a054b937675d74765
  • Loading branch information
xqt committed Sep 23, 2023
1 parent 4589be0 commit 71c4c3a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 46 deletions.
1 change: 0 additions & 1 deletion ROADMAP.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,3 @@ Will be removed in Pywikibot 9
* 6.2.0: Throttle.multiplydelay attribute is deprecated
* 6.2.0: SequenceOutputter.format_list() is deprecated in favour of 'out' property
* 6.0.0: config.register_family_file() is deprecated
* 5.5.0: APISite.redirectRegex() will be removed in favour of APISite.redirect_regex()
34 changes: 10 additions & 24 deletions pywikibot/site/_apisite.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pywikibot import login
from pywikibot.backports import DefaultDict, Dict, List, Match
from pywikibot.backports import OrderedDict as OrderedDictType
from pywikibot.backports import Iterable, Pattern, Set, Tuple, removesuffix
from pywikibot.backports import Iterable, Set, Tuple, removesuffix
from pywikibot.comms import http
from pywikibot.data import api
from pywikibot.exceptions import (
Expand Down Expand Up @@ -1036,32 +1036,18 @@ def getmagicwords(self, word: str) -> List[str]:
return self._magicwords[word]
return [word]

def redirect(self) -> str:
"""Return the localized #REDIRECT keyword."""
# return the magic word without the preceding '#' character
return self.getmagicwords('redirect')[0].lstrip('#')
def redirects(self) -> List[str]:
"""Return a list of localized tags for the site without preceding '#'.
@deprecated('redirect_regex', since='5.5.0')
def redirectRegex(self) -> Pattern[str]: # noqa: N802
"""Return a compiled regular expression matching on redirect pages."""
return self.redirect_regex

@property
def redirect_regex(self) -> Pattern[str]:
"""Return a compiled regular expression matching on redirect pages.
Group 1 in the regex match object will be the target title.
.. seealso::
:meth:`BaseSite.redirect()
<pywikibot.site._basesite.BaseSite.redirect>` and
:meth:`BaseSite.redirects()
<pywikibot.site._basesite.BaseSite.redirects>`
.. versionadded:: 8.4
"""
# NOTE: this is needed, since the API can give false positives!
try:
keywords = {s.lstrip('#') for s in self.getmagicwords('redirect')}
keywords.add('REDIRECT') # just in case
pattern = '(?:' + '|'.join(keywords) + ')'
except KeyError:
# no localized keyword for redirects
pattern = None
return super().redirectRegex(pattern)
return [s.lstrip('#') for s in self.getmagicwords('redirect')]

def pagenamecodes(self) -> List[str]:
"""Return list of localized PAGENAME tags for the site."""
Expand Down
52 changes: 31 additions & 21 deletions pywikibot/site/_basesite.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from warnings import warn

import pywikibot
from pywikibot.backports import Pattern
from pywikibot.backports import List, Pattern
from pywikibot.exceptions import (
Error,
FamilyMaintenanceWarning,
Expand Down Expand Up @@ -242,26 +242,36 @@ def namespaces(self):
"""Return dict of valid namespaces on this wiki."""
return NamespacesDict(self._build_namespaces())

def ns_normalize(self, value):
"""
Return canonical local form of namespace name.
def ns_normalize(self, value: str):
"""Return canonical local form of namespace name.
:param value: A namespace name
:type value: str
"""
index = self.namespaces.lookup_name(value)
return self.namespace(index)

def redirect(self):
"""Return list of localized redirect tags for the site."""
def redirect(self) -> str:
"""Return a default redirect tag for the site.
.. versionchanged:: 8.4
return a single generic redirect tag instead of a list of
tags. For the list use :meth:`redirects` instead.
"""
return self.redirects()[0]

def redirects(self) -> List[str]:
"""Return list of generic redirect tags for the site.
.. seealso:: :meth:`redirect` for the default redirect tag.
.. versionadded:: 8.4
"""
return ['REDIRECT']

def pagenamecodes(self):
def pagenamecodes(self) -> List[str]:
"""Return list of localized PAGENAME tags for the site."""
return ['PAGENAME']

def pagename2codes(self):
def pagename2codes(self) -> List[str]:
"""Return list of localized PAGENAMEE tags for the site."""
return ['PAGENAMEE']

Expand Down Expand Up @@ -337,22 +347,22 @@ def isInterwikiLink(self, text): # noqa: N802
linkfam, linkcode = pywikibot.Link(text, self).parse_site()
return linkfam != self.family.name or linkcode != self.code

def redirectRegex( # noqa: N802
self,
pattern: Optional[str] = None
) -> Pattern[str]:
@property
def redirect_regex(self) -> Pattern[str]:
"""Return a compiled regular expression matching on redirect pages.
Group 1 in the regex match object will be the target title.
A redirect starts with hash (#), followed by a keyword, then
arbitrary stuff, then a wikilink. The wikilink may contain a
label, although this is not useful.
.. versionadded:: 8.4
moved from class:`APISite<pywikibot.site._apisite.APISite>`
"""
if pattern is None:
pattern = 'REDIRECT'
# A redirect starts with hash (#), followed by a keyword, then
# arbitrary stuff, then a wikilink. The wikilink may contain
# a label, although this is not useful.
return re.compile(r'\s*#{pattern}\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]'
.format(pattern=pattern), re.IGNORECASE | re.DOTALL)
tags = '|'.join(self.redirects())
return re.compile(fr'\s*#(?:{tags})\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]',
re.IGNORECASE | re.DOTALL)

def sametitle(self, title1: str, title2: str) -> bool:
"""
Expand Down

0 comments on commit 71c4c3a

Please sign in to comment.