Piwik is now Matomo
dan-blanchard committed Aug 12, 2020
1 parent 4e05249 commit cb5854e
Showing 6 changed files with 22 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
@@ -1,3 +1,4 @@
+v0.7.0, 2020-08-11 -- Updated to fix Piwik -> Matomo rename and get latest list.
v0.6.3, 2017-07-19 -- Updated to latest Piwik list to support more hidden keyword URLs for Bing and Google
v0.6.2, 2017-05-04 -- Fixed issue where hidden_keyword regexes had too many slashes removed, and updated Yahoo! Japan hidden_keyword_paths.
v0.6.1, 2017-03-06 -- Fixed issue where get_all_query_params_by_domain didn't return a complete list
10 changes: 5 additions & 5 deletions README.rst
@@ -6,8 +6,8 @@ serpextract

``serpextract`` provides easy extraction of keywords from search engine results pages (SERPs).

-This module is possible in large part to the very hard work of the `Piwik <http://piwik.org/>`_ team.
-Specifically, we make extensive use of their `list of search engines <https://github.com/piwik/piwik/blob/master/core/DataFiles/SearchEngines.php>`_.
+This module is made possible in large part by the very hard work of the `Matomo <http://matomo.org/>`_ team.
+Specifically, we make extensive use of their `list of search engines <https://raw.githubusercontent.com/matomo-org/searchengine-and-social-list/master/SearchEngines.yml>`_.


Installation
@@ -69,7 +69,7 @@ Python
**Naive Detection**

-The list of search engine parsers that Piwik and therefore ``serpextract`` uses is far from
+The list of search engine parsers that Matomo and therefore ``serpextract`` uses is far from
exhaustive. If you want ``serpextract`` to attempt to guess if a given referring URL is a SERP,
you can specify ``use_naive_method=True`` to ``serpextract.is_serp`` or ``serpextract.extract``.
By default, the naive method is disabled.
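As a sketch of what such a naive guess can look like (a simplified standard-library stand-in, not ``serpextract``'s actual heuristic), a referrer can be flagged as a likely SERP when its query string carries a common keyword parameter:

```python
from urllib.parse import parse_qs, urlparse

# An illustrative subset of common keyword parameter names; not
# the real list serpextract consults.
NAIVE_KEYWORD_PARAMS = ('q', 'query', 'search', 'keyword', 'p')

def naive_is_serp(referring_url):
    """Guess whether a URL looks like a SERP by checking its query
    string for a well-known keyword parameter."""
    query = parse_qs(urlparse(referring_url).query)
    return any(param in query for param in NAIVE_KEYWORD_PARAMS)

print(naive_is_serp('http://www.example-search.com/search?q=python'))  # True
print(naive_is_serp('http://blog.example.com/post/42'))                # False
```

A heuristic like this trades precision for recall, which is why the real naive method is opt-in rather than the default.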
@@ -104,7 +104,7 @@ If one of these is found, a keyword is extracted and an ``ExtractResult`` is co
**Custom Parsers**

In the event that you have a custom search engine that you'd like to track which is not currently
-supported by Piwik/``serpextract``, you can create your own instance of
+supported by Matomo/``serpextract``, you can create your own instance of
``serpextract.SearchEngineParser`` and either pass it explicitly to
``serpextract.is_serp`` or ``serpextract.extract``, or add it
to the internal list of parsers.
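The custom-parser idea can be sketched as follows. The toy class, its constructor, and the registration dictionary here are illustrative only and do not reproduce ``SearchEngineParser``'s real signature:

```python
from urllib.parse import parse_qs, urlparse

class ToySearchEngineParser:
    """Toy stand-in for serpextract.SearchEngineParser: knows which
    query parameter holds the search keyword for one engine."""

    def __init__(self, engine_name, keyword_param):
        self.engine_name = engine_name
        self.keyword_param = keyword_param

    def parse(self, serp_url):
        """Return the extracted keyword, or None if absent."""
        query = parse_qs(urlparse(serp_url).query)
        values = query.get(self.keyword_param)
        return values[0] if values else None

# Register the parser against the engine's domain, mimicking how a
# custom parser would be added to an internal list of parsers.
parsers = {'search.example.com': ToySearchEngineParser('ExampleSearch', 'q')}

url = 'http://search.example.com/find?q=serp+extraction'
parser = parsers.get(urlparse(url).netloc)
print(parser.parse(url))  # serp extraction
```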
@@ -167,6 +167,6 @@ Caching
-------

Internally, this module caches an OrderedDict representation of
-`Piwik's list of search engines <https://github.com/piwik/piwik/blob/master/core/DataFiles/SearchEngines.php>`_
+`Matomo's list of search engines <https://raw.githubusercontent.com/matomo-org/searchengine-and-social-list/master/SearchEngines.yml>`_
which is stored in ``serpextract/search_engines.pickle``. This isn't intended to change that often and so this
module ships with a cached version.
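The caching described here follows the usual lazy module-level pattern, sketched below with a placeholder loader standing in for the real parsing of the bundled definitions file:

```python
_engines = None

def _load_engine_definitions():
    # Placeholder for the expensive step of reading and parsing the
    # bundled search-engine definitions; the real module loads a file
    # shipped as serpextract/search_engines.pickle.
    return {'example.com': {'params': ['q']}}

def get_search_engines():
    """Build the parser dictionary on first call, then reuse the
    module-level cache on every later call."""
    global _engines
    if _engines is None:
        _engines = _load_engine_definitions()
    return _engines

first = get_search_engines()
second = get_search_engines()
print(first is second)  # True: the same cached object is returned
```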
4 changes: 2 additions & 2 deletions docs/index.rst
@@ -65,7 +65,7 @@ your local cache via::
Naive Detection
---------------

-The list of search engine parsers that Piwik and therefore :mod:`serpextract.serpextract` uses is far from
+The list of search engine parsers that Matomo and therefore :mod:`serpextract.serpextract` uses is far from
exhaustive. If you want :mod:`serpextract.serpextract` to attempt to guess if a given referring URL is a SERP,
you can specify ``use_naive_method=True`` to :func:`serpextract.serpextract.is_serp` or :func:`serpextract.serpextract.extract`.
By default, the naive method is disabled.
@@ -101,7 +101,7 @@ Custom Parsers
--------------

In the event that you have a custom search engine that you'd like to track which is not currently
-supported by Piwik/:mod:`serpextract.serpextract`, you can create your own instance of
+supported by Matomo/:mod:`serpextract.serpextract`, you can create your own instance of
:class:`serpextract.serpextract.SearchEngineParser` and either pass it explicitly to
:func:`serpextract.serpextract.is_serp` or :func:`serpextract.serpextract.extract`, or add it
to the internal list of parsers.
2 changes: 1 addition & 1 deletion serpextract/__init__.py
@@ -2,4 +2,4 @@

from .serpextract import *

-__version__ = '0.6.3'
+__version__ = '0.7.0'
18 changes: 9 additions & 9 deletions serpextract/serpextract.py
@@ -157,7 +157,7 @@ def _is_url_without_path_query_or_fragment(url_parts):
_engines = None
def _get_search_engines():
    """
-    Convert the OrderedDict of search engine parsers that we get from Piwik
+    Convert the OrderedDict of search engine parsers that we get from Matomo
    to a dictionary of SearchEngineParser objects.

    Cache this thing by storing in the global ``_engines``.
@@ -166,13 +166,13 @@ def _get_search_engines():
    if _engines:
        return _engines

-    piwik_engines = _get_piwik_engines()
+    matomo_engines = _get_matomo_engines()
    # Engine names are the first param of each of the search engine arrays
    # so we group by those guys, and create our new dictionary with that
    # order
    _engines = {}

-    for engine_name, rule_group in iteritems(piwik_engines):
+    for engine_name, rule_group in iteritems(matomo_engines):
        defaults = {
            'extractor': None,
            'link_macro': None,
@@ -214,7 +214,7 @@ def _expand_country_codes(urls):
    return expanded_urls


-def _get_piwik_engines():
+def _get_matomo_engines():
    """
    Return the search engine parser definitions stored in this module. We don't
    cache this result since it's only supposed to be called once.
@@ -226,8 +226,8 @@ def _get_piwik_engines():
        json_stream = TextIOWrapper(json_stream.buffer, encoding='utf-8')
    else:
        json_stream = TextIOWrapper(json_stream, encoding='utf-8')
-    _piwik_engines = json.load(json_stream)
-    return _piwik_engines
+    _matomo_engines = json.load(json_stream)
+    return _matomo_engines


class ExtractResult(object):
@@ -244,12 +244,12 @@ def __repr__(self):


class SearchEngineParser(object):
-    """Handles persing logic for a single line in Piwik's list of search
+    """Handles parsing logic for a single line in Matomo's list of search
    engines.

-    Piwik's list for reference:
-    https://raw.github.com/piwik/piwik/master/core/DataFiles/SearchEngines.php
+    Matomo's list for reference:
+    https://raw.githubusercontent.com/matomo-org/searchengine-and-social-list/master/SearchEngines.yml

    This class is not used directly since it already assumes you know the
    exact search engine you want to use to parse a URL. The main interface
8 changes: 4 additions & 4 deletions update_list.py
@@ -20,13 +20,13 @@ def main():
    filename = _here('serpextract', 'search_engines.json')
    print('Updating search engine parser definitions.')

-    url = urlopen('https://raw.githubusercontent.com/piwik/searchengine-and-social-list/master/SearchEngines.yml')
-    piwik_engines = yaml.safe_load(url)
+    url = urlopen('https://raw.githubusercontent.com/matomo-org/searchengine-and-social-list/master/SearchEngines.yml')
+    matomo_engines = yaml.safe_load(url)
    with open(filename, 'w') as json_file:
-        json.dump(piwik_engines, json_file, indent=2, sort_keys=True)
+        json.dump(matomo_engines, json_file, indent=2, sort_keys=True)

    print('Saved {} search engine parser definitions to {}.'
-          .format(len(piwik_engines), filename))
+          .format(len(matomo_engines), filename))


if __name__ == '__main__':
