Merge pull request searx#2642 from return42/fix-apkmirror

[fix] APKMirror engine - update xpath selectors and fix img_src
dalf · Mar 11, 2021 · af3e969
2 parents 8b650e6 + 96422e5
commit af3e969
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 20 deletions.
diff --git a/Makefile b/Makefile
@@ -196,6 +196,7 @@ PYLINT_FILES=\
 	searx/engines/mediathekviewweb.py \
 	searx/engines/google_scholar.py \
 	searx/engines/yahoo_news.py \
+	searx/engines/apkmirror.py \
 	searx_extra/update/update_external_bangs.py
 
 test.pylint: pyenvinstall

diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py
@@ -1,13 +1,21 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+"""APKMirror
 """
- APK Mirror
-"""
+
+# pylint: disable=invalid-name, missing-function-docstring
 
 from urllib.parse import urlencode
 from lxml import html
-from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
-# about
+from searx import logger
+from searx.utils import (
+    eval_xpath_list,
+    eval_xpath_getindex,
+    extract_text,
+)
+
+logger = logger.getChild('APKMirror engine')
+
 about = {
     "website": 'https://www.apkmirror.com',
     "wikidata_id": None,
@@ -18,49 +26,43 @@
 }
 
 # engine dependent config
-categories = ['it']
+categories = ['files']
 paging = True
-
-# I am not 100% certain about this, as apkmirror appears to be a wordpress site,
-# which might support time_range searching. If you want to implement it, go ahead.
 time_range_support = False
 
 # search-url
 base_url = 'https://www.apkmirror.com'
 search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'
 
 
-# do search-request
 def request(query, params):
-
-    params['url'] = search_url.format(pageno=params['pageno'],
-                                      query=urlencode({'s': query}))
+    params['url'] = search_url.format(
+        pageno = params['pageno'],
+        query = urlencode({'s': query}),
+    )
+    logger.debug("query_url --> %s", params['url'])
     return params
 
 
-# get response from search-request
 def response(resp):
     results = []
 
     dom = html.fromstring(resp.text)
 
     # parse results
-    for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'):
+    for result in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"):
 
         link = eval_xpath_getindex(result, './/h5/a', 0)
+
         url = base_url + link.attrib.get('href') + '#downloads'
         title = extract_text(link)
-        thumbnail_src = base_url\
-            + eval_xpath_getindex(result, './/img', 0).attrib.get('src').replace('&w=32&h=32', '&w=64&h=64')
-
+        img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0)
         res = {
             'url': url,
             'title': title,
-            'thumbnail_src': thumbnail_src
+            'img_src': img_src
         }
 
-        # append result
         results.append(res)
 
-    # return results
     return results