Skip to content
This repository has been archived by the owner on Sep 7, 2023. It is now read-only.

pycurl instead of requests #1725

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
language: python
sudo: false
cache:
- pip
- npm
Expand All @@ -9,6 +8,8 @@ cache:
addons:
firefox: "latest"

before_install:
- sudo apt-get install -y libcurl4-openssl-dev libssl-dev
install:
- ./manage.sh install_geckodriver ~/drivers
- export PATH=~/drivers:$PATH
Expand Down
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ RUN apk -U upgrade \
libxslt-dev \
libxml2-dev \
openssl-dev \
curl-dev \
tar \
git \
&& apk add \
Expand All @@ -45,6 +46,8 @@ RUN apk -U upgrade \
libxml2 \
libxslt \
openssl \
libcurl \
libstdc++ \
tini \
uwsgi \
uwsgi-python3 \
Expand All @@ -55,8 +58,7 @@ RUN apk -U upgrade \

COPY --chown=searx:searx . .

RUN su searx -c "/usr/bin/python3 -m compileall -q searx"; \
touch -c --date=@${TIMESTAMP_SETTINGS} searx/settings.yml; \
RUN touch -c --date=@${TIMESTAMP_SETTINGS} searx/settings.yml; \
touch -c --date=@${TIMESTAMP_UWSGI} dockerfiles/uwsgi.ini; \
if [ ! -z $VERSION_GITCOMMIT ]; then\
echo "VERSION_STRING = VERSION_STRING + \"-$VERSION_GITCOMMIT\"" >> /usr/local/searx/searx/version.py; \
Expand Down
163 changes: 163 additions & 0 deletions misc/httpclient_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Make the repository root importable when this script is run from misc/.
from sys import path
from os.path import realpath, dirname
path.append(realpath(dirname(realpath(__file__)) + '/../'))

import sys
import gc
import aiohttp
import asyncio
import time
import uvloop
from statistics import mean, stdev
import searx.httpclient
import searx.httpclient.requests
import concurrent.futures  # NOTE(review): unused in this script — candidate for removal
import logging
from searx import settings, logger  # NOTE(review): settings and logger appear unused here

# Many small responses from a single host — stresses connection reuse.
urls1 = [
    'https://a.searx.space/0',
    'https://a.searx.space/1',
    'https://a.searx.space/2',
    'https://a.searx.space/3',
    'https://a.searx.space/4',
    'https://a.searx.space/5',
    'https://a.searx.space/6',
    'https://a.searx.space/7',
    'https://a.searx.space/8',
    'https://a.searx.space/9',
    # 'https://github.com',
    'https://en.wikipedia.org/wiki/List_of_Unicode_characters',
    'https://en.wikipedia.org/wiki/Windows-1252'
    # 'https://google.com',
]

# Realistic engine queries against several external providers.
urls2 = [
    # 'https://yandex.com/search/?text=test&p=0',
    'https://www.wikidata.org/w/index.php?search=test&ns0=1',
    'https://duckduckgo.com/html?q=test&kl=us-en&s=0&dc=0',
    'https://en.wikipedia.org/w/api.php?action=query&format=json&titles=test%7CTest'\
    + '&prop=extracts%7Cpageimages&exintro&explaintext&pithumbsize=300&redirects',
    'https://www.bing.com/search?q=language%3AEN+test&first=1',
    'https://api.duckduckgo.com/?q=test&format=json&pretty=0&no_redirect=1&d=1'
]

# Single-URL baseline case.
urls3 = [
    'https://a.searx.space/9'
]

# NOTE(review): defined but never attached to the requests below — confirm intent.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'


def timed(func):
    """Decorator for async benchmark rounds.

    Prints ``<`` when the round starts and ``>`` when it ends, and returns
    the elapsed wall-clock time in seconds rounded to 4 decimals.  The short
    sleep after the measurement lets pending transport callbacks settle so
    consecutive rounds do not overlap; it happens after ``t`` is taken, so
    it does not skew the reported timing.
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped coroutine's name visible in profiler output
    async def wrapper(*args, **kwargs):
        print('<', end='')
        sys.stdout.flush()
        start = time.time()
        await func(*args, **kwargs)
        t = round(time.time() - start, 4)
        await asyncio.sleep(0.125)
        print('>', end='')
        sys.stdout.flush()
        return t
    return wrapper


def timed_sync(func):
    """Decorator: return the wall-clock duration of a synchronous call,
    rounded to four decimal places.  The wrapped function's own return
    value is discarded."""
    def wrapper(*args):
        begin = time.time()
        func(*args)
        elapsed = time.time() - begin
        return round(elapsed, 4)
    return wrapper


async def fetch_aiohttp(session, url):
    """Fetch *url* through an aiohttp client session and return the decoded body."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body


@timed
async def main_aiohttp(urls, client_session):
    """Fetch every URL in *urls* concurrently through the shared aiohttp session."""
    await asyncio.gather(*(fetch_aiohttp(client_session, u) for u in urls))


async def fetch_curl(async_multi_request, url):
    """Issue a GET through the curl-based async session and return the body text."""
    resp = await async_multi_request.async_request(method='GET', url=url, timeout=3)
    return resp.text


@timed
async def main_curl(urls, async_multi_request):
    """Fetch every URL in *urls* concurrently through the curl-based session."""
    await asyncio.gather(*(fetch_curl(async_multi_request, u) for u in urls))


async def bench_aiohttp(bench_urls):
    """Run two batches of 10 timed rounds over *bench_urls* with aiohttp and
    print the mean/stdev plus the raw per-round timings."""
    session = aiohttp.ClientSession()
    try:
        print('== aiohttp ==')
        timings = []
        for _ in range(10):
            timings.append(await main_aiohttp(bench_urls, session))
        for _ in range(10):
            timings.append(await main_aiohttp(bench_urls, session))
        print(f'\n--> avg={mean(timings)} s. stdev={stdev(timings)} s.')
        print(timings)
    finally:
        await session.close()


async def bench_curl(bench_urls):
    """Run two batches of 10 timed rounds over *bench_urls* with the curl-based
    client and print the mean/stdev plus the raw per-round timings."""
    print('== curl ==')
    session = searx.httpclient.AsyncioSession()
    session.start()
    try:
        timings = []
        for _ in range(10):
            timings.append(await main_curl(bench_urls, session))
        for _ in range(10):
            timings.append(await main_curl(bench_urls, session))
        print(f'\n--> avg={mean(timings)} s. stdev={stdev(timings)} s.')
        print(timings)
    finally:
        session.close()


def main(coroutine, prof_filename):
    """Drive *coroutine* to completion on a uvloop event loop, optionally
    profiling it with yappi and dumping pstats output to *prof_filename*.

    Flip ``profiling`` to True to enable the profiler.
    """
    import yappi

    profiling = False

    logging.basicConfig(level=logging.DEBUG)
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    loop = asyncio.get_event_loop()

    if profiling:
        # The clock type must be configured BEFORE the profiler is started;
        # the original called set_clock_type after start, so the "cpu"
        # setting never applied to the running session.
        yappi.set_clock_type("cpu")
        yappi.start(builtins=True)

    try:
        # Stop the module-level background session so it does not interfere
        # with the client under benchmark.
        searx.httpclient.requests.SESSION.stop()
        loop.run_until_complete(coroutine)

        gc.collect()
    finally:
        if profiling:
            yappi.stop()
            pr = yappi.convert2pstats(yappi.get_func_stats())
            pr.dump_stats(prof_filename)
            yappi.clear_stats()

if __name__ == '__main__':
    import pycurl

    # Report the libcurl build info so benchmark runs are comparable.
    print(pycurl.version)

    all_urls = urls1 + urls2 + urls3

    main(bench_aiohttp(all_urls), 'aiohttp.prof')
    # To benchmark the curl-based client instead:
    # main(bench_curl(all_urls), 'curl.prof')
49 changes: 49 additions & 0 deletions misc/httpclient_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Ad-hoc smoke test / profiling driver for searx.httpclient.Sessions.
if __name__ == '__main__':
    # Make the repository root importable when running from misc/.
    from sys import path
    from os.path import realpath, dirname
    path.append(realpath(dirname(realpath(__file__)) + '/../../'))

    import concurrent.futures
    from searx.httpclient import Sessions, logger

    import yappi
    import cProfile  # NOTE(review): imported but unused — candidate for removal
    import time

    # Profile everything from session construction onward.
    yappi.start(builtins=True)

    r = Sessions()
    # time.sleep(10)

    start_time = time.time()
    a = time.time()  # NOTE(review): unused — duplicate of start_time
    allresponses = []

    # r1 = r.request('GET', 'https://httpbin.org/delay/0', headers={'User-Agent': 'x'})
    # Fire two requests concurrently; async_request returns future-like
    # objects consumed below via concurrent.futures.as_completed.
    allresponses.append(r.async_request('GET', 'https://a.searx.space/404',
                                        timeout=5.1, params={'q': 'test'}, debug=False))
    allresponses.append(r.async_request('GET', 'https://a-v2.sndcdn.com/assets/app-55ad8b3-d95005d-3.js',
                                        timeout=20, debug=True))
    # r2 = r.request('GET', 'https://a.searx.space/config', cookies={'as': 'sa', 'bb': 'cc'})
    # r3 = r.request('GET', 'https://a.searx.space', timeout=1.0, headers={'User-Agent': 'x'})
    # for i in range(1, 100):
    #     allresponses.append(r.async_request('GET', 'https://a.searx.space/dummmy/' + str(i), timeout=5.0))

    # Print each response as it completes; log failures without aborting.
    for async_response in concurrent.futures.as_completed(allresponses):
        try:
            response = async_response.result()
            print(response.request)
            print(response)
            print(response.request.url, response.status_code, response.reason, response.headers, response.cookies)
        except Exception as e:
            logger.exception(e)
    # print(v.text)
    # print(v.headers)

    # Total wall-clock time for the whole batch.
    print(time.time() - start_time)

    r.close()
    yappi.stop()
    # Convert yappi stats to pstats format for inspection with standard tools.
    pr = yappi.convert2pstats(yappi.get_func_stats())
    pr.dump_stats('curl.prof')
Loading