Skip to content

Commit

Permalink
Add benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
deedy5 committed Apr 16, 2024
1 parent 4f2d7b5 commit ddf6fea
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 2 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
![Python >= 3.8](https://img.shields.io/badge/python->=3.8-red.svg) [![](https://badgen.net/github/release/deedy5/pyreqwest-impersonate)](https://github.com/deedy5/pyreqwest-impersonate/releases) [![](https://badge.fury.io/py/pyreqwest_impersonate.svg)](https://pypi.org/project/pyreqwest_impersonate) [![Downloads](https://static.pepy.tech/badge/pyreqwest_impersonate/week)](https://pepy.tech/project/pyreqwest_impersonate) [![CI](https://github.com/deedy5/pyreqwest-impersonate/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/deedy5/pyreqwest-impersonate/actions/workflows/CI.yml)
# Pyreqwest_impersonate

HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.</br>
Binding to the Rust `reqwest_impersonate` library.
The fastest Python HTTP client that can impersonate web browsers by mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.<br>
Binding to the Rust `reqwest_impersonate` library.<br>
🏁 Check the benchmarks for more details.


Provides precompiled wheels:
- [x] Linux: `amd64`, `aarch64`.
Expand Down
6 changes: 6 additions & 0 deletions benchmark/1_threads.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k
curl_cffi,1,5.735,1.521,7.957,3.25
httpx,1,3.801,2.116,6.117,3.987
pyreqwest_impersonate,1,0.855,0.297,1.977,1.238
requests,1,5.787,2.814,8.355,4.73
tls_client,1,6.414,1.96,6.941,3.153
6 changes: 6 additions & 0 deletions benchmark/4_threads.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k
curl_cffi,4,4.014,1.255,1.567,0.866
httpx,4,2.105,1.461,1.505,1.307
pyreqwest_impersonate,4,1.15,0.399,0.875,0.751
requests,4,4.14,3.006,3.356,2.802
tls_client,4,3.803,1.357,1.382,0.832
17 changes: 17 additions & 0 deletions benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## Benchmark

Benchmark between `pyreqwest_impersonate` and other Python HTTP clients:

- curl_cffi
- httpx
- pyreqwest_impersonate
- python-tls-client
- requests

All the clients run with session/client enabled.
Server response is gzipped.

#### Run benchmark:

- run server: `uvicorn server:app`
- run benchmark: `python benchmark.py`
77 changes: 77 additions & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import requests
import httpx
import tls_client
import pyreqwest_impersonate
import curl_cffi.requests

# Collected measurements: one dict per (client, thread count, response size)
# run, appended by the benchmark loops and consumed by the pandas report.
results = []


def session_get_test(session_class, requests_number):
    """Issue ``requests_number`` sequential GET requests through one session.

    A single ``session_class()`` instance is created and reused for every
    request. The target address is read from the module-level ``url``
    variable, which must be assigned before this function is called.
    """
    client = session_class()
    for _request_index in range(requests_number):
        response = client.get(url)
        # Access ``.text`` on every response -- presumably so that producing
        # the decoded body is part of what gets measured.
        response.text

# one thread
# Every client issues `requests_number` sequential requests from the main
# thread through a single session, once for each response size.
requests_number = 2000
for response_size in ["5k", "50k"]:
    # `url` is a module-level global that session_get_test() reads.
    url = f"http://127.0.0.1:8000/{response_size}"
    print(f"\nOne worker, {response_size=}, {requests_number=}")
    for name, session_class in [
        ("requests", requests.Session),
        ("httpx", httpx.Client),
        ("tls_client", tls_client.Session),
        ("curl_cffi", curl_cffi.requests.Session),
        ("pyreqwest_impersonate", pyreqwest_impersonate.Client),
    ]:
        # Sample both clocks around the same run: perf_counter() gives the
        # elapsed wall-clock duration, process_time() the CPU time consumed
        # by this process. Session construction is inside the timed region.
        start = time.perf_counter()
        cpu_start = time.process_time()
        session_get_test(session_class, requests_number)
        dur = round(time.perf_counter() - start, 3)
        cpu_dur = round(time.process_time() - cpu_start, 3)
        # "threads" is recorded as 1 so the report can group these rows
        # separately from the multi-threaded runs.
        results.append({"name": name, "threads": 1, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur})
        print(f" name: {name:<22} {response_size=} {dur=} {cpu_dur=}")


# multiple threads
# Each of the `threads_number` workers runs session_get_test() with its own
# session and `requests_number` requests, so every client performs
# threads_number * requests_number requests per response size.
requests_number = 500
threads_number = 4
for response_size in ["5k", "50k"]:
    # `url` is a module-level global that session_get_test() reads.
    url = f"http://127.0.0.1:8000/{response_size}"
    print(f"\n{threads_number} workers, {response_size=}, {requests_number=}")
    for name, session_class in [
        ("requests", requests.Session),
        ("httpx", httpx.Client),
        ("tls_client", tls_client.Session),
        ("curl_cffi", curl_cffi.requests.Session),
        ("pyreqwest_impersonate", pyreqwest_impersonate.Client),
    ]:
        # The timed region covers pool start-up, all worker runs and pool
        # shutdown. process_time() counts CPU time for the whole process,
        # i.e. summed over all worker threads.
        start = time.perf_counter()
        cpu_start = time.process_time()
        with ThreadPoolExecutor(threads_number) as executor:
            futures = [executor.submit(session_get_test, session_class, requests_number) for _ in range(threads_number)]
            for f in as_completed(futures):
                # result() re-raises any exception raised inside a worker,
                # so a failed run aborts instead of being recorded.
                f.result()
        dur = round(time.perf_counter() - start, 3)
        cpu_dur = round(time.process_time() - cpu_start, 3)
        results.append({"name": name, "threads": threads_number, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur})
        print(f" name: {name:<22} {response_size=} {dur=} {cpu_dur=}")


# Reshape the raw measurements into one row per (name, threads) pair with a
# "<metric> <response_size>" column for every metric/size combination.
df = pd.DataFrame(results)
pivot_df = df.pivot_table(
    index=["name", "threads"],
    columns="response_size",
    values=["duration", "cpu_time"],
    aggfunc="mean",
).reset_index()

# Flatten the two-level column labels, e.g. ("duration", "5k") -> "duration 5k".
# The former index columns carry an empty second level, hence the strip().
pivot_df.columns = [" ".join(levels).strip() for levels in pivot_df.columns.values]

# Identifying columns first, then every metric column in its existing order.
pivot_df = pivot_df[
    ["name", "threads"]
    + [label for label in pivot_df.columns if label not in ["name", "threads"]]
]

# Print one table per thread count and save it as "<threads>_threads.csv"
# in the current working directory.
unique_threads = pivot_df["threads"].unique()
for thread in unique_threads:
    thread_df = pivot_df.loc[pivot_df["threads"] == thread]
    print(f"\nTable for {thread} threads:")
    print(thread_df.to_string(index=False))
    thread_df.to_csv(f"{thread}_threads.csv", index=False)



8 changes: 8 additions & 0 deletions benchmark/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pandas
starlette
uvicorn
requests
httpx
tls-client
pyreqwest_impersonate
curl_cffi
29 changes: 29 additions & 0 deletions benchmark/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import base64
import os
import gzip
from starlette.applications import Starlette
from starlette.responses import Response
from starlette.routing import Route

# Response bodies are generated and gzip-compressed once at import time, so
# the server does no per-request compression work.
# b64encode() already yields ASCII bytes, which are compressed directly.
random_5k = gzip.compress(base64.b64encode(os.urandom(5 * 1024)))

random_50k = gzip.compress(base64.b64encode(os.urandom(50 * 1024)))


def gzip_response(gzipped_content):
    """Wrap already gzip-compressed bytes in a ``Response``.

    The body is passed through unchanged; the headers declare the gzip
    encoding and the length of the compressed payload.
    """
    compressed_size = len(gzipped_content)
    return Response(
        gzipped_content,
        headers={
            'Content-Encoding': 'gzip',
            'Content-Length': str(compressed_size),
        },
    )

# Application object served by uvicorn. Each route ignores the incoming
# request and returns the corresponding payload that was compressed at
# import time.
app = Starlette(
    routes=[
        Route("/5k", lambda r: gzip_response(random_5k)),
        Route("/50k", lambda r: gzip_response(random_50k)),
    ],
)

# Run server: uvicorn server:app

0 comments on commit ddf6fea

Please sign in to comment.