diff --git a/Cargo.lock b/Cargo.lock index 7bfe417..3225a96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1007,7 +1007,7 @@ dependencies = [ [[package]] name = "pyreqwest_impersonate" -version = "0.2.0" +version = "0.2.1" dependencies = [ "encoding_rs", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 0835aba..2c03ee2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyreqwest_impersonate" -version = "0.2.0" +version = "0.2.1" edition = "2021" description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" authors = ["deedy5"] diff --git a/README.md b/README.md index 9e2b425..2d78c61 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ The fastest python HTTP client that can impersonate web browsers by mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.
Binding to the Rust `reqwest_impersonate` library.
-🏁 Check the benchmarks for more details. +🏁 Check the [benchmark](https://github.com/deedy5/pyreqwest_impersonate/tree/main/benchmark) for more details. Provides precompiled wheels: @@ -29,7 +29,7 @@ pip install -U pyreqwest_impersonate ## Usage ### I. Client -A blocking HTTP client that can impersonate web browsers. +A blocking HTTP client that can impersonate web browsers. Not thread-safe! ```python3 class Client: """Initializes a blocking HTTP client that can impersonate web browsers. @@ -53,6 +53,10 @@ class Client: verify (bool, optional): Verify SSL certificates. Default is True. http1 (bool, optional): Use only HTTP/1.1. Default is None. http2 (bool, optional): Use only HTTP/2. Default is None. + + Note: + The Client instance is not thread-safe, meaning it should be initialized once and reused across a multi-threaded environment. + """ ``` @@ -92,7 +96,8 @@ Performs a POST request to the specified URL. ```python from pyreqwest_impersonate import Client -client = Client(impersonate="chrome_123") +# Not thread-safe! Initialize the Client instance once and reuse it across threads +client = Client(impersonate="chrome_123") # get request resp = client.get("https://tls.peet.ws/api/all") @@ -118,28 +123,31 @@ TODO #### Response attributes and methods -- `cookies`: Fetches the cookies from the response as a dictionary. -- `headers`: Retrieves the headers from the response as a dictionary. -- `status_code`: Gets the status code of the response as an integer. -- `url`: Returns the URL of the response as a string. -- `content`: Provides the content of the response as bytes. -- `text`: Decodes the response body into text, automatically detecting the character encoding. -- `json()`: Parses the response body as JSON, converting it into a Python object for easy manipulation. +- `content` (bytes): Provides the content of the response as bytes. +- `cookies` (dict): Fetches the cookies from the response as a dictionary. +- `headers` (dict): Retrieves the headers from the response as a dictionary. +- `json()` (function): Parses the response body as JSON, converting it into a Python object for easy manipulation. +- `raw` (list[int]): Contains the raw byte representation of the HTTP response body. +- `status_code` (int): Gets the status code of the response as an integer. +- `text` (str): Decodes the response body into text, automatically detecting the character encoding. +- `url` (str): Returns the URL of the response as a string. #### Example ```python from pyreqwest_impersonate import Client +# Not thread-safe! Initialize the Client instance once and reuse it across threads client = Client() response = client.get("https://example.com") -print(response.status_code) # Access the status code -print(response.url) # Access the URL -print(response.headers) # Access headers -print(response.cookies) # Access cookies print(response.content) # Get the content as bytes -print(response.text) # Decode the content as text +print(response.cookies) # Access cookies +print(response.headers) # Access headers print(response.json()) # Parse the content as JSON +print(response.raw) # Raw response +print(response.status_code) # Access the status code +print(response.text) # Decode the content as text +print(response.url) # Access the URL ``` diff --git a/benchmark/1_threads.csv b/benchmark/1_threads.csv index 8e05bc9..a1ba037 100644 --- a/benchmark/1_threads.csv +++ b/benchmark/1_threads.csv @@ -1,6 +1,6 @@ -name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k -curl_cffi,1,5.735,1.521,7.957,3.25 -httpx,1,3.801,2.116,6.117,3.987 -pyreqwest_impersonate,1,0.855,0.297,1.977,1.238 -requests,1,5.787,2.814,8.355,4.73 -tls_client,1,6.414,1.96,6.941,3.153 +name,threads,cpu_time 50k,cpu_time 5k,time 50k,time 5k +curl_cffi 0.6.2,1,5.617,1.618,7.681,3.367 +httpx 0.27.0,1,2.58,1.934,4.206,3.605 +pyreqwest_impersonate 0.2.1,1,1.706,0.38,3.486,1.133 +requests 2.31.0,1,4.852,3.121,6.993,4.743 +tls_client 1.0.1,1,5.608,1.87,6.333,2.71 diff --git a/benchmark/4_threads.csv b/benchmark/4_threads.csv index b79c28d..fbddf91 100644 --- a/benchmark/4_threads.csv +++ b/benchmark/4_threads.csv @@ -1,6 +1,6 @@ -name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k -curl_cffi,4,4.014,1.255,1.567,0.866 -httpx,4,2.105,1.461,1.505,1.307 -pyreqwest_impersonate,4,1.15,0.399,0.875,0.751 -requests,4,4.14,3.006,3.356,2.802 -tls_client,4,3.803,1.357,1.382,0.832 +name,threads,cpu_time 50k,cpu_time 5k,time 50k,time 5k +curl_cffi 0.6.2,4,3.859,1.124,1.415,0.703 +httpx 0.27.0,4,2.172,1.422,1.485,1.228 +pyreqwest_impersonate 0.2.1,4,1.168,0.477,2.025,1.617 +requests 2.31.0,4,4.036,3.237,3.221,3.08 +tls_client 1.0.1,4,3.52,1.185,1.252,0.723 diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 801ea34..3cf9d17 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,5 +1,6 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed +from importlib.metadata import version import pandas as pd import requests import httpx @@ -8,6 +9,17 @@ import curl_cffi.requests results = [] +PACKAGES = [ + ("requests", requests.Session), + ("httpx", httpx.Client), + ("tls_client", tls_client.Session), + ("curl_cffi", curl_cffi.requests.Session), + ("pyreqwest_impersonate", pyreqwest_impersonate.Client), +] + + +def add_package_version(packages): + return [(f"{name} {version(name)}", classname) for name, classname in packages] def session_get_test(session_class, requests_number): @@ -15,25 +27,30 @@ def session_get_test(session_class, requests_number): for _ in range(requests_number): s.get(url).text + +PACKAGES = add_package_version(PACKAGES) + # one thread requests_number = 2000 for response_size in ["5k", "50k"]: url = f"http://127.0.0.1:8000/{response_size}" print(f"\nOne worker, {response_size=}, {requests_number=}") - for name, session_class in [ - ("requests", requests.Session), - ("httpx", httpx.Client), - ("tls_client", tls_client.Session), - ("curl_cffi", curl_cffi.requests.Session), - ("pyreqwest_impersonate", pyreqwest_impersonate.Client), - ]: + for name, session_class in PACKAGES: start = time.perf_counter() cpu_start = time.process_time() session_get_test(session_class, requests_number) dur = round(time.perf_counter() - start, 3) cpu_dur = round(time.process_time() - cpu_start, 3) - results.append({"name": name, "threads": 1, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur}) - print(f" name: {name:<22} {response_size=} {dur=} {cpu_dur=}") + results.append( + { + "name": name, + "threads": 1, + "size": response_size, + "time": dur, + "cpu_time": cpu_dur, + } + ) + print(f" name: {name:<30} time: {dur} cpu_time: {cpu_dur}") # multiple threads @@ -42,36 +59,46 @@ def session_get_test(session_class, requests_number): for response_size in ["5k", "50k"]: url = f"http://127.0.0.1:8000/{response_size}" print(f"\n{threads_number} workers, {response_size=}, {requests_number=}") - for name, session_class in [ - ("requests", requests.Session), - ("httpx", httpx.Client), - ("tls_client", tls_client.Session), - ("curl_cffi", curl_cffi.requests.Session), - ("pyreqwest_impersonate", pyreqwest_impersonate.Client), - ]: + for name, session_class in PACKAGES: start = time.perf_counter() cpu_start = time.process_time() with ThreadPoolExecutor(threads_number) as executor: - futures = [executor.submit(session_get_test, session_class, requests_number) for _ in range(threads_number)] + futures = [ + executor.submit(session_get_test, session_class, requests_number) + for _ in range(threads_number) + ] for f in as_completed(futures): f.result() dur = round(time.perf_counter() - start, 3) cpu_dur = round(time.process_time() - cpu_start, 3) - results.append({"name": name, "threads": threads_number, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur}) - print(f" name: {name:<22} {response_size=} {dur=} {cpu_dur=}") - + results.append( + { + "name": name, + "threads": threads_number, + "size": response_size, + "time": dur, + "cpu_time": cpu_dur, + } + ) + print(f" name: {name:<30} time: {dur} cpu_time: {cpu_dur}") + df = pd.DataFrame(results) -pivot_df = df.pivot_table(index=['name', 'threads'], columns='response_size', values=['duration', 'cpu_time'], aggfunc='mean') +pivot_df = df.pivot_table( + index=["name", "threads"], + columns="size", + values=["time", "cpu_time"], + aggfunc="mean", +) pivot_df.reset_index(inplace=True) -pivot_df.columns = [' '.join(col).strip() for col in pivot_df.columns.values] -pivot_df = pivot_df[['name', 'threads'] + [col for col in pivot_df.columns if col not in ['name', 'threads']]] -unique_threads = pivot_df['threads'].unique() +pivot_df.columns = [" ".join(col).strip() for col in pivot_df.columns.values] +pivot_df = pivot_df[ + ["name", "threads"] + + [col for col in pivot_df.columns if col not in ["name", "threads"]] +] +unique_threads = pivot_df["threads"].unique() for thread in unique_threads: - thread_df = pivot_df[pivot_df['threads'] == thread] + thread_df = pivot_df[pivot_df["threads"] == thread] print(f"\nTable for {thread} threads:") print(thread_df.to_string(index=False)) - thread_df.to_csv(f'{thread}_threads.csv', index=False) - - - + thread_df.to_csv(f"{thread}_threads.csv", index=False) diff --git a/pyproject.toml b/pyproject.toml index c448f9b..7abb06e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,13 +32,7 @@ dependencies = [] [project.optional-dependencies] dev = [ "pytest>=8.1.1", - "pytest-retry>=1.6.2", ] [tool.maturin] features = ["pyo3/extension-module"] - -[tool.pytest.ini_options] -retries = 3 -retry_delay = 0.5 -cumulative_timing = false diff --git a/src/lib.rs b/src/lib.rs index 13c481c..8c02154 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,11 +86,11 @@ impl Client { } let mut client_builder = reqwest_impersonate::blocking::Client::builder() - .enable_ech_grease(true) - .permute_extensions(true) - .cookie_store(true) - .trust_dns(true) - .timeout(timeout.map(Duration::from_secs_f64)); + .enable_ech_grease(true) + .permute_extensions(true) + .cookie_store(true) + .trust_dns(true) + .timeout(timeout.map(Duration::from_secs_f64)); // Headers if let Some(headers) = headers { @@ -198,111 +198,143 @@ impl Client { auth_bearer: Option, timeout: Option, ) -> PyResult { - Python::with_gil(|py| { - // Release the gil - py.allow_threads(|| { - // Check if method is POST || PUT || PATCH - let is_post_put_patch = method == "POST" || method == "PUT" || method == "PATCH"; + // Check if method is POST || PUT || PATCH + let is_post_put_patch = method == "POST" || method == "PUT" || method == "PATCH"; - // Method - let method = match method { - "GET" => Ok(Method::GET), - "POST" => Ok(Method::POST), - "HEAD" => Ok(Method::HEAD), - "OPTIONS" => Ok(Method::OPTIONS), - "PUT" => Ok(Method::PUT), - "PATCH" => Ok(Method::PATCH), - "DELETE" => Ok(Method::DELETE), - &_ => Err(PyErr::new::( - "Unrecognized HTTP method", - )), - }; - let method = method?; + // Method + let method = match method { + "GET" => Ok(Method::GET), + "POST" => Ok(Method::POST), + "HEAD" => Ok(Method::HEAD), + "OPTIONS" => Ok(Method::OPTIONS), + "PUT" => Ok(Method::PUT), + "PATCH" => Ok(Method::PATCH), + "DELETE" => Ok(Method::DELETE), + &_ => Err(PyErr::new::( + "Unrecognized HTTP method", + )), + }; + let method = method?; - // Create request builder + // Create request builder let mut request_builder = self.client.request(method, url); - // Params (use the provided `params` if available; otherwise, fall back to `self.params`) - let params_to_use = params.or(self.params.clone()).unwrap_or_default(); - if !params_to_use.is_empty() { - request_builder = request_builder.query(¶ms_to_use); - } + // Params (use the provided `params` if available; otherwise, fall back to `self.params`) + let params_to_use = params.or(self.params.clone()).unwrap_or_default(); + if !params_to_use.is_empty() { + request_builder = request_builder.query(¶ms_to_use); + } - // Headers - if let Some(headers) = headers { - let mut headers_new = HeaderMap::new(); - for (key, value) in headers { - headers_new.insert( - HeaderName::from_bytes(key.as_bytes()).map_err(|_| { - PyErr::new::("Invalid header name") - })?, - HeaderValue::from_str(&value).map_err(|_| { - PyErr::new::("Invalid header value") - })?, - ); - } - request_builder = request_builder.headers(headers_new); - } + // Headers + if let Some(headers) = headers { + let mut headers_new = HeaderMap::new(); + for (key, value) in headers { + headers_new.insert( + HeaderName::from_bytes(key.as_bytes()).map_err(|_| { + PyErr::new::("Invalid header name") + })?, + HeaderValue::from_str(&value).map_err(|_| { + PyErr::new::("Invalid header value") + })?, + ); + } + request_builder = request_builder.headers(headers_new); + } - // Only if method POST || PUT || PATCH - if is_post_put_patch { - // Content - if let Some(content) = content { - request_builder = request_builder.body(content); - } - // Data - if let Some(data) = data { - request_builder = request_builder.form(&data); - } - // Files - if let Some(files) = files { - let mut form = multipart::Form::new(); - for (field, path) in files { - form = form.file(field, path)?; - } - request_builder = request_builder.multipart(form); - } + // Only if method POST || PUT || PATCH + if is_post_put_patch { + // Content + if let Some(content) = content { + request_builder = request_builder.body(content); + } + // Data + if let Some(data) = data { + request_builder = request_builder.form(&data); + } + // Files + if let Some(files) = files { + let mut form = multipart::Form::new(); + for (field, path) in files { + form = form.file(field, path)?; } + request_builder = request_builder.multipart(form); + } + } - // Auth - match (auth, auth_bearer, &self.auth, &self.auth_bearer) { - (Some((username, password)), None, _, _) => { - request_builder = request_builder.basic_auth(username, password.as_deref()); - } - (None, Some(token), _, _) => { - request_builder = request_builder.bearer_auth(token); - } - (None, None, Some((username, password)), None) => { - request_builder = request_builder.basic_auth(username, password.as_deref()); - } - (None, None, None, Some(token)) => { - request_builder = request_builder.bearer_auth(token); - } - (Some(_), Some(_), None, None) | (None, None, Some(_), Some(_)) => { - return Err(PyErr::new::( - "Cannot provide both auth and auth_bearer", - )); - } - _ => {} // No authentication provided - } + // Auth + match (auth, auth_bearer, &self.auth, &self.auth_bearer) { + (Some((username, password)), None, _, _) => { + request_builder = request_builder.basic_auth(username, password.as_deref()); + } + (None, Some(token), _, _) => { + request_builder = request_builder.bearer_auth(token); + } + (None, None, Some((username, password)), None) => { + request_builder = request_builder.basic_auth(username, password.as_deref()); + } + (None, None, None, Some(token)) => { + request_builder = request_builder.bearer_auth(token); + } + (Some(_), Some(_), None, None) | (None, None, Some(_), Some(_)) => { + return Err(PyErr::new::( + "Cannot provide both auth and auth_bearer", + )); + } + _ => {} // No authentication provided + } - // Timeout - if let Some(seconds) = timeout { - request_builder = request_builder.timeout(Duration::from_secs_f64(seconds)); - } + // Timeout + if let Some(seconds) = timeout { + request_builder = request_builder.timeout(Duration::from_secs_f64(seconds)); + } - // Send request - let resp = request_builder.send().map_err(|e| { - PyErr::new::(format!("Error in request: {}", e)) - })?; + // Send request + let mut resp = request_builder.send().map_err(|e| { + PyErr::new::(format!("Error in request: {}", e)) + })?; - Ok(Response { - resp, - encoding: "utf-8".to_string(), - _content_as_vec: None, - _text: None, + // Response items + let mut raw: Vec = vec![]; + resp.copy_to(&mut raw).map_err(|e| { + PyErr::new::(format!("Error in get resp.raw: {}", e)) + })?; + let cookies: HashMap = resp + .cookies() + .map(|cookie| (cookie.name().to_string(), cookie.value().to_string())) + .collect(); + // Encoding from "Content-Type" header or "UTF-8" + let encoding = resp + .headers() + .get("Content-Type") + .and_then(|ct| ct.to_str().ok()) + .and_then(|ct| { + ct.split(';').find_map(|param| { + let mut kv = param.splitn(2, '='); + let key = kv.next()?.trim(); + let value = kv.next()?.trim(); + if key.eq_ignore_ascii_case("charset") { + Some(value.to_string()) + } else { + None + } }) }) + .unwrap_or("UTF-8".to_string()); + let headers: HashMap = resp + .headers() + .iter() + .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string())) + .collect(); + let status_code = resp.status().as_u16(); + let url = resp.url().to_string(); + + Ok(Response { + cookies, + encoding, + headers, + raw, + status_code, + url, }) } diff --git a/src/response.rs b/src/response.rs index 4aec140..fa98efd 100644 --- a/src/response.rs +++ b/src/response.rs @@ -13,164 +13,73 @@ use serde_json::{Error as SerdeError, Value}; /// It also supports decoding the response body as text or JSON, with the ability to specify the character encoding. #[pyclass] pub struct Response { - pub resp: reqwest_impersonate::blocking::Response, + #[pyo3(get)] + pub cookies: HashMap, #[pyo3(get)] pub encoding: String, - pub _content_as_vec: Option>, - pub _text: Option, + #[pyo3(get)] + pub headers: HashMap, + #[pyo3(get)] + pub raw: Vec, + #[pyo3(get)] + pub status_code: u16, + #[pyo3(get)] + pub url: String, } #[pymethods] impl Response { - /// Returns the cookies from the response as a `HashMap`. - #[getter] - fn cookies(&self) -> PyResult> { - let cookies: HashMap = self - .resp - .cookies() - .map(|cookie| (cookie.name().to_string(), cookie.value().to_string())) - .collect(); - Ok(cookies) - } - - /// Returns the headers from the response as a `HashMap`. - #[getter] - fn headers(&self) -> PyResult> { - let headers = self - .resp - .headers() - .iter() - .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string())) - .collect(); - Ok(headers) - } - - /// Returns the status code of the response as a `u16`. - #[getter] - fn status_code(&self) -> PyResult { - let status_code = self.resp.status().as_u16(); - Ok(status_code) - } - - #[getter] - fn url(&self) -> PyResult { - let url = self.resp.url().to_string(); - Ok(url) - } - - /// Stores the content of the response as a `Vec`. - /// - /// This method is used internally to cache the response body for future use. - fn content_as_vec(&mut self) -> PyResult> { - // Check if content has already been read and stored - if self._content_as_vec.is_none() { - let mut buf: Vec = vec![]; - self.resp.copy_to(&mut buf).map_err(|e| { - PyErr::new::(format!( - "Error copying response body: {}", - e - )) - })?; - // Store the content for future use - self._content_as_vec = Some(buf); - } - // Return the stored content - Ok(self._content_as_vec.as_ref().unwrap().clone()) - } - - /// Returns the content of the response as Python bytes. #[getter] fn content(&mut self) -> PyResult { - let content = self.content_as_vec()?; Python::with_gil(|py| { - let bytes = PyBytes::new_bound(py, &content); - Ok(bytes.to_object(py)) - }) - } + // Convert the raw response body to PyBytes + let py_bytes = PyBytes::new_bound(py, &self.raw); - /// Extracts the character encoding from the "Content-Type" header, or defaults to "UTF-8". - #[getter] - fn charset(&mut self) -> PyResult { - let encoding_from_headers = self - .resp - .headers() - .get("Content-Type") - .and_then(|ct| ct.to_str().ok()) - .and_then(|ct| { - ct.split(';').find_map(|param| { - let mut kv = param.splitn(2, '='); - let key = kv.next()?.trim(); - let value = kv.next()?.trim(); - if key.eq_ignore_ascii_case("charset") { - Some(value.to_string()) - } else { - None - } - }) - }); - // If encoding is not found in headers, default to UTF-8 - Ok(encoding_from_headers.unwrap_or_else(|| "UTF-8".to_string())) + // Convert the PyBytes to a Python object + Ok(py_bytes.into()) + }) } - /// Decodes the response body as text. #[getter] fn text(&mut self) -> PyResult { - if let Some(ref text) = self._text { - // If the text is already decoded, return it - Ok(text.clone()) - } else { - Python::with_gil(|py| { - // Release the GIL here because decoding can be CPU-intensive. - py.allow_threads(|| { - // Otherwise, decode the content and store it - let content = self.content_as_vec()?; - let charset = self.charset()?; - let encoding = Encoding::for_label(charset.as_bytes()).ok_or_else(|| { - PyErr::new::(format!( - "Unsupported charset: {}", - charset - )) - })?; - let (decoded_str, detected_encoding, _) = encoding.decode(&content); - // Redefine resp.encoding if detected_encoding != charset - if detected_encoding != encoding { - self.encoding = detected_encoding.name().to_string(); - } - // Store the decoded text for future use - self._text = Some(decoded_str.into_owned()); - Ok(self._text.as_ref().unwrap().clone()) - }) + Python::with_gil(|py| { + // Release the GIL here because decoding can be CPU-intensive. + py.allow_threads(|| { + let encoding = Encoding::for_label(&self.encoding.as_bytes()).ok_or_else(|| { + PyErr::new::(format!( + "Unsupported charset: {}", + self.encoding + )) + })?; + let (decoded_str, detected_encoding, _) = encoding.decode(&self.raw); + // Redefine resp.encoding if detected_encoding != charset + if detected_encoding != encoding { + self.encoding = detected_encoding.name().to_string(); + } + Ok(decoded_str.to_string()) }) - } + }) } - /// Parses the response body as JSON and returns it as a Python object. fn json(&mut self) -> PyResult { Python::with_gil(|py| { - // This call to self.text() should already correctly manage the GIL. - let text = self.text()?; - - // Parse the text as JSON + // Directly parse the raw response body as JSON // We release the GIL here because JSON parsing can be CPU-intensive. - let value: Result = py.allow_threads(|| serde_json::from_str(&text)); + let value: Result = + py.allow_threads(|| serde_json::from_slice(&self.raw)); - // Handle the result of parsing - match value { - Ok(json_value) => { - // Convert the parsed JSON into a Python object using pythonize - match pythonize(py, &json_value) { - Ok(py_obj) => Ok(py_obj), - Err(e) => Err(PyErr::new::(format!( - "Failed to convert JSON to Python object: {}", - e - ))), - } - } - Err(e) => Err(PyErr::new::(format!( - "Failed to parse JSON: {}", + // Manually convert the serde_json::Error into a pyo3::PyErr + let json_value = value.map_err(|e| { + PyErr::new::(format!("Failed to parse JSON: {}", e)) + })?; + + // Convert the parsed JSON into a Python object using pythonize + pythonize(py, &json_value).map_err(|e| { + PyErr::new::(format!( + "Failed to convert JSON to Python object: {}", e - ))), - } + )) + }) }) } } diff --git a/tests/test_client.py b/tests/test_client.py index b2db27f..d3f27ef 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,12 +1,27 @@ -import json +from time import sleep from urllib.parse import parse_qs +from pyreqwest_impersonate import Client -import pytest -from pyreqwest_impersonate import Client +def retry(max_retries=3, delay=1): + def decorator(func): + def wrapper(*args, **kwargs): + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except Exception as e: + if attempt < max_retries - 1: + sleep(delay) + continue + else: + raise e + + return wrapper + return decorator +@retry() def test_client_init_params(): auth = ("user", "password") headers = {"X-Test": "test"} @@ -14,17 +29,18 @@ def test_client_init_params(): client = Client(auth=auth, params=params, headers=headers, verify=False) response = client.get("https://httpbin.org/anything") assert response.status_code == 200 - json_data = json.loads(response.text) + json_data = response.json() assert json_data["headers"]["X-Test"] == "test" assert json_data["headers"]["Authorization"] == "Basic dXNlcjpwYXNzd29yZA==" assert json_data["args"] == {"x": "aaa", "y": "bbb"} +@retry() def test_client_get(): + client = Client(verify=False) auth_bearer = "bearerXXXXXXXXXXXXXXXXXXXX" headers = {"X-Test": "test"} params = {"x": "aaa", "y": "bbb"} - client = Client(verify=False) response = client.get( "https://httpbin.org/anything", auth_bearer=auth_bearer, @@ -32,18 +48,21 @@ def test_client_get(): params=params, ) assert response.status_code == 200 - json_data = json.loads(response.text) + json_data = response.json() assert json_data["headers"]["X-Test"] == "test" assert json_data["headers"]["Authorization"] == "Bearer bearerXXXXXXXXXXXXXXXXXXXX" assert json_data["args"] == {"x": "aaa", "y": "bbb"} + assert "Bearer bearerXXXXXXXXXXXXXXXXXXXX" in response.text + assert b"Bearer bearerXXXXXXXXXXXXXXXXXXXX" in response.content +@retry() def test_client_post_content(): + client = Client(verify=False) auth = ("user", "password") headers = {"X-Test": "test"} params = {"x": "aaa", "y": "bbb"} content = b"test content" - client = Client(verify=False) response = client.post( "https://httpbin.org/anything", auth=auth, @@ -52,19 +71,20 @@ def test_client_post_content(): content=content, ) assert response.status_code == 200 - json_data = json.loads(response.text) + json_data = response.json() assert json_data["headers"]["X-Test"] == "test" assert json_data["headers"]["Authorization"] == "Basic dXNlcjpwYXNzd29yZA==" assert json_data["args"] == {"x": "aaa", "y": "bbb"} assert json_data["data"] == "test content" +@retry() def test_client_post_data(): + client = Client(verify=False) auth_bearer = "bearerXXXXXXXXXXXXXXXXXXXX" headers = {"X-Test": "test"} params = {"x": "aaa", "y": "bbb"} data = {"key1": "value1", "key2": "value2"} - client = Client(verify=False) response = client.post( "https://httpbin.org/anything", auth_bearer=auth_bearer, @@ -73,7 +93,7 @@ def test_client_post_data(): data=data, ) assert response.status_code == 200 - json_data = json.loads(response.text) + json_data = response.json() assert json_data["headers"]["X-Test"] == "test" assert json_data["headers"]["Authorization"] == "Bearer bearerXXXXXXXXXXXXXXXXXXXX" assert json_data["args"] == {"x": "aaa", "y": "bbb"} @@ -81,11 +101,12 @@ def test_client_post_data(): assert received_data_dict == {"key1": ["value1"], "key2": ["value2"]} -def test_client_get_impersonate(): +@retry() +def test_client_impersonate(): client = Client(impersonate="chrome_123", verify=False) response = client.get("https://tls.peet.ws/api/all") - json_data = json.loads(response.text) assert response.status_code == 200 + json_data = response.json() assert json_data["http_version"] == "h2" assert json_data["tls"]["ja4"].startswith("t13d") assert (