diff --git a/README.md b/README.md
index cfa3a4d..c33db18 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,48 @@ result = app(
 )
 ```
 
+### Using a proxy
+#### Example: creating a `BrightDataProxy` object and generating a session for each request
+```python
+from google_play_scraper.utils.proxies import Proxy
+import random
+from dotenv import load_dotenv
+import os
+
+load_dotenv()  # load SESSION and PROXY_* from a local .env file
+
+class BrightDataProxy(Proxy):
+    def __init__(self, username: str, password: str, host: str, port: int):
+        self.username = username
+        self.password = password
+        self.host = host
+        self.port = port
+
+    def get_proxy(self) -> dict:
+        return {
+            "https": f"http://{self.username}:{self.password}@{self.host}:{self.port}"
+        }
+
+session_id = ''.join(random.choice('0123456789abcdef') for _ in range(6))
+
+# Creating proxy session string
+session = os.getenv("SESSION")
+proxy_session = f"{session}-{session_id}"
+
+host = os.getenv("PROXY_HOST")
+password = os.getenv("PROXY_PASSWORD")
+port = int(os.getenv("PROXY_PORT"))
+
+proxy = BrightDataProxy(username=proxy_session, password=password, host=host, port=port)
+
+result = app(
+    'com.nianticlabs.pokemongo',
+    lang='en',  # defaults to 'en'
+    country='us',  # defaults to 'us'
+    proxy=proxy
+)
+```
+
 Result of `print(result)`:
 
 ```python
diff --git a/google_play_scraper/features/app.py b/google_play_scraper/features/app.py
index f09f2e0..ffa0cf4 100644
--- a/google_play_scraper/features/app.py
+++ b/google_play_scraper/features/app.py
@@ -1,21 +1,22 @@
 import json
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 from google_play_scraper.constants.element import ElementSpecs
 from google_play_scraper.constants.regex import Regex
 from google_play_scraper.constants.request import Formats
 from google_play_scraper.exceptions import NotFoundError
 from google_play_scraper.utils.request import get
+from google_play_scraper.utils.proxies import Proxy
 
 
-def app(app_id: str, lang: str = "en", country: str = "us") -> Dict[str, Any]:
+def app(app_id: str, lang: str = "en", country: str = "us", proxy: Optional[Proxy] = None) -> Dict[str, Any]:
     url = Formats.Detail.build(app_id=app_id, lang=lang, country=country)
 
     try:
-        dom = get(url)
+        dom = get(url, proxy)
     except NotFoundError:
         url = Formats.Detail.fallback_build(app_id=app_id, lang=lang)
-        dom = get(url)
+        dom = get(url, proxy)
     return parse_dom(dom=dom, app_id=app_id, url=url)
 
 
diff --git a/google_play_scraper/utils/proxies.py b/google_play_scraper/utils/proxies.py
new file mode 100644
index 0000000..b7fbf92
--- /dev/null
+++ b/google_play_scraper/utils/proxies.py
@@ -0,0 +1,22 @@
+from abc import ABC, abstractmethod
+
+
+class Proxy(ABC):
+    """Base class for proxy configurations that can be passed to the scraper."""
+
+    def __init__(self, username: str, password: str, host: str, port: int):
+        self.username = username
+        self.password = password
+        self.host = host
+        self.port = port
+
+    @abstractmethod
+    def get_proxy(self) -> dict:
+        """
+        Abstract method to get the proxy information.
+
+        Returns:
+        - proxy (dict): Mapping of URL scheme to proxy URL, e.g.
+          {"https": "http://user:password@host:port"}.
+        """
+        pass
diff --git a/google_play_scraper/utils/request.py b/google_play_scraper/utils/request.py
index 215e4f6..1a6b7e6 100644
--- a/google_play_scraper/utils/request.py
+++ b/google_play_scraper/utils/request.py
@@ -1,13 +1,23 @@
-from typing import Union
+from typing import Optional, Union
 from urllib.error import HTTPError
-from urllib.request import Request, urlopen
+from urllib.request import ProxyHandler, Request, build_opener, urlopen
 
 from google_play_scraper.exceptions import ExtraHTTPError, NotFoundError
+from google_play_scraper.utils.proxies import Proxy
 
 
-def _urlopen(obj):
+def _urlopen(obj, proxy: Optional[Proxy] = None):
+    """
+    Fetch `obj` (a URL string or a `Request`) and return the body as text.
+
+    When `proxy` is given, the request is sent through `proxy.get_proxy()`.
+    """
     try:
-        resp = urlopen(obj)
+        if proxy is None:
+            resp = urlopen(obj)
+        else:
+            # Stay on urllib so HTTPError handling and Request objects keep working.
+            resp = build_opener(ProxyHandler(proxy.get_proxy())).open(obj)
     except HTTPError as e:
         if e.code == 404:
             raise NotFoundError("App not found(404).")
@@ -23,5 +33,5 @@ def post(url: str, data: Union[str, bytes], headers: dict) -> str:
     return _urlopen(Request(url, data=data, headers=headers))
 
 
-def get(url: str) -> str:
-    return _urlopen(url)
+def get(url: str, proxy: Optional[Proxy] = None) -> str:
+    return _urlopen(url, proxy)