From d253b26524528ed75ae23665241e44403601c5c2 Mon Sep 17 00:00:00 2001 From: Eldar Mametov Date: Sun, 30 Jun 2024 19:14:58 +0300 Subject: [PATCH] feat: addendum readme.md and .env --- .env.example | 2 + .gitignore | 4 ++ README.md | 57 ++++++++++++++++++ poetry.lock | 141 ++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + pyrogram_parser.py | 21 ++++--- 6 files changed, 216 insertions(+), 10 deletions(-) create mode 100644 .env.example create mode 100644 README.md diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..eb13e99 --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +URL=apiurl +PYROGRAM_SESSION_STRING=sessionname diff --git a/.gitignore b/.gitignore index b091641..3d414cc 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,7 @@ public.pem *.session parser.json test_site.py + +### Docker ### +Dockerfile +docker-compose.yml diff --git a/README.md b/README.md new file mode 100644 index 0000000..9ed2e50 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +# Telegram Message Parser Bot + +## Overview +This Python script uses Pyrogram to parse messages from Telegram channels and send them to a FastAPI or other server. If message delivery fails (the status code is not 200 or 201), it saves the message in a JSON file and retries it after the next newly received message from a Telegram channel is delivered successfully. + +## Installation +### Requirements +- Python 3.12+ +- Poetry + +### Installation Steps +1. Clone the repository: + ```bash + git clone https://github.com/one-zero-eight/telegram-userbot-parser.git + cd telegram-userbot-parser +2. Install dependencies using Poetry: + ```bash + poetry install --no-root --with code-style +3. Get the API credentials for the userbot: + - Go to https://core.telegram.org/api/obtaining_api_id and use your Telegram account to obtain API credentials. Be sure to write down the api_id and api_hash that you receive on the site.
+ - Create a Python file and use this code to create the session file for the userbot. Fill in your own api_id and api_hash. Run this file and go through all the necessary steps. You can read more at https://docs.pyrogram.org/intro/quickstart + ```python + import asyncio + from pyrogram import Client + + api_id = 12345 + api_hash = "0123456789abcdef0123456789abcdef" + + + async def main(): + async with Client("my_account", api_id, api_hash) as app: + await app.send_message("me", "Greetings from **Pyrogram**!") + + + asyncio.run(main()) + +4. Create a .env file based on .env.example and fill it in with your data. In "URL", specify the address of your server. In "PYROGRAM_SESSION_STRING", specify the name of your Pyrogram session file. + +5. Run the file pyrogram_parser.py + ```bash + python pyrogram_parser.py + +### Features + - Parses messages from specified Telegram channels. + - Sends parsed data to a server via HTTP POST requests. + - Saves undelivered messages to a JSON file and retries sending them after the next successful delivery. + +### Technologies Used + - Python 3.12+ + - Pyrogram + - FastAPI + - httpx + - python-dotenv + - Docker, Docker Compose + +### License +This project is licensed under the MIT License - see the LICENSE file for details. diff --git a/poetry.lock b/poetry.lock index 0b8f81a..afcfef8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] name = "annotated-types" @@ -11,6 +11,50 @@ files = [ {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] +[[package]] +name = "anyio" +version = "4.4.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, + {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "asyncio" +version = "3.4.3" +description = "reference implementation of PEP 3156" +optional = false +python-versions = "*" +files = [ + {file = "asyncio-3.4.3-cp33-none-win32.whl", hash = "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de"}, + {file = "asyncio-3.4.3-cp33-none-win_amd64.whl", hash = "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c"}, + {file = "asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d"}, + {file = "asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41"}, +] + +[[package]] +name = "certifi" +version = "2024.6.2" +description = "Python package for providing Mozilla's CA Bundle." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, + {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, +] + [[package]] name = "cfgv" version = "3.4.0" @@ -49,6 +93,62 @@ docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1 testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] typing = ["typing-extensions (>=4.8)"] +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "1.0.5" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] + +[[package]] +name = "httpx" +version = "0.27.0" +description = "The next generation HTTP client." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + [[package]] name = "identify" version = "2.5.36" @@ -63,6 +163,17 @@ files = [ [package.extras] license = ["ukkonen"] +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -255,6 +366,20 @@ files = [ {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, ] +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.1" @@ -280,6 +405,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -340,6 +466,17 @@ files = [ {file = "ruff-0.4.10.tar.gz", hash = "sha256:3aa4f2bc388a30d346c56524f7cacca85945ba124945fe489952aadb6b5cd804"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -374,4 +511,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "1a62922991c7083fc04e870ec05f4e0f0a3619fb1e327d86296d485f96a0530a" +content-hash = "b2b592133ccaca470a09290258fd0529d1288f139d074e3fc18f50e40e85838b" diff --git a/pyproject.toml b/pyproject.toml index d62dacc..d438ae5 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ Pyrogram = "^2.0.106" pydantic = "^2.7.4" httpx = "^0.27.0" asyncio = "^3.4.3" +python-dotenv = "^1.0.1" [tool.poetry.group.dev] optional = true diff --git a/pyrogram_parser.py b/pyrogram_parser.py index 12268fb..e3d3c33 100644 --- a/pyrogram_parser.py +++ b/pyrogram_parser.py @@ -5,13 +5,19 @@ import httpx import os import asyncio +from dotenv import load_dotenv + +# Loading parameters from .env +load_dotenv() # File for saving data filename = "parser.json" # specify the name of the session file for userbot -app = Client("my_account") +app = Client(f"{os.getenv("PYROGRAM_SESSION_STRING")}") # The url of the server -url = "http://127.0.0.1:5000/api/messages" +url = os.getenv("URL") + +print(url) class Chat(BaseModel): @@ -26,8 +32,8 @@ class StructureMessage(BaseModel): sender_chat: Chat date: datetime chat: Chat - text: str = None - caption: str = None + text: str | None + caption: str | None def serializableDict(self): """Converts an object into a dictionary with date conversion to an ISO format string""" @@ -71,7 +77,7 @@ async def resend_all_message(): print( f"Status code: {response.status_code}, message id: {history_message['id']}" ) - if response.status_code != 200: + if response.status_code not in [200, 201]: return file.seek(0) json.dump([], file, ensure_ascii=False, indent=4) @@ -84,7 +90,7 @@ async def send_to_server(message_save): try: response = await client.post(url, json=message_save.serializableDict()) print(f"Status code: {response.status_code}, message id: {message_save.id}") - if response.status_code == 200: + if response.status_code in [200, 201]: await resend_all_message() else: await saveJson(message_save) @@ -99,8 +105,7 @@ async def send_to_server(message_save): async def new_message_handler(client, message): """Processes new messages and saves them if there is a text or signature""" if message.text or message.caption: - message = str(message) - message_save = 
StructureMessage.model_validate_json(message) + message_save = StructureMessage.model_validate(message, from_attributes=True) await send_to_server(message_save) # saveJson(message)