From fbf963e1fc745754929898712f4c99802420b2da Mon Sep 17 00:00:00 2001
From: Kevin Kibe
Date: Tue, 10 Sep 2024 22:44:27 +0300
Subject: [PATCH] Update documentation (#184)

* update(dist): add readme description to pypi, temp uncomment install_requires

* update(docs): update documentation, update makefile

* update(training): test out model.to("cuda"), low_cpu_mem_usage parameters

* update(training): revert model.to("cuda"), low_cpu_mem_usage parameters

* update(training): revert install_requires
---
 DOCS/deployment.md                 | 39 ++++++------------------
 DOCS/gettingstarted.md             |  4 +--
 Makefile                           | 13 +++++-----
 setup.py                           |  5 ++++
 src/training/whisper_model_prep.py |  5 ++--
 5 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/DOCS/deployment.md b/DOCS/deployment.md
index 40ab617b..57cdff36 100644
--- a/DOCS/deployment.md
+++ b/DOCS/deployment.md
@@ -21,11 +21,7 @@ HUGGINGFACE_READ_TOKEN = "huggingface-read-token"
 - Run this command to launch the endpoint:
 
 ```bash
-# If the model is peft finetuned
-python3 -m deployment.main
-
-# If the model is fully finetuned
-python3 -m deployment.app
+make up
 ```
 
 - Test it out by accessing the Swagger UI at `http://localhost:8000/docs` and uploading either an .mp3 file or a .wav file and a task either `transcribe` or `translate`.
@@ -45,29 +41,10 @@
 - `MODEL_NAME`: Name of the fine-tuned model to use in your Hugging Face Hub repository.
 - `HUGGINGFACE_READ_TOKEN`: Your Hugging Face authentication token for read access.
 
-2. Modify the `CMD` command in`src/deployment/Dockerfile` file according to whether your finetuned model is PEFT finetuned or fully finetuned.
-    - `app.py` for fully finetuned models, `main.py` for peft-finetuned models.
-    - update `src/deployment/.dockerignore` accordingly.
-
-### Run Application
-
-3. Run the application locally by executing the following command:
-
-    ```bash
-    docker compose up
-    ```
-
-### Test
-
-4. Test the application by accessing the Swagger UI at `http://localhost:8000/docs`. Upload an `.mp3` file and specify a task as either `transcribe` or `translate`.
-
-### Set up monitoring
-
-5. Visualize Prometheus graphs in Grafana by logging in to Grafana at `http://localhost:3000`. You can access Prometheus graphs at `http://localhost:9090`.
-
-
-## To dockerize and semd to a docker registry
-
-- Modify the `CMD` command in `src/deployment/Dockerfile` file according to whether your finetuned model is PEFT finetuned or fully finetuned.
-    - `app.py` for fully finetuned models, `main.py` for peft-finetuned models.
-    - update `src/deployment/.dockerignore` accordingly.
+2. To deploy a Docker container running the application and monitoring endpoints, run:
+    ```bash
+    make deploy
+    ```
+- `http://localhost:8000` - Application. `/docs` for Swagger UI.
+- `http://localhost:3000` - Grafana dashboard.
+- `http://localhost:9090` - Prometheus dashboard.
\ No newline at end of file
diff --git a/DOCS/gettingstarted.md b/DOCS/gettingstarted.md
index 8c49a96c..1f91966e 100644
--- a/DOCS/gettingstarted.md
+++ b/DOCS/gettingstarted.md
@@ -37,7 +37,6 @@ huggingface_read_token = " "
 huggingface_write_token = " "
 dataset_name = "mozilla-foundation/common_voice_16_1"
 language_abbr= [ ] # Example `["ti", "yi"]`. see abbreviations here https://huggingface.co/datasets/mozilla-foundation/common_voice_16_1.
-# Note: choose a small dataset so as to not run out of memory,
 model_id= "model-id" # Example openai/whisper-small, openai/whisper-medium
 processing_task= "translate" # translate or transcribe
 wandb_api_key = " "
@@ -248,5 +247,4 @@ HUGGINGFACE_READ_TOKEN = "huggingface-read-token"
 python -m deployment.speech_inference_cli --audio_file FILENAME --task TASK --perform_diarization --perform_alignment
 ```
 
-- Run `python -m training.main --help` to see the flag descriptions.
-
+- Run `python -m training.main --help` to see the flag descriptions.
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 12758ecd..d16241ad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,18 +1,17 @@
-make pip:
+pip:
 	pip install -r requirements.txt
 
 pip-dev:
 	pip install mkdocs-material mkdocs-glightbox mkdocs-material[imaging] && export DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib
 
-make env:
+env:
 	python3 -m venv venv && source venv/bin/activate
 
-make test:
+test:
 	pytest
 
-make up:
-	pip install -r requirements.txt
-	cd src && python -m deployment.app
+up:
+	cd src/deployment && pip install -r requirements.txt && cd .. && python -m deployment.app
 
-make deploy:
+deploy:
 	docker-compose -f src/deployment/docker-compose.yaml up --build
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 9bdff11f..8356b834 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,8 @@
 from setuptools import find_packages, setup
 
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
 BASE_DEPS = [
     "transformers==4.42.3",
     "datasets==2.19.2",
@@ -61,6 +64,8 @@
     },
     description = "A framework for fast fine-tuning and API endpoint deployment of Whisper model specifically developed to accelerate Automatic Speech Recognition(ASR) for African Languages.",
     readme = "README.md",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
     license = "MIT",
     python_requires=">=3.9",
     # install_requires = BASE_DEPS,
diff --git a/src/training/whisper_model_prep.py b/src/training/whisper_model_prep.py
index 6c3f92ba..5b901fa1 100644
--- a/src/training/whisper_model_prep.py
+++ b/src/training/whisper_model_prep.py
@@ -113,12 +113,13 @@ def initialize_model(self) -> WhisperForConditionalGeneration:
         else:
             print("PEFT optimization is not enabled.")
         model = WhisperForConditionalGeneration.from_pretrained(
-            self.model_id
+            self.model_id,
+            # low_cpu_mem_usage = True
         )
         model.config.forced_decoder_ids = None
         model.config.suppress_tokens = []
         model.config.use_cache = False
         model.generation_config.language = "en"
         model.generation_config.task = self.processing_task
-
+        # model.to("cuda")
         return model