From fbf963e1fc745754929898712f4c99802420b2da Mon Sep 17 00:00:00 2001
From: Kevin Kibe
Date: Tue, 10 Sep 2024 22:44:27 +0300
Subject: [PATCH] Update documentation (#184)

* update(dist): add readme description to pypi, temp uncomment install_requires

* update(docs): update documentation, update makefile

* update(training): test out model.to("cuda"), low_cpu_mem_usage parameters

* update(training): revert model.to("cuda"), low_cpu_mem_usage parameters

* update(training): revert install_requires
---
 DOCS/deployment.md                 | 39 ++++++------------------
 DOCS/gettingstarted.md             |  4 +--
 Makefile                           | 13 +++++-----
 setup.py                           |  5 ++++
 src/training/whisper_model_prep.py |  5 ++--
 5 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/DOCS/deployment.md b/DOCS/deployment.md
index 40ab617b..57cdff36 100644
--- a/DOCS/deployment.md
+++ b/DOCS/deployment.md
@@ -21,11 +21,7 @@ HUGGINGFACE_READ_TOKEN = "huggingface-read-token"
 - Run this command to launch the endpoint:
 
 ```bash
-# If the model is peft finetuned
-python3 -m deployment.main
-
-# If the model is fully finetuned
-python3 -m deployment.app
+make up
 ```
 
 - Test it out by accessing the Swagger UI at `http://localhost:8000/docs` and uploading either an .mp3 file or a .wav file and a task either `transcribe` or `translate`.
@@ -45,29 +41,10 @@
 - `MODEL_NAME`: Name of the fine-tuned model to use in your Hugging Face Hub repository.
 - `HUGGINGFACE_READ_TOKEN`: Your Hugging Face authentication token for read access.
 
-2. Modify the `CMD` command in`src/deployment/Dockerfile` file according to whether your finetuned model is PEFT finetuned or fully finetuned.
-    - `app.py` for fully finetuned models, `main.py` for peft-finetuned models.
-    - update `src/deployment/.dockerignore` accordingly.
-
-### Run Application
-
-3. Run the application locally by executing the following command:
-
-    ```bash
-    docker compose up
-    ```
-
-### Test
-
-4. Test the application by accessing the Swagger UI at `http://localhost:8000/docs`. Upload an `.mp3` file and specify a task as either `transcribe` or `translate`.
-
-### Set up monitoring
-
-5. Visualize Prometheus graphs in Grafana by logging in to Grafana at `http://localhost:3000`. You can access Prometheus graphs at `http://localhost:9090`.
-
-
-## To dockerize and semd to a docker registry
-
-- Modify the `CMD` command in `src/deployment/Dockerfile` file according to whether your finetuned model is PEFT finetuned or fully finetuned.
-    - `app.py` for fully finetuned models, `main.py` for peft-finetuned models.
-    - update `src/deployment/.dockerignore` accordingly.
+2. To deploy a Docker container running the application and monitoring endpoints, run:
+    ```bash
+    make deploy
+    ```
+- `http://localhost:8000` - Application. `/docs` for Swagger UI.
+- `http://localhost:3000` - Grafana dashboard.
+- `http://localhost:9090` - Prometheus dashboard.
\ No newline at end of file
diff --git a/DOCS/gettingstarted.md b/DOCS/gettingstarted.md
index 8c49a96c..1f91966e 100644
--- a/DOCS/gettingstarted.md
+++ b/DOCS/gettingstarted.md
@@ -37,7 +37,6 @@ huggingface_read_token = " "
 huggingface_write_token = " "
 dataset_name = "mozilla-foundation/common_voice_16_1"
 language_abbr= [ ] # Example `["ti", "yi"]`. see abbreviations here https://huggingface.co/datasets/mozilla-foundation/common_voice_16_1.
-# Note: choose a small dataset so as to not run out of memory,
 model_id= "model-id" # Example openai/whisper-small, openai/whisper-medium
 processing_task= "translate" # translate or transcribe
 wandb_api_key = " "
@@ -248,5 +247,4 @@ HUGGINGFACE_READ_TOKEN = "huggingface-read-token"
 python -m deployment.speech_inference_cli --audio_file FILENAME --task TASK --perform_diarization --perform_alignment
 ```
 
-- Run `python -m training.main --help` to see the flag descriptions.
-
+- Run `python -m training.main --help` to see the flag descriptions.
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 12758ecd..d16241ad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,18 +1,17 @@
-make pip:
+pip:
 	pip install -r requirements.txt
 
 pip-dev:
 	pip install mkdocs-material mkdocs-glightbox mkdocs-material[imaging] && export DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib
 
-make env:
+env:
 	python3 -m venv venv && source venv/bin/activate
 
-make test:
+test:
 	pytest
 
-make up:
-	pip install -r requirements.txt
-	cd src && python -m deployment.app
+up:
+	cd src/deployment && pip install -r requirements.txt && cd .. && python -m deployment.app
 
-make deploy:
+deploy:
 	docker-compose -f src/deployment/docker-compose.yaml up --build
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 9bdff11f..8356b834 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,8 @@
 from setuptools import find_packages, setup
 
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
 BASE_DEPS = [
     "transformers==4.42.3",
     "datasets==2.19.2",
@@ -61,6 +64,8 @@
     },
     description = "A framework for fast fine-tuning and API endpoint deployment of Whisper model specifically developed to accelerate Automatic Speech Recognition(ASR) for African Languages.",
     readme = "README.md",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
     license = "MIT",
     python_requires=">=3.9",
     # install_requires = BASE_DEPS,
diff --git a/src/training/whisper_model_prep.py b/src/training/whisper_model_prep.py
index 6c3f92ba..5b901fa1 100644
--- a/src/training/whisper_model_prep.py
+++ b/src/training/whisper_model_prep.py
@@ -113,12 +113,13 @@ def initialize_model(self) -> WhisperForConditionalGeneration:
         else:
             print("PEFT optimization is not enabled.")
         model = WhisperForConditionalGeneration.from_pretrained(
-            self.model_id
+            self.model_id,
+            # low_cpu_mem_usage = True
         )
         model.config.forced_decoder_ids = None
         model.config.suppress_tokens = []
         model.config.use_cache = False
         model.generation_config.language = "en"
         model.generation_config.task = self.processing_task
-
+        # model.to("cuda")
         return model