Load models from huggingface instead of blob storage

AutoResearch · Dec 13, 2023 · 8b029a1 · 8b029a1
1 parent 45bd148
commit 8b029a1
Show file tree

Hide file tree

Showing 5 changed files with 28 additions and 18 deletions.
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -17,7 +17,6 @@ permissions:
 
 jobs:
   deploy:
-
     runs-on: ubuntu-latest
     permissions:
       id-token: write
@@ -35,3 +34,5 @@ jobs:
       run: python -m build
     - name: Publish package
       uses: pypa/gh-action-pypi-publish@release/v1
+      with:
+        password: ${{ secrets.PYPI_API_TOKEN }}      
diff --git a/README.md b/README.md
@@ -31,13 +31,14 @@ Once you have created a new environment, you can install this project for local
 development using the following commands:
 
 ```
->> pip install -e .'[dev]'
+>> pip install -e .'[dev,train]'
 >> pre-commit install
 >> conda install pandoc
 ```
 
 Notes:
 1) The single quotes around `'[dev]'` may not be required for your operating system.
+3) See `pyproject.toml` for other optional dependencies; e.g., run `pip install -e ."[dev,train,cuda]"` if you want to use CUDA.
 2) `pre-commit install` will initialize pre-commit for this local repository, so
    that a set of tests will be run prior to completing a local commit. For more
    information, see the Python Project Template documentation on 
@@ -69,21 +70,24 @@ az account set --subscription "<your subscription name>"
 az configure --defaults workspace=<aml workspace> group=<resource group> location=<location, e.g. westus3>
 ```
 
-### Uploading data
-
-Example:
-```sh
-az storage blob upload  --account-name <account> --container <container>> --file data/data.jsonl -n data/sweetpea/data.jsonl
-```
 
 ### Running jobs
 
 Prediction
 ```sh
-az ml job create -f azureml/eval.yml  --set display_name="Test prediction job" --web
+az ml job create -f azureml/eval.yml  --set display_name="Test prediction job" --set environment_variables.HF_TOKEN=<your huggingface token> --web
 ```
 
 Notes:
 - `--name` will set the mlflow run id
 - `--display_name` becomes the name in the experiment dashboard
-- `--web` argument will pop-up a browser window for tracking the job.
+- The `--web` argument will pop up a browser window for tracking the job.
+- `HF_TOKEN` is required for gated Hugging Face repos, which need authentication.
+
+
+### Uploading data
+
+Example:
+```sh
+az storage blob upload  --account-name <account> --container <container> --file data/data.jsonl -n data/sweetpea/data.jsonl
+ ```
diff --git a/azureml/eval.yml b/azureml/eval.yml
@@ -2,7 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
 command: >
   python -m autora.doc.pipelines.main eval 
   ${{inputs.data_dir}}/data.jsonl
-  --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf
+  --model-path ${{inputs.model_path}}
   --sys-id ${{inputs.sys_id}}
   --instruc-id ${{inputs.instruc_id}}
   --param temperature=${{inputs.temperature}}
@@ -13,9 +13,11 @@ inputs:
   data_dir:
     type: uri_folder 
     path: azureml://datastores/workspaceblobstore/paths/data/sweetpea/
-  model_dir:
-    type: uri_folder 
-    path: azureml://datastores/workspaceblobstore/paths/base_models    
+  # Models currently load faster directly from Hugging Face than from Azure Blob Storage.
+  # model_dir:
+  #   type: uri_folder 
+  #   path: azureml://datastores/workspaceblobstore/paths/base_models    
+  model_path: meta-llama/Llama-2-7b-chat-hf
   temperature: 0.7
   top_p: 0.95
   top_k: 40

diff --git a/azureml/generate.yml b/azureml/generate.yml
@@ -1,7 +1,7 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
 command: >
   python -m autora.doc.pipelines.main generate 
-  --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf
+  --model-path ${{inputs.model_path}}
   --output ./outputs/output.txt
   --sys-id ${{inputs.sys_id}}
   --instruc-id ${{inputs.instruc_id}}
@@ -11,9 +11,11 @@ command: >
   autora/doc/pipelines/main.py    
 code: ../src
 inputs:
-  model_dir:
-    type: uri_folder 
-    path: azureml://datastores/workspaceblobstore/paths/base_models    
+  # Models currently load faster directly from Hugging Face than from Azure Blob Storage.
+  # model_dir:
+  #   type: uri_folder 
+  #   path: azureml://datastores/workspaceblobstore/paths/base_models    
+  model_path: meta-llama/Llama-2-7b-chat-hf
   temperature: 0.7
   top_p: 0.95
   top_k: 40

diff --git a/pyproject.toml b/pyproject.toml
@@ -43,6 +43,7 @@ dev = [
     "ipython", # Also used in building notebooks into Sphinx
     "matplotlib", # Used in sample notebook intro_notebook.ipynb
     "ipykernel",
+    "hf_transfer",
 ]
 train = [
     "jsonlines",