From 593c65096b88fc41c7c9b74bed6c34844e048280 Mon Sep 17 00:00:00 2001
From: sunny <sunnyliu19981005@gmail.com>
Date: Wed, 9 Oct 2024 14:56:53 -0400
Subject: [PATCH] added basic tutorial notebook

---
 notebooks/Axolotl-tutorial-basic.ipynb | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 notebooks/Axolotl-tutorial-basic.ipynb
diff --git a/notebooks/Axolotl-tutorial-basic.ipynb b/notebooks/Axolotl-tutorial-basic.ipynb
new file mode 100644
index 0000000..4a7af9b
--- /dev/null
+++ b/notebooks/Axolotl-tutorial-basic.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyNLVJXslczk5IrM9us34x9M"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"c180822e7754499c9c44ad9d5a1c624c":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_069557ca72044c228853408187cf33ac","IPY_MODEL_c56e9e08fe36430798444127a86048d8","IPY_MODEL_aa0847b6c46e41db91e3d0058f81d789"],"layout":"IPY_MODEL_a8dc2d8300eb4073879beafd38582dec"}},"60fd06e6a8844b40a002dfd3a3a70a3d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d737fa3b102640ca917182f291be536d","placeholder":"​","style":"IPY_MODEL_8ab41d58d57c4864a1f93afecdb31789","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. 
</center>"}},"26df93c1b51a4d46b3b3e8496b3b77f4":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_82e0ed0936944830932886925b8bd44f","placeholder":"​","style":"IPY_MODEL_dafdd31227e646e48c804127b8f01bd6","value":""}},"c88e0a4ef2f24eb2a822b07a6d1eefb0":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_658f892da6514bb9aa33ee471ca75909","style":"IPY_MODEL_8702d69b0b6142e5980565fc78681855","value":false}},"396a170dc75a4c8bb01eda31a6cba556":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_6a1dd8f39e364a4cae78f709c1d45560","style":"IPY_MODEL_fed784cd1836418eb950fdf1d4cf0a83","tooltip":""}},"feab9d4fc44149a69c35712d0fd883c1":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3c68b8d84bb047209527bcf98371db9a","placeholder":"​","style":"IPY_MODEL_aa6d9ff31c204d47ac15a1aca9026b77","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>"}},"a8dc2d8300eb4073879beafd38582dec":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"d737fa3b102640ca917182f291be536d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflo
w_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8ab41d58d57c4864a1f93afecdb31789":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"82e0ed0936944830932886925b8bd44f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dafdd31227e646e48c804127b8f01bd6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"658f892da6514bb9aa33ee471ca75909":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8702d69b0b6142e5980565fc78681855":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6a1dd8f39e364a4cae78f709c1d45560":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid
_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fed784cd1836418eb950fdf1d4cf0a83":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"3c68b8d84bb047209527bcf98371db9a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"aa6d9ff31c204d47ac15a1aca9026b77":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyl
eModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2c77cd25bd4d409485c5e41bd09d6baa":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f54176c91b7f48c7a44dd5164a03d061","placeholder":"​","style":"IPY_MODEL_73c5227e6943496e9b8c3ad2bc47004d","value":"Connecting..."}},"f54176c91b7f48c7a44dd5164a03d061":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"73c5227e6943496e9b8c3ad2bc47004d":{"model_module":"@jupyter-widgets/controls","mo
del_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"069557ca72044c228853408187cf33ac":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_660fae67f5e3445fb75c2ff750746503","placeholder":"​","style":"IPY_MODEL_b56986bb4f19492d81f23bd2a7823d45","value":"Token is valid (permission: read)."}},"c56e9e08fe36430798444127a86048d8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ce97b1f24bd14e5e927cb0d691584823","placeholder":"​","style":"IPY_MODEL_f428dc9030ec4acbbef406bcf65c99cc","value":"Your token has been saved to 
/root/.cache/huggingface/token"}},"aa0847b6c46e41db91e3d0058f81d789":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_db7d2c036cd64ea2acbb9197308b8199","placeholder":"​","style":"IPY_MODEL_09e4e8a163c44313a9978eb789ae053e","value":"Login successful"}},"660fae67f5e3445fb75c2ff750746503":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b56986bb4f19492d81f23bd2a7823d45":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_vers
ion":"1.2.0","_view_name":"StyleView","description_width":""}},"ce97b1f24bd14e5e927cb0d691584823":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f428dc9030ec4acbbef406bcf65c99cc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"db7d2c036cd64ea2acbb9197308b8199":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"gr
id_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"09e4e8a163c44313a9978eb789ae053e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"markdown","source":["## Setting up"],"metadata":{"id":"-1Djx2x8TWkX"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"DgOWyh8BTPh3","executionInfo":{"status":"ok","timestamp":1728489349032,"user_tz":240,"elapsed":6642,"user":{"displayName":"Sunny Liu","userId":"17044907459311958453"}}},"outputs":[],"source":["import torch\n","# Check that a GPU is available; a T4 (free tier) is enough to run this notebook\n","assert torch.cuda.is_available(), \"No CUDA GPU detected; enable a GPU runtime (a free-tier T4 is enough)\""]},{"cell_type":"code","source":["#!pip install -e git+https://github.com/axolotl-ai-cloud/axolotl#egg=axolotl\n","#!git clone https://github.com/axolotl-ai-cloud/axolotl\n","!pip install -e git+\"https://github.com/axolotl-ai-cloud/axolotl#egg=axolotl\"\n","# The editable install above clones the repo into ./src/axolotl; the installs below do not depend on the working directory.\n","# A `!cd` runs in a throwaway subshell and never changes the notebook's working directory (use `%cd` if that is ever needed).\n","\n","!pip3 install packaging ninja\n","#!pip3 install -e '.[flash-attn,deepspeed]'\n","!pip3 install flash-attn\n","!pip3 install deepspeed\n","#!pip install 
mamba-ssm"],"metadata":{"id":"fhNtvB89TbEm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1728489685856,"user_tz":240,"elapsed":88466,"user":{"displayName":"Sunny Liu","userId":"17044907459311958453"}},"outputId":"c7d83ee5-2d30-4dce-ef57-93e97fd49864"},"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["Obtaining axolotl from git+https://github.com/axolotl-ai-cloud/axolotl#egg=axolotl\n","  Cloning https://github.com/axolotl-ai-cloud/axolotl to ./src/axolotl\n","  Running command git clone --filter=blob:none --quiet https://github.com/axolotl-ai-cloud/axolotl /content/src/axolotl\n","  Resolved https://github.com/axolotl-ai-cloud/axolotl to commit e8d3da00814ec7773d33edd5643bb885d85686cb\n","  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe (from axolotl)\n","  Cloning https://github.com/lm-sys/FastChat.git (to revision 27a05b04a35510afb1d767ae7e5990cbd278f8fe) to /tmp/pip-install-yqziubun/fschat_0e901a96e4c24cee9538188f6d5e0597\n","  Running command git clone --filter=blob:none --quiet https://github.com/lm-sys/FastChat.git /tmp/pip-install-yqziubun/fschat_0e901a96e4c24cee9538188f6d5e0597\n","  Running command git rev-parse -q --verify 'sha^27a05b04a35510afb1d767ae7e5990cbd278f8fe'\n","  Running command git fetch -q https://github.com/lm-sys/FastChat.git 27a05b04a35510afb1d767ae7e5990cbd278f8fe\n","  Running command git checkout -q 27a05b04a35510afb1d767ae7e5990cbd278f8fe\n","  Resolved https://github.com/lm-sys/FastChat.git to commit 27a05b04a35510afb1d767ae7e5990cbd278f8fe\n","  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n","  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n","  Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting packaging==23.2 (from axolotl)\n","  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\n","Collecting peft==0.13.0 (from axolotl)\n","  Downloading peft-0.13.0-py3-none-any.whl.metadata (13 kB)\n","Collecting transformers==4.45.1 (from axolotl)\n","  Downloading transformers-4.45.1-py3-none-any.whl.metadata (44 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: tokenizers>=0.19.1 in /usr/local/lib/python3.10/dist-packages (from axolotl) (0.19.1)\n","Collecting bitsandbytes==0.44.0 (from axolotl)\n","  Downloading bitsandbytes-0.44.0-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)\n","Requirement already satisfied: accelerate==0.34.2 in /usr/local/lib/python3.10/dist-packages (from axolotl) (0.34.2)\n","Collecting datasets==2.21.0 (from axolotl)\n","  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)\n","Collecting pydantic==2.6.3 (from axolotl)\n","  Downloading pydantic-2.6.3-py3-none-any.whl.metadata (84 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.4/84.4 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting addict (from axolotl)\n","  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)\n","Collecting fire (from axolotl)\n","  Downloading fire-0.7.0.tar.gz (87 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: PyYAML>=6.0 in /usr/local/lib/python3.10/dist-packages (from axolotl) (6.0.2)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from axolotl) (2.32.3)\n","Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from axolotl) (0.2.0)\n","Collecting wandb (from axolotl)\n","  Downloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)\n","Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from axolotl) (0.8.0)\n","Collecting optimum==1.16.2 (from axolotl)\n","  Downloading optimum-1.16.2-py3-none-any.whl.metadata (17 kB)\n","Collecting hf_transfer (from axolotl)\n","  Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n","Collecting colorama (from axolotl)\n","  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n","Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from axolotl) (0.60.0)\n","Requirement already satisfied: numpy<=2.0.1,>=1.24.4 in /usr/local/lib/python3.10/dist-packages (from axolotl) (1.26.4)\n","Collecting evaluate==0.4.1 (from axolotl)\n","  Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from axolotl) (1.13.1)\n","Collecting scikit-learn==1.4.2 (from axolotl)\n","  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n","Collecting pynvml (from axolotl)\n","  Downloading pynvml-11.5.3-py3-none-any.whl.metadata (8.8 kB)\n","Collecting art (from axolotl)\n","  Downloading art-6.3-py3-none-any.whl.metadata (70 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.4/70.4 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting gradio==3.50.2 (from axolotl)\n","  Downloading gradio-3.50.2-py3-none-any.whl.metadata (17 kB)\n","Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from axolotl) (2.17.0)\n","Collecting python-dotenv==1.0.1 (from axolotl)\n","  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n","Collecting autoawq>=0.2.5 (from axolotl)\n","  Downloading autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl.metadata (18 kB)\n","Collecting triton>=2.3.0 (from axolotl)\n","  Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n","Collecting liger-kernel==0.3.0 (from axolotl)\n","  Downloading liger_kernel-0.3.0-py3-none-any.whl.metadata (25 kB)\n","Collecting s3fs>=2024.5.0 (from axolotl)\n","  Downloading s3fs-2024.9.0-py3-none-any.whl.metadata (1.6 kB)\n","Requirement already satisfied: gcsfs>=2024.5.0 in /usr/local/lib/python3.10/dist-packages (from axolotl) (2024.6.1)\n","Collecting trl==0.9.6 (from axolotl)\n","  Downloading trl-0.9.6-py3-none-any.whl.metadata (12 kB)\n","Collecting zstandard==0.22.0 (from axolotl)\n","  Downloading zstandard-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.9 kB)\n","Requirement already satisfied: fastcore in /usr/local/lib/python3.10/dist-packages (from axolotl) (1.7.10)\n","Requirement already satisfied: torch==2.4.1+cu121 in /usr/local/lib/python3.10/dist-packages (from axolotl) (2.4.1+cu121)\n","Collecting xformers>=0.0.27 (from axolotl)\n","  Downloading xformers-0.0.28.post1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate==0.34.2->axolotl) (5.9.5)\n","Requirement already satisfied: huggingface-hub>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==0.34.2->axolotl) (0.24.7)\n","Requirement already satisfied: safetensors>=0.4.3 in 
/usr/local/lib/python3.10/dist-packages (from accelerate==0.34.2->axolotl) (0.4.5)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets==2.21.0->axolotl) (3.16.1)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets==2.21.0->axolotl) (16.1.0)\n","Collecting dill<0.3.9,>=0.3.0 (from datasets==2.21.0->axolotl)\n","  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets==2.21.0->axolotl) (2.2.2)\n","Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets==2.21.0->axolotl) (4.66.5)\n","Collecting xxhash (from datasets==2.21.0->axolotl)\n","  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n","Collecting multiprocess (from datasets==2.21.0->axolotl)\n","  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)\n","Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets==2.21.0->axolotl) (2024.6.1)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets==2.21.0->axolotl) (3.10.8)\n","Collecting responses<0.19 (from evaluate==0.4.1->axolotl)\n","  Downloading responses-0.18.0-py3-none-any.whl.metadata (29 kB)\n","Collecting aiofiles<24.0,>=22.0 (from gradio==3.50.2->axolotl)\n","  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n","Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (4.2.2)\n","Collecting fastapi (from gradio==3.50.2->axolotl)\n","  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)\n","Collecting ffmpy (from gradio==3.50.2->axolotl)\n","  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 
kB)\n","Collecting gradio-client==0.6.1 (from gradio==3.50.2->axolotl)\n","  Downloading gradio_client-0.6.1-py3-none-any.whl.metadata (7.1 kB)\n","Collecting httpx (from gradio==3.50.2->axolotl)\n","  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n","Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (6.4.5)\n","Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (3.1.4)\n","Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (2.1.5)\n","Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (3.7.1)\n","Collecting orjson~=3.0 (from gradio==3.50.2->axolotl)\n","  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (10.4.0)\n","Collecting pydub (from gradio==3.50.2->axolotl)\n","  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n","Collecting python-multipart (from gradio==3.50.2->axolotl)\n","  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)\n","Collecting semantic-version~=2.0 (from gradio==3.50.2->axolotl)\n","  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n","Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==3.50.2->axolotl) (4.12.2)\n","Collecting uvicorn>=0.14.0 (from gradio==3.50.2->axolotl)\n","  Downloading uvicorn-0.31.0-py3-none-any.whl.metadata (6.6 kB)\n","Collecting 
websockets<12.0,>=10.0 (from gradio==3.50.2->axolotl)\n","  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n","Collecting coloredlogs (from optimum==1.16.2->axolotl)\n","  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum==1.16.2->axolotl) (1.13.3)\n","Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.3->axolotl) (0.7.0)\n","Collecting pydantic-core==2.16.3 (from pydantic==2.6.3->axolotl)\n","  Downloading pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.4.2->axolotl) (1.4.2)\n","Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.4.2->axolotl) (3.5.0)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.4.1+cu121->axolotl) (3.3)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.45.1->axolotl) (2024.9.11)\n","Collecting tokenizers>=0.19.1 (from axolotl)\n","  Downloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n","Collecting tyro>=0.5.11 (from trl==0.9.6->axolotl)\n","  Downloading tyro-0.8.11-py3-none-any.whl.metadata (8.4 kB)\n","INFO: pip is looking at multiple versions of autoawq to determine which version is compatible with other requirements. 
This could take a while.\n","Collecting autoawq>=0.2.5 (from axolotl)\n","  Downloading autoawq-0.2.5-cp310-cp310-manylinux2014_x86_64.whl.metadata (16 kB)\n","Collecting autoawq-kernels (from autoawq>=0.2.5->axolotl)\n","  Downloading autoawq_kernels-0.0.8-cp310-cp310-manylinux2014_x86_64.whl.metadata (2.4 kB)\n","Requirement already satisfied: decorator>4.1.2 in /usr/local/lib/python3.10/dist-packages (from gcsfs>=2024.5.0->axolotl) (4.4.2)\n","Requirement already satisfied: google-auth>=1.2 in /usr/local/lib/python3.10/dist-packages (from gcsfs>=2024.5.0->axolotl) (2.27.0)\n","Requirement already satisfied: google-auth-oauthlib in /usr/local/lib/python3.10/dist-packages (from gcsfs>=2024.5.0->axolotl) (1.2.1)\n","Requirement already satisfied: google-cloud-storage in /usr/local/lib/python3.10/dist-packages (from gcsfs>=2024.5.0->axolotl) (2.8.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->axolotl) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->axolotl) (3.10)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->axolotl) (2.2.3)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->axolotl) (2024.8.30)\n","Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs>=2024.5.0->axolotl)\n","  Downloading aiobotocore-2.15.2-py3-none-any.whl.metadata (23 kB)\n","INFO: pip is looking at multiple versions of s3fs to determine which version is compatible with other requirements. 
This could take a while.\n","Collecting s3fs>=2024.5.0 (from axolotl)\n","  Downloading s3fs-2024.6.1-py3-none-any.whl.metadata (1.6 kB)\n","Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire->axolotl) (2.4.0)\n","Collecting markdown2[all] (from fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading markdown2-2.5.0-py2.py3-none-any.whl.metadata (2.2 kB)\n","Collecting nh3 (from fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading nh3-0.2.18-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n","Requirement already satisfied: prompt-toolkit>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl) (3.0.48)\n","Requirement already satisfied: rich>=10.0.0 in /usr/local/lib/python3.10/dist-packages (from fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl) (13.9.1)\n","Collecting shortuuid (from fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading shortuuid-1.0.13-py3-none-any.whl.metadata (5.8 kB)\n","Collecting tiktoken (from fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n","Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->axolotl) (0.43.0)\n","Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (1.4.0)\n","Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (1.64.1)\n","Requirement already satisfied: 
markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (3.7)\n","Requirement already satisfied: protobuf!=4.24.0,<5.0.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (3.20.3)\n","Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (71.0.4)\n","Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (1.16.0)\n","Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (0.7.2)\n","Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->axolotl) (3.0.4)\n","Requirement already satisfied: click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl) (8.1.7)\n","Collecting docker-pycreds>=0.4.0 (from wandb->axolotl)\n","  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)\n","Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->axolotl)\n","  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)\n","Requirement already satisfied: platformdirs in /usr/local/lib/python3.10/dist-packages (from wandb->axolotl) (4.3.6)\n","Collecting sentry-sdk>=1.0.0 (from wandb->axolotl)\n","  Downloading sentry_sdk-2.16.0-py2.py3-none-any.whl.metadata (9.8 kB)\n","Collecting setproctitle (from wandb->axolotl)\n","  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)\n","Collecting botocore<1.35.37,>=1.35.16 (from aiobotocore<3.0.0,>=2.5.4->s3fs>=2024.5.0->axolotl)\n","  Downloading botocore-1.35.36-py3-none-any.whl.metadata (5.7 kB)\n","Requirement already satisfied: wrapt<2.0.0,>=1.10.10 in /usr/local/lib/python3.10/dist-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs>=2024.5.0->axolotl) (1.16.0)\n","Collecting 
aioitertools<1.0.0,>=0.5.1 (from aiobotocore<3.0.0,>=2.5.4->s3fs>=2024.5.0->axolotl)\n","  Downloading aioitertools-0.12.0-py3-none-any.whl.metadata (3.8 kB)\n","Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (2.4.3)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (24.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (6.1.0)\n","Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (1.13.1)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.21.0->axolotl) (4.0.3)\n","Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==3.50.2->axolotl) (0.4)\n","Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==3.50.2->axolotl) (4.23.0)\n","Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==3.50.2->axolotl) (0.12.1)\n","Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->axolotl)\n","  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n","Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.2->gcsfs>=2024.5.0->axolotl) (5.5.0)\n","Requirement already satisfied: pyasn1-modules>=0.2.1 
in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.2->gcsfs>=2024.5.0->axolotl) (0.4.1)\n","Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.2->gcsfs>=2024.5.0->axolotl) (4.9)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==3.50.2->axolotl) (1.3.0)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==3.50.2->axolotl) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==3.50.2->axolotl) (4.54.1)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==3.50.2->axolotl) (1.4.7)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==3.50.2->axolotl) (3.1.4)\n","Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==3.50.2->axolotl) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets==2.21.0->axolotl) (2024.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets==2.21.0->axolotl) (2024.2)\n","Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit>=3.0.0->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl) (0.2.13)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.0.0->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in 
/usr/local/lib/python3.10/dist-packages (from rich>=10.0.0->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl) (2.18.0)\n","Requirement already satisfied: docstring-parser>=0.16 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl==0.9.6->axolotl) (0.16)\n","Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl==0.9.6->axolotl)\n","  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n","Collecting h11>=0.8 (from uvicorn>=0.14.0->gradio==3.50.2->axolotl)\n","  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n","Collecting humanfriendly>=9.1 (from coloredlogs->optimum==1.16.2->axolotl)\n","  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)\n","Collecting starlette<0.39.0,>=0.37.2 (from fastapi->gradio==3.50.2->axolotl)\n","  Downloading starlette-0.38.6-py3-none-any.whl.metadata (6.0 kB)\n","Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib->gcsfs>=2024.5.0->axolotl) (1.3.1)\n","Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-storage->gcsfs>=2024.5.0->axolotl) (2.19.2)\n","Requirement already satisfied: google-cloud-core<3.0dev,>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-storage->gcsfs>=2024.5.0->axolotl) (2.4.1)\n","Requirement already satisfied: google-resumable-media>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from google-cloud-storage->gcsfs>=2024.5.0->axolotl) (2.7.2)\n","Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio==3.50.2->axolotl) (3.7.1)\n","Collecting httpcore==1.* (from httpx->gradio==3.50.2->axolotl)\n","  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)\n","Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio==3.50.2->axolotl) 
(1.3.1)\n","Collecting wavedrom (from markdown2[all]->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading wavedrom-2.0.3.post3.tar.gz (137 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.7/137.7 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting latex2mathml (from markdown2[all]->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading latex2mathml-3.77.0-py3-none-any.whl.metadata (14 kB)\n","INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.\n","Collecting multiprocess (from datasets==2.21.0->axolotl)\n","  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum==1.16.2->axolotl) (1.3.0)\n","Collecting jmespath<2.0.0,>=0.7.1 (from botocore<1.35.37,>=1.35.16->aiobotocore<3.0.0,>=2.5.4->s3fs>=2024.5.0->axolotl)\n","  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)\n","Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->axolotl)\n","  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n","Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs>=2024.5.0->axolotl) (1.65.0)\n","Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs>=2024.5.0->axolotl) (1.24.0)\n","Requirement already satisfied: 
google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.10/dist-packages (from google-resumable-media>=2.3.2->google-cloud-storage->gcsfs>=2024.5.0->axolotl) (1.6.0)\n","Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==3.50.2->axolotl) (2023.12.1)\n","Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==3.50.2->axolotl) (0.35.1)\n","Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==3.50.2->axolotl) (0.20.0)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.0.0->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl) (0.1.2)\n","Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs>=2024.5.0->axolotl) (0.6.1)\n","Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs>=2024.5.0->axolotl) (3.2.2)\n","Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->gradio==3.50.2->axolotl) (1.2.2)\n","Collecting svgwrite (from wavedrom->markdown2[all]->fschat@ git+https://github.com/lm-sys/FastChat.git@27a05b04a35510afb1d767ae7e5990cbd278f8fe->axolotl)\n","  Downloading svgwrite-1.4.3-py3-none-any.whl.metadata (8.8 kB)\n","Downloading bitsandbytes-0.44.0-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.4/122.4 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading datasets-2.21.0-py3-none-any.whl (527 
kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m527.3/527.3 kB\u001b[0m \u001b[31m30.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading gradio-3.50.2-py3-none-any.whl (20.3 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m61.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading liger_kernel-0.3.0-py3-none-any.whl (58 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.1/58.1 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading optimum-1.16.2-py3-none-any.whl (402 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.5/402.5 kB\u001b[0m \u001b[31m28.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading packaging-23.2-py3-none-any.whl (53 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading peft-0.13.0-py3-none-any.whl (322 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.5/322.5 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading pydantic-2.6.3-py3-none-any.whl (395 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m395.2/395.2 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n","Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 
MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m76.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading transformers-4.45.1-py3-none-any.whl (9.9 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.9/9.9 MB\u001b[0m \u001b[31m80.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading trl-0.9.6-py3-none-any.whl (245 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m245.8/245.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading zstandard-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m74.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading gradio_client-0.6.1-py3-none-any.whl (299 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m299.2/299.2 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m58.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading autoawq-0.2.5-cp310-cp310-manylinux2014_x86_64.whl (84 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.3/84.3 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading s3fs-2024.6.1-py3-none-any.whl (29 kB)\n","Downloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.9/2.9 MB\u001b[0m \u001b[31m57.5 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.4/209.4 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading xformers-0.0.28.post1-cp310-cp310-manylinux_2_28_x86_64.whl (16.7 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.7/16.7 MB\u001b[0m \u001b[31m96.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n","Downloading art-6.3-py3-none-any.whl (606 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m606.3/606.3 kB\u001b[0m \u001b[31m37.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n","Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m99.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading pynvml-11.5.3-py3-none-any.whl (53 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m97.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading aiobotocore-2.15.2-py3-none-any.whl (77 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.4/77.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading 
aiofiles-23.2.1-py3-none-any.whl (15 kB)\n","Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n","Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading responses-0.18.0-py3-none-any.whl (38 kB)\n","Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n","Downloading sentry_sdk-2.16.0-py2.py3-none-any.whl (313 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m313.8/313.8 kB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading tyro-0.8.11-py3-none-any.whl (105 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.9/105.9 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading uvicorn-0.31.0-py3-none-any.whl (63 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.7/63.7 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading autoawq_kernels-0.0.8-cp310-cp310-manylinux2014_x86_64.whl (37.3 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m37.3/37.3 MB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading fastapi-0.115.0-py3-none-any.whl (94 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.6/94.6 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)\n","Downloading httpx-0.27.2-py3-none-any.whl (76 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.0/78.0 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading nh3-0.2.18-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (769 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m769.2/769.2 kB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n","Downloading python_multipart-0.0.12-py3-none-any.whl (23 kB)\n","Downloading 
setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n","Downloading shortuuid-1.0.13-py3-none-any.whl (10 kB)\n","Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading aioitertools-0.12.0-py3-none-any.whl (24 kB)\n","Downloading botocore-1.35.36-py3-none-any.whl (12.6 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.6/12.6 MB\u001b[0m \u001b[31m112.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n","Downloading starlette-0.38.6-py3-none-any.whl (71 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading latex2mathml-3.77.0-py3-none-any.whl (73 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.7/73.7 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading markdown2-2.5.0-py2.py3-none-any.whl (47 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n","Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n","Downloading svgwrite-1.4.3-py3-none-any.whl (67 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.1/67.1 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hBuilding wheels for collected packages: fire, fschat, wavedrom\n","  Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114250 sha256=be3201a91a6b8a29322ffd5aff7e98044cd36c5e78ae80e317f6e0fde625052e\n","  Stored in directory: /root/.cache/pip/wheels/19/39/2f/2d3cadc408a8804103f1c34ddd4b9f6a93497b11fa96fe738e\n","  Building wheel for fschat (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for fschat: filename=fschat-0.2.36-py3-none-any.whl size=272080 sha256=7933884836c10fc2d7dda89fa387cfc0a751870de6b6ccbaaf76ea0e58023abb\n","  Stored in directory: /root/.cache/pip/wheels/21/dc/55/8647f928ab3e6390d35d3bb898acca851918560726ecdfc42a\n","  Building wheel for wavedrom (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for wavedrom: filename=wavedrom-2.0.3.post3-py2.py3-none-any.whl size=30052 sha256=5e23f717a88b262c1c146efc23901c318c40c92f742c31b0013dcf7e5aec07e6\n","  Stored in directory: /root/.cache/pip/wheels/9c/52/8c/38b454b42f712f325e26f633287484c7dc1ad469e1580c5954\n","Successfully built fire fschat wavedrom\n","Installing collected packages: pydub, nh3, addict, zstandard, xxhash, websockets, triton, svgwrite, smmap, shtab, shortuuid, setproctitle, sentry-sdk, semantic-version, python-multipart, python-dotenv, pynvml, pydantic-core, packaging, orjson, markdown2, latex2mathml, jmespath, humanfriendly, hf_transfer, h11, fire, ffmpy, docker-pycreds, dill, colorama, art, aioitertools, aiofiles, wavedrom, uvicorn, tiktoken, starlette, scikit-learn, responses, pydantic, multiprocess, httpcore, gitdb, coloredlogs, botocore, xformers, tyro, tokenizers, httpx, gitpython, fastapi, bitsandbytes, autoawq-kernels, wandb, transformers, gradio-client, fschat, aiobotocore, s3fs, peft, liger-kernel, gradio, datasets, trl, optimum, evaluate, autoawq, axolotl\n","  Attempting uninstall: pydantic-core\n","    Found existing installation: pydantic_core 2.23.4\n","    Uninstalling pydantic_core-2.23.4:\n","      Successfully uninstalled pydantic_core-2.23.4\n","  Attempting uninstall: packaging\n","    Found existing installation: packaging 24.1\n","    Uninstalling packaging-24.1:\n","      Successfully uninstalled packaging-24.1\n","  Attempting uninstall: scikit-learn\n","    Found existing installation: scikit-learn 1.5.2\n","    Uninstalling scikit-learn-1.5.2:\n","      Successfully uninstalled scikit-learn-1.5.2\n","  Attempting uninstall: pydantic\n","    Found existing installation: pydantic 2.9.2\n","    Uninstalling pydantic-2.9.2:\n","      Successfully uninstalled pydantic-2.9.2\n","  Attempting uninstall: tokenizers\n","    Found existing installation: tokenizers 0.19.1\n","    Uninstalling tokenizers-0.19.1:\n","      Successfully 
uninstalled tokenizers-0.19.1\n","  Attempting uninstall: transformers\n","    Found existing installation: transformers 4.44.2\n","    Uninstalling transformers-4.44.2:\n","      Successfully uninstalled transformers-4.44.2\n","  Running setup.py develop for axolotl\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n","albumentations 1.4.15 requires pydantic>=2.7.0, but you have pydantic 2.6.3 which is incompatible.\u001b[0m\u001b[31m\n","\u001b[0mSuccessfully installed addict-2.4.0 aiobotocore-2.15.2 aiofiles-23.2.1 aioitertools-0.12.0 art-6.3 autoawq-0.2.5 autoawq-kernels-0.0.8 axolotl-0.4.1 bitsandbytes-0.44.0 botocore-1.35.36 colorama-0.4.6 coloredlogs-15.0.1 datasets-2.21.0 dill-0.3.8 docker-pycreds-0.4.0 evaluate-0.4.1 fastapi-0.115.0 ffmpy-0.4.0 fire-0.7.0 fschat-0.2.36 gitdb-4.0.11 gitpython-3.1.43 gradio-3.50.2 gradio-client-0.6.1 h11-0.14.0 hf_transfer-0.1.8 httpcore-1.0.6 httpx-0.27.2 humanfriendly-10.0 jmespath-1.0.1 latex2mathml-3.77.0 liger-kernel-0.3.0 markdown2-2.5.0 multiprocess-0.70.16 nh3-0.2.18 optimum-1.16.2 orjson-3.10.7 packaging-23.2 peft-0.13.0 pydantic-2.6.3 pydantic-core-2.16.3 pydub-0.25.1 pynvml-11.5.3 python-dotenv-1.0.1 python-multipart-0.0.12 responses-0.18.0 s3fs-2024.6.1 scikit-learn-1.4.2 semantic-version-2.10.0 sentry-sdk-2.16.0 setproctitle-1.3.3 shortuuid-1.0.13 shtab-1.7.1 smmap-5.0.1 starlette-0.38.6 svgwrite-1.4.3 tiktoken-0.8.0 tokenizers-0.20.0 transformers-4.45.1 triton-3.0.0 trl-0.9.6 tyro-0.8.11 uvicorn-0.31.0 wandb-0.18.3 wavedrom-2.0.3.post3 websockets-11.0.3 xformers-0.0.28.post1 xxhash-3.5.0 zstandard-0.22.0\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (23.2)\n","Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (1.11.1.1)\n","Requirement already satisfied: flash-attn in 
/usr/local/lib/python3.10/dist-packages (2.6.3)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.4.1+cu121)\n","Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from flash-attn) (0.8.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.16.1)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.12.2)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.13.3)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2024.6.1)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n","Requirement already satisfied: deepspeed in /usr/local/lib/python3.10/dist-packages (0.15.1)\n","Requirement already satisfied: hjson in /usr/local/lib/python3.10/dist-packages (from deepspeed) (3.1.0)\n","Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.11.1.1)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.26.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (23.2)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from deepspeed) (5.9.5)\n","Requirement already satisfied: py-cpuinfo in 
/usr/local/lib/python3.10/dist-packages (from deepspeed) (9.0.0)\n","Requirement already satisfied: pydantic>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.6.3)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.4.1+cu121)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from deepspeed) (4.66.5)\n","Requirement already satisfied: nvidia-ml-py in /usr/local/lib/python3.10/dist-packages (from deepspeed) (12.560.30)\n","Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0.0->deepspeed) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0.0->deepspeed) (2.16.3)\n","Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0.0->deepspeed) (4.12.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.16.1)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (1.13.3)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.1.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (2024.6.1)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->deepspeed) (2.1.5)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->deepspeed) (1.3.0)\n"]}]},{"cell_type":"markdown","source":["###Hugging Face login"],"metadata":{"id":"6PplPlY5TeEY"}},{"cell_type":"code","source":["from huggingface_hub import 
notebook_login\n","notebook_login()"],"metadata":{"id":"vQHoE0ACTgpn","colab":{"base_uri":"https://localhost:8080/","height":113,"referenced_widgets":["c180822e7754499c9c44ad9d5a1c624c","60fd06e6a8844b40a002dfd3a3a70a3d","26df93c1b51a4d46b3b3e8496b3b77f4","c88e0a4ef2f24eb2a822b07a6d1eefb0","396a170dc75a4c8bb01eda31a6cba556","feab9d4fc44149a69c35712d0fd883c1","a8dc2d8300eb4073879beafd38582dec","d737fa3b102640ca917182f291be536d","8ab41d58d57c4864a1f93afecdb31789","82e0ed0936944830932886925b8bd44f","dafdd31227e646e48c804127b8f01bd6","658f892da6514bb9aa33ee471ca75909","8702d69b0b6142e5980565fc78681855","6a1dd8f39e364a4cae78f709c1d45560","fed784cd1836418eb950fdf1d4cf0a83","3c68b8d84bb047209527bcf98371db9a","aa6d9ff31c204d47ac15a1aca9026b77","2c77cd25bd4d409485c5e41bd09d6baa","f54176c91b7f48c7a44dd5164a03d061","73c5227e6943496e9b8c3ad2bc47004d","069557ca72044c228853408187cf33ac","c56e9e08fe36430798444127a86048d8","aa0847b6c46e41db91e3d0058f81d789","660fae67f5e3445fb75c2ff750746503","b56986bb4f19492d81f23bd2a7823d45","ce97b1f24bd14e5e927cb0d691584823","f428dc9030ec4acbbef406bcf65c99cc","db7d2c036cd64ea2acbb9197308b8199","09e4e8a163c44313a9978eb789ae053e"]},"executionInfo":{"status":"ok","timestamp":1728489410023,"user_tz":240,"elapsed":675,"user":{"displayName":"Sunny Liu","userId":"17044907459311958453"}},"outputId":"3457e602-dafe-43c4-d5ee-86fa893e0ec5"},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c180822e7754499c9c44ad9d5a1c624c"}},"metadata":{}}]},{"cell_type":"markdown","source":["## Example configuration"],"metadata":{"id":"akdTq8AeTjki"}},{"cell_type":"code","source":["import yaml\n","\n","yaml_string = \"\"\"\n","base_model: NousResearch/Meta-Llama-3.1-8B\n","\n","load_in_8bit: true\n","load_in_4bit: false\n","strict: 
false\n","\n","datasets:\n","  - path: tatsu-lab/alpaca\n","    type: alpaca\n","dataset_prepared_path: last_run_prepared\n","val_set_size: 0.05\n","output_dir: ./outputs/lora-out\n","\n","sequence_len: 1024\n","sample_packing: true\n","pad_to_sequence_len: true\n","\n","adapter: lora\n","lora_model_dir:\n","lora_r: 32\n","lora_alpha: 16\n","lora_dropout: 0.05\n","lora_target_linear: true\n","lora_fan_in_fan_out:\n","lora_modules_to_save:\n","  - embed_tokens\n","  - lm_head\n","\n","wandb_project:\n","wandb_entity:\n","wandb_watch:\n","wandb_name:\n","wandb_log_model:\n","\n","gradient_accumulation_steps: 8\n","micro_batch_size: 1\n","num_epochs: 1\n","optimizer: paged_adamw_8bit\n","lr_scheduler: cosine\n","learning_rate: 2e-5\n","\n","train_on_inputs: false\n","group_by_length: false\n","bf16: auto\n","fp16:\n","tf32: false\n","\n","gradient_checkpointing: true\n","gradient_checkpointing_kwargs:\n","  use_reentrant: false\n","early_stopping_patience:\n","resume_from_checkpoint:\n","logging_steps: 1\n","xformers_attention:\n","flash_attention: false\n","sdp_attention: true\n","\n","warmup_steps: 1\n","evals_per_epoch: 2\n","eval_table_size:\n","saves_per_epoch: 1\n","debug:\n","deepspeed:\n","weight_decay: 0.0\n","fsdp:\n","fsdp_config:\n","special_tokens:\n","  pad_token: <|end_of_text|>\n","\"\"\"\n","\n","\n","# Convert the YAML string to a Python dictionary\n","yaml_dict = yaml.safe_load(yaml_string)\n","\n","# Specify your file path\n","file_path = 'ft-8b-lora_colab.yaml'\n","\n","# Write the YAML file\n","with open(file_path, 'w') as file:\n","    yaml.dump(yaml_dict, file)"],"metadata":{"id":"LUHiAzDZTlkq","executionInfo":{"status":"ok","timestamp":1728489694332,"user_tz":240,"elapsed":212,"user":{"displayName":"Sunny Liu","userId":"17044907459311958453"}}},"execution_count":7,"outputs":[]},{"cell_type":"markdown","source":["Above we have a configuration file with base LLM model and datasets specified, among many other things. 
Axolotl can automatically detect whether the specified datasets are in a Hugging Face repo or on the local machine.\n","\n","The Axolotl configuration options encompass model and dataset selection, data pre-processing, and training. Let's go through them line by line:\n","\n","*   \"base_model\": String value, specifies the underlying pre-trained LLM that will be used for finetuning.\n","\n","Next we have options for model weight quantization. Quantization allows for reduction in occupied memory on GPUs.\n","\n","*   \"load_in_8bit\": Boolean value, whether to quantize the model weights into 8-bit integers.\n","\n","*   \"load_in_4bit\": Boolean value, whether to quantize the model weights into 4-bit integers.\n","\n","*   \"strict\": Boolean value. If false, it allows for overriding established configuration options in the YAML file when executing from the command-line interface.\n","\n","*   \"datasets\": A list of dicts that contain the path and type of each dataset, as well as other optional dataset-related configurations. Supports multiple datasets.\n","\n","*   \"val_set_size\": Either a float value less than one or an integer less than the total size of the dataset. Sets the size of the validation set taken from the whole dataset. If float, sets the proportion of the dataset assigned for validation. If integer, sets the number of samples in the validation set.\n","\n","*   \"output_dir\": String value. Path where the trained model is saved.\n","\n","For data preprocessing:\n","\n","*   \"sequence_len\": Integer. Specifies the maximum sequence length of the input. Typically 2048 or less.\n","\n","*   \"pad_to_sequence_len\": Boolean. Whether to pad inputs to the maximum sequence length.\n","\n","*   \"sample_packing\": Boolean. Specifies whether to use multi-packing with block diagonal attention.\n","\n","*   \"special_tokens\": Python dict, optional. 
Allows users to specify the additional special tokens to be ignored by the tokenizer.\n","\n","For LoRA configuration and its hyperparameters:\n","\n","*   \"adapter\": String. Either \"lora\" or \"qlora\", depending on the user's choice.\n","\n","*   \"lora_model_dir\": String, optional. Path to the directory that contains the LoRA model, if there is already a trained LoRA model the user would like to use.\n","\n","*   \"lora_r\": Integer. Refers to the rank of the LoRA decomposition matrices. A higher value will reduce LoRA efficiency. Recommended to be set to 8.\n","\n","*   \"lora_alpha\": Integer. Scales the weight matrices by $\\frac{\\text{lora_alpha}}{\\text{lora_r}}$. Recommended to be fixed at 16.\n","\n","*   \"lora_dropout\": Float that is 1 or less. The dropout probability of a LoRA layer.\n","\n","*   \"lora_target_linear\": Boolean. If true, LoRA will target all linear modules in the transformer architecture.\n","\n","*   \"lora_modules_to_save\": If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.\n","\n","See [LoRA](https://arxiv.org/abs/2106.09685) for a detailed explanation of the LoRA implementation.\n","\n","For the training configurations:\n","\n","*   \"gradient_accumulation_steps\": Integer. The number of steps over which to accumulate gradients for batch training. E.g. if 2, the weights are updated every two steps.\n","\n","*   \"micro_batch_size\": Integer. The batch size per GPU divided by gradient_accumulation_steps.\n","\n","*   \"num_epochs\": Integer. Number of epochs. One epoch is when training has looped over every batch in the whole data set once.\n","\n","*   \"optimizer\": The optimizer to use for the training.\n","\n","*   \"learning_rate\": The learning rate.\n","\n","*   \"lr_scheduler\": The learning rate scheduler to use for adjusting the learning rate during training.\n","\n","*   \"train_on_inputs\": Boolean. 
Whether to include the user's prompt in the training labels or ignore it.\n","\n","*   \"group_by_length\": Boolean. Whether to group similarly sized data to minimize padding.\n","\n","*   \"bf16\": Either \"auto\", \"true\", or \"false\". Whether to use CUDA bf16 floating point format. If set to \"auto\", will automatically apply bf16 should the GPU support it.\n","\n","*   \"fp16\": Optional. Specifies whether to use CUDA fp16. Automatically set to true if \"bf16\" is set to true. Otherwise false.\n","\n","*   \"tf32\": Boolean. Whether to use CUDA tf32. Will override bf16.\n","\n","*   \"gradient_checkpointing\": Boolean. Whether to use [gradient checkpointing](https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing).\n","\n","*   \"gradient_checkpointing_kwargs\": Python Dict. Fed into the trainer.\n","\n","*   \"logging_steps\": Integer. Log training information every specified number of steps.\n","\n","*   \"flash_attention\": Boolean. Whether to use the [flash attention](https://github.com/Dao-AILab/flash-attention) mechanism.\n","\n","*   \"sdp_attention\": Boolean. Whether to use the Scaled Dot Product attention mechanism (the attention mechanism in the [original implementation](https://arxiv.org/abs/1706.03762) of transformers).\n","\n","*   \"warmup_steps\": Integer. The number of initial training steps during which a very low learning rate is used.\n","\n","*   \"evals_per_epoch\": Integer. Number of evaluations to be performed within one training epoch.\n","\n","*   \"saves_per_epoch\": Integer. Number of times the model is saved in one training epoch.\n","\n","*   \"weight_decay\": Non-negative float. Sets the \"strength\" of weight decay (i.e. setting the coefficient of L2 regularization).\n"],"metadata":{"id":"Y-DvAayoTsxq"}},{"cell_type":"markdown","source":["The above is but a snippet aiming to get users familiarized with the types of streamlined configuration options axolotl provides. 
For a full list of configuration options, see [here](https://github.com/axolotl-ai-cloud/axolotl/blob/main/docs/config.qmd)\n"],"metadata":{"id":"w-kC8Y_2Tyk1"}},{"cell_type":"markdown","source":["Train the model"],"metadata":{"id":"DfFqam6GT2dF"}},{"cell_type":"code","source":["!cd ./axolotl/\n","!accelerate launch -m axolotl.cli.train ./\"ft-8b-lora_colab.yaml\""],"metadata":{"id":"YNcoiy8mT5IR","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1728490112740,"user_tz":240,"elapsed":331515,"user":{"displayName":"Sunny Liu","userId":"17044907459311958453"}},"outputId":"f12698cd-dcdd-4942-813d-73de22fa07fb"},"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["The following values were not passed to `accelerate launch` and had defaults used instead:\n","\t`--num_processes` was set to a value of `1`\n","\t`--num_machines` was set to a value of `1`\n","\t`--mixed_precision` was set to a value of `'no'`\n","\t`--dynamo_backend` was set to a value of `'no'`\n","To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n","2024-10-09 16:03:07.667507: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-10-09 16:03:07.699692: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-10-09 16:03:07.709483: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-10-09 16:03:07.731437: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the 
following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-10-09 16:03:08.838904: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","/content/src/axolotl/src/axolotl/utils/gradient_checkpointing/unsloth.py:29: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n","  def forward(ctx, forward_function, hidden_states, *args):\n","/content/src/axolotl/src/axolotl/utils/gradient_checkpointing/unsloth.py:40: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n","  def backward(ctx, dY):\n","[2024-10-09 16:03:11,661] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n","[2024-10-09 16:03:11,744] [INFO] [root.spawn:61] [PID:2725] x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -c /tmp/tmp1gp40d1_/test.c -o /tmp/tmp1gp40d1_/test.o\n","[2024-10-09 16:03:11,768] [INFO] [root.spawn:61] [PID:2725] x86_64-linux-gnu-gcc /tmp/tmp1gp40d1_/test.o -laio -o /tmp/tmp1gp40d1_/a.out\n","[2024-10-09 16:03:12,370] [INFO] [root.spawn:61] [PID:2725] x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -c /tmp/tmpriqmge5_/test.c -o /tmp/tmpriqmge5_/test.o\n","[2024-10-09 16:03:12,387] [INFO] [root.spawn:61] [PID:2725] x86_64-linux-gnu-gcc /tmp/tmpriqmge5_/test.o -L/usr/local/cuda -L/usr/local/cuda/lib64 -lcufile -o /tmp/tmpriqmge5_/a.out\n","[2024-10-09 16:03:12,433] [INFO] [root.spawn:61] [PID:2725] x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat 
-Werror=format-security -g -fwrapv -O2 -fPIC -c /tmp/tmpn_bwjo1h/test.c -o /tmp/tmpn_bwjo1h/test.o\n","[2024-10-09 16:03:12,449] [INFO] [root.spawn:61] [PID:2725] x86_64-linux-gnu-gcc /tmp/tmpn_bwjo1h/test.o -laio -o /tmp/tmpn_bwjo1h/a.out\n","/content/src/axolotl/src/axolotl/monkeypatch/relora.py:16: DeprecationWarning: `TorchScript` support for functional optimizers is deprecated and will be removed in a future PyTorch release. Consider using the `torch.compile` optimizer instead.\n","  from torch.distributed.optim import ZeroRedundancyOptimizer\n","/usr/local/lib/python3.10/dist-packages/pydantic/_internal/_fields.py:151: UserWarning: Field \"model_kwargs\" has conflict with protected namespace \"model_\".\n","\n","You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.\n","  warnings.warn(\n","\u001b[33m[2024-10-09 16:03:14,731] [WARNING] [axolotl.utils.config.models.input.check_sample_packing_w_sdpa_bf16:1327] [PID:2725] [RANK:0] sample_packing & torch sdpa with bf16 is unsupported may results in 0.0 loss. This may work on H100s.\u001b[39m\n","[2024-10-09 16:03:14,731] [INFO] [axolotl.utils.config.models.input.check_eval_packing:989] [PID:2725] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`\u001b[39m\n","[2024-10-09 16:03:14,733] [DEBUG] [axolotl.normalize_config:83] [PID:2725] [RANK:0] bf16 support detected, enabling for this configuration.\u001b[39m\n","config.json: 100% 826/826 [00:00<00:00, 6.21MB/s]\n","[2024-10-09 16:03:15,052] [INFO] [axolotl.normalize_config:207] [PID:2725] [RANK:0] GPU memory usage baseline: 0.000GB (+0.002GB cache, +0.352GB misc)\u001b[39m\n","                                 dP            dP   dP \n","                                 88            88   88 \n","      .d8888b. dP.  .dP .d8888b. 88 .d8888b. d8888P 88 \n","      88'  `88  `8bd8'  88'  `88 88 88'  `88   88   88 \n","      88.  .88  .d88b.  88.  .88 88 88.  
.88   88   88 \n","      `88888P8 dP'  `dP `88888P' dP `88888P'   dP   dP \n","                                                       \n","                                                       \n","\n","****************************************\n","**** Axolotl Dependency Versions *****\n","  accelerate: 0.34.2         \n","        peft: 0.13.0         \n","transformers: 4.45.1         \n","         trl: 0.9.6          \n","       torch: 2.4.1+cu121    \n","bitsandbytes: 0.44.0         \n","****************************************\n","tokenizer_config.json: 100% 50.5k/50.5k [00:00<00:00, 791kB/s]\n","tokenizer.json: 100% 9.09M/9.09M [00:00<00:00, 22.2MB/s]\n","special_tokens_map.json: 100% 73.0/73.0 [00:00<00:00, 484kB/s]\n","[2024-10-09 16:03:17,166] [DEBUG] [axolotl.load_tokenizer:290] [PID:2725] [RANK:0] EOS: 128001 / <|end_of_text|>\u001b[39m\n","[2024-10-09 16:03:17,166] [DEBUG] [axolotl.load_tokenizer:291] [PID:2725] [RANK:0] BOS: 128000 / <|begin_of_text|>\u001b[39m\n","[2024-10-09 16:03:17,166] [DEBUG] [axolotl.load_tokenizer:292] [PID:2725] [RANK:0] PAD: 128001 / <|end_of_text|>\u001b[39m\n","[2024-10-09 16:03:17,166] [DEBUG] [axolotl.load_tokenizer:293] [PID:2725] [RANK:0] UNK: None / None\u001b[39m\n","[2024-10-09 16:03:17,166] [INFO] [axolotl.load_tokenizer:304] [PID:2725] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.\u001b[39m\n","[2024-10-09 16:03:17,167] [INFO] [axolotl.load_tokenized_prepared_datasets:208] [PID:2725] [RANK:0] Unable to find prepared dataset in last_run_prepared/f66a86a5bf935a173bd7f6fa02531d7f\u001b[39m\n","[2024-10-09 16:03:17,167] [INFO] [axolotl.load_tokenized_prepared_datasets:209] [PID:2725] [RANK:0] Loading raw datasets...\u001b[39m\n","\u001b[33m[2024-10-09 16:03:17,167] [WARNING] [axolotl.load_tokenized_prepared_datasets:211] [PID:2725] [RANK:0] Processing datasets during training can lead to VRAM instability. 
Please pre-process your dataset.\u001b[39m\n","[2024-10-09 16:03:17,167] [INFO] [axolotl.load_tokenized_prepared_datasets:218] [PID:2725] [RANK:0] No seed provided, using default seed of 42\u001b[39m\n","Downloading readme: 100% 7.47k/7.47k [00:00<00:00, 81.7kB/s]\n","Downloading data: 100% 24.2M/24.2M [00:00<00:00, 41.2MB/s]\n","Generating train split: 100% 52002/52002 [00:00<00:00, 207938.59 examples/s]\n","[2024-10-09 16:03:21,053] [INFO] [axolotl.get_dataset_wrapper:582] [PID:2725] [RANK:0] Loading dataset with base_type: alpaca and prompt_style: None\u001b[39m\n","Tokenizing Prompts (num_proc=2):   0% 171/52002 [00:01<06:44, 128.24 examples/s]\u001b[33m[2024-10-09 16:03:23,068] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):   4% 2087/52002 [00:02<00:28, 1741.86 examples/s]\u001b[33m[2024-10-09 16:03:24,037] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):   4% 2321/52002 [00:02<00:30, 1647.59 examples/s]\u001b[33m[2024-10-09 16:03:24,150] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):   6% 2979/52002 [00:03<00:25, 1929.46 examples/s]\u001b[33m[2024-10-09 16:03:24,509] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):   9% 4757/52002 [00:04<00:24, 1953.51 examples/s]\u001b[33m[2024-10-09 16:03:25,382] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  18% 9536/52002 [00:06<00:20, 2101.70 examples/s]\u001b[33m[2024-10-09 16:03:27,749] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  22% 11575/52002 [00:07<00:20, 2012.95 
examples/s]\u001b[33m[2024-10-09 16:03:28,807] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  25% 13149/52002 [00:08<00:19, 2028.71 examples/s]\u001b[33m[2024-10-09 16:03:29,660] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  28% 14791/52002 [00:09<00:33, 1109.23 examples/s]\u001b[33m[2024-10-09 16:03:31,136] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  33% 17408/52002 [00:12<00:38, 903.50 examples/s] \u001b[33m[2024-10-09 16:03:33,595] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  39% 20155/52002 [00:14<00:31, 1018.75 examples/s]\u001b[33m[2024-10-09 16:03:36,293] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  42% 21807/52002 [00:16<00:20, 1500.57 examples/s]\u001b[33m[2024-10-09 16:03:37,772] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","\u001b[33m[2024-10-09 16:03:37,851] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  42% 22026/52002 [00:16<00:18, 1650.76 examples/s]\u001b[33m[2024-10-09 16:03:37,957] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  55% 28554/52002 [00:19<00:11, 1968.45 examples/s]\u001b[33m[2024-10-09 16:03:41,199] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  56% 29223/52002 [00:20<00:11, 1977.64 examples/s]\u001b[33m[2024-10-09 16:03:41,617] [WARNING] 
[axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  71% 37110/52002 [00:24<00:08, 1836.21 examples/s]\u001b[33m[2024-10-09 16:03:45,782] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  76% 39683/52002 [00:25<00:06, 1930.22 examples/s]\u001b[33m[2024-10-09 16:03:47,063] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  79% 41337/52002 [00:26<00:08, 1265.20 examples/s]\u001b[33m[2024-10-09 16:03:48,149] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  81% 42344/52002 [00:27<00:07, 1286.09 examples/s]\u001b[33m[2024-10-09 16:03:49,032] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  89% 46175/52002 [00:31<00:04, 1169.72 examples/s]\u001b[33m[2024-10-09 16:03:52,459] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  91% 47075/52002 [00:32<00:05, 962.14 examples/s]\u001b[33m[2024-10-09 16:03:53,449] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  91% 47183/52002 [00:32<00:05, 941.50 examples/s]\u001b[33m[2024-10-09 16:03:53,574] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  92% 47781/52002 [00:32<00:04, 988.78 examples/s]\u001b[33m[2024-10-09 16:03:54,193] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  93% 48182/52002 [00:33<00:03, 1139.65 examples/s]\u001b[33m[2024-10-09 
16:03:54,441] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  93% 48322/52002 [00:33<00:03, 1121.60 examples/s]\u001b[33m[2024-10-09 16:03:54,602] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  95% 49323/52002 [00:33<00:01, 1674.12 examples/s]\u001b[33m[2024-10-09 16:03:55,115] [WARNING] [axolotl._tokenize:66] [PID:2847] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2):  99% 51388/52002 [00:34<00:00, 1753.94 examples/s]\u001b[33m[2024-10-09 16:03:56,164] [WARNING] [axolotl._tokenize:66] [PID:2848] [RANK:0] Empty text requested for tokenization.\u001b[39m\n","Tokenizing Prompts (num_proc=2): 100% 52002/52002 [00:35<00:00, 1467.57 examples/s]\n","Dropping Long Sequences (num_proc=2): 100% 52002/52002 [00:06<00:00, 7626.02 examples/s]\n","Drop Samples with Zero Trainable Tokens (num_proc=2): 100% 52002/52002 [00:11<00:00, 4527.53 examples/s]\n","Add position_id column (Sample Packing) (num_proc=2): 100% 51974/51974 [00:12<00:00, 4023.83 examples/s]\n","[2024-10-09 16:04:30,105] [INFO] [axolotl.load_tokenized_prepared_datasets:461] [PID:2725] [RANK:0] Saving merged prepared dataset to disk... 
last_run_prepared/f66a86a5bf935a173bd7f6fa02531d7f\u001b[39m\n","Saving the dataset (1/1 shards): 100% 51974/51974 [00:00<00:00, 320074.99 examples/s]\n","[2024-10-09 16:04:30,314] [DEBUG] [axolotl.calculate_total_num_steps:316] [PID:2725] [RANK:0] total_num_tokens: 273_872\u001b[39m\n","[2024-10-09 16:04:30,337] [DEBUG] [axolotl.calculate_total_num_steps:333] [PID:2725] [RANK:0] `total_supervised_tokens: 150_069`\u001b[39m\n","[2024-10-09 16:04:36,180] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:195] [PID:2725] [RANK:0] gather_len_batches: [139]\u001b[39m\n","[2024-10-09 16:04:36,181] [DEBUG] [axolotl.calculate_total_num_steps:385] [PID:2725] [RANK:0] data_loader_len: 17\u001b[39m\n","[2024-10-09 16:04:36,181] [INFO] [axolotl.calc_sample_packing_eff_est:391] [PID:2725] [RANK:0] sample_packing_eff_est across ranks: [0.9620616007194245]\u001b[39m\n","[2024-10-09 16:04:36,181] [DEBUG] [axolotl.calculate_total_num_steps:403] [PID:2725] [RANK:0] sample_packing_eff_est: None\u001b[39m\n","[2024-10-09 16:04:36,181] [DEBUG] [axolotl.calculate_total_num_steps:411] [PID:2725] [RANK:0] total_num_steps: 17\u001b[39m\n","[2024-10-09 16:04:36,228] [DEBUG] [axolotl.calculate_total_num_steps:316] [PID:2725] [RANK:0] total_num_tokens: 5_124_625\u001b[39m\n","[2024-10-09 16:04:36,640] [DEBUG] [axolotl.calculate_total_num_steps:333] [PID:2725] [RANK:0] `total_supervised_tokens: 2_779_441`\u001b[39m\n","[2024-10-09 16:04:36,678] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:195] [PID:2725] [RANK:0] gather_len_batches: [2587]\u001b[39m\n","[2024-10-09 16:04:36,679] [DEBUG] [axolotl.calculate_total_num_steps:385] [PID:2725] [RANK:0] data_loader_len: 322\u001b[39m\n","[2024-10-09 16:04:36,679] [INFO] [axolotl.calc_sample_packing_eff_est:391] [PID:2725] [RANK:0] sample_packing_eff_est across ranks: [0.9672432550372052]\u001b[39m\n","[2024-10-09 16:04:36,679] [DEBUG] [axolotl.calculate_total_num_steps:403] [PID:2725] [RANK:0] sample_packing_eff_est: 
0.97\u001b[39m\n","[2024-10-09 16:04:36,679] [DEBUG] [axolotl.calculate_total_num_steps:411] [PID:2725] [RANK:0] total_num_steps: 322\u001b[39m\n","[2024-10-09 16:04:36,679] [DEBUG] [axolotl.train.train:67] [PID:2725] [RANK:0] loading tokenizer... NousResearch/Meta-Llama-3.1-8B\u001b[39m\n","[2024-10-09 16:04:37,452] [DEBUG] [axolotl.load_tokenizer:290] [PID:2725] [RANK:0] EOS: 128001 / <|end_of_text|>\u001b[39m\n","[2024-10-09 16:04:37,452] [DEBUG] [axolotl.load_tokenizer:291] [PID:2725] [RANK:0] BOS: 128000 / <|begin_of_text|>\u001b[39m\n","[2024-10-09 16:04:37,452] [DEBUG] [axolotl.load_tokenizer:292] [PID:2725] [RANK:0] PAD: 128001 / <|end_of_text|>\u001b[39m\n","[2024-10-09 16:04:37,452] [DEBUG] [axolotl.load_tokenizer:293] [PID:2725] [RANK:0] UNK: None / None\u001b[39m\n","[2024-10-09 16:04:37,452] [INFO] [axolotl.load_tokenizer:304] [PID:2725] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.\u001b[39m\n","[2024-10-09 16:04:37,452] [DEBUG] [axolotl.train.train:99] [PID:2725] [RANK:0] loading model and peft_config...\u001b[39m\n","[2024-10-09 16:04:37,555] [INFO] [axolotl.load_model:469] [PID:2725] [RANK:0] patching llama _prepare_4d_causal_attention_mask*\u001b[39m\n","`low_cpu_mem_usage` was None, now set to True since model is quantized.\n","model.safetensors.index.json: 100% 23.9k/23.9k [00:00<00:00, 86.8MB/s]\n","Downloading shards:   0% 0/4 [00:00<?, ?it/s]\n","model-00001-of-00004.safetensors:   0% 0.00/4.98G [00:00<?, ?B/s]\u001b[A\n","model-00001-of-00004.safetensors:   0% 10.5M/4.98G [00:00<03:05, 26.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   0% 21.0M/4.98G [00:00<02:10, 38.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 31.5M/4.98G [00:00<01:39, 49.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 41.9M/4.98G [00:00<01:21, 60.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 52.4M/4.98G [00:00<01:09, 70.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 73.4M/4.98G 
[00:01<00:51, 95.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   2% 94.4M/4.98G [00:01<00:41, 118MB/s] \u001b[A\n","model-00001-of-00004.safetensors:   2% 115M/4.98G [00:01<00:36, 133MB/s] \u001b[A\n","model-00001-of-00004.safetensors:   3% 136M/4.98G [00:01<00:32, 148MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   3% 157M/4.98G [00:01<00:30, 160MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 178M/4.98G [00:01<00:28, 168MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 199M/4.98G [00:01<00:27, 173MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 220M/4.98G [00:01<00:27, 174MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 241M/4.98G [00:02<00:26, 179MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 262M/4.98G [00:02<00:25, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   6% 283M/4.98G [00:02<00:25, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   6% 304M/4.98G [00:02<00:25, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 325M/4.98G [00:02<00:25, 180MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 346M/4.98G [00:02<00:25, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 367M/4.98G [00:02<00:25, 178MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 388M/4.98G [00:02<00:25, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 409M/4.98G [00:02<00:24, 188MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 430M/4.98G [00:03<00:23, 193MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 451M/4.98G [00:03<00:23, 189MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 472M/4.98G [00:03<00:24, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  10% 493M/4.98G [00:03<00:24, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  10% 514M/4.98G [00:03<00:24, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 535M/4.98G [00:03<00:24, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 556M/4.98G [00:03<00:23, 
186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 577M/4.98G [00:03<00:23, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 598M/4.98G [00:04<00:37, 118MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 619M/4.98G [00:04<00:33, 129MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 640M/4.98G [00:04<00:31, 137MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 661M/4.98G [00:04<00:28, 151MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  14% 682M/4.98G [00:04<00:26, 164MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  14% 703M/4.98G [00:04<00:31, 134MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 724M/4.98G [00:04<00:28, 147MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 744M/4.98G [00:05<00:28, 150MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 765M/4.98G [00:05<00:26, 160MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 786M/4.98G [00:05<00:25, 166MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 807M/4.98G [00:06<01:12, 57.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 828M/4.98G [00:06<01:01, 67.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 860M/4.98G [00:06<00:43, 93.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  18% 891M/4.98G [00:06<00:34, 119MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  18% 912M/4.98G [00:06<00:31, 131MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 933M/4.98G [00:06<00:28, 142MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 954M/4.98G [00:07<00:26, 151MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 975M/4.98G [00:07<00:25, 160MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 996M/4.98G [00:07<00:23, 166MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 1.02G/4.98G [00:07<00:23, 172MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.04G/4.98G [00:07<00:22, 176MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.06G/4.98G [00:07<00:22, 
178MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  22% 1.08G/4.98G [00:07<00:21, 179MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  22% 1.10G/4.98G [00:07<00:21, 178MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.12G/4.98G [00:09<01:20, 47.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.14G/4.98G [00:09<01:03, 60.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.16G/4.98G [00:09<00:50, 76.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.18G/4.98G [00:09<00:41, 92.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.22G/4.98G [00:09<00:31, 120MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  25% 1.25G/4.98G [00:09<00:26, 142MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  25% 1.27G/4.98G [00:09<00:24, 151MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  26% 1.29G/4.98G [00:09<00:23, 160MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  26% 1.31G/4.98G [00:09<00:22, 167MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.33G/4.98G [00:10<00:28, 130MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.35G/4.98G [00:10<00:34, 104MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.37G/4.98G [00:10<00:43, 83.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.39G/4.98G [00:11<00:45, 79.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.41G/4.98G [00:11<00:46, 76.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.43G/4.98G [00:11<00:45, 78.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.44G/4.98G [00:11<00:46, 75.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.45G/4.98G [00:11<00:47, 74.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.46G/4.98G [00:12<00:51, 67.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.47G/4.98G [00:12<00:52, 66.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  30% 1.48G/4.98G [00:12<00:54, 64.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  30% 1.50G/4.98G [00:12<00:43, 
80.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.52G/4.98G [00:12<00:34, 101MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  31% 1.54G/4.98G [00:12<00:28, 120MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.56G/4.98G [00:12<00:25, 136MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.58G/4.98G [00:13<00:22, 149MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.60G/4.98G [00:13<00:21, 159MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  33% 1.63G/4.98G [00:13<00:20, 166MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  33% 1.65G/4.98G [00:13<00:19, 173MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.67G/4.98G [00:13<00:18, 176MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.69G/4.98G [00:13<00:18, 180MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.71G/4.98G [00:13<00:17, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.73G/4.98G [00:13<00:17, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.75G/4.98G [00:14<00:17, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.77G/4.98G [00:14<00:17, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.79G/4.98G [00:14<00:17, 181MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.81G/4.98G [00:14<00:16, 188MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  37% 1.84G/4.98G [00:14<00:16, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  37% 1.86G/4.98G [00:14<00:16, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.88G/4.98G [00:14<00:16, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.90G/4.98G [00:14<00:16, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.92G/4.98G [00:14<00:17, 178MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.94G/4.98G [00:15<00:16, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.96G/4.98G [00:15<00:17, 176MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 1.98G/4.98G [00:15<00:16, 
184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 2.00G/4.98G [00:15<00:15, 189MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  41% 2.02G/4.98G [00:15<00:23, 124MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  41% 2.06G/4.98G [00:15<00:19, 152MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.09G/4.98G [00:15<00:17, 168MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.11G/4.98G [00:16<00:17, 167MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.13G/4.98G [00:16<00:17, 166MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.16G/4.98G [00:16<00:15, 180MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.18G/4.98G [00:16<00:15, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.20G/4.98G [00:16<00:15, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  45% 2.22G/4.98G [00:16<00:14, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  45% 2.24G/4.98G [00:16<00:14, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.26G/4.98G [00:16<00:14, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.29G/4.98G [00:17<00:14, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.31G/4.98G [00:17<00:14, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.33G/4.98G [00:17<00:14, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.35G/4.98G [00:17<00:14, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.37G/4.98G [00:17<00:14, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.39G/4.98G [00:17<00:13, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.41G/4.98G [00:17<00:13, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  49% 2.43G/4.98G [00:17<00:13, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  49% 2.45G/4.98G [00:17<00:14, 174MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.49G/4.98G [00:18<00:13, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.51G/4.98G [00:18<00:13, 
186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.53G/4.98G [00:18<00:13, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.55G/4.98G [00:18<00:13, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.57G/4.98G [00:18<00:12, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.59G/4.98G [00:18<00:13, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.61G/4.98G [00:18<00:12, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  53% 2.63G/4.98G [00:18<00:12, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  53% 2.65G/4.98G [00:19<00:12, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.67G/4.98G [00:19<00:12, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.69G/4.98G [00:19<00:12, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.72G/4.98G [00:19<00:12, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.74G/4.98G [00:19<00:12, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.76G/4.98G [00:19<00:11, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.78G/4.98G [00:19<00:11, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.80G/4.98G [00:19<00:11, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  57% 2.82G/4.98G [00:19<00:11, 185MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  57% 2.84G/4.98G [00:20<00:11, 187MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.86G/4.98G [00:20<00:11, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.88G/4.98G [00:20<00:11, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.90G/4.98G [00:20<00:11, 186MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.93G/4.98G [00:23<01:42, 20.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.95G/4.98G [00:23<01:19, 25.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 2.97G/4.98G [00:24<01:06, 30.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 2.98G/4.98G [00:24<00:59, 
33.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 2.99G/4.98G [00:24<00:53, 37.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 3.00G/4.98G [00:24<00:52, 38.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 3.01G/4.98G [00:25<00:48, 40.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  61% 3.02G/4.98G [00:25<00:42, 45.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  61% 3.04G/4.98G [00:25<00:30, 63.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.06G/4.98G [00:25<00:22, 83.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.08G/4.98G [00:25<00:18, 103MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  62% 3.10G/4.98G [00:25<00:20, 89.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  63% 3.14G/4.98G [00:26<00:14, 123MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  63% 3.16G/4.98G [00:26<00:13, 134MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.18G/4.98G [00:26<00:12, 146MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.20G/4.98G [00:26<00:11, 156MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  65% 3.22G/4.98G [00:26<00:10, 164MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  65% 3.24G/4.98G [00:26<00:10, 170MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.26G/4.98G [00:26<00:09, 174MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.28G/4.98G [00:26<00:09, 177MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.30G/4.98G [00:26<00:09, 180MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.32G/4.98G [00:27<00:09, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.34G/4.98G [00:27<00:08, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.37G/4.98G [00:27<00:08, 184MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.39G/4.98G [00:29<01:05, 24.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.41G/4.98G [00:30<00:50, 31.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  69% 3.43G/4.98G [00:31<01:08, 
22.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  69% 3.45G/4.98G [00:31<00:50, 30.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.47G/4.98G [00:31<00:37, 40.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.50G/4.98G [00:32<00:24, 59.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.53G/4.98G [00:33<00:33, 43.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.55G/4.98G [00:33<00:27, 51.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.58G/4.98G [00:33<00:22, 63.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.60G/4.98G [00:33<00:18, 75.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  73% 3.63G/4.98G [00:33<00:13, 100MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  73% 3.65G/4.98G [00:33<00:11, 116MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.67G/4.98G [00:34<00:19, 68.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.69G/4.98G [00:34<00:15, 81.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  75% 3.72G/4.98G [00:34<00:11, 108MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  75% 3.74G/4.98G [00:34<00:09, 123MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.76G/4.98G [00:34<00:08, 138MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.79G/4.98G [00:35<00:08, 143MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.81G/4.98G [00:35<00:07, 152MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  77% 3.83G/4.98G [00:35<00:07, 160MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  77% 3.85G/4.98G [00:35<00:06, 164MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.87G/4.98G [00:35<00:10, 106MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.89G/4.98G [00:35<00:10, 107MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.91G/4.98G [00:36<00:12, 84.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.93G/4.98G [00:36<00:14, 73.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.94G/4.98G [00:36<00:13, 
75.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.95G/4.98G [00:36<00:14, 72.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 3.97G/4.98G [00:37<00:10, 92.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 4.00G/4.98G [00:37<00:08, 112MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  81% 4.02G/4.98G [00:37<00:07, 129MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  81% 4.04G/4.98G [00:37<00:06, 143MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.06G/4.98G [00:37<00:05, 154MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.08G/4.98G [00:37<00:05, 163MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.10G/4.98G [00:37<00:05, 170MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.12G/4.98G [00:37<00:04, 175MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.14G/4.98G [00:37<00:04, 177MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.16G/4.98G [00:41<00:50, 16.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.19G/4.98G [00:42<00:30, 25.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  85% 4.22G/4.98G [00:42<00:22, 33.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  85% 4.24G/4.98G [00:42<00:17, 43.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.26G/4.98G [00:42<00:13, 55.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.28G/4.98G [00:42<00:10, 69.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.30G/4.98G [00:42<00:07, 84.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  87% 4.32G/4.98G [00:42<00:06, 101MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  87% 4.34G/4.98G [00:42<00:05, 117MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.36G/4.98G [00:43<00:04, 132MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.38G/4.98G [00:43<00:04, 144MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.40G/4.98G [00:43<00:03, 153MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  89% 4.42G/4.98G [00:43<00:03, 
163MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  89% 4.45G/4.98G [00:43<00:03, 169MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.47G/4.98G [00:43<00:02, 171MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.49G/4.98G [00:43<00:02, 177MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.51G/4.98G [00:43<00:02, 180MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.53G/4.98G [00:43<00:02, 182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.55G/4.98G [00:44<00:02, 183MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.57G/4.98G [00:44<00:06, 60.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.59G/4.98G [00:45<00:05, 76.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  93% 4.62G/4.98G [00:45<00:03, 102MB/s] \u001b[A\n","model-00001-of-00004.safetensors:  93% 4.65G/4.98G [00:45<00:02, 117MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.67G/4.98G [00:45<00:02, 133MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.69G/4.98G [00:45<00:01, 145MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.71G/4.98G [00:45<00:01, 154MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.73G/4.98G [00:45<00:01, 161MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.75G/4.98G [00:45<00:01, 162MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.77G/4.98G [00:46<00:01, 130MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.80G/4.98G [00:46<00:01, 158MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  97% 4.82G/4.98G [00:46<00:00, 161MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  97% 4.84G/4.98G [00:46<00:00, 168MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.87G/4.98G [00:46<00:00, 173MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.89G/4.98G [00:46<00:00, 177MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.91G/4.98G [00:46<00:00, 180MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.93G/4.98G [00:46<00:00, 
182MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.95G/4.98G [00:47<00:00, 125MB/s]\u001b[A\n","model-00001-of-00004.safetensors: 100% 4.98G/4.98G [00:47<00:00, 104MB/s] \n","Downloading shards:  25% 1/4 [00:47<02:23, 47.86s/it]\n","model-00002-of-00004.safetensors:   0% 0.00/5.00G [00:00<?, ?B/s]\u001b[A\n","model-00002-of-00004.safetensors:   0% 10.5M/5.00G [00:00<02:12, 37.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   0% 21.0M/5.00G [00:00<01:48, 45.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 31.5M/5.00G [00:00<01:29, 55.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 41.9M/5.00G [00:00<01:14, 66.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 62.9M/5.00G [00:00<00:56, 88.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   2% 83.9M/5.00G [00:01<00:45, 108MB/s] \u001b[A\n","model-00002-of-00004.safetensors:   2% 105M/5.00G [00:01<00:38, 128MB/s] \u001b[A\n","model-00002-of-00004.safetensors:   3% 126M/5.00G [00:01<00:34, 143MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 147M/5.00G [00:01<00:31, 155MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 168M/5.00G [00:01<00:29, 164MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 189M/5.00G [00:01<00:28, 169MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 210M/5.00G [00:01<00:27, 174MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 231M/5.00G [00:01<00:27, 177MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 252M/5.00G [00:01<00:26, 180MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 273M/5.00G [00:02<00:26, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   6% 294M/5.00G [00:02<00:25, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   6% 315M/5.00G [00:02<00:25, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 336M/5.00G [00:02<00:25, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 357M/5.00G [00:02<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 377M/5.00G 
[00:02<00:24, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 398M/5.00G [00:02<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 419M/5.00G [00:02<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 440M/5.00G [00:02<00:25, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 461M/5.00G [00:03<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 482M/5.00G [00:03<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 503M/5.00G [00:03<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 524M/5.00G [00:03<00:24, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  11% 545M/5.00G [00:03<00:23, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  11% 566M/5.00G [00:03<00:23, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 587M/5.00G [00:03<00:23, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 608M/5.00G [00:03<00:23, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 629M/5.00G [00:03<00:23, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 650M/5.00G [00:04<00:24, 175MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 671M/5.00G [00:04<00:24, 175MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 692M/5.00G [00:04<00:24, 175MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 713M/5.00G [00:04<00:24, 178MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  15% 734M/5.00G [00:06<02:17, 31.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  15% 755M/5.00G [00:08<03:57, 17.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 786M/5.00G [00:08<02:31, 27.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 807M/5.00G [00:09<01:55, 36.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 828M/5.00G [00:09<01:29, 46.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 849M/5.00G [00:09<01:10, 58.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 870M/5.00G [00:09<00:56, 
73.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 891M/5.00G [00:09<00:45, 90.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 912M/5.00G [00:09<00:39, 103MB/s] \u001b[A\n","model-00002-of-00004.safetensors:  19% 933M/5.00G [00:09<00:33, 121MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  19% 954M/5.00G [00:09<00:30, 134MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 975M/5.00G [00:09<00:27, 147MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 996M/5.00G [00:10<00:33, 121MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 1.02G/5.00G [00:10<00:35, 112MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.04G/5.00G [00:10<00:39, 101MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.06G/5.00G [00:10<00:38, 103MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.08G/5.00G [00:11<00:38, 103MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.10G/5.00G [00:11<00:43, 90.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.11G/5.00G [00:11<00:49, 78.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.12G/5.00G [00:11<00:50, 76.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.13G/5.00G [00:12<01:14, 52.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.16G/5.00G [00:12<00:45, 84.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  24% 1.18G/5.00G [00:12<00:37, 103MB/s] \u001b[A\n","model-00002-of-00004.safetensors:  24% 1.21G/5.00G [00:12<00:32, 116MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.23G/5.00G [00:12<00:28, 131MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.25G/5.00G [00:12<00:25, 144MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.27G/5.00G [00:12<00:24, 155MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.29G/5.00G [00:12<00:22, 163MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.31G/5.00G [00:13<00:21, 170MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.33G/5.00G [00:13<00:21, 
173MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.35G/5.00G [00:13<00:20, 177MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.37G/5.00G [00:13<00:20, 174MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  28% 1.39G/5.00G [00:13<00:19, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  28% 1.42G/5.00G [00:13<00:19, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.44G/5.00G [00:13<00:19, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.46G/5.00G [00:13<00:19, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.48G/5.00G [00:14<00:19, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.50G/5.00G [00:14<00:19, 180MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.52G/5.00G [00:14<00:18, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.54G/5.00G [00:14<00:19, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.56G/5.00G [00:14<00:18, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  32% 1.58G/5.00G [00:14<00:18, 189MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  32% 1.60G/5.00G [00:14<00:17, 189MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.63G/5.00G [00:14<00:17, 188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.65G/5.00G [00:14<00:17, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.67G/5.00G [00:15<00:17, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.69G/5.00G [00:15<00:19, 174MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.71G/5.00G [00:15<00:18, 178MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.74G/5.00G [00:15<00:17, 190MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.76G/5.00G [00:15<00:17, 189MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.78G/5.00G [00:15<00:17, 188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.80G/5.00G [00:15<00:17, 188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.82G/5.00G [00:15<00:16, 
188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  37% 1.85G/5.00G [00:15<00:16, 188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  37% 1.87G/5.00G [00:16<00:16, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.89G/5.00G [00:16<00:16, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.91G/5.00G [00:16<00:16, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.93G/5.00G [00:16<00:17, 179MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.95G/5.00G [00:16<00:17, 169MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.97G/5.00G [00:16<00:18, 167MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 1.99G/5.00G [00:16<00:17, 172MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 2.01G/5.00G [00:16<00:16, 178MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  41% 2.03G/5.00G [00:17<00:22, 135MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  41% 2.06G/5.00G [00:17<00:19, 148MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.08G/5.00G [00:17<00:18, 161MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.10G/5.00G [00:17<00:17, 168MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.12G/5.00G [00:17<00:16, 178MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.14G/5.00G [00:17<00:15, 179MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.16G/5.00G [00:17<00:15, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.18G/5.00G [00:17<00:15, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.20G/5.00G [00:18<00:15, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.22G/5.00G [00:18<00:15, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  45% 2.24G/5.00G [00:18<00:15, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  45% 2.26G/5.00G [00:18<00:14, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.29G/5.00G [00:18<00:14, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.31G/5.00G [00:18<00:14, 
181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.33G/5.00G [00:18<00:14, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.36G/5.00G [00:18<00:13, 189MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.38G/5.00G [00:19<00:13, 188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.40G/5.00G [00:19<00:13, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.42G/5.00G [00:19<00:13, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.44G/5.00G [00:19<00:13, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.46G/5.00G [00:19<00:13, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  50% 2.49G/5.00G [00:19<00:20, 123MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  50% 2.52G/5.00G [00:19<00:16, 151MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.54G/5.00G [00:20<00:15, 159MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.56G/5.00G [00:20<00:14, 165MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.58G/5.00G [00:20<00:14, 173MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.60G/5.00G [00:20<00:13, 177MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.62G/5.00G [00:20<00:13, 179MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.64G/5.00G [00:20<00:12, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.66G/5.00G [00:20<00:12, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  54% 2.68G/5.00G [00:20<00:12, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  54% 2.71G/5.00G [00:20<00:12, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.73G/5.00G [00:21<00:12, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.75G/5.00G [00:21<00:12, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.77G/5.00G [00:21<00:32, 69.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.79G/5.00G [00:22<00:34, 64.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.81G/5.00G [00:22<00:28, 
76.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.83G/5.00G [00:22<00:25, 85.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.85G/5.00G [00:22<00:23, 89.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.87G/5.00G [00:23<00:24, 87.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  58% 2.88G/5.00G [00:23<00:25, 83.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  58% 2.90G/5.00G [00:23<00:20, 101MB/s] \u001b[A\n","model-00002-of-00004.safetensors:  59% 2.93G/5.00G [00:23<00:25, 81.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.94G/5.00G [00:23<00:24, 83.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.95G/5.00G [00:23<00:27, 75.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.96G/5.00G [00:24<00:28, 70.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.97G/5.00G [00:24<00:28, 72.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 2.98G/5.00G [00:24<00:29, 69.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 2.99G/5.00G [00:24<00:29, 67.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 3.00G/5.00G [00:24<00:30, 65.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 3.01G/5.00G [00:24<00:31, 63.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 3.02G/5.00G [00:25<00:29, 66.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.03G/5.00G [00:25<00:30, 63.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.04G/5.00G [00:25<00:31, 62.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.05G/5.00G [00:25<00:29, 65.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.07G/5.00G [00:25<00:20, 91.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  62% 3.09G/5.00G [00:25<00:16, 114MB/s] \u001b[A\n","model-00002-of-00004.safetensors:  62% 3.11G/5.00G [00:25<00:14, 132MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  63% 3.14G/5.00G [00:26<00:13, 143MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  63% 3.16G/5.00G [00:26<00:11, 
158MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.18G/5.00G [00:26<00:10, 168MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.20G/5.00G [00:26<00:10, 173MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.22G/5.00G [00:26<00:10, 176MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.24G/5.00G [00:26<00:09, 180MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.26G/5.00G [00:26<00:09, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.28G/5.00G [00:26<00:09, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.30G/5.00G [00:26<00:09, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.32G/5.00G [00:27<00:09, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  67% 3.34G/5.00G [00:27<00:09, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  67% 3.37G/5.00G [00:27<00:08, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.39G/5.00G [00:27<00:08, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.41G/5.00G [00:27<00:08, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.43G/5.00G [00:27<00:08, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.45G/5.00G [00:27<00:08, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.47G/5.00G [00:27<00:08, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.49G/5.00G [00:28<00:08, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.51G/5.00G [00:28<00:07, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  71% 3.53G/5.00G [00:28<00:07, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  71% 3.55G/5.00G [00:28<00:07, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.58G/5.00G [00:28<00:07, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.60G/5.00G [00:28<00:07, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.62G/5.00G [00:28<00:07, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.64G/5.00G [00:28<00:07, 
188MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.66G/5.00G [00:28<00:07, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.68G/5.00G [00:29<00:07, 187MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.70G/5.00G [00:29<00:06, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.72G/5.00G [00:29<00:06, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  75% 3.74G/5.00G [00:29<00:06, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  75% 3.76G/5.00G [00:29<00:06, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.79G/5.00G [00:29<00:06, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.81G/5.00G [00:29<00:06, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.83G/5.00G [00:30<00:10, 117MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.85G/5.00G [00:30<00:15, 75.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.88G/5.00G [00:30<00:10, 102MB/s] \u001b[A\n","model-00002-of-00004.safetensors:  78% 3.90G/5.00G [00:30<00:09, 113MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.92G/5.00G [00:30<00:09, 118MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.94G/5.00G [00:31<00:08, 124MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.96G/5.00G [00:31<00:08, 124MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  80% 3.98G/5.00G [00:31<00:08, 126MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  80% 4.01G/5.00G [00:31<00:07, 139MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.04G/5.00G [00:31<00:06, 160MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.07G/5.00G [00:31<00:05, 176MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.09G/5.00G [00:31<00:04, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.11G/5.00G [00:32<00:04, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.13G/5.00G [00:32<00:04, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.15G/5.00G [00:32<00:04, 
185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.17G/5.00G [00:32<00:04, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  84% 4.19G/5.00G [00:32<00:04, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  84% 4.22G/5.00G [00:32<00:04, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.24G/5.00G [00:32<00:04, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.26G/5.00G [00:32<00:04, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.28G/5.00G [00:32<00:03, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.30G/5.00G [00:33<00:03, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.32G/5.00G [00:33<00:03, 182MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.34G/5.00G [00:33<00:03, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.36G/5.00G [00:33<00:03, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  88% 4.38G/5.00G [00:33<00:03, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  88% 4.40G/5.00G [00:33<00:03, 181MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.42G/5.00G [00:33<00:03, 183MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.45G/5.00G [00:33<00:03, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.47G/5.00G [00:34<00:02, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.49G/5.00G [00:34<00:02, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.51G/5.00G [00:34<00:02, 184MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.53G/5.00G [00:34<00:02, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.55G/5.00G [00:34<00:02, 185MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.57G/5.00G [00:34<00:02, 186MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.59G/5.00G [00:34<00:02, 175MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.61G/5.00G [00:34<00:02, 177MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  93% 4.65G/5.00G [00:34<00:01, 
190MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  93% 4.67G/5.00G [00:35<00:01, 189MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.69G/5.00G [00:39<00:18, 16.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.72G/5.00G [00:39<00:10, 25.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.74G/5.00G [00:39<00:07, 33.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.76G/5.00G [00:39<00:05, 43.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.78G/5.00G [00:39<00:03, 55.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.80G/5.00G [00:39<00:02, 68.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.82G/5.00G [00:39<00:02, 84.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  97% 4.84G/5.00G [00:40<00:01, 101MB/s] \u001b[A\n","model-00002-of-00004.safetensors:  97% 4.87G/5.00G [00:40<00:01, 117MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.89G/5.00G [00:40<00:00, 130MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.91G/5.00G [00:40<00:00, 144MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.93G/5.00G [00:40<00:00, 155MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.95G/5.00G [00:40<00:00, 161MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.97G/5.00G [00:40<00:00, 169MB/s]\u001b[A\n","model-00002-of-00004.safetensors: 100% 5.00G/5.00G [00:40<00:00, 122MB/s]\n","Downloading shards:  50% 2/4 [01:28<01:27, 43.86s/it]\n","model-00003-of-00004.safetensors:   0% 0.00/4.92G [00:00<?, ?B/s]\u001b[A\n","model-00003-of-00004.safetensors:   0% 10.5M/4.92G [00:00<02:39, 30.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   0% 21.0M/4.92G [00:00<02:05, 39.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 31.5M/4.92G [00:00<01:38, 49.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 41.9M/4.92G [00:00<01:20, 60.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 52.4M/4.92G [00:00<01:09, 
70.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 73.4M/4.92G [00:01<00:53, 91.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   2% 94.4M/4.92G [00:01<00:42, 115MB/s] \u001b[A\n","model-00003-of-00004.safetensors:   2% 115M/4.92G [00:01<00:37, 128MB/s] \u001b[A\n","model-00003-of-00004.safetensors:   3% 136M/4.92G [00:01<00:34, 139MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   3% 157M/4.92G [00:01<00:32, 147MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 189M/4.92G [00:01<00:28, 168MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 210M/4.92G [00:01<00:26, 177MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   5% 231M/4.92G [00:01<00:25, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   5% 252M/4.92G [00:02<00:26, 178MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 273M/4.92G [00:02<00:25, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 294M/4.92G [00:02<00:25, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 315M/4.92G [00:02<00:25, 178MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 336M/4.92G [00:02<00:25, 177MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 357M/4.92G [00:02<00:25, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   8% 377M/4.92G [00:02<00:24, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   8% 398M/4.92G [00:02<00:23, 189MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 419M/4.92G [00:02<00:23, 191MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 440M/4.92G [00:03<00:23, 190MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 461M/4.92G [00:03<00:23, 188MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 482M/4.92G [00:03<00:23, 187MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 503M/4.92G [00:03<00:23, 186MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  11% 524M/4.92G [00:03<00:23, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  11% 545M/4.92G [00:03<00:24, 
179MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 566M/4.92G [00:03<00:25, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 587M/4.92G [00:03<00:25, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 608M/4.92G [00:04<00:23, 181MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 629M/4.92G [00:04<00:23, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 650M/4.92G [00:06<02:34, 27.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  14% 682M/4.92G [00:06<01:40, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  14% 703M/4.92G [00:06<01:18, 53.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 724M/4.92G [00:06<01:02, 66.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 744M/4.92G [00:06<00:51, 81.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 765M/4.92G [00:06<00:42, 97.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 786M/4.92G [00:07<00:36, 113MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  16% 807M/4.92G [00:07<00:32, 128MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 828M/4.92G [00:07<00:29, 141MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 849M/4.92G [00:07<00:26, 151MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  18% 870M/4.92G [00:07<00:26, 155MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  18% 891M/4.92G [00:07<00:24, 162MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 912M/4.92G [00:07<00:23, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 933M/4.92G [00:07<00:23, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 954M/4.92G [00:08<00:22, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 975M/4.92G [00:08<00:21, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 996M/4.92G [00:08<00:21, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  21% 1.02G/4.92G [00:08<00:21, 183MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  21% 1.04G/4.92G [00:08<00:21, 
184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.06G/4.92G [00:08<00:21, 183MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.08G/4.92G [00:08<00:21, 181MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.10G/4.92G [00:08<00:21, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.12G/4.92G [00:08<00:20, 188MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.14G/4.92G [00:09<00:26, 141MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  24% 1.16G/4.92G [00:09<00:29, 125MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  24% 1.18G/4.92G [00:09<00:30, 122MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.21G/4.92G [00:12<03:11, 19.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.24G/4.92G [00:12<02:02, 30.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.26G/4.92G [00:13<01:34, 38.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.28G/4.92G [00:13<01:13, 49.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.30G/4.92G [00:13<00:57, 63.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  27% 1.32G/4.92G [00:13<00:47, 75.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  27% 1.34G/4.92G [00:13<00:39, 91.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  28% 1.37G/4.92G [00:13<00:30, 118MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  28% 1.39G/4.92G [00:13<00:26, 131MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.42G/4.92G [00:13<00:24, 142MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.44G/4.92G [00:13<00:22, 153MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  30% 1.46G/4.92G [00:14<00:21, 161MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  30% 1.48G/4.92G [00:14<00:20, 168MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.50G/4.92G [00:14<00:19, 172MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.52G/4.92G [00:14<00:19, 176MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.54G/4.92G [00:14<00:19, 
169MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.56G/4.92G [00:14<00:19, 169MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.58G/4.92G [00:14<00:19, 167MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.60G/4.92G [00:14<00:19, 172MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.63G/4.92G [00:15<00:18, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.65G/4.92G [00:16<01:06, 49.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  34% 1.67G/4.92G [00:16<00:51, 63.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.70G/4.92G [00:16<00:36, 88.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.72G/4.92G [00:16<00:30, 104MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  35% 1.74G/4.92G [00:16<00:34, 91.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.76G/4.92G [00:17<00:38, 81.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.79G/4.92G [00:17<00:28, 108MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  37% 1.82G/4.92G [00:17<00:23, 133MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.85G/4.92G [00:17<00:21, 143MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.87G/4.92G [00:17<00:20, 150MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.89G/4.92G [00:17<00:18, 161MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.91G/4.92G [00:17<00:17, 167MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.93G/4.92G [00:18<00:17, 172MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  40% 1.95G/4.92G [00:18<00:16, 175MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  40% 1.97G/4.92G [00:18<00:16, 178MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 1.99G/4.92G [00:18<00:16, 174MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 2.01G/4.92G [00:18<00:16, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 2.03G/4.92G [00:18<00:15, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.06G/4.92G [00:18<00:15, 
184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.08G/4.92G [00:18<00:15, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  43% 2.10G/4.92G [00:18<00:15, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  43% 2.12G/4.92G [00:19<00:15, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.14G/4.92G [00:19<00:15, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.16G/4.92G [00:19<00:15, 183MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.18G/4.92G [00:19<00:14, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.20G/4.92G [00:19<00:14, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.22G/4.92G [00:19<00:14, 186MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.24G/4.92G [00:19<00:14, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.26G/4.92G [00:19<00:14, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.29G/4.92G [00:19<00:14, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  47% 2.31G/4.92G [00:20<00:14, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  47% 2.33G/4.92G [00:20<00:14, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.35G/4.92G [00:20<00:13, 186MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.37G/4.92G [00:20<00:13, 186MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.39G/4.92G [00:20<00:13, 186MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.41G/4.92G [00:22<01:37, 25.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.43G/4.92G [00:23<01:11, 34.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  50% 2.45G/4.92G [00:23<00:58, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  50% 2.47G/4.92G [00:23<00:51, 47.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.50G/4.92G [00:23<00:46, 52.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.51G/4.92G [00:24<00:44, 53.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.52G/4.92G [00:24<00:42, 
56.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.53G/4.92G [00:24<00:38, 61.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.54G/4.92G [00:24<00:35, 66.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.55G/4.92G [00:24<00:34, 69.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.57G/4.92G [00:24<00:24, 95.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  53% 2.59G/4.92G [00:24<00:19, 117MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  53% 2.61G/4.92G [00:24<00:17, 134MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.63G/4.92G [00:25<00:15, 147MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.65G/4.92G [00:25<00:14, 155MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.67G/4.92G [00:25<00:13, 165MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.69G/4.92G [00:25<00:12, 171MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.72G/4.92G [00:25<00:12, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  56% 2.74G/4.92G [00:26<00:34, 62.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  56% 2.76G/4.92G [00:26<00:27, 78.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  57% 2.79G/4.92G [00:26<00:20, 106MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  57% 2.81G/4.92G [00:26<00:17, 122MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.83G/4.92G [00:26<00:15, 136MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.85G/4.92G [00:26<00:14, 147MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.87G/4.92G [00:27<00:12, 157MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  59% 2.89G/4.92G [00:27<00:12, 165MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  59% 2.92G/4.92G [00:27<00:11, 170MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.94G/4.92G [00:27<00:11, 174MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.96G/4.92G [00:27<00:11, 165MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 2.98G/4.92G [00:27<00:11, 
174MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 3.00G/4.92G [00:27<00:11, 172MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 3.02G/4.92G [00:28<00:15, 119MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.05G/4.92G [00:28<00:12, 147MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.07G/4.92G [00:28<00:11, 159MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  63% 3.09G/4.92G [00:28<00:11, 166MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  63% 3.11G/4.92G [00:28<00:11, 163MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.14G/4.92G [00:28<00:11, 161MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.16G/4.92G [00:28<00:10, 166MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.18G/4.92G [00:28<00:10, 171MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.20G/4.92G [00:29<00:09, 178MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.22G/4.92G [00:31<00:59, 28.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  66% 3.24G/4.92G [00:33<01:24, 19.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.27G/4.92G [00:33<00:53, 30.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.29G/4.92G [00:33<00:40, 39.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.31G/4.92G [00:33<00:31, 50.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.33G/4.92G [00:33<00:24, 64.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.36G/4.92G [00:33<00:19, 79.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  69% 3.38G/4.92G [00:33<00:16, 94.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  69% 3.40G/4.92G [00:33<00:13, 111MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  70% 3.42G/4.92G [00:33<00:11, 126MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.44G/4.92G [00:34<00:10, 138MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.46G/4.92G [00:34<00:09, 149MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.48G/4.92G [00:34<00:09, 
159MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.50G/4.92G [00:34<00:08, 166MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  72% 3.52G/4.92G [00:34<00:08, 171MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  72% 3.54G/4.92G [00:34<00:09, 142MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.57G/4.92G [00:35<00:13, 98.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.59G/4.92G [00:35<00:17, 77.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.61G/4.92G [00:35<00:18, 71.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.63G/4.92G [00:35<00:14, 86.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.65G/4.92G [00:36<00:17, 74.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  75% 3.67G/4.92G [00:36<00:13, 89.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  75% 3.69G/4.92G [00:36<00:11, 105MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  76% 3.71G/4.92G [00:36<00:09, 121MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.73G/4.92G [00:36<00:08, 133MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.75G/4.92G [00:36<00:07, 146MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.77G/4.92G [00:37<00:07, 157MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.80G/4.92G [00:37<00:06, 164MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.82G/4.92G [00:37<00:06, 169MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.84G/4.92G [00:37<00:06, 174MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.86G/4.92G [00:37<00:06, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  79% 3.88G/4.92G [00:37<00:05, 179MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  79% 3.90G/4.92G [00:37<00:05, 180MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.92G/4.92G [00:37<00:05, 182MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.94G/4.92G [00:37<00:05, 183MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 3.96G/4.92G [00:38<00:05, 
183MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 3.98G/4.92G [00:38<00:05, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 4.01G/4.92G [00:38<00:04, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  82% 4.03G/4.92G [00:38<00:04, 183MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  82% 4.05G/4.92G [00:38<00:04, 179MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.07G/4.92G [00:38<00:04, 187MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.09G/4.92G [00:38<00:04, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.11G/4.92G [00:38<00:04, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.13G/4.92G [00:41<00:31, 24.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.15G/4.92G [00:41<00:24, 31.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  85% 4.17G/4.92G [00:41<00:18, 39.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  85% 4.19G/4.92G [00:42<00:14, 51.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.22G/4.92G [00:42<00:11, 62.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.25G/4.92G [00:42<00:07, 89.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  87% 4.27G/4.92G [00:42<00:06, 104MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  87% 4.29G/4.92G [00:42<00:05, 118MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  88% 4.31G/4.92G [00:42<00:04, 132MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  88% 4.33G/4.92G [00:42<00:04, 144MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.35G/4.92G [00:42<00:03, 154MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.37G/4.92G [00:42<00:03, 162MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.39G/4.92G [00:43<00:03, 167MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.41G/4.92G [00:43<00:02, 173MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.44G/4.92G [00:43<00:02, 176MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  91% 4.46G/4.92G [00:43<00:02, 
178MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  91% 4.48G/4.92G [00:43<00:02, 178MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.50G/4.92G [00:43<00:02, 181MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.52G/4.92G [00:43<00:02, 176MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.54G/4.92G [00:43<00:02, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.56G/4.92G [00:44<00:01, 184MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.58G/4.92G [00:44<00:01, 185MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.60G/4.92G [00:47<00:16, 18.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.63G/4.92G [00:47<00:09, 29.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  95% 4.66G/4.92G [00:47<00:06, 38.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  95% 4.68G/4.92G [00:47<00:04, 49.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.70G/4.92G [00:48<00:03, 57.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.72G/4.92G [00:48<00:03, 64.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.74G/4.92G [00:48<00:02, 80.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.76G/4.92G [00:48<00:01, 96.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.78G/4.92G [00:48<00:01, 110MB/s] \u001b[A\n","model-00003-of-00004.safetensors:  98% 4.80G/4.92G [00:48<00:00, 128MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  98% 4.82G/4.92G [00:48<00:00, 140MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.84G/4.92G [00:49<00:00, 152MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.87G/4.92G [00:49<00:00, 159MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.89G/4.92G [00:49<00:00, 166MB/s]\u001b[A\n","model-00003-of-00004.safetensors: 100% 4.92G/4.92G [00:49<00:00, 99.5MB/s]\n","Downloading shards:  75% 3/4 [02:18<00:46, 46.46s/it]\n","model-00004-of-00004.safetensors:   0% 0.00/1.17G [00:00<?, ?B/s]\u001b[A\n","model-00004-of-00004.safetensors:  
 1% 10.5M/1.17G [00:00<00:19, 60.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   2% 21.0M/1.17G [00:00<00:15, 75.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   3% 31.5M/1.17G [00:00<00:13, 84.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   4% 52.4M/1.17G [00:00<00:10, 105MB/s] \u001b[A\n","model-00004-of-00004.safetensors:   6% 73.4M/1.17G [00:00<00:08, 128MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   8% 94.4M/1.17G [00:00<00:07, 134MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  10% 115M/1.17G [00:01<00:19, 55.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  13% 147M/1.17G [00:01<00:12, 84.0MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  14% 168M/1.17G [00:01<00:09, 100MB/s] \u001b[A\n","model-00004-of-00004.safetensors:  16% 189M/1.17G [00:01<00:08, 116MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  18% 210M/1.17G [00:02<00:07, 131MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  20% 231M/1.17G [00:02<00:06, 144MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  22% 252M/1.17G [00:02<00:05, 155MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  23% 273M/1.17G [00:02<00:05, 162MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  25% 294M/1.17G [00:02<00:05, 169MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  27% 315M/1.17G [00:02<00:04, 174MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  29% 336M/1.17G [00:02<00:04, 177MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  31% 357M/1.17G [00:02<00:04, 180MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  32% 377M/1.17G [00:02<00:04, 182MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  34% 398M/1.17G [00:03<00:04, 182MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  36% 419M/1.17G [00:03<00:04, 185MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  38% 440M/1.17G [00:03<00:03, 183MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  39% 461M/1.17G [00:03<00:03, 186MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  41% 482M/1.17G [00:03<00:03, 
186MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  43% 503M/1.17G [00:03<00:03, 184MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  45% 524M/1.17G [00:03<00:03, 185MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  47% 545M/1.17G [00:03<00:03, 186MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  48% 566M/1.17G [00:03<00:03, 187MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  50% 587M/1.17G [00:04<00:03, 182MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  52% 608M/1.17G [00:04<00:03, 184MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  54% 629M/1.17G [00:04<00:02, 189MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  56% 650M/1.17G [00:04<00:02, 187MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  57% 671M/1.17G [00:04<00:02, 177MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  59% 692M/1.17G [00:04<00:02, 177MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  61% 713M/1.17G [00:04<00:02, 169MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  64% 744M/1.17G [00:04<00:02, 181MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  66% 776M/1.17G [00:05<00:02, 192MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  69% 807M/1.17G [00:05<00:01, 189MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  71% 828M/1.17G [00:05<00:01, 192MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  73% 849M/1.17G [00:05<00:01, 190MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  75% 870M/1.17G [00:05<00:01, 189MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  76% 891M/1.17G [00:05<00:01, 188MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  78% 912M/1.17G [00:05<00:01, 188MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  80% 933M/1.17G [00:05<00:01, 179MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  82% 954M/1.17G [00:06<00:01, 182MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  83% 975M/1.17G [00:06<00:01, 182MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  85% 996M/1.17G [00:06<00:00, 
182MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  87% 1.02G/1.17G [00:06<00:00, 184MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  89% 1.04G/1.17G [00:06<00:00, 155MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  92% 1.07G/1.17G [00:06<00:00, 178MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  94% 1.10G/1.17G [00:06<00:00, 191MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  96% 1.12G/1.17G [00:06<00:00, 189MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  98% 1.14G/1.17G [00:07<00:00, 189MB/s]\u001b[A\n","model-00004-of-00004.safetensors: 100% 1.17G/1.17G [00:07<00:00, 161MB/s]\n","Downloading shards: 100% 4/4 [02:25<00:00, 36.47s/it]\n","Loading checkpoint shards: 100% 4/4 [01:17<00:00, 19.39s/it]\n","generation_config.json: 100% 185/185 [00:00<00:00, 1.09MB/s]\n","[2024-10-09 16:08:22,151] [INFO] [axolotl.load_model:855] [PID:2725] [RANK:0] GPU memory usage after model load: 8.463GB (+0.039GB cache, +0.368GB misc)\u001b[39m\n","[2024-10-09 16:08:22,190] [INFO] [axolotl.load_model:913] [PID:2725] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n","[2024-10-09 16:08:22,194] [INFO] [axolotl.load_model:922] [PID:2725] [RANK:0] converting modules to torch.bfloat16 for flash attention\u001b[39m\n","[2024-10-09 16:08:22,198] [INFO] [axolotl.load_lora:1087] [PID:2725] [RANK:0] found linear modules: ['down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj']\u001b[39m\n","trainable params: 1,134,559,232 || all params: 9,164,820,480 || trainable%: 12.3795\n","[2024-10-09 16:08:23,440] [INFO] [axolotl.load_model:970] [PID:2725] [RANK:0] GPU memory usage after adapters: 11.714GB (+1.829GB cache, +0.368GB misc)\u001b[39m\n","/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. 
Use `eval_strategy` instead\n","  warnings.warn(\n","[2024-10-09 16:08:24,754] [INFO] [axolotl.train.train:143] [PID:2725] [RANK:0] Pre-saving adapter config to ./outputs/lora-out\u001b[39m\n","[2024-10-09 16:08:24,974] [INFO] [axolotl.train.train:180] [PID:2725] [RANK:0] Starting trainer...\u001b[39m\n","[2024-10-09 16:08:25,455] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:195] [PID:2725] [RANK:0] gather_len_batches: [2586]\u001b[39m\n","[2024-10-09 16:08:25,488] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:195] [PID:2725] [RANK:0] gather_len_batches: [2587]\u001b[39m\n","  0% 0/322 [00:00<?, ?it/s][2024-10-09 16:08:25,609] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:195] [PID:2725] [RANK:0] gather_len_batches: [2586]\u001b[39m\n","You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n","/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:316: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n","  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n","Traceback (most recent call last):\n","  File \"/usr/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n","    return _run_code(code, main_globals, None,\n","  File \"/usr/lib/python3.10/runpy.py\", line 86, in _run_code\n","    exec(code, run_globals)\n","  File \"/content/src/axolotl/src/axolotl/cli/train.py\", line 72, in <module>\n","    fire.Fire(do_cli)\n","  File \"/usr/local/lib/python3.10/dist-packages/fire/core.py\", line 135, in Fire\n","    component_trace = _Fire(component, args, parsed_flag_args, context, name)\n","  File \"/usr/local/lib/python3.10/dist-packages/fire/core.py\", line 468, in _Fire\n","    component, remaining_args = _CallAndUpdateTrace(\n","  File 
\"/usr/local/lib/python3.10/dist-packages/fire/core.py\", line 684, in _CallAndUpdateTrace\n","    component = fn(*varargs, **kwargs)\n","  File \"/content/src/axolotl/src/axolotl/cli/train.py\", line 39, in do_cli\n","    return do_train(parsed_cfg, parsed_cli_args)\n","  File \"/content/src/axolotl/src/axolotl/cli/train.py\", line 67, in do_train\n","    return train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)\n","  File \"/content/src/axolotl/src/axolotl/train.py\", line 194, in train\n","    trainer.train(resume_from_checkpoint=resume_from_checkpoint)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 2052, in train\n","    return inner_training_loop(\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 2388, in _inner_training_loop\n","    tr_loss_step = self.training_step(model, inputs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 3485, in training_step\n","    loss = self.compute_loss(model, inputs)\n","  File \"/content/src/axolotl/src/axolotl/core/trainer_builder.py\", line 671, in compute_loss\n","    return super().compute_loss(model, inputs, return_outputs=return_outputs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\", line 3532, in compute_loss\n","    outputs = model(**inputs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1553, in _wrapped_call_impl\n","    return self._call_impl(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1562, in _call_impl\n","    return forward_call(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\", line 820, in forward\n","    return model_forward(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\", line 808, in __call__\n","    return convert_to_fp32(self.model_forward(*args, 
**kwargs))\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py\", line 43, in decorate_autocast\n","    return func(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/peft/peft_model.py\", line 1644, in forward\n","    return self.base_model(\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1553, in _wrapped_call_impl\n","    return self._call_impl(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1562, in _call_impl\n","    return forward_call(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/peft/tuners/tuners_utils.py\", line 197, in forward\n","    return self.model.forward(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\", line 170, in new_forward\n","    output = module._old_forward(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 1189, in forward\n","    outputs = self.model(\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1553, in _wrapped_call_impl\n","    return self._call_impl(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1562, in _call_impl\n","    return forward_call(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\", line 170, in new_forward\n","    output = module._old_forward(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 988, in forward\n","    layer_outputs = self._gradient_checkpointing_func(\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/_compile.py\", line 31, in inner\n","    return disable_fn(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py\", line 600, in _fn\n","    return fn(*args, **kwargs)\n","  
File \"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py\", line 488, in checkpoint\n","    ret = function(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1553, in _wrapped_call_impl\n","    return self._call_impl(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1562, in _call_impl\n","    return forward_call(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\", line 170, in new_forward\n","    output = module._old_forward(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 729, in forward\n","    hidden_states, self_attn_weights, present_key_value = self.self_attn(\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1553, in _wrapped_call_impl\n","    return self._call_impl(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1562, in _call_impl\n","    return forward_call(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\", line 170, in new_forward\n","    output = module._old_forward(*args, **kwargs)\n","  File \"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py\", line 655, in forward\n","    attn_output = torch.nn.functional.scaled_dot_product_attention(\n","torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 217.06 MiB is free. Process 26983 has 14.53 GiB memory in use. Of the allocated memory 14.08 GiB is allocated by PyTorch, and 333.79 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  
See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n","  0% 0/322 [00:04<?, ?it/s]\n","Traceback (most recent call last):\n","  File \"/usr/local/bin/accelerate\", line 8, in <module>\n","    sys.exit(main())\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/commands/accelerate_cli.py\", line 48, in main\n","    args.func(args)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py\", line 1174, in launch_command\n","    simple_launcher(args)\n","  File \"/usr/local/lib/python3.10/dist-packages/accelerate/commands/launch.py\", line 769, in simple_launcher\n","    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)\n","subprocess.CalledProcessError: Command '['/usr/bin/python3', '-m', 'axolotl.cli.train', './ft-8b-lora_colab.yaml']' returned non-zero exit status 1.\n"]}]},{"cell_type":"markdown","source":["Predict with the trained model"],"metadata":{"id":"-BJ_aaymT74-"}},{"cell_type":"code","source":["!cd ./src/axolotl\n","!accelerate launch -m axolotl.cli.inference \"ft-8b_colab.yaml\" --qlora_model_dir=\"./outputs/lora-out\" --gradio"],"metadata":{"id":"55TzZxXOT-Zq"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Deeper Dive"],"metadata":{"id":"V5BiDQurUB1f"}},{"cell_type":"markdown","source":["It is also helpful to gain some familiarity with the core inner workings of Axolotl."],"metadata":{"id":"SxmbdRQvUEWz"}},{"cell_type":"markdown","source":["### Configuration Normalization"],"metadata":{"id":"5Uf-c5PBUHHo"}},{"cell_type":"markdown","source":["Axolotl uses a custom Dict class, called ```DictDefault```,\n","to store configurations specified in the YAML configuration file (into a Python variable named ```cfg```). 
The definition for this custom Dict can be found in [utils/dict.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/utils/dict.py).\n","\n","```DictDefault``` is amended such that looking up a missing key returns ```None```. This is important because if some configuration options aren't specified by the user, the ```None``` value allows Axolotl to perform boolean checks to determine the default settings for missing configurations. For more examples of how this is done, check out [```utils/config/__init__.py```](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/utils/config/__init__.py)."],"metadata":{"id":"ORFfWUaHUJX4"}},{"cell_type":"markdown","source":["### Loading Models, Tokenizers, and Trainer"],"metadata":{"id":"0DdLyUohUL3G"}},{"cell_type":"markdown","source":["If we inspect [cli/train.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/cli/train.py), we will find that most of the heavy lifting is done by the function ```train()```, which is itself imported from [src/axolotl/train.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/train.py).\n","\n","```train()``` takes care of loading the appropriate tokenizer and pre-trained model through ```load_tokenizer()``` and ```load_model()```, respectively, both from [src/axolotl/utils/models.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/utils/models.py).\n","\n","```load_tokenizer()``` loads in the appropriate tokenizer given the desired model, as well as chat templates.\n","\n","```load_model()``` follows after the tokenizer has been selected. It will automatically discern the base model type, load in the desired model, and apply model-appropriate attention mechanism modifications (e.g. flash attention). Depending on which base model the user chooses in the configuration, ```load_model()``` will utilize the corresponding \"attention hijacking\" script. 
For example, if the user specifies the base model to be ```NousResearch/Meta-Llama-3.1-8B```, which is of llama type, and sets ```flash_attn``` to ```True```, ```load_model()``` will load in [llama_attn_hijack_flash.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/monkeypatch/llama_attn_hijack_flash.py). For a list of the supported attention hijacks, please refer to the directory [/src/axolotl/monkeypatch/](https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/monkeypatch).\n","\n","Another important operation encompassed in ```train()``` is setting up the training so that it takes into account user-specified training configurations (e.g. num_epochs, optimizer) through the use of ```setup_trainer()``` from [/src/axolotl/utils/trainer.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/utils/trainer.py), which in turn relies on modules from [/src/axolotl/core/trainer_builder.py](https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/core/trainer_builder.py).\n","```trainer_builder.py``` provides a list of trainer object options bespoke for the task type (causal or reinforcement learning ('dpo', 'ipo', 'kto'))."],"metadata":{"id":"zHiy24k_UO0w"}},{"cell_type":"markdown","source":["### Monkey patch\n","\n","The [Monkey patch directory](https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/monkeypatch) is where model architecture/optimization patching scripts are stored (these are modifications that are not implemented in the official releases, hence the name monkey patch). It includes attention hijacking, ReLoRA, and unsloth optimization."],"metadata":{"id":"pe4niBoIUSb6"}}]}
\ No newline at end of file