diff --git a/bigscience/eval-spreadsheet/interactive_results_parser.py b/bigscience/eval-spreadsheet/interactive_results_parser.py index 72a7f3a62..26d8fd002 100644 --- a/bigscience/eval-spreadsheet/interactive_results_parser.py +++ b/bigscience/eval-spreadsheet/interactive_results_parser.py @@ -3,7 +3,7 @@ import time from parse_eai_results import process_eai_results -from parse_promptsource import process_ps_results +from parse_t0 import process_t0_results if __name__ == "__main__": previous_clipboard = clipboard.paste() diff --git a/bigscience/eval-spreadsheet/parse_promptsource.py b/bigscience/eval-spreadsheet/parse_t0.py similarity index 95% rename from bigscience/eval-spreadsheet/parse_promptsource.py rename to bigscience/eval-spreadsheet/parse_t0.py index 49f2865b2..59b02fa8c 100644 --- a/bigscience/eval-spreadsheet/parse_promptsource.py +++ b/bigscience/eval-spreadsheet/parse_t0.py @@ -85,13 +85,13 @@ def process_task_prompt(task_prompt: str) -> tuple[str, str]: if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Get formatted promptsource tasks & results for c/p in spreadsheet" + description="Get formatted t0 tasks & results for c/p in spreadsheet" ) parser.add_argument( "-i", "--ps-results-file", type=str, - help="Path to a promptsource .json result file", + help="Path to a t0 .json result file", ) parser.add_argument( "-t", diff --git a/bigscience/gins/c_dec_t0_adapt.gin b/bigscience/gins/c_dec_t0_adapt.gin index 45b58fe86..8e041dace 100644 --- a/bigscience/gins/c_dec_t0_adapt.gin +++ b/bigscience/gins/c_dec_t0_adapt.gin @@ -3,7 +3,7 @@ from __gin__ import dynamic_registration from t5x import models from t5x import utils import seqio -from promptsource import seqio_tasks +from t0 import seqio_tasks include "bigscience/gins/c_dec_xxl.gin" include "t5x/configs/runs/finetune.gin" diff --git a/bigscience/gins/enc_dec_t0_adapt.gin b/bigscience/gins/enc_dec_t0_adapt.gin index e0705086d..6d94f51d9 100644 --- 
a/bigscience/gins/enc_dec_t0_adapt.gin +++ b/bigscience/gins/enc_dec_t0_adapt.gin @@ -4,7 +4,7 @@ from t5x import models from t5x import trainer from t5x import utils import seqio -from promptsource import seqio_tasks +from t0 import seqio_tasks include "bigscience/gins/enc_dec_xxl.gin" include "t5x/configs/runs/finetune.gin" diff --git a/bigscience/gins/nc_dec_t0_adapt.gin b/bigscience/gins/nc_dec_t0_adapt.gin index cc0fba614..2c0448dcf 100644 --- a/bigscience/gins/nc_dec_t0_adapt.gin +++ b/bigscience/gins/nc_dec_t0_adapt.gin @@ -3,7 +3,7 @@ from __gin__ import dynamic_registration from t5x import models from t5x import utils import seqio -from promptsource import seqio_tasks +from t0 import seqio_tasks include "bigscience/gins/nc_dec_xxl.gin" include "t5x/configs/runs/finetune.gin" diff --git a/bigscience/gins/task.py b/bigscience/gins/task.py index b7f726cce..a69a40f1a 100644 --- a/bigscience/gins/task.py +++ b/bigscience/gins/task.py @@ -2,7 +2,7 @@ import seqio -from promptsource import seqio_tasks +from t0 import seqio_tasks # --- Seqio --- seqio.add_global_cache_dirs([ diff --git a/bigscience/scripts/plot_eai_eval.py b/bigscience/scripts/plot_eai_eval.py index 0faad9552..19975086d 100644 --- a/bigscience/scripts/plot_eai_eval.py +++ b/bigscience/scripts/plot_eai_eval.py @@ -8,8 +8,6 @@ import numpy as np from matplotlib import pyplot as plt -# gcloud alpha compute tpus tpu-vm scp thomas-dev-tpu:~/arch_objective_exps_v2 . 
--zone us-central2-b --recurse - def get_args(): parser = ArgumentParser() parser.add_argument('--all', action="store_true", help="Plot all results in a single plot") diff --git a/bigscience/scripts/run_on_all_vms.sh b/bigscience/scripts/run_on_all_vms.sh index f82314604..b7e0a0d5e 100644 --- a/bigscience/scripts/run_on_all_vms.sh +++ b/bigscience/scripts/run_on_all_vms.sh @@ -12,4 +12,4 @@ gcloud alpha compute tpus tpu-vm ssh ${TPU_NAME} --zone ${ZONE} --worker=all --c # Example to run t5_c4_span_corruption # - run setup vms: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "$(cat bigscience/scripts/setup_vm.sh)" # - run t5_c4_span_corruption: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "cd code/t5x; git pull; sh bigscience/scripts/launch_command_in_tmux.sh \"bash bigscience/scripts/pretrain.sh enc_dec_c4_span_corruption\"" -# - kill zombie process: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "killall -u thomas" +# - kill zombie process: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "killall -u ${USER}" diff --git a/bigscience/scripts/setup_vm.sh b/bigscience/scripts/setup_vm.sh index 2ae9081a1..9aa610248 100644 --- a/bigscience/scripts/setup_vm.sh +++ b/bigscience/scripts/setup_vm.sh @@ -5,39 +5,25 @@ rm libtpu_tpuv4-0.1.dev* gsutil cp gs://cloud-tpu-tpuvm-v4-artifacts/wheels/libtpu/latest/libtpu_tpuv4-0.1.dev* . pip3 install libtpu_tpuv4-0.1.dev* -### ... -##pip3 install tensorflow==2.7.0 -#rm tensorflow-2.7.0-cp38-cp38-linux_x86_64.whl -#gsutil cp gs://cloud-tpu-tpuvm-v4-artifacts/wheels/tensorflow/tf-2-7-0/tensorflow-2.7.0-cp38-cp38-linux_x86_64.whl . -#pip3 install tensorflow-2.7.0-cp38-cp38-linux_x86_64.whl tensorflow-text==2.7.0 mkdir -p ~/code cd ~/code -# Install t5 first +# Install t5 master version git clone https://github.com/google-research/text-to-text-transfer-transformer.git pushd text-to-text-transfer-transformer pip3 install -e . 
popd -#rm -rf text-to-text-transfer-transformer -#git clone https://github.com/thomasw21/text-to-text-transfer-transformer.git -#pushd text-to-text-transfer-transformer -#git checkout fix_prefix_lm_obj -#pip3 install -e . -#popd -git clone https://github.com/bigscience-workshop/promptsource.git -pushd promptsource -git reset e65186c2b8a544de1eb5c283b11b235033b01514 --hard -pip3 install black==21.12b0 # conflicts with streamlit -pip3 install -r requirements.txt -pip3 install --ignore-requires-python -e . #needed because `promptsource` forces the use of python 3.7 +git clone https://github.com/bigscience-workshop/t-zero.git +pushd t-zero +# TODO: remove once https://github.com/bigscience-workshop/t-zero/pull/24 is merged +git checkout thomas/update_promptsource_dependency +pip3 install -e ".[seqio_tasks]" popd -#rm -rf t5x git clone https://github.com/bigscience-workshop/t5x.git pushd t5x -pip3 install -e . +pip3 install -e ".[bigscience]" popd git clone https://github.com/EleutherAI/lm-evaluation-harness.git diff --git a/setup.py b/setup.py index 653caea1f..7f154762f 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,6 @@ 't5', 'tensorflow', 'tensorstore==0.1.13', - 'promptsource' ], extras_require={ 'gcp': [ @@ -63,6 +62,9 @@ 'google-cloud-storage', 'oauth2client' ], 'test': ['pytest'], + 'bigscience': [ + "t0[seqio_tasks]" + ] }, classifiers=[ 'Development Status :: 4 - Beta',