Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove promptsource and use t-zero #24

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigscience/eval-spreadsheet/interactive_results_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import time

from parse_eai_results import process_eai_results
from parse_promptsource import process_ps_results
from parse_t0 import process_t0_results

if __name__ == "__main__":
previous_clipboard = clipboard.paste()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,13 @@ def process_task_prompt(task_prompt: str) -> tuple[str, str]:

if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Get formatted promptsource tasks & results for c/p in spreadsheet"
description="Get formatted t0 tasks & results for c/p in spreadsheet"
)
parser.add_argument(
"-i",
"--ps-results-file",
type=str,
help="Path to a promptsource .json result file",
help="Path to a t0 .json result file",
)
parser.add_argument(
"-t",
Expand Down
2 changes: 1 addition & 1 deletion bigscience/gins/c_dec_t0_adapt.gin
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from __gin__ import dynamic_registration
from t5x import models
from t5x import utils
import seqio
from promptsource import seqio_tasks
from t0 import seqio_tasks

include "bigscience/gins/c_dec_xxl.gin"
include "t5x/configs/runs/finetune.gin"
Expand Down
2 changes: 1 addition & 1 deletion bigscience/gins/enc_dec_t0_adapt.gin
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ from t5x import models
from t5x import trainer
from t5x import utils
import seqio
from promptsource import seqio_tasks
from t0 import seqio_tasks

include "bigscience/gins/enc_dec_xxl.gin"
include "t5x/configs/runs/finetune.gin"
Expand Down
2 changes: 1 addition & 1 deletion bigscience/gins/nc_dec_t0_adapt.gin
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from __gin__ import dynamic_registration
from t5x import models
from t5x import utils
import seqio
from promptsource import seqio_tasks
from t0 import seqio_tasks

include "bigscience/gins/nc_dec_xxl.gin"
include "t5x/configs/runs/finetune.gin"
Expand Down
2 changes: 1 addition & 1 deletion bigscience/gins/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import seqio

from promptsource import seqio_tasks
from t0 import seqio_tasks

# --- Seqio ---
seqio.add_global_cache_dirs([
Expand Down
2 changes: 0 additions & 2 deletions bigscience/scripts/plot_eai_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
from matplotlib import pyplot as plt

# gcloud alpha compute tpus tpu-vm scp thomas-dev-tpu:~/arch_objective_exps_v2 . --zone us-central2-b --recurse

def get_args():
parser = ArgumentParser()
parser.add_argument('--all', action="store_true", help="Plot all results in a single plot")
Expand Down
2 changes: 1 addition & 1 deletion bigscience/scripts/run_on_all_vms.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ gcloud alpha compute tpus tpu-vm ssh ${TPU_NAME} --zone ${ZONE} --worker=all --c
# Example to run t5_c4_span_corruption
# - run setup vms: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "$(cat bigscience/scripts/setup_vm.sh)"
# - run t5_c4_span_corruption: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "cd code/t5x; git pull; sh bigscience/scripts/launch_command_in_tmux.sh \"bash bigscience/scripts/pretrain.sh enc_dec_c4_span_corruption\""
# - kill zombie process: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "killall -u thomas"
# - kill zombie process: sh bigscience/scripts/run_on_all_vms.sh enc_dec_c4_span_corruption "killall -u ${USER}"
28 changes: 7 additions & 21 deletions bigscience/scripts/setup_vm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,25 @@ rm libtpu_tpuv4-0.1.dev*
gsutil cp gs://cloud-tpu-tpuvm-v4-artifacts/wheels/libtpu/latest/libtpu_tpuv4-0.1.dev* .
pip3 install libtpu_tpuv4-0.1.dev*

### ...
##pip3 install tensorflow==2.7.0
#rm tensorflow-2.7.0-cp38-cp38-linux_x86_64.whl
#gsutil cp gs://cloud-tpu-tpuvm-v4-artifacts/wheels/tensorflow/tf-2-7-0/tensorflow-2.7.0-cp38-cp38-linux_x86_64.whl .
#pip3 install tensorflow-2.7.0-cp38-cp38-linux_x86_64.whl tensorflow-text==2.7.0

mkdir -p ~/code
cd ~/code

# Install t5 first
# Install t5 master version
git clone https://github.com/google-research/text-to-text-transfer-transformer.git
pushd text-to-text-transfer-transformer
pip3 install -e .
popd
#rm -rf text-to-text-transfer-transformer
#git clone https://github.com/thomasw21/text-to-text-transfer-transformer.git
#pushd text-to-text-transfer-transformer
#git checkout fix_prefix_lm_obj
#pip3 install -e .
#popd

git clone https://github.com/bigscience-workshop/promptsource.git
pushd promptsource
git reset e65186c2b8a544de1eb5c283b11b235033b01514 --hard
pip3 install black==21.12b0 # conflicts with streamlit
pip3 install -r requirements.txt
pip3 install --ignore-requires-python -e . #needed because `promptsource` forces the use of python 3.7
git clone https://github.com/bigscience-workshop/t-zero.git
pushd t-zero
# TODO: remove once https://github.com/bigscience-workshop/t-zero/pull/24 is merged
git checkout thomas/update_promptsource_dependency
pip3 install -e ".[seqio_tasks]"
popd

#rm -rf t5x
git clone https://github.com/bigscience-workshop/t5x.git
pushd t5x
pip3 install -e .
pip3 install -e ".[bigscience]"
popd

git clone https://github.com/EleutherAI/lm-evaluation-harness.git
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,16 @@
't5',
'tensorflow',
'tensorstore==0.1.13',
'promptsource'
],
extras_require={
'gcp': [
'gevent', 'google-api-python-client', 'google-compute-engine',
'google-cloud-storage', 'oauth2client'
],
'test': ['pytest'],
'bigscience': [
"t0[seqio_tasks]"
]
},
classifiers=[
'Development Status :: 4 - Beta',
Expand Down