Rewrite MemoryCache alloc_timeout logic #1990
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: on pushes to `main` and on pull requests, start a small local
# swarm of four servers for bigscience/bloom-560m and run the pytest suite
# against it, once per Python version in the matrix.
name: Tests

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  run-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.8', '3.9', '3.10', '3.11' ]
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    timeout-minutes: 15
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          # The key changes with the Python version and with the contents of setup.cfg.
          key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quoted so the shell never treats `[dev]` as a glob bracket expression.
          pip install '.[dev]'
      - name: Test
        run: |
          export MODEL_NAME=bigscience/bloom-560m
          export REF_NAME=bigscience/bloom-560m
          export ADAPTER_NAME=artek0chumak/bloom-560m-safe-peft
          # Server 1: starts a new swarm and serves blocks 0:12 with a fixed identity and address.
          python -m petals.cli.run_server --converted_model_name_or_path "$MODEL_NAME" --block_indices 0:12 \
            --new_swarm --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 \
            --torch_dtype float32 --compression NONE --attn_cache_tokens 2048 --max_chunk_size_bytes 1024 \
            --adapters "$ADAPTER_NAME" &> server1.log &
          SERVER1_PID=$!
          sleep 5  # wait for the first server to initialize DHT
          export INITIAL_PEERS=/ip4/127.0.0.1/tcp/31337/p2p/QmS9KwZptnVdB9FFV7uGgaTq4sEKBwcYeKZDfSpyKDUd1g
          # ^-- server 1 multiaddr is determined by --identity and --host_maddrs
          # Server 2: joins through server 1 and serves blocks 12:22.
          python -m petals.cli.run_server --converted_model_name_or_path "$MODEL_NAME" --block_indices 12:22 \
            --initial_peers "$INITIAL_PEERS" --throughput 1 --torch_dtype float32 --adapters "$ADAPTER_NAME" &> server2.log &
          SERVER2_PID=$!
          sleep 10  # wait for initial servers to declare blocks, then let server decide which blocks to serve
          # Server 3: blocks 12:15, started without adapters, with two CPU tensor-parallel devices.
          python -m petals.cli.run_server --converted_model_name_or_path "$MODEL_NAME" --block_indices 12:15 \
            --initial_peers "$INITIAL_PEERS" --throughput 1 --torch_dtype float32 --tensor_parallel_devices cpu cpu &> server3.log &
          SERVER3_PID=$!
          # Server 4: only the number of blocks is given; no explicit block indices.
          python -m petals.cli.run_server --converted_model_name_or_path "$MODEL_NAME" --num_blocks 3 \
            --initial_peers "$INITIAL_PEERS" --throughput 1 --torch_dtype float32 --adapters "$ADAPTER_NAME" &> server4.log &
          SERVER4_PID=$!
          # Stream all server logs into the step output while the tests run.
          tail -n 100 -f server*.log &
          LOGGER_PID=$!
          sleep 30  # wait for servers to download layers
          # `kill -0` delivers no signal; it only fails if one of the processes no longer exists.
          kill -0 "$SERVER1_PID" "$SERVER2_PID" "$SERVER3_PID" "$SERVER4_PID"  # ensure all servers survived init
          pytest tests --durations=0 --durations-min=1.0 -v
          kill -0 "$SERVER1_PID" "$SERVER2_PID" "$SERVER3_PID" "$SERVER4_PID"  # ensure all servers survived tests
          # Interrupt the servers and the log follower now that the tests are done.
          kill -s SIGINT "$SERVER1_PID" "$SERVER2_PID" "$SERVER3_PID" "$SERVER4_PID" "$LOGGER_PID"
          echo "Done!"