Improve default arguments for clients and servers #2520
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow name as displayed for this workflow's runs.
name: Tests

# Triggers: pushes to the `main` branch, and pull requests.
on:
  push:
    branches:
      - main
  pull_request:
jobs:
  # Launches a small local swarm (one DHT bootstrap peer + four servers) on the runner,
  # then runs pytest and the benchmark scripts against it.
  run-tests:
    strategy:
      matrix:
        # One job per (model, os, python-version) entry listed below.
        include:
          - { model: 'bigscience/bloom-560m', os: 'ubuntu', python-version: '3.8' }
          - { model: 'bigscience/bloom-560m', os: 'ubuntu', python-version: '3.11' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.8' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.11' }
          - { model: 'petals-team/falcon-rw-1b', os: 'ubuntu', python-version: '3.8' }
          - { model: 'petals-team/falcon-rw-1b', os: 'ubuntu', python-version: '3.11' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.10' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.11' }
      fail-fast: false
    runs-on: ${{ matrix.os }}-latest
    timeout-minutes: 20
    steps:
      # Only applied on the ubuntu jobs (see the `if:` condition).
      - name: Increase swap space
        if: ${{ matrix.os == 'ubuntu' }}
        uses: pierotofy/set-swap-space@master
        with:
          swap-size-gb: 10
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache dependencies
        uses: actions/cache@v3
        with:
          # pip's default cache directory is ~/.cache/pip on Linux and ~/Library/Caches/pip on macOS;
          # list both so that the macOS jobs actually cache something.
          path: |
            ~/.cache/pip
            ~/Library/Caches/pip
          # The key includes the runner OS so that Linux and macOS jobs never restore each other's cache.
          key: Key-v1-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quoted so that the shell never treats `[dev]` as a glob bracket expression
          pip install '.[dev]'
      - name: Test
        env:
          # Workflow expressions are passed through the environment instead of being pasted into the script text.
          MODEL_NAME: "${{ matrix.model }}"
          REF_NAME: "${{ matrix.model }}"
          # Both values below are empty unless the model is bigscience/bloom-560m.
          ADAPTER_NAME: "${{ matrix.model == 'bigscience/bloom-560m' && 'artek0chumak/bloom-560m-safe-peft' || '' }}"
          TENSOR_PARALLEL_ARGS: "${{ matrix.model == 'bigscience/bloom-560m' && '--tensor_parallel_devices cpu cpu' || '' }}"
        run: |
          set -x  # Print executed commands

          # [Step 1] Set up a tiny test swarm (see https://github.com/bigscience-workshop/petals/wiki/Launch-your-own-swarm)

          python -m petals.cli.run_dht \
            --identity_path tests/bootstrap.id --host_maddrs /ip4/127.0.0.1/tcp/31337 &> bootstrap.log &
          BOOTSTRAP_PID=$!

          export INITIAL_PEERS=/ip4/127.0.0.1/tcp/31337/p2p/QmS9KwZptnVdB9FFV7uGgaTq4sEKBwcYeKZDfSpyKDUd1g
          # ^-- multiaddr in INITIAL_PEERS is determined by --identity_path and --host_maddrs

          # Wait for DHT init: poll until the bootstrap peer has written something to its log.
          # Fail fast if the process is gone with an empty log, or if nothing shows up within 5 minutes,
          # instead of spinning until the job-level timeout.
          waited=0
          until [ -s bootstrap.log ]; do
            if ! kill -0 "$BOOTSTRAP_PID" 2> /dev/null; then
              echo "DHT bootstrap peer exited before writing to bootstrap.log" >&2
              exit 1
            fi
            if [ "$waited" -ge 300 ]; then
              echo "Timed out waiting for the DHT bootstrap peer to write to bootstrap.log" >&2
              exit 1
            fi
            sleep 5
            waited=$((waited + 5))
          done

          # $ADAPTER_NAME stays unquoted on purpose in the commands below: when it is empty,
          # it must expand to no argument at all rather than to an empty-string argument.
          # shellcheck disable=SC2086
          python -m petals.cli.run_server "$MODEL_NAME" --adapters $ADAPTER_NAME --torch_dtype float32 --num_blocks 5 \
            --mean_balance_check_period 10 \
            --initial_peers "$INITIAL_PEERS" --throughput 1 &> server1.log &
          SERVER1_PID=$!
          # ^-- rebalancing test: this server chooses blocks 0:5, then sees a gap in the swarm and moves there

          sleep 10  # wait for the 1st server to choose blocks

          # shellcheck disable=SC2086
          python -m petals.cli.run_server "$MODEL_NAME" --adapters $ADAPTER_NAME --torch_dtype float32 --block_indices 0:5 \
            --identity_path tests/server2.id \
            --initial_peers "$INITIAL_PEERS" --throughput 1 &> server2.log &
          SERVER2_PID=$!

          # shellcheck disable=SC2086
          python -m petals.cli.run_server "$MODEL_NAME" --adapters $ADAPTER_NAME --torch_dtype float32 --num_blocks 14 \
            --attn_cache_tokens 2048 --max_chunk_size_bytes 1024 \
            --initial_peers "$INITIAL_PEERS" --throughput auto &> server3.log &
          SERVER3_PID=$!
          # ^-- chunking test

          # $TENSOR_PARALLEL_ARGS stays unquoted on purpose: it holds several space-separated arguments (or nothing).
          # shellcheck disable=SC2086
          python -m petals.cli.run_server "$MODEL_NAME" $TENSOR_PARALLEL_ARGS --torch_dtype float32 --block_indices 0:2 \
            --initial_peers "$INITIAL_PEERS" --throughput auto &> server4.log &
          SERVER4_PID=$!
          # ^-- tensor parallelism test (not compatible with adapters yet)

          sleep 5  # wait for the log files to appear
          tail -n 100 -f bootstrap.log server*.log &
          LOGGER_PID=$!

          sleep 30  # wait for servers to eval throughput, download layers, and rebalance
          # ensure all peers survived init (kill -0 only checks that the processes exist; the step fails otherwise)
          kill -0 "$BOOTSTRAP_PID" "$SERVER1_PID" "$SERVER2_PID" "$SERVER3_PID" "$SERVER4_PID"

          # [Step 2] Run PyTest

          # Share disk cache between Petals servers, clients, and HF Transformers
          export TRANSFORMERS_CACHE=~/.cache/petals

          # Necessary for @pytest.mark.forked to work properly on macOS, see https://github.com/kevlened/pytest-parallel/issues/93
          export no_proxy='*'
          export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES

          pytest tests --durations=0 --durations-min=1.0 -v

          # [Step 3] Check if benchmarks work (their results here are meaningless since it's a tiny swarm of CPU servers)

          python benchmarks/benchmark_inference.py --model "$MODEL_NAME" --initial_peers "$INITIAL_PEERS" --torch_dtype float32 \
            --seq_len 3
          python benchmarks/benchmark_forward.py --model "$MODEL_NAME" --initial_peers "$INITIAL_PEERS" --torch_dtype float32 \
            --seq_len 3 --batch_size 3 --n_steps 1
          python benchmarks/benchmark_training.py --model "$MODEL_NAME" --initial_peers "$INITIAL_PEERS" --torch_dtype float32 \
            --seq_len 3 --batch_size 3 --pre_seq_len 1 --n_steps 1 --task cls
          python benchmarks/benchmark_training.py --model "$MODEL_NAME" --initial_peers "$INITIAL_PEERS" --torch_dtype float32 \
            --seq_len 3 --batch_size 3 --pre_seq_len 1 --n_steps 1 --task causal_lm

          # [Step 4] Clean up

          # NOTE(review): a non-interactive bash starts `&` jobs with SIGINT ignored, so these processes may not
          # react to this signal unless they install their own SIGINT handler -- confirm that this shuts them down.
          kill -s SIGINT "$BOOTSTRAP_PID" "$SERVER1_PID" "$SERVER2_PID" "$SERVER3_PID" "$SERVER4_PID" "$LOGGER_PID"

          echo "Done!"