forked from milvus-io/milvus
-
Notifications
You must be signed in to change notification settings - Fork 0
188 lines (163 loc) · 7.28 KB
/
pod-kill-chaos-test.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# Deploys Milvus and Chaos Mesh on a kind cluster, runs the pod-kill chaos
# test for one pod target per matrix entry, and re-runs the client tests
# afterwards.
name: Pod Kill Chaos Test

on:
  # Scheduled run: minute 30 of hour 18, every day.
  schedule:
    - cron: "30 18 * * *"
  # Manual run; both inputs pick the Milvus image under test.
  workflow_dispatch:
    inputs:
      image_tag:
        description: The image tag to use for the chaos test
        required: true
        default: master-latest
      image_repo:
        description: The image repo to use for the chaos test
        required: true
        default: milvusdb/milvus

jobs:
  test-pod-kill-chaos:
    runs-on: ubuntu-latest
    timeout-minutes: 40
    strategy:
      # Keep the remaining matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        # Each value selects chaos_<pod>_pod_kill.yaml in the "Chaos Test"
        # step; values containing "standalone" deploy with
        # standalone-values.yaml, all others with cluster-values.yaml.
        pod:
          - allstandalone
          - allcluster
          - standalone
          - datacoord
          - datanode
          - indexcoord
          - indexnode
          - proxy
          - pulsar
          - querycoord
          - querynode
          - rootcoord
          - etcd
          - minio
    steps:
# Publish RELEASE, IMAGE_REPO and IMAGE_TAG to later steps via GITHUB_ENV.
# The workflow_dispatch inputs win; the DEFAULT_* values are used when an
# input is empty.
- name: Set env param
  env:
    DEFAULT_IMAGE_REPO: milvusdb/milvus
    DEFAULT_IMAGE_TAG: master-latest
  run: |
    {
      echo "RELEASE=test-${{ matrix.pod }}-pod-kill"
      echo "IMAGE_REPO=${{ github.event.inputs.image_repo || env.DEFAULT_IMAGE_REPO}}"
      echo "IMAGE_TAG=${{ github.event.inputs.image_tag || env.DEFAULT_IMAGE_TAG}}"
    } >> $GITHUB_ENV
# Create the kind cluster that the following kubectl/helm steps talk to.
- uses: helm/kind-action@v1.2.0
  name: Creating kind cluster
# Diagnostic dump of the freshly created cluster and the client tool versions.
- name: Print cluster information
  run: |
    # kubeconfig and API endpoint
    kubectl config view
    kubectl cluster-info
    # nodes and system pods
    kubectl get nodes
    kubectl get pods -n kube-system
    # client tool versions
    helm version
    kubectl version
# Check out the repository; later steps run from tests/python_client and
# call scripts by relative path.
- uses: actions/checkout@v2
# Install the Python interpreter used by the pip/pytest steps.
- name: Set up Python
  uses: actions/setup-python@v2
  with:
    # Quoted so YAML keeps the version as a string rather than a float
    # (an unquoted 3.10, for example, would be read as 3.1).
    python-version: "3.8"
# Install the Python client test requirements, retrying on error up to
# 3 attempts with a 5-minute timeout per attempt.
- name: Install dependency
  uses: nick-invision/retry@v2
  with:
    timeout_minutes: 5
    max_attempts: 3
    retry_on: error
    shell: bash
    # --trusted-host takes a host (or host:port), not a URL; with the
    # "https://" prefix pip does not match the option against test.pypi.org.
    command: |
      pip install -r tests/python_client/requirements.txt --trusted-host test.pypi.org
# Install Chaos Mesh v2.0.3 into the chaos-testing namespace, configured for
# the containerd runtime and socket.
# NOTE(review): the step timeout (2 min) is shorter than helm's
# --timeout 360s, so the step limit is reached first — confirm which is intended.
- name: Deploy Chaos Mesh
  shell: bash
  timeout-minutes: 2
  run: |
    helm repo add chaos-mesh https://charts.chaos-mesh.org
    helm search repo chaos-mesh
    kubectl create ns chaos-testing
    helm install --wait --timeout 360s chaos-mesh chaos-mesh/chaos-mesh \
      --namespace=chaos-testing \
      --version v2.0.3 \
      --set chaosDaemon.runtime=containerd \
      --set chaosDaemon.socketPath=/run/containerd/containerd.sock
    kubectl get po -n chaos-testing
# Install the Milvus chart as release $RELEASE with the selected image, then
# expose port 19530 on localhost for the test steps.
- name: Deploy Milvus
  shell: bash
  working-directory: tests/python_client/chaos
  timeout-minutes: 15
  run: |
    # Output is only printed to the log; it is not captured or reused below.
    echo "latest tag:"
    bash ../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus -t master-latest -f master- -F -L -q
    helm repo add milvus https://zilliztech.github.io/milvus-helm
    helm repo update
    # Pod targets containing "standalone" use the standalone values file;
    # every other target uses the cluster values file.
    if [[ ${{ matrix.pod }} == *"standalone"* ]]; then
      values_file=standalone-values.yaml
    else
      values_file=cluster-values.yaml
    fi
    helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus \
      --set image.all.repository=${{ env.IMAGE_REPO }} \
      --set image.all.tag=${{ env.IMAGE_TAG }} \
      -f "$values_file" -n=chaos-testing
    kubectl get pods -n chaos-testing
    sleep 20s
    kubectl get pods -n chaos-testing
    # Forward the Milvus service port in the background, give it 20s to
    # come up, then fail the step if the port is not reachable.
    kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
    sleep 20s
    nc -vz 127.0.0.1 19530
# Baseline: run the e2e test case against the forwarded port before any
# chaos is injected.
- name: Run e2e test before chaos
  shell: bash
  working-directory: tests/python_client
  timeout-minutes: 5
  # Folded scalar: the lines below are joined into a single command line.
  run: >-
    pytest -s -v testcases/test_e2e.py
    --host 127.0.0.1
    --log-cli-level=INFO
    --capture=no
# Baseline: run the data persistence test case before any chaos is injected.
# (Step name corrected from "presistence" to "persistence".)
- name: Run data persistence test before chaos
  timeout-minutes: 5
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    pytest -s -v testcases/test_data_persistence.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
# Point constants.py at the pod-kill chaos object for this matrix entry and
# at this job's release, then run test_chaos.py.
- name: Chaos Test
  shell: bash
  working-directory: tests/python_client/chaos
  timeout-minutes: 15
  run: |
    # Rewrite the three constants in place with a single sed invocation.
    sed -i \
      -e "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/pod_kill\/'/g" \
      -e "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_kill.yaml\'/g" \
      -e "s/RELEASE_NAME =.*/RELEASE_NAME = \'${{ env.RELEASE }}\'/g" \
      constants.py
    cat constants.py
    # A failing or timed-out (14m) run only prints a message; the step
    # itself still exits successfully.
    timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "chaos test failed"
# Print the release's log from the reports directory, or a placeholder
# message when the file cannot be read.
- name: Result Analysis
  shell: bash
  working-directory: tests/python_client/chaos/reports
  timeout-minutes: 1
  run: |
    echo "result analysis"
    cat ${{ env.RELEASE }}.log || echo "no log file"
# Wait for the release's pods to become Ready again after the chaos run,
# then re-create the port-forward used by the following test steps.
# NOTE(review): each kubectl wait allows 360s but the step timeout is 5 min,
# so the step limit is reached first — confirm which is intended.
- name: Wait all pods ready
  timeout-minutes: 5
  shell: bash
  working-directory: tests/python_client
  run: |
    kubectl get pod -n chaos-testing
    # wait all pod to be ready (two selectors: the pods carry either label)
    kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
    kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
    kubectl get pod -n chaos-testing
    # Kill any leftover port-forward process. -r stops xargs from running
    # "kill -9" with no PIDs when nothing matched, which would exit non-zero
    # and fail this step.
    ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9
    kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
    sleep 20s
    # fail the step if the forwarded port is not reachable
    nc -vz 127.0.0.1 19530
# Re-run the e2e test case against the forwarded port after the chaos run.
- name: Run e2e test after chaos
  shell: bash
  working-directory: tests/python_client
  timeout-minutes: 5
  # Folded scalar: the lines below are joined into a single command line.
  run: >-
    pytest -s -v testcases/test_e2e.py
    --host 127.0.0.1
    --log-cli-level=INFO
    --capture=no
# Re-run the data persistence test case after the chaos run.
# (Step name corrected from "presistence" to "persistence".)
- name: Run data persistence test after chaos
  timeout-minutes: 5
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    pytest -s -v testcases/test_data_persistence.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
# Run test_get_collections.py, pause 2 seconds, then run
# test_all_collections_after_chaos.py with "-n 4".
- name: Verify all collections after chaos
  shell: bash
  working-directory: tests/python_client/chaos
  timeout-minutes: 15
  run: |
    pytest -s -v testcases/test_get_collections.py \
      --host 127.0.0.1 --log-cli-level=INFO --capture=no
    sleep 2s
    pytest -s -v testcases/test_all_collections_after_chaos.py \
      --host 127.0.0.1 -n 4 --log-cli-level=INFO --capture=no
# Always runs, whatever the outcome of earlier steps: lists the pods and
# exports the Kubernetes logs under k8s_logs/chaos-test.
- name: Export logs
  if: always()
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    # The AGE column of this listing shows whether a pod was killed and recreated.
    kubectl get po -n chaos-testing
    # Export k8s logs for Chaos Mesh and Milvus.
    bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/chaos-test
# Upload the exported Kubernetes logs and the chaos reports as an artifact
# whenever the job is not in a successful state.
- name: Upload logs
  if: ${{ ! success() }}
  # v2 of this action has been retired by GitHub and no longer runs; v4
  # accepts the same name/path inputs. v4 requires artifact names to be
  # unique within a run, which the per-matrix suffix below provides.
  uses: actions/upload-artifact@v4
  with:
    name: logs-${{ matrix.pod }}
    path: |
      tests/python_client/chaos/k8s_logs
      tests/python_client/chaos/reports