Add script for loading prometheus snapshots, improve grafana
[dcaegen2/collectors/hv-ves.git] / tools / performance / cloud / cloud-based-performance-test.sh
1 #!/usr/bin/env bash
2 # ============LICENSE_START=======================================================
3 # dcaegen2-collectors-veshv
4 # ================================================================================
5 # Copyright (C) 2019-2020 NOKIA
6 # ================================================================================
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 #      http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18 # ============LICENSE_END=========================================================
19
# Absolute directory that contains this script. It is resolved from BASH_SOURCE
# (not from the caller's working directory) so that the files referenced with
# relative paths after `cd ${SCRIPT_DIRECTORY}` are found regardless of where
# the script is started from.
SCRIPT_DIRECTORY="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Number of producer pods to create (overridden by --containers).
CONTAINERS_COUNT=1
# Running total of producers reported as completed in load-test mode.
COMPLETED_PRODUCERS_SUM=0
# "true" selects the load test loop, anything else the one-shot test (--load).
LOAD_TEST="false"
# Name of the ConfigMap created from the properties file.
TEST_CONFIG_MAP=performance-test-config
# Default properties file; overridden by --properties-file.
PROPERTIES_FILE=${SCRIPT_DIRECTORY}/test.properties
# Label values / resource names used to select the test's Kubernetes objects.
PRODUCER_APPS_LABEL=hv-collector-producer
CONSUMER_APPS_LABEL=hv-collector-kafka-consumer
PROMETHEUS_CONF_LABEL=prometheus-server-conf
PROMETHEUS_APPS_LABEL=hv-collector-prometheus
GRAFANA_APPS_LABEL=hv-collector-grafana
GRAFANA_DATASOURCE=grafana-datasources
GRAFANA_DASHBOARDS=grafana-dashboards
GRAFANA_DASHBOARD_PROVIDERS=grafana-dashboards-providers
# Namespace every kubectl command of this script operates on.
ONAP_NAMESPACE=onap
# handle_backoffs aborts once CHECK_NUMBER exceeds this many failed checks.
MAXIMUM_BACK_OFF_CHECK_ITERATIONS=30
CHECK_NUMBER=0
PRODUCERS_TO_RECREATE=0
# kubectl output format: pod name plus the waiting reason of its container.
NAME_REASON_PATTERN="custom-columns=NAME:.metadata.name,REASON:.status.containerStatuses[].state.waiting.reason"
# Pod names are looked up once, when the script is loaded.
HVVES_POD_NAME=$(kubectl -n ${ONAP_NAMESPACE} get pods --no-headers=true -o custom-columns=:metadata.name | grep hv-ves-collector)
# Directory inside the HV-VES pod that receives the server certificates.
HVVES_CERT_PATH=/etc/ves-hv/ssl/server
# Kafka message retention in minutes (overridden by --retention-time-minutes).
KAFKA_RETENTION_TIME_MINUTES=60
MILISECONDS_IN_MINUTE=60000
# Command string (meant for eval) that converts the retention time to ms.
CALC_RETENTION_TIME_IN_MS_CMD='expr $KAFKA_RETENTION_TIME_MINUTES \* $MILISECONDS_IN_MINUTE'
KAFKA_ROUTER_0_POD_NAME=$(kubectl -n ${ONAP_NAMESPACE} get pods --no-headers=true -o custom-columns=:metadata.name | grep router-kafka-0)
# Command prefix; the retention value in ms is appended directly after the '='.
KAFKA_SET_TOPIC_RETENTION_TIME_CMD='kafka-topics --zookeeper message-router-zookeeper:2181 --alter --topic HV_VES_PERF3GPP --config retention.ms='
# Filter (meant for eval) used to swallow command output: only lines that
# contain both "abc" and "123" would pass through it.
HIDE_OUTPUT='grep abc | grep 123'
47
# Removes everything the test created in the ONAP namespace (ConfigMaps,
# prometheus/grafana/consumer deployments, producer pods, client cert secret)
# and finally calls ./configure-consul.sh with "true".
# Globals read: ONAP_NAMESPACE and the *_LABEL / GRAFANA_* / TEST_CONFIG_MAP names.
# Every deletion is best effort: a failing kubectl call does not stop the clean up.
function clean() {
    local ns="${ONAP_NAMESPACE}"

    printf '%s\n' "Cleaning up environment"

    printf '%s\n' "Attempting to delete test parameters ConfigMap"
    kubectl delete configmap "${TEST_CONFIG_MAP}" -n "${ns}"

    printf '%s\n' "Attempting to delete prometheus ConfigMap"
    kubectl delete configmap -l "name=${PROMETHEUS_CONF_LABEL}" -n "${ns}"

    printf '%s\n' "Attempting to delete prometheus deployment and service"
    kubectl delete service,deployments -l "app=${PROMETHEUS_APPS_LABEL}" -n "${ns}"

    printf '%s\n' "Attempting to delete grafana deployment and service"
    kubectl delete service,deployments -l "app=${GRAFANA_APPS_LABEL}" -n "${ns}"

    printf '%s\n' "Attempting to delete grafana ConfigMap (DASHBOARDS)"
    kubectl delete configmap "${GRAFANA_DASHBOARDS}" -n "${ns}"

    printf '%s\n' "Attempting to delete grafana ConfigMap (GRAFANA_DASHBOARD_PROVIDERS)"
    kubectl delete configmap -l "name=${GRAFANA_DASHBOARD_PROVIDERS}" -n "${ns}"

    printf '%s\n' "Attempting to delete grafana ConfigMap (GRAFANA_DATASOURCE)"
    kubectl delete configmap -l "name=${GRAFANA_DATASOURCE}" -n "${ns}"

    printf '%s\n' "Attempting to delete consumer deployments"
    kubectl delete deployments -l "app=${CONSUMER_APPS_LABEL}" -n "${ns}"

    printf '%s\n' "Attempting to delete producer pods"
    kubectl delete pods -l "app=${PRODUCER_APPS_LABEL}" -n "${ns}"

    printf '%s\n' "Attempting to delete client certs secret"
    kubectl delete secret cert -n "${ns}"

    printf '%s\n' "Attempting to turn off SSL"
    ./configure-consul.sh true

    printf '%s\n' "Environment clean up finished!"
}
86
# Copies the trust/server keystores and their password files from ../../ssl
# into ${HVVES_CERT_PATH} inside the HV-VES pod.
# Globals read: ONAP_NAMESPACE, HVVES_POD_NAME, HVVES_CERT_PATH.
# Side effect: the working directory is left at ../../ssl (setup_environment
# reads ./client.p12 from there right after this call).
# Returns: 1 when the ssl directory cannot be entered, so that nothing is
#          copied from an unrelated directory.
function copy_certs_to_hvves() {
    cd ../../ssl || {
        echo "Error: cannot enter certs directory ../../ssl" >&2
        return 1
    }
    echo "Attempting to create certs directory in HV-VES"
    # "--" separates kubectl's own options from the command run in the pod;
    # "-p" keeps this step from failing when the directory already exists.
    kubectl exec -n "${ONAP_NAMESPACE}" "${HVVES_POD_NAME}" -- mkdir -p "${HVVES_CERT_PATH}"
    local file
    for file in trust.p12 trust.pass server.p12 server.pass; do
        echo "Copying file: ${file}"
        kubectl cp "${file}" "${ONAP_NAMESPACE}/${HVVES_POD_NAME}:${HVVES_CERT_PATH}"
    done
}
97
# Sets the retention time of the Kafka topic named in
# KAFKA_SET_TOPIC_RETENTION_TIME_CMD to KAFKA_RETENTION_TIME_MINUTES minutes.
# Globals read: KAFKA_RETENTION_TIME_MINUTES, MILISECONDS_IN_MINUTE,
#               KAFKA_ROUTER_0_POD_NAME, ONAP_NAMESPACE,
#               KAFKA_SET_TOPIC_RETENTION_TIME_CMD.
# Returns: 1 (without contacting the cluster) when the retention time is not a
#          non-negative whole number, otherwise kubectl's exit status.
function set_kafka_retention_time() {
    echo "Setting message retention time"
    # The value can come straight from the command line, so it is validated
    # here instead of being handed to eval/expr.
    if ! [[ "${KAFKA_RETENTION_TIME_MINUTES}" =~ ^[0-9]+$ ]]; then
        echo "Error: retention time must be a whole number of minutes, got: '${KAFKA_RETENTION_TIME_MINUTES}'" >&2
        return 1
    fi
    # 10# forces base 10 so that a value such as "08" is not read as octal.
    local retention_ms=$((10#${KAFKA_RETENTION_TIME_MINUTES} * MILISECONDS_IN_MINUTE))
    # The command prefix is a plain string that has to be split into words.
    # shellcheck disable=SC2086
    kubectl exec -it "${KAFKA_ROUTER_0_POD_NAME}" -n "${ONAP_NAMESPACE}" -- \
        ${KAFKA_SET_TOPIC_RETENTION_TIME_CMD}${retention_ms} > /dev/null
}
102
# Recreates the test properties ConfigMap from PROPERTIES_FILE and then creates
# CONTAINERS_COUNT producer pods from producer-pod.yaml.
# Globals read: PROPERTIES_FILE, TEST_CONFIG_MAP, ONAP_NAMESPACE, CONTAINERS_COUNT.
# The ConfigMap deletion is best effort; while the pods are being created
# errexit is switched on, so a failing `kubectl create` terminates the shell.
function create_producers() {
    echo "Recreating test properties ConfigMap from: $PROPERTIES_FILE"
    kubectl delete configmap "${TEST_CONFIG_MAP}" -n "${ONAP_NAMESPACE}"
    # PROPERTIES_FILE may be a user supplied path; quoting keeps a path that
    # contains spaces in one argument.
    kubectl create configmap "${TEST_CONFIG_MAP}" --from-env-file="${PROPERTIES_FILE}" -n "${ONAP_NAMESPACE}"

    set -e
    local i
    for i in $(seq 1 "${CONTAINERS_COUNT}"); do
        echo "Creating ${i}/${CONTAINERS_COUNT} producer"
        kubectl create -f producer-pod.yaml -n "${ONAP_NAMESPACE}"
    done
    echo "Producers created"
    set +e
}
117
# Generates the certificates by running gen-certs.sh inside ../../ssl.
# Side effect: the working directory is left at ../../ssl.
# Returns: 1 when the ssl directory cannot be entered (the generator is then
#          not started from an unrelated directory), otherwise the exit
#          status of gen-certs.sh.
function generate_certs() {
    echo "Generation of certs"
    cd ../../ssl || {
        echo "Error: cannot enter certs directory ../../ssl" >&2
        return 1
    }
    ./gen-certs.sh
}
123
# Counts one failed check whenever at least one producer pod is waiting with
# an image pull problem, and terminates the script with status 1 once more
# than MAXIMUM_BACK_OFF_CHECK_ITERATIONS such checks have been counted.
# Globals read:    PRODUCER_APPS_LABEL, ONAP_NAMESPACE, NAME_REASON_PATTERN,
#                  MAXIMUM_BACK_OFF_CHECK_ITERATIONS.
# Globals written: IMAGE_PULL_BACK_OFFS, CHECK_NUMBER (never reset here).
function handle_backoffs() {
    # The reason is the last column of NAME_REASON_PATTERN, so the match must
    # not depend on any whitespace following (or preceding) the reason text.
    IMAGE_PULL_BACK_OFFS=$(kubectl get pods -l app="${PRODUCER_APPS_LABEL}" -n "${ONAP_NAMESPACE}" -o "${NAME_REASON_PATTERN}" \
        | grep -c -e "ImagePullBackOff" -e "ErrImagePull")
    if [[ ${IMAGE_PULL_BACK_OFFS} -gt 0 ]]; then
        CHECK_NUMBER=$((CHECK_NUMBER + 1))
        if [[ ${CHECK_NUMBER} -gt ${MAXIMUM_BACK_OFF_CHECK_ITERATIONS} ]]; then
            echo "Error: Image pull problem"
            exit 1
        fi
    fi
}
134
# SIGINT handler of the load test: restores the default SIGINT disposition,
# deletes all producer pods, adds the number of kubectl output lines that
# mention "producer" to COMPLETED_PRODUCERS_SUM, prints the total and exits 0.
# Globals read:    PRODUCER_APPS_LABEL, ONAP_NAMESPACE.
# Globals written: COMPLETED_PRODUCERS_SUM.
function handle_key_interrupt() {
    trap - SIGINT
    echo "Script interrupted, attempt to delete producers"
    echo "Wait with patience"
    local deleted_producers
    deleted_producers=$(kubectl delete pods -l app="${PRODUCER_APPS_LABEL}" -n "${ONAP_NAMESPACE}" | grep -c producer)
    COMPLETED_PRODUCERS_SUM=$((deleted_producers + COMPLETED_PRODUCERS_SUM))
    echo "Total number of completed producers: ${COMPLETED_PRODUCERS_SUM}"
    exit 0
}
143
# Prints the banner with the effective test configuration.
# Globals read: CONTAINERS_COUNT, PROPERTIES_FILE, KAFKA_RETENTION_TIME_MINUTES.
function print_test_setup_info() {
    local separator="________________________________________"

    printf '%s\n' \
        "Starting cloud based performance tests" \
        "${separator}" \
        "Test configuration:" \
        "Producer containers count: ${CONTAINERS_COUNT}" \
        "Properties file path: ${PROPERTIES_FILE}" \
        "Retention time of kafka messages in minutes: ${KAFKA_RETENTION_TIME_MINUTES}" \
        "${separator}"
}
153
# Prints the help text (actions, optional parameters of "start" and example
# invocations) to stdout and terminates the script with status 1.
function usage() {
    cat <<EOF

Run cloud based HV-VES performance test
Usage $0 gen_certs|setup|start|clean|help
  gen_certs: generate certs in ../../ssl directory
  setup    : set up ConfigMap and consumers
  start    : create producers - start the performance test
    Optional parameters:
      --load              : should test keep defined containers number till script interruption (false)
      --containers        : number of producer containers to create (1)
      --properties-file   : path to file with benchmark properties (./test.properties)
      --retention-time-minutes : messages retention time on kafka in minutes (60)
  clean    : remove ConfigMap, HV-VES consumers and producers
  help     : print usage
Example invocations:
./cloud-based-performance-test.sh gen_certs
./cloud-based-performance-test.sh setup
./cloud-based-performance-test.sh start
./cloud-based-performance-test.sh start --containers 10
./cloud-based-performance-test.sh start --load true --containers 10
./cloud-based-performance-test.sh start --load true --containers 10 --retention-time-minutes 50
./cloud-based-performance-test.sh start --properties-file ~/other_test.properties
./cloud-based-performance-test.sh clean
EOF
    exit 1
}
179
# Prepares the cluster for a test run: copies the server certs into the HV-VES
# pod, creates the client cert secret, calls ./configure-consul.sh with
# "false", creates the test properties ConfigMap and applies the consumer,
# prometheus and grafana manifests, then waits for the consumers.
# Globals read: ONAP_NAMESPACE, SCRIPT_DIRECTORY, PROPERTIES_FILE,
#               TEST_CONFIG_MAP, GRAFANA_DASHBOARDS, CONSUMER_APPS_LABEL.
# Returns: 1 when SCRIPT_DIRECTORY cannot be entered, because every following
#          step uses paths relative to that directory.
function setup_environment() {
    echo "Setting up environment"

    echo "Copying certs to hv-ves pod"
    copy_certs_to_hvves

    echo "Creating secrets with clients cert"
    # copy_certs_to_hvves leaves the working directory at ../../ssl, which is
    # where the client keystore is read from.
    kubectl create secret generic cert --from-file=./client.p12 --from-file=./client.pass -n "${ONAP_NAMESPACE}"
    # Quoted so that a script location containing spaces still works.
    cd "${SCRIPT_DIRECTORY}" || {
        echo "Error: cannot enter script directory: ${SCRIPT_DIRECTORY}" >&2
        return 1
    }

    echo "Turning on SSL"
    ./configure-consul.sh false

    echo "Creating test properties ConfigMap from: $PROPERTIES_FILE"
    kubectl create configmap "${TEST_CONFIG_MAP}" --from-env-file="${PROPERTIES_FILE}" -n "${ONAP_NAMESPACE}"

    echo "Creating consumer deployment"
    kubectl apply -f consumer-deployment.yaml

    echo "Creating ConfigMap for prometheus deployment"
    kubectl apply -f prometheus/prometheus-config-map.yaml

    echo "Creating prometheus deployment"
    kubectl apply -f prometheus-deployment.yaml

    echo "Creating ConfigMap for grafana connections dashboard"
    kubectl create configmap "${GRAFANA_DASHBOARDS}" -n "${ONAP_NAMESPACE}" --from-file grafana/dashboards/

    echo "Creating ConfigMap for grafana datasource"
    kubectl apply -f grafana/datasources/datasource.yaml

    echo "Creating ConfigMap for grafana dashboards-providers"
    kubectl apply -f grafana/dashboards-providers/dashboards.yaml

    echo "Creating grafana deployment"
    kubectl apply -f grafana-deployment.yaml

    echo "Waiting for consumers to be running."
    # NOTE(review): the loop only waits while the pod listing contains
    # "unhealthy" or "starting"; confirm these strings can actually appear in
    # `kubectl get pods` output, otherwise this loop never waits.
    while [[ $(kubectl get pods -l app="${CONSUMER_APPS_LABEL}" -n "${ONAP_NAMESPACE}" | grep -c "unhealthy\|starting") -ne 0 ]]; do
        sleep 1
    done
    echo "Setting up environment finished!"
}
223
# Load-test mode: creates the initial producers and then, once per second,
# creates as many new producers as are missing from the "Running" ones and
# deletes the producer pods that have succeeded. The loop ends only when
# handle_backoffs has counted more failed checks than allowed, or on CTRL + C
# (handled by handle_key_interrupt). Always terminates the script.
# Globals read:    CONTAINERS_COUNT, PRODUCER_APPS_LABEL, ONAP_NAMESPACE,
#                  CHECK_NUMBER, MAXIMUM_BACK_OFF_CHECK_ITERATIONS.
# Globals written: PRODUCERS_TO_RECREATE, COMPLETED_PRODUCERS_SUM.
function start_load_tests() {
    print_test_setup_info

    set_kafka_retention_time

    echo "CTRL + C to stop/interrupt this script"
    create_producers

    trap handle_key_interrupt INT

    echo "Constant producer number keeper started working"
    local running_producers producer_index
    while true; do
        running_producers=$(kubectl get pods -l app="${PRODUCER_APPS_LABEL}" -n "${ONAP_NAMESPACE}" | grep -c "Running")
        PRODUCERS_TO_RECREATE=$((CONTAINERS_COUNT - running_producers))
        handle_backoffs

        # errexit is active only while pods are (re)created.
        set -e
        for producer_index in $(seq 1 "${PRODUCERS_TO_RECREATE}"); do
            echo "Recreating ${producer_index}/${PRODUCERS_TO_RECREATE} producer"
            kubectl create -f producer-pod.yaml -n "${ONAP_NAMESPACE}"
        done
        set +e
        COMPLETED_PRODUCERS_SUM=$((COMPLETED_PRODUCERS_SUM + PRODUCERS_TO_RECREATE))
        echo "Attempting to clear completed producers"
        kubectl delete pod --field-selector=status.phase==Succeeded -l app="${PRODUCER_APPS_LABEL}" -n "${ONAP_NAMESPACE}"

        if [[ ${CHECK_NUMBER} -gt ${MAXIMUM_BACK_OFF_CHECK_ITERATIONS} ]]; then
            break
        fi
        sleep 1
    done

    trap - SIGINT
    exit 0
}
257
# One-shot mode: creates the producers, polls once per second until the number
# of "Completed" producer pods equals CONTAINERS_COUNT (or handle_backoffs has
# counted more failed checks than allowed), deletes the producer pods and
# terminates the script with status 0.
# Globals read:    CONTAINERS_COUNT, PRODUCER_APPS_LABEL, ONAP_NAMESPACE,
#                  CHECK_NUMBER, MAXIMUM_BACK_OFF_CHECK_ITERATIONS.
# Globals written: COMPLETED_PRODUCERS.
function start_performance_test() {
    print_test_setup_info

    set_kafka_retention_time

    create_producers

    echo "Waiting for producers completion"
    while true; do
        COMPLETED_PRODUCERS=$(kubectl get pods -l app="${PRODUCER_APPS_LABEL}" -n "${ONAP_NAMESPACE}" | grep -c "Completed")
        handle_backoffs

        if [[ ${COMPLETED_PRODUCERS} -eq ${CONTAINERS_COUNT} ]] \
            || [[ ${CHECK_NUMBER} -gt ${MAXIMUM_BACK_OFF_CHECK_ITERATIONS} ]]; then
            break
        fi
        sleep 1
    done

    echo "Attempting to delete producer pods"
    kubectl delete pods -l app="${PRODUCER_APPS_LABEL}" -n "${ONAP_NAMESPACE}"
    echo "Performance test finished"
    exit 0
}
279
# Entry point: runs every action named on the command line, in order.
# Arguments: gen_certs | setup | start [options] | clean | help
#   Options of "start" are --load, --containers, --properties-file and
#   --retention-time-minutes; each of them takes exactly one value.
# Without arguments, with an unknown action/option or with an option that is
# missing its value, the usage text is printed and the script exits.
function main() {
    if [[ $# -eq 0 ]]; then
        usage
    else
        # All actions use paths relative to the script directory.
        cd "${SCRIPT_DIRECTORY}" || {
            echo "Error: cannot enter script directory: ${SCRIPT_DIRECTORY}" >&2
            exit 1
        }

        local arg
        for arg in "$@"; do
            case "${arg}" in
                gen_certs)
                    generate_certs
                    ;;
                setup)
                    setup_environment
                    ;;
                start)
                    # Drops the first positional parameter, i.e. this assumes
                    # that "start" is the first argument on the command line.
                    shift 1
                    while [[ $# -gt 0 ]]; do
                        case "${1}" in
                            --load)
                                LOAD_TEST=${2}
                                ;;
                            --containers)
                                CONTAINERS_COUNT=${2}
                                ;;
                            --properties-file)
                                PROPERTIES_FILE=${2}
                                ;;
                            --retention-time-minutes)
                                KAFKA_RETENTION_TIME_MINUTES=${2}
                                ;;
                            *)
                                echo "Unknown option: ${1}"
                                usage
                                ;;
                        esac
                        # `shift 2` fails without shifting anything when only
                        # one parameter is left, which would loop forever.
                        if [[ $# -lt 2 ]]; then
                            echo "Missing value for option: ${1}" >&2
                            usage
                        fi
                        shift 2
                    done
                    if [[ "${LOAD_TEST}" == "true" ]]; then
                        start_load_tests
                    else
                        start_performance_test
                    fi
                    ;;
                clean)
                    clean
                    ;;
                help)
                    usage
                    ;;
                *)
                    echo "Unknown action: ${arg}" >&2
                    usage
                    ;;
            esac
        done
    fi
}

main "$@"