Add healthchecks for docker-compose for k6 and CSIT 38/138838/16
author: danielhanrahan <daniel.hanrahan@est.tech>
Thu, 22 Aug 2024 16:39:58 +0000 (17:39 +0100)
committer: danielhanrahan <daniel.hanrahan@est.tech>
Thu, 13 Feb 2025 10:20:00 +0000 (10:20 +0000)
To improve the reliability and consistency of healthchecks in the
k6 and CSIT tests, the healthchecks are now implemented in docker-compose.
This commit also adds fail-fast logic: the k6 and CSIT tests abort
as soon as the containers fail to become healthy within the start-up timeout.

Implementation:
- Add healthchecks for docker containers used in tests.
- Change k6 & CSIT tests to use docker healthchecks.
- Tests will abort if containers are not healthy.
- Start-up timeout for CPS containers is 90 seconds.
- Start-up timeout for other containers is 60 seconds.

Other Improvements:
- Add --quiet-pull option to suppress junk output in Jenkins logs.
- Add kpi.env file containing environment variables for KPI pipeline,
  just like endurance.env. This allows same code to run either suite.
- Change from a port range to a single port number for the Postgres exporter,
  since only a single instance runs; this is consistent with the other containers.

Issue-ID: CPS-2630
Signed-off-by: danielhanrahan <daniel.hanrahan@est.tech>
Change-Id: I50929ca4061bb844fca87a0d6c3103aaa2c45e0b

csit/plans/cps/setup.sh
docker-compose/docker-compose.yml
docker-compose/env/endurance.env [moved from docker-compose/config/endurance.env with 90% similarity]
docker-compose/env/kpi.env [new file with mode: 0644]
k6-tests/setup.sh

index 00ed52a..332be8c 100755 (executable)
@@ -1,6 +1,11 @@
 #!/bin/bash
 #
 # Copyright 2016-2017 Huawei Technologies Co., Ltd.
+# Modifications copyright (c) 2017 AT&T Intellectual Property
+# Modifications copyright (c) 2020-2021 Samsung Electronics Co., Ltd.
+# Modifications Copyright (C) 2021 Pantheon.tech
+# Modifications Copyright (C) 2021 Bell Canada.
+# Modifications Copyright (C) 2021-2025 Nordix Foundation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# Modifications copyright (c) 2017 AT&T Intellectual Property
-# Modifications copyright (c) 2020-2021 Samsung Electronics Co., Ltd.
-# Modifications Copyright (C) 2021 Pantheon.tech
-# Modifications Copyright (C) 2021 Bell Canada.
-# Modifications Copyright (C) 2021-2024 Nordix Foundation.
-#
 # Branched from ccsdk/distribution to this repository Feb 23, 2021
 #
 
-check_health()
-{
-  TIME_OUT=120
-  INTERVAL=5
-  TICKER=0
-
-  while [ "$TICKER" -le "$TIME_OUT" ]; do
-
-    RESPONSE=$(curl --location --request GET 'http://'$1'/actuator/health/readiness')
-
-    if [[ "$RESPONSE" == *"UP"* ]]; then
-      echo "$2 started in $TICKER"
-      break;
-    fi
-
-    sleep $INTERVAL
-    TICKER=$((TICKER + INTERVAL))
-
-  done
-
-  if [ "$TICKER" -ge "$TIME_OUT" ]; then
-    echo TIME OUT: $2 session not started in $TIME_OUT seconds... Could cause problems for testing activities...
-  fi
-}
-
 ###################### setup env ############################
 # Set env variables for docker compose
 export LOCAL_IP=$((ip -4 addr show docker0 | grep -Po 'inet \K[\d.]+') || hostname -I | awk '{print $1}')
@@ -58,8 +32,8 @@ export $(cut -d= -f1 $WORKSPACE/plans/cps/test.properties)
 ###################### setup cps-ncmp ############################
 cd $CPS_HOME/docker-compose
 
-# start CPS/NCMP, DMI Plugin, and PostgreSQL containers with docker compose
-docker-compose --profile dmi-service up -d
+# start CPS/NCMP, DMI Plugin, and PostgreSQL containers with docker compose, waiting for all containers to be healthy
+docker-compose --profile dmi-service up -d --quiet-pull --wait || exit 1
 
 ###################### setup sdnc #######################################
 source $WORKSPACE/plans/cps/sdnc/sdnc_setup.sh
@@ -67,14 +41,6 @@ source $WORKSPACE/plans/cps/sdnc/sdnc_setup.sh
 ###################### setup pnfsim #####################################
 docker-compose -f $WORKSPACE/plans/cps/pnfsim/docker-compose.yml up -d
 
-###################### verify ncmp-cps health ##########################
-
-check_health $CPS_CORE_HOST:$CPS_CORE_PORT 'cps-ncmp'
-
-###################### verify dmi health ##########################
-
-check_health $DMI_HOST:$DMI_PORT 'dmi-plugin'
-
 ###################### ROBOT Configurations ##########################
 # Pass variables required for Robot test suites in ROBOT_VARIABLES
 ROBOT_VARIABLES="-v CPS_CORE_HOST:$CPS_CORE_HOST -v CPS_CORE_PORT:$CPS_CORE_PORT -v DMI_HOST:$LOCAL_IP -v DMI_PORT:$DMI_PORT -v DMI_VERSION:$DMI_VERSION -v DMI_CSIT_STUB_HOST:$LOCAL_IP -v DMI_CSIT_STUB_PORT:$DMI_DEMO_STUB_PORT -v DMI_AUTH_ENABLED:$DMI_AUTH_ENABLED -v DATADIR_CPS_CORE:$WORKSPACE/data/cps-core -v DATADIR_NCMP:$WORKSPACE/data/ncmp -v DATADIR_SUBS_NOTIFICATION:$WORKSPACE/data/subscription-notification --exitonfailure"
index 8e42bc5..2747996 100644 (file)
 
 services:
 
-  ### docker-compose --profile dmi-service up -d -> run CPS services incl. dmi-plugin ###
+  ### docker-compose --profile dmi-service up -d --wait -> run CPS services incl. dmi-plugin
   ### docker-compose --profile dmi-stub --profile monitoring up -d -> run CPS with stubbed dmi-plugin (for registration performance testing)
   ### docker-compose --profile dmi-stub --profile tracing up -d -> run CPS with stubbed dmi-plugin (for open telemetry tracing testing make ONAP_TRACING_ENABLED "true" later "http://localhost:16686" can be accessed from browser)
   ### docker-compose --profile dmi-stub --profile policy-executor-stub up -d -> run CPS with stubbed dmi-plugin and policy executor stub (for policy executor service testing make POLICY_SERVICE_ENABLED "true")
-  ### to disable notifications make notification.enabled to false & comment out kafka/zookeeper services ###
+  ### to disable notifications make notification.enabled to false & comment out kafka/zookeeper services
   ### DEBUG: Look for '### DEBUG' comments to enable CPS-NCMP debugging
-  ### docker-compose --profile dmi-stub --project-name endurance --env-file config/endurance.env up -d -> run CPS with stubbed dmi-plugin for endurance testing
+  ### docker-compose --profile dmi-stub --project-name endurance --env-file env/endurance.env up -d -> run CPS with stubbed dmi-plugin for endurance testing
   ### docker-compose --profile dmi-stub --project-name endurance down --volumes
 
   dbpostgresql:
@@ -46,6 +46,13 @@ services:
         limits:
           cpus: '6'
           memory: 3G
+    healthcheck:
+      test: pg_isready || exit 1 # This command runs inside the container, returning 0 for success, non-zero for failure.
+      timeout: 10s               # Time-out of the above test command.
+      interval: 10s              # How often the health check is run.
+      retries: 3                 # If 3 consecutive health checks fail, the container is unhealthy.
+      start_period: 30s          # Ignore failed health checks for first 30 seconds, to give system time to start
+      # Full start up time allowed = 30 seconds start period + 3 tries * 10 seconds interval = 60 seconds
 
   cps-and-ncmp:
     image: ${DOCKER_REPO:-nexus3.onap.org:10003}/onap/cps-and-ncmp:${CPS_VERSION:-latest}
@@ -71,7 +78,7 @@ services:
       CPS_MONITORING_MICROMETER_JVM_EXTRAS: 'true'
       JAVA_TOOL_OPTIONS: "-XX:InitialRAMPercentage=70.0 -XX:MaxRAMPercentage=70.0"
       ### DEBUG: Uncomment next line to enable java debugging
-      ### JAVA_TOOL_OPTIONS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
+      # JAVA_TOOL_OPTIONS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
     restart: unless-stopped
     depends_on:
       - dbpostgresql
@@ -83,6 +90,12 @@ services:
           cpus: '3'
           memory: 3G
     memswap_limit: 3G
+    healthcheck:
+      test: wget -q -O - http://localhost:8080/actuator/health/readiness | grep -q '{"status":"UP"}' || exit 1
+      interval: 10s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
 
   nginx:
     container_name: ${NGINX_CONTAINER_NAME:-nginx-loadbalancer}
@@ -94,6 +107,12 @@ services:
     volumes:
       - ./config/nginx/nginx.conf:/etc/nginx/nginx.conf
       - ./config/nginx/proxy_params:/etc/nginx/proxy_params
+    healthcheck:
+      test: curl -fs http://localhost/actuator/health/readiness || exit 1
+      interval: 10s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
 
   ### if kafka is not required comment out zookeeper and kafka ###
   zookeeper:
@@ -103,6 +122,12 @@ services:
       - ${ZOOKEEPER_PORT:-2181}:2181
     environment:
       ZOOKEEPER_CLIENT_PORT: 2181
+    healthcheck:
+      test: nc -z localhost 2181 || exit 1
+      interval: 10s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
 
   kafka:
     image: confluentinc/cp-kafka:7.8.0
@@ -117,6 +142,12 @@ services:
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,CONNECTIONS_FROM_HOST:PLAINTEXT
       KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,CONNECTIONS_FROM_HOST://localhost:9092
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+    healthcheck:
+      test: kafka-topics --bootstrap-server kafka:29092 --list || exit 1
+      interval: 10s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
 
   ncmp-dmi-plugin:
     container_name: ${NCMP_DMI_PLUGIN_CONTAINER_NAME:-ncmp-dmi-plugin}
@@ -142,6 +173,12 @@ services:
     restart: unless-stopped
     profiles:
       - dmi-service
+    healthcheck:
+      test: wget -q -O - http://localhost:8080/actuator/health/readiness | grep -q '{"status":"UP"}' || exit 1
+      interval: 10s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
 
   ncmp-dmi-plugin-demo-and-csit-stub:
     container_name: ${NCMP_DMI_PLUGIN_DEMO_AND_CSIT_STUB_CONTAINER_NAME:-ncmp-dmi-plugin-demo-and-csit-stub}
@@ -161,6 +198,12 @@ services:
     profiles:
       - dmi-stub
       - dmi-service
+    healthcheck:
+      test: wget -q -O - http://localhost:8092/actuator/health/readiness | grep -q '{"status":"UP"}' || exit 1
+      interval: 10s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
 
   policy-executor-stub:
     container_name: ${POLICY_EXECUTOR_STUB_CONTAINER_NAME:-policy-executor-stub}
@@ -170,6 +213,7 @@ services:
     restart: unless-stopped
     profiles:
       - policy-executor-stub
+    # Note: policy-executor-stub does not have a healthcheck because it does not expose an /actuator/health endpoint.
 
   prometheus:
     container_name: ${PROMETHEUS_CONTAINER_NAME:-prometheus}
@@ -182,11 +226,6 @@ services:
       - prometheus_data:/prometheus
     environment:
       - PROMETHEUS_RETENTION_TIME=${PROMETHEUS_RETENTION_TIME:-30d}
-    healthcheck:
-      test: [ "CMD-SHELL", "wget --spider --quiet --tries=1 --timeout=10 http://localhost:9090/-/healthy || exit 1" ]
-      interval: 30s
-      timeout: 10s
-      retries: 3
     profiles:
       - monitoring
 
@@ -194,8 +233,7 @@ services:
     image: grafana/grafana:latest
     container_name: ${GRAFANA_CONTAINER_NAME:-grafana}
     depends_on:
-      prometheus:
-        condition: service_started
+      - prometheus
     ports:
       - ${GRAFANA_PORT:-3000}:3000
     volumes:
@@ -234,11 +272,12 @@ services:
       - tracing
 
   postgres-exporter:
+    container_name: ${POSTGRES_EXPORTER_CONTAINER_NAME:-postgres-exporter}
     image: quay.io/prometheuscommunity/postgres-exporter
     environment:
       - DATA_SOURCE_NAME=postgresql://${DB_USERNAME:-cps}:${DB_PASSWORD:-cps}@${DB_CONTAINER_NAME:-dbpostgresql}:5432/postgres?sslmode=disable
     ports:
-      - ${POSTGRES_EXPORTER_PORT_RANGE:-9187-9188}:9187
+      - ${POSTGRES_EXPORTER_PORT:-9187}:9187
     depends_on:
       - dbpostgresql
 
similarity index 90%
rename from docker-compose/config/endurance.env
rename to docker-compose/env/endurance.env
index e46bd54..907c63a 100644 (file)
@@ -1,7 +1,8 @@
 DB_CONTAINER_NAME=endurance-dbpostgresql
 DB_PORT=5433
 
-POSTGRES_EXPORTER_PORT_RANGE=9187-9188
+POSTGRES_EXPORTER_CONTAINER_NAME=endurance-postgres-exporter
+POSTGRES_EXPORTER_PORT=9188
 
 NGINX_CONTAINER_NAME=endurance-nginx-loadbalancer
 CPS_CORE_PORT=8884
@@ -35,4 +36,4 @@ JAEGER_SERVICE_CONTAINER_NAME=endurance-jaeger-service
 JAEGER_SERVICE_PORT=16687
 
 CPS_NCMP_CACHES_CLUSTER_NAME=endurance-cps-and-ncmp-common-cache-cluster
-CPS_NCMP_INSTANCE_CONFIG_NAME=endurance-cps-and-ncmp-hazelcast-instance-config
\ No newline at end of file
+CPS_NCMP_INSTANCE_CONFIG_NAME=endurance-cps-and-ncmp-hazelcast-instance-config
diff --git a/docker-compose/env/kpi.env b/docker-compose/env/kpi.env
new file mode 100644 (file)
index 0000000..0fd8ef2
--- /dev/null
@@ -0,0 +1,39 @@
+DB_CONTAINER_NAME=kpi-dbpostgresql
+DB_PORT=5432
+
+POSTGRES_EXPORTER_CONTAINER_NAME=kpi-postgres-exporter
+POSTGRES_EXPORTER_PORT=9187
+
+NGINX_CONTAINER_NAME=kpi-nginx-loadbalancer
+CPS_CORE_PORT=8883
+CPS_PORT_RANGE=8698-8699
+
+ZOOKEEPER_CONTAINER_NAME=kpi-zookeeper
+ZOOKEEPER_PORT=2181
+
+KAFKA_CONTAINER_NAME=kpi-kafka
+KAFKA_PORT=9092
+
+NCMP_DMI_PLUGIN_CONTAINER_NAME=kpi-ncmp-dmi-plugin
+DMI_PORT=8783
+
+NCMP_DMI_PLUGIN_DEMO_AND_CSIT_STUB_CONTAINER_NAME=kpi-ncmp-dmi-plugin-demo-and-csit-stub
+DMI_DEMO_STUB_PORT=8784
+
+POLICY_EXECUTOR_STUB_CONTAINER_NAME=kpi-policy-executor-stub
+POLICY_EXECUTOR_STUB_PORT=8785
+
+PROMETHEUS_CONTAINER_NAME=kpi-prometheus
+PROMETHEUS_PORT=9090
+
+GRAFANA_CONTAINER_NAME=kpi-grafana
+GRAFANA_PORT=3000
+
+KAFKA_UI_CONTAINER_NAME=kpi-kafka-ui
+KAFKA_UI_PORT=8089
+
+JAEGER_SERVICE_CONTAINER_NAME=kpi-jaeger-service
+JAEGER_SERVICE_PORT=16686
+
+CPS_NCMP_CACHES_CLUSTER_NAME=kpi-cps-and-ncmp-common-cache-cluster
+CPS_NCMP_INSTANCE_CONFIG_NAME=kpi-cps-and-ncmp-hazelcast-instance-config
index c794c64..3a121cf 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2024 Nordix Foundation.
+# Copyright 2024-2025 Nordix Foundation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 testProfile=$1
 echo "Spinning off the CPS and NCMP containers for $testProfile testing..."
 
-if [[ "$testProfile" == "endurance" ]]; then
-  docker-compose -f ../docker-compose/docker-compose.yml --profile dmi-stub --project-name "$testProfile" --env-file ../docker-compose/config/endurance.env up --quiet-pull -d
-  CONTAINER_IDS=$(docker ps --filter "name=endurance-cps-and-ncmp" --format "{{.ID}}")
-else
-  docker-compose -f ../docker-compose/docker-compose.yml --profile dmi-stub --project-name "$testProfile" up --quiet-pull -d
-  CONTAINER_IDS=$(docker ps --filter "name=kpi-cps-and-ncmp" --format "{{.ID}}")
-fi
+ENV_FILE="../docker-compose/env/${testProfile}.env"
+docker-compose \
+  --file "../docker-compose/docker-compose.yml" \
+  --env-file "$ENV_FILE" \
+  --project-name "$testProfile" \
+  --profile dmi-stub \
+  up --quiet-pull --detach --wait || exit 1
 
-echo "Waiting for CPS to start..."
-READY_MESSAGE="Inventory Model updated successfully"
-
-# Check the logs for each container
-for CONTAINER_ID in $CONTAINER_IDS; do
-    echo "Checking logs for container: $CONTAINER_ID"
-    docker logs "$CONTAINER_ID" -f | grep -m 1 "$READY_MESSAGE" >/dev/null && echo "CPS is ready in container: $CONTAINER_ID" || true
-done
-
-# Output build information including git commit info
 echo "Build information:"
 curl http://localhost:8883/actuator/info
 echo