From 835a9024d524a48e2875645882c24712c5d176f8 Mon Sep 17 00:00:00 2001 From: "halil.cakal" Date: Wed, 28 May 2025 12:15:37 +0100 Subject: [PATCH] Fix intermittent failures on-demand KPI job - On-demand KPI jobs fail frequently since cps-and-ncmp sometimes gets a "Connection refused" error, which means PostgreSQL isn't fully ready yet. - Give the db more time to start (210s); previously it was 30s - Change healthcheck interval from 10s to 2s - Reduce the timeout of healthchecks from 10s to 1s - Store db container logs to investigate further - cps-and-ncmp waits for the db until it's fully ready before making a connection Issue-ID: CPS-2694 Change-Id: Ie6362741f98222eec58892734190b1ee0fff148e Signed-off-by: halil.cakal --- docker-compose/cps-base.yml | 14 ++++++++------ k6-tests/make-logs.sh | 42 +++++++++++++++++++----------------------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/docker-compose/cps-base.yml b/docker-compose/cps-base.yml index 2391fb2441..5fcec499a9 100644 --- a/docker-compose/cps-base.yml +++ b/docker-compose/cps-base.yml @@ -47,10 +47,10 @@ services: memory: 3G healthcheck: test: pg_isready || exit 1 # This command runs inside the container, returning 0 for success, non-zero for failure. - timeout: 10s # Time-out of the above test command. - interval: 10s # How often the health is run. - retries: 3 # If 3 health checks fail, the container is unhealthy. - start_period: 30s # Ignore failed health checks for first 30 seconds, to give system time to start + timeout: 1s # Time-out of the above test command. + interval: 2s # How often the health is run. + retries: 100 # If 100 health checks fail, the container is unhealthy.
+ start_period: 210s # Ignore failed health checks for first 210 seconds, to give system time to start ### Full start-up time allowed = 30 seconds start period + 3 tries * 10 seconds interval = 60 seconds cps-and-ncmp-template: @@ -99,7 +99,8 @@ services: ports: - ${CPS_INSTANCE_0_REST_PORT:-8698}:8080 depends_on: - - dbpostgresql + dbpostgresql: + condition: service_healthy ### DEBUG: For easier debugging use just 1 instance and comment out below cps-and-ncmp-1: @@ -112,7 +113,8 @@ services: ports: - ${CPS_INSTANCE_1_REST_PORT:-8699}:8080 depends_on: - - dbpostgresql + dbpostgresql: + condition: service_healthy nginx: container_name: ${NGINX_CONTAINER_NAME:-nginx-loadbalancer} diff --git a/k6-tests/make-logs.sh b/k6-tests/make-logs.sh index 60976247e5..f3343b6de8 100644 --- a/k6-tests/make-logs.sh +++ b/k6-tests/make-logs.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright 2025 Nordix Foundation. +# Copyright 2025 OpenInfra Foundation Europe. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,30 +14,26 @@ # See the License for the specific language governing permissions and # limitations under the License.
# - -SERVICE_NAME="cps-and-ncmp" +SERVICE_NAMES=("cps-and-ncmp" "dbpostgresql") TIMESTAMP=$(date +"%Y%m%d%H%M%S") LOG_DIR="${WORKSPACE:-.}/logs" -TEMP_DIR="$LOG_DIR/temp_$TIMESTAMP" -ZIP_FILE="$LOG_DIR/${SERVICE_NAME}_logs_$TIMESTAMP.zip" - mkdir -p "$LOG_DIR" -mkdir -p "$TEMP_DIR" - -# Store logs for cps-and-ncmp containers to temp directory -CONTAINER_IDS=$(docker ps --filter "name=$SERVICE_NAME" --format "{{.ID}}") -for CONTAINER_ID in $CONTAINER_IDS; do - CONTAINER_NAME=$(docker inspect --format="{{.Name}}" "$CONTAINER_ID" | sed 's/\///g') - LOG_FILE="$TEMP_DIR/${CONTAINER_NAME}_logs_$TIMESTAMP.log" - docker logs "$CONTAINER_ID" > "$LOG_FILE" +# Store logs for each service's containers and zip them individually +for SERVICE_NAME in "${SERVICE_NAMES[@]}"; do + TEMP_DIR="$LOG_DIR/temp_${SERVICE_NAME}_$TIMESTAMP" + ZIP_FILE="$LOG_DIR/logs_${SERVICE_NAME}_$TIMESTAMP.zip" + mkdir -p "$TEMP_DIR" + CONTAINER_IDS=$(docker ps --filter "name=$SERVICE_NAME" --format "{{.ID}}") + for CONTAINER_ID in $CONTAINER_IDS; do + CONTAINER_NAME=$(docker inspect --format="{{.Name}}" "$CONTAINER_ID" | sed 's/\///g') + LOG_FILE="$TEMP_DIR/${CONTAINER_NAME}_logs_$TIMESTAMP.log" + docker logs "$CONTAINER_ID" > "$LOG_FILE" + done + # Zip the logs for the current service + zip -r "$ZIP_FILE" "$TEMP_DIR" + echo "Logs for service $SERVICE_NAME saved to $ZIP_FILE" + # Clean temp files for the current service + rm -r "$TEMP_DIR" done - -# Zip the logs -zip -r "$ZIP_FILE" "$TEMP_DIR" -echo "Logs saved to $ZIP_FILE inside workspace" - -# Clean temp files -rm -r "$TEMP_DIR" - # Delete logs older than 2 weeks -find "$LOG_DIR" -name "${SERVICE_NAME}_logs_*.zip" -mtime +14 -delete +find "$LOG_DIR" -name "logs_*.zip" -mtime +14 -delete \ No newline at end of file -- 2.16.6