[COMMON] Fix db-metrics readiness timeout issue 28/124028/4
author a.sreekumar <ajith.sreekumar@bell.ca>
Fri, 10 Sep 2021 13:41:30 +0000 (14:41 +0100)
committer Sylvain Desbureaux <sylvain.desbureaux@orange.com>
Thu, 16 Sep 2021 11:51:48 +0000 (11:51 +0000)
DB connections from Policy Framework components fail intermittently with
a Connection refused error. Investigation showed that the mariadb-metrics
readiness probe is failing with a timeout, which intermittently affects
DB connectivity.

So, change the readiness timeout from 1 second to 5 seconds so that
there is enough time to receive the /metrics response and the
readiness probe can pass. Also make the probe properties configurable.

A similar issue could happen in other components too.

Change-Id: I8dfbfeb0fe791c1bce373dd9d7124d26457c4919
Issue-ID: POLICY-3637
Signed-off-by: a.sreekumar <ajith.sreekumar@bell.ca>
kubernetes/common/mariadb-galera/templates/statefulset.yaml
kubernetes/common/mariadb-galera/values.yaml

index 9227e18..bb3af76 100644 (file)
@@ -1,5 +1,6 @@
 {{/*
-# Copyright © 2018 Amdocs, Bell Canada
+# Copyright © 2018 Amdocs
+# Copyright © 2018,2021 Bell Canada
 # Copyright © 2019 Samsung Electronics
 # Copyright © 2019-2020 Orange
 # Copyright © 2020 Bitnami
@@ -202,14 +203,20 @@ spec:
             httpGet:
               path: /metrics
               port: metrics
-            initialDelaySeconds: 30
-            timeoutSeconds: 5
+            initialDelaySeconds: {{ .Values.metrics.livenessProbe.initialDelaySeconds }}
+            periodSeconds: {{ .Values.metrics.livenessProbe.periodSeconds }}
+            timeoutSeconds: {{ .Values.metrics.livenessProbe.timeoutSeconds }}
+            successThreshold: {{ .Values.metrics.livenessProbe.successThreshold }}
+            failureThreshold: {{ .Values.metrics.livenessProbe.failureThreshold }}
           readinessProbe:
             httpGet:
               path: /metrics
               port: metrics
-            initialDelaySeconds: 5
-            timeoutSeconds: 1
+            initialDelaySeconds: {{ .Values.metrics.readinessProbe.initialDelaySeconds }}
+            periodSeconds: {{ .Values.metrics.readinessProbe.periodSeconds }}
+            timeoutSeconds: {{ .Values.metrics.readinessProbe.timeoutSeconds }}
+            successThreshold: {{ .Values.metrics.readinessProbe.successThreshold }}
+            failureThreshold: {{ .Values.metrics.readinessProbe.failureThreshold }}
           {{ include "common.containerSecurityContext" . | indent 10 | trim }}
           resources: {{- toYaml .Values.metrics.resources | nindent 12 }}
         {{- end }}
index ed9977a..bc9273f 100644 (file)
@@ -1,4 +1,5 @@
-# Copyright © 2018 Amdocs, Bell Canada
+# Copyright © 2018 Amdocs
+# Copyright © 2018,2021 Bell Canada
 # Copyright © 2019 Samsung Electronics
 # Copyright © 2020 Bitnami, Orange
 #
@@ -560,6 +561,23 @@ metrics:
     requests:
       cpu: 0.5
       memory: 256Mi
+  ## MariaDB Galera metrics container's liveness and readiness probes
+  ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes
+  ## NOTE(review): `enabled` is not read by the statefulset.yaml hunk of this change - confirm
+  livenessProbe:
+    enabled: true
+    initialDelaySeconds: 30
+    periodSeconds: 10
+    timeoutSeconds: 5
+    successThreshold: 1
+    failureThreshold: 3
+  readinessProbe:
+    enabled: true
+    initialDelaySeconds: 5
+    periodSeconds: 10
+    timeoutSeconds: 5
+    successThreshold: 1
+    failureThreshold: 3
   ## MySQL Prometheus exporter service parameters
   ##
   service: