[MUSIC][CASSANDRA] Use Startup probes 96/121096/7
author Sylvain Desbureaux <sylvain.desbureaux@orange.com>
Wed, 5 May 2021 08:50:55 +0000 (10:50 +0200)
committer Sylvain Desbureaux <sylvain.desbureaux@orange.com>
Wed, 12 May 2021 11:25:49 +0000 (11:25 +0000)
Instead of a long initial delay on the readiness and liveness probes, use
startup probes and be more aggressive on readiness and liveness.
Also, decrease the number of replicas from 3 to 1.

Issue-ID: OOM-2742
Signed-off-by: Sylvain Desbureaux <sylvain.desbureaux@orange.com>
Change-Id: Ideb0ede251332e182b975ff18ca5a75bcbff2351

kubernetes/common/music/components/music-cassandra/templates/job.yaml
kubernetes/common/music/components/music-cassandra/templates/statefulset.yaml
kubernetes/common/music/components/music-cassandra/values.yaml

index 3cf1ae3..d3c89d4 100644 (file)
@@ -39,8 +39,6 @@ spec:
         command:
         - /app/ready.py
         args:
-        - --timeout
-        - "{{ .Values.readinessTimeout }}"
         - --container-name
         - music-cassandra
         env:
@@ -87,4 +85,3 @@ spec:
       restartPolicy: Never
       imagePullSecrets:
       - name: "{{ include "common.namespace" . }}-docker-registry-key"
-
index 2a1fb4f..1aabfb6 100644 (file)
@@ -73,6 +73,17 @@ spec:
           timeoutSeconds: {{ .Values.readiness.timeoutSeconds }}
           successThreshold: {{ .Values.readiness.successThreshold }}
           failureThreshold: {{ .Values.readiness.failureThreshold }}
+        startupProbe:
+          exec:
+            command:
+            - /bin/bash
+            - -c
+            - nodetool status | grep $POD_IP | awk '$1!="UN" { exit 1; }'
+          initialDelaySeconds: {{ .Values.startup.initialDelaySeconds }}
+          periodSeconds: {{ .Values.startup.periodSeconds }}
+          timeoutSeconds: {{ .Values.startup.timeoutSeconds }}
+          successThreshold: {{ .Values.startup.successThreshold }}
+          failureThreshold: {{ .Values.startup.failureThreshold }}
         lifecycle:
           preStop:
             exec:
index 8530172..92ed723 100644 (file)
@@ -18,7 +18,7 @@ global:
   nodePortPrefix: 302
   persistence: {}
 
-replicaCount: 3
+replicaCount: 1
 
 # Cassandra Image - This image is modified from the original on
 # Docker Hub where the Security has been turned on.
@@ -72,8 +72,8 @@ cql:
 
 # probe configuration parameters
 liveness:
-  initialDelaySeconds: 120
-  periodSeconds: 20
+  initialDelaySeconds: 1
+  periodSeconds: 10
   timeoutSeconds: 10
   successThreshold: 1
   failureThreshold: 3
@@ -81,15 +81,20 @@ liveness:
   # in debugger so K8s doesn't restart unresponsive container
   enabled: true
 
-readinessTimeout: 240
-
 readiness:
-  initialDelaySeconds: 10
-  periodSeconds: 20
+  initialDelaySeconds: 1
+  periodSeconds: 10
   timeoutSeconds: 10
   successThreshold: 1
   failureThreshold: 3
 
+startup:
+  initialDelaySeconds: 10
+  periodSeconds: 10
+  timeoutSeconds: 10
+  successThreshold: 1
+  failureThreshold: 90
+
 podManagementPolicy: OrderedReady
 updateStrategy:
   type: OnDelete