Fix issue with etcd pod startup
[oom.git] / kubernetes / common / etcd / templates / statefulset.yaml
index ccc6b69..7190c5b 100644 (file)
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 apiVersion: apps/v1beta1
 kind: StatefulSet
 metadata:
-  name: {{ include "common.servicename" .  }}
+  name: {{ include "common.fullname" .  }}
   labels:
     heritage: "{{ .Release.Service }}"
     release: "{{ .Release.Name }}"
     chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
-    app: {{ template "common.name" . }}
+    app: {{ include "common.name" . }}
 spec:
-  serviceName: {{ include "common.servicename" .  }}
+  serviceName: {{ include "common.servicename" .}}
   replicas: {{ .Values.replicaCount }}
   template:
     metadata:
@@ -45,7 +44,7 @@ spec:
 {{ toYaml .Values.tolerations | indent 8 }}
 {{- end }}
       containers:
-      - name: {{ include "common.servicename" .  }}
+      - name: {{ include "common.fullname" .  }}
         image: "{{ .Values.repository }}/{{ .Values.image }}"
         imagePullPolicy: "{{ .Values.pullPolicy }}"
         ports:
@@ -55,23 +54,20 @@ spec:
           name: {{ .Values.service.clientPortName }}
         {{- if eq .Values.liveness.enabled true }}
         livenessProbe:
-          exec:
-            command: ["/bin/sh", "-c", "etcdctl cluster-health | grep -w healthy" ]
-            initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
-            periodSeconds: {{ .Values.liveness.periodSeconds }}
-            timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
-          {{ end -}}
-        readinessProbe:
-          exec:
-            command: ["/bin/sh", "-c", "etcdctl cluster-health | grep -w healthy" ]
-            initialDelaySeconds: {{ .Values.readiness.initialDelaySeconds }}
-            periodSeconds: {{ .Values.readiness.periodSeconds }}
+          tcpSocket:
+            port: {{ .Values.service.clientInternalPort }}
+          initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
+          periodSeconds: {{ .Values.liveness.periodSeconds }}
+          timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
+        {{ end -}}
         resources:
 {{ include "common.resources" . | indent 10 }}
         env:
         - name: INITIAL_CLUSTER_SIZE
           value: {{ .Values.replicaCount | quote }}
         - name: SET_NAME
+          value: {{ include "common.fullname" . }}
+        - name: SERVICE_NAME
           value: {{ include "common.servicename" . }}
 {{- if .Values.extraEnv }}
 {{ toYaml .Values.extraEnv | indent 8 }}
@@ -85,13 +81,13 @@ spec:
                 - |
                   EPS=""
                   for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
-                      EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
+                      EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SERVICE_NAME}:2379"
                   done
 
                   HOSTNAME=$(hostname)
 
                   member_hash() {
-                      etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
+                      etcdctl member list | grep http://${HOSTNAME}.${SERVICE_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
                   }
 
                   SET_ID=${HOSTNAME##*[^0-9]}
@@ -113,43 +109,43 @@ spec:
             # store member id into PVC for later member replacement
             collect_member() {
                 while ! etcdctl member list &>/dev/null; do sleep 1; done
-                etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
+                etcdctl member list | grep http://${HOSTNAME}.${SERVICE_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
                 exit 0
             }
 
             eps() {
                 EPS=""
                 for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
-                    EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
+                    EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SERVICE_NAME}:2379"
                 done
                 echo ${EPS}
             }
 
             member_hash() {
-                etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
+                etcdctl member list | grep http://${HOSTNAME}.${SERVICE_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
             }
 
             # we should wait for other pods to be up before trying to join
             # otherwise we got "no such host" errors when trying to resolve other members
             for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                 while true; do
-                    echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
-                    ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME} > /dev/null && break
+                    echo "Waiting for ${SET_NAME}-${i}.${SERVICE_NAME} to come up"
+                    ping -W 1 -c 1 ${SET_NAME}-${i}.${SERVICE_NAME} > /dev/null && break
                     sleep 1s
                 done
             done
 
             # re-joining after failure?
-            if [ -e /var/run/etcd/default.etcd ]; then
+            if [[ -e /var/run/etcd/default.etcd && -f /var/run/etcd/member_id ]]; then
                 echo "Re-joining etcd member"
                 member_id=$(cat /var/run/etcd/member_id)
 
                 # re-join member
-                ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380 | true
+                ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SERVICE_NAME}:2380 | true
                 exec etcd --name ${HOSTNAME} \
                     --listen-peer-urls http://0.0.0.0:2380 \
                     --listen-client-urls http://0.0.0.0:2379\
-                    --advertise-client-urls http://${HOSTNAME}.${SET_NAME}:2379 \
+                    --advertise-client-urls http://${HOSTNAME}.${SERVICE_NAME}:2379 \
                     --data-dir /var/run/etcd/default.etcd
             fi
 
@@ -170,7 +166,7 @@ spec:
                 fi
 
                 echo "Adding new member"
-                etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
+                etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SERVICE_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
 
                 if [ $? -ne 0 ]; then
                     echo "Exiting"
@@ -186,37 +182,37 @@ spec:
                 exec etcd --name ${HOSTNAME} \
                     --listen-peer-urls http://0.0.0.0:2380 \
                     --listen-client-urls http://0.0.0.0:2379 \
-                    --advertise-client-urls http://${HOSTNAME}.${SET_NAME}:2379 \
+                    --advertise-client-urls http://${HOSTNAME}.${SERVICE_NAME}:2379 \
                     --data-dir /var/run/etcd/default.etcd \
-                    --initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
+                    --initial-advertise-peer-urls http://${HOSTNAME}.${SERVICE_NAME}:2380 \
                     --initial-cluster ${ETCD_INITIAL_CLUSTER} \
                     --initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
             fi
 
             PEERS=""
             for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
-                PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
+                PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SERVICE_NAME}:2380"
             done
 
             collect_member &
 
             # join member
             exec etcd --name ${HOSTNAME} \
-                --initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
+                --initial-advertise-peer-urls http://${HOSTNAME}.${SERVICE_NAME}:2380 \
                 --listen-peer-urls http://0.0.0.0:2380 \
                 --listen-client-urls http://0.0.0.0:2379 \
-                --advertise-client-urls http://${HOSTNAME}.${SET_NAME}:2379 \
+                --advertise-client-urls http://${HOSTNAME}.${SERVICE_NAME}:2379 \
                 --initial-cluster-token etcd-cluster-1 \
                 --initial-cluster ${PEERS} \
                 --initial-cluster-state new \
                 --data-dir /var/run/etcd/default.etcd
         volumeMounts:
-        - name: {{ include "common.servicename" . }}-datadir
+        - name: {{ include "common.fullname" . }}-data
           mountPath: /var/run/etcd
   {{- if .Values.persistence.enabled }}
   volumeClaimTemplates:
   - metadata:
-      name: {{ include "common.servicename" . }}-data
+      name: {{ include "common.fullname" . }}-data
     spec:
       accessModes:
         - "{{ .Values.persistence.accessMode }}"
@@ -224,16 +220,10 @@ spec:
         requests:
           # upstream recommended max is 700M
           storage: "{{ .Values.persistence.storage }}"
-    {{- if .Values.persistence.storageClass }}
-    {{- if (eq "-" .Values.persistence.storageClass) }}
-      storageClassName: ""
-    {{- else }}
-      storageClassName: "{{ .Values.persistence.storageClass }}"
-    {{- end }}
-    {{- end }}
+      storageClassName: {{ include "common.fullname" . }}-data
   {{- else }}
       volumes:
-      - name: {{ include "common.servicename" . }}-datadir
+      - name: {{ include "common.fullname" . }}-data
       {{- if .Values.memoryMode }}
         emptyDir:
           medium: Memory