Refactor Distributed Analytics project structure
[demo.git] / vnfs / DAaaS / sample-apps / training / sample-horovod-app / templates / config.yaml
diff --git a/vnfs/DAaaS/sample-apps/training/sample-horovod-app/templates/config.yaml b/vnfs/DAaaS/sample-apps/training/sample-horovod-app/templates/config.yaml
new file mode 100644 (file)
index 0000000..ae93c44
--- /dev/null
@@ -0,0 +1,130 @@
+{{- $workerNum := .Values.worker.number -}}
+{{- $name := include "horovod.fullname" . }}
+{{- $slots := 1 }}
+{{- if index .Values.resources "nvidia.com/gpu" }}
+{{- $slots := index .Values.resources "nvidia.com/gpu" }}
+{{- end }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ template "horovod.fullname" . }}
+  labels:
+    heritage: {{ .Release.Service | quote }}
+    release: {{ .Release.Name | quote }}
+    chart: {{ template "horovod.chart" . }}
+    app: {{ template "horovod.fullname" . }}
+data:
+  hostfile.config: |
+    {{ $name }}-master slots={{ $slots }}
+    {{- range $i, $none := until (int $workerNum) }}
+    {{ $name }}-{{ $i }}.{{ $name }} slots={{ $slots }}
+    {{- end }}
+  ssh.readiness: |
+    #!/bin/bash
+    set -xev
+    ssh localhost ls
+  master.run: |
+     #!/bin/bash
+     set -x
+     sleep 5
+
+     mkdir -p /root/.ssh
+     rm -f /root/.ssh/config
+     touch /root/.ssh/config
+
+     if [ "$USESECRETS" == "true" ];then
+        set +e
+        yes | cp /etc/secret-volume/id_rsa /root/.ssh/id_rsa
+        yes | cp /etc/secret-volume/authorized_keys /root/.ssh/authorized_keys
+        set -e
+     fi
+
+     if [ -n "$SSHPORT" ]; then
+        echo "Port $SSHPORT" > /root/.ssh/config
+        sed -i "s/^Port.*/Port $SSHPORT /g" /etc/ssh/sshd_config
+     fi
+     echo "StrictHostKeyChecking no" >> /root/.ssh/config
+     /usr/sbin/sshd
+
+     if [ $# -eq 0 ]; then
+          sleep infinity
+        else
+          bash -c "$*"
+     fi
+     sleep 300
+  master.waitWorkerReady: |
+    #!/bin/bash
+    set -xev
+    function updateSSHPort() {
+      mkdir -p /root/.ssh
+      rm -f /root/.ssh/config
+      touch /root/.ssh/config
+
+      if [ -n "$SSHPORT" ]; then
+        echo "Port $SSHPORT" > /root/.ssh/config
+        echo "StrictHostKeyChecking no" >> /root/.ssh/config
+      fi
+    }
+
+    function runCheckSSH() {
+      if [[ "$USESECRETS" == "true" ]];then
+        set +e
+        yes | cp /etc/secret-volume/id_rsa /root/.ssh/id_rsa
+        yes | cp /etc/secret-volume/authorized_keys /root/.ssh/authorized_keys
+        set -e
+      fi
+      
+      for i in `cat $1 | awk '{print $(1)}'`;do
+        if [[ "$i" != *"master" ]];then
+          retry 30 ssh -o ConnectTimeout=2 -q $i exit
+        fi
+      done
+    }
+
+    function retry()
+    {
+        local n=0;local try=$1
+        local cmd="${@: 2}"
+        [[ $# -le 1 ]] && {
+            echo "Usage $0 <retry_number> <Command>";
+        }
+        set +e
+        until [[ $n -ge $try ]]
+        do
+          $cmd && break || {
+                  echo "Command Fail.."
+                  ((n++))
+                  echo "retry $n :: [$cmd]"
+                  sleep 1;
+                  }
+        done
+        $cmd
+        if [ $? -ne 0 ]; then
+          exit 1
+        fi
+        set -e   
+    }
+    updateSSHPort
+    runCheckSSH $1
+  worker.run: |
+     #!/bin/bash
+     set -x
+     
+     mkdir -p /root/.ssh
+     rm -f /root/.ssh/config
+     touch /root/.ssh/config
+
+     if [[ "$USESECRETS" == "true" ]];then
+        set +e
+        yes | cp /etc/secret-volume/id_rsa /root/.ssh/id_rsa
+        yes | cp /etc/secret-volume/authorized_keys /root/.ssh/authorized_keys
+        set -e
+     fi
+
+     if [ -n "$SSHPORT" ]; then
+        echo "Port $SSHPORT" > /root/.ssh/config
+        sed -i "s/^Port.*/Port $SSHPORT /g" /etc/ssh/sshd_config
+     fi
+     echo "StrictHostKeyChecking no" >> /root/.ssh/config
+
+     /usr/sbin/sshd -D