Adding SRIOV Network Device Plugin to KuD 60/96160/16
author Akhila Kishore <akhila.kishore@intel.com>
Mon, 23 Sep 2019 21:57:43 +0000 (14:57 -0700)
committer Akhila Kishore <akhila.kishore@intel.com>
Tue, 22 Oct 2019 03:45:10 +0000 (20:45 -0700)
Integrate SRIOV as an add-on to KuD. The machine
must have an X700 series NIC for this add-on to work.
Fetching, building, and installing the device driver is
part of this patch, followed by deploying the SRIOV CNI
DaemonSet and the NetworkAttachmentDefinition.
Reworked the way the SRIOV hardware check happens.
Previously it ran in installer.sh.
Now the script is injected into the kube-nodes, and the playbook runs
only if the hardware check is true; the result is passed on through a conf file.
Removed unwanted comments and made nit changes.

Signed-off-by: Akhila Kishore <akhila.kishore@intel.com>
Issue-ID: MULTICLOUD-832
Change-Id: I1701a50bc717ddca0d332d6a42d329eaf4c03820

kud/deployment_infra/images/sriov-cni.yml [new file with mode: 0644]
kud/deployment_infra/images/sriov-daemonset.yml [new file with mode: 0644]
kud/deployment_infra/playbooks/configure-sriov.yml [new file with mode: 0644]
kud/deployment_infra/playbooks/install_iavf_drivers.sh [new file with mode: 0755]
kud/deployment_infra/playbooks/kud-vars.yml
kud/deployment_infra/playbooks/preconfigure-sriov.yml [new file with mode: 0644]
kud/deployment_infra/playbooks/sriov-nad.yml [new file with mode: 0644]
kud/deployment_infra/playbooks/sriov_hardware_check.sh [new file with mode: 0644]
kud/hosting_providers/vagrant/clean_sriov.sh [new file with mode: 0644]
kud/hosting_providers/vagrant/installer.sh
kud/tests/sriov.sh [new file with mode: 0755]

diff --git a/kud/deployment_infra/images/sriov-cni.yml b/kud/deployment_infra/images/sriov-cni.yml
new file mode 100644 (file)
index 0000000..bd943d0
--- /dev/null
@@ -0,0 +1,45 @@
+# SRIOV-CNI Release v1
+# Based on:
+# https://github.com/intel/sriov-cni/blob/master/images/sriov-cni-daemonset.yaml
+#
+# Runs the nfvpe/sriov-cni image as a privileged, host-networked pod on
+# every amd64 node (the toleration also admits master nodes), with the
+# host's /opt/cni/bin mounted at /host/opt/cni/bin inside the container.
+# NOTE(review): presumably the image copies the sriov CNI binary into that
+# mount on start-up - confirm against the image's entrypoint.
+---
+# apps/v1 replaces the deprecated extensions/v1beta1 DaemonSet API.
+# Unlike the old API, apps/v1 requires an explicit spec.selector.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: kube-sriov-cni-ds-amd64
+  namespace: kube-system
+  labels:
+    tier: node
+    app: sriov-cni
+spec:
+  # Must match the pod template labels below.
+  selector:
+    matchLabels:
+      tier: node
+      app: sriov-cni
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: sriov-cni
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: amd64
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        operator: Exists
+        effect: NoSchedule
+      containers:
+      - name: kube-sriov-cni
+        # No tag is given, so the default tag of the image is pulled.
+        image: nfvpe/sriov-cni
+        securityContext:
+          privileged: true
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        volumeMounts:
+        - name: cnibin
+          mountPath: /host/opt/cni/bin
+      volumes:
+      - name: cnibin
+        hostPath:
+          path: /opt/cni/bin
diff --git a/kud/deployment_infra/images/sriov-daemonset.yml b/kud/deployment_infra/images/sriov-daemonset.yml
new file mode 100644 (file)
index 0000000..1edbc6c
--- /dev/null
@@ -0,0 +1,82 @@
+# SRIOV device CNI plugin
+# Based on:
+# https://github.com/intel/sriov-network-device-plugin/blob/master/images/sriovdp-daemonset.yaml
+---
+# Resource pool definition for the SRIOV device plugin. The DaemonSet in
+# this file mounts the config.json key at /etc/pcidp/config.json.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: sriovdp-config
+  namespace: kube-system
+data:
+  # The value must be strict JSON: the members of "selectors" have to be
+  # separated by a comma, otherwise the document cannot be parsed.
+  # NOTE(review): sriov_hardware_check.sh looks for an X710 NIC - confirm
+  # that 37cd is the VF device ID exposed by that card.
+  config.json: |
+    {
+      "resourceList": [{
+         "resourceName": "intel_sriov_700",
+         "selectors": {
+            "vendors": ["8086"],
+            "devices": ["37cd"]
+         }
+       }]
+    }
+
+---
+# Service account referenced by the device-plugin DaemonSet below through
+# its serviceAccountName field.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: sriov-device-plugin
+  namespace: kube-system
+
+---
+# SRIOV network device plugin: one privileged pod per amd64 node (the
+# toleration also admits master nodes), sharing the host network and PID
+# namespaces. It mounts the host's /var/lib/kubelet/ and /var/log, and
+# reads its resource configuration from the sriovdp-config ConfigMap
+# mounted at /etc/pcidp.
+#
+# apps/v1 replaces the deprecated extensions/v1beta1 DaemonSet API.
+# Unlike the old API, apps/v1 requires an explicit spec.selector.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: kube-sriov-device-plugin-amd64
+  namespace: kube-system
+  labels:
+    tier: node
+    app: sriovdp
+spec:
+  # Must match the pod template labels below.
+  selector:
+    matchLabels:
+      tier: node
+      app: sriovdp
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: sriovdp
+    spec:
+      hostNetwork: true
+      hostPID: true
+      nodeSelector:
+        beta.kubernetes.io/arch: amd64
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        operator: Exists
+        effect: NoSchedule
+      serviceAccountName: sriov-device-plugin
+      containers:
+      - name: kube-sriovdp
+        # No tag is given, so the default tag of the image is pulled.
+        image: nfvpe/sriov-device-plugin
+        args:
+        - --log-dir=sriovdp
+        - --log-level=10
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: devicesock
+          mountPath: /var/lib/kubelet/
+          readOnly: false
+        - name: log
+          mountPath: /var/log
+        - name: config-volume
+          mountPath: /etc/pcidp
+      volumes:
+      - name: devicesock
+        hostPath:
+          path: /var/lib/kubelet/
+      - name: log
+        hostPath:
+          path: /var/log
+      - name: config-volume
+        configMap:
+          name: sriovdp-config
+          items:
+          - key: config.json
+            path: config.json
diff --git a/kud/deployment_infra/playbooks/configure-sriov.yml b/kud/deployment_infra/playbooks/configure-sriov.yml
new file mode 100644 (file)
index 0000000..8ba6cf4
--- /dev/null
@@ -0,0 +1,36 @@
+---
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# Run the hardware check and the driver build/installation first; that
+# playbook stores the SRIOV_NODE fact for localhost.
+- import_playbook: preconfigure-sriov.yml
+
+# Deploy the SRIOV add-on manifests with kubectl, but only when SRIOV_NODE
+# is true.
+- hosts: localhost
+  become: true
+  pre_tasks:
+    # "| bool" accepts a real boolean as well as the strings "True"/"False",
+    # so the test does not depend on how the fact was stored.
+    - block:
+        - name: "End play if SRIOV is False"
+          debug:
+            msg: "SRIOV option not available, ending play"
+        - meta: end_play
+      when: not (SRIOV_NODE | default(false) | bool)
+  tasks:
+    - debug:
+        var: SRIOV_NODE
+    - name: Apply Multus
+      command: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/../images/multus-daemonset.yml"
+      when: SRIOV_NODE | default(false) | bool
+    # The manifests added under images/ carry the .yml extension.
+    - name: Apply SRIOV CNI
+      command: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/../images/sriov-cni.yml"
+      when: SRIOV_NODE | default(false) | bool
+    - name: Apply SRIOV DaemonSet
+      command: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/../images/sriov-daemonset.yml"
+      when: SRIOV_NODE | default(false) | bool
+    - name: Apply SRIOV Network Attachment definition
+      command: "/usr/local/bin/kubectl apply -f {{ playbook_dir }}/sriov-nad.yml"
+      when: SRIOV_NODE | default(false) | bool
diff --git a/kud/deployment_infra/playbooks/install_iavf_drivers.sh b/kud/deployment_infra/playbooks/install_iavf_drivers.sh
new file mode 100755 (executable)
index 0000000..d44483d
--- /dev/null
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Based on:
+# https://gerrit.akraino.org/r/#/c/icn/+/1359/1/deploy/kud-plugin-addons/device-plugins/sriov/driver/install_iavf_drivers.sh
+
+# Install the iavf driver module and enable virtual functions on a NIC.
+#
+# Arguments:
+#   $1 - name of the SR-IOV capable network interface
+# Expects iavf.ko in the current working directory. Writes under
+# /etc/modprobe.d, /lib/modules/<kernel release> and /sys/class/net.
+function install_iavf_driver {
+    local ifname=$1
+    local kver install_mod_dir
+
+    echo "Installing modules..."
+    echo "Installing i40evf blacklist file..."
+    mkdir -p "/etc/modprobe.d/"
+    echo "blacklist i40evf" > "/etc/modprobe.d/iavf-blacklist-i40evf.conf"
+
+    # Kernel release of the running kernel, e.g. the third field of uname -a.
+    kver=$(uname -r)
+    install_mod_dir="/lib/modules/$kver/updates/drivers/net/ethernet/intel/iavf/"
+    echo "Installing driver in $install_mod_dir"
+    mkdir -p "$install_mod_dir"
+    cp iavf.ko "$install_mod_dir"
+
+    echo "Installing kernel module i40evf..."
+    depmod -a
+    # NOTE(review): i40evf is blacklisted above yet loaded explicitly here;
+    # confirm that loading both modules is intended.
+    modprobe i40evf
+    modprobe iavf
+
+    echo "Enabling VF on interface $ifname..."
+    echo "/sys/class/net/$ifname/device/sriov_numvfs"
+    # Request 8 virtual functions on the interface.
+    echo '8' > "/sys/class/net/$ifname/device/sriov_numvfs"
+}
+
+function is_used {
+    local ifname=$1
+    route_info=`ip route show | grep $ifname`
+    if [ -z "$route_info" ]; then
+        return 0
+    else
+        return 1
+    fi
+}
+
+function get_sriov_ifname {
+    for net_device in /sys/class/net/*/ ; do
+        if [ -e $net_device/device/sriov_numvfs ] ; then
+            ifname=$(basename $net_device)
+            is_used $ifname
+            if [ "$?" = "0" ]; then
+                echo $ifname
+                return
+            fi
+        fi
+    done
+    echo ''
+}
+
+if [ $# -ne 1 ] ; then
+    ifname=$(get_sriov_ifname)
+    if [ -z "$ifname" ]; then
+        echo "Cannot find Nic with SRIOV support."
+    else
+        install_iavf_driver $ifname
+    fi
+else
+    ifname=$1
+    if [ ! -e /sys/class/net/$ifname/device/sriov_numvfs ] ; then
+        echo "${ifname} is not a valid sriov interface"
+    else
+        install_iavf_driver $ifname
+    fi
+fi
index a9910f8..316ec89 100644 (file)
@@ -39,6 +39,12 @@ istio_source_type: "tarball"
 istio_version: 1.0.3
 istio_url: "https://github.com/istio/istio/releases/download/{{ istio_version }}/istio-{{ istio_version }}-linux.tar.gz"
 
+# SRIOV add-on: location and version of the iavf driver tarball.
+sriov_dest: "{{ base_dest }}/sriov"
+driver_source_type: "tarball"
+driver_version: "3.7.34"
+driver_url: "https://downloadmirror.intel.com/28943/eng/iavf-{{ driver_version }}.tar.gz"
+# Base name of the downloaded tarball and of the directory the playbooks
+# expect it to unpack to; derived from driver_version so that the two
+# cannot drift apart.
+package: "iavf-{{ driver_version }}"
+
 go_version: '1.12.5'
 kubespray_version: 2.10.4
 helm_client_version: 2.9.1
diff --git a/kud/deployment_infra/playbooks/preconfigure-sriov.yml b/kud/deployment_infra/playbooks/preconfigure-sriov.yml
new file mode 100644 (file)
index 0000000..c4276e1
--- /dev/null
@@ -0,0 +1,116 @@
+---
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# Play 1: run sriov_hardware_check.sh on every kube-node, keep its stdout in
+# the per-host fact SRIOV, and write the value to /tmp/sriov.conf on
+# localhost.
+# NOTE(review): the conf file is shared by all hosts, so with more than one
+# kube-node its content depends on every host's result - confirm this is
+# acceptable.
+- hosts: kube-node
+  become: true
+  pre_tasks:
+    - name: Create SRIOV driver folder in the target destination
+      file:
+        state: directory
+        path: "{{ item }}"
+      with_items:
+        - sriov
+    - copy:
+        src: "{{ playbook_dir }}/sriov_hardware_check.sh"
+        dest: sriov
+    # The file module sets the execute bit without spawning a shell, the
+    # same way install.sh is handled later in this playbook.
+    - name: Changing perm of "sh", adding "+x"
+      file:
+        path: sriov/sriov_hardware_check.sh
+        mode: a+x
+    # NOTE(review): the output of this task is not registered anywhere.
+    - name: Register SRIOV
+      shell: "echo {{ SRIOV | default(False) }}"
+    - name: Run the script and Re-evaluate the variable
+      command: sriov/sriov_hardware_check.sh
+      register: output
+    # The script prints "True" or "False".
+    - set_fact:
+        SRIOV: "{{ output.stdout }}"
+    - name: Recreate the conf file for every host
+      file:
+        path: /tmp/sriov.conf
+        state: absent
+      delegate_to: localhost
+    - lineinfile: >
+       dest=/tmp/sriov.conf
+       create=yes
+       line='{{SRIOV}}'
+      delegate_to: localhost
+    - name: Clean the script and folder.
+      file:
+        path: sriov
+        state: absent
+
+# Play 2: run the following tasks only if SRIOV is set to True,
+# i.e. when SRIOV hardware is available.
+# Reads the result of the hardware check from /tmp/sriov.conf, then
+# downloads, unpacks and builds the driver on localhost.
+- hosts: localhost
+  become: true
+  pre_tasks:
+    - name: Read SRIOV value from the conf file.
+      command: cat /tmp/sriov.conf
+      register: installer_output
+      become: true
+    - set_fact:
+        SRIOV_NODE: "{{ installer_output.stdout }}"
+    # "| bool" accepts a real boolean as well as the strings "True"/"False",
+    # so the tests below do not depend on how the fact was stored.
+    - meta: end_play
+      when: not (SRIOV_NODE | bool)
+    - name: Load kud variables
+      include_vars:
+        file: kud-vars.yml
+      when: SRIOV_NODE | bool
+  tasks:
+    - name: Create sriov folder
+      file:
+        state: directory
+        path: "{{ sriov_dest }}"
+      when: SRIOV_NODE | bool
+      ignore_errors: true
+    - name: Get SRIOV compatible driver
+      get_url:
+        url: "{{ driver_url }}"
+        dest: "/tmp/{{ package }}.tar.gz"
+      when: SRIOV_NODE | bool
+    - name: Extract sriov source code
+      unarchive:
+        src: "/tmp/{{ package }}.tar.gz"
+        dest: "{{ sriov_dest }}"
+      when: SRIOV_NODE | bool
+    # Build where the tarball was extracted rather than in a fixed
+    # /tmp/sriov path.
+    - name: Build the default target
+      make:
+        chdir: "{{ sriov_dest }}/{{ package }}/src"
+      become: true
+      when: SRIOV_NODE | bool
+# Play 3: copy the driver and the install script into the target nodes and
+# run the script, on hosts whose SRIOV fact is true.
+- hosts: kube-node
+  become: true
+  pre_tasks:
+    # "| bool" accepts a real boolean as well as the strings "True"/"False".
+    - name: Load kud variables
+      include_vars:
+        file: kud-vars.yml
+      when: SRIOV | default(false) | bool
+  tasks:
+    # Absolute path, matching the /root/sriov_driver location every later
+    # task uses.
+    - name: create SRIOV driver folder in the target destination
+      file:
+        state: directory
+        path: /root/sriov_driver
+      when: SRIOV | default(false) | bool
+    # copy reads src on the machine running the playbook, which is where the
+    # module is built and where playbook_dir lives.
+    - name: Copy SRIOV driver to target destination
+      copy:
+        src: "{{ sriov_dest }}/{{ package }}/src/iavf.ko"
+        dest: /root/sriov_driver/
+      when: SRIOV | default(false) | bool
+    - name: Copy SRIOV driver install script to target folder
+      copy:
+        src: "{{ playbook_dir }}/install_iavf_drivers.sh"
+        dest: /root/sriov_driver/install.sh
+      when: SRIOV | default(false) | bool
+    - name: Changing perm of "install.sh", adding "+x"
+      file:
+        dest: /root/sriov_driver/install.sh
+        mode: a+x
+      when: SRIOV | default(false) | bool
+    # install.sh expects iavf.ko in its working directory.
+    - name: Run a script with arguments
+      shell: ./install.sh
+      args:
+        chdir: "/root/sriov_driver"
+      when: SRIOV | default(false) | bool
diff --git a/kud/deployment_infra/playbooks/sriov-nad.yml b/kud/deployment_infra/playbooks/sriov-nad.yml
new file mode 100644 (file)
index 0000000..7670b70
--- /dev/null
@@ -0,0 +1,19 @@
+# NetworkAttachmentDefinition "sriov-eno2": a CNI network of type "sriov"
+# with host-local IPAM on 10.56.206.0/24. The resourceName annotation ties
+# it to the intel.com/intel_sriov_700 resource.
+# kud/tests/sriov.sh attaches its test pod to this network by name.
+apiVersion: "k8s.cni.cncf.io/v1"
+kind: NetworkAttachmentDefinition
+metadata:
+  name: sriov-eno2
+  annotations:
+    k8s.v1.cni.cncf.io/resourceName: intel.com/intel_sriov_700
+spec:
+  # CNI configuration as a JSON document held in a single string.
+  config: '{
+    "type": "sriov",
+    "cniVersion": "0.3.1",
+    "ipam": {
+            "type": "host-local",
+            "subnet": "10.56.206.0/24",
+            "routes": [
+                    { "dst": "0.0.0.0/0" }
+            ],
+            "gateway": "10.56.206.1"
+    }
+  }'
diff --git a/kud/deployment_infra/playbooks/sriov_hardware_check.sh b/kud/deployment_infra/playbooks/sriov_hardware_check.sh
new file mode 100644 (file)
index 0000000..ea1b7b0
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+set -o pipefail
+
+source /etc/environment
+
+ethernet_adpator_version=$( lspci | grep "Ethernet Controller X710" | head -n 1 | cut -d " " -f 8 )
+if [ -z "$ethernet_adpator_version" ]; then
+    echo "False"
+    exit 0
+fi
+SRIOV_ENABLED=${ethernet_adpator_version:-"false"}
+#checking for the right hardware version of NIC on the machine
+if [ "$ethernet_adpator_version" == "X710" ]; then
+    echo "True"
+else
+    echo "False"
+fi
diff --git a/kud/hosting_providers/vagrant/clean_sriov.sh b/kud/hosting_providers/vagrant/clean_sriov.sh
new file mode 100644 (file)
index 0000000..76b8a96
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# Undo the SRIOV add-on installation: unload the iavf module, delete the
+# installed module file, refresh module dependencies, and remove the driver
+# sources and the downloaded tarball.
+# sudo is used on every privileged command, not only the last two.
+sudo modprobe -r iavf
+kver=$(uname -r)
+sudo rm -rf "/lib/modules/$kver/updates/drivers/net/ethernet/intel/iavf/iavf.ko"
+sudo depmod -a
+sudo rm -rf /tmp/sriov
+# preconfigure-sriov.yml downloads the tarball to /tmp; the relative path is
+# still removed in case a copy was left in the current directory.
+sudo rm -rf iavf-3.7.34.tar.gz /tmp/iavf-3.7.34.tar.gz
index 41b21f6..9402352 100755 (executable)
@@ -159,6 +159,13 @@ function install_addons {
             popd
         fi
     done
+    ansible-playbook $verbose -i $kud_inventory $kud_playbooks/configure-sriov.yml | sudo tee $log_folder/setup-sriov.log
+        if [[ "${testing_enabled}" == "true" ]]; then
+            pushd $kud_tests
+            bash sriov.sh
+            popd
+        fi
+    echo "Add-ons deployment complete..."
 }
 
 # install_plugin() - Install ONAP Multicloud Kubernetes plugin
@@ -229,11 +236,9 @@ kud_playbooks=$kud_infra_folder/playbooks
 kud_tests=$kud_folder/../../tests
 k8s_info_file=$kud_folder/k8s_info.log
 testing_enabled=${KUD_ENABLE_TESTS:-false}
-
 sudo mkdir -p $log_folder
 sudo mkdir -p /opt/csar
 sudo chown -R $USER /opt/csar
-
 # Install dependencies
 # Setup proxy variables
 if [ -f $kud_folder/sources.list ]; then
diff --git a/kud/tests/sriov.sh b/kud/tests/sriov.sh
new file mode 100755 (executable)
index 0000000..c66f5db
--- /dev/null
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-license-identifier: Apache-2.0
+##############################################################################
+# Copyright (c) 2018
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+set -o pipefail
+
+ethernet_adpator_version=$( lspci | grep "Ethernet Controller X710" | head -n 1 | cut -d " " -f 8 )
+if [ -z "$ethernet_adpator_version" ]; then
+    echo " Ethernet adapator version is not set. SRIOV test case cannot run on this machine"
+    exit 0
+fi
+#checking for the right hardware version of NIC on the machine
+if [ $ethernet_adpator_version == "X710" ]; then
+    echo "NIC card specs match. SRIOV option avaiable for this version."
+else
+    echo -e "Failed. The version supplied does not match.\nTest cannot be executed."
+    exit 0
+fi
+
+pod_name=pod-case-01
+rm -f $HOME/$pod_name.yaml
+kubectl delete pod $pod_name --ignore-not-found=true --now --wait
+allocated_node_resource=$(kubectl describe node | grep "intel.com/intel_sriov_700" | tail -n1 |awk '{print $(NF)}')
+
+echo "The allocated resource of the node is: " $allocated_node_resource
+cat << POD > $HOME/$pod_name.yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: pod-case-01
+  annotations:
+    k8s.v1.cni.cncf.io/networks: sriov-eno2
+spec:
+  containers:
+  - name: test-pod
+    image: docker.io/centos/tools:latest
+    command:
+    - /sbin/init
+    resources:
+      requests:
+        intel.com/intel_sriov_700: '1'
+      limits:
+        intel.com/intel_sriov_700: '1'
+POD
+kubectl create -f $HOME/$pod_name.yaml --validate=false
+    for pod in $pod_name; do
+        status_phase=""
+        while [[ $status_phase != "Running" ]]; do
+            new_phase=$(kubectl get pods $pod | awk 'NR==2{print $3}')
+            if [[ $new_phase != $status_phase ]]; then
+                echo "$(date +%H:%M:%S) - $pod : $new_phase"
+                status_phase=$new_phase
+            fi
+            if [[ $new_phase == "Running" ]]; then
+                echo "Pod is up and running.."
+            fi
+            if [[ $new_phase == "Err"* ]]; then
+                exit 1
+            fi
+        done
+    done
+allocated_node_resource=$(kubectl describe node | grep "intel.com/intel_sriov_700" | tail -n1 |awk '{print $(NF)}')
+
+echo " The current resource allocation after the pod creation is: " $allocated_node_resource
+kubectl delete pod $pod_name --now
+echo "Test complete."