-#!/bin/sh
+#!/bin/bash
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
(-D|--delete-all)]
[-C|--clean-only]
- Usage 1 (simple heuristics - redeploy failed components):
+EXAMPLES
+
+ Usage 1: (simple heuristics - redeploy failed components):
${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs
- Usage 2 (redeploy ONLY explicit listed components):
- ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs \
+ Usage 2: (redeploy ONLY explicitly listed components):
+ ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs \\
-c onap-aaf -c onap-sdc -c onap-portal
- Usage 3 (delete EVERYTHING and redeploy):
- ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs \
- --delete-all
-
- Usage 4 (just clean - do not redeploy)
- ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs \
- --delete-all --clean-only
-
- Namespace argument and at least one override file are mandatory
- for this script to execute. Also you must provide path to the
- storage or explicitly request to not delete file storage of the
- component.
-
- Storage should be directory where persistent volume resides. It
- will work only if component created a persistent volume with the
- same filename as its release name. Otherwise no effect. The
- exception is when '--delete-all' is used - in that case all
- content of the storage is deleted (because ONAP is not consistent
- with the volume directory names - eg.: sdnc).
-
- CAUTION 1: filename of an override file cannot contain whitespace!
- This is actually helm/onap deploy plugin issue which does not
- handle such files. So I dropped the more complicated version of
- this script when there is no reason to support something on what
- will helm deploy choke anyway.
-
- '--prefix' option is helm release argument - it is actually prefix
- when you list the helm releases - helm is little confusing here.
-
- CAUTION 2: By default release prefix is 'onap' - if you deployed
- release 'onap' and now run this script with different prefix then
- it will skip all 'onap-*' components and will deploy a new release
- with new prefix - BEWARE TO USE PROPER RELEASE PREFIX!
-
- Timeout set the waiting time for helm deploy per component.
-
- '--component' references to release name of the chart which you
- want to redeploy excplicitly - otherwise 'ALL FAILED' components
- will be redeployed. You can target more than one component at once
- - just use the argument multiple times.
-
- Component option is mutually exclusive with the '--delete-all'
- which will delete all components - healthy or not. Actually it will
- delete the whole NAMESPACE and everything in it.
-
- '--clean-only' can be used with any usage: heuristics, explicit
- component list or with '--delete-all'. It basically just skips the
- last step - the actual redeploy.
+ Usage 3: (delete EVERYTHING and redeploy):
+ ${CMD} -n onap -f /some/override1.yml -s /dockerdata-nfs --delete-all
+
+ Usage 4: (delete EVERYTHING and DO NOT redeploy - clean env.)
+ ${CMD} -n onap -s /dockerdata-nfs --delete-all --clean-only
+
+NOTES
+
+ Namespace argument (always) and at least one override file (if you don't
+ use '--delete-all') are mandatory for this script to execute. Also you must
+ provide path to the storage ('--storage') OR explicitly request to not
+ delete file storage of the component ('--no-storage-deletion').
+
+ The storage should be a directory where persistent volume resides. It will
+ work only if the component created the persistent volume with the same
+ filename as its release name. Otherwise no files are deleted. The exception
+ is when '--delete-all' is used - in that case all content of the storage is
+ deleted (because ONAP is not consistent with the volume directory names
+ - e.g.: sdnc).
+
+    '--file' can be used multiple times and it is used for override files
+ which are passed on to helm. The order is significant because if two
+ override files modify one value the latest one is used. This option is
+ ignored if '--clean-only' is used.
+
+ CAUTION 1: filename of an override file cannot contain whitespace! This is
+ actually helm/onap deploy plugin issue which does not handle such files. So
+ I dropped the more complicated version of this script when there is no
+ reason to support something on what will helm deploy choke anyway.
+
+ '--prefix' option is helm release argument - it is actually prefix when you
+    list the helm releases - helm is a little confusing here.
+
+ CAUTION 2: By default release prefix is 'onap' - if you deployed release
+    'onap' and now run this script with a different prefix then it will skip all
+ 'onap-*' components and will deploy a new release with new prefix - BEWARE
+ TO USE PROPER RELEASE PREFIX!
+
+ Timeout sets the waiting time for helm deploy per component.
+
+ '--component' references to the release name of the chart which you want to
+    redeploy explicitly - otherwise 'ALL FAILED' components will be
+ redeployed. You can target more than one component at once - just use the
+ argument multiple times.
+
+ Component option is mutually exclusive with the '--delete-all' which will
+ delete all components - healthy or not. Actually it will delete the whole
+ NAMESPACE and everything in it. Also to be sure it will cleanup all
+ orphaned images and volumes on all kubernetes nodes.
+
+ '--clean-only' can be used with any usage: heuristics, explicit component
+ list or with '--delete-all'. It basically just skips the last step - the
+ actual redeploy.
EOF
}
+use_help()
+{
+    printf 'Try help: %s --help\n' "${CMD}"
+}
+
msg()
{
-    echo -e "${COLOR_ON_GREEN}INFO: $@ ${COLOR_OFF}"
+    printf '%bINFO: %s %b\n' "${COLOR_ON_GREEN}" "$*" "${COLOR_OFF}"
}
error()
{
-    echo -e "${COLOR_ON_RED}ERROR: $@ ${COLOR_OFF}"
+    printf '%bERROR: %s %b\n' "${COLOR_ON_RED}" "$*" "${COLOR_OFF}"
+}
+
+on_exit()
+{
+    printf '%b' "$COLOR_OFF"
}
# remove all successfully completed jobs
kubectl get pods -n ${NAMESPACE} \
--show-labels=true \
- --include-uninitialized=true \
${_selector} \
--ignore-not-found=true \
--no-headers=true | \
{
msg "Undeploy helm release name: ${1}"
helm undeploy ${1} --purge
+ sleep 15s
}
# arg: <job name>
kubectl delete job -n ${NAMESPACE} \
--cascade=true \
--now=true \
- --include-uninitialized=true \
--wait=true \
${1}
done
}
-# arg: <resource> <release name>
-delete_resource()
+#arg: <component>
+get_resources_for_component()
{
- _resource="$1"
- _release="$2"
- msg "Delete ${_resource} for ${_release}..."
- {
- kubectl get ${_resource} -n ${NAMESPACE} \
- --ignore-not-found=true \
- --selector="release=${_release}" \
- --no-headers=true
-
- # this is due to missing "release" label in some pods
- # grep for the rescue...
- kubectl get ${_resource} -n ${NAMESPACE} \
- --no-headers=true | grep "^${_release}"
- } | awk '{print $1}' | sort -u | while read -r _name _rest ; do
- echo "Deleting '${_name}'"
- kubectl delete ${_resource} -n ${NAMESPACE} \
- --cascade=true \
- --now=true \
- --include-uninitialized=true \
- --wait=true \
- ${_name} \
- 2>&1 | grep -iv 'not[[:space:]]*found'
-
- # wait for resource to be deleted
- _output=start
- while [ -n "$_output" ] && sleep 1 ; do
- _output=$(kubectl get ${_resource} -n ${NAMESPACE} \
- --ignore-not-found=true \
- --no-headers=true \
- --field-selector="metadata.name=${_name}")
- done
+helm status $1 | awk -f <(cat - <<-'EOD'
+BEGIN {
+ work="no"
+ kind=""
+ a["dummy"]=""
+}
+
+$1 ~ ":" {
+ if ( $1 == "RESOURCES:" ) {
+ work="yes"
+} else {
+ work="no"
+}
+
+}
+
+$1 == "==>" {
+ split($2, a, "[/(]")
+ kind=a[2]
+}
+
+$1 != "NAME" && $1 != "==>" && work == "yes" && $1 !~ ":" && $1 != "" {
+ printf "%s/%s\n", kind, $1
+}
+
+EOD
+)
+}
+
+# arg: <resource>
+delete_resource()
+{
+    local _resource="$1"
+    local _kind="${_resource%/*}"
+    local _name="${_resource#*/}"
+
+
+    if kubectl get ${_resource} -n ${NAMESPACE} >/dev/null 2>&1; then
+    msg "${_resource} has not been removed with helm undeploy, manual removal is required. Proceeding"
+    kubectl delete ${_resource} -n ${NAMESPACE} \
+        --cascade=true \
+        --now=true \
+        --wait=true \
+        2>&1 | grep -iv 'not[[:space:]]*found'
+
+    # wait for resource to be deleted
+    _output=start
+    while [ -n "$_output" ] && sleep 1 ; do
+        _output=$(kubectl get ${_kind} ${_name} -n ${NAMESPACE} \
+            --ignore-not-found=true \
+            --no-headers=true )
done
+ msg "Done"
+ fi
}
delete_namespace()
kubectl delete namespace \
--cascade=true \
--now=true \
- --include-uninitialized=true \
--wait=true \
"$NAMESPACE"
done
}
-# arg: [optional: subdir]
+delete_persistent_volume()
+{
+ _persistent_volume=$1
+ if kubectl get ${_persistent_volume} >/dev/null 2>&1; then
+ msg "${_persistent_volume} has not been removed with helm undeploy, manual removal is required. Proceeding"
+ #very often k8s hangs on Terminating state for pv due to still active pvc. It is better to delete pvc directly
+ _claim=$(kubectl get ${_persistent_volume} -o jsonpath='{ .spec.claimRef.name}')
+ delete_resource PersistentVolumeClaim/${_claim}
+ fi
+}
+
+# arg: [optional: directory]
delete_storage()
{
_node=$(kubectl get nodes \
error "Could not list kubernetes nodes - SKIPPING DELETION"
else
if [ -n "$1" ] ; then
- msg "Delete directory '${VOLUME_STORAGE}/${1}' on $_node"
- ssh -T $_node <<EOF
-rm -rf "${VOLUME_STORAGE}/${1}"
-EOF
+ msg "Delete directory '${1}' on $_node"
+ ssh $_node "rm -rf '${1}'"
else
msg "Delete directories '${VOLUME_STORAGE}/*' on $_node"
- ssh -T $_node <<EOF
-find "${VOLUME_STORAGE}" -maxdepth 1 -mindepth 1 -exec rm -rf '{}' \;
-EOF
+ ssh $_node "find '${VOLUME_STORAGE}' -maxdepth 1 -mindepth 1 -exec rm -rf '{}' \;"
fi
fi
}
+docker_cleanup()
+{
+    _nodes=$(kubectl get nodes \
+        --selector=node-role.kubernetes.io/worker \
+        -o wide \
+        --no-headers=true | \
+        awk '{print $6}')
+
+    if [ -z "$_nodes" ] ; then
+        error "Could not list kubernetes nodes - SKIPPING docker cleanup"
+        return
+    fi
+
+    for _node in $_nodes ; do
+        msg "Docker cleanup on $_node"
+        ssh -n $_node "docker system prune --force --all --volumes" >/dev/null &
+    done
+
+    msg "We are waiting now for docker cleanup to finish on all nodes..."
+    wait
+}
+
+is_helm_serve_running()
+{
+    # healthy result: HTTP status 200 (more robust than parsing the "OK" text)
+    _helm_serve_status=$(curl --head --silent --connect-timeout 3 \
+        --output /dev/null --write-out '%{http_code}' \
+        http://127.0.0.1:8879 )
+
+    if [ "$_helm_serve_status" = "200" ] ; then
+        return 0
+    else
+        return 1
+    fi
+}
+
# arg: <release name>
-redeploy_component()
+undeploy_component()
{
- _chart=$(echo "$1" | sed 's/[^-]*-//')
- helm_undeploy ${1}
- # TODO: does deleted secret per component break something?
- for x in jobs deployments pods pvc pv ; do
- delete_resource ${x} ${1}
+ local _component=$1
+
+    #Because Helm undeploy is not reliable: Gathering resources assigned to component to track and remove orphans later
+ _component_resources=($(get_resources_for_component ${_component}))
+
+ declare -a _persistent_volumes
+ declare -a _standard
+ declare -a _unknown_kinds
+
+ for resource in ${_component_resources[@]}; do
+ case $resource in
+ CronJob/* | Job/* | Secret/* | ConfigMap/* | Pod/* | Service/* | Deployment/* | StatefulSet/*)
+ _standard+=(${resource});;
+ #Ignoring PVC, they will be handled along with PV as 'helm' status does not return them for some components
+ PersistentVolumeClaim/*)
+ ;;
+ PersistentVolume/*)
+ _persistent_volumes+=(${resource});;
+ *)
+ _unknown_kinds+=(${resource})
+ esac
done
- if [ -n "$VOLUME_STORAGE" ] ; then
- msg "Persistent volume data deletion in directory: ${VOLUME_STORAGE}/${1}"
- delete_storage "$1"
+
+ #Gathering physical location of directories for persistent volumes to delete them after undeploy
+ declare -a _physical_locations
+ for volume in ${_persistent_volumes[@]}; do
+ _physical_locations+=($(kubectl get ${volume} -o jsonpath='{ .spec.hostPath.path}' ))
+ done
+
+ helm_undeploy ${_component}
+
+ #Manual items removal
+ for resource in ${_standard[@]}; do
+ delete_resource ${resource}
+ done
+
+ for volume in ${_persistent_volumes[@]}; do
+ delete_persistent_volume ${volume}
+ done
+
+ for subdir in ${_physical_locations[@]}; do
+ delete_storage ${subdir}
+ done
+
+ if [ "${#_unknown_kinds[@]}" -ne 0 ] ; then
+ for resource in ${_unknown_kinds[@]}; do
+ error "Untracked resource kind present: ${resource}, attempting to delete it..."
+ delete_resource ${resource}
+ done
+ return
fi
+}
+# arg: <release name>
+deploy_component()
+{
# TODO: until I can verify that this does the same for this component as helm deploy
#msg "Redeployment of the component ${1}..."
#helm install "local/${_chart}" --name ${1} --namespace ${NAMESPACE} --wait --timeout ${HELM_TIMEOUT}
+ error "NOT IMPLEMENTED"
}
--no-storage-deletion)
if [ -n "$arg_storage" ] ; then
error "Usage of storage argument together with no storage deletion option!"
+ use_help
exit 1
elif [ -z "$arg_nostorage" ] ; then
arg_nostorage=nostorage
-c|--component)
if [ -n "$arg_deleteall" ] ; then
error "'Delete all components' used already - argument mismatch"
+ use_help
exit 1
fi
state=component
-D|--delete-all)
if [ -n "$arg_components" ] ; then
error "Explicit component(s) provided already - argument mismatch"
+ use_help
exit 1
elif [ -z "$arg_deleteall" ] ; then
arg_deleteall=deleteall
;;
*)
error "Unknown parameter: $1"
+ use_help
exit 1
;;
esac
state=nil
else
error "Duplicit argument for namespace!"
+ use_help
exit 1
fi
;;
override)
if ! [ -f "$1" ] ; then
error "Wrong filename for override file: $1"
+ use_help
exit 1
fi
arg_overrides="${arg_overrides} -f $1"
state=nil
else
error "Duplicit argument for release prefix!"
+ use_help
exit 1
fi
;;
if [ -z "$arg_timeout" ] ; then
if ! echo "$1" | grep -q '^[0-9]\+$' ; then
error "Timeout must be an integer: $1"
+ use_help
exit 1
fi
arg_timeout="$1"
state=nil
else
error "Duplicit argument for timeout!"
+ use_help
exit 1
fi
;;
storage)
if [ -n "$arg_nostorage" ] ; then
error "Usage of storage argument together with no storage deletion option!"
+ use_help
exit 1
elif [ -z "$arg_storage" ] ; then
arg_storage="$1"
state=nil
else
error "Duplicit argument for storage!"
+ use_help
exit 1
fi
;;
shift
done
-# sanity check
+# sanity checks
+
if [ -z "$arg_namespace" ] ; then
error "Missing namespace"
- help
+ use_help
exit 1
else
NAMESPACE="$arg_namespace"
fi
-if [ -z "$arg_overrides" ] ; then
- error "Missing override file(s)"
- help
+if [ -z "$arg_overrides" ] && [ -z "$arg_cleanonly" ] ; then
+ error "Missing override file(s) or use '--clean-only'"
+ use_help
exit 1
else
OVERRIDES="$arg_overrides"
VOLUME_STORAGE="$arg_storage"
elif [ -z "$arg_nostorage" ] ; then
error "Missing storage argument! If it is intended then use '--no-storage-deletion' option"
+ use_help
exit 1
fi
# main
#
+# set trap for this script cleanup
+trap on_exit INT QUIT TERM EXIT
+
+# another sanity checks
+for tool in helm kubectl curl ; do
+ if ! which "$tool" >/dev/null 2>&1 ; then
+ error "Missing '${tool}' command"
+ exit 1
+ fi
+done
+
+if ! is_helm_serve_running ; then
+ error "'helm serve' is not running (http://localhost:8879)"
+ exit 1
+fi
+
# if --delete-all is used then redeploy all components (the current namespace is deleted)
if [ -n "$HELM_DELETE_ALL" ] ; then
# undeploy helm release (prefix)
# we will delete the whole namespace
delete_namespace
+ # we will cleanup docker on each node
+ docker_cleanup
+
+ # we will delete the content of storage (volumes)
if [ -n "$VOLUME_STORAGE" ] ; then
delete_storage
fi
for _component in ${_COMPONENTS} ; do
if echo "$_component" | grep -q "^${RELEASE_PREFIX}-" ; then
msg "Redeploy component: ${_component}"
- redeploy_component ${_component}
+ undeploy_component ${_component}
else
error "Component release name '${_component}' does not match release prefix: ${RELEASE_PREFIX} (SKIP)"
fi