Increase robustness for TCA 13/47513/2
authorLusheng Ji <lji@research.att.com>
Tue, 15 May 2018 02:45:56 +0000 (22:45 -0400)
committerLusheng Ji <lji@research.att.com>
Tue, 15 May 2018 03:02:39 +0000 (23:02 -0400)
Enhanced TCA robustness against unprovisioned topics.
When the configuration tells TCA to subscribe to a non-existent MR topic,
TCA attempts to subscribe but then stops, because subscribing to such a topic
results in failure.  The enhancements implemented here test for the subscription
topic and, if it does not exist, publish a test message to create the topic.
Additional enhancements include:
1. restart TCA if the number of workers is below the expected count (3);
2. allow the MR subscriber group and id to be set via the environment variables
DMAAPSUBGROUP and DMAAPSUBID.
3. Minor version is bumped.

Issue-ID: DCAEGEN2-502
Change-Id: I3414a96706a1b720184cd657324db4d11db12590
Signed-off-by: Lusheng Ji <lji@research.att.com>
tca-cdap-container/Dockerfile
tca-cdap-container/get-tca.sh
tca-cdap-container/mr-watchdog.sh [new file with mode: 0755]
tca-cdap-container/pom.xml
tca-cdap-container/restart.sh

index 5cd1267..2c57ff2 100644 (file)
 
 FROM caskdata/cdap-standalone:4.1.2
 
-RUN apt-get update && apt-get install -y netcat jq iputils-ping wget vim
+RUN apt-get update && apt-get install -y netcat jq iputils-ping wget vim curl
 COPY get-tca.sh /opt/tca/get-tca.sh
 RUN /opt/tca/get-tca.sh
 COPY tca_app_config.json /opt/tca/tca_app_config.json
 COPY tca_app_preferences.json /opt/tca/tca_app_preferences.json
 COPY restart.sh /opt/tca/restart.sh
 RUN chmod 755 /opt/tca/restart.sh
+COPY mr-watchdog.sh /opt/tca/mr-watchdog.sh
+RUN chmod 755 /opt/tca/mr-watchdog.sh
 
 #COPY host.aliases /etc/host.aliases
 #RUN echo "export HOSTALIASES=/etc/host.aliases" >> /etc/profile
index 9b46830..784d914 100755 (executable)
@@ -37,7 +37,7 @@ echo "Getting version $VERSION of $GROUPID.$ARTIFACTID from $REPO repo on $NEXUS
 if [ "$REPO" == "snapshots" ]; then
   # SNOTSHOT repo container many snapshots for each version.  get the newest among them
   URL="${PROTO}://${NEXUSREPO}/service/local/repositories/${REPO}/content/${GROUPID//.//}/${ARTIFACTID}/${VERSION}/maven-metadata.xml"
-  VT=$(wget --no-check-certificate -O- $URL | grep -m 1 \<value\> | sed -e 's/<value>\(.*\)<\/value>/\1/' | sed -e 's/ //g')
+  VT=$(wget --no-check-certificate -O- "$URL" | grep -m 1 \<value\> | sed -e 's/<value>\(.*\)<\/value>/\1/' | sed -e 's/ //g')
 else
   VT=${VERSION}
 fi
diff --git a/tca-cdap-container/mr-watchdog.sh b/tca-cdap-container/mr-watchdog.sh
new file mode 100755 (executable)
index 0000000..fa623a1
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/bash
+# ================================================================================
+# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+
+
+
+SUB_TOPIC=${3:-unauthenticated.VES_MEASUREMENT_OUTPUT}
+MR_LOCATION=${1:-10.0.11.1}
+MR_PORT=${2:-3904}
+MR_PROTO='http'
+
+
+TOPIC_LIST_URL="${MR_PROTO}://${MR_LOCATION}:${MR_PORT}/topics"
+TEST_PUB_URL="${MR_PROTO}://${MR_LOCATION}:${MR_PORT}/events/${SUB_TOPIC}"
+
+unset RES
+echo "==> Check topic [${SUB_TOPIC}] availbility on ${MR_LOCATION}:${MR_PORT}"
+until [ -n "$RES" ]; do
+    URL="$TOPIC_LIST_URL"
+    HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL")
+    HTTP_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS\:.*//g')
+    HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
+    if [ "${HTTP_STATUS}" != "200" ]; then
+        echo "   ==> MR topic listing not ready, retry in 30 seconds"
+        sleep 30
+        continue
+    fi
+
+    echo "   ==> MR topic listing received, check topic availbility"
+    RES=$(echo "${HTTP_BODY}" |jq .topics |grep "\"$SUB_TOPIC\"")
+    if [ -z "${RES}" ]; then
+        echo "      ==> No topic [${SUB_TOPIC}] found, send test publish"
+        URL="$TEST_PUB_URL"
+        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -H "Content-Type:text/plain" -X POST -d "{}" "$URL")
+        HTTP_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS\:.*//g')
+        HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
+         
+        if [ "$HTTP_STATUS" != "200" ]; then
+            echo "      ==> Testing MR topic publishing received status $HTTP_STATUS != 200, retesting in 30 seconds"
+            sleep 30
+        else
+            echo "      ==> Testing MR topic publishing received status $HTTP_STATUS, topic [$SUB_TOPIC] created"
+        fi
+    fi
+done
+echo "==> Topic [${SUB_TOPIC}] ready"
index cab867b..212feab 100644 (file)
@@ -27,7 +27,7 @@ limitations under the License.
   <groupId>org.onap.dcaegen2.deployments</groupId>
   <artifactId>tca-cdap-container</artifactId>
   <name>dcaegen2-deployments-tca-cdap-container</name>
-  <version>1.0.0</version>
+  <version>1.1.0</version>
   <url>http://maven.apache.org</url>
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
index 4f6ed92..6d0c60f 100755 (executable)
@@ -36,6 +36,10 @@ TCA_PREF_TEMP='/tmp/tca_preferences.json'
 TCA_PATH_APP="${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/${TCA_NAMESPACE}/apps/${TCA_APPNAME}"
 TCA_PATH_ARTIFACT="${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/${TCA_NAMESPACE}/artifacts"
 
+MR_WATCHDOG_PATH="${TCA_FILE_PATH}/mr-watchdog.sh"
+
+
+WORKER_COUNT='0'
 
 CONSUL_HOST=${CONSUL_HOST:-consul}
 CONSUL_PORT=${CONSUL_PORT:-8500}
@@ -48,12 +52,14 @@ MY_NAME=${HOSTNAME:-tca}
 
 
 echo "Generting preference file"
+DMAAPSUBGROUP=${DMAAPSUBGROUP:-OpenDCAEc12}
+DMAAPSUBID=${DMAAPSUBID:=c12}
 sed -i 's/{{DMAAPHOST}}/'"${DMAAPHOST}"'/g' ${TCA_PREF}
 sed -i 's/{{DMAAPPORT}}/'"${DMAAPPORT}"'/g' ${TCA_PREF}
 sed -i 's/{{DMAAPPUBTOPIC}}/'"${DMAAPPUBTOPIC}"'/g' ${TCA_PREF}
 sed -i 's/{{DMAAPSUBTOPIC}}/'"${DMAAPSUBTOPIC}"'/g' ${TCA_PREF}
-sed -i 's/{{DMAAPSUBGROUP}}/OpenDCAEc12/g' ${TCA_PREF}
-sed -i 's/{{DMAAPSUBID}}/c12/g' ${TCA_PREF}
+sed -i 's/{{DMAAPSUBGROUP}}/'"${DMAAPSUBGROUP}"'/g' ${TCA_PREF}
+sed -i 's/{{DMAAPSUBID}}/'"${DMAAPSUBID}"'/g' ${TCA_PREF}
 sed -i 's/{{AAIHOST}}/'"${AAIHOST}"'/g' ${TCA_PREF}
 sed -i 's/{{AAIPORT}}/'"${AAIPORT}"'/g' ${TCA_PREF}
 if [ -z "$REDISHOSTPORT" ]; then
@@ -121,19 +127,41 @@ function tca_start {
 
 
 function tca_status {
+    WORKER_COUNT='0'
     echo
-    echo "TCADMaaPMRPublisherWorker status: "
-    curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRPublisherWorker/status"
+    STATUS=$(curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRPublisherWorker/status")
+    echo "TCADMaaPMRPublisherWorker status: $STATUS"
+    INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l)
+    WORKER_COUNT=$((WORKER_COUNT+INC))
+
+    STATUS=$(curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRSubscriberWorker/status")
+    echo "TCADMaaPMRSubscriberWorker status: $STATUS"
+    INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l)
+    WORKER_COUNT=$((WORKER_COUNT+INC))
+
+    STATUS=$(curl -s "http://${TCA_PATH_APP}/flows/TCAVESCollectorFlow/status")
+    echo "TCAVESCollectorFlow status: $STATUS"
+    INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l)
+    WORKER_COUNT=$((WORKER_COUNT+INC))
     echo
-    echo "TCADMaaPMRSubscriberWorker status: "
-    curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRSubscriberWorker/status"
-    echo
-    echo "TCAVESCollectorFlow status"
-    curl -s "http://${TCA_PATH_APP}/flows/TCAVESCollectorFlow/status"
-    echo; echo
 }
 
 
+function tca_restart {
+    MR_HOST=$(jq .subscriberHostName ${TCA_PREF} |sed -e 's/\"//g')
+    MR_PORT=$(jq .subscriberHostPort ${TCA_PREF} |sed -e 's/\"//g')
+    MR_TOPIC=$(jq .subscriberTopicName ${TCA_PREF}  |sed -e 's/\"//g')
+    echo "Verifying DMaaP topic: ${MR_TOPIC}@${MR_HOST}:${MR_PORT} (will block until topic ready)"
+    "${MR_WATCHDOG_PATH}" "${MR_HOST}" "${MR_PORT}" "${MR_TOPIC}"
+    tca_stop
+    tca_delete
+    tca_load_artifact
+    tca_load_conf
+    tca_start
+    sleep 5
+    tca_status
+}
+
 function tca_poll_policy {
     URL0="${CBS_HOST}:${CBS_PORT}/service_component_all/${MY_NAME}"
     echo "tca_poll_policy: Retrieving all-in-one config at ${URL0}"
@@ -246,12 +274,9 @@ function tca_poll_policy {
 
     if [[ "$PERF_CHANGED" == "1" || "$CONF_CHANGED" == "1" ]]; then
         echo "Newly received configuration/preference differ from the running instance's.  reload confg"
-       tca_stop
-       tca_delete
-        tca_load_artifact
-       tca_load_conf
-       tca_start
-       tca_status
+        tca_restart
+    else
+        echo "Newly received configuration/preference identical from the running instance's"
     fi 
 }
 
@@ -264,9 +289,9 @@ echo "Starting TCA-CDAP in standalone mode"
 # starting CDAP SDK in background
 cdap sdk start 
 
-echo "Started, waiting CDAP ready on port 11015 ..."
+echo "CDAP Started, waiting CDAP ready on ${CDAP_HOST}:${CDAP_PORT} ..."
 while ! nc -z ${CDAP_HOST} ${CDAP_PORT}; do   
-  sleep 0.1 # wait for 1/10 of the second before check again
+  sleep 1 # wait for 1 second before check again
 done
 
 echo "Creating namespace cdap_tca_hi_lo ..."
@@ -274,21 +299,18 @@ curl -s -X PUT "http://${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/cdap_tca_hi_lo"
 
 # stop programs
 tca_stop
-
 # delete application
 tca_delete
-
 # load artifact
 tca_load_artifact
 tca_load_conf
-
 # start programs
 tca_start
 
 # get status of programs
 tca_status
 
-echo "TCA-CDAP standalone mode initialization completed"
+echo "TCA-CDAP standalone mode initialization completed, with $WORKER_COUNT / 3 up"
 
 
 
@@ -301,7 +323,18 @@ echo "TCA environment: I am ${MY_NAME}, consul at ${CONSUL_HOST}:${CONSUL_PORT},
 
 while echo
 do
-    echo "$(date):  ======================================================"
+    echo "======================================================> $(date)"
+    tca_status
+
+    while [ "$WORKER_COUNT" != "3" ]; do
+        echo "Status checking: worker count is $WORKER_COUNT, needs a reset"
+        sleep 5
+
+        tca_restart
+        echo "TCA restarted"
+    done
+
+
     if [[ -z "$CBS_HOST" ||  -z "$CBS_PORT" ]]; then
        echo "Retrieving host and port for ${CBS_SERVICE_NAME} from ${CONSUL_HOST}:${CONSUL_PORT}"
        sleep 2