6df4f9bdbd7289195c05d837bb13a03805406770
[portal.git] / ecomp-portal-BE-common / src / main / java / org / onap / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ============LICENSE_START==========================================
3  * ONAP Portal
4  * ===================================================================
5  * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6  * ===================================================================
7  *
8  * Unless otherwise specified, all software contained herein is licensed
9  * under the Apache License, Version 2.0 (the "License");
10  * you may not use this software except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *             http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  * Unless otherwise specified, all documentation contained herein is licensed
22  * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23  * you may not use this documentation except in compliance with the License.
24  * You may obtain a copy of the License at
25  *
26  *             https://creativecommons.org/licenses/by/4.0/
27  *
28  * Unless required by applicable law or agreed to in writing, documentation
29  * distributed under the License is distributed on an "AS IS" BASIS,
30  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31  * See the License for the specific language governing permissions and
32  * limitations under the License.
33  *
34  * ============LICENSE_END============================================
35  *
36  * 
37  */
38 package org.onap.portalapp.portal.listener;
39
40 import java.time.Instant;
41 import java.util.List;
42
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
45
46 import org.apache.commons.lang3.StringUtils;
47 import org.apache.zookeeper.ZooKeeper;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.EnableAspectJAutoProxy;
65 import org.springframework.transaction.annotation.Transactional;
66
67
68
69
70 @Transactional
71 @org.springframework.context.annotation.Configuration
72 @EnableAspectJAutoProxy
73 @EPMetricsLog
74 public class HealthMonitor {
75
76         
77         ZooKeeper zookeeper = null;
78
79         private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
80
81         @Autowired
82         private SessionFactory sessionFactory;
83
84
85         private static boolean databaseUp;
86         private static boolean uebUp;
87         private static boolean frontEndUp;
88         private static boolean backEndUp;
89         private static boolean dbClusterStatusOk;
90         private static boolean dbPermissionsOk;
91         private static boolean zookeeperStatusOk;
92         private static boolean cassandraStatusOk;
93         private static String APPLICATION = "Portal";
94         
95         /**
96          * Read directly by external classes.
97          */
98         public static boolean isSuspended = false;
99
100         private Thread healthMonitorThread;
101
102         public HealthMonitor() {
103         }
104
105         public static boolean isDatabaseUp() {
106                 return databaseUp;
107         }
108
109         public static boolean isClusterStatusOk() {
110                 return dbClusterStatusOk;
111         }
112
113         public static boolean isDatabasePermissionsOk() {
114                 return dbPermissionsOk;
115         }
116
117         public static boolean isUebUp() {
118                 return uebUp;
119         }
120
121         public static boolean isFrontEndUp() {
122                 return frontEndUp;
123         }
124
125         public static boolean isBackEndUp() {
126                 return backEndUp;
127         }
128         
129         public static boolean isZookeeperStatusOk() {
130                 return zookeeperStatusOk;
131         }
132
133         public static boolean isCassandraStatusOk() {
134                 return cassandraStatusOk;
135         }
136
137         private void monitorEPHealth() throws InterruptedException {
138
139                 int numIntervalsDatabaseHasBeenDown = 0;
140                 int numIntervalsClusterNotHealthy = 0;
141                 int numIntervalsDatabasePermissionsIncorrect = 0;
142                 int numIntervalsZookeeperNotHealthy = 0;
143                 int numIntervalsCassandraNotHealthy = 0;
144
145                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
146
147                 long sleepInterval = (Long
148                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
149                 long numIntervalsBetweenAlerts = Long
150                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
151                 logger.debug(EELFLoggerDelegate.debugLogger,
152                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
153                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
154
155                 while (true) {
156                         //
157                         // Get DB status. If down, signal alert once every X intervals.
158                         //
159                         databaseUp = this.checkIfDatabaseUp();
160                         if (databaseUp == false) {
161                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
162                                         logger.debug(EELFLoggerDelegate.debugLogger,
163                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
164                                         // Write a Log entry that will generate an alert
165                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
166                                         numIntervalsDatabaseHasBeenDown++;
167                                 } else {
168                                         numIntervalsDatabaseHasBeenDown = 0;
169                                 }
170                         }
171
172                         dbClusterStatusOk = this.checkClusterStatus();
173                         if (dbClusterStatusOk == false) {
174                                 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
175                                         logger.debug(EELFLoggerDelegate.debugLogger,
176                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
177                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
178                                         numIntervalsClusterNotHealthy++;
179                                 } else {
180                                         numIntervalsClusterNotHealthy = 0;
181                                 }
182                         }
183
184                         dbPermissionsOk = this.checkDatabasePermissions();
185                         if (dbPermissionsOk == false) {
186                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
187                                         logger.debug(EELFLoggerDelegate.debugLogger,
188                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
189                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
190                                         numIntervalsDatabasePermissionsIncorrect++;
191                                 } else {
192                                         numIntervalsDatabasePermissionsIncorrect = 0;
193                                 }
194                         }
195                         org.onap.portalapp.music.util.MusicUtil MusicUtilSDK = new org.onap.portalapp.music.util.MusicUtil();
196                         if(MusicUtilSDK.isMusicEnable()){
197                                 zookeeperStatusOk = this.checkZookeeperStatus();
198                                 if (zookeeperStatusOk == false) {
199                                         if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
200                                                 logger.debug(EELFLoggerDelegate.debugLogger,
201                                                                 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
202                                                 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
203                                                 numIntervalsZookeeperNotHealthy++;
204                                         } else {
205                                                 numIntervalsZookeeperNotHealthy = 0;
206                                         }
207                                 }
208
209                                 cassandraStatusOk = this.checkCassandraStatus();
210                                 if (cassandraStatusOk == false) {
211                                         if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
212                                                 logger.debug(EELFLoggerDelegate.debugLogger,
213                                                                 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
214                                                 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
215                                                 numIntervalsCassandraNotHealthy++;
216                                         } else {
217                                                 numIntervalsCassandraNotHealthy = 0;
218                                         }
219                                 }
220                         }
221                         
222                         
223                         //
224                         // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
225                         // returned, status is Up.
226                         // If down, signal alert once every X intervals.
227                         // EP will ignore this bogus message.
228                         // Commenting this out as Dependency on UEB is being deprecated
229                         /*
230                          * uebUp = this.checkIfUebUp(); if (uebUp == false) {
231                          * 
232                          * if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
233                          * logger.debug(EELFLoggerDelegate.debugLogger,
234                          * "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write
235                          * a Log entry that will generate an alert EPLogUtil.logEcompError(logger,
236                          * EPAppMessagesEnum.BeHealthCheckUebClusterError);
237                          * numIntervalsUebHasBeenDown++; } else { numIntervalsUebHasBeenDown = 0; } }
238                          */
239
240                         // The front end should be up because the API is called through
241                         // proxy front end server.
242                         frontEndUp = true;
243
244                         // If the rest API called, the backend is always up
245                         backEndUp = true;
246
247                         //
248                         // future nice to have...get Partner status
249                         //
250                         // For all apps exposing a rest url, query one of the rest
251                         // urls(/roles?) and manage a list
252                         // of app name/status. We might not return back a non 200 OK in
253                         // health check, but we
254                         // could return information in the json content of a health check.
255                         //
256
257                         //
258                         // Get DB status. If down, signal alert once every X intervals.
259                         //
260                         if (Thread.interrupted()) {
261                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
262                                 break;
263                         }
264
265                         try {
266                                 Thread.sleep(sleepInterval);
267                         } catch (InterruptedException e) {
268                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
269                                 Thread.currentThread().interrupt();
270                         }
271                 }
272         }
273
274         @PostConstruct
275         public void initHealthMonitor() {
276                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
277                         public void run() {
278                                 try {
279                                         monitorEPHealth();
280                                 } catch (InterruptedException e) {
281                                         logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
282                                 } catch (Exception e) {
283                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
284                                 }
285                         }
286                 };
287                 healthMonitorThread.start();
288                 
289         }
290
291         @PreDestroy
292         public void closeHealthMonitor() {
293                 this.healthMonitorThread.interrupt();
294         }
295
296         /**
297          * This routine checks whether the database can be read. In June 2017 we
298          * experimented with checking if the database can be WRITTEN. Writes failed
299          * with some regularity in a MariaDB Galera cluster, and in that
300          * environment, the resulting alerts in the log triggered a health monitor
301          * cron job to shut down the Tomcat instance. The root cause of the cluster
302          * write failures was not determined.
303          * 
304          * @return true if the database can be read.
305          */
306         private boolean checkIfDatabaseUp() {
307                 boolean isUp = false;
308                 Session localSession = null;
309                 try {
310                         localSession = sessionFactory.openSession();
311                         if (localSession != null) {
312                                 String sql = "select app_name from fn_app where app_id=1";
313                                 Query query = localSession.createSQLQuery(sql);
314                                 @SuppressWarnings("unchecked")
315                                 List<String> queryList = query.list();
316                                 if (queryList != null) {
317                                         isUp = true;
318                                 }
319                         }
320                 } catch (Exception e) {
321                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
322                         isUp = false;
323                 } finally {
324                         if (localSession != null)
325                                 localSession.close();
326                 }
327                 return isUp;
328         }
329
330         private boolean checkClusterStatus() {
331                 boolean isUp = false;
332                 Session localSession = null;
333                 try {
334                         localSession = sessionFactory.openSession();
335                         if (localSession != null) {
336                                 // If all nodes are unhealthy in a cluster, this will throw an
337                                 // exception
338                                 String sql = "select * from mysql.user";
339                                 Query query = localSession.createSQLQuery(sql);
340                                 @SuppressWarnings("unchecked")
341                                 List<String> queryList = query.list();
342                                 if (queryList != null) {
343                                         isUp = true;
344                                 }
345                         }
346                 } catch (Exception e) {
347                         logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
348                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
349                                 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
350                         }
351                         isUp = false;
352                 } finally {
353                         if (localSession != null) {
354                                 localSession.close();
355                         }
356                 }
357                 return isUp;
358         }
359
360         private boolean checkZookeeperStatus() {
361
362                 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
363                 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
364                 for (int i = 0; i < zookeeperNodes.length; i++) {
365                         try {
366                                 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper  :" + zookeeperNodes[i].trim());
367                                 String[] iport = zookeeperNodes[i].split(":");
368                                 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
369                                                 Integer.parseInt(iport[1].trim()), "stat");
370                                 logger.info(EELFLoggerDelegate.applicationLogger,
371                                                 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
372                                 if (StringUtils.isNotBlank(zkNodeStatistics)) {
373                                         String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
374                                                         zkNodeStatistics.indexOf("Node"));
375                                         logger.info(EELFLoggerDelegate.applicationLogger,
376                                                         "Getting Status for zookeeper :" + zookeeperNodes[i].trim() + ":------:" + state);
377                                         if (state.contains("leader"))
378                                                 return true;
379                                 }
380                         } catch (Exception e) {
381                                 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
382                         }
383                 }
384
385                 return false;
386         }
387
388
389         public boolean checkCassandraStatus() {
390                 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
391                 if (this.getAdminKeySpace()) {
392                         return true;
393                 } else {
394                         logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
395                         return false;
396                 }
397         }
398         
399         private Boolean getAdminKeySpace() {
400                 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
401                 Instant creationTime = Instant.now();
402                 PreparedQueryObject pQuery = new PreparedQueryObject();
403                 pQuery.appendQueryString(
404                                 "UPDATE " + musicKeySpace + ".health_check  SET creation_time = ? WHERE primary_id = ?");
405                 pQuery.addValue(creationTime.toString());
406                 pQuery.addValue(APPLICATION);
407                 try {
408                         MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
409                 } catch (MusicServiceException e) {
410                         return Boolean.FALSE;
411                 }
412                 return Boolean.TRUE;
413
414         }
415
416         
417         private boolean checkDatabasePermissions() {
418                 boolean isUp = false;
419                 Session localSession = null;
420                 try {
421                         localSession = sessionFactory.openSession();
422                         if (localSession != null) {
423                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
424                                 Query query = localSession.createSQLQuery(sql);
425                                 @SuppressWarnings("unchecked")
426                                 List<String> grantsList = query.list();
427                                 for (String str : grantsList) {
428                                         if ((str.toUpperCase().contains("ALL"))
429                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
430                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
431                                                 isUp = true;
432                                                 break;
433                                         }
434                                 }
435                                 if (isUp == false) {
436                                         logger.error(EELFLoggerDelegate.errorLogger,
437                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
438                                         for (String str : grantsList) {
439                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
440                                         }
441                                 }
442                         }
443                 } catch (Exception e) {
444                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
445                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
446                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
447                         }
448                         isUp = false;
449                 } finally {
450                         if (localSession != null) {
451                                 localSession.close();
452                         }
453                 }
454                 return isUp;
455         }
456         
457 }