891da3b7bbbf37e63483652b8d2588f78c4ba07f
[portal.git] / ecomp-portal-BE-common / src / main / java / org / onap / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ============LICENSE_START==========================================
3  * ONAP Portal
4  * ===================================================================
5  * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6  * ===================================================================
7  *
8  * Unless otherwise specified, all software contained herein is licensed
9  * under the Apache License, Version 2.0 (the "License");
10  * you may not use this software except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *             http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  * Unless otherwise specified, all documentation contained herein is licensed
22  * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23  * you may not use this documentation except in compliance with the License.
24  * You may obtain a copy of the License at
25  *
26  *             https://creativecommons.org/licenses/by/4.0/
27  *
28  * Unless required by applicable law or agreed to in writing, documentation
29  * distributed under the License is distributed on an "AS IS" BASIS,
30  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31  * See the License for the specific language governing permissions and
32  * limitations under the License.
33  *
34  * ============LICENSE_END============================================
35  *
36  * 
37  */
38 package org.onap.portalapp.portal.listener;
39
40 import java.time.Instant;
41 import java.util.List;
42
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
45
46 import org.apache.commons.lang3.StringUtils;
47 import org.apache.zookeeper.ZooKeeper;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.EnableAspectJAutoProxy;
65 import org.springframework.transaction.annotation.Transactional;
66
67
68
69
70 @Transactional
71 @org.springframework.context.annotation.Configuration
72 @EnableAspectJAutoProxy
73 @EPMetricsLog
74 public class HealthMonitor {
75
76         
77         ZooKeeper zookeeper = null;
78
79         private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
80
81         @Autowired
82         private SessionFactory sessionFactory;
83
84
85         private static boolean databaseUp;
86         private static boolean uebUp;
87         private static boolean frontEndUp;
88         private static boolean backEndUp;
89         private static boolean dbPermissionsOk;
90         private static boolean zookeeperStatusOk;
91         private static boolean cassandraStatusOk;
92         private static String APPLICATION = "Portal";
93         
94         /**
95          * Read directly by external classes.
96          */
97         public static boolean isSuspended = false;
98
99         private Thread healthMonitorThread;
100
101         public HealthMonitor() {
102         }
103
104         public static boolean isDatabaseUp() {
105                 return databaseUp;
106         }
107
108         public static boolean isDatabasePermissionsOk() {
109                 return dbPermissionsOk;
110         }
111
112         public static boolean isUebUp() {
113                 return uebUp;
114         }
115
116         public static boolean isFrontEndUp() {
117                 return frontEndUp;
118         }
119
120         public static boolean isBackEndUp() {
121                 return backEndUp;
122         }
123         
124         public static boolean isZookeeperStatusOk() {
125                 return zookeeperStatusOk;
126         }
127
128         public static boolean isCassandraStatusOk() {
129                 return cassandraStatusOk;
130         }
131
132         private void monitorEPHealth() throws InterruptedException {
133
134                 int numIntervalsDatabaseHasBeenDown = 0;
135                 int numIntervalsClusterNotHealthy = 0;
136                 int numIntervalsDatabasePermissionsIncorrect = 0;
137                 int numIntervalsZookeeperNotHealthy = 0;
138                 int numIntervalsCassandraNotHealthy = 0;
139
140                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
141         
142
143                 long sleepInterval = (Long
144                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
145                 long numIntervalsBetweenAlerts = Long
146                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
147                 logger.debug(EELFLoggerDelegate.debugLogger,
148                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
149                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
150                 
151                 while (true) {
152                         logger.debug(EELFLoggerDelegate.debugLogger,
153                                         "monitorEPHealth: Test Connection to all");
154                         //
155                         // Get DB status. If down, signal alert once every X intervals.
156                         //
157                         databaseUp = this.checkIfDatabaseUp();
158                         if (databaseUp == false) {
159                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
160                                         logger.debug(EELFLoggerDelegate.debugLogger,
161                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
162                                         // Write a Log entry that will generate an alert
163                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
164                                         numIntervalsDatabaseHasBeenDown++;
165                                 } else {
166                                         numIntervalsDatabaseHasBeenDown = 0;
167                                 }
168                         }
169
170                         dbPermissionsOk = this.checkDatabasePermissions();
171                         if (dbPermissionsOk == false) {
172                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
173                                         logger.debug(EELFLoggerDelegate.debugLogger,
174                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
175                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
176                                         numIntervalsDatabasePermissionsIncorrect++;
177                                 } else {
178                                         numIntervalsDatabasePermissionsIncorrect = 0;
179                                 }
180                         }
181                         org.onap.portalapp.music.util.MusicUtil MusicUtilSDK = new org.onap.portalapp.music.util.MusicUtil();
182                         if(MusicUtilSDK.isMusicEnable()){
183
184                                 zookeeperStatusOk = this.checkZookeeperStatus();
185
186                                 if (zookeeperStatusOk == false) {
187                                         if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
188                                                 logger.debug(EELFLoggerDelegate.debugLogger,
189                                                                 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
190                                                 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
191                                                 numIntervalsZookeeperNotHealthy++;
192                                         } else {
193                                                 numIntervalsZookeeperNotHealthy = 0;
194                                         }
195                                 }
196
197                                 cassandraStatusOk = this.checkCassandraStatus();
198                                 if (cassandraStatusOk == false) {
199                                         if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
200                                                 logger.debug(EELFLoggerDelegate.debugLogger,
201                                                                 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
202                                                 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
203                                                 numIntervalsCassandraNotHealthy++;
204                                         } else {
205                                                 numIntervalsCassandraNotHealthy = 0;
206                                         }
207                                 }
208                         }
209                         
210                         
211                         //
212                         // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
213                         // returned, status is Up.
214                         // If down, signal alert once every X intervals.
215                         // EP will ignore this bogus message.
216                         // Commenting this out as Dependency on UEB is being deprecated
217                         /*
218                          * uebUp = this.checkIfUebUp(); if (uebUp == false) {
219                          * 
220                          * if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
221                          * logger.debug(EELFLoggerDelegate.debugLogger,
222                          * "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write
223                          * a Log entry that will generate an alert EPLogUtil.logEcompError(logger,
224                          * EPAppMessagesEnum.BeHealthCheckUebClusterError);
225                          * numIntervalsUebHasBeenDown++; } else { numIntervalsUebHasBeenDown = 0; } }
226                          */
227
228                         // The front end should be up because the API is called through
229                         // proxy front end server.
230                         frontEndUp = true;
231
232                         // If the rest API called, the backend is always up
233                         backEndUp = true;
234
235                         //
236                         // future nice to have...get Partner status
237                         //
238                         // For all apps exposing a rest url, query one of the rest
239                         // urls(/roles?) and manage a list
240                         // of app name/status. We might not return back a non 200 OK in
241                         // health check, but we
242                         // could return information in the json content of a health check.
243                         //
244
245                         //
246                         // Get DB status. If down, signal alert once every X intervals.
247                         //
248                         if (Thread.interrupted()) {
249                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
250                                 break;
251                         }
252
253                         try {
254                                 Thread.sleep(sleepInterval);
255                         } catch (InterruptedException e) {
256                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
257                                 Thread.currentThread().interrupt();
258                         }
259                 }
260         }
261
262         @PostConstruct
263         public void initHealthMonitor() {
264                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
265                         public void run() {
266                                 try {
267                                         monitorEPHealth();
268                                 } catch (InterruptedException e) {
269                                         logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
270                                 } 
271                                 catch (Exception e) {
272                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
273                                 }
274                         }
275                 };
276                 healthMonitorThread.start();
277                 
278         }
279
280         @PreDestroy
281         public void closeHealthMonitor() {
282                 this.healthMonitorThread.interrupt();
283         }
284
285         /**
286          * This routine checks whether the database can be read. In June 2017 we
287          * experimented with checking if the database can be WRITTEN. Writes failed
288          * with some regularity in a MariaDB Galera cluster, and in that
289          * environment, the resulting alerts in the log triggered a health monitor
290          * cron job to shut down the Tomcat instance. The root cause of the cluster
291          * write failures was not determined.
292          * 
293          * @return true if the database can be read.
294          */
295         private boolean checkIfDatabaseUp() {
296                 boolean isUp = false;
297                 Session localSession = null;
298                 try {
299                         localSession = sessionFactory.openSession();
300                         if (localSession != null) {
301                                 String sql = "select app_name from fn_app where app_id=1";
302                                 Query query = localSession.createSQLQuery(sql);
303                                 @SuppressWarnings("unchecked")
304                                 List<String> queryList = query.list();
305                                 if (queryList != null) {
306                                         isUp = true;
307                                 }
308                         }
309                 } catch (Exception e) {
310                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
311                         isUp = false;
312                 } finally {
313                         if (localSession != null)
314                                 localSession.close();
315                 }
316                 return isUp;
317         }
318
319         private boolean checkZookeeperStatus() {
320
321                 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
322                 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
323                 for (int i = 0; i < zookeeperNodes.length; i++) {
324                         try {
325                                 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper  :" + zookeeperNodes[i].trim());
326                                 String[] iport = zookeeperNodes[i].split(":");
327                                 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
328                                                 Integer.parseInt(iport[1].trim()), "stat");
329                                 logger.info(EELFLoggerDelegate.applicationLogger,
330                                                 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
331                                 if (StringUtils.isNotBlank(zkNodeStatistics)) {
332                                         String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
333                                                         zkNodeStatistics.indexOf("Node"));
334                                         logger.info(EELFLoggerDelegate.applicationLogger,
335                                                         "Getting Status for zookeeper :" + zookeeperNodes[i].trim() + ":------:" + state);
336                                         if (state.contains("leader"))
337                                                 return true;
338                                 }
339                         } catch (Exception e) {
340                                 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
341                         }
342                 }
343
344                 return false;
345         }
346
347
348         public boolean checkCassandraStatus() {
349                 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
350                 if (this.getAdminKeySpace()) {
351                         return true;
352                 } else {
353                         logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
354                         return false;
355                 }
356         }
357         
358         private Boolean getAdminKeySpace() {
359                 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
360                 Instant creationTime = Instant.now();
361                 PreparedQueryObject pQuery = new PreparedQueryObject();
362                 pQuery.appendQueryString(
363                                 "UPDATE " + musicKeySpace + ".health_check  SET creation_time = ? WHERE primary_id = ?");
364                 pQuery.addValue(creationTime.toString());
365                 pQuery.addValue(APPLICATION);
366                 try {
367                         MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
368                 } catch (MusicServiceException e) {
369                         return Boolean.FALSE;
370                 }
371                 return Boolean.TRUE;
372
373         }
374
375         
376         private boolean checkDatabasePermissions() {
377                 boolean isUp = false;
378                 Session localSession = null;
379                 try {
380                         localSession = sessionFactory.openSession();
381                         if (localSession != null) {
382                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
383                                 Query query = localSession.createSQLQuery(sql);
384                                 @SuppressWarnings("unchecked")
385                                 List<String> grantsList = query.list();
386                                 for (String str : grantsList) {
387                                         if ((str.toUpperCase().contains("ALL"))
388                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
389                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
390                                                 isUp = true;
391                                                 break;
392                                         }
393                                 }
394                                 if (isUp == false) {
395                                         logger.error(EELFLoggerDelegate.errorLogger,
396                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
397                                         for (String str : grantsList) {
398                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
399                                         }
400                                 }
401                         }
402                 } catch (Exception e) {
403                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
404                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
405                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
406                         }
407                         isUp = false;
408                 } finally {
409                         if (localSession != null) {
410                                 localSession.close();
411                         }
412                 }
413                 return isUp;
414         }
415         
416 }