Fixed health check issue
[portal.git] / ecomp-portal-BE-common / src / main / java / org / onap / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ============LICENSE_START==========================================
3  * ONAP Portal
4  * ===================================================================
5  * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6  * ===================================================================
7  *
8  * Unless otherwise specified, all software contained herein is licensed
9  * under the Apache License, Version 2.0 (the "License");
10  * you may not use this software except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *             http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  * Unless otherwise specified, all documentation contained herein is licensed
22  * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23  * you may not use this documentation except in compliance with the License.
24  * You may obtain a copy of the License at
25  *
26  *             https://creativecommons.org/licenses/by/4.0/
27  *
28  * Unless required by applicable law or agreed to in writing, documentation
29  * distributed under the License is distributed on an "AS IS" BASIS,
30  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31  * See the License for the specific language governing permissions and
32  * limitations under the License.
33  *
34  * ============LICENSE_END============================================
35  *
36  * 
37  */
38 package org.onap.portalapp.portal.listener;
39
40 import java.time.Instant;
41 import java.util.List;
42
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
45
46 import lombok.NoArgsConstructor;
47 import org.apache.commons.lang3.StringUtils;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.Configuration;
65 import org.springframework.context.annotation.EnableAspectJAutoProxy;
66 import org.springframework.transaction.annotation.Transactional;
67
68
69
70
71 @Transactional
72 @Configuration
73 @EnableAspectJAutoProxy
74 @EPMetricsLog
75 @NoArgsConstructor
76 public class HealthMonitor {
77         private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
78         private Thread healthMonitorThread;
79         private static SessionFactory sessionFactory;
80
81         private static boolean databaseUp;
82         private static boolean uebUp;
83         private static boolean frontEndUp;
84         private static boolean backEndUp;
85         private static boolean dbPermissionsOk;
86         private static boolean zookeeperStatusOk;
87         private static boolean cassandraStatusOk;
88         private static String application = "Portal";
89         private static boolean isSuspended = false;
90
91         @Autowired
92         public HealthMonitor(SessionFactory sessionFactory) {
93                 HealthMonitor.sessionFactory = sessionFactory;
94         }
95
96         private static void monitorEPHealth() {
97
98                 int numIntervalsDatabaseHasBeenDown = 0;
99                 int numIntervalsDatabasePermissionsIncorrect = 0;
100                 int numIntervalsZookeeperNotHealthy = 0;
101                 int numIntervalsCassandraNotHealthy = 0;
102
103                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
104         
105
106                 long sleepInterval = (Long
107                                 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
108                 long numIntervalsBetweenAlerts = Long
109                                 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
110                 logger.debug(EELFLoggerDelegate.debugLogger,
111                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
112                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
113                 
114                 while (true) {
115                         logger.debug(EELFLoggerDelegate.debugLogger,
116                                         "monitorEPHealth: Test Connection to all");
117                         //
118                         // Get DB status. If down, signal alert once every X intervals.
119                         //
120                         databaseUp = checkIfDatabaseUp();
121                         if (databaseUp) {
122                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
123                                         logger.debug(EELFLoggerDelegate.debugLogger,
124                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
125                                         // Write a Log entry that will generate an alert
126                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
127                                         numIntervalsDatabaseHasBeenDown++;
128                                 } else {
129                                         numIntervalsDatabaseHasBeenDown = 0;
130                                 }
131                         }
132
133                         dbPermissionsOk = checkDatabasePermissions();
134                         if (!dbPermissionsOk) {
135                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
136                                         logger.debug(EELFLoggerDelegate.debugLogger,
137                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
138                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
139                                         numIntervalsDatabasePermissionsIncorrect++;
140                                 } else {
141                                         numIntervalsDatabasePermissionsIncorrect = 0;
142                                 }
143                         }
144                         if(org.onap.portalapp.music.util.MusicUtil.isMusicEnable()){
145
146                                 /*
147                                  * zookeeperStatusOk = checkZookeeperStatus();
148                                  * 
149                                  * if (!zookeeperStatusOk) { if ((numIntervalsZookeeperNotHealthy %
150                                  * numIntervalsBetweenAlerts) == 0) {
151                                  * logger.debug(EELFLoggerDelegate.debugLogger,
152                                  * "monitorEPHealth: cluster nodes down, logging to error log to trigger alert."
153                                  * ); EPLogUtil.logEcompError(logger,
154                                  * EPAppMessagesEnum.MusicHealthCheckZookeeperError);
155                                  * numIntervalsZookeeperNotHealthy++; } else { numIntervalsZookeeperNotHealthy =
156                                  * 0; } }
157                                  */
158
159                                 cassandraStatusOk = checkCassandraStatus();
160                                 if (!cassandraStatusOk) {
161                                         if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
162                                                 logger.debug(EELFLoggerDelegate.debugLogger,
163                                                                 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
164                                                 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
165                                                 numIntervalsCassandraNotHealthy++;
166                                         } else {
167                                                 numIntervalsCassandraNotHealthy = 0;
168                                         }
169                                 }
170                         }
171                         frontEndUp = true;
172                         backEndUp = true;
173
174                         if (Thread.interrupted()) {
175                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
176                                 break;
177                         }
178
179                         try {
180                                 Thread.sleep(sleepInterval);
181                         } catch (InterruptedException e) {
182                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
183                                 Thread.currentThread().interrupt();
184                         }
185                 }
186         }
187
188         @PostConstruct
189         public void initHealthMonitor() {
190                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
191                         @Override
192                         public void run() {
193                                 try {
194                                         monitorEPHealth();
195                                 }
196                                 catch (Exception e) {
197                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
198                                 }
199                         }
200                 };
201                 healthMonitorThread.start();
202                 
203         }
204
205         @PreDestroy
206         public void closeHealthMonitor() {
207                 this.healthMonitorThread.interrupt();
208         }
209
210         /**
211          * This routine checks whether the database can be read. In June 2017 we
212          * experimented with checking if the database can be WRITTEN. Writes failed
213          * with some regularity in a MariaDB Galera cluster, and in that
214          * environment, the resulting alerts in the log triggered a health monitor
215          * cron job to shut down the Tomcat instance. The root cause of the cluster
216          * write failures was not determined.
217          * 
218          * @return true if the database can be read.
219          */
220         private static boolean checkIfDatabaseUp() {
221                 boolean isUp = false;
222                 Session localSession = null;
223                 try {
224                         localSession = sessionFactory.openSession();
225                         if (localSession != null) {
226                                 String sql = "select app_name from fn_app where app_id=1";
227                                 Query query = localSession.createSQLQuery(sql);
228                                 @SuppressWarnings("unchecked")
229                                 List<String> queryList = query.list();
230                                 if (queryList != null) {
231                                         isUp = true;
232                                 }
233                         }
234                 } catch (Exception e) {
235                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
236                         isUp = false;
237                 } finally {
238                         if (localSession != null)
239                                 localSession.close();
240                 }
241                 return isUp;
242         }
243
244         private static boolean checkZookeeperStatus() {
245
246                 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
247                 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
248                 for (String zookeeperNode : zookeeperNodes) {
249                         try {
250                                 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper  :" + zookeeperNode.trim());
251                                 String[] iport = zookeeperNode.split(":");
252                                 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
253                                         Integer.parseInt(iport[1].trim()), "stat");
254                                 logger.info(EELFLoggerDelegate.applicationLogger,
255                                         "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
256                                 if (StringUtils.isNotBlank(zkNodeStatistics)) {
257                                         String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
258                                                 zkNodeStatistics.indexOf("Node"));
259                                         logger.info(EELFLoggerDelegate.applicationLogger,
260
261                                                 "Getting Status for zookeeper :" + zookeeperNode.trim() + ":------:" + state);
262                                         if (state.contains("leader") || state.contains("follower")) {
263                                                 return true;
264                                         }
265                                 }
266                         } catch (Exception e) {
267                                 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
268                         }
269                 }
270
271                 return false;
272         }
273
274
275         private static boolean checkCassandraStatus() {
276                 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
277                 if (getAdminKeySpace()) {
278                         return true;
279                 } else {
280                         logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
281                         return false;
282                 }
283         }
284         
285         private static Boolean getAdminKeySpace() {
286                 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
287                 Instant creationTime = Instant.now();
288                 PreparedQueryObject pQuery = new PreparedQueryObject();
289                 pQuery.appendQueryString(
290                                 "UPDATE " + musicKeySpace + ".health_check  SET creation_time = ? WHERE primary_id = ?");
291                 pQuery.addValue(creationTime.toString());
292                 pQuery.addValue(application);
293                 try {
294                         MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
295                 } catch (MusicServiceException e) {
296                         logger.error(EELFLoggerDelegate.errorLogger, e.getErrorMessage(), e);
297                         return Boolean.FALSE;
298                 } catch (Exception ex) {
299                         logger.error(EELFLoggerDelegate.errorLogger, ex.getMessage(), ex);
300                         return Boolean.FALSE;
301                 }
302                 return Boolean.TRUE;
303
304         }
305
306         
307         private static boolean checkDatabasePermissions() {
308                 boolean isUp = false;
309                 Session localSession = null;
310                 try {
311                         localSession = sessionFactory.openSession();
312                         if (localSession != null) {
313                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
314                                 Query query = localSession.createSQLQuery(sql);
315                                 @SuppressWarnings("unchecked")
316                                 List<String> grantsList = query.list();
317                                 for (String str : grantsList) {
318                                         if ((str.toUpperCase().contains("ALL"))
319                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
320                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
321                                                 isUp = true;
322                                                 break;
323                                         }
324                                 }
325                                 if (!isUp) {
326                                         logger.error(EELFLoggerDelegate.errorLogger,
327                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
328                                         for (String str : grantsList) {
329                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
330                                         }
331                                 }
332                         }
333                 } catch (Exception e) {
334                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
335                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
336                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
337                         }
338                         isUp = false;
339                 } finally {
340                         if (localSession != null) {
341                                 localSession.close();
342                         }
343                 }
344                 return isUp;
345         }
346
347         public static boolean isDatabaseUp() {
348                 return databaseUp;
349         }
350
351         public static boolean isUebUp() {
352                 return uebUp;
353         }
354
355         public static boolean isFrontEndUp() {
356                 return frontEndUp;
357         }
358
359         public static boolean isBackEndUp() {
360                 return backEndUp;
361         }
362
363         public static boolean isDbPermissionsOk() {
364                 return dbPermissionsOk;
365         }
366
367         public static boolean isZookeeperStatusOk() {
368                 return zookeeperStatusOk;
369         }
370
371         public static boolean isCassandraStatusOk() {
372                 return cassandraStatusOk;
373         }
374
375         public static boolean isSuspended() {
376                 return isSuspended;
377         }
378
379         public static void setSuspended(boolean isSuspended) {
380                 HealthMonitor.isSuspended = isSuspended;
381         }
382 }