HealthCheckController up
[portal.git] / portal-BE / src / main / java / org / onap / portal / scheduler / healthMonitor / HealthMonitor.java
1 /*-
2  * ============LICENSE_START==========================================
3  * ONAP Portal
4  * ===================================================================
5  * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6  * ===================================================================
7  *
8  * Unless otherwise specified, all software contained herein is licensed
9  * under the Apache License, Version 2.0 (the "License");
10  * you may not use this software except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *             http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  * Unless otherwise specified, all documentation contained herein is licensed
22  * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23  * you may not use this documentation except in compliance with the License.
24  * You may obtain a copy of the License at
25  *
26  *             https://creativecommons.org/licenses/by/4.0/
27  *
28  * Unless required by applicable law or agreed to in writing, documentation
29  * distributed under the License is distributed on an "AS IS" BASIS,
30  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31  * See the License for the specific language governing permissions and
32  * limitations under the License.
33  *
34  * ============LICENSE_END============================================
35  *
36  * 
37  */
38 package org.onap.portal.scheduler.healthMonitor;
39
40 import java.time.Instant;
41 import java.util.List;
42 import javax.annotation.PostConstruct;
43 import javax.annotation.PreDestroy;
44 import javax.persistence.EntityManagerFactory;
45 import lombok.NoArgsConstructor;
46 import org.apache.zookeeper.client.FourLetterWordMain;
47 import org.hibernate.Query;
48 import org.hibernate.Session;
49 import org.hibernate.SessionFactory;
50 import org.onap.music.datastore.PreparedQueryObject;
51 import org.onap.music.exceptions.MusicServiceException;
52 import org.onap.music.main.MusicCore;
53 import org.onap.music.main.MusicUtil;
54 import org.onap.portal.logging.format.EPAppMessagesEnum;
55 import org.onap.portal.logging.logic.EPLogUtil;
56 import org.onap.portal.utils.EPCommonSystemProperties;
57 import org.onap.portalapp.music.util.MusicProperties;
58 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
59 import org.onap.portalsdk.core.util.SystemProperties;
60 import org.springframework.beans.factory.annotation.Autowired;
61 import org.springframework.context.annotation.Configuration;
62 import org.springframework.context.annotation.EnableAspectJAutoProxy;
63 import org.springframework.transaction.annotation.Transactional;
64
65
66 @Transactional
67 @Configuration
68 @EnableAspectJAutoProxy
69 @NoArgsConstructor
70 public class HealthMonitor {
71         private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
72         private Thread healthMonitorThread;
73         private static EntityManagerFactory entityManagerFactory;
74         private static boolean databaseUp;
75         private static boolean uebUp;
76         private static boolean frontEndUp;
77         private static boolean backEndUp;
78         private static boolean dbPermissionsOk;
79         private static boolean zookeeperStatusOk;
80         private static boolean cassandraStatusOk;
81         private static String application = "Portal";
82         private static boolean isSuspended = false;
83
84         @Autowired
85         public HealthMonitor(EntityManagerFactory entityManagerFactory) {
86                 this.entityManagerFactory = entityManagerFactory;
87
88         }
89
90         private static void monitorEPHealth() {
91
92                 int numIntervalsDatabaseHasBeenDown = 0;
93                 int numIntervalsDatabasePermissionsIncorrect = 0;
94                 int numIntervalsZookeeperNotHealthy = 0;
95                 int numIntervalsCassandraNotHealthy = 0;
96
97                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
98         
99
100                 long sleepInterval = (Long
101                                 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
102                 long numIntervalsBetweenAlerts = Long
103                                 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
104                 logger.debug(EELFLoggerDelegate.debugLogger,
105                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
106                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
107                 
108                 while (true) {
109                         logger.debug(EELFLoggerDelegate.debugLogger,
110                                         "monitorEPHealth: Test Connection to all");
111                         //
112                         // Get DB status. If down, signal alert once every X intervals.
113                         //
114                         databaseUp = checkIfDatabaseUp();
115                         if (databaseUp) {
116                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
117                                         logger.debug(EELFLoggerDelegate.debugLogger,
118                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
119                                         // Write a Log entry that will generate an alert
120                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
121                                         numIntervalsDatabaseHasBeenDown++;
122                                 } else {
123                                         numIntervalsDatabaseHasBeenDown = 0;
124                                 }
125                         }
126
127                         dbPermissionsOk = checkDatabasePermissions();
128                         if (!dbPermissionsOk) {
129                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
130                                         logger.debug(EELFLoggerDelegate.debugLogger,
131                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
132                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
133                                         numIntervalsDatabasePermissionsIncorrect++;
134                                 } else {
135                                         numIntervalsDatabasePermissionsIncorrect = 0;
136                                 }
137                         }
138                         if(org.onap.portalapp.music.util.MusicUtil.isMusicEnable()){
139                                 cassandraStatusOk = checkCassandraStatus();
140                                 if (!cassandraStatusOk) {
141                                         if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
142                                                 logger.debug(EELFLoggerDelegate.debugLogger,
143                                                                 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
144                                                 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
145                                                 numIntervalsCassandraNotHealthy++;
146                                         } else {
147                                                 numIntervalsCassandraNotHealthy = 0;
148                                         }
149                                 }
150                         }
151                         frontEndUp = true;
152                         backEndUp = true;
153
154                         if (Thread.interrupted()) {
155                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
156                                 break;
157                         }
158
159                         try {
160                                 Thread.sleep(sleepInterval);
161                         } catch (InterruptedException e) {
162                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
163                                 Thread.currentThread().interrupt();
164                         }
165                 }
166         }
167
168         @PostConstruct
169         public void initHealthMonitor() {
170                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
171                         @Override
172                         public void run() {
173                                 try {
174                                         monitorEPHealth();
175                                 }
176                                 catch (Exception e) {
177                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
178                                 }
179                         }
180                 };
181                 healthMonitorThread.start();
182                 
183         }
184
185         @PreDestroy
186         public void closeHealthMonitor() {
187                 this.healthMonitorThread.interrupt();
188         }
189
190         /**
191          * This routine checks whether the database can be read. In June 2017 we
192          * experimented with checking if the database can be WRITTEN. Writes failed
193          * with some regularity in a MariaDB Galera cluster, and in that
194          * environment, the resulting alerts in the log triggered a health monitor
195          * cron job to shut down the Tomcat instance. The root cause of the cluster
196          * write failures was not determined.
197          * 
198          * @return true if the database can be read.
199          */
200         private static boolean checkIfDatabaseUp() {
201                 boolean isUp = false;
202                 Session localSession = null;
203                 try {
204                         localSession = entityManagerFactory.unwrap(SessionFactory.class).openSession();
205                         if (localSession != null) {
206                                 String sql = "select app_name from fn_app where app_id=1";
207                                 Query query = localSession.createSQLQuery(sql);
208                                 @SuppressWarnings("unchecked")
209                                 List<String> queryList = query.list();
210                                 if (queryList != null) {
211                                         isUp = true;
212                                 }
213                         }
214                 } catch (Exception e) {
215                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
216                         isUp = false;
217                 } finally {
218                         if (localSession != null)
219                                 localSession.close();
220                 }
221                 return isUp;
222         }
223
224         private static boolean checkZookeeperStatus() {
225
226                 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
227                 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
228                 for (String zookeeperNode : zookeeperNodes) {
229                         try {
230                                 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper  :" + zookeeperNode.trim());
231                                 String[] iport = zookeeperNode.split(":");
232                                 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
233                                         Integer.parseInt(iport[1].trim()), "stat");
234                                 logger.info(EELFLoggerDelegate.applicationLogger,
235                                         "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
236                                 if (!zkNodeStatistics.isEmpty()) {
237                                         String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
238                                                 zkNodeStatistics.indexOf("Node"));
239                                         logger.info(EELFLoggerDelegate.applicationLogger,
240
241                                                 "Getting Status for zookeeper :" + zookeeperNode.trim() + ":------:" + state);
242                                         if (state.contains("leader") || state.contains("follower")) {
243                                                 return true;
244                                         }
245                                 }
246                         } catch (Exception e) {
247                                 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
248                         }
249                 }
250
251                 return false;
252         }
253
254
255         private static boolean checkCassandraStatus() {
256                 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
257                 if (getAdminKeySpace()) {
258                         return true;
259                 } else {
260                         logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
261                         return false;
262                 }
263         }
264         
265         private static Boolean getAdminKeySpace() {
266                 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
267                 Instant creationTime = Instant.now();
268                 PreparedQueryObject pQuery = new PreparedQueryObject();
269                 pQuery.appendQueryString(
270                                 "UPDATE " + musicKeySpace + ".health_check  SET creation_time = ? WHERE primary_id = ?");
271                 pQuery.addValue(creationTime.toString());
272                 pQuery.addValue(application);
273                 try {
274                         MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
275                 } catch (MusicServiceException e) {
276                         logger.error(EELFLoggerDelegate.errorLogger, e.getErrorMessage(), e);
277                         return Boolean.FALSE;
278                 }
279                 return Boolean.TRUE;
280
281         }
282
283         
284         private static boolean checkDatabasePermissions() {
285                 boolean isUp = false;
286                 Session localSession = null;
287                 try {
288                         localSession = entityManagerFactory.unwrap(SessionFactory.class).openSession();
289                         if (localSession != null) {
290                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
291                                 Query query = localSession.createSQLQuery(sql);
292                                 @SuppressWarnings("unchecked")
293                                 List<String> grantsList = query.list();
294                                 for (String str : grantsList) {
295                                         if ((str.toUpperCase().contains("ALL"))
296                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
297                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
298                                                 isUp = true;
299                                                 break;
300                                         }
301                                 }
302                                 if (!isUp) {
303                                         logger.error(EELFLoggerDelegate.errorLogger,
304                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
305                                         for (String str : grantsList) {
306                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
307                                         }
308                                 }
309                         }
310                 } catch (Exception e) {
311                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
312                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
313                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
314                         }
315                         isUp = false;
316                 } finally {
317                         if (localSession != null) {
318                                 localSession.close();
319                         }
320                 }
321                 return isUp;
322         }
323
324         public static boolean isDatabaseUp() {
325                 return databaseUp;
326         }
327
328         public static boolean isUebUp() {
329                 return uebUp;
330         }
331
332         public static boolean isFrontEndUp() {
333                 return frontEndUp;
334         }
335
336         public static boolean isBackEndUp() {
337                 return backEndUp;
338         }
339
340         public static boolean isDbPermissionsOk() {
341                 return dbPermissionsOk;
342         }
343
344         public static boolean isZookeeperStatusOk() {
345                 return zookeeperStatusOk;
346         }
347
348         public static boolean isCassandraStatusOk() {
349                 return cassandraStatusOk;
350         }
351
352         public static boolean isSuspended() {
353                 return isSuspended;
354         }
355
356         public static void setSuspended(boolean isSuspended) {
357                 HealthMonitor.isSuspended = isSuspended;
358         }
359 }