27c43160cb28d15dbdc77586f95f226308fe63b2
[portal.git] / ecomp-portal-BE-common / src / main / java / org / openecomp / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ================================================================================
3  * ECOMP Portal
4  * ================================================================================
5  * Copyright (C) 2017 AT&T Intellectual Property
6  * ================================================================================
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  * 
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  * 
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  * ================================================================================
19  */
20 package org.openecomp.portalapp.portal.listener;
21
22 import java.util.List;
23
24 import javax.annotation.PostConstruct;
25 import javax.annotation.PreDestroy;
26
27 import org.hibernate.Query;
28 import org.hibernate.Session;
29 import org.hibernate.SessionFactory;
30 import org.openecomp.portalapp.portal.logging.aop.EPMetricsLog;
31 import org.openecomp.portalapp.portal.logging.format.EPAppMessagesEnum;
32 import org.openecomp.portalapp.portal.logging.logic.EPLogUtil;
33 import org.openecomp.portalapp.portal.ueb.EPUebHelper;
34 import org.openecomp.portalapp.portal.utils.EPCommonSystemProperties;
35 import org.openecomp.portalsdk.core.logging.logic.EELFLoggerDelegate;
36 import org.openecomp.portalsdk.core.util.SystemProperties;
37 import org.springframework.beans.factory.annotation.Autowired;
38 import org.springframework.context.annotation.EnableAspectJAutoProxy;
39 import org.springframework.transaction.annotation.Transactional;
40
41 @Transactional
42 @org.springframework.context.annotation.Configuration
43 @EnableAspectJAutoProxy
44 @EPMetricsLog
45 public class HealthMonitor {
46
47         private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
48
49         @Autowired
50         private SessionFactory sessionFactory;
51
52         @Autowired
53         private EPUebHelper epUebHelper;
54
55         private static boolean databaseUp;
56         private static boolean uebUp;
57         private static boolean frontEndUp;
58         private static boolean backEndUp;
59         private static boolean dbClusterStatusOk;
60         private static boolean dbPermissionsOk;
61
62         /**
63          * Read directly by external classes.
64          */
65         public static boolean isSuspended = false;
66
67         private Thread healthMonitorThread;
68
69         public HealthMonitor() {
70         }
71
72         public static boolean isDatabaseUp() {
73                 return databaseUp;
74         }
75
76         public static boolean isClusterStatusOk() {
77                 return dbClusterStatusOk;
78         }
79
80         public static boolean isDatabasePermissionsOk() {
81                 return dbPermissionsOk;
82         }
83
84         public static boolean isUebUp() {
85                 return uebUp;
86         }
87
88         public static boolean isFrontEndUp() {
89                 return frontEndUp;
90         }
91
92         public static boolean isBackEndUp() {
93                 return backEndUp;
94         }
95
96         private void monitorEPHealth() throws InterruptedException {
97
98                 int numIntervalsDatabaseHasBeenDown = 0;
99                 int numIntervalsClusterNotHealthy = 0;
100                 int numIntervalsDatabasePermissionsIncorrect = 0;
101                 int numIntervalsUebHasBeenDown = 0;
102
103                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
104
105                 long sleepInterval = (Long
106                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
107                 long numIntervalsBetweenAlerts = Long
108                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
109                 logger.debug(EELFLoggerDelegate.debugLogger,
110                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
111                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
112
113                 while (true) {
114                         //
115                         // Get DB status. If down, signal alert once every X intervals.
116                         //
117                         databaseUp = this.checkIfDatabaseUp();
118                         if (databaseUp == false) {
119                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
120                                         logger.debug(EELFLoggerDelegate.debugLogger,
121                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
122                                         // Write a Log entry that will generate an alert
123                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
124                                         numIntervalsDatabaseHasBeenDown++;
125                                 } else {
126                                         numIntervalsDatabaseHasBeenDown = 0;
127                                 }
128                         }
129
130                         dbClusterStatusOk = this.checkClusterStatus();
131                         if (dbClusterStatusOk == false) {
132                                 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
133                                         logger.debug(EELFLoggerDelegate.debugLogger,
134                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
135                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
136                                         numIntervalsClusterNotHealthy++;
137                                 } else {
138                                         numIntervalsClusterNotHealthy = 0;
139                                 }
140                         }
141
142                         dbPermissionsOk = this.checkDatabasePermissions();
143                         if (dbPermissionsOk == false) {
144                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
145                                         logger.debug(EELFLoggerDelegate.debugLogger,
146                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
147                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
148                                         numIntervalsDatabasePermissionsIncorrect++;
149                                 } else {
150                                         numIntervalsDatabasePermissionsIncorrect = 0;
151                                 }
152                         }
153
154                         //
155                         // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
156                         // returned, status is Up.
157                         // If down, signal alert once every X intervals.
158                         // EP will ignore this bogus message.
159                         //
160                         uebUp = this.checkIfUebUp();
161                         if (uebUp == false) {
162
163                                 if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
164                                         logger.debug(EELFLoggerDelegate.debugLogger,
165                                                         "monitorEPHealth: UEB down, logging to error log to trigger alert");
166                                         // Write a Log entry that will generate an alert
167                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckUebClusterError);
168                                         numIntervalsUebHasBeenDown++;
169                                 } else {
170                                         numIntervalsUebHasBeenDown = 0;
171                                 }
172                         }
173
174                         // The front end should be up because the API is called through
175                         // proxy front end server.
176                         frontEndUp = true;
177
178                         // If the rest API called, the backend is always up
179                         backEndUp = true;
180
181                         //
182                         // future nice to have...get Partner status
183                         //
184                         // For all apps exposing a rest url, query one of the rest
185                         // urls(/roles?) and manage a list
186                         // of app name/status. We might not return back a non 200 OK in
187                         // health check, but we
188                         // could return information in the json content of a health check.
189                         //
190
191                         //
192                         // Get DB status. If down, signal alert once every X intervals.
193                         //
194                         if (Thread.interrupted()) {
195                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
196                                 break;
197                         }
198
199                         try {
200                                 Thread.sleep(sleepInterval);
201                         } catch (InterruptedException e) {
202                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
203                                 Thread.currentThread().interrupt();
204                         }
205                 }
206         }
207
208         @PostConstruct
209         public void initHealthMonitor() {
210                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
211                         public void run() {
212                                 try {
213                                         monitorEPHealth();
214                                 } catch (InterruptedException e) {
215                                         logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
216                                 } catch (Exception e) {
217                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
218                                 }
219                         }
220                 };
221                 healthMonitorThread.start();
222                 
223         }
224
225         @PreDestroy
226         public void closeHealthMonitor() {
227                 this.healthMonitorThread.interrupt();
228         }
229
230         /**
231          * This routine checks whether the database can be read. In June 2017 we
232          * experimented with checking if the database can be WRITTEN. Writes failed
233          * with some regularity in a MariaDB Galera cluster, and in that
234          * environment, the resulting alerts in the log triggered a health monitor
235          * cron job to shut down the Tomcat instance. The root cause of the cluster
236          * write failures was not determined.
237          * 
238          * @return true if the database can be read.
239          */
240         private boolean checkIfDatabaseUp() {
241                 boolean isUp = false;
242                 Session localSession = null;
243                 try {
244                         localSession = sessionFactory.openSession();
245                         if (localSession != null) {
246                                 String sql = "select app_name from fn_app where app_id=1";
247                                 Query query = localSession.createSQLQuery(sql);
248                                 @SuppressWarnings("unchecked")
249                                 List<String> queryList = query.list();
250                                 if (queryList != null) {
251                                         isUp = true;
252                                 }
253                         }
254                 } catch (Exception e) {
255                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
256                         isUp = false;
257                 } finally {
258                         if (localSession != null)
259                                 localSession.close();
260                 }
261                 return isUp;
262         }
263
264         private boolean checkClusterStatus() {
265                 boolean isUp = false;
266                 Session localSession = null;
267                 try {
268                         localSession = sessionFactory.openSession();
269                         if (localSession != null) {
270                                 // If all nodes are unhealthy in a cluster, this will throw an
271                                 // exception
272                                 String sql = "select * from mysql.user";
273                                 Query query = localSession.createSQLQuery(sql);
274                                 @SuppressWarnings("unchecked")
275                                 List<String> queryList = query.list();
276                                 if (queryList != null) {
277                                         isUp = true;
278                                 }
279                         }
280                 } catch (Exception e) {
281                         logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
282                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
283                                 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
284                         }
285                         isUp = false;
286                 } finally {
287                         if (localSession != null) {
288                                 localSession.close();
289                         }
290                 }
291                 return isUp;
292         }
293
294         private boolean checkDatabasePermissions() {
295                 boolean isUp = false;
296                 Session localSession = null;
297                 try {
298                         localSession = sessionFactory.openSession();
299                         if (localSession != null) {
300                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
301                                 Query query = localSession.createSQLQuery(sql);
302                                 @SuppressWarnings("unchecked")
303                                 List<String> grantsList = query.list();
304                                 for (String str : grantsList) {
305                                         if ((str.toUpperCase().contains("ALL"))
306                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
307                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
308                                                 isUp = true;
309                                                 break;
310                                         }
311                                 }
312                                 if (isUp == false) {
313                                         logger.error(EELFLoggerDelegate.errorLogger,
314                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
315                                         for (String str : grantsList) {
316                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
317                                         }
318                                 }
319                         }
320                 } catch (Exception e) {
321                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
322                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
323                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
324                         }
325                         isUp = false;
326                 } finally {
327                         if (localSession != null) {
328                                 localSession.close();
329                         }
330                 }
331                 return isUp;
332         }
333
334         private boolean checkIfUebUp() {
335                 boolean uebUp = false;
336                 try {
337                         boolean isAvailable = epUebHelper.checkAvailability();
338                         boolean messageCanBeSent = epUebHelper.MessageCanBeSentToTopic();
339                         uebUp = (isAvailable && messageCanBeSent);
340                 } catch (Exception e) {
341                         logger.error(EELFLoggerDelegate.errorLogger, "checkIfUebUp failed", e);
342                 }
343                 return uebUp;
344         }
345
346 }