[PORTAL-20,PORTAL-23,PORTAL-32] Repair defects
[portal.git] / ecomp-portal-BE-common / src / main / java / org / openecomp / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ================================================================================
3  * ECOMP Portal
4  * ================================================================================
5  * Copyright (C) 2017 AT&T Intellectual Property
6  * ================================================================================
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  * 
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  * 
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  * ================================================================================
19  */
20 package org.openecomp.portalapp.portal.listener;
21
22 import java.util.List;
23
24 import javax.annotation.PostConstruct;
25 import javax.annotation.PreDestroy;
26
27 import org.hibernate.Query;
28 import org.hibernate.Session;
29 import org.hibernate.SessionFactory;
30 import org.openecomp.portalapp.portal.logging.aop.EPMetricsLog;
31 import org.openecomp.portalapp.portal.logging.format.EPAppMessagesEnum;
32 import org.openecomp.portalapp.portal.logging.logic.EPLogUtil;
33 import org.openecomp.portalapp.portal.ueb.EPUebHelper;
34 import org.openecomp.portalapp.portal.utils.EPCommonSystemProperties;
35 import org.openecomp.portalsdk.core.logging.logic.EELFLoggerDelegate;
36 import org.openecomp.portalsdk.core.util.SystemProperties;
37 import org.springframework.beans.factory.annotation.Autowired;
38 import org.springframework.context.annotation.EnableAspectJAutoProxy;
39 import org.springframework.transaction.annotation.Transactional;
40
41 @Transactional
42 @org.springframework.context.annotation.Configuration
43 @EnableAspectJAutoProxy
44 @EPMetricsLog
45 public class HealthMonitor {
46
47         private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
48
49         @Autowired
50         private SessionFactory sessionFactory;
51
52         @Autowired
53         private EPUebHelper epUebHelper;
54
55         private static boolean databaseUp;
56         private static boolean uebUp;
57         private static boolean frontEndUp;
58         private static boolean backEndUp;
59         private static boolean dbClusterStatusOk;
60         private static boolean dbPermissionsOk;
61
62         /**
63          * Read directly by external classes.
64          */
65         public static boolean isSuspended = false;
66
67         private Thread healthMonitorThread;
68
69         public HealthMonitor() {
70         }
71
72         public static boolean isDatabaseUp() {
73                 return databaseUp;
74         }
75
76         public static boolean isClusterStatusOk() {
77                 return dbClusterStatusOk;
78         }
79
80         public static boolean isDatabasePermissionsOk() {
81                 return dbPermissionsOk;
82         }
83
84         public static boolean isUebUp() {
85                 return uebUp;
86         }
87
88         public static boolean isFrontEndUp() {
89                 return frontEndUp;
90         }
91
92         public static boolean isBackEndUp() {
93                 return backEndUp;
94         }
95
96         private void monitorEPHealth() throws InterruptedException {
97
98                 int numIntervalsDatabaseHasBeenDown = 0;
99                 int numIntervalsClusterNotHealthy = 0;
100                 int numIntervalsDatabasePermissionsIncorrect = 0;
101                 int numIntervalsUebHasBeenDown = 0;
102
103                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
104
105                 long sleepInterval = (Long
106                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
107                 long numIntervalsBetweenAlerts = Long
108                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
109                 logger.debug(EELFLoggerDelegate.debugLogger,
110                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
111                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
112
113                 while (true) {
114                         //
115                         // Get DB status. If down, signal alert once every X intervals.
116                         //
117                         databaseUp = this.checkIfDatabaseUp();
118                         if (databaseUp == false) {
119                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
120                                         logger.debug(EELFLoggerDelegate.debugLogger,
121                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
122                                         // Write a Log entry that will generate an alert
123                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
124                                         numIntervalsDatabaseHasBeenDown++;
125                                 } else {
126                                         numIntervalsDatabaseHasBeenDown = 0;
127                                 }
128                         }
129
130                         dbClusterStatusOk = this.checkClusterStatus();
131                         if (dbClusterStatusOk == false) {
132                                 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
133                                         logger.debug(EELFLoggerDelegate.debugLogger,
134                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
135                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
136                                         numIntervalsClusterNotHealthy++;
137                                 } else {
138                                         numIntervalsClusterNotHealthy = 0;
139                                 }
140                         }
141
142                         dbPermissionsOk = this.checkDatabasePermissions();
143                         if (dbPermissionsOk == false) {
144                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
145                                         logger.debug(EELFLoggerDelegate.debugLogger,
146                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
147                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
148                                         numIntervalsDatabasePermissionsIncorrect++;
149                                 } else {
150                                         numIntervalsDatabasePermissionsIncorrect = 0;
151                                 }
152                         }
153
154                         //
155                         // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
156                         // returned, status is Up.
157                         // If down, signal alert once every X intervals.
158                         // EP will ignore this bogus message.
159                         //
160                         uebUp = this.checkIfUebUp();
161                         if (uebUp == false) {
162
163                                 if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
164                                         logger.debug(EELFLoggerDelegate.debugLogger,
165                                                         "monitorEPHealth: UEB down, logging to error log to trigger alert");
166                                         // Write a Log entry that will generate an alert
167                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckUebClusterError);
168                                         numIntervalsUebHasBeenDown++;
169                                 } else {
170                                         numIntervalsUebHasBeenDown = 0;
171                                 }
172                         }
173
174                         // The front end should be up because the API is called through
175                         // proxy front end server.
176                         frontEndUp = true;
177
178                         // If the rest API called, the backend is always up
179                         backEndUp = true;
180
181                         //
182                         // future nice to have...get Partner status
183                         //
184                         // For all apps exposing a rest url, query one of the rest
185                         // urls(/roles?) and manage a list
186                         // of app name/status. We might not return back a non 200 OK in
187                         // health check, but we
188                         // could return information in the json content of a health check.
189                         //
190
191                         //
192                         // Get DB status. If down, signal alert once every X intervals.
193                         //
194                         if (Thread.interrupted()) {
195                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
196                                 break;
197                         }
198
199                         try {
200                                 Thread.sleep(sleepInterval);
201                         } catch (InterruptedException e) {
202                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
203                                 Thread.currentThread().interrupt();
204                         }
205                 }
206         }
207
208         @PostConstruct
209         public void initHealthMonitor() {
210                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
211                         public void run() {
212                                 try {
213                                         monitorEPHealth();
214                                 } catch (InterruptedException e) {
215                                         logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
216                                 } catch (Exception e) {
217                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
218                                 }
219                         }
220                 };
221                 if (healthMonitorThread != null) {
222                         healthMonitorThread.start();
223                 }
224         }
225
226         @PreDestroy
227         public void closeHealthMonitor() {
228                 this.healthMonitorThread.interrupt();
229         }
230
231         /**
232          * This routine checks whether the database can be read. In June 2017 we
233          * experimented with checking if the database can be WRITTEN. Writes failed
234          * with some regularity in a MariaDB Galera cluster, and in that
235          * environment, the resulting alerts in the log triggered a health monitor
236          * cron job to shut down the Tomcat instance. The root cause of the cluster
237          * write failures was not determined.
238          * 
239          * @return true if the database can be read.
240          */
241         private boolean checkIfDatabaseUp() {
242                 boolean isUp = false;
243                 Session localSession = null;
244                 try {
245                         localSession = sessionFactory.openSession();
246                         if (localSession != null) {
247                                 String sql = "select app_name from fn_app where app_id=1";
248                                 Query query = localSession.createSQLQuery(sql);
249                                 @SuppressWarnings("unchecked")
250                                 List<String> queryList = query.list();
251                                 if (queryList != null) {
252                                         isUp = true;
253                                 }
254                         }
255                 } catch (Exception e) {
256                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
257                         isUp = false;
258                 } finally {
259                         if (localSession != null)
260                                 localSession.close();
261                 }
262                 return isUp;
263         }
264
265         private boolean checkClusterStatus() {
266                 boolean isUp = false;
267                 Session localSession = null;
268                 try {
269                         localSession = sessionFactory.openSession();
270                         if (localSession != null) {
271                                 // If all nodes are unhealthy in a cluster, this will throw an
272                                 // exception
273                                 String sql = "select * from mysql.user";
274                                 Query query = localSession.createSQLQuery(sql);
275                                 @SuppressWarnings("unchecked")
276                                 List<String> queryList = query.list();
277                                 if (queryList != null) {
278                                         isUp = true;
279                                 }
280                         }
281                 } catch (Exception e) {
282                         logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
283                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
284                                 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
285                         }
286                         isUp = false;
287                 } finally {
288                         if (localSession != null) {
289                                 localSession.close();
290                         }
291                 }
292                 return isUp;
293         }
294
295         private boolean checkDatabasePermissions() {
296                 boolean isUp = false;
297                 Session localSession = null;
298                 try {
299                         localSession = sessionFactory.openSession();
300                         if (localSession != null) {
301                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
302                                 Query query = localSession.createSQLQuery(sql);
303                                 @SuppressWarnings("unchecked")
304                                 List<String> grantsList = query.list();
305                                 for (String str : grantsList) {
306                                         if ((str.toUpperCase().contains("ALL"))
307                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
308                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
309                                                 isUp = true;
310                                                 break;
311                                         }
312                                 }
313                                 if (isUp == false) {
314                                         logger.error(EELFLoggerDelegate.errorLogger,
315                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
316                                         for (String str : grantsList) {
317                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
318                                         }
319                                 }
320                         }
321                 } catch (Exception e) {
322                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
323                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
324                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
325                         }
326                         isUp = false;
327                 } finally {
328                         if (localSession != null) {
329                                 localSession.close();
330                         }
331                 }
332                 return isUp;
333         }
334
335         private boolean checkIfUebUp() {
336                 boolean uebUp = false;
337                 try {
338                         boolean isAvailable = epUebHelper.checkAvailability();
339                         boolean messageCanBeSent = epUebHelper.MessageCanBeSentToTopic();
340                         uebUp = (isAvailable && messageCanBeSent);
341                 } catch (Exception e) {
342                         logger.error(EELFLoggerDelegate.errorLogger, "checkIfUebUp failed", e);
343                 }
344                 return uebUp;
345         }
346
347 }