Replace ecomp references
[portal.git] / ecomp-portal-BE-common / src / main / java / org / onap / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ============LICENSE_START==========================================
3  * ONAP Portal
4  * ===================================================================
5  * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
6  * ===================================================================
7  *
8  * Unless otherwise specified, all software contained herein is licensed
9  * under the Apache License, Version 2.0 (the "License");
10  * you may not use this software except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *             http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  * Unless otherwise specified, all documentation contained herein is licensed
22  * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23  * you may not use this documentation except in compliance with the License.
24  * You may obtain a copy of the License at
25  *
26  *             https://creativecommons.org/licenses/by/4.0/
27  *
28  * Unless required by applicable law or agreed to in writing, documentation
29  * distributed under the License is distributed on an "AS IS" BASIS,
30  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31  * See the License for the specific language governing permissions and
32  * limitations under the License.
33  *
34  * ============LICENSE_END============================================
35  *
36  * 
37  */
38 package org.onap.portalapp.portal.listener;
39
40 import java.util.List;
41
42 import javax.annotation.PostConstruct;
43 import javax.annotation.PreDestroy;
44
45 import org.hibernate.Query;
46 import org.hibernate.Session;
47 import org.hibernate.SessionFactory;
48 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
49 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
50 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
51 import org.onap.portalapp.portal.ueb.EPUebHelper;
52 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
53 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
54 import org.onap.portalsdk.core.util.SystemProperties;
55 import org.springframework.beans.factory.annotation.Autowired;
56 import org.springframework.context.annotation.EnableAspectJAutoProxy;
57 import org.springframework.transaction.annotation.Transactional;
58
59 @Transactional
60 @org.springframework.context.annotation.Configuration
61 @EnableAspectJAutoProxy
62 @EPMetricsLog
63 public class HealthMonitor {
64
65         private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
66
67         @Autowired
68         private SessionFactory sessionFactory;
69
70         @Autowired
71         private EPUebHelper epUebHelper;
72
73         private static boolean databaseUp;
74         private static boolean uebUp;
75         private static boolean frontEndUp;
76         private static boolean backEndUp;
77         private static boolean dbClusterStatusOk;
78         private static boolean dbPermissionsOk;
79
80         /**
81          * Read directly by external classes.
82          */
83         public static boolean isSuspended = false;
84
85         private Thread healthMonitorThread;
86
87         public HealthMonitor() {
88         }
89
90         public static boolean isDatabaseUp() {
91                 return databaseUp;
92         }
93
94         public static boolean isClusterStatusOk() {
95                 return dbClusterStatusOk;
96         }
97
98         public static boolean isDatabasePermissionsOk() {
99                 return dbPermissionsOk;
100         }
101
102         public static boolean isUebUp() {
103                 return uebUp;
104         }
105
106         public static boolean isFrontEndUp() {
107                 return frontEndUp;
108         }
109
110         public static boolean isBackEndUp() {
111                 return backEndUp;
112         }
113
114         private void monitorEPHealth() throws InterruptedException {
115
116                 int numIntervalsDatabaseHasBeenDown = 0;
117                 int numIntervalsClusterNotHealthy = 0;
118                 int numIntervalsDatabasePermissionsIncorrect = 0;
119                 int numIntervalsUebHasBeenDown = 0;
120
121                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
122
123                 long sleepInterval = (Long
124                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
125                 long numIntervalsBetweenAlerts = Long
126                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
127                 logger.debug(EELFLoggerDelegate.debugLogger,
128                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
129                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
130
131                 while (true) {
132                         //
133                         // Get DB status. If down, signal alert once every X intervals.
134                         //
135                         databaseUp = this.checkIfDatabaseUp();
136                         if (databaseUp == false) {
137                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
138                                         logger.debug(EELFLoggerDelegate.debugLogger,
139                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
140                                         // Write a Log entry that will generate an alert
141                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
142                                         numIntervalsDatabaseHasBeenDown++;
143                                 } else {
144                                         numIntervalsDatabaseHasBeenDown = 0;
145                                 }
146                         }
147
148                         dbClusterStatusOk = this.checkClusterStatus();
149                         if (dbClusterStatusOk == false) {
150                                 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
151                                         logger.debug(EELFLoggerDelegate.debugLogger,
152                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
153                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
154                                         numIntervalsClusterNotHealthy++;
155                                 } else {
156                                         numIntervalsClusterNotHealthy = 0;
157                                 }
158                         }
159
160                         dbPermissionsOk = this.checkDatabasePermissions();
161                         if (dbPermissionsOk == false) {
162                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
163                                         logger.debug(EELFLoggerDelegate.debugLogger,
164                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
165                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
166                                         numIntervalsDatabasePermissionsIncorrect++;
167                                 } else {
168                                         numIntervalsDatabasePermissionsIncorrect = 0;
169                                 }
170                         }
171
172                         //
173                         // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
174                         // returned, status is Up.
175                         // If down, signal alert once every X intervals.
176                         // EP will ignore this bogus message.
177                         //
178                         uebUp = this.checkIfUebUp();
179                         if (uebUp == false) {
180
181                                 if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
182                                         logger.debug(EELFLoggerDelegate.debugLogger,
183                                                         "monitorEPHealth: UEB down, logging to error log to trigger alert");
184                                         // Write a Log entry that will generate an alert
185                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckUebClusterError);
186                                         numIntervalsUebHasBeenDown++;
187                                 } else {
188                                         numIntervalsUebHasBeenDown = 0;
189                                 }
190                         }
191
192                         // The front end should be up because the API is called through
193                         // proxy front end server.
194                         frontEndUp = true;
195
196                         // If the rest API called, the backend is always up
197                         backEndUp = true;
198
199                         //
200                         // future nice to have...get Partner status
201                         //
202                         // For all apps exposing a rest url, query one of the rest
203                         // urls(/roles?) and manage a list
204                         // of app name/status. We might not return back a non 200 OK in
205                         // health check, but we
206                         // could return information in the json content of a health check.
207                         //
208
209                         //
210                         // Get DB status. If down, signal alert once every X intervals.
211                         //
212                         if (Thread.interrupted()) {
213                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
214                                 break;
215                         }
216
217                         try {
218                                 Thread.sleep(sleepInterval);
219                         } catch (InterruptedException e) {
220                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
221                                 Thread.currentThread().interrupt();
222                         }
223                 }
224         }
225
226         @PostConstruct
227         public void initHealthMonitor() {
228                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
229                         public void run() {
230                                 try {
231                                         monitorEPHealth();
232                                 } catch (InterruptedException e) {
233                                         logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
234                                 } catch (Exception e) {
235                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
236                                 }
237                         }
238                 };
239                 healthMonitorThread.start();
240                 
241         }
242
243         @PreDestroy
244         public void closeHealthMonitor() {
245                 this.healthMonitorThread.interrupt();
246         }
247
248         /**
249          * This routine checks whether the database can be read. In June 2017 we
250          * experimented with checking if the database can be WRITTEN. Writes failed
251          * with some regularity in a MariaDB Galera cluster, and in that
252          * environment, the resulting alerts in the log triggered a health monitor
253          * cron job to shut down the Tomcat instance. The root cause of the cluster
254          * write failures was not determined.
255          * 
256          * @return true if the database can be read.
257          */
258         private boolean checkIfDatabaseUp() {
259                 boolean isUp = false;
260                 Session localSession = null;
261                 try {
262                         localSession = sessionFactory.openSession();
263                         if (localSession != null) {
264                                 String sql = "select app_name from fn_app where app_id=1";
265                                 Query query = localSession.createSQLQuery(sql);
266                                 @SuppressWarnings("unchecked")
267                                 List<String> queryList = query.list();
268                                 if (queryList != null) {
269                                         isUp = true;
270                                 }
271                         }
272                 } catch (Exception e) {
273                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
274                         isUp = false;
275                 } finally {
276                         if (localSession != null)
277                                 localSession.close();
278                 }
279                 return isUp;
280         }
281
282         private boolean checkClusterStatus() {
283                 boolean isUp = false;
284                 Session localSession = null;
285                 try {
286                         localSession = sessionFactory.openSession();
287                         if (localSession != null) {
288                                 // If all nodes are unhealthy in a cluster, this will throw an
289                                 // exception
290                                 String sql = "select * from mysql.user";
291                                 Query query = localSession.createSQLQuery(sql);
292                                 @SuppressWarnings("unchecked")
293                                 List<String> queryList = query.list();
294                                 if (queryList != null) {
295                                         isUp = true;
296                                 }
297                         }
298                 } catch (Exception e) {
299                         logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
300                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
301                                 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
302                         }
303                         isUp = false;
304                 } finally {
305                         if (localSession != null) {
306                                 localSession.close();
307                         }
308                 }
309                 return isUp;
310         }
311
312         private boolean checkDatabasePermissions() {
313                 boolean isUp = false;
314                 Session localSession = null;
315                 try {
316                         localSession = sessionFactory.openSession();
317                         if (localSession != null) {
318                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
319                                 Query query = localSession.createSQLQuery(sql);
320                                 @SuppressWarnings("unchecked")
321                                 List<String> grantsList = query.list();
322                                 for (String str : grantsList) {
323                                         if ((str.toUpperCase().contains("ALL"))
324                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
325                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
326                                                 isUp = true;
327                                                 break;
328                                         }
329                                 }
330                                 if (isUp == false) {
331                                         logger.error(EELFLoggerDelegate.errorLogger,
332                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
333                                         for (String str : grantsList) {
334                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
335                                         }
336                                 }
337                         }
338                 } catch (Exception e) {
339                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
340                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
341                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
342                         }
343                         isUp = false;
344                 } finally {
345                         if (localSession != null) {
346                                 localSession.close();
347                         }
348                 }
349                 return isUp;
350         }
351
352         private boolean checkIfUebUp() {
353                 boolean uebUp = false;
354                 try {
355                         boolean isAvailable = epUebHelper.checkAvailability();
356                         boolean messageCanBeSent = epUebHelper.MessageCanBeSentToTopic();
357                         uebUp = (isAvailable && messageCanBeSent);
358                 } catch (Exception e) {
359                         logger.error(EELFLoggerDelegate.errorLogger, "checkIfUebUp failed", e);
360                 }
361                 return uebUp;
362         }
363
364 }