2 * ================================================================================
4 * ================================================================================
5 * Copyright (C) 2017 AT&T Intellectual Property
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ================================================================================
20 package org.openecomp.portalapp.portal.listener;
22 import java.util.List;
24 import javax.annotation.PostConstruct;
25 import javax.annotation.PreDestroy;
27 import org.hibernate.Query;
28 import org.hibernate.Session;
29 import org.hibernate.SessionFactory;
30 import org.openecomp.portalapp.portal.logging.aop.EPMetricsLog;
31 import org.openecomp.portalapp.portal.logging.format.EPAppMessagesEnum;
32 import org.openecomp.portalapp.portal.logging.logic.EPLogUtil;
33 import org.openecomp.portalapp.portal.ueb.EPUebHelper;
34 import org.openecomp.portalapp.portal.utils.EPCommonSystemProperties;
35 import org.openecomp.portalsdk.core.logging.logic.EELFLoggerDelegate;
36 import org.openecomp.portalsdk.core.util.SystemProperties;
37 import org.springframework.beans.factory.annotation.Autowired;
38 import org.springframework.context.annotation.EnableAspectJAutoProxy;
39 import org.springframework.transaction.annotation.Transactional;
42 @org.springframework.context.annotation.Configuration
43 @EnableAspectJAutoProxy
45 public class HealthMonitor {
47 private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
50 private SessionFactory sessionFactory;
53 private EPUebHelper epUebHelper;
55 private static boolean databaseUp;
56 private static boolean uebUp;
57 private static boolean frontEndUp;
58 private static boolean backEndUp;
59 private static boolean dbClusterStatusOk;
60 private static boolean dbPermissionsOk;
63 * Read directly by external classes.
65 public static boolean isSuspended = false;
67 private Thread healthMonitorThread;
69 public HealthMonitor() {
72 public static boolean isDatabaseUp() {
76 public static boolean isClusterStatusOk() {
77 return dbClusterStatusOk;
80 public static boolean isDatabasePermissionsOk() {
81 return dbPermissionsOk;
84 public static boolean isUebUp() {
88 public static boolean isFrontEndUp() {
92 public static boolean isBackEndUp() {
96 private void monitorEPHealth() throws InterruptedException {
98 int numIntervalsDatabaseHasBeenDown = 0;
99 int numIntervalsClusterNotHealthy = 0;
100 int numIntervalsDatabasePermissionsIncorrect = 0;
101 int numIntervalsUebHasBeenDown = 0;
103 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
105 long sleepInterval = (Long
106 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
107 long numIntervalsBetweenAlerts = Long
108 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
109 logger.debug(EELFLoggerDelegate.debugLogger,
110 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
111 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
115 // Get DB status. If down, signal alert once every X intervals.
117 databaseUp = this.checkIfDatabaseUp();
118 if (databaseUp == false) {
119 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
120 logger.debug(EELFLoggerDelegate.debugLogger,
121 "monitorEPHealth: database down, logging to error log to trigger alert.");
122 // Write a Log entry that will generate an alert
123 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
124 numIntervalsDatabaseHasBeenDown++;
126 numIntervalsDatabaseHasBeenDown = 0;
130 dbClusterStatusOk = this.checkClusterStatus();
131 if (dbClusterStatusOk == false) {
132 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
133 logger.debug(EELFLoggerDelegate.debugLogger,
134 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
135 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
136 numIntervalsClusterNotHealthy++;
138 numIntervalsClusterNotHealthy = 0;
142 dbPermissionsOk = this.checkDatabasePermissions();
143 if (dbPermissionsOk == false) {
144 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
145 logger.debug(EELFLoggerDelegate.debugLogger,
146 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
147 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
148 numIntervalsDatabasePermissionsIncorrect++;
150 numIntervalsDatabasePermissionsIncorrect = 0;
155 // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
156 // returned, status is Up.
157 // If down, signal alert once every X intervals.
158 // EP will ignore this bogus message.
160 uebUp = this.checkIfUebUp();
161 if (uebUp == false) {
163 if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
164 logger.debug(EELFLoggerDelegate.debugLogger,
165 "monitorEPHealth: UEB down, logging to error log to trigger alert");
166 // Write a Log entry that will generate an alert
167 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckUebClusterError);
168 numIntervalsUebHasBeenDown++;
170 numIntervalsUebHasBeenDown = 0;
174 // The front end should be up because the API is called through
175 // proxy front end server.
178 // If the rest API called, the backend is always up
182 // future nice to have...get Partner status
184 // For all apps exposing a rest url, query one of the rest
185 // urls(/roles?) and manage a list
186 // of app name/status. We might not return back a non 200 OK in
187 // health check, but we
188 // could return information in the json content of a health check.
192 // Get DB status. If down, signal alert once every X intervals.
194 if (Thread.interrupted()) {
195 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
200 Thread.sleep(sleepInterval);
201 } catch (InterruptedException e) {
202 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
203 Thread.currentThread().interrupt();
209 public void initHealthMonitor() {
210 healthMonitorThread = new Thread("EP HealthMonitor thread") {
214 } catch (InterruptedException e) {
215 logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
216 } catch (Exception e) {
217 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
221 if (healthMonitorThread != null) {
222 healthMonitorThread.start();
227 public void closeHealthMonitor() {
228 this.healthMonitorThread.interrupt();
232 * This routine checks whether the database can be read. In June 2017 we
233 * experimented with checking if the database can be WRITTEN. Writes failed
234 * with some regularity in a MariaDB Galera cluster, and in that
235 * environment, the resulting alerts in the log triggered a health monitor
236 * cron job to shut down the Tomcat instance. The root cause of the cluster
237 * write failures was not determined.
239 * @return true if the database can be read.
241 private boolean checkIfDatabaseUp() {
242 boolean isUp = false;
243 Session localSession = null;
245 localSession = sessionFactory.openSession();
246 if (localSession != null) {
247 String sql = "select app_name from fn_app where app_id=1";
248 Query query = localSession.createSQLQuery(sql);
249 @SuppressWarnings("unchecked")
250 List<String> queryList = query.list();
251 if (queryList != null) {
255 } catch (Exception e) {
256 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
259 if (localSession != null)
260 localSession.close();
265 private boolean checkClusterStatus() {
266 boolean isUp = false;
267 Session localSession = null;
269 localSession = sessionFactory.openSession();
270 if (localSession != null) {
271 // If all nodes are unhealthy in a cluster, this will throw an
273 String sql = "select * from mysql.user";
274 Query query = localSession.createSQLQuery(sql);
275 @SuppressWarnings("unchecked")
276 List<String> queryList = query.list();
277 if (queryList != null) {
281 } catch (Exception e) {
282 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
283 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
284 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
288 if (localSession != null) {
289 localSession.close();
295 private boolean checkDatabasePermissions() {
296 boolean isUp = false;
297 Session localSession = null;
299 localSession = sessionFactory.openSession();
300 if (localSession != null) {
301 String sql = "SHOW GRANTS FOR CURRENT_USER";
302 Query query = localSession.createSQLQuery(sql);
303 @SuppressWarnings("unchecked")
304 List<String> grantsList = query.list();
305 for (String str : grantsList) {
306 if ((str.toUpperCase().contains("ALL"))
307 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
308 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
314 logger.error(EELFLoggerDelegate.errorLogger,
315 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
316 for (String str : grantsList) {
317 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
321 } catch (Exception e) {
322 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
323 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
324 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
328 if (localSession != null) {
329 localSession.close();
335 private boolean checkIfUebUp() {
336 boolean uebUp = false;
338 boolean isAvailable = epUebHelper.checkAvailability();
339 boolean messageCanBeSent = epUebHelper.MessageCanBeSentToTopic();
340 uebUp = (isAvailable && messageCanBeSent);
341 } catch (Exception e) {
342 logger.error(EELFLoggerDelegate.errorLogger, "checkIfUebUp failed", e);