2 * ============LICENSE_START==========================================
4 * ===================================================================
5 * Copyright © 2017 AT&T Intellectual Property. All rights reserved.
6 * ===================================================================
8 * Unless otherwise specified, all software contained herein is licensed
9 * under the Apache License, Version 2.0 (the “License”);
10 * you may not use this software except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
21 * Unless otherwise specified, all documentation contained herein is licensed
22 * under the Creative Commons License, Attribution 4.0 Intl. (the “License”);
23 * you may not use this documentation except in compliance with the License.
24 * You may obtain a copy of the License at
26 * https://creativecommons.org/licenses/by/4.0/
28 * Unless required by applicable law or agreed to in writing, documentation
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
34 * ============LICENSE_END============================================
36 * ECOMP is a trademark and service mark of AT&T Intellectual Property.
38 package org.openecomp.portalapp.portal.listener;
40 import java.util.List;
42 import javax.annotation.PostConstruct;
43 import javax.annotation.PreDestroy;
45 import org.hibernate.Query;
46 import org.hibernate.Session;
47 import org.hibernate.SessionFactory;
48 import org.openecomp.portalapp.portal.logging.aop.EPMetricsLog;
49 import org.openecomp.portalapp.portal.logging.format.EPAppMessagesEnum;
50 import org.openecomp.portalapp.portal.logging.logic.EPLogUtil;
51 import org.openecomp.portalapp.portal.ueb.EPUebHelper;
52 import org.openecomp.portalapp.portal.utils.EPCommonSystemProperties;
53 import org.openecomp.portalsdk.core.logging.logic.EELFLoggerDelegate;
54 import org.openecomp.portalsdk.core.util.SystemProperties;
55 import org.springframework.beans.factory.annotation.Autowired;
56 import org.springframework.context.annotation.EnableAspectJAutoProxy;
57 import org.springframework.transaction.annotation.Transactional;
60 @org.springframework.context.annotation.Configuration
61 @EnableAspectJAutoProxy
63 public class HealthMonitor {
65 private EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
68 private SessionFactory sessionFactory;
71 private EPUebHelper epUebHelper;
73 private static boolean databaseUp;
74 private static boolean uebUp;
75 private static boolean frontEndUp;
76 private static boolean backEndUp;
77 private static boolean dbClusterStatusOk;
78 private static boolean dbPermissionsOk;
81 * Read directly by external classes.
83 public static boolean isSuspended = false;
85 private Thread healthMonitorThread;
87 public HealthMonitor() {
90 public static boolean isDatabaseUp() {
94 public static boolean isClusterStatusOk() {
95 return dbClusterStatusOk;
98 public static boolean isDatabasePermissionsOk() {
99 return dbPermissionsOk;
102 public static boolean isUebUp() {
106 public static boolean isFrontEndUp() {
110 public static boolean isBackEndUp() {
114 private void monitorEPHealth() throws InterruptedException {
116 int numIntervalsDatabaseHasBeenDown = 0;
117 int numIntervalsClusterNotHealthy = 0;
118 int numIntervalsDatabasePermissionsIncorrect = 0;
119 int numIntervalsUebHasBeenDown = 0;
121 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
123 long sleepInterval = (Long
124 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
125 long numIntervalsBetweenAlerts = Long
126 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
127 logger.debug(EELFLoggerDelegate.debugLogger,
128 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
129 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
133 // Get DB status. If down, signal alert once every X intervals.
135 databaseUp = this.checkIfDatabaseUp();
136 if (databaseUp == false) {
137 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
138 logger.debug(EELFLoggerDelegate.debugLogger,
139 "monitorEPHealth: database down, logging to error log to trigger alert.");
140 // Write a Log entry that will generate an alert
141 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
142 numIntervalsDatabaseHasBeenDown++;
144 numIntervalsDatabaseHasBeenDown = 0;
148 dbClusterStatusOk = this.checkClusterStatus();
149 if (dbClusterStatusOk == false) {
150 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
151 logger.debug(EELFLoggerDelegate.debugLogger,
152 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
153 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
154 numIntervalsClusterNotHealthy++;
156 numIntervalsClusterNotHealthy = 0;
160 dbPermissionsOk = this.checkDatabasePermissions();
161 if (dbPermissionsOk == false) {
162 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
163 logger.debug(EELFLoggerDelegate.debugLogger,
164 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
165 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
166 numIntervalsDatabasePermissionsIncorrect++;
168 numIntervalsDatabasePermissionsIncorrect = 0;
173 // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
174 // returned, status is Up.
175 // If down, signal alert once every X intervals.
176 // EP will ignore this bogus message.
178 uebUp = this.checkIfUebUp();
179 if (uebUp == false) {
181 if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
182 logger.debug(EELFLoggerDelegate.debugLogger,
183 "monitorEPHealth: UEB down, logging to error log to trigger alert");
184 // Write a Log entry that will generate an alert
185 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckUebClusterError);
186 numIntervalsUebHasBeenDown++;
188 numIntervalsUebHasBeenDown = 0;
192 // The front end should be up because the API is called through
193 // proxy front end server.
196 // If the rest API called, the backend is always up
200 // future nice to have...get Partner status
202 // For all apps exposing a rest url, query one of the rest
203 // urls(/roles?) and manage a list
204 // of app name/status. We might not return back a non 200 OK in
205 // health check, but we
206 // could return information in the json content of a health check.
210 // Get DB status. If down, signal alert once every X intervals.
212 if (Thread.interrupted()) {
213 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
218 Thread.sleep(sleepInterval);
219 } catch (InterruptedException e) {
220 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
221 Thread.currentThread().interrupt();
227 public void initHealthMonitor() {
228 healthMonitorThread = new Thread("EP HealthMonitor thread") {
232 } catch (InterruptedException e) {
233 logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
234 } catch (Exception e) {
235 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
239 healthMonitorThread.start();
244 public void closeHealthMonitor() {
245 this.healthMonitorThread.interrupt();
249 * This routine checks whether the database can be read. In June 2017 we
250 * experimented with checking if the database can be WRITTEN. Writes failed
251 * with some regularity in a MariaDB Galera cluster, and in that
252 * environment, the resulting alerts in the log triggered a health monitor
253 * cron job to shut down the Tomcat instance. The root cause of the cluster
254 * write failures was not determined.
256 * @return true if the database can be read.
258 private boolean checkIfDatabaseUp() {
259 boolean isUp = false;
260 Session localSession = null;
262 localSession = sessionFactory.openSession();
263 if (localSession != null) {
264 String sql = "select app_name from fn_app where app_id=1";
265 Query query = localSession.createSQLQuery(sql);
266 @SuppressWarnings("unchecked")
267 List<String> queryList = query.list();
268 if (queryList != null) {
272 } catch (Exception e) {
273 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
276 if (localSession != null)
277 localSession.close();
282 private boolean checkClusterStatus() {
283 boolean isUp = false;
284 Session localSession = null;
286 localSession = sessionFactory.openSession();
287 if (localSession != null) {
288 // If all nodes are unhealthy in a cluster, this will throw an
290 String sql = "select * from mysql.user";
291 Query query = localSession.createSQLQuery(sql);
292 @SuppressWarnings("unchecked")
293 List<String> queryList = query.list();
294 if (queryList != null) {
298 } catch (Exception e) {
299 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
300 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
301 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
305 if (localSession != null) {
306 localSession.close();
312 private boolean checkDatabasePermissions() {
313 boolean isUp = false;
314 Session localSession = null;
316 localSession = sessionFactory.openSession();
317 if (localSession != null) {
318 String sql = "SHOW GRANTS FOR CURRENT_USER";
319 Query query = localSession.createSQLQuery(sql);
320 @SuppressWarnings("unchecked")
321 List<String> grantsList = query.list();
322 for (String str : grantsList) {
323 if ((str.toUpperCase().contains("ALL"))
324 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
325 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
331 logger.error(EELFLoggerDelegate.errorLogger,
332 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
333 for (String str : grantsList) {
334 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
338 } catch (Exception e) {
339 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
340 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
341 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
345 if (localSession != null) {
346 localSession.close();
352 private boolean checkIfUebUp() {
353 boolean uebUp = false;
355 boolean isAvailable = epUebHelper.checkAvailability();
356 boolean messageCanBeSent = epUebHelper.MessageCanBeSentToTopic();
357 uebUp = (isAvailable && messageCanBeSent);
358 } catch (Exception e) {
359 logger.error(EELFLoggerDelegate.errorLogger, "checkIfUebUp failed", e);