2 * ============LICENSE_START==========================================
4 * ===================================================================
5 * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
6 * ===================================================================
8 * Unless otherwise specified, all software contained herein is licensed
9 * under the Apache License, Version 2.0 (the "License");
10 * you may not use this software except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
21 * Unless otherwise specified, all documentation contained herein is licensed
22 * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23 * you may not use this documentation except in compliance with the License.
24 * You may obtain a copy of the License at
26 * https://creativecommons.org/licenses/by/4.0/
28 * Unless required by applicable law or agreed to in writing, documentation
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
34 * ============LICENSE_END============================================
38 package org.onap.portalapp.portal.listener;
40 import java.io.IOException;
41 import java.util.HashSet;
42 import java.util.List;
44 import java.util.UUID;
46 import javax.annotation.PostConstruct;
47 import javax.annotation.PreDestroy;
49 import org.apache.commons.lang3.StringUtils;
50 import org.apache.zookeeper.ZooKeeper;
51 import org.apache.zookeeper.client.FourLetterWordMain;
52 import org.hibernate.Query;
53 import org.hibernate.Session;
54 import org.hibernate.SessionFactory;
55 import org.onap.music.datastore.PreparedQueryObject;
56 import org.onap.music.exceptions.MusicServiceException;
57 import org.onap.music.main.MusicCore;
58 import org.onap.music.main.MusicUtil;
59 import org.onap.portalapp.music.util.MusicProperties;
60 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
61 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
62 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
63 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
64 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
65 import org.onap.portalsdk.core.util.SystemProperties;
66 import org.springframework.beans.factory.annotation.Autowired;
67 import org.springframework.context.annotation.EnableAspectJAutoProxy;
68 import org.springframework.transaction.annotation.Transactional;
74 @org.springframework.context.annotation.Configuration
75 @EnableAspectJAutoProxy
77 public class HealthMonitor {
80 ZooKeeper zookeeper = null;
82 private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
85 private SessionFactory sessionFactory;
88 private static boolean databaseUp;
89 private static boolean uebUp;
90 private static boolean frontEndUp;
91 private static boolean backEndUp;
92 private static boolean dbClusterStatusOk;
93 private static boolean dbPermissionsOk;
94 private static boolean zookeeperStatusOk;
95 private static boolean cassandraStatusOk;
98 * Read directly by external classes.
100 public static boolean isSuspended = false;
102 private Thread healthMonitorThread;
104 public HealthMonitor() {
107 public static boolean isDatabaseUp() {
111 public static boolean isClusterStatusOk() {
112 return dbClusterStatusOk;
115 public static boolean isDatabasePermissionsOk() {
116 return dbPermissionsOk;
119 public static boolean isUebUp() {
123 public static boolean isFrontEndUp() {
127 public static boolean isBackEndUp() {
131 public static boolean isZookeeperStatusOk() {
132 return zookeeperStatusOk;
135 public static boolean isCassandraStatusOk() {
136 return cassandraStatusOk;
139 private void monitorEPHealth() throws InterruptedException {
141 int numIntervalsDatabaseHasBeenDown = 0;
142 int numIntervalsClusterNotHealthy = 0;
143 int numIntervalsDatabasePermissionsIncorrect = 0;
144 int numIntervalsZookeeperNotHealthy = 0;
145 int numIntervalsCassandraNotHealthy = 0;
147 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
149 long sleepInterval = (Long
150 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
151 long numIntervalsBetweenAlerts = Long
152 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
153 logger.debug(EELFLoggerDelegate.debugLogger,
154 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
155 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
159 // Get DB status. If down, signal alert once every X intervals.
161 databaseUp = this.checkIfDatabaseUp();
162 if (databaseUp == false) {
163 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
164 logger.debug(EELFLoggerDelegate.debugLogger,
165 "monitorEPHealth: database down, logging to error log to trigger alert.");
166 // Write a Log entry that will generate an alert
167 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
168 numIntervalsDatabaseHasBeenDown++;
170 numIntervalsDatabaseHasBeenDown = 0;
174 dbClusterStatusOk = this.checkClusterStatus();
175 if (dbClusterStatusOk == false) {
176 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
177 logger.debug(EELFLoggerDelegate.debugLogger,
178 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
179 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
180 numIntervalsClusterNotHealthy++;
182 numIntervalsClusterNotHealthy = 0;
186 dbPermissionsOk = this.checkDatabasePermissions();
187 if (dbPermissionsOk == false) {
188 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
189 logger.debug(EELFLoggerDelegate.debugLogger,
190 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
191 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
192 numIntervalsDatabasePermissionsIncorrect++;
194 numIntervalsDatabasePermissionsIncorrect = 0;
198 zookeeperStatusOk = this.checkZookeeperStatus();
199 if (zookeeperStatusOk == false) {
200 if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
201 logger.debug(EELFLoggerDelegate.debugLogger,
202 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
203 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
204 numIntervalsZookeeperNotHealthy++;
206 numIntervalsZookeeperNotHealthy = 0;
210 cassandraStatusOk = this.checkCassandraStatus();
211 if (cassandraStatusOk == false) {
212 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
213 logger.debug(EELFLoggerDelegate.debugLogger,
214 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
215 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
216 numIntervalsCassandraNotHealthy++;
218 numIntervalsCassandraNotHealthy = 0;
223 // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
224 // returned, status is Up.
225 // If down, signal alert once every X intervals.
226 // EP will ignore this bogus message.
227 // Commenting this out as Dependency on UEB is being deprecated
229 * uebUp = this.checkIfUebUp(); if (uebUp == false) {
231 * if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
232 * logger.debug(EELFLoggerDelegate.debugLogger,
233 * "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write
234 * a Log entry that will generate an alert EPLogUtil.logEcompError(logger,
235 * EPAppMessagesEnum.BeHealthCheckUebClusterError);
236 * numIntervalsUebHasBeenDown++; } else { numIntervalsUebHasBeenDown = 0; } }
239 // The front end should be up because the API is called through
240 // proxy front end server.
243 // If the rest API called, the backend is always up
247 // future nice to have...get Partner status
249 // For all apps exposing a rest url, query one of the rest
250 // urls(/roles?) and manage a list
251 // of app name/status. We might not return back a non 200 OK in
252 // health check, but we
253 // could return information in the json content of a health check.
257 // Get DB status. If down, signal alert once every X intervals.
259 if (Thread.interrupted()) {
260 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
265 Thread.sleep(sleepInterval);
266 } catch (InterruptedException e) {
267 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
268 Thread.currentThread().interrupt();
274 public void initHealthMonitor() {
275 healthMonitorThread = new Thread("EP HealthMonitor thread") {
279 } catch (InterruptedException e) {
280 logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
281 } catch (Exception e) {
282 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
286 healthMonitorThread.start();
291 public void closeHealthMonitor() {
292 this.healthMonitorThread.interrupt();
296 * This routine checks whether the database can be read. In June 2017 we
297 * experimented with checking if the database can be WRITTEN. Writes failed
298 * with some regularity in a MariaDB Galera cluster, and in that
299 * environment, the resulting alerts in the log triggered a health monitor
300 * cron job to shut down the Tomcat instance. The root cause of the cluster
301 * write failures was not determined.
303 * @return true if the database can be read.
305 private boolean checkIfDatabaseUp() {
306 boolean isUp = false;
307 Session localSession = null;
309 localSession = sessionFactory.openSession();
310 if (localSession != null) {
311 String sql = "select app_name from fn_app where app_id=1";
312 Query query = localSession.createSQLQuery(sql);
313 @SuppressWarnings("unchecked")
314 List<String> queryList = query.list();
315 if (queryList != null) {
319 } catch (Exception e) {
320 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
323 if (localSession != null)
324 localSession.close();
329 private boolean checkClusterStatus() {
330 boolean isUp = false;
331 Session localSession = null;
333 localSession = sessionFactory.openSession();
334 if (localSession != null) {
335 // If all nodes are unhealthy in a cluster, this will throw an
337 String sql = "select * from mysql.user";
338 Query query = localSession.createSQLQuery(sql);
339 @SuppressWarnings("unchecked")
340 List<String> queryList = query.list();
341 if (queryList != null) {
345 } catch (Exception e) {
346 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
347 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
348 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
352 if (localSession != null) {
353 localSession.close();
359 private boolean checkZookeeperStatus() {
361 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
362 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
363 for (int i = 0; i < zookeeperNodes.length; i++) {
365 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper :" + zookeeperNodes[i].trim());
366 String[] iport = zookeeperNodes[i].split(":");
367 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
368 Integer.parseInt(iport[1].trim()), "stat");
369 logger.info(EELFLoggerDelegate.applicationLogger,
370 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
371 if (StringUtils.isNotBlank(zkNodeStatistics)) {
372 String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
373 zkNodeStatistics.indexOf("Node"));
374 logger.info(EELFLoggerDelegate.applicationLogger,
375 "Getting Status for zookeeper :" + zookeeperNodes[i].trim() + ":------:" + state);
376 if (state.contains("leader"))
379 } catch (Exception e) {
380 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
388 public boolean checkCassandraStatus() {
389 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
390 if (this.getAdminKeySpace()) {
393 logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
398 private Boolean getAdminKeySpace() {
399 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE );
400 //deletePortalHealthcheck(musicKeySpace);
401 PreparedQueryObject pQuery = new PreparedQueryObject();
402 pQuery.appendQueryString("insert into "+musicKeySpace+".healthcheck (id) values (?)");
403 pQuery.addValue(UUID.randomUUID());
405 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.EVENTUAL);
406 } catch (MusicServiceException e) {
407 logger.error(EELFLoggerDelegate.errorLogger, "getAdminKeySpace() failed", e.getCause());
408 return Boolean.FALSE;
414 private void deletePortalHealthcheck(String musicKeySpace) {
415 PreparedQueryObject pQuery = new PreparedQueryObject();
416 pQuery.appendQueryString("TRUNCATE "+musicKeySpace+".healthcheck");
418 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.EVENTUAL);
419 } catch (MusicServiceException e) {
420 logger.error(EELFLoggerDelegate.errorLogger, "deletePortalHealthcheck() failed", e.getCause());
424 private boolean checkDatabasePermissions() {
425 boolean isUp = false;
426 Session localSession = null;
428 localSession = sessionFactory.openSession();
429 if (localSession != null) {
430 String sql = "SHOW GRANTS FOR CURRENT_USER";
431 Query query = localSession.createSQLQuery(sql);
432 @SuppressWarnings("unchecked")
433 List<String> grantsList = query.list();
434 for (String str : grantsList) {
435 if ((str.toUpperCase().contains("ALL"))
436 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
437 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
443 logger.error(EELFLoggerDelegate.errorLogger,
444 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
445 for (String str : grantsList) {
446 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
450 } catch (Exception e) {
451 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
452 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
453 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
457 if (localSession != null) {
458 localSession.close();