/*-
 * ============LICENSE_START==========================================
 * ===================================================================
 * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
 * ===================================================================
 *
 * Unless otherwise specified, all software contained herein is licensed
 * under the Apache License, Version 2.0 (the "License");
 * you may not use this software except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *             http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Unless otherwise specified, all documentation contained herein is licensed
 * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
 * you may not use this documentation except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *             https://creativecommons.org/licenses/by/4.0/
 *
 * Unless required by applicable law or agreed to in writing, documentation
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ============LICENSE_END============================================
 */
38 package org.onap.portalapp.portal.listener;
40 import java.time.Instant;
41 import java.util.List;
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
46 import org.apache.commons.lang3.StringUtils;
47 import org.apache.zookeeper.ZooKeeper;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.EnableAspectJAutoProxy;
65 import org.springframework.transaction.annotation.Transactional;
71 @org.springframework.context.annotation.Configuration
72 @EnableAspectJAutoProxy
74 public class HealthMonitor {
77 ZooKeeper zookeeper = null;
79 private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
82 private SessionFactory sessionFactory;
85 private static boolean databaseUp;
86 private static boolean uebUp;
87 private static boolean frontEndUp;
88 private static boolean backEndUp;
89 private static boolean dbClusterStatusOk;
90 private static boolean dbPermissionsOk;
91 private static boolean zookeeperStatusOk;
92 private static boolean cassandraStatusOk;
93 private static String APPLICATION = "Portal";
96 * Read directly by external classes.
98 public static boolean isSuspended = false;
100 private Thread healthMonitorThread;
102 public HealthMonitor() {
105 public static boolean isDatabaseUp() {
109 public static boolean isClusterStatusOk() {
110 return dbClusterStatusOk;
113 public static boolean isDatabasePermissionsOk() {
114 return dbPermissionsOk;
117 public static boolean isUebUp() {
121 public static boolean isFrontEndUp() {
125 public static boolean isBackEndUp() {
129 public static boolean isZookeeperStatusOk() {
130 return zookeeperStatusOk;
133 public static boolean isCassandraStatusOk() {
134 return cassandraStatusOk;
137 private void monitorEPHealth() throws InterruptedException {
139 int numIntervalsDatabaseHasBeenDown = 0;
140 int numIntervalsClusterNotHealthy = 0;
141 int numIntervalsDatabasePermissionsIncorrect = 0;
142 int numIntervalsZookeeperNotHealthy = 0;
143 int numIntervalsCassandraNotHealthy = 0;
145 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
147 long sleepInterval = (Long
148 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
149 long numIntervalsBetweenAlerts = Long
150 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
151 logger.debug(EELFLoggerDelegate.debugLogger,
152 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
153 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
157 // Get DB status. If down, signal alert once every X intervals.
159 databaseUp = this.checkIfDatabaseUp();
160 if (databaseUp == false) {
161 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
162 logger.debug(EELFLoggerDelegate.debugLogger,
163 "monitorEPHealth: database down, logging to error log to trigger alert.");
164 // Write a Log entry that will generate an alert
165 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
166 numIntervalsDatabaseHasBeenDown++;
168 numIntervalsDatabaseHasBeenDown = 0;
172 dbClusterStatusOk = this.checkClusterStatus();
173 if (dbClusterStatusOk == false) {
174 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
175 logger.debug(EELFLoggerDelegate.debugLogger,
176 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
177 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
178 numIntervalsClusterNotHealthy++;
180 numIntervalsClusterNotHealthy = 0;
184 dbPermissionsOk = this.checkDatabasePermissions();
185 if (dbPermissionsOk == false) {
186 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
187 logger.debug(EELFLoggerDelegate.debugLogger,
188 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
189 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
190 numIntervalsDatabasePermissionsIncorrect++;
192 numIntervalsDatabasePermissionsIncorrect = 0;
195 org.onap.portalapp.music.util.MusicUtil MusicUtilSDK = new org.onap.portalapp.music.util.MusicUtil();
196 if(MusicUtilSDK.isMusicEnable()){
197 zookeeperStatusOk = this.checkZookeeperStatus();
198 if (zookeeperStatusOk == false) {
199 if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
200 logger.debug(EELFLoggerDelegate.debugLogger,
201 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
202 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
203 numIntervalsZookeeperNotHealthy++;
205 numIntervalsZookeeperNotHealthy = 0;
209 cassandraStatusOk = this.checkCassandraStatus();
210 if (cassandraStatusOk == false) {
211 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
212 logger.debug(EELFLoggerDelegate.debugLogger,
213 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
214 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
215 numIntervalsCassandraNotHealthy++;
217 numIntervalsCassandraNotHealthy = 0;
224 // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
225 // returned, status is Up.
226 // If down, signal alert once every X intervals.
227 // EP will ignore this bogus message.
228 // Commenting this out as Dependency on UEB is being deprecated
230 * uebUp = this.checkIfUebUp(); if (uebUp == false) {
232 * if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
233 * logger.debug(EELFLoggerDelegate.debugLogger,
234 * "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write
235 * a Log entry that will generate an alert EPLogUtil.logEcompError(logger,
236 * EPAppMessagesEnum.BeHealthCheckUebClusterError);
237 * numIntervalsUebHasBeenDown++; } else { numIntervalsUebHasBeenDown = 0; } }
240 // The front end should be up because the API is called through
241 // proxy front end server.
244 // If the rest API called, the backend is always up
248 // future nice to have...get Partner status
250 // For all apps exposing a rest url, query one of the rest
251 // urls(/roles?) and manage a list
252 // of app name/status. We might not return back a non 200 OK in
253 // health check, but we
254 // could return information in the json content of a health check.
258 // Get DB status. If down, signal alert once every X intervals.
260 if (Thread.interrupted()) {
261 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
266 Thread.sleep(sleepInterval);
267 } catch (InterruptedException e) {
268 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
269 Thread.currentThread().interrupt();
275 public void initHealthMonitor() {
276 healthMonitorThread = new Thread("EP HealthMonitor thread") {
280 } catch (InterruptedException e) {
281 logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
282 } catch (Exception e) {
283 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
287 healthMonitorThread.start();
292 public void closeHealthMonitor() {
293 this.healthMonitorThread.interrupt();
297 * This routine checks whether the database can be read. In June 2017 we
298 * experimented with checking if the database can be WRITTEN. Writes failed
299 * with some regularity in a MariaDB Galera cluster, and in that
300 * environment, the resulting alerts in the log triggered a health monitor
301 * cron job to shut down the Tomcat instance. The root cause of the cluster
302 * write failures was not determined.
304 * @return true if the database can be read.
306 private boolean checkIfDatabaseUp() {
307 boolean isUp = false;
308 Session localSession = null;
310 localSession = sessionFactory.openSession();
311 if (localSession != null) {
312 String sql = "select app_name from fn_app where app_id=1";
313 Query query = localSession.createSQLQuery(sql);
314 @SuppressWarnings("unchecked")
315 List<String> queryList = query.list();
316 if (queryList != null) {
320 } catch (Exception e) {
321 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
324 if (localSession != null)
325 localSession.close();
330 private boolean checkClusterStatus() {
331 boolean isUp = false;
332 Session localSession = null;
334 localSession = sessionFactory.openSession();
335 if (localSession != null) {
336 // If all nodes are unhealthy in a cluster, this will throw an
338 String sql = "select * from mysql.user";
339 Query query = localSession.createSQLQuery(sql);
340 @SuppressWarnings("unchecked")
341 List<String> queryList = query.list();
342 if (queryList != null) {
346 } catch (Exception e) {
347 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
348 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
349 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
353 if (localSession != null) {
354 localSession.close();
360 private boolean checkZookeeperStatus() {
362 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
363 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
364 for (int i = 0; i < zookeeperNodes.length; i++) {
366 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper :" + zookeeperNodes[i].trim());
367 String[] iport = zookeeperNodes[i].split(":");
368 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
369 Integer.parseInt(iport[1].trim()), "stat");
370 logger.info(EELFLoggerDelegate.applicationLogger,
371 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
372 if (StringUtils.isNotBlank(zkNodeStatistics)) {
373 String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
374 zkNodeStatistics.indexOf("Node"));
375 logger.info(EELFLoggerDelegate.applicationLogger,
376 "Getting Status for zookeeper :" + zookeeperNodes[i].trim() + ":------:" + state);
377 if (state.contains("leader"))
380 } catch (Exception e) {
381 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
389 public boolean checkCassandraStatus() {
390 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
391 if (this.getAdminKeySpace()) {
394 logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
399 private Boolean getAdminKeySpace() {
400 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
401 Instant creationTime = Instant.now();
402 PreparedQueryObject pQuery = new PreparedQueryObject();
403 pQuery.appendQueryString(
404 "UPDATE " + musicKeySpace + ".health_check SET creation_time = ? WHERE primary_id = ?");
405 pQuery.addValue(creationTime.toString());
406 pQuery.addValue(APPLICATION);
408 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.ATOMIC);
409 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
410 } catch (MusicServiceException e) {
411 return Boolean.FALSE;
418 private boolean checkDatabasePermissions() {
419 boolean isUp = false;
420 Session localSession = null;
422 localSession = sessionFactory.openSession();
423 if (localSession != null) {
424 String sql = "SHOW GRANTS FOR CURRENT_USER";
425 Query query = localSession.createSQLQuery(sql);
426 @SuppressWarnings("unchecked")
427 List<String> grantsList = query.list();
428 for (String str : grantsList) {
429 if ((str.toUpperCase().contains("ALL"))
430 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
431 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
437 logger.error(EELFLoggerDelegate.errorLogger,
438 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
439 for (String str : grantsList) {
440 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
444 } catch (Exception e) {
445 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
446 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
447 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
451 if (localSession != null) {
452 localSession.close();