2 * ============LICENSE_START==========================================
4 * ===================================================================
5 * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ===================================================================
8 * Unless otherwise specified, all software contained herein is licensed
9 * under the Apache License, Version 2.0 (the "License");
10 * you may not use this software except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
21 * Unless otherwise specified, all documentation contained herein is licensed
22 * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23 * you may not use this documentation except in compliance with the License.
24 * You may obtain a copy of the License at
26 * https://creativecommons.org/licenses/by/4.0/
28 * Unless required by applicable law or agreed to in writing, documentation
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
34 * ============LICENSE_END============================================
38 package org.onap.portalapp.portal.listener;
40 import java.time.Instant;
41 import java.util.List;
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
46 import lombok.NoArgsConstructor;
47 import org.apache.commons.lang3.StringUtils;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.Configuration;
65 import org.springframework.context.annotation.EnableAspectJAutoProxy;
66 import org.springframework.transaction.annotation.Transactional;
73 @EnableAspectJAutoProxy
76 public class HealthMonitor {
77 private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
78 private Thread healthMonitorThread;
79 private static SessionFactory sessionFactory;
81 private static boolean databaseUp;
82 private static boolean uebUp;
83 private static boolean frontEndUp;
84 private static boolean backEndUp;
85 private static boolean dbPermissionsOk;
86 private static boolean zookeeperStatusOk;
87 private static boolean cassandraStatusOk;
88 private static String application = "Portal";
89 private static boolean isSuspended = false;
/**
 * Creates the monitor and stores the given session factory in the static
 * field that the database probes read.
 *
 * @param sessionFactory
 *            Hibernate session factory used to open probe sessions
 */
public HealthMonitor(SessionFactory sessionFactory) {
    HealthMonitor.sessionFactory = sessionFactory;
}
96 private static void monitorEPHealth() {
98 int numIntervalsDatabaseHasBeenDown = 0;
99 int numIntervalsDatabasePermissionsIncorrect = 0;
100 int numIntervalsZookeeperNotHealthy = 0;
101 int numIntervalsCassandraNotHealthy = 0;
103 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
106 long sleepInterval = (Long
107 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
108 long numIntervalsBetweenAlerts = Long
109 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
110 logger.debug(EELFLoggerDelegate.debugLogger,
111 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
112 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
115 logger.debug(EELFLoggerDelegate.debugLogger,
116 "monitorEPHealth: Test Connection to all");
118 // Get DB status. If down, signal alert once every X intervals.
120 databaseUp = checkIfDatabaseUp();
122 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
123 logger.debug(EELFLoggerDelegate.debugLogger,
124 "monitorEPHealth: database down, logging to error log to trigger alert.");
125 // Write a Log entry that will generate an alert
126 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
127 numIntervalsDatabaseHasBeenDown++;
129 numIntervalsDatabaseHasBeenDown = 0;
133 dbPermissionsOk = checkDatabasePermissions();
134 if (!dbPermissionsOk) {
135 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
136 logger.debug(EELFLoggerDelegate.debugLogger,
137 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
138 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
139 numIntervalsDatabasePermissionsIncorrect++;
141 numIntervalsDatabasePermissionsIncorrect = 0;
144 if(org.onap.portalapp.music.util.MusicUtil.isMusicEnable()){
146 zookeeperStatusOk = checkZookeeperStatus();
148 if (!zookeeperStatusOk) {
149 if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
150 logger.debug(EELFLoggerDelegate.debugLogger,
151 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
152 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
153 numIntervalsZookeeperNotHealthy++;
155 numIntervalsZookeeperNotHealthy = 0;
159 cassandraStatusOk = checkCassandraStatus();
160 if (!cassandraStatusOk) {
161 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
162 logger.debug(EELFLoggerDelegate.debugLogger,
163 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
164 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
165 numIntervalsCassandraNotHealthy++;
167 numIntervalsCassandraNotHealthy = 0;
174 if (Thread.interrupted()) {
175 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
180 Thread.sleep(sleepInterval);
181 } catch (InterruptedException e) {
182 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
183 Thread.currentThread().interrupt();
189 public void initHealthMonitor() {
190 healthMonitorThread = new Thread("EP HealthMonitor thread") {
196 catch (Exception e) {
197 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
201 healthMonitorThread.start();
206 public void closeHealthMonitor() {
207 this.healthMonitorThread.interrupt();
211 * This routine checks whether the database can be read. In June 2017 we
212 * experimented with checking if the database can be WRITTEN. Writes failed
213 * with some regularity in a MariaDB Galera cluster, and in that
214 * environment, the resulting alerts in the log triggered a health monitor
215 * cron job to shut down the Tomcat instance. The root cause of the cluster
216 * write failures was not determined.
218 * @return true if the database can be read.
220 private static boolean checkIfDatabaseUp() {
221 boolean isUp = false;
222 Session localSession = null;
224 localSession = sessionFactory.openSession();
225 if (localSession != null) {
226 String sql = "select app_name from fn_app where app_id=1";
227 Query query = localSession.createSQLQuery(sql);
228 @SuppressWarnings("unchecked")
229 List<String> queryList = query.list();
230 if (queryList != null) {
234 } catch (Exception e) {
235 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
238 if (localSession != null)
239 localSession.close();
244 private static boolean checkZookeeperStatus() {
246 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
247 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
248 for (String zookeeperNode : zookeeperNodes) {
250 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper :" + zookeeperNode.trim());
251 String[] iport = zookeeperNode.split(":");
252 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
253 Integer.parseInt(iport[1].trim()), "stat");
254 logger.info(EELFLoggerDelegate.applicationLogger,
255 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
256 if (StringUtils.isNotBlank(zkNodeStatistics)) {
257 String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
258 zkNodeStatistics.indexOf("Node"));
259 logger.info(EELFLoggerDelegate.applicationLogger,
260 "Getting Status for zookeeper :" + zookeeperNode.trim() + ":------:" + state);
261 if (state.contains("leader") || state.contains("follower")) {
265 } catch (Exception e) {
266 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
274 private static boolean checkCassandraStatus() {
275 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
276 if (getAdminKeySpace()) {
279 logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
284 private static Boolean getAdminKeySpace() {
285 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
286 Instant creationTime = Instant.now();
287 PreparedQueryObject pQuery = new PreparedQueryObject();
288 pQuery.appendQueryString(
289 "UPDATE " + musicKeySpace + ".health_check SET creation_time = ? WHERE primary_id = ?");
290 pQuery.addValue(creationTime.toString());
291 pQuery.addValue(application);
293 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
294 } catch (MusicServiceException e) {
295 logger.error(EELFLoggerDelegate.errorLogger, e.getErrorMessage(), e);
296 return Boolean.FALSE;
303 private static boolean checkDatabasePermissions() {
304 boolean isUp = false;
305 Session localSession = null;
307 localSession = sessionFactory.openSession();
308 if (localSession != null) {
309 String sql = "SHOW GRANTS FOR CURRENT_USER";
310 Query query = localSession.createSQLQuery(sql);
311 @SuppressWarnings("unchecked")
312 List<String> grantsList = query.list();
313 for (String str : grantsList) {
314 if ((str.toUpperCase().contains("ALL"))
315 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
316 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
322 logger.error(EELFLoggerDelegate.errorLogger,
323 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
324 for (String str : grantsList) {
325 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
329 } catch (Exception e) {
330 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
331 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
332 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
336 if (localSession != null) {
337 localSession.close();
343 public static boolean isDatabaseUp() {
347 public static boolean isUebUp() {
351 public static boolean isFrontEndUp() {
355 public static boolean isBackEndUp() {
359 public static boolean isDbPermissionsOk() {
360 return dbPermissionsOk;
363 public static boolean isZookeeperStatusOk() {
364 return zookeeperStatusOk;
367 public static boolean isCassandraStatusOk() {
368 return cassandraStatusOk;
371 public static boolean isSuspended() {
375 public static void setSuspended(boolean isSuspended) {
376 HealthMonitor.isSuspended = isSuspended;