2 * ============LICENSE_START==========================================
4 * ===================================================================
5 * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ===================================================================
8 * Unless otherwise specified, all software contained herein is licensed
9 * under the Apache License, Version 2.0 (the "License");
10 * you may not use this software except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
21 * Unless otherwise specified, all documentation contained herein is licensed
22 * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23 * you may not use this documentation except in compliance with the License.
24 * You may obtain a copy of the License at
26 * https://creativecommons.org/licenses/by/4.0/
28 * Unless required by applicable law or agreed to in writing, documentation
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
34 * ============LICENSE_END============================================
38 package org.onap.portalapp.portal.listener;
40 import java.time.Instant;
41 import java.util.List;
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
46 import lombok.NoArgsConstructor;
47 import org.apache.commons.lang3.StringUtils;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.Configuration;
65 import org.springframework.context.annotation.EnableAspectJAutoProxy;
66 import org.springframework.transaction.annotation.Transactional;
73 @EnableAspectJAutoProxy
76 public class HealthMonitor {
77 private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
78 private Thread healthMonitorThread;
79 private static SessionFactory sessionFactory;
81 private static boolean databaseUp;
82 private static boolean uebUp;
83 private static boolean frontEndUp;
84 private static boolean backEndUp;
85 private static boolean dbPermissionsOk;
86 private static boolean zookeeperStatusOk;
87 private static boolean cassandraStatusOk;
88 private static String application = "Portal";
89 private static boolean isSuspended = false;
/**
 * Creates the monitor and stores the given session factory in the static field that the
 * database checks ({@code checkIfDatabaseUp}, {@code checkDatabasePermissions}) read.
 *
 * @param sessionFactory Hibernate session factory used to open sessions for the health queries
 */
// NOTE(review): @Autowired is imported by this file but its usage line was not visible; restored here.
@Autowired
public HealthMonitor(SessionFactory sessionFactory) {
    HealthMonitor.sessionFactory = sessionFactory;
}
96 private static void monitorEPHealth() {
98 int numIntervalsDatabaseHasBeenDown = 0;
99 int numIntervalsDatabasePermissionsIncorrect = 0;
100 int numIntervalsZookeeperNotHealthy = 0;
101 int numIntervalsCassandraNotHealthy = 0;
103 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
106 long sleepInterval = (Long
107 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
108 long numIntervalsBetweenAlerts = Long
109 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
110 logger.debug(EELFLoggerDelegate.debugLogger,
111 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
112 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
115 logger.debug(EELFLoggerDelegate.debugLogger,
116 "monitorEPHealth: Test Connection to all");
118 // Get DB status. If down, signal alert once every X intervals.
120 databaseUp = checkIfDatabaseUp();
122 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
123 logger.debug(EELFLoggerDelegate.debugLogger,
124 "monitorEPHealth: database down, logging to error log to trigger alert.");
125 // Write a Log entry that will generate an alert
126 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
127 numIntervalsDatabaseHasBeenDown++;
129 numIntervalsDatabaseHasBeenDown = 0;
133 dbPermissionsOk = checkDatabasePermissions();
134 if (!dbPermissionsOk) {
135 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
136 logger.debug(EELFLoggerDelegate.debugLogger,
137 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
138 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
139 numIntervalsDatabasePermissionsIncorrect++;
141 numIntervalsDatabasePermissionsIncorrect = 0;
144 if(org.onap.portalapp.music.util.MusicUtil.isMusicEnable()){
147 * zookeeperStatusOk = checkZookeeperStatus();
149 * if (!zookeeperStatusOk) { if ((numIntervalsZookeeperNotHealthy %
150 * numIntervalsBetweenAlerts) == 0) {
151 * logger.debug(EELFLoggerDelegate.debugLogger,
152 * "monitorEPHealth: cluster nodes down, logging to error log to trigger alert."
153 * ); EPLogUtil.logEcompError(logger,
154 * EPAppMessagesEnum.MusicHealthCheckZookeeperError);
155 * numIntervalsZookeeperNotHealthy++; } else { numIntervalsZookeeperNotHealthy =
159 cassandraStatusOk = checkCassandraStatus();
160 if (!cassandraStatusOk) {
161 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
162 logger.debug(EELFLoggerDelegate.debugLogger,
163 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
164 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
165 numIntervalsCassandraNotHealthy++;
167 numIntervalsCassandraNotHealthy = 0;
174 if (Thread.interrupted()) {
175 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
180 Thread.sleep(sleepInterval);
181 } catch (InterruptedException e) {
182 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
183 Thread.currentThread().interrupt();
189 public void initHealthMonitor() {
190 healthMonitorThread = new Thread("EP HealthMonitor thread") {
196 catch (Exception e) {
197 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
201 healthMonitorThread.start();
206 public void closeHealthMonitor() {
207 this.healthMonitorThread.interrupt();
211 * This routine checks whether the database can be read. In June 2017 we
212 * experimented with checking if the database can be WRITTEN. Writes failed
213 * with some regularity in a MariaDB Galera cluster, and in that
214 * environment, the resulting alerts in the log triggered a health monitor
215 * cron job to shut down the Tomcat instance. The root cause of the cluster
216 * write failures was not determined.
218 * @return true if the database can be read.
220 private static boolean checkIfDatabaseUp() {
221 boolean isUp = false;
222 Session localSession = null;
224 localSession = sessionFactory.openSession();
225 if (localSession != null) {
226 String sql = "select app_name from fn_app where app_id=1";
227 Query query = localSession.createSQLQuery(sql);
228 @SuppressWarnings("unchecked")
229 List<String> queryList = query.list();
230 if (queryList != null) {
234 } catch (Exception e) {
235 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
238 if (localSession != null)
239 localSession.close();
244 private static boolean checkZookeeperStatus() {
246 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
247 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
248 for (String zookeeperNode : zookeeperNodes) {
250 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper :" + zookeeperNode.trim());
251 String[] iport = zookeeperNode.split(":");
252 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
253 Integer.parseInt(iport[1].trim()), "stat");
254 logger.info(EELFLoggerDelegate.applicationLogger,
255 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
256 if (StringUtils.isNotBlank(zkNodeStatistics)) {
257 String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
258 zkNodeStatistics.indexOf("Node"));
259 logger.info(EELFLoggerDelegate.applicationLogger,
261 "Getting Status for zookeeper :" + zookeeperNode.trim() + ":------:" + state);
262 if (state.contains("leader") || state.contains("follower")) {
266 } catch (Exception e) {
267 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
275 private static boolean checkCassandraStatus() {
276 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
277 if (getAdminKeySpace()) {
280 logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
285 private static Boolean getAdminKeySpace() {
286 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
287 Instant creationTime = Instant.now();
288 PreparedQueryObject pQuery = new PreparedQueryObject();
289 pQuery.appendQueryString(
290 "UPDATE " + musicKeySpace + ".health_check SET creation_time = ? WHERE primary_id = ?");
291 pQuery.addValue(creationTime.toString());
292 pQuery.addValue(application);
294 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
295 } catch (MusicServiceException e) {
296 logger.error(EELFLoggerDelegate.errorLogger, e.getErrorMessage(), e);
297 return Boolean.FALSE;
304 private static boolean checkDatabasePermissions() {
305 boolean isUp = false;
306 Session localSession = null;
308 localSession = sessionFactory.openSession();
309 if (localSession != null) {
310 String sql = "SHOW GRANTS FOR CURRENT_USER";
311 Query query = localSession.createSQLQuery(sql);
312 @SuppressWarnings("unchecked")
313 List<String> grantsList = query.list();
314 for (String str : grantsList) {
315 if ((str.toUpperCase().contains("ALL"))
316 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
317 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
323 logger.error(EELFLoggerDelegate.errorLogger,
324 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
325 for (String str : grantsList) {
326 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
330 } catch (Exception e) {
331 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
332 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
333 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
337 if (localSession != null) {
338 localSession.close();
344 public static boolean isDatabaseUp() {
348 public static boolean isUebUp() {
352 public static boolean isFrontEndUp() {
356 public static boolean isBackEndUp() {
360 public static boolean isDbPermissionsOk() {
361 return dbPermissionsOk;
364 public static boolean isZookeeperStatusOk() {
365 return zookeeperStatusOk;
368 public static boolean isCassandraStatusOk() {
369 return cassandraStatusOk;
372 public static boolean isSuspended() {
376 public static void setSuspended(boolean isSuspended) {
377 HealthMonitor.isSuspended = isSuspended;