2 * ============LICENSE_START==========================================
4 * ===================================================================
5 * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ===================================================================
8 * Unless otherwise specified, all software contained herein is licensed
9 * under the Apache License, Version 2.0 (the "License");
10 * you may not use this software except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
21 * Unless otherwise specified, all documentation contained herein is licensed
22 * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23 * you may not use this documentation except in compliance with the License.
24 * You may obtain a copy of the License at
26 * https://creativecommons.org/licenses/by/4.0/
28 * Unless required by applicable law or agreed to in writing, documentation
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
34 * ============LICENSE_END============================================
38 package org.onap.portalapp.portal.listener;
40 import java.time.Instant;
41 import java.util.List;
43 import javax.annotation.PostConstruct;
44 import javax.annotation.PreDestroy;
46 import org.apache.commons.lang3.StringUtils;
47 import org.apache.zookeeper.ZooKeeper;
48 import org.apache.zookeeper.client.FourLetterWordMain;
49 import org.hibernate.Query;
50 import org.hibernate.Session;
51 import org.hibernate.SessionFactory;
52 import org.onap.music.datastore.PreparedQueryObject;
53 import org.onap.music.exceptions.MusicServiceException;
54 import org.onap.music.main.MusicCore;
55 import org.onap.music.main.MusicUtil;
56 import org.onap.portalapp.music.util.MusicProperties;
57 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
58 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
59 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
60 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
61 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
62 import org.onap.portalsdk.core.util.SystemProperties;
63 import org.springframework.beans.factory.annotation.Autowired;
64 import org.springframework.context.annotation.EnableAspectJAutoProxy;
65 import org.springframework.transaction.annotation.Transactional;
71 @org.springframework.context.annotation.Configuration
72 @EnableAspectJAutoProxy
74 public class HealthMonitor {
77 ZooKeeper zookeeper = null;
79 private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
82 private SessionFactory sessionFactory;
85 private static boolean databaseUp;
86 private static boolean uebUp;
87 private static boolean frontEndUp;
88 private static boolean backEndUp;
89 private static boolean dbPermissionsOk;
90 private static boolean zookeeperStatusOk;
91 private static boolean cassandraStatusOk;
92 private static String APPLICATION = "Portal";
95 * Read directly by external classes.
97 public static boolean isSuspended = false;
99 private Thread healthMonitorThread;
/**
 * Creates the monitor. Polling does not start here; see initHealthMonitor().
 */
public HealthMonitor() {
}

/**
 * @return the result of the most recent database read check
 */
public static boolean isDatabaseUp() {
    return databaseUp;
}

/**
 * @return the result of the most recent database permissions check
 */
public static boolean isDatabasePermissionsOk() {
    return dbPermissionsOk;
}

/**
 * @return the UEB status flag. NOTE(review): no code visible in this file
 *         assigns the flag (the UEB check is commented out), so this
 *         appears to stay false; confirm against the callers.
 */
public static boolean isUebUp() {
    return uebUp;
}

/**
 * @return the front end status flag maintained by the polling loop
 */
public static boolean isFrontEndUp() {
    return frontEndUp;
}

/**
 * @return the back end status flag maintained by the polling loop
 */
public static boolean isBackEndUp() {
    return backEndUp;
}

/**
 * @return the result of the most recent Zookeeper check
 */
public static boolean isZookeeperStatusOk() {
    return zookeeperStatusOk;
}

/**
 * @return the result of the most recent Cassandra check
 */
public static boolean isCassandraStatusOk() {
    return cassandraStatusOk;
}
132 private void monitorEPHealth() throws InterruptedException {
134 int numIntervalsDatabaseHasBeenDown = 0;
135 int numIntervalsClusterNotHealthy = 0;
136 int numIntervalsDatabasePermissionsIncorrect = 0;
137 int numIntervalsZookeeperNotHealthy = 0;
138 int numIntervalsCassandraNotHealthy = 0;
140 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
143 long sleepInterval = (Long
144 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
145 long numIntervalsBetweenAlerts = Long
146 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
147 logger.debug(EELFLoggerDelegate.debugLogger,
148 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
149 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
152 logger.debug(EELFLoggerDelegate.debugLogger,
153 "monitorEPHealth: Test Connection to all");
155 // Get DB status. If down, signal alert once every X intervals.
157 databaseUp = this.checkIfDatabaseUp();
158 if (databaseUp == false) {
159 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
160 logger.debug(EELFLoggerDelegate.debugLogger,
161 "monitorEPHealth: database down, logging to error log to trigger alert.");
162 // Write a Log entry that will generate an alert
163 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
164 numIntervalsDatabaseHasBeenDown++;
166 numIntervalsDatabaseHasBeenDown = 0;
170 dbPermissionsOk = this.checkDatabasePermissions();
171 if (dbPermissionsOk == false) {
172 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
173 logger.debug(EELFLoggerDelegate.debugLogger,
174 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
175 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
176 numIntervalsDatabasePermissionsIncorrect++;
178 numIntervalsDatabasePermissionsIncorrect = 0;
181 org.onap.portalapp.music.util.MusicUtil MusicUtilSDK = new org.onap.portalapp.music.util.MusicUtil();
182 if(MusicUtilSDK.isMusicEnable()){
184 zookeeperStatusOk = this.checkZookeeperStatus();
186 if (zookeeperStatusOk == false) {
187 if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
188 logger.debug(EELFLoggerDelegate.debugLogger,
189 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
190 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
191 numIntervalsZookeeperNotHealthy++;
193 numIntervalsZookeeperNotHealthy = 0;
197 cassandraStatusOk = this.checkCassandraStatus();
198 if (cassandraStatusOk == false) {
199 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
200 logger.debug(EELFLoggerDelegate.debugLogger,
201 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
202 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
203 numIntervalsCassandraNotHealthy++;
205 numIntervalsCassandraNotHealthy = 0;
212 // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
213 // returned, status is Up.
214 // If down, signal alert once every X intervals.
215 // EP will ignore this bogus message.
216 // Commenting this out as Dependency on UEB is being deprecated
218 * uebUp = this.checkIfUebUp(); if (uebUp == false) {
220 * if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
221 * logger.debug(EELFLoggerDelegate.debugLogger,
222 * "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write
223 * a Log entry that will generate an alert EPLogUtil.logEcompError(logger,
224 * EPAppMessagesEnum.BeHealthCheckUebClusterError);
225 * numIntervalsUebHasBeenDown++; } else { numIntervalsUebHasBeenDown = 0; } }
228 // The front end should be up because the API is called through
229 // proxy front end server.
232 // If the rest API called, the backend is always up
236 // future nice to have...get Partner status
238 // For all apps exposing a rest url, query one of the rest
239 // urls(/roles?) and manage a list
240 // of app name/status. We might not return back a non 200 OK in
241 // health check, but we
242 // could return information in the json content of a health check.
246 // Get DB status. If down, signal alert once every X intervals.
248 if (Thread.interrupted()) {
249 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
254 Thread.sleep(sleepInterval);
255 } catch (InterruptedException e) {
256 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
257 Thread.currentThread().interrupt();
263 public void initHealthMonitor() {
264 healthMonitorThread = new Thread("EP HealthMonitor thread") {
268 } catch (InterruptedException e) {
269 logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
271 catch (Exception e) {
272 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
276 healthMonitorThread.start();
281 public void closeHealthMonitor() {
282 this.healthMonitorThread.interrupt();
286 * This routine checks whether the database can be read. In June 2017 we
287 * experimented with checking if the database can be WRITTEN. Writes failed
288 * with some regularity in a MariaDB Galera cluster, and in that
289 * environment, the resulting alerts in the log triggered a health monitor
290 * cron job to shut down the Tomcat instance. The root cause of the cluster
291 * write failures was not determined.
293 * @return true if the database can be read.
295 private boolean checkIfDatabaseUp() {
296 boolean isUp = false;
297 Session localSession = null;
299 localSession = sessionFactory.openSession();
300 if (localSession != null) {
301 String sql = "select app_name from fn_app where app_id=1";
302 Query query = localSession.createSQLQuery(sql);
303 @SuppressWarnings("unchecked")
304 List<String> queryList = query.list();
305 if (queryList != null) {
309 } catch (Exception e) {
310 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
313 if (localSession != null)
314 localSession.close();
319 private boolean checkZookeeperStatus() {
321 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
322 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
323 for (int i = 0; i < zookeeperNodes.length; i++) {
325 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper :" + zookeeperNodes[i].trim());
326 String[] iport = zookeeperNodes[i].split(":");
327 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
328 Integer.parseInt(iport[1].trim()), "stat");
329 logger.info(EELFLoggerDelegate.applicationLogger,
330 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
331 if (StringUtils.isNotBlank(zkNodeStatistics)) {
332 String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
333 zkNodeStatistics.indexOf("Node"));
334 logger.info(EELFLoggerDelegate.applicationLogger,
335 "Getting Status for zookeeper :" + zookeeperNodes[i].trim() + ":------:" + state);
336 if (state.contains("leader"))
339 } catch (Exception e) {
340 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
348 public boolean checkCassandraStatus() {
349 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
350 if (this.getAdminKeySpace()) {
353 logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
358 private Boolean getAdminKeySpace() {
359 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
360 Instant creationTime = Instant.now();
361 PreparedQueryObject pQuery = new PreparedQueryObject();
362 pQuery.appendQueryString(
363 "UPDATE " + musicKeySpace + ".health_check SET creation_time = ? WHERE primary_id = ?");
364 pQuery.addValue(creationTime.toString());
365 pQuery.addValue(APPLICATION);
367 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
368 } catch (MusicServiceException e) {
369 return Boolean.FALSE;
376 private boolean checkDatabasePermissions() {
377 boolean isUp = false;
378 Session localSession = null;
380 localSession = sessionFactory.openSession();
381 if (localSession != null) {
382 String sql = "SHOW GRANTS FOR CURRENT_USER";
383 Query query = localSession.createSQLQuery(sql);
384 @SuppressWarnings("unchecked")
385 List<String> grantsList = query.list();
386 for (String str : grantsList) {
387 if ((str.toUpperCase().contains("ALL"))
388 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
389 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
395 logger.error(EELFLoggerDelegate.errorLogger,
396 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
397 for (String str : grantsList) {
398 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
402 } catch (Exception e) {
403 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
404 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
405 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
409 if (localSession != null) {
410 localSession.close();