2 * ============LICENSE_START==========================================
4 * ===================================================================
5 * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ===================================================================
8 * Unless otherwise specified, all software contained herein is licensed
9 * under the Apache License, Version 2.0 (the "License");
10 * you may not use this software except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
21 * Unless otherwise specified, all documentation contained herein is licensed
22 * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23 * you may not use this documentation except in compliance with the License.
24 * You may obtain a copy of the License at
26 * https://creativecommons.org/licenses/by/4.0/
28 * Unless required by applicable law or agreed to in writing, documentation
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
34 * ============LICENSE_END============================================
38 package org.onap.portal.scheduler.healthMonitor;
40 import java.time.Instant;
41 import java.util.List;
42 import javax.annotation.PostConstruct;
43 import javax.annotation.PreDestroy;
44 import javax.persistence.EntityManagerFactory;
45 import lombok.NoArgsConstructor;
46 import org.apache.zookeeper.client.FourLetterWordMain;
47 import org.hibernate.Query;
48 import org.hibernate.Session;
49 import org.hibernate.SessionFactory;
50 import org.onap.music.datastore.PreparedQueryObject;
51 import org.onap.music.exceptions.MusicServiceException;
52 import org.onap.music.main.MusicCore;
53 import org.onap.music.main.MusicUtil;
54 import org.onap.portal.logging.format.EPAppMessagesEnum;
55 import org.onap.portal.logging.logic.EPLogUtil;
56 import org.onap.portal.utils.EPCommonSystemProperties;
57 import org.onap.portalapp.music.util.MusicProperties;
58 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
59 import org.onap.portalsdk.core.util.SystemProperties;
60 import org.springframework.beans.factory.annotation.Autowired;
61 import org.springframework.context.annotation.Configuration;
62 import org.springframework.context.annotation.EnableAspectJAutoProxy;
63 import org.springframework.transaction.annotation.Transactional;
68 @EnableAspectJAutoProxy
70 public class HealthMonitor {
71 private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
72 private Thread healthMonitorThread;
73 private static EntityManagerFactory entityManagerFactory;
74 private static boolean databaseUp;
75 private static boolean uebUp;
76 private static boolean frontEndUp;
77 private static boolean backEndUp;
78 private static boolean dbPermissionsOk;
79 private static boolean zookeeperStatusOk;
80 private static boolean cassandraStatusOk;
81 private static String application = "Portal";
82 private static boolean isSuspended = false;
/**
 * Creates the monitor and records the JPA factory used by the database probes.
 * The factory is kept in a static field, so it is shared by all instances of this class.
 *
 * @param entityManagerFactory factory the probes unwrap to obtain a Hibernate SessionFactory
 */
public HealthMonitor(EntityManagerFactory entityManagerFactory) {
    // The target is a static field, so it is addressed through the class rather than through "this".
    HealthMonitor.entityManagerFactory = entityManagerFactory;
}
90 private static void monitorEPHealth() {
92 int numIntervalsDatabaseHasBeenDown = 0;
93 int numIntervalsDatabasePermissionsIncorrect = 0;
94 int numIntervalsZookeeperNotHealthy = 0;
95 int numIntervalsCassandraNotHealthy = 0;
97 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
100 long sleepInterval = (Long
101 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
102 long numIntervalsBetweenAlerts = Long
103 .parseLong(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
104 logger.debug(EELFLoggerDelegate.debugLogger,
105 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
106 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
109 logger.debug(EELFLoggerDelegate.debugLogger,
110 "monitorEPHealth: Test Connection to all");
112 // Get DB status. If down, signal alert once every X intervals.
114 databaseUp = checkIfDatabaseUp();
116 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
117 logger.debug(EELFLoggerDelegate.debugLogger,
118 "monitorEPHealth: database down, logging to error log to trigger alert.");
119 // Write a Log entry that will generate an alert
120 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
121 numIntervalsDatabaseHasBeenDown++;
123 numIntervalsDatabaseHasBeenDown = 0;
127 dbPermissionsOk = checkDatabasePermissions();
128 if (!dbPermissionsOk) {
129 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
130 logger.debug(EELFLoggerDelegate.debugLogger,
131 "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
132 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
133 numIntervalsDatabasePermissionsIncorrect++;
135 numIntervalsDatabasePermissionsIncorrect = 0;
138 if(org.onap.portalapp.music.util.MusicUtil.isMusicEnable()){
139 cassandraStatusOk = checkCassandraStatus();
140 if (!cassandraStatusOk) {
141 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
142 logger.debug(EELFLoggerDelegate.debugLogger,
143 "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
144 EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
145 numIntervalsCassandraNotHealthy++;
147 numIntervalsCassandraNotHealthy = 0;
154 if (Thread.interrupted()) {
155 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
160 Thread.sleep(sleepInterval);
161 } catch (InterruptedException e) {
162 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
163 Thread.currentThread().interrupt();
169 public void initHealthMonitor() {
170 healthMonitorThread = new Thread("EP HealthMonitor thread") {
176 catch (Exception e) {
177 logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
181 healthMonitorThread.start();
186 public void closeHealthMonitor() {
187 this.healthMonitorThread.interrupt();
191 * This routine checks whether the database can be read. In June 2017 we
192 * experimented with checking if the database can be WRITTEN. Writes failed
193 * with some regularity in a MariaDB Galera cluster, and in that
194 * environment, the resulting alerts in the log triggered a health monitor
195 * cron job to shut down the Tomcat instance. The root cause of the cluster
196 * write failures was not determined.
198 * @return true if the database can be read.
200 private static boolean checkIfDatabaseUp() {
201 boolean isUp = false;
202 Session localSession = null;
204 localSession = entityManagerFactory.unwrap(SessionFactory.class).openSession();
205 if (localSession != null) {
206 String sql = "select app_name from fn_app where app_id=1";
207 Query query = localSession.createSQLQuery(sql);
208 @SuppressWarnings("unchecked")
209 List<String> queryList = query.list();
210 if (queryList != null) {
214 } catch (Exception e) {
215 logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
218 if (localSession != null)
219 localSession.close();
224 private static boolean checkZookeeperStatus() {
226 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
227 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
228 for (String zookeeperNode : zookeeperNodes) {
230 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper :" + zookeeperNode.trim());
231 String[] iport = zookeeperNode.split(":");
232 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
233 Integer.parseInt(iport[1].trim()), "stat");
234 logger.info(EELFLoggerDelegate.applicationLogger,
235 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
236 if (!zkNodeStatistics.isEmpty()) {
237 String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
238 zkNodeStatistics.indexOf("Node"));
239 logger.info(EELFLoggerDelegate.applicationLogger,
241 "Getting Status for zookeeper :" + zookeeperNode.trim() + ":------:" + state);
242 if (state.contains("leader") || state.contains("follower")) {
246 } catch (Exception e) {
247 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
255 private static boolean checkCassandraStatus() {
256 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
257 if (getAdminKeySpace()) {
260 logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
265 private static Boolean getAdminKeySpace() {
266 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE);
267 Instant creationTime = Instant.now();
268 PreparedQueryObject pQuery = new PreparedQueryObject();
269 pQuery.appendQueryString(
270 "UPDATE " + musicKeySpace + ".health_check SET creation_time = ? WHERE primary_id = ?");
271 pQuery.addValue(creationTime.toString());
272 pQuery.addValue(application);
274 MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.CRITICAL);
275 } catch (MusicServiceException e) {
276 logger.error(EELFLoggerDelegate.errorLogger, e.getErrorMessage(), e);
277 return Boolean.FALSE;
284 private static boolean checkDatabasePermissions() {
285 boolean isUp = false;
286 Session localSession = null;
288 localSession = entityManagerFactory.unwrap(SessionFactory.class).openSession();
289 if (localSession != null) {
290 String sql = "SHOW GRANTS FOR CURRENT_USER";
291 Query query = localSession.createSQLQuery(sql);
292 @SuppressWarnings("unchecked")
293 List<String> grantsList = query.list();
294 for (String str : grantsList) {
295 if ((str.toUpperCase().contains("ALL"))
296 || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
297 && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
303 logger.error(EELFLoggerDelegate.errorLogger,
304 "checkDatabasePermissions returning false. SHOW GRANTS FOR CURRENT_USER being dumped:");
305 for (String str : grantsList) {
306 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
310 } catch (Exception e) {
311 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
312 if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
313 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
317 if (localSession != null) {
318 localSession.close();
324 public static boolean isDatabaseUp() {
328 public static boolean isUebUp() {
332 public static boolean isFrontEndUp() {
336 public static boolean isBackEndUp() {
340 public static boolean isDbPermissionsOk() {
341 return dbPermissionsOk;
344 public static boolean isZookeeperStatusOk() {
345 return zookeeperStatusOk;
348 public static boolean isCassandraStatusOk() {
349 return cassandraStatusOk;
352 public static boolean isSuspended() {
356 public static void setSuspended(boolean isSuspended) {
357 HealthMonitor.isSuspended = isSuspended;