Bulk upload changes and music health check apis
[portal.git] / ecomp-portal-BE-common / src / main / java / org / onap / portalapp / portal / listener / HealthMonitor.java
1 /*-
2  * ============LICENSE_START==========================================
3  * ONAP Portal
4  * ===================================================================
5  * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
6  * ===================================================================
7  *
8  * Unless otherwise specified, all software contained herein is licensed
9  * under the Apache License, Version 2.0 (the "License");
10  * you may not use this software except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *             http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  * Unless otherwise specified, all documentation contained herein is licensed
22  * under the Creative Commons License, Attribution 4.0 Intl. (the "License");
23  * you may not use this documentation except in compliance with the License.
24  * You may obtain a copy of the License at
25  *
26  *             https://creativecommons.org/licenses/by/4.0/
27  *
28  * Unless required by applicable law or agreed to in writing, documentation
29  * distributed under the License is distributed on an "AS IS" BASIS,
30  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31  * See the License for the specific language governing permissions and
32  * limitations under the License.
33  *
34  * ============LICENSE_END============================================
35  *
36  * 
37  */
38 package org.onap.portalapp.portal.listener;
39
40 import java.io.IOException;
41 import java.util.HashSet;
42 import java.util.List;
43 import java.util.Set;
44 import java.util.UUID;
45
46 import javax.annotation.PostConstruct;
47 import javax.annotation.PreDestroy;
48
49 import org.apache.commons.lang3.StringUtils;
50 import org.apache.zookeeper.ZooKeeper;
51 import org.apache.zookeeper.client.FourLetterWordMain;
52 import org.hibernate.Query;
53 import org.hibernate.Session;
54 import org.hibernate.SessionFactory;
55 import org.onap.music.datastore.PreparedQueryObject;
56 import org.onap.music.exceptions.MusicServiceException;
57 import org.onap.music.main.MusicCore;
58 import org.onap.music.main.MusicUtil;
59 import org.onap.portalapp.music.util.MusicProperties;
60 import org.onap.portalapp.portal.logging.aop.EPMetricsLog;
61 import org.onap.portalapp.portal.logging.format.EPAppMessagesEnum;
62 import org.onap.portalapp.portal.logging.logic.EPLogUtil;
63 import org.onap.portalapp.portal.utils.EPCommonSystemProperties;
64 import org.onap.portalsdk.core.logging.logic.EELFLoggerDelegate;
65 import org.onap.portalsdk.core.util.SystemProperties;
66 import org.springframework.beans.factory.annotation.Autowired;
67 import org.springframework.context.annotation.EnableAspectJAutoProxy;
68 import org.springframework.transaction.annotation.Transactional;
69
70
71
72
73 @Transactional
74 @org.springframework.context.annotation.Configuration
75 @EnableAspectJAutoProxy
76 @EPMetricsLog
77 public class HealthMonitor {
78
79         
80         ZooKeeper zookeeper = null;
81
82         private static EELFLoggerDelegate logger = EELFLoggerDelegate.getLogger(HealthMonitor.class);
83
84         @Autowired
85         private SessionFactory sessionFactory;
86
87
88         private static boolean databaseUp;
89         private static boolean uebUp;
90         private static boolean frontEndUp;
91         private static boolean backEndUp;
92         private static boolean dbClusterStatusOk;
93         private static boolean dbPermissionsOk;
94         private static boolean zookeeperStatusOk;
95         private static boolean cassandraStatusOk;
96         
97         /**
98          * Read directly by external classes.
99          */
100         public static boolean isSuspended = false;
101
102         private Thread healthMonitorThread;
103
104         public HealthMonitor() {
105         }
106
107         public static boolean isDatabaseUp() {
108                 return databaseUp;
109         }
110
111         public static boolean isClusterStatusOk() {
112                 return dbClusterStatusOk;
113         }
114
115         public static boolean isDatabasePermissionsOk() {
116                 return dbPermissionsOk;
117         }
118
119         public static boolean isUebUp() {
120                 return uebUp;
121         }
122
123         public static boolean isFrontEndUp() {
124                 return frontEndUp;
125         }
126
127         public static boolean isBackEndUp() {
128                 return backEndUp;
129         }
130         
131         public static boolean isZookeeperStatusOk() {
132                 return zookeeperStatusOk;
133         }
134
135         public static boolean isCassandraStatusOk() {
136                 return cassandraStatusOk;
137         }
138
139         private void monitorEPHealth() throws InterruptedException {
140
141                 int numIntervalsDatabaseHasBeenDown = 0;
142                 int numIntervalsClusterNotHealthy = 0;
143                 int numIntervalsDatabasePermissionsIncorrect = 0;
144                 int numIntervalsZookeeperNotHealthy = 0;
145                 int numIntervalsCassandraNotHealthy = 0;
146
147                 logger.debug(EELFLoggerDelegate.debugLogger, "monitorEPHealth thread started");
148
149                 long sleepInterval = (Long
150                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTH_POLL_INTERVAL_SECONDS)) * 1000);
151                 long numIntervalsBetweenAlerts = Long
152                                 .valueOf(SystemProperties.getProperty(EPCommonSystemProperties.HEALTHFAIL_ALERT_EVERY_X_INTERVALS));
153                 logger.debug(EELFLoggerDelegate.debugLogger,
154                                 "monitorEPHealth: Polling health every " + sleepInterval + " milliseconds. Alerting every "
155                                                 + (sleepInterval * numIntervalsBetweenAlerts) / 1000 + " seconds when component remains down.");
156
157                 while (true) {
158                         //
159                         // Get DB status. If down, signal alert once every X intervals.
160                         //
161                         databaseUp = this.checkIfDatabaseUp();
162                         if (databaseUp == false) {
163                                 if ((numIntervalsDatabaseHasBeenDown % numIntervalsBetweenAlerts) == 0) {
164                                         logger.debug(EELFLoggerDelegate.debugLogger,
165                                                         "monitorEPHealth: database down, logging to error log to trigger alert.");
166                                         // Write a Log entry that will generate an alert
167                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
168                                         numIntervalsDatabaseHasBeenDown++;
169                                 } else {
170                                         numIntervalsDatabaseHasBeenDown = 0;
171                                 }
172                         }
173
174                         dbClusterStatusOk = this.checkClusterStatus();
175                         if (dbClusterStatusOk == false) {
176                                 if ((numIntervalsClusterNotHealthy % numIntervalsBetweenAlerts) == 0) {
177                                         logger.debug(EELFLoggerDelegate.debugLogger,
178                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
179                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
180                                         numIntervalsClusterNotHealthy++;
181                                 } else {
182                                         numIntervalsClusterNotHealthy = 0;
183                                 }
184                         }
185
186                         dbPermissionsOk = this.checkDatabasePermissions();
187                         if (dbPermissionsOk == false) {
188                                 if ((numIntervalsDatabasePermissionsIncorrect % numIntervalsBetweenAlerts) == 0) {
189                                         logger.debug(EELFLoggerDelegate.debugLogger,
190                                                         "monitorEPHealth: database permissions incorrect, logging to error log to trigger alert.");
191                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.BeHealthCheckMySqlError);
192                                         numIntervalsDatabasePermissionsIncorrect++;
193                                 } else {
194                                         numIntervalsDatabasePermissionsIncorrect = 0;
195                                 }
196                         }
197                         
198                         zookeeperStatusOk = this.checkZookeeperStatus();
199                         if (zookeeperStatusOk == false) {
200                                 if ((numIntervalsZookeeperNotHealthy % numIntervalsBetweenAlerts) == 0) {
201                                         logger.debug(EELFLoggerDelegate.debugLogger,
202                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
203                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckZookeeperError);
204                                         numIntervalsZookeeperNotHealthy++;
205                                 } else {
206                                         numIntervalsZookeeperNotHealthy = 0;
207                                 }
208                         }
209
210                         cassandraStatusOk = this.checkCassandraStatus();
211                         if (cassandraStatusOk == false) {
212                                 if ((numIntervalsCassandraNotHealthy % numIntervalsBetweenAlerts) == 0) {
213                                         logger.debug(EELFLoggerDelegate.debugLogger,
214                                                         "monitorEPHealth: cluster nodes down, logging to error log to trigger alert.");
215                                         EPLogUtil.logEcompError(logger, EPAppMessagesEnum.MusicHealthCheckCassandraError);
216                                         numIntervalsCassandraNotHealthy++;
217                                 } else {
218                                         numIntervalsCassandraNotHealthy = 0;
219                                 }
220                         }
221                         
222                         //
223                         // Get UEB status. Publish a bogus message to EP inbox, if 200 OK
224                         // returned, status is Up.
225                         // If down, signal alert once every X intervals.
226                         // EP will ignore this bogus message.
227                         // Commenting this out as Dependency on UEB is being deprecated
228                         /*
229                          * uebUp = this.checkIfUebUp(); if (uebUp == false) {
230                          * 
231                          * if ((numIntervalsUebHasBeenDown % numIntervalsBetweenAlerts) == 0) {
232                          * logger.debug(EELFLoggerDelegate.debugLogger,
233                          * "monitorEPHealth: UEB down, logging to error log to trigger alert"); // Write
234                          * a Log entry that will generate an alert EPLogUtil.logEcompError(logger,
235                          * EPAppMessagesEnum.BeHealthCheckUebClusterError);
236                          * numIntervalsUebHasBeenDown++; } else { numIntervalsUebHasBeenDown = 0; } }
237                          */
238
239                         // The front end should be up because the API is called through
240                         // proxy front end server.
241                         frontEndUp = true;
242
243                         // If the rest API called, the backend is always up
244                         backEndUp = true;
245
246                         //
247                         // future nice to have...get Partner status
248                         //
249                         // For all apps exposing a rest url, query one of the rest
250                         // urls(/roles?) and manage a list
251                         // of app name/status. We might not return back a non 200 OK in
252                         // health check, but we
253                         // could return information in the json content of a health check.
254                         //
255
256                         //
257                         // Get DB status. If down, signal alert once every X intervals.
258                         //
259                         if (Thread.interrupted()) {
260                                 logger.info(EELFLoggerDelegate.errorLogger, "monitorEPHealth: thread interrupted");
261                                 break;
262                         }
263
264                         try {
265                                 Thread.sleep(sleepInterval);
266                         } catch (InterruptedException e) {
267                                 logger.error(EELFLoggerDelegate.errorLogger, "monitorEPHealth: sleep interrupted", e);
268                                 Thread.currentThread().interrupt();
269                         }
270                 }
271         }
272
273         @PostConstruct
274         public void initHealthMonitor() {
275                 healthMonitorThread = new Thread("EP HealthMonitor thread") {
276                         public void run() {
277                                 try {
278                                         monitorEPHealth();
279                                 } catch (InterruptedException e) {
280                                         logger.debug(EELFLoggerDelegate.debugLogger, "healthMonitorThread interrupted", e);
281                                 } catch (Exception e) {
282                                         logger.error(EELFLoggerDelegate.errorLogger, "healthMonitorThread failed", e);
283                                 }
284                         }
285                 };
286                 healthMonitorThread.start();
287                 
288         }
289
290         @PreDestroy
291         public void closeHealthMonitor() {
292                 this.healthMonitorThread.interrupt();
293         }
294
295         /**
296          * This routine checks whether the database can be read. In June 2017 we
297          * experimented with checking if the database can be WRITTEN. Writes failed
298          * with some regularity in a MariaDB Galera cluster, and in that
299          * environment, the resulting alerts in the log triggered a health monitor
300          * cron job to shut down the Tomcat instance. The root cause of the cluster
301          * write failures was not determined.
302          * 
303          * @return true if the database can be read.
304          */
305         private boolean checkIfDatabaseUp() {
306                 boolean isUp = false;
307                 Session localSession = null;
308                 try {
309                         localSession = sessionFactory.openSession();
310                         if (localSession != null) {
311                                 String sql = "select app_name from fn_app where app_id=1";
312                                 Query query = localSession.createSQLQuery(sql);
313                                 @SuppressWarnings("unchecked")
314                                 List<String> queryList = query.list();
315                                 if (queryList != null) {
316                                         isUp = true;
317                                 }
318                         }
319                 } catch (Exception e) {
320                         logger.debug(EELFLoggerDelegate.debugLogger, "checkIfDatabaseUp failed", e);
321                         isUp = false;
322                 } finally {
323                         if (localSession != null)
324                                 localSession.close();
325                 }
326                 return isUp;
327         }
328
329         private boolean checkClusterStatus() {
330                 boolean isUp = false;
331                 Session localSession = null;
332                 try {
333                         localSession = sessionFactory.openSession();
334                         if (localSession != null) {
335                                 // If all nodes are unhealthy in a cluster, this will throw an
336                                 // exception
337                                 String sql = "select * from mysql.user";
338                                 Query query = localSession.createSQLQuery(sql);
339                                 @SuppressWarnings("unchecked")
340                                 List<String> queryList = query.list();
341                                 if (queryList != null) {
342                                         isUp = true;
343                                 }
344                         }
345                 } catch (Exception e) {
346                         logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failed", e);
347                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
348                                 logger.error(EELFLoggerDelegate.errorLogger, "checkClusterStatus failure cause", e.getCause());
349                         }
350                         isUp = false;
351                 } finally {
352                         if (localSession != null) {
353                                 localSession.close();
354                         }
355                 }
356                 return isUp;
357         }
358
359         private boolean checkZookeeperStatus() {
360
361                 String[] zookeeperNodes = MusicUtil.getMyZkHost().split(",");
362                 logger.info(EELFLoggerDelegate.applicationLogger, "MusicUtil.getMyZkHost()---- :" + MusicUtil.getMyZkHost());
363                 for (int i = 0; i < zookeeperNodes.length; i++) {
364                         try {
365                                 logger.info(EELFLoggerDelegate.applicationLogger, "server ip--zookeeper  :" + zookeeperNodes[i].trim());
366                                 String[] iport = zookeeperNodes[i].split(":");
367                                 String zkNodeStatistics = FourLetterWordMain.send4LetterWord(iport[0].trim(),
368                                                 Integer.parseInt(iport[1].trim()), "stat");
369                                 logger.info(EELFLoggerDelegate.applicationLogger,
370                                                 "Getting Status for Zookeeper zkNodeStatistics :" + zkNodeStatistics);
371                                 if (StringUtils.isNotBlank(zkNodeStatistics)) {
372                                         String state = zkNodeStatistics.substring(zkNodeStatistics.indexOf("Mode:"),
373                                                         zkNodeStatistics.indexOf("Node"));
374                                         logger.info(EELFLoggerDelegate.applicationLogger,
375                                                         "Getting Status for zookeeper :" + zookeeperNodes[i].trim() + ":------:" + state);
376                                         if (state.contains("leader"))
377                                                 return true;
378                                 }
379                         } catch (Exception e) {
380                                 logger.error(EELFLoggerDelegate.errorLogger, "ZookeeperStatus Service is not responding", e.getCause());
381                         }
382                 }
383
384                 return false;
385         }
386
387
388         public boolean checkCassandraStatus() {
389                 logger.info(EELFLoggerDelegate.applicationLogger, "Getting Status for Cassandra");
390                 if (this.getAdminKeySpace()) {
391                         return true;
392                 } else {
393                         logger.error(EELFLoggerDelegate.errorLogger, "Cassandra Service is not responding");
394                         return false;
395                 }
396         }
397         
398         private Boolean getAdminKeySpace() {
399                 String musicKeySpace = MusicProperties.getProperty(MusicProperties.MUSIC_SESSION_KEYSPACE );
400                 //deletePortalHealthcheck(musicKeySpace);
401                 PreparedQueryObject pQuery = new PreparedQueryObject();
402                 pQuery.appendQueryString("insert into  "+musicKeySpace+".healthcheck (id) values (?)");
403                 pQuery.addValue(UUID.randomUUID());
404                 try {
405                          MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.EVENTUAL);
406                 } catch (MusicServiceException e) {
407                         logger.error(EELFLoggerDelegate.errorLogger, "getAdminKeySpace() failed", e.getCause());
408                         return Boolean.FALSE;
409                 }
410                         return Boolean.TRUE;
411         }
412
413         
414         private void  deletePortalHealthcheck(String musicKeySpace) {
415                 PreparedQueryObject pQuery = new PreparedQueryObject();
416                 pQuery.appendQueryString("TRUNCATE  "+musicKeySpace+".healthcheck");
417                 try {
418                         MusicCore.nonKeyRelatedPut(pQuery, MusicUtil.EVENTUAL);
419                 } catch (MusicServiceException e) {
420                         logger.error(EELFLoggerDelegate.errorLogger, "deletePortalHealthcheck() failed", e.getCause());
421                 }
422         }
423         
424         private boolean checkDatabasePermissions() {
425                 boolean isUp = false;
426                 Session localSession = null;
427                 try {
428                         localSession = sessionFactory.openSession();
429                         if (localSession != null) {
430                                 String sql = "SHOW GRANTS FOR CURRENT_USER";
431                                 Query query = localSession.createSQLQuery(sql);
432                                 @SuppressWarnings("unchecked")
433                                 List<String> grantsList = query.list();
434                                 for (String str : grantsList) {
435                                         if ((str.toUpperCase().contains("ALL"))
436                                                         || (str.toUpperCase().contains("DELETE") && str.toUpperCase().contains("SELECT")
437                                                                         && str.toUpperCase().contains("UPDATE") && str.toUpperCase().contains("INSERT"))) {
438                                                 isUp = true;
439                                                 break;
440                                         }
441                                 }
442                                 if (isUp == false) {
443                                         logger.error(EELFLoggerDelegate.errorLogger,
444                                                         "checkDatabasePermissions returning false.  SHOW GRANTS FOR CURRENT_USER being dumped:");
445                                         for (String str : grantsList) {
446                                                 logger.error(EELFLoggerDelegate.errorLogger, "grants output item = [" + str + "]");
447                                         }
448                                 }
449                         }
450                 } catch (Exception e) {
451                         logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failed", e);
452                         if ((e.getCause() != null) && (e.getCause().getMessage() != null)) {
453                                 logger.error(EELFLoggerDelegate.errorLogger, "checkDatabasePermissions failure cause", e.getCause());
454                         }
455                         isUp = false;
456                 } finally {
457                         if (localSession != null) {
458                                 localSession.close();
459                         }
460                 }
461                 return isUp;
462         }
463         
464 }