Sync Integ to Master
[sdc.git] / catalog-be / src / main / java / org / openecomp / sdc / be / components / health / HealthCheckBusinessLogic.java
1 /*-
2  * ============LICENSE_START=======================================================
3  * SDC
4  * ================================================================================
5  * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
6  * ================================================================================
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  * ============LICENSE_END=========================================================
19  */
20
21 package org.openecomp.sdc.be.components.health;
22
23 import com.fasterxml.jackson.core.type.TypeReference;
24 import com.fasterxml.jackson.databind.ObjectMapper;
25 import org.apache.commons.lang3.tuple.ImmutablePair;
26 import org.apache.commons.lang3.tuple.Pair;
27 import org.openecomp.sdc.be.components.distribution.engine.DistributionEngineClusterHealth;
28 import org.openecomp.sdc.be.components.distribution.engine.DmaapHealth;
29 import org.openecomp.sdc.be.components.distribution.engine.UebHealthCheckCall;
30 import org.openecomp.sdc.be.components.impl.CassandraHealthCheck;
31 import org.openecomp.sdc.be.config.BeEcompErrorManager;
32 import org.openecomp.sdc.be.config.Configuration;
33 import org.openecomp.sdc.be.config.ConfigurationManager;
34 import org.openecomp.sdc.be.dao.impl.EsHealthCheckDao;
35 import org.openecomp.sdc.be.dao.titan.TitanGenericDao;
36 import org.openecomp.sdc.be.switchover.detector.SwitchoverDetector;
37 import org.openecomp.sdc.common.api.HealthCheckInfo;
38 import org.openecomp.sdc.common.api.HealthCheckInfo.HealthCheckStatus;
39 import org.openecomp.sdc.common.http.client.api.HttpRequest;
40 import org.openecomp.sdc.common.http.client.api.HttpResponse;
41 import org.openecomp.sdc.common.http.config.HttpClientConfig;
42 import org.openecomp.sdc.common.http.config.Timeouts;
43 import org.openecomp.sdc.common.util.HealthCheckUtil;
44 import org.slf4j.Logger;
45 import org.slf4j.LoggerFactory;
46 import org.springframework.beans.factory.annotation.Autowired;
47 import org.springframework.stereotype.Component;
48
49 import javax.annotation.PostConstruct;
50 import javax.annotation.PreDestroy;
51 import javax.annotation.Resource;
52 import java.util.ArrayList;
53 import java.util.HashMap;
54 import java.util.List;
55 import java.util.Map;
56 import java.util.Map.Entry;
57 import java.util.concurrent.Executors;
58 import java.util.concurrent.ScheduledExecutorService;
59 import java.util.concurrent.ScheduledFuture;
60 import java.util.stream.Collectors;
61
62 import static java.lang.String.format;
63 import static java.util.concurrent.TimeUnit.SECONDS;
64 import static org.apache.http.HttpStatus.SC_INTERNAL_SERVER_ERROR;
65 import static org.apache.http.HttpStatus.SC_OK;
66 import static org.openecomp.sdc.common.api.Constants.*;
67 import static org.openecomp.sdc.common.api.HealthCheckInfo.HealthCheckStatus.DOWN;
68 import static org.openecomp.sdc.common.api.HealthCheckInfo.HealthCheckStatus.UP;
69 import static org.openecomp.sdc.common.impl.ExternalConfiguration.getAppVersion;
70
71
72 @Component("healthCheckBusinessLogic")
73 public class HealthCheckBusinessLogic {
74
75     protected static final String BE_HEALTH_LOG_CONTEXT = "be.healthcheck";
76     private static final String BE_HEALTH_CHECK_STR = "beHealthCheck";
77     private static final String COMPONENT_CHANGED_MESSAGE = "BE Component %s state changed from %s to %s";
78     private static final Logger healthLogger = LoggerFactory.getLogger(BE_HEALTH_LOG_CONTEXT);
79     private static final Logger log = LoggerFactory.getLogger(HealthCheckBusinessLogic.class.getName());
80     private static final HealthCheckUtil healthCheckUtil = new HealthCheckUtil();
81     ScheduledExecutorService healthCheckScheduler = Executors.newSingleThreadScheduledExecutor((Runnable r) -> new Thread(r, "BE-Health-Check-Task"));
82     HealthCheckScheduledTask healthCheckScheduledTask = null;
83     @Resource
84     private TitanGenericDao titanGenericDao;
85     @Resource
86     private EsHealthCheckDao esHealthCheckDao;
87     @Resource
88     private DistributionEngineClusterHealth distributionEngineClusterHealth;
89     @Resource
90     private DmaapHealth dmaapHealth;
91     @Resource
92     private CassandraHealthCheck cassandraHealthCheck;
93     @Autowired
94     private SwitchoverDetector switchoverDetector;
95     private volatile List<HealthCheckInfo> prevBeHealthCheckInfos = null;
96     private ScheduledFuture<?> scheduledFuture = null;
97
98     @PostConstruct
99     public void init() {
100
101         prevBeHealthCheckInfos = getBeHealthCheckInfos();
102
103         log.debug("After initializing prevBeHealthCheckInfos: {}", prevBeHealthCheckInfos);
104
105         healthCheckScheduledTask = new HealthCheckScheduledTask();
106
107         if (this.scheduledFuture == null) {
108             this.scheduledFuture = this.healthCheckScheduler.scheduleAtFixedRate(healthCheckScheduledTask, 0, 3, SECONDS);
109         }
110
111     }
112
113     public boolean isDistributionEngineUp() {
114
115         HealthCheckInfo healthCheckInfo = distributionEngineClusterHealth.getHealthCheckInfo();
116         return !healthCheckInfo.getHealthCheckStatus().equals(DOWN);
117     }
118
119     public Pair<Boolean, List<HealthCheckInfo>> getBeHealthCheckInfosStatus() {
120         Configuration config = ConfigurationManager.getConfigurationManager().getConfiguration();
121         return new ImmutablePair<>(healthCheckUtil.getAggregateStatus(prevBeHealthCheckInfos, config.getHealthStatusExclude()), prevBeHealthCheckInfos);
122     }
123
124     private List<HealthCheckInfo> getBeHealthCheckInfos() {
125
126         log.trace("In getBeHealthCheckInfos");
127
128         List<HealthCheckInfo> healthCheckInfos = new ArrayList<>();
129
130         //Dmaap
131         getDmaapHealthCheck(healthCheckInfos);
132         // BE
133         getBeHealthCheck(healthCheckInfos);
134
135         // Titan
136         getTitanHealthCheck(healthCheckInfos);
137         // ES
138         getEsHealthCheck(healthCheckInfos);
139
140         // Distribution Engine
141         getDistributionEngineCheck(healthCheckInfos);
142
143         //Cassandra
144         getCassandraHealthCheck(healthCheckInfos);
145
146         // Amdocs
147         getAmdocsHealthCheck(healthCheckInfos);
148
149         //DCAE
150         getDcaeHealthCheck(healthCheckInfos);
151
152         return healthCheckInfos;
153     }
154
155     private List<HealthCheckInfo> getEsHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
156
157         // ES health check and version
158         String appVersion = getAppVersion();
159         HealthCheckStatus healthCheckStatus;
160         String description;
161
162         try {
163             healthCheckStatus = esHealthCheckDao.getClusterHealthStatus();
164         } catch (Exception e) {
165             healthCheckStatus = DOWN;
166             description = "ES cluster error: " + e.getMessage();
167             healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_ES, healthCheckStatus, appVersion, description));
168             return healthCheckInfos;
169         }
170         if (healthCheckStatus.equals(DOWN)) {
171             description = "ES cluster is down";
172         } else {
173             description = "OK";
174         }
175         healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_ES, healthCheckStatus, appVersion, description));
176         return healthCheckInfos;
177     }
178
179     private List<HealthCheckInfo> getBeHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
180         String appVersion = getAppVersion();
181         String description = "OK";
182         healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_BE, UP, appVersion, description));
183         return healthCheckInfos;
184     }
185
186     private List<HealthCheckInfo> getDmaapHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
187         String appVersion = getAppVersion();
188         dmaapHealth.getHealthCheckInfo().setVersion(appVersion);
189         healthCheckInfos.add(dmaapHealth.getHealthCheckInfo());
190         return healthCheckInfos;
191     }
192
193
194     public List<HealthCheckInfo> getTitanHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
195         // Titan health check and version
196         String description;
197         boolean isTitanUp;
198
199         try {
200             isTitanUp = titanGenericDao.isGraphOpen();
201         } catch (Exception e) {
202             description = "Titan error: " + e.getMessage();
203             healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_TITAN, DOWN, null, description));
204             return healthCheckInfos;
205         }
206         if (isTitanUp) {
207             description = "OK";
208             healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_TITAN, UP, null, description));
209         } else {
210             description = "Titan graph is down";
211             healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_TITAN, DOWN, null, description));
212         }
213         return healthCheckInfos;
214     }
215
216     private List<HealthCheckInfo> getCassandraHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
217
218         String description;
219         boolean isCassandraUp;
220
221         try {
222             isCassandraUp = cassandraHealthCheck.getCassandraStatus();
223         } catch (Exception e) {
224             isCassandraUp = false;
225             log.debug("Cassandra error: " + e.getMessage());
226         }
227         if (isCassandraUp) {
228             description = "OK";
229             healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_CASSANDRA, UP, null, description));
230         } else {
231             description = "Cassandra is down";
232             healthCheckInfos.add(new HealthCheckInfo(HC_COMPONENT_CASSANDRA, DOWN, null, description));
233         }
234         return healthCheckInfos;
235
236     }
237
238     private void getDistributionEngineCheck(List<HealthCheckInfo> healthCheckInfos) {
239
240         HealthCheckInfo healthCheckInfo = distributionEngineClusterHealth.getHealthCheckInfo();
241
242         healthCheckInfos.add(healthCheckInfo);
243
244     }
245
246     private List<HealthCheckInfo> getAmdocsHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
247         HealthCheckInfo beHealthCheckInfo = getHostedComponentsBeHealthCheck(HC_COMPONENT_ON_BOARDING, buildOnBoardingHealthCheckUrl());
248         healthCheckInfos.add(beHealthCheckInfo);
249         return healthCheckInfos;
250     }
251
252     private List<HealthCheckInfo> getDcaeHealthCheck(List<HealthCheckInfo> healthCheckInfos) {
253         HealthCheckInfo beHealthCheckInfo = getHostedComponentsBeHealthCheck(HC_COMPONENT_DCAE, buildDcaeHealthCheckUrl());
254         healthCheckInfos.add(beHealthCheckInfo);
255         return healthCheckInfos;
256     }
257
258     private HealthCheckInfo getHostedComponentsBeHealthCheck(String componentName, String healthCheckUrl) {
259         HealthCheckStatus healthCheckStatus;
260         String description;
261         String version = null;
262         List<HealthCheckInfo> componentsInfo = new ArrayList<>();
263         final int timeout = 3000;
264
265         if (healthCheckUrl != null) {
266             try {
267                 HttpResponse<String> httpResponse = HttpRequest.get(healthCheckUrl, new HttpClientConfig(new Timeouts(timeout, timeout)));
268                 int statusCode = httpResponse.getStatusCode();
269                 String aggDescription = "";
270
271                 if (statusCode == SC_OK || statusCode == SC_INTERNAL_SERVER_ERROR) {
272                     String response = httpResponse.getResponse();
273                     log.trace("{} Health Check response: {}", componentName, response);
274                     ObjectMapper mapper = new ObjectMapper();
275                     Map<String, Object> healthCheckMap = mapper.readValue(response, new TypeReference<Map<String, Object>>() {
276                     });
277                     version = healthCheckMap.get("sdcVersion") != null ? healthCheckMap.get("sdcVersion").toString() : null;
278                     if (healthCheckMap.containsKey("componentsInfo")) {
279                         componentsInfo = mapper.convertValue(healthCheckMap.get("componentsInfo"), new TypeReference<List<HealthCheckInfo>>() {
280                         });
281                     }
282
283                     if (!componentsInfo.isEmpty()) {
284                         aggDescription = healthCheckUtil.getAggregateDescription(componentsInfo, null);
285                     } else {
286                         componentsInfo.add(new HealthCheckInfo(HC_COMPONENT_BE, DOWN, null, null));
287                     }
288                 } else {
289                     log.trace("{} Health Check Response code: {}", componentName, statusCode);
290                 }
291
292                 if (statusCode != SC_OK) {
293                     healthCheckStatus = DOWN;
294                     description = aggDescription.length() > 0
295                             ? aggDescription
296                             : componentName + " is Down, specific reason unknown";//No inner component returned DOWN, but the status of HC is still DOWN.
297                     if (componentsInfo.isEmpty()) {
298                         componentsInfo.add(new HealthCheckInfo(HC_COMPONENT_BE, DOWN, null, description));
299                     }
300                 } else {
301                     healthCheckStatus = UP;
302                     description = "OK";
303                 }
304
305             } catch (Exception e) {
306                 log.error("{} unexpected response: ", componentName, e);
307                 healthCheckStatus = DOWN;
308                 description = componentName + " unexpected response: " + e.getMessage();
309                 if (componentsInfo != null && componentsInfo.isEmpty()) {
310                     componentsInfo.add(new HealthCheckInfo(HC_COMPONENT_BE, DOWN, null, description));
311                 }
312             }
313         } else {
314             healthCheckStatus = DOWN;
315             description = componentName + " health check Configuration is missing";
316             componentsInfo.add(new HealthCheckInfo(HC_COMPONENT_BE, DOWN, null, description));
317         }
318
319         return new HealthCheckInfo(componentName, healthCheckStatus, version, description, componentsInfo);
320     }
321
322     @PreDestroy
323     protected void destroy() {
324
325         if (scheduledFuture != null) {
326             scheduledFuture.cancel(true);
327             scheduledFuture = null;
328         }
329
330         if (healthCheckScheduler != null) {
331             healthCheckScheduler.shutdown();
332         }
333
334     }
335
336     private void logAlarm(String componentChangedMsg) {
337         BeEcompErrorManager.getInstance().logBeHealthCheckRecovery(componentChangedMsg);
338     }
339
340     public String getSiteMode() {
341         return switchoverDetector.getSiteMode();
342     }
343
344     public boolean anyStatusChanged(List<HealthCheckInfo> beHealthCheckInfos, List<HealthCheckInfo> prevBeHealthCheckInfos) {
345
346         boolean result = false;
347
348         if (beHealthCheckInfos != null && prevBeHealthCheckInfos != null) {
349
350             Map<String, HealthCheckStatus> currentValues = beHealthCheckInfos.stream().collect(Collectors.toMap(HealthCheckInfo::getHealthCheckComponent, HealthCheckInfo::getHealthCheckStatus));
351             Map<String, HealthCheckStatus> prevValues = prevBeHealthCheckInfos.stream().collect(Collectors.toMap(HealthCheckInfo::getHealthCheckComponent, HealthCheckInfo::getHealthCheckStatus));
352
353             if (currentValues != null && prevValues != null) {
354                 int currentSize = currentValues.size();
355                 int prevSize = prevValues.size();
356
357                 if (currentSize != prevSize) {
358
359                     result = true; //extra/missing component
360
361                     Map<String, HealthCheckStatus> notPresent = null;
362                     if (currentValues.keySet().containsAll(prevValues.keySet())) {
363                         notPresent = new HashMap<>(currentValues);
364                         notPresent.keySet().removeAll(prevValues.keySet());
365                     } else {
366                         notPresent = new HashMap<>(prevValues);
367                         notPresent.keySet().removeAll(currentValues.keySet());
368                     }
369
370                     for (String component : notPresent.keySet()) {
371                         logAlarm(format(COMPONENT_CHANGED_MESSAGE, component, prevValues.get(component), currentValues.get(component)));
372                     }
373
374                 } else {
375
376                     for (Entry<String, HealthCheckStatus> entry : currentValues.entrySet()) {
377                         String key = entry.getKey();
378                         HealthCheckStatus value = entry.getValue();
379
380                         if (!prevValues.containsKey(key)) {
381                             result = true; //component missing
382                             logAlarm(format(COMPONENT_CHANGED_MESSAGE, key, prevValues.get(key), currentValues.get(key)));
383                             break;
384                         }
385
386                         HealthCheckStatus prevHealthCheckStatus = prevValues.get(key);
387
388                         if (value != prevHealthCheckStatus) {
389                             result = true; //component status changed
390                             logAlarm(format(COMPONENT_CHANGED_MESSAGE, key, prevValues.get(key), currentValues.get(key)));
391                             break;
392                         }
393                     }
394                 }
395             }
396
397         } else if (beHealthCheckInfos == null && prevBeHealthCheckInfos == null) {
398             result = false;
399         } else {
400             logAlarm(format(COMPONENT_CHANGED_MESSAGE, "", prevBeHealthCheckInfos == null ? "null" : "true", prevBeHealthCheckInfos == null ? "true" : "null"));
401             result = true;
402         }
403
404         return result;
405     }
406
407     private String buildOnBoardingHealthCheckUrl() {
408
409         Configuration.OnboardingConfig onboardingConfig = ConfigurationManager.getConfigurationManager().getConfiguration().getOnboarding();
410
411         if (onboardingConfig != null) {
412             String protocol = onboardingConfig.getProtocol();
413             String host = onboardingConfig.getHost();
414             Integer port = onboardingConfig.getPort();
415             String uri = onboardingConfig.getHealthCheckUri();
416
417             return protocol + "://" + host + ":" + port + uri;
418         }
419
420         log.error("onboarding health check configuration is missing.");
421         return null;
422     }
423
424     private String buildDcaeHealthCheckUrl() {
425
426         Configuration.DcaeConfig dcaeConfig = ConfigurationManager.getConfigurationManager().getConfiguration().getDcae();
427
428         if (dcaeConfig != null) {
429             String protocol = dcaeConfig.getProtocol();
430             String host = dcaeConfig.getHost();
431             Integer port = dcaeConfig.getPort();
432             String uri = dcaeConfig.getHealthCheckUri();
433
434             return protocol + "://" + host + ":" + port + uri;
435         }
436
437         log.error("dcae health check configuration is missing.");
438         return null;
439     }
440
441     public class HealthCheckScheduledTask implements Runnable {
442
443         List<UebHealthCheckCall> healthCheckCalls = new ArrayList<>();
444
445         @Override
446         public void run() {
447             Configuration config = ConfigurationManager.getConfigurationManager().getConfiguration();
448             healthLogger.trace("Executing BE Health Check Task");
449
450             List<HealthCheckInfo> currentBeHealthCheckInfos = getBeHealthCheckInfos();
451             boolean healthStatus = healthCheckUtil.getAggregateStatus(currentBeHealthCheckInfos, config.getHealthStatusExclude());
452
453             boolean prevHealthStatus = healthCheckUtil.getAggregateStatus(prevBeHealthCheckInfos, config.getHealthStatusExclude());
454
455             boolean anyStatusChanged = anyStatusChanged(currentBeHealthCheckInfos, prevBeHealthCheckInfos);
456
457             if (prevHealthStatus != healthStatus || anyStatusChanged) {
458                 log.trace("BE Health State Changed to {}. Issuing alarm / recovery alarm...", healthStatus);
459
460                 prevBeHealthCheckInfos = currentBeHealthCheckInfos;
461                 logAlarm(healthStatus);
462             }
463
464         }
465
466         private void logAlarm(boolean prevHealthState) {
467             if (prevHealthState) {
468                 BeEcompErrorManager.getInstance().logBeHealthCheckRecovery(BE_HEALTH_CHECK_STR);
469             } else {
470                 BeEcompErrorManager.getInstance().logBeHealthCheckError(BE_HEALTH_CHECK_STR);
471             }
472         }
473     }
474
475 }