2 * ============LICENSE_START=======================================================
3 * feature-active-standby-management
4 * ================================================================================
5 * Copyright (C) 2017-2019 AT&T Intellectual Property. All rights reserved.
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ============LICENSE_END=========================================================
21 package org.onap.policy.drools.activestandby;
24 * Per MultiSite_v1-10.ppt:
26 * Extends the StateChangeNotifier class and overrides the abstract handleStateChange() method to get state changes
27 * and do the following:
29 * When the Standby Status changes (from providingservice) to hotstandby or coldstandby,
30 * the Active/Standby selection algorithm must stand down if the PDP-D is currently the lead/active node
31 * and allow another PDP-D to take over. It must also call lock on all engines in the engine management.
33 * When the Standby Status changes (from hotstandby) to coldstandby, the Active/Standby algorithm must NOT assume
34 * the active/lead role.
36 * When the Standby Status changes (from coldstandby or providingservice) to hotstandby,
37 * the Active/Standby algorithm may assume the active/lead role if the active/lead fails.
39 * When the Standby Status changes to providingservice (from hotstandby or coldstandby) call unlock on all
40 * engines in the engine management layer.
42 import java.util.Date;
43 import java.util.Timer;
44 import java.util.TimerTask;
46 import org.onap.policy.common.im.StateChangeNotifier;
47 import org.onap.policy.common.im.StateManagement;
48 import org.onap.policy.drools.system.PolicyEngine;
49 import org.onap.policy.drools.system.PolicyEngineConstants;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
56 * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's
57 * handleStateChange() method used to take care of invoking conn.standDownPdp().
59 * But testing revealed that when a state change to hot standby
60 * occurred from a demote() operation, first the PMStandbyStateChangeNotifier.handleStateChange() method
61 * would be invoked and then the StandbyStateChangeNotifier.handleStateChange() method would be invoked,
62 * and this ordering was creating the following problem:
64 * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish,
65 * because it would result in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a
66 * 5 second sleep for each controller being stopped.
68 * Meanwhile, as these controller stoppages and their associated sleeps were occurring, the election handler
69 * would discover the demoted PDP in hotstandby (but still designated!) and promote it, resulting in the
70 * standbyStatus going from hotstandby to providingservice. So then, by the time that
71 * PMStandbyStateChangeNotifier.handleStateChange() finished its work and
72 * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby
73 * (as effected by the demote), but providingservice (as reset by the election handling logic) and
74 * conn.standDownPdp() would not get called!
76 * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier,
77 * with the standDownPdp() always
78 * being invoked prior to the TopicEndpoint.manager.lock(). In this way, when the election handling logic is invoked
79 * during the controller stoppages, the PDP is in hotstandby and the standdown occurs.
82 public class PmStandbyStateChangeNotifier extends StateChangeNotifier {
83 // get an instance of logger
84 private static final Logger logger = LoggerFactory.getLogger(PmStandbyStateChangeNotifier.class);
85 private Timer delayActivateTimer;
86 private int pdpUpdateInterval;
87 private boolean isWaitingForActivation;
88 private long startTimeWaitingForActivationMs;
89 private long waitInterval;
90 private boolean isNowActivating;
91 private String previousStandbyStatus;
92 public static final String NONE = "none";
93 public static final String UNSUPPORTED = "unsupported";
94 public static final String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";
/**
 * Constructor. Reads the PDP update interval from the active/standby properties and derives
 * from it the delay applied before the PDP is activated.
 *
 * @throws NumberFormatException if the configured PDP update interval is not a parsable integer
 */
public PmStandbyStateChangeNotifier() {
    pdpUpdateInterval =
            Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
    isWaitingForActivation = false;
    startTimeWaitingForActivationMs = System.currentTimeMillis();
    // Delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds.
    // The multiplication is done in long arithmetic so a large interval cannot overflow int.
    waitInterval = 2L * pdpUpdateInterval + 2000L;
    isNowActivating = false;
    previousStandbyStatus = PmStandbyStateChangeNotifier.NONE;
}
112 public void handleStateChange() {
114 * A note on synchronization: This method is not synchronized because the caller,
115 * stateManagememt, has synchronize all of its methods. Only one stateManagement operation
116 * can occur at a time. Thus, only one handleStateChange() call will ever be made at a time.
118 logger.debug("handleStateChange: Entering, message={}, standbyStatus={}", super.getMessage(),
119 super.getStateManagement().getStandbyStatus());
120 String standbyStatus = super.getStateManagement().getStandbyStatus();
121 String pdpId = ActiveStandbyProperties.getProperty(ActiveStandbyProperties.NODE_NAME);
123 logger.debug("handleStateChange: previousStandbyStatus = {}; standbyStatus = {}",
124 previousStandbyStatus, standbyStatus);
126 if (standbyStatus == null || standbyStatus.equals(StateManagement.NULL_VALUE)) {
127 logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
128 standDownPdpNull(pdpId);
130 } else if (standbyStatus.equals(StateManagement.HOT_STANDBY)
131 || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
132 logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
133 standDownPdp(pdpId, standbyStatus);
135 } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
136 logger.debug("handleStateChange: standbyStatus= {} scheduling activation of PDP={}", standbyStatus,
138 schedulePdpActivation(pdpId, standbyStatus);
141 logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
142 standDownPdpUnsupported(pdpId, standbyStatus);
145 logger.debug("handleStateChange: Exiting");
148 private void standDownPdpNull(String pdpId) {
149 if (previousStandbyStatus.equals(StateManagement.NULL_VALUE)) {
150 // We were just here and did this successfully
151 logger.debug("handleStateChange: "
152 + "Is returning because standbyStatus is null and was previously 'null'; PDP={}",
157 isWaitingForActivation = false;
159 logger.debug("handleStateChange: null: cancelling delayActivationTimer.");
161 // Only want to lock the endpoints, not the controllers.
162 getPolicyEngineManager().deactivate();
163 // The operation was fully successful, but you cannot assign it a real null value
164 // because later we might try to execute previousStandbyStatus.equals() and get
165 // a null pointer exception.
166 previousStandbyStatus = StateManagement.NULL_VALUE;
167 } catch (Exception e) {
168 logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
172 private void standDownPdp(String pdpId, String standbyStatus) {
173 if (previousStandbyStatus.equals(PmStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)) {
174 // We were just here and did this successfully
175 logger.debug("handleStateChange: Is returning because standbyStatus is {}"
176 + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
180 isWaitingForActivation = false;
182 logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY: cancelling delayActivationTimer.");
184 // Only want to lock the endpoints, not the controllers.
185 getPolicyEngineManager().deactivate();
186 // The operation was fully successful
187 previousStandbyStatus = PmStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY;
188 } catch (Exception e) {
189 logger.warn("handleStateChange: standbyStatus = {} caught exception: {}", standbyStatus, e.getMessage(),
194 private void schedulePdpActivation(String pdpId, String standbyStatus) {
195 if (previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
196 // We were just here and did this successfully
197 logger.debug("handleStateChange: Is returning because standbyStatus is {}"
198 + "and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
203 // UnLock all the endpoints
204 logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.", standbyStatus);
206 * Only endpoints should be unlocked. Controllers have not been locked. Because,
207 * sometimes, it is possible for more than one PDP-D to become active (race
208 * conditions) we need to delay the activation of the topic endpoint interfaces to
209 * give the election algorithm time to resolve the conflict.
211 logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}",
212 isWaitingForActivation);
214 // Delay activation for 2*pdpUpdateInterval+2000 ms in case of an election handler
216 // You could have multiple election handlers thinking they can take over.
218 // First let's check that the timer has not died
221 if (!isWaitingForActivation) {
222 // Just in case there is an old timer hanging around
223 logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
225 delayActivateTimer = makeTimer();
226 // delay the activate so the DesignatedWaiter can run twice
227 delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
228 isWaitingForActivation = true;
229 startTimeWaitingForActivationMs = new Date().getTime();
230 logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms",
233 logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is "
234 + "waiting for activation.");
237 } catch (Exception e) {
238 logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice caught exception: ",
243 private void checkTimerStatus() {
244 if (isWaitingForActivation) {
245 logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}",
246 isWaitingForActivation);
247 long now = new Date().getTime();
248 long waitTimeMs = now - startTimeWaitingForActivationMs;
249 if (waitTimeMs > 3 * waitInterval) {
250 logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer "
251 + "may be hung, waitTimeMs = {} and allowable waitInterval = {}"
252 + " Checking whether it is currently in activation. isNowActivating = {}",
253 waitTimeMs, waitInterval, isNowActivating);
254 // Now check that it is not currently executing an activation
255 if (!isNowActivating) {
256 logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation "
257 + "wait timer died");
258 // This will assure the timer is cancelled and rescheduled.
259 isWaitingForActivation = false;
265 private void standDownPdpUnsupported(String pdpId, String standbyStatus) {
266 if (previousStandbyStatus.equals(PmStandbyStateChangeNotifier.UNSUPPORTED)) {
267 // We were just here and did this successfully
268 logger.debug("handleStateChange: Is returning because standbyStatus is "
269 + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
273 // Only want to lock the endpoints, not the controllers.
274 isWaitingForActivation = false;
276 logger.debug("handleStateChange: unsupported standbystatus: cancelling delayActivationTimer.");
278 getPolicyEngineManager().deactivate();
279 // We know the standbystatus is unsupported
280 previousStandbyStatus = PmStandbyStateChangeNotifier.UNSUPPORTED;
281 } catch (Exception e) {
282 logger.warn("handleStateChange: Unsupported standbyStatus = {} " + "caught exception: {} ",
283 standbyStatus, e.getMessage(), e);
287 private void cancelTimer() {
288 if (delayActivateTimer != null) {
289 delayActivateTimer.cancel();
293 private class DelayActivateClass extends TimerTask {
295 private Object delayActivateLock = new Object();
300 isNowActivating = true;
302 logger.debug("DelayActivateClass.run: entry");
303 synchronized (delayActivateLock) {
304 getPolicyEngineManager().activate();
305 // The state change fully succeeded
306 previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
307 // We want to set this to false here because the activate call can take a while
308 isWaitingForActivation = false;
309 isNowActivating = false;
311 logger.debug("DelayActivateClass.run.exit");
312 } catch (Exception e) {
313 isWaitingForActivation = false;
314 isNowActivating = false;
315 logger.warn("DelayActivateClass.run: caught an unexpected exception "
316 + "calling PolicyEngineConstants.getManager().activate: ", e);
321 public String getPreviousStandbyStatus() {
322 return previousStandbyStatus;
325 // these may be overridden by junit tests
327 protected PolicyEngine getPolicyEngineManager() {
328 return PolicyEngineConstants.getManager();
331 protected Timer makeTimer() {