2 * ============LICENSE_START=======================================================
3 * feature-active-standby-management
4 * ================================================================================
5 * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ============LICENSE_END=========================================================
21 package org.onap.policy.drools.activestandby;
24 * Per MultiSite_v1-10.ppt:
26 * Extends the StateChangeNotifier class and overrides the abstract handleStateChange() method to get state changes
27 * and do the following:
29 * When the Standby Status changes (from providingservice) to hotstandby or coldstandby,
30 * the Active/Standby selection algorithm must stand down if the PDP-D is currently the lead/active node
31 * and allow another PDP-D to take over. It must also call lock on all engines in the engine management.
33 * When the Standby Status changes from (hotstandby) to coldstandby, the Active/Standby algorithm must NOT assume
34 * the active/lead role.
36 * When the Standby Status changes (from coldstandby or providingservice) to hotstandby,
37 * the Active/Standby algorithm may assume the active/lead role if the active/lead fails.
39 * When the Standby Status changes to providingservice (from hotstandby or coldstandby) call unlock on all
40 * engines in the engine management layer.
42 import java.util.Date;
43 import java.util.Timer;
44 import java.util.TimerTask;
46 import org.onap.policy.common.im.StateChangeNotifier;
47 import org.onap.policy.common.im.StateManagement;
48 import org.slf4j.Logger;
49 import org.slf4j.LoggerFactory;
50 import org.onap.policy.drools.system.PolicyEngine;
55 * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's handleStateChange() method
56 * used to take care of invoking conn.standDownPdp(). But testing revealed that when a state change to hot standby occurred
57 * from a demote() operation, first the PMStandbyStateChangeNotifier.handleStateChange() method would be invoked and then the
58 * StandbyStateChangeNotifier.handleStateChange() method would be invoked, and this ordering was creating the following problem:
60 * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish, because it would result
61 * in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a 5 second sleep for each controller being stopped.
62 * Meanwhile, as these controller stoppages and their associated sleeps were occurring, the election handler would discover the
63 * demoted PDP in hotstandby (but still designated!) and promote it, resulting in the standbyStatus going from hotstandby
64 * to providingservice. So then, by the time that PMStandbyStateChangeNotifier.handleStateChange() finished its work and
65 * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby (as effected by
66 * the demote), but providingservice (as reset by the election handling logic) and conn.standDownPdp() would not get called!
68 * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier, with the standDownPdp() always
69 * being invoked prior to the ProxyTopicEndpointManager.getInstance().lock(). In this way, when the election handling logic is invoked
70 * during the controller stoppages, the PDP is in hotstandby and the standdown occurs.
73 public class PMStandbyStateChangeNotifier extends StateChangeNotifier {
74 // get an instance of logger
75 private static final Logger logger = LoggerFactory.getLogger(PMStandbyStateChangeNotifier.class);
76 private Timer delayActivateTimer;
77 private int pdpUpdateInterval;
78 private boolean isWaitingForActivation;
79 private long startTimeWaitingForActivationMs;
80 private long waitInterval;
81 private boolean isNowActivating;
82 private String previousStandbyStatus;
83 public static final String NONE = "none";
84 public static final String UNSUPPORTED = "unsupported";
85 public static final String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";
87 public PMStandbyStateChangeNotifier(){
88 pdpUpdateInterval = Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
89 isWaitingForActivation = false;
90 startTimeWaitingForActivationMs = new Date().getTime();
91 //delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds
92 waitInterval = 2*pdpUpdateInterval + 2000L;
93 isNowActivating=false;
94 previousStandbyStatus = PMStandbyStateChangeNotifier.NONE;
98 public void handleStateChange() {
100 * A note on synchronization: This method is not synchronized because the caller, stateManagememt,
101 * has synchronize all of its methods. Only one stateManagement operation can occur at a time. Thus,
102 * only one handleStateChange() call will ever be made at a time.
104 if(logger.isDebugEnabled()){
105 logger.debug("handleStateChange: Entering, message={}, standbyStatus={}",
106 super.getMessage(), super.getStateManagement().getStandbyStatus());
108 String standbyStatus = super.getStateManagement().getStandbyStatus();
109 String pdpId = ActiveStandbyProperties
110 .getProperty(ActiveStandbyProperties.NODE_NAME);
112 if(logger.isDebugEnabled()){
113 logger.debug("handleStateChange: previousStandbyStatus = {}"
114 + "; standbyStatus = {}", previousStandbyStatus, standbyStatus);
117 if (standbyStatus == null || standbyStatus.equals(StateManagement.NULL_VALUE)) {
118 if(logger.isDebugEnabled()){
119 logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
121 if(previousStandbyStatus.equals(StateManagement.NULL_VALUE)){
122 //We were just here and did this successfully
123 if(logger.isDebugEnabled()){
124 logger.debug("handleStateChange: Is returning because standbyStatus is null and was previously 'null'; PDP={}", pdpId);
128 isWaitingForActivation = false;
131 if(logger.isDebugEnabled()){
132 logger.debug("handleStateChange: null: cancelling delayActivationTimer.");
134 delayActivateTimer.cancel();
136 if(logger.isInfoEnabled()){
137 logger.info("handleStateChange: null no delayActivationTimer existed.", e);
139 //If you end of here, there was no active timer
141 //Only want to lock the endpoints, not the controllers.
142 PolicyEngine.manager.deactivate();
143 //The operation was fully successful, but you cannot assign it a real null value
144 //because later we might try to execute previousStandbyStatus.equals() and get
145 //a null pointer exception.
146 previousStandbyStatus = StateManagement.NULL_VALUE;
148 logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
150 } else if (standbyStatus.equals(StateManagement.HOT_STANDBY) || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
151 if(logger.isDebugEnabled()){
152 logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
154 if(previousStandbyStatus.equals(PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)){
155 //We were just here and did this successfully
156 if(logger.isDebugEnabled()){
157 logger.debug("handleStateChange: Is returning because standbyStatus is {}"
158 + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
162 isWaitingForActivation = false;
165 if(logger.isDebugEnabled()){
166 logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY: cancelling delayActivationTimer.");
168 delayActivateTimer.cancel();
170 if(logger.isDebugEnabled()){
171 logger.debug("handleStateChange: HOT_STANDBY || COLD_STANDBY no delayActivationTimer existed.", e);
173 //If you end of here, there was no active timer
175 //Only want to lock the endpoints, not the controllers.
176 PolicyEngine.manager.deactivate();
177 //The operation was fully successful
178 previousStandbyStatus = PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY;
180 logger.warn("handleStateChange: standbyStatus = {} caught exception: {}", standbyStatus, e.getMessage(), e);
183 } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
184 if(logger.isDebugEnabled()){
185 logger.debug("handleStateChange: standbyStatus= {} "
186 + "scheduling activation of PDP={}",standbyStatus, pdpId);
188 if(previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)){
189 //We were just here and did this successfully
190 if(logger.isDebugEnabled()){
191 logger.debug("handleStateChange: Is returning because standbyStatus is {}"
192 + "and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
197 //UnLock all the endpoints
198 if(logger.isDebugEnabled()){
199 logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.",standbyStatus );
202 * Only endpoints should be unlocked. Controllers have not been locked.
203 * Because, sometimes, it is possible for more than one PDP-D to become active (race conditions)
204 * we need to delay the activation of the topic endpoint interfaces to give the election algorithm
205 * time to resolve the conflict.
207 if(logger.isDebugEnabled()){
208 logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}", isWaitingForActivation);
211 //Delay activation for 2*pdpUpdateInterval+2000 ms in case of an election handler conflict.
212 //You could have multiple election handlers thinking they can take over.
214 // First let's check that the timer has not died
215 if(isWaitingForActivation){
216 if(logger.isDebugEnabled()){
217 logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}", isWaitingForActivation);
219 long now = new Date().getTime();
220 long waitTimeMs = now - startTimeWaitingForActivationMs;
221 if(waitTimeMs > 3*waitInterval){
222 if(logger.isDebugEnabled()){
223 logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer may be hung,"
224 + " waitTimeMs = {} and allowable waitInterval = {}"
225 + " Checking whether it is currently in activation. isNowActivating = {}",
226 waitTimeMs, waitInterval, isNowActivating);
228 //Now check that it is not currently executing an activation
229 if(!isNowActivating){
230 if(logger.isDebugEnabled()){
231 logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer died");
233 // This will assure the timer is cancelled and rescheduled.
234 isWaitingForActivation = false;
240 if(!isWaitingForActivation){
242 //Just in case there is an old timer hanging around
243 if(logger.isDebugEnabled()){
244 logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
246 delayActivateTimer.cancel();
248 if(logger.isDebugEnabled()){
249 logger.debug("handleStateChange: PROVIDING_SERVICE no delayActivationTimer existed.", e);
251 //If you end of here, there was no active timer
253 delayActivateTimer = new Timer();
254 //delay the activate so the DesignatedWaiter can run twice
255 delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
256 isWaitingForActivation = true;
257 startTimeWaitingForActivationMs = new Date().getTime();
258 if(logger.isDebugEnabled()){
259 logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms", waitInterval);
262 if(logger.isDebugEnabled()){
263 logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is waiting for activation.");
268 logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice caught exception: ", e);
272 logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
273 if(previousStandbyStatus.equals(PMStandbyStateChangeNotifier.UNSUPPORTED)){
274 //We were just here and did this successfully
275 if(logger.isDebugEnabled()){
276 logger.debug("handleStateChange: Is returning because standbyStatus is "
277 + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
281 //Only want to lock the endpoints, not the controllers.
282 isWaitingForActivation = false;
285 if(logger.isDebugEnabled()){
286 logger.debug("handleStateChange: unsupported standbystatus: cancelling delayActivationTimer.");
288 delayActivateTimer.cancel();
290 if(logger.isDebugEnabled()){
291 logger.debug("handleStateChange: unsupported standbystatus: no delayActivationTimer existed.", e);
293 //If you end of here, there was no active timer
295 PolicyEngine.manager.deactivate();
296 //We know the standbystatus is unsupported
297 previousStandbyStatus = PMStandbyStateChangeNotifier.UNSUPPORTED;
299 logger.warn("handleStateChange: Unsupported standbyStatus = {} "
300 + "caught exception: {} ",standbyStatus, e.getMessage(), e);
303 if(logger.isDebugEnabled()){
304 logger.debug("handleStateChange: Exiting");
308 private class DelayActivateClass extends TimerTask{
310 private Object delayActivateLock = new Object();
315 isNowActivating = true;
317 if(logger.isDebugEnabled()){
318 logger.debug("DelayActivateClass.run: entry");
320 synchronized(delayActivateLock){
321 PolicyEngine.manager.activate();
322 // The state change fully succeeded
323 previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
324 // We want to set this to false here because the activate call can take a while
325 isWaitingForActivation = false;
326 isNowActivating = false;
328 if(logger.isDebugEnabled()){
329 logger.debug("DelayActivateClass.run.exit");
332 isWaitingForActivation = false;
333 isNowActivating = false;
334 logger.warn("DelayActivateClass.run: caught an unexpected exception "
335 + "calling PolicyEngine.manager.activate: ", e);
340 public String getPreviousStandbyStatus(){
341 return previousStandbyStatus;