Copy policy-endpoints from drools-pdp to common
[policy/drools-pdp.git] / feature-active-standby-management / src / main / java / org / onap / policy / drools / activestandby / PMStandbyStateChangeNotifier.java
1 /*
2  * ============LICENSE_START=======================================================
3  * feature-active-standby-management
4  * ================================================================================
5  * Copyright (C) 2017-2018 AT&T Intellectual Property. All rights reserved.
6  * ================================================================================
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  * 
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  * 
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  * ============LICENSE_END=========================================================
19  */
20
21 package org.onap.policy.drools.activestandby; 
22  
23 /* 
24  * Per MultiSite_v1-10.ppt:
25  * 
26  * Extends the StateChangeNotifier class and overrides the abstract handleStateChange() method to get state changes
27  * and do the following:
28  * 
29  * When the Standby Status changes (from providingservice) to hotstandby or coldstandby, 
30  * the Active/Standby selection algorithm must stand down if the PDP-D is currently the lead/active node 
31  * and allow another PDP-D to take over.  It must also call lock on all engines in the engine management.
32  * 
33  * When the Standby Status changes from (hotstandby) to coldstandby, the Active/Standby algorithm must NOT assume 
34  * the active/lead role.
35  *  
36  * When the Standby Status changes (from coldstandby or providingservice) to hotstandby, 
37  * the Active/Standby algorithm may assume the active/lead role if the active/lead fails.
38  * 
39  * When the Standby Status changes to providingservice (from hotstandby or coldstandby) call unlock on all 
40  * engines in the engine management layer.
41  */
42 import java.util.Date;
43 import java.util.Timer;
44 import java.util.TimerTask;
45
46 import org.onap.policy.common.im.StateChangeNotifier;
47 import org.onap.policy.common.im.StateManagement;
48 import org.slf4j.Logger;
49 import org.slf4j.LoggerFactory;
50 import org.onap.policy.drools.system.PolicyEngine;
51
52 /*
53  * Some background:
54  * 
55  * Originally, there was a "StandbyStateChangeNotifier" that belonged to policy-core, and this class's handleStateChange() method
56  * used to take care of invoking conn.standDownPdp().   But testing revealed that when a state change to hot standby occurred 
57  * from a demote() operation, first the PMStandbyStateChangeNotifier.handleStateChange() method would be invoked and then the 
58  * StandbyStateChangeNotifier.handleStateChange() method would be invoked, and this ordering was creating the following problem:
59  * 
60  * When PMStandbyStateChangeNotifier.handleStateChange() was invoked it would take a long time to finish, because it would result
61  * in SingleThreadedUebTopicSource.stop() being invoked, which can potentially do a 5 second sleep for each controller being stopped.   
62  * Meanwhile, as these controller stoppages and their associated sleeps were occurring, the election handler would discover the
63  * demoted PDP in hotstandby (but still designated!) and promote it, resulting in the standbyStatus going from hotstandby
64  * to providingservice.  So then, by the time that PMStandbyStateChangeNotifier.handleStateChange() finished its work and
65  * StandbyStateChangeNotifier.handleStateChange() started executing, the standbyStatus was no longer hotstandby (as effected by
66  * the demote), but providingservice (as reset by the election handling logic) and conn.standDownPdp() would not get called!
67  * 
68  * To fix this bug, we consolidated StandbyStateChangeNotifier and PMStandbyStateChangeNotifier, with the standDownPdp() always 
69  * being invoked prior to the ProxyTopicEndpointManager.getInstance().lock().  In this way, when the election handling logic is invoked 
70  * during the controller stoppages, the PDP is in hotstandby and the standdown occurs.
71  * 
72  */
73 public class PMStandbyStateChangeNotifier extends StateChangeNotifier {
74         // get an instance of logger 
75         private static final Logger  logger = LoggerFactory.getLogger(PMStandbyStateChangeNotifier.class);
76         private Timer delayActivateTimer;
77         private int pdpUpdateInterval;
78         private boolean isWaitingForActivation;
79         private long startTimeWaitingForActivationMs;
80         private long waitInterval;
81         private boolean isNowActivating;
82         private String previousStandbyStatus;
83         public static final String NONE = "none";
84         public static final String UNSUPPORTED = "unsupported";
85         public static final String HOTSTANDBY_OR_COLDSTANDBY = "hotstandby_or_coldstandby";
86                 
87         public PMStandbyStateChangeNotifier(){
88                 pdpUpdateInterval = Integer.parseInt(ActiveStandbyProperties.getProperty(ActiveStandbyProperties.PDP_UPDATE_INTERVAL));
89                 isWaitingForActivation = false;
90                 startTimeWaitingForActivationMs = new Date().getTime();
91                 //delay the activate so the DesignatedWaiter can run twice - give it an extra 2 seconds
92                 waitInterval = 2*pdpUpdateInterval + 2000L;
93                 isNowActivating=false;
94                 previousStandbyStatus = PMStandbyStateChangeNotifier.NONE;
95         }
96
97         @Override
98         public void handleStateChange() {
99                 /*
100                  * A note on synchronization: This method is not synchronized because the caller, stateManagememt, 
101                  * has synchronize all of its methods. Only one stateManagement operation can occur at a time. Thus,
102                  * only one handleStateChange() call will ever be made at a time.
103                  */
104                 if(logger.isDebugEnabled()){
105                         logger.debug("handleStateChange: Entering, message={}, standbyStatus={}",
106                                          super.getMessage(), super.getStateManagement().getStandbyStatus());
107                 }
108                 String standbyStatus = super.getStateManagement().getStandbyStatus();
109                 String pdpId = ActiveStandbyProperties
110                                 .getProperty(ActiveStandbyProperties.NODE_NAME);
111
112                 if(logger.isDebugEnabled()){
113                         logger.debug("handleStateChange: previousStandbyStatus = {}"
114                                 + "; standbyStatus = {}", previousStandbyStatus, standbyStatus);
115                 }
116                 
117                 if (standbyStatus == null  || standbyStatus.equals(StateManagement.NULL_VALUE)) {
118                         if(logger.isDebugEnabled()){
119                                 logger.debug("handleStateChange: standbyStatus is null; standing down PDP={}", pdpId);
120                         }
121                         if(previousStandbyStatus.equals(StateManagement.NULL_VALUE)){
122                                 //We were just here and did this successfully
123                                 if(logger.isDebugEnabled()){
124                                         logger.debug("handleStateChange: Is returning because standbyStatus is null and was previously 'null'; PDP={}", pdpId);
125                                 }
126                                 return;
127                         }
128                         isWaitingForActivation = false;
129                         try{
130                                 try{
131                                         if(logger.isDebugEnabled()){
132                                                 logger.debug("handleStateChange: null:  cancelling delayActivationTimer.");
133                                         }
134                                         delayActivateTimer.cancel();
135                                 }catch(Exception e){
136                                         if(logger.isInfoEnabled()){
137                                                 logger.info("handleStateChange: null no delayActivationTimer existed.", e);
138                                         }
139                                         //If you end of here, there was no active timer
140                                 }
141                                 //Only want to lock the endpoints, not the controllers.
142                                 PolicyEngine.manager.deactivate();
143                                 //The operation was fully successful, but you cannot assign it a real null value
144                                 //because later we might try to execute previousStandbyStatus.equals() and get
145                                 //a null pointer exception.
146                                 previousStandbyStatus = StateManagement.NULL_VALUE;
147                         }catch(Exception e){
148                                 logger.warn("handleStateChange: standbyStatus == null caught exception: ", e);
149                         }
150                 } else if (standbyStatus.equals(StateManagement.HOT_STANDBY) || standbyStatus.equals(StateManagement.COLD_STANDBY)) {
151                         if(logger.isDebugEnabled()){
152                                 logger.debug("handleStateChange: standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
153                         }
154                         if(previousStandbyStatus.equals(PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY)){
155                                 //We were just here and did this successfully
156                                 if(logger.isDebugEnabled()){
157                                         logger.debug("handleStateChange: Is returning because standbyStatus is {}"
158                                                         + " and was previously {}; PDP= {}", standbyStatus, previousStandbyStatus, pdpId);
159                                 }
160                                 return;
161                         }
162                         isWaitingForActivation = false;
163                         try{
164                                 try{
165                                         if(logger.isDebugEnabled()){
166                                                 logger.debug("handleStateChange: HOT_STNDBY || COLD_STANDBY:  cancelling delayActivationTimer.");
167                                         }
168                                         delayActivateTimer.cancel();
169                                 }catch(Exception e){
170                                         if(logger.isDebugEnabled()){
171                                                 logger.debug("handleStateChange: HOT_STANDBY || COLD_STANDBY no delayActivationTimer existed.", e);
172                                         }
173                                         //If you end of here, there was no active timer
174                                 }
175                                 //Only want to lock the endpoints, not the controllers.
176                                 PolicyEngine.manager.deactivate();
177                                 //The operation was fully successful
178                                 previousStandbyStatus = PMStandbyStateChangeNotifier.HOTSTANDBY_OR_COLDSTANDBY;
179                         }catch(Exception e){
180                                 logger.warn("handleStateChange: standbyStatus = {} caught exception: {}", standbyStatus, e.getMessage(), e);
181                         }
182
183                 } else if (standbyStatus.equals(StateManagement.PROVIDING_SERVICE)) {
184                         if(logger.isDebugEnabled()){
185                                 logger.debug("handleStateChange: standbyStatus= {} "
186                                                 + "scheduling activation of PDP={}",standbyStatus, pdpId);
187                         }
188                         if(previousStandbyStatus.equals(StateManagement.PROVIDING_SERVICE)){
189                                 //We were just here and did this successfully
190                                 if(logger.isDebugEnabled()){
191                                         logger.debug("handleStateChange: Is returning because standbyStatus is {}"
192                                                         + "and was previously {}; PDP={}", standbyStatus, previousStandbyStatus, pdpId);
193                                 }
194                                 return;
195                         }
196                         try{
197                                 //UnLock all the endpoints
198                                 if(logger.isDebugEnabled()){
199                                         logger.debug("handleStateChange: standbyStatus={}; controllers must be unlocked.",standbyStatus );
200                                 }
201                                 /*
202                                  * Only endpoints should be unlocked. Controllers have not been locked.
203                                  * Because, sometimes, it is possible for more than one PDP-D to become active (race conditions)
204                                  * we need to delay the activation of the topic endpoint interfaces to give the election algorithm
205                                  * time to resolve the conflict.
206                                  */
207                                 if(logger.isDebugEnabled()){
208                                         logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation= {}", isWaitingForActivation);
209                                 }
210                                 
211                                 //Delay activation for 2*pdpUpdateInterval+2000 ms in case of an election handler conflict.  
212                                 //You could have multiple election handlers thinking they can take over.
213                                 
214                                  // First let's check that the timer has not died
215                                 if(isWaitingForActivation){
216                                         if(logger.isDebugEnabled()){
217                                                 logger.debug("handleStateChange: PROVIDING_SERVICE isWaitingForActivation = {}", isWaitingForActivation);
218                                         }
219                                         long now = new Date().getTime();
220                                         long waitTimeMs = now - startTimeWaitingForActivationMs;
221                                         if(waitTimeMs > 3*waitInterval){
222                                                 if(logger.isDebugEnabled()){
223                                                         logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer may be hung,"
224                                                                 + " waitTimeMs = {} and allowable waitInterval = {}"
225                                                                 + " Checking whether it is currently in activation. isNowActivating = {}",
226                                                                 waitTimeMs, waitInterval, isNowActivating);
227                                                 }
228                                                 //Now check that it is not currently executing an activation
229                                                 if(!isNowActivating){
230                                                         if(logger.isDebugEnabled()){
231                                                                 logger.debug("handleStateChange: PROVIDING_SERVICE looks like the activation wait timer died");
232                                                         }
233                                                         // This will assure the timer is cancelled and rescheduled.
234                                                         isWaitingForActivation = false;
235                                                 }
236                                         }
237                                         
238                                 }
239                                 
240                                 if(!isWaitingForActivation){
241                                         try{
242                                                 //Just in case there is an old timer hanging around
243                                                 if(logger.isDebugEnabled()){
244                                                         logger.debug("handleStateChange: PROVIDING_SERVICE cancelling delayActivationTimer.");
245                                                 }
246                                                 delayActivateTimer.cancel();
247                                         }catch(Exception e){
248                                                 if(logger.isDebugEnabled()){
249                                                         logger.debug("handleStateChange: PROVIDING_SERVICE no delayActivationTimer existed.", e);
250                                                 }
251                                                 //If you end of here, there was no active timer
252                                         }
253                                         delayActivateTimer = new Timer();
254                                         //delay the activate so the DesignatedWaiter can run twice
255                                         delayActivateTimer.schedule(new DelayActivateClass(), waitInterval);
256                                         isWaitingForActivation = true;
257                                         startTimeWaitingForActivationMs = new Date().getTime();
258                                         if(logger.isDebugEnabled()){
259                                                 logger.debug("handleStateChange: PROVIDING_SERVICE scheduling delayActivationTimer in {} ms", waitInterval);
260                                         }
261                                 }else{
262                                         if(logger.isDebugEnabled()){
263                                                 logger.debug("handleStateChange: PROVIDING_SERVICE delayActivationTimer is waiting for activation.");
264                                         }
265                                 }
266                                 
267                         }catch(Exception e){
268                                 logger.warn("handleStateChange: PROVIDING_SERVICE standbyStatus == providingservice caught exception: ", e);
269                         }
270
271                 } else {
272                         logger.error("handleStateChange: Unsupported standbyStatus={}; standing down PDP={}", standbyStatus, pdpId);
273                         if(previousStandbyStatus.equals(PMStandbyStateChangeNotifier.UNSUPPORTED)){
274                                 //We were just here and did this successfully
275                                 if(logger.isDebugEnabled()){
276                                         logger.debug("handleStateChange: Is returning because standbyStatus is "
277                                                 + "UNSUPPORTED and was previously {}; PDP={}", previousStandbyStatus, pdpId);
278                                 }
279                                 return;
280                         }
281                         //Only want to lock the endpoints, not the controllers.
282                         isWaitingForActivation = false;
283                         try{
284                                 try{
285                                         if(logger.isDebugEnabled()){
286                                                 logger.debug("handleStateChange: unsupported standbystatus:  cancelling delayActivationTimer.");
287                                         }
288                                         delayActivateTimer.cancel();
289                                 }catch(Exception e){
290                                         if(logger.isDebugEnabled()){
291                                                 logger.debug("handleStateChange: unsupported standbystatus: no delayActivationTimer existed.", e);
292                                         }
293                                         //If you end of here, there was no active timer
294                                 }
295                                 PolicyEngine.manager.deactivate();
296                                 //We know the standbystatus is unsupported
297                                 previousStandbyStatus = PMStandbyStateChangeNotifier.UNSUPPORTED;
298                         }catch(Exception e){
299                                 logger.warn("handleStateChange: Unsupported standbyStatus = {} "
300                                                 + "caught exception: {} ",standbyStatus, e.getMessage(), e);
301                         }
302                 }
303                 if(logger.isDebugEnabled()){
304                         logger.debug("handleStateChange: Exiting");
305                 }
306         }
307
308         private class DelayActivateClass extends TimerTask{
309
310                 private Object delayActivateLock = new Object();
311
312
313                 @Override
314                 public void run() {
315                         isNowActivating = true;
316                         try{
317                                 if(logger.isDebugEnabled()){
318                                         logger.debug("DelayActivateClass.run: entry");
319                                 }
320                                 synchronized(delayActivateLock){
321                                         PolicyEngine.manager.activate();
322                                         // The state change fully succeeded
323                                         previousStandbyStatus = StateManagement.PROVIDING_SERVICE;
324                                         // We want to set this to false here because the activate call can take a while
325                                         isWaitingForActivation = false;
326                                         isNowActivating = false;
327                                 }
328                                 if(logger.isDebugEnabled()){
329                                         logger.debug("DelayActivateClass.run.exit");
330                                 }
331                         }catch(Exception e){
332                                 isWaitingForActivation = false;
333                                 isNowActivating = false;
334                                 logger.warn("DelayActivateClass.run: caught an unexpected exception "
335                                                 + "calling PolicyEngine.manager.activate: ", e);
336                         }
337                 }
338         }
339         
340         public String getPreviousStandbyStatus(){
341                 return previousStandbyStatus;
342         }
343 }