[CCSDK-1985]GR Toolkit Refactor
[ccsdk/sli/plugins.git] / grToolkit / provider / src / main / java / org / onap / ccsdk / sli / plugins / grtoolkit / resolver / SixNodeHealthResolver.java
diff --git a/grToolkit/provider/src/main/java/org/onap/ccsdk/sli/plugins/grtoolkit/resolver/SixNodeHealthResolver.java b/grToolkit/provider/src/main/java/org/onap/ccsdk/sli/plugins/grtoolkit/resolver/SixNodeHealthResolver.java
new file mode 100644 (file)
index 0000000..e79262c
--- /dev/null
@@ -0,0 +1,316 @@
+/*-
+ * ============LICENSE_START=======================================================
+ * openECOMP : SDN-C
+ * ================================================================================
+ * Copyright (C) 2019 AT&T Intellectual Property. All rights
+ *                     reserved.
+ * ================================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============LICENSE_END=========================================================
+ */
+
+package org.onap.ccsdk.sli.plugins.grtoolkit.resolver;
+
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import org.onap.ccsdk.sli.core.dblib.DbLibService;
+import org.onap.ccsdk.sli.plugins.grtoolkit.connection.ConnectionManager;
+import org.onap.ccsdk.sli.plugins.grtoolkit.connection.ConnectionResponse;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.AdminHealth;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.ClusterActor;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.ClusterHealth;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.DatabaseHealth;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.FailoverStatus;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.Health;
+import org.onap.ccsdk.sli.plugins.grtoolkit.data.SiteHealth;
+
+import org.opendaylight.yang.gen.v1.org.onap.ccsdk.sli.plugins.gr.toolkit.rev180926.FailoverInput;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+/**
+ * Implementation of {@code HealthResolver} for a six node controller
+ * architecture, where three nodes are located in one data center, and the
+ * other three nodes are located in another. The sites are assumed to be in an
+ * Active/Standby configuration, with the Active site nodes voting and the
+ * Standby site nodes non-voting.
+ *
+ * @author Anthony Haddox
+ * @see HealthResolver
+ */
+public class SixNodeHealthResolver extends HealthResolver {
+    /** Shared SLF4J logger; static so that it is created once for the class rather than once per resolver instance. */
+    private static final Logger log = LoggerFactory.getLogger(SixNodeHealthResolver.class);
+
+    /**
+     * Creates the six node health resolver used by the
+     * {@code GrToolkitProvider}. All three arguments are handed unchanged to
+     * the {@code HealthResolver} constructor, after which
+     * {@code resolveSites()} is invoked to assign a site to each member.
+     *
+     * @param map a map containing all of the nodes in the akka cluster
+     * @param properties the properties passed into the provider
+     * @param dbLib a reference to the {@code DbLibService} of the provider
+     * @see HealthResolver
+     * @see org.onap.ccsdk.sli.plugins.grtoolkit.GrToolkitProvider
+     */
+    public SixNodeHealthResolver(Map<String, ClusterActor> map, Properties properties, DbLibService dbLib) {
+        super(map, properties, dbLib);
+        this.resolveSites();
+    }
+
+    /**
+     * Implementation of {@code getClusterHealth()}. Asks the
+     * {@code ShardResolver} for the controller health of the members in
+     * {@code memberMap}, then counts the members that are up and not
+     * unreachable. If at least 4 of the 6 members are healthy, the cluster is
+     * deemed healthy; otherwise it is reported as faulty.
+     *
+     * @return a {@code ClusterHealth} object with health of the akka cluster
+     * @see org.onap.ccsdk.sli.plugins.grtoolkit.GrToolkitProvider
+     * @see HealthResolver
+     * @see ClusterHealth
+     * @see ShardResolver
+     */
+    @Override
+    public ClusterHealth getClusterHealth() {
+        log.info("getClusterHealth(): Getting cluster health...");
+        shardResolver.getControllerHealth(memberMap);
+        long healthyMembers = memberMap.values().stream()
+                .filter(member -> member.isUp() && !member.isUnreachable())
+                .count();
+        // 4 healthy members out of 6 is the documented threshold; the previous
+        // "> 4" comparison wrongly demanded 5 healthy members.
+        Health clusterState = (healthyMembers >= 4) ? Health.HEALTHY : Health.FAULTY;
+        return new ClusterHealth().withHealth(clusterState);
+    }
+
+    /**
+     * Implementation of {@code getSiteHealth()}. Refreshes the health
+     * information of all of the controllers, splits the members into a voting
+     * group and a non-voting group, and evaluates each group as one site. The
+     * voting site is reported with the role {@code ACTIVE} and the non-voting
+     * site with the role {@code STANDBY}.
+     *
+     * @return a List holding the {@code SiteHealth} of the voting site
+     *         followed by the {@code SiteHealth} of the non-voting site
+     * @see org.onap.ccsdk.sli.plugins.grtoolkit.GrToolkitProvider
+     * @see HealthResolver
+     * @see SiteHealth
+     * @see ShardResolver
+     */
+    @Override
+    public List<SiteHealth> getSiteHealth() {
+        log.info("getSiteHealth(): Getting site health...");
+
+        // The cluster health call populates memberMap with the values read below
+        getClusterHealth();
+
+        // Key true holds the voting members, key false the non-voting members
+        Map<Boolean, List<ClusterActor>> actorsByVotingState = memberMap.values().stream()
+                .collect(Collectors.partitioningBy(ClusterActor::isVoting));
+
+        SiteHealth activeSite = getSiteHealth(actorsByVotingState.get(true)).withRole("ACTIVE");
+        SiteHealth standbySite = getSiteHealth(actorsByVotingState.get(false)).withRole("STANDBY");
+        return Arrays.asList(activeSite, standbySite);
+    }
+
+    /**
+     * Gathers the site identifier, admin health, and database health of a
+     * site. The actors are visited in list order; each of the three values is
+     * requested from successive actors until one of them supplies it. Values
+     * that no actor supplies fall back to {@code "UNKNOWN SITE"}, a faulty
+     * admin health with code 500, and a faulty database health respectively.
+     * The site is only marked healthy when admin and database are healthy and
+     * more than one actor is up and reachable.
+     *
+     * @param actorList the cluster members that make up the site
+     * @return a {@code SiteHealth} object with health of the site
+     * @see org.onap.ccsdk.sli.plugins.grtoolkit.GrToolkitProvider
+     * @see ClusterActor
+     * @see SiteHealth
+     * @see ConnectionManager
+     */
+    public SiteHealth getSiteHealth(List<ClusterActor> actorList) {
+        String resolvedSiteId = null;
+        AdminHealth resolvedAdmin = null;
+        DatabaseHealth resolvedDatabase = null;
+        int reachableCount = 0;
+
+        for(ClusterActor node : actorList) {
+            // Common prefix of every RPC URL issued against this node
+            final String rpcBase = httpProtocol + node.getNode() + ":" + controllerPort;
+
+            if(node.isUp() && !node.isUnreachable()) {
+                reachableCount++;
+            }
+
+            // Site identifier: keep asking until a node answers
+            if(resolvedSiteId == null) {
+                try {
+                    String body = ConnectionManager.getConnectionResponse(rpcBase + "/restconf/operations/gr-toolkit:site-identifier", ConnectionManager.HttpMethod.POST, null, credentials).content;
+                    JSONObject output = new JSONObject(body).getJSONObject(OUTPUT);
+                    resolvedSiteId = output.getString("id");
+                } catch(IOException e) {
+                    log.error("getSiteHealth(): Error getting site identifier from {}", node.getNode());
+                    log.error("getSiteHealth(): IOException", e);
+                }
+            }
+
+            // Admin health: only a positive answer is recorded
+            if(resolvedAdmin == null) {
+                try {
+                    if(isRemoteComponentHealthy(rpcBase + "/restconf/operations/gr-toolkit:admin-health")) {
+                        resolvedAdmin = new AdminHealth(Health.HEALTHY, 200);
+                    }
+                } catch(IOException e) {
+                    log.error("getSiteHealth(): Error getting admin health from {}", node.getNode());
+                    log.error("getSiteHealth(): IOException", e);
+                }
+            }
+
+            // Database health: only a positive answer is recorded
+            if(resolvedDatabase == null) {
+                try {
+                    if(isRemoteComponentHealthy(rpcBase + "/restconf/operations/gr-toolkit:database-health")) {
+                        resolvedDatabase = new DatabaseHealth(Health.HEALTHY);
+                    }
+                } catch(IOException e) {
+                    log.error("getSiteHealth(): Error getting database health from {}", node.getNode());
+                    log.error("getSiteHealth(): IOException", e);
+                }
+            }
+        }
+
+        // Anything still unresolved after the loop is reported as unknown/faulty
+        final String siteId = (resolvedSiteId != null) ? resolvedSiteId : "UNKNOWN SITE";
+        final AdminHealth adminHealth = (resolvedAdmin != null) ? resolvedAdmin : new AdminHealth(Health.FAULTY, 500);
+        final DatabaseHealth databaseHealth = (resolvedDatabase != null) ? resolvedDatabase : new DatabaseHealth(Health.FAULTY);
+
+        SiteHealth siteHealth = new SiteHealth()
+                                    .withAdminHealth(adminHealth)
+                                    .withDatabaseHealth(databaseHealth)
+                                    .withId(siteId);
+
+        boolean componentsHealthy = isHealthy(adminHealth.getHealth()) && isHealthy(databaseHealth.getHealth());
+        if(componentsHealthy && reachableCount > 1) {
+            siteHealth.setHealth(Health.HEALTHY);
+        }
+
+        return siteHealth;
+    }
+
+    /**
+     * Implementation of {@code tryFailover()}. Performs a preliminary call to
+     * {@code getClusterHealth} to populate information about the cluster. The
+     * nodes are separated into voting and non-voting groups. If no non-voting
+     * members can be found, or none of them passes
+     * {@code isControllerHealthy}, the method terminates immediately with a
+     * 500 status. Otherwise the first healthy non-voting node is selected as
+     * the driving operator, and a payload that swaps the voting state of every
+     * member is posted to it to perform a controller-level failover.
+     *
+     * @param input the failover request; it is not consulted by this
+     *        implementation
+     * @return a {@code FailoverStatus} object carrying the status code and
+     *         message describing the outcome of the voting swap
+     * @see org.onap.ccsdk.sli.plugins.grtoolkit.GrToolkitProvider
+     * @see HealthResolver
+     * @see FailoverStatus
+     * @see FailoverInput
+     */
+    @Override
+    public FailoverStatus tryFailover(FailoverInput input) {
+        // Get Cluster Health to populate the memberMap with the necessary values
+        log.info("tryFailover(): Performing preliminary health check...");
+        getClusterHealth();
+        FailoverStatus status = new FailoverStatus();
+        ConnectionResponse votingResponse = null;
+        List<ClusterActor> votingActors = memberMap.values().stream().filter(ClusterActor::isVoting).collect(Collectors.toList());
+        List<ClusterActor> nonVotingActors = memberMap.values().stream().filter(member -> !member.isVoting()).collect(Collectors.toList());
+
+        if(nonVotingActors.isEmpty()) {
+            status.setStatusCode(500);
+            status.setMessage("No nonvoting members found. Cannot perform voting switch.");
+            return status;
+        }
+
+        // The operator is the node the voting swap request is sent to. An absent
+        // match is handled as a normal outcome rather than through an exception.
+        ClusterActor operator = nonVotingActors.stream()
+                .filter(this::isControllerHealthy)
+                .findFirst()
+                .orElse(null);
+        if(operator == null) {
+            log.error("tryFailover(): Could not find any healthy members.");
+            status.setStatusCode(500);
+            status.setMessage("Could not find any healthy members.");
+            return status;
+        }
+
+        // Assuming two 3 node sites, 3 voting and 3 non voting. Any other split
+        // (too few or too many on either side) is worth a warning.
+        if(votingActors.size() != 3 || nonVotingActors.size() != 3) {
+            log.warn("tryFailover(): Sites do not contain an equal amount of voting and nonvoting members: Voting: {} | NonVoting: {}", votingActors.size(), nonVotingActors.size());
+        }
+        log.info("tryFailover(): Swapping voting...");
+        try {
+            JSONObject votingInput = new JSONObject();
+            JSONObject inputBlock = new JSONObject();
+            JSONArray votingStateArray = new JSONArray();
+            JSONObject memberVotingState;
+            // Currently voting members are demoted to non-voting...
+            for(ClusterActor actor : votingActors) {
+                memberVotingState = new JSONObject();
+                memberVotingState.put("member-name", actor.getMember());
+                memberVotingState.put("voting", false);
+                votingStateArray.put(memberVotingState);
+            }
+            // ...and currently non-voting members are promoted to voting.
+            for(ClusterActor actor : nonVotingActors) {
+                memberVotingState = new JSONObject();
+                memberVotingState.put("member-name", actor.getMember());
+                memberVotingState.put("voting", true);
+                votingStateArray.put(memberVotingState);
+            }
+            inputBlock.put("member-voting-state", votingStateArray);
+            votingInput.put("input", inputBlock);
+            log.debug("tryFailover(): {}", votingInput);
+            // Change voting all shards
+            votingResponse = ConnectionManager.getConnectionResponse(httpProtocol + operator.getNode() + ":" + controllerPort + "/restconf/operations/cluster-admin:change-member-voting-states-for-all-shards", ConnectionManager.HttpMethod.POST, votingInput.toString(), credentials);
+        } catch(IOException e) {
+            log.error("tryFailover(): Failure changing voting", e);
+        }
+
+        // A null response means the request itself failed with an IOException
+        if(votingResponse == null) {
+            status.setStatusCode(500);
+            status.setMessage("Failed to swap voting.");
+        } else if(votingResponse.statusCode != 200) {
+            status.setStatusCode(votingResponse.statusCode);
+            status.setMessage("Failed to swap voting.");
+        } else {
+            status.setStatusCode(200);
+            status.setMessage("Failover complete.");
+        }
+
+        return status;
+    }
+
+    /**
+     * Implementation of {@code resolveSites()}. Logs the number of entries in
+     * {@code memberMap} and passes every member to
+     * {@code resolveSiteForMember()} to resolve which site it belongs to.
+     *
+     * @see HealthResolver
+     */
+    @Override
+    public void resolveSites() {
+        log.info("Map contains {} entries", memberMap.size());
+        // Only the actors matter here; the map keys are not needed
+        memberMap.values().forEach(this::resolveSiteForMember);
+    }
+
+    /**
+     * Resolves which site a member belongs to. The member number is the text
+     * between the first and second dash of the member name (the remainder of
+     * the name when there is only one dash), parsed as an integer. Members
+     * numbered below 4 are assigned to <i>Site 1</i>, all others to
+     * <i>Site 2</i>. If the member name has no such segment or the segment is
+     * not a number, an error is logged and the member defaults to
+     * <i>Site 1</i>.
+     *
+     * @param actor the cluster member whose site is to be set
+     * @see HealthResolver
+     */
+    private void resolveSiteForMember(ClusterActor actor) {
+        try {
+            int memberNumber = Integer.parseInt(actor.getMember().split("-")[1]);
+            actor.setSite(memberNumber < 4 ? "Site 1" : "Site 2");
+            log.info("resolveSiteForMember(): {} belongs to {}", actor.getNode(), actor.getSite());
+        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
+            // A name without a dash yields a one-element split, so the index
+            // lookup fails; treat it the same as a non-numeric suffix.
+            log.error("resolveSiteForMember(): Could not parse member number for {}. Defaulting to Site 1.", actor.getNode());
+            // Store the plain site name; the log prefix must not leak into the data.
+            actor.setSite("Site 1");
+        }
+    }
+}
\ No newline at end of file