2 * ============LICENSE_START=======================================================
4 * ================================================================================
5 * Copyright © 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ============LICENSE_END=========================================================
20 package org.onap.aai.dbgen;
22 import java.io.BufferedReader;
23 import java.io.BufferedWriter;
25 import java.io.FileReader;
26 import java.io.FileWriter;
27 import java.io.IOException;
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.HashMap;
31 import java.util.Iterator;
32 import java.util.LinkedHashSet;
33 import java.util.List;
35 import java.util.Map.Entry;
36 import java.util.Properties;
38 import java.util.UUID;
40 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
41 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
42 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
43 import org.apache.tinkerpop.gremlin.structure.Direction;
44 import org.apache.tinkerpop.gremlin.structure.Edge;
45 import org.apache.tinkerpop.gremlin.structure.Graph;
46 import org.apache.tinkerpop.gremlin.structure.Property;
47 import org.apache.tinkerpop.gremlin.structure.Vertex;
48 import org.apache.tinkerpop.gremlin.structure.VertexProperty;
49 import org.onap.aai.db.props.AAIProperties;
50 import org.onap.aai.dbmap.AAIGraph;
51 import org.onap.aai.dbmap.AAIGraphConfig;
52 import org.onap.aai.exceptions.AAIException;
53 import org.onap.aai.introspection.Introspector;
54 import org.onap.aai.introspection.Loader;
55 import org.onap.aai.introspection.LoaderFactory;
56 import org.onap.aai.introspection.ModelType;
57 import org.onap.aai.introspection.exceptions.AAIUnknownObjectException;
58 import org.onap.aai.logging.ErrorLogHelper;
59 import org.onap.aai.logging.LogFormatTools;
60 import org.onap.aai.logging.LoggingContext;
61 import org.onap.aai.serialization.db.AAIDirection;
62 import org.onap.aai.serialization.db.EdgeProperty;
63 import org.onap.aai.util.*;
64 import org.onap.aai.logging.LoggingContext.StatusCode;
66 import com.att.eelf.configuration.Configuration;
67 import com.att.eelf.configuration.EELFLogger;
68 import com.att.eelf.configuration.EELFManager;
69 import org.janusgraph.core.JanusGraphFactory;
70 import org.janusgraph.core.JanusGraph;
73 public class DataGrooming {
// Logger is assigned in main() only after the EELF logging properties
// (file name / path) have been pushed into system properties.
75 private static EELFLogger logger;
// Partner/application id stamped on every grooming DB operation.
76 private static final String FROMAPPID = "AAI-DB";
// One transaction id per JVM run, used for request tracing across calls.
77 private static final String TRANSID = UUID.randomUUID().toString();
// Presumably a running count of duplicate groups deleted during a fix
// pass — the code that increments it is not visible in this extract.
78 private static int dupeGrpsDeleted = 0;
83 * @param args the arguments
// Entry point for the stand-alone DataGrooming job. Parses command-line
// flags, initializes EELF logging and the per-run LoggingContext, then
// invokes doTheGrooming() in one of three modes:
//   1) -f <file>  : fix pass driven by a previous run's output file
//   2) -autoFix   : detection pass, sleep, then a fix pass on its output
//   3) default    : plain detection pass (no fixing)
// NOTE(review): this extract appears to have lines elided (the embedded
// original line numbers jump, e.g. 148 -> 150), so statements such as the
// assignments for -autoFix / -dupeCheckOff / -dupeFixOn / -ghost2FixOn,
// several "i++" advances before reading option values, and various
// try/closing braces are not visible here — confirm against the full file.
85 public static void main(String[] args) {
87 // Set the logging file properties to be used by EELFManager
88 System.setProperty("aai.service.name", DataGrooming.class.getSimpleName());
89 Properties props = System.getProperties();
90 props.setProperty(Configuration.PROPERTY_LOGGING_FILE_NAME, AAIConstants.AAI_DATA_GROOMING_LOGBACK_PROPS);
91 props.setProperty(Configuration.PROPERTY_LOGGING_FILE_PATH, AAIConstants.AAI_HOME_ETC_APP_PROPERTIES);
92 logger = EELFManager.getInstance().getLogger(DataGrooming.class);
93 String ver = "version"; // Placeholder
// Command-line option flags; all default to "off".
94 Boolean doAutoFix = false;
95 Boolean edgesOnlyFlag = false;
96 Boolean dontFixOrphansFlag = false;
97 Boolean skipHostCheck = false;
98 Boolean singleCommits = false;
99 Boolean dupeCheckOff = false;
100 Boolean dupeFixOn = false;
101 Boolean ghost2CheckOff = false;
102 Boolean ghost2FixOn = false;
103 Boolean neverUseCache = false;
104 Boolean skipEdgeCheckFlag = false;
// Seed the per-run logging context (partner, service, request id) so all
// subsequent log lines are correlated to this grooming run.
106 LoggingContext.init();
107 LoggingContext.partnerName(FROMAPPID);
108 LoggingContext.serviceName(AAIConstants.AAI_RESOURCES_MS);
109 LoggingContext.component("dataGrooming");
110 LoggingContext.targetEntity(AAIConstants.AAI_RESOURCES_MS);
111 LoggingContext.targetServiceName("main");
112 LoggingContext.requestId(TRANSID);
113 LoggingContext.statusCode(StatusCode.COMPLETE);
114 LoggingContext.responseCode(LoggingContext.SUCCESS);
116 int timeWindowMinutes = 0; // A value of 0 means that we will not have a time-window -- we will look
117 // at all nodes of the passed-in nodeType.
// Safety limits: max records fixed per pass, and the nap between the two
// autoFix passes. aaiconfig.properties values override the defaults.
120 int maxRecordsToFix = AAIConstants.AAI_GROOMING_DEFAULT_MAX_FIX;
121 int sleepMinutes = AAIConstants.AAI_GROOMING_DEFAULT_SLEEP_MINUTES;
123 String maxFixStr = AAIConfig.get("aai.grooming.default.max.fix");
124 if( maxFixStr != null && !maxFixStr.equals("") ){
125 maxRecordsToFix = Integer.parseInt(maxFixStr);
127 String sleepStr = AAIConfig.get("aai.grooming.default.sleep.minutes");
128 if( sleepStr != null && !sleepStr.equals("") ){
129 sleepMinutes = Integer.parseInt(sleepStr);
// Config lookup failures are deliberately non-fatal: fall back to the
// AAIConstants defaults set above.
132 catch ( Exception e ){
133 // Don't worry, we'll just use the defaults that we got from AAIConstants
134 logger.warn("WARNING - could not pick up aai.grooming values from aaiconfig.properties file. ");
137 String prevFileName = "";
// Timestamp tag (GMT) used to build unique output file names.
139 FormatDate fd = new FormatDate("yyyyMMddHHmm", "GMT");
140 String dteStr = fd.getDateTime();
142 if (args.length > 0) {
143 // They passed some arguments in that will affect processing
144 for (int i = 0; i < args.length; i++) {
145 String thisArg = args[i];
146 if (thisArg.equals("-edgesOnly")) {
147 edgesOnlyFlag = true;
148 } else if (thisArg.equals("-autoFix")) {
150 } else if (thisArg.equals("-skipHostCheck")) {
151 skipHostCheck = true;
152 } else if (thisArg.equals("-dontFixOrphans")) {
153 dontFixOrphansFlag = true;
154 } else if (thisArg.equals("-singleCommits")) {
155 singleCommits = true;
156 } else if (thisArg.equals("-dupeCheckOff")) {
158 } else if (thisArg.equals("-dupeFixOn")) {
160 } else if (thisArg.equals("-ghost2CheckOff")) {
161 ghost2CheckOff = true;
162 } else if (thisArg.equals("-neverUseCache")) {
163 neverUseCache = true;
164 } else if (thisArg.equals("-ghost2FixOn")) {
166 } else if (thisArg.equals("-skipEdgeChecks")) {
167 skipEdgeCheckFlag = true;
// Options below take a value argument; a bad or missing value logs an
// error and exits via AAISystemExitUtil (closing the graph cleanly).
168 } else if (thisArg.equals("-maxFix")) {
170 if (i >= args.length) {
171 LoggingContext.statusCode(StatusCode.ERROR);
172 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
173 logger.error(" No value passed with -maxFix option. ");
174 AAISystemExitUtil.systemExitCloseAAIGraph(0);
176 String nextArg = args[i];
178 maxRecordsToFix = Integer.parseInt(nextArg);
179 } catch (Exception e) {
180 LoggingContext.statusCode(StatusCode.ERROR);
181 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
182 logger.error("Bad value passed with -maxFix option: ["
184 AAISystemExitUtil.systemExitCloseAAIGraph(0);
186 } else if (thisArg.equals("-sleepMinutes")) {
188 if (i >= args.length) {
189 LoggingContext.statusCode(StatusCode.ERROR);
190 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
191 logger.error("No value passed with -sleepMinutes option.");
192 AAISystemExitUtil.systemExitCloseAAIGraph(0);
194 String nextArg = args[i];
196 sleepMinutes = Integer.parseInt(nextArg);
197 } catch (Exception e) {
198 LoggingContext.statusCode(StatusCode.ERROR);
199 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
200 logger.error("Bad value passed with -sleepMinutes option: ["
202 AAISystemExitUtil.systemExitCloseAAIGraph(0);
204 } else if (thisArg.equals("-timeWindowMinutes")) {
206 if (i >= args.length) {
207 LoggingContext.statusCode(StatusCode.ERROR);
208 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
209 logger.error("No value passed with -timeWindowMinutes option.");
210 AAISystemExitUtil.systemExitCloseAAIGraph(0);
212 String nextArg = args[i];
214 timeWindowMinutes = Integer.parseInt(nextArg);
215 } catch (Exception e) {
216 LoggingContext.statusCode(StatusCode.ERROR);
217 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
218 logger.error("Bad value passed with -timeWindowMinutes option: ["
220 AAISystemExitUtil.systemExitCloseAAIGraph(0);
223 } else if (thisArg.equals("-f")) {
225 if (i >= args.length) {
226 LoggingContext.statusCode(StatusCode.ERROR);
227 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
228 logger.error(" No value passed with -f option. ");
229 AAISystemExitUtil.systemExitCloseAAIGraph(0);
231 prevFileName = args[i];
// Unknown argument: print usage and exit.
233 LoggingContext.statusCode(StatusCode.ERROR);
234 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
235 logger.error(" Unrecognized argument passed to DataGrooming: ["
// NOTE(review): usage text says "-donFixOrphans" but the flag actually
// parsed above is "-dontFixOrphans" — typo in the message (string literal,
// left untouched here).
237 logger.error(" Valid values are: -f -autoFix -maxFix -edgesOnly -skipEdgeChecks -dupeFixOn -donFixOrphans -timeWindowMinutes -sleepMinutes -neverUseCache");
238 AAISystemExitUtil.systemExitCloseAAIGraph(0);
// Output file is tagged FULL vs PARTIAL depending on whether a time
// window limits the scan.
243 String windowTag = "FULL";
244 if( timeWindowMinutes > 0 ){
245 windowTag = "PARTIAL";
247 String groomOutFileName = "dataGrooming." + windowTag + "." + dteStr + ".out";
// Sanity-check that a MOXY loader can be created before doing any work;
// failure exits with a non-zero code.
250 LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
253 catch (Exception ex){
254 LoggingContext.statusCode(StatusCode.ERROR);
255 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
256 logger.error("ERROR - Could not create loader " + LogFormatTools.getStackTop(ex));
257 AAISystemExitUtil.systemExitCloseAAIGraph(1);
261 logger.info(" We will skip the HostCheck as requested. ");
// Mode 1: -f was given — run a fix pass driven by the delete candidates
// recorded in the previous run's output file.
265 if (!prevFileName.isEmpty()) {
266 // They are trying to fix some data based on a data in a
268 logger.info(" Call doTheGrooming() with a previous fileName ["
269 + prevFileName + "] for cleanup. ");
270 Boolean finalShutdownFlag = true;
271 Boolean cacheDbOkFlag = false;
272 doTheGrooming(prevFileName, edgesOnlyFlag, dontFixOrphansFlag,
273 maxRecordsToFix, groomOutFileName, ver, singleCommits,
274 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
275 finalShutdownFlag, cacheDbOkFlag,
276 skipEdgeCheckFlag, timeWindowMinutes);
// Mode 2: -autoFix — detection pass, nap, then a fix pass that consumes
// the first pass's output file.
277 } else if (doAutoFix) {
278 // They want us to run the processing twice -- first to look for
279 // delete candidates, then after
280 // napping for a while, run it again and delete any candidates
281 // that were found by the first run.
282 // Note: we will produce a separate output file for each of the
284 logger.info(" Doing an auto-fix call to Grooming. ");
285 logger.info(" First, Call doTheGrooming() to look at what's out there. ");
286 Boolean finalShutdownFlag = false;
287 Boolean cacheDbOkFlag = true;
288 int fixCandCount = doTheGrooming("", edgesOnlyFlag,
289 dontFixOrphansFlag, maxRecordsToFix, groomOutFileName,
290 ver, singleCommits, dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
291 finalShutdownFlag, cacheDbOkFlag,
292 skipEdgeCheckFlag, timeWindowMinutes);
293 if (fixCandCount == 0) {
294 logger.info(" No fix-Candidates were found by the first pass, so no second/fix-pass is needed. ");
296 // We'll sleep a little and then run a fix-pass based on the
297 // first-run's output file.
299 logger.info("About to sleep for " + sleepMinutes
301 int sleepMsec = sleepMinutes * 60 * 1000;
302 Thread.sleep(sleepMsec);
303 } catch (InterruptedException ie) {
// NOTE(review): the interrupt flag is not re-asserted here; the process
// just exits via AAISystemExitUtil, which makes that moot in practice.
304 logger.info("\n >>> Sleep Thread has been Interrupted <<< ");
305 AAISystemExitUtil.systemExitCloseAAIGraph(0);
308 dteStr = fd.getDateTime();
309 String secondGroomOutFileName = "dataGrooming." + dteStr
311 logger.info(" Now, call doTheGrooming() a second time and pass in the name of the file "
312 + "generated by the first pass for fixing: ["
313 + groomOutFileName + "]");
314 finalShutdownFlag = true;
315 cacheDbOkFlag = false;
316 doTheGrooming(groomOutFileName, edgesOnlyFlag,
317 dontFixOrphansFlag, maxRecordsToFix,
318 secondGroomOutFileName, ver, singleCommits,
319 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
320 finalShutdownFlag, cacheDbOkFlag,
321 skipEdgeCheckFlag, timeWindowMinutes);
// Mode 3 (default): single detection-only pass. A cached DB connection
// is allowed unless -neverUseCache was passed.
324 // Do the grooming - plain vanilla (no fix-it-file, no
326 Boolean finalShutdownFlag = true;
327 logger.info(" Call doTheGrooming() ");
328 Boolean cacheDbOkFlag = true;
330 // They have forbidden us from using a cached db connection.
331 cacheDbOkFlag = false;
333 doTheGrooming("", edgesOnlyFlag, dontFixOrphansFlag,
334 maxRecordsToFix, groomOutFileName, ver, singleCommits,
335 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
336 finalShutdownFlag, cacheDbOkFlag,
337 skipEdgeCheckFlag, timeWindowMinutes);
// Any unexpected failure during grooming is logged; the job still exits
// through the clean graph-closing path below.
339 } catch (Exception ex) {
340 LoggingContext.statusCode(StatusCode.ERROR);
341 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
342 logger.error("Exception while grooming data " + LogFormatTools.getStackTop(ex));
345 logger.info(" Done! ");
346 AAISystemExitUtil.systemExitCloseAAIGraph(0);
353 * @param fileNameForFixing the file name for fixing
354 * @param edgesOnlyFlag the edges only flag
355 * @param dontFixOrphansFlag the dont fix orphans flag
356 * @param maxRecordsToFix the max records to fix
357 * @param groomOutFileName the groom out file name
358 * @param version the version
359 * @param singleCommits the single commits
360 * @param dupeCheckOff the dupe check off
361 * @param dupeFixOn the dupe fix on
362 * @param ghost2CheckOff the ghost 2 check off
363 * @param ghost2FixOn the ghost 2 fix on
364 * @param finalShutdownFlag the final shutdown flag
365 * @param cacheDbOkFlag the cacheDbOk flag
368 private static int doTheGrooming(String fileNameForFixing,
369 Boolean edgesOnlyFlag, Boolean dontFixOrphansFlag,
370 int maxRecordsToFix, String groomOutFileName, String version,
371 Boolean singleCommits,
372 Boolean dupeCheckOff, Boolean dupeFixOn,
373 Boolean ghost2CheckOff, Boolean ghost2FixOn,
374 Boolean finalShutdownFlag, Boolean cacheDbOkFlag,
375 Boolean skipEdgeCheckFlag, int timeWindowMinutes) {
377 logger.debug(" Entering doTheGrooming \n");
379 int cleanupCandidateCount = 0;
380 long windowStartTime = 0; // Translation of the window into a starting timestamp
381 BufferedWriter bw = null;
382 JanusGraph graph = null;
383 JanusGraph graph2 = null;
385 boolean executeFinalCommit = false;
386 Set<String> deleteCandidateList = new LinkedHashSet<>();
387 Set<String> processedVertices = new LinkedHashSet<>();
391 if( timeWindowMinutes > 0 ){
392 // Translate the window value (ie. 30 minutes) into a unix timestamp like
393 // we use in the db - so we can select data created after that time.
394 windowStartTime = figureWindowStartTime( timeWindowMinutes );
398 String targetDir = AAIConstants.AAI_HOME + AAIConstants.AAI_FILESEP
399 + "logs" + AAIConstants.AAI_FILESEP + "data"
400 + AAIConstants.AAI_FILESEP + "dataGrooming";
402 // Make sure the target directory exists
403 new File(targetDir).mkdirs();
405 if (!fileNameForFixing.isEmpty()) {
406 deleteCandidateList = getDeleteList(targetDir,
407 fileNameForFixing, edgesOnlyFlag, dontFixOrphansFlag,
411 if (deleteCandidateList.size() > maxRecordsToFix) {
412 LoggingContext.statusCode(StatusCode.ERROR);
413 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
414 logger.warn(" >> WARNING >> Delete candidate list size ("
415 + deleteCandidateList.size()
416 + ") is too big. The maxFix we are using is: "
418 + ". No candidates will be deleted. ");
419 // Clear out the list so it won't be processed below.
420 deleteCandidateList = new LinkedHashSet<>();
423 String fullOutputFileName = targetDir + AAIConstants.AAI_FILESEP
425 File groomOutFile = new File(fullOutputFileName);
427 groomOutFile.createNewFile();
428 } catch (IOException e) {
429 String emsg = " Problem creating output file ["
430 + fullOutputFileName + "], exception=" + e.getMessage();
431 throw new AAIException("AAI_6124", emsg);
434 logger.info(" Will write to " + fullOutputFileName );
435 bw = new BufferedWriter(new FileWriter(groomOutFile.getAbsoluteFile()));
436 ErrorLogHelper.loadProperties();
438 logger.info(" ---- NOTE --- about to open graph (takes a little while)--------\n");
441 // Since we're just reading (not deleting/fixing anything), we can use
442 // a cached connection to the DB
443 graph = JanusGraphFactory.open(new AAIGraphConfig.Builder(AAIConstants.CACHED_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("cached").buildConfiguration());
446 graph = JanusGraphFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime1").buildConfiguration());
449 String emsg = "null graph object in DataGrooming\n";
450 throw new AAIException("AAI_6101", emsg);
453 logger.debug(" Got the graph object. ");
455 g = graph.newTransaction();
457 String emsg = "null graphTransaction object in DataGrooming\n";
458 throw new AAIException("AAI_6101", emsg);
460 GraphTraversalSource source1 = g.traversal();
462 ArrayList<String> errArr = new ArrayList<>();
463 int totalNodeCount = 0;
464 HashMap<String, String> misMatchedHash = new HashMap<String, String>();
465 HashMap<String, Vertex> orphanNodeHash = new HashMap<String, Vertex>();
466 HashMap<String, Vertex> missingDepNodeHash = new HashMap<String, Vertex>();
467 HashMap<String, Edge> oneArmedEdgeHash = new HashMap<String, Edge>();
468 HashMap<String, String> emptyVertexHash = new HashMap<String, String>();
469 HashMap<String, Vertex> ghostNodeHash = new HashMap<String, Vertex>();
470 ArrayList<String> dupeGroups = new ArrayList<>();
472 Loader loader = LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
474 Set<Entry<String, Introspector>> entrySet = loader.getAllObjects().entrySet();
477 logger.info(" Starting DataGrooming Processing ");
480 logger.info(" NOTE >> Skipping Node processing as requested. Will only process Edges. << ");
483 for (Entry<String, Introspector> entry : entrySet) {
484 String nType = entry.getKey();
486 int thisNtDeleteCount = 0;
488 logger.debug(" > Look at : [" + nType + "] ...");
489 ntList = ntList + "," + nType;
491 // Get a collection of the names of the key properties for this nodeType to use later
492 // Determine what the key fields are for this nodeType - use an arrayList so they
493 // can be gotten out in a consistent order.
494 Set <String> keyPropsSet = entry.getValue().getKeys();
495 ArrayList <String> keyProps = new ArrayList <String> ();
496 keyProps.addAll(keyPropsSet);
498 // Get the types of nodes that this nodetype depends on for uniqueness (if any)
499 Collection <String> depNodeTypes = loader.introspectorFromName(nType).getDependentOn();
501 // Loop through all the nodes of this Node type
502 int lastShownForNt = 0;
503 ArrayList <Vertex> tmpList = new ArrayList <> ();
504 Iterator <Vertex> iterv = source1.V().has("aai-node-type",nType);
505 while (iterv.hasNext()) {
506 // We put the nodes into an ArrayList because the graph.query iterator can time out
507 tmpList.add(iterv.next());
510 Iterator <Vertex> iter = tmpList.iterator();
511 while (iter.hasNext()) {
514 if( thisNtCount == lastShownForNt + 250 ){
515 lastShownForNt = thisNtCount;
516 logger.debug("count for " + nType + " so far = " + thisNtCount );
518 Vertex thisVtx = iter.next();
519 if( windowStartTime > 0 ){
520 // They are using the time-window, so we only want nodes that are updated after a
521 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
522 Object objModTimeStamp = thisVtx.property("aai-last-mod-ts").orElse(null);
523 if( objModTimeStamp != null ){
524 long thisNodeModTime = (long)objModTimeStamp;
525 if( thisNodeModTime < windowStartTime ){
526 // It has a last modified ts and is NOT in our window, so we can pass over it
532 String thisVid = thisVtx.id().toString();
533 if (processedVertices.contains(thisVid)) {
534 logger.debug("skipping already processed vertex: " + thisVid);
538 List <Vertex> secondGetList = new ArrayList <> ();
539 // -----------------------------------------------------------------------
540 // For each vertex of this nodeType, we want to:
541 // a) make sure that it can be retrieved using it's AAI defined key
542 // b) make sure that it is not a duplicate
543 // -----------------------------------------------------------------------
545 // For this instance of this nodeType, get the key properties
546 HashMap<String, Object> propHashWithKeys = new HashMap<>();
547 Iterator<String> keyPropI = keyProps.iterator();
548 while (keyPropI.hasNext()) {
549 String propName = keyPropI.next();
551 //delete an already deleted vertex
552 Object obj = thisVtx.<Object>property(propName).orElse(null);
554 propVal = obj.toString();
556 propHashWithKeys.put(propName, propVal);
559 // If this node is dependent on another for uniqueness, then do the query from that parent node
560 // Note - all of our nodes that are dependent on others for uniqueness are
561 // "children" of that node.
562 boolean depNodeOk = true;
563 if( depNodeTypes.isEmpty() ){
564 // This kind of node is not dependent on any other.
565 // Make sure we can get it back using it's key properties (that is the
566 // phantom checking) and that we only get one. Note - we also need
567 // to collect data for a second type of dupe-checking which is done later.
568 secondGetList = getNodeJustUsingKeyParams( TRANSID, FROMAPPID, source1, nType,
569 propHashWithKeys, version );
572 // This kind of node is dependent on another for uniqueness.
573 // Start at it's parent (the dependent vertex) and make sure we can get it
574 // back using it's key properties and that we only get one.
575 Iterator <Vertex> vertI2 = source1.V(thisVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
576 Vertex parentVtx = null;
578 while( vertI2 != null && vertI2.hasNext() ){
579 parentVtx = vertI2.next();
585 //List<Vertex> vertI2 = g.traversal().V(thisVtx).union(__.outE().has("isParent-REV",true).outV(),__.inE().has("isParent",true).inV()).toList();
586 //if( vertI2.isEmpty()){
588 // It's Missing it's dependent/parent node
590 boolean zeroEdges = false;
592 Iterator<Edge> tmpEdgeIter = thisVtx.edges(Direction.BOTH);
594 while( tmpEdgeIter.hasNext() ){
598 if( edgeCount == 0 ){
601 } catch (Exception ex) {
602 LoggingContext.statusCode(StatusCode.ERROR);
603 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
604 logger.warn("WARNING from inside the for-each-vid-loop orphan-edges-check " + LogFormatTools.getStackTop(ex) );
607 if (deleteCandidateList.contains(thisVid)) {
608 boolean okFlag = true;
610 processedVertices.add(thisVtx.id().toString());
614 } catch (Exception e) {
616 LoggingContext.statusCode(StatusCode.ERROR);
617 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
618 logger.error("ERROR trying to delete missing-dep-node VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
621 logger.info(" DELETED missing-dep-node VID = " + thisVid);
624 // We count nodes missing their depNodes two ways - the first if it has
625 // at least some edges, and the second if it has zero edges. Either
626 // way, they are effectively orphaned.
627 // NOTE - Only nodes that have dependent nodes are ever considered "orphaned".
629 missingDepNodeHash.put(thisVid, thisVtx);
632 orphanNodeHash.put(thisVid, thisVtx);
636 else if ( pCount > 1 ){
637 // Not sure how this could happen? Should we do something here?
641 // We found the parent - so use it to do the second-look.
642 // NOTE --- We're just going to do the same check from the other direction - because
643 // there could be duplicates or the pointer going the other way could be broken
644 ArrayList <Vertex> tmpListSec = new ArrayList <> ();
646 tmpListSec = getConnectedChildrenOfOneType( source1, parentVtx, nType ) ;
647 Iterator<Vertex> vIter = tmpListSec.iterator();
648 while (vIter.hasNext()) {
649 Vertex tmpV = vIter.next();
650 if( vertexHasTheseKeys(tmpV, propHashWithKeys) ){
651 secondGetList.add(tmpV);
655 }// end of -- else this is a dependent node -- piece
657 if( depNodeOk && (secondGetList == null || secondGetList.size() == 0) ){
658 // We could not get the node back using it's own key info.
659 // So, it's a PHANTOM
660 if (deleteCandidateList.contains(thisVid)) {
661 boolean okFlag = true;
666 } catch (Exception e) {
668 LoggingContext.statusCode(StatusCode.ERROR);
669 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
670 logger.error("ERROR trying to delete phantom VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
673 logger.info(" DELETED VID = " + thisVid);
676 ghostNodeHash.put(thisVid, thisVtx);
679 else if( (secondGetList.size() > 1) && depNodeOk && !dupeCheckOff ){
680 // Found some DUPLICATES - need to process them
681 logger.info(" - now check Dupes for this guy - ");
682 List<String> tmpDupeGroups = checkAndProcessDupes(
683 TRANSID, FROMAPPID, g, source1, version,
684 nType, secondGetList, dupeFixOn,
685 deleteCandidateList, singleCommits, dupeGroups, loader);
686 Iterator<String> dIter = tmpDupeGroups.iterator();
687 while (dIter.hasNext()) {
688 // Add in any newly found dupes to our running list
689 String tmpGrp = dIter.next();
690 logger.info("Found set of dupes: [" + tmpGrp + "]");
691 dupeGroups.add(tmpGrp);
695 catch (AAIException e1) {
696 LoggingContext.statusCode(StatusCode.ERROR);
697 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
698 logger.warn(" For nodeType = " + nType + " Caught exception", e1);
699 errArr.add(e1.getErrorObject().toString());
701 catch (Exception e2) {
702 LoggingContext.statusCode(StatusCode.ERROR);
703 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
704 logger.warn(" For nodeType = " + nType
705 + " Caught exception", e2);
706 errArr.add(e2.getMessage());
708 }// try block to enclose looping over each single vertex
709 catch (Exception exx) {
710 LoggingContext.statusCode(StatusCode.ERROR);
711 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
712 logger.warn("WARNING from inside the while-verts-loop ", exx);
715 } // while loop for each record of a nodeType
717 if( depNodeTypes.isEmpty() && !dupeCheckOff ){
718 // For this nodeType, we haven't looked at the possibility of a
719 // non-dependent node where two verts have same key info
720 ArrayList<ArrayList<Vertex>> nonDependentDupeSets = new ArrayList<ArrayList<Vertex>>();
721 nonDependentDupeSets = getDupeSets4NonDepNodes(
722 TRANSID, FROMAPPID, g,
723 version, nType, tmpList,
725 // For each set found (each set is for a unique instance of key-values),
726 // process the dupes found
727 Iterator<ArrayList<Vertex>> dsItr = nonDependentDupeSets.iterator();
728 while( dsItr.hasNext() ){
729 ArrayList<Vertex> dupeList = dsItr.next();
730 logger.info(" - now check Dupes for some non-dependent guys - ");
731 List<String> tmpDupeGroups = checkAndProcessDupes(
732 TRANSID, FROMAPPID, g, source1, version,
733 nType, dupeList, dupeFixOn,
734 deleteCandidateList, singleCommits, dupeGroups, loader);
735 Iterator<String> dIter = tmpDupeGroups.iterator();
736 while (dIter.hasNext()) {
737 // Add in any newly found dupes to our running list
738 String tmpGrp = dIter.next();
739 logger.info("Found set of dupes: [" + tmpGrp + "]");
740 dupeGroups.add(tmpGrp);
744 }// end of extra dupe check for non-dependent nodes
746 if ( (thisNtDeleteCount > 0) && singleCommits ) {
747 // NOTE - the singleCommits option is not used in normal processing
749 g = AAIGraph.getInstance().getGraph().newTransaction();
752 thisNtDeleteCount = 0;
753 logger.info( " Processed " + thisNtCount + " records for [" + nType + "], " + totalNodeCount + " total overall. " );
755 }// While-loop for each node type
757 }// end of check to make sure we weren't only supposed to do edges
760 if( !skipEdgeCheckFlag ){
761 // --------------------------------------------------------------------------------------
762 // Now, we're going to look for one-armed-edges. Ie. an edge that
764 // been deleted (because a vertex on one side was deleted) but
765 // somehow was not deleted.
766 // So the one end of it points to a vertexId -- but that vertex is
768 // --------------------------------------------------------------------------------------
770 // To do some strange checking - we need a second graph object
771 logger.debug(" ---- DEBUG --- about to open a SECOND graph (takes a little while)--------\n");
772 // Note - graph2 just reads - but we want it to use a fresh connection to
773 // the database, so we are NOT using the CACHED DB CONFIG here.
774 graph2 = JanusGraphFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime2").buildConfiguration());
775 if (graph2 == null) {
776 String emsg = "null graph2 object in DataGrooming\n";
777 throw new AAIException("AAI_6101", emsg);
779 logger.debug("Got the graph2 object... \n");
781 g2 = graph2.newTransaction();
783 String emsg = "null graphTransaction2 object in DataGrooming\n";
784 throw new AAIException("AAI_6101", emsg);
787 ArrayList<Vertex> vertList = new ArrayList<>();
788 Iterator<Vertex> vItor3 = g.traversal().V();
789 // Gotta hold these in a List - or else HBase times out as you cycle
791 while (vItor3.hasNext()) {
792 Vertex v = vItor3.next();
797 Iterator<Vertex> vItor2 = vertList.iterator();
798 logger.info(" Checking for bad edges --- ");
800 while (vItor2.hasNext()) {
805 } catch (Exception vex) {
806 LoggingContext.statusCode(StatusCode.ERROR);
807 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
808 logger.warn(">>> WARNING trying to get next vertex on the vItor2 ");
813 String thisVertId = "";
815 thisVertId = v.id().toString();
816 } catch (Exception ev) {
817 LoggingContext.statusCode(StatusCode.ERROR);
818 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
819 logger.warn("WARNING when doing getId() on a vertex from our vertex list. ");
822 if (ghostNodeHash.containsKey(thisVertId)) {
823 // This is a phantom node, so don't try to use it
824 logger.info(" >> Skipping edge check for edges from vertexId = "
826 + ", since that guy is a Phantom Node");
830 if( windowStartTime > 0 ){
831 // They are using the time-window, so we only want nodes that are updated after a
832 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
833 Object objModTimeStamp = v.property("aai-last-mod-ts").orElse(null);
834 if( objModTimeStamp != null ){
835 long thisNodeModTime = (long)objModTimeStamp;
836 if( thisNodeModTime < windowStartTime ){
837 // It has a last modified ts and is NOT in our window, so we can pass over it
843 if (counter == lastShown + 250) {
845 logger.info("... Checking edges for vertex # "
848 Iterator<Edge> eItor = v.edges(Direction.BOTH);
849 while (eItor.hasNext()) {
855 } catch (Exception iex) {
856 LoggingContext.statusCode(StatusCode.ERROR);
857 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
858 logger.warn(">>> WARNING trying to get next edge on the eItor ", iex);
864 } catch (Exception err) {
865 LoggingContext.statusCode(StatusCode.ERROR);
866 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
867 logger.warn(">>> WARNING trying to get edge's In-vertex ", err);
871 Vertex ghost2 = null;
873 Boolean keysMissing = true;
874 Boolean cantGetUsingVid = false;
877 Object ob = vIn.<Object>property("aai-node-type").orElse(null);
879 vNtI = ob.toString();
880 keysMissing = anyKeyFieldsMissing(vNtI, vIn, loader);
885 vIdI = ob.toString();
886 vIdLong = Long.parseLong(vIdI);
889 if( ! ghost2CheckOff ){
890 Vertex connectedVert = g2.traversal().V(vIdLong).next();
891 if( connectedVert == null ) {
892 LoggingContext.statusCode(StatusCode.ERROR);
893 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
894 logger.warn( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
895 cantGetUsingVid = true;
897 // If we can NOT get this ghost with the SECOND graph-object,
898 // it is still a ghost since even though we can get data about it using the FIRST graph
901 ghost2 = g.traversal().V(vIdLong).next();
903 catch( Exception ex){
904 LoggingContext.statusCode(StatusCode.ERROR);
905 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
906 logger.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
908 if( ghost2 != null ){
909 ghostNodeHash.put(vIdI, ghost2);
912 }// end of the ghost2 checking
914 catch (Exception err) {
915 LoggingContext.statusCode(StatusCode.ERROR);
916 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
917 logger.warn(">>> WARNING trying to get edge's In-vertex props ", err);
920 if (keysMissing || vIn == null || vNtI.equals("")
921 || cantGetUsingVid) {
922 // this is a bad edge because it points to a vertex
923 // that isn't there anymore or is corrupted
924 String thisEid = e.id().toString();
925 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdI)) {
926 boolean okFlag = true;
927 if (!vIdI.equals("")) {
928 // try to get rid of the corrupted vertex
930 if( (ghost2 != null) && ghost2FixOn ){
937 // NOTE - the singleCommits option is not used in normal processing
939 g = AAIGraph.getInstance().getGraph().newTransaction();
942 } catch (Exception e1) {
944 LoggingContext.statusCode(StatusCode.ERROR);
945 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
946 logger.warn("WARNING when trying to delete bad-edge-connected VERTEX VID = "
950 logger.info(" DELETED vertex from bad edge = "
954 // remove the edge if we couldn't get the
959 // NOTE - the singleCommits option is not used in normal processing
961 g = AAIGraph.getInstance().getGraph().newTransaction();
964 } catch (Exception ex) {
965 // NOTE - often, the exception is just
966 // that this edge has already been
969 LoggingContext.statusCode(StatusCode.ERROR);
970 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
971 logger.warn("WARNING when trying to delete edge = "
975 logger.info(" DELETED edge = " + thisEid);
979 oneArmedEdgeHash.put(thisEid, e);
980 if ((vIn != null) && (vIn.id() != null)) {
981 emptyVertexHash.put(thisEid, vIn.id()
988 vOut = e.outVertex();
989 } catch (Exception err) {
990 LoggingContext.statusCode(StatusCode.ERROR);
991 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
992 logger.warn(">>> WARNING trying to get edge's Out-vertex ");
998 cantGetUsingVid = false;
1001 Object ob = vOut.<Object>property("aai-node-type").orElse(null);
1003 vNtO = ob.toString();
1004 keysMissing = anyKeyFieldsMissing(vNtO,
1010 vIdO = ob.toString();
1011 vIdLong = Long.parseLong(vIdO);
1014 if( ! ghost2CheckOff ){
1015 Vertex connectedVert = g2.traversal().V(vIdLong).next();
1016 if( connectedVert == null ) {
1017 cantGetUsingVid = true;
1018 logger.info( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
1019 // If we can get this ghost with the other graph-object, then get it -- it's still a ghost
1021 ghost2 = g.traversal().V(vIdLong).next();
1023 catch( Exception ex){
1024 LoggingContext.statusCode(StatusCode.ERROR);
1025 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1026 logger.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
1028 if( ghost2 != null ){
1029 ghostNodeHash.put(vIdO, ghost2);
1033 } catch (Exception err) {
1034 LoggingContext.statusCode(StatusCode.ERROR);
1035 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1036 logger.warn(">>> WARNING trying to get edge's Out-vertex props ", err);
1039 if (keysMissing || vOut == null || vNtO.equals("")
1040 || cantGetUsingVid) {
1041 // this is a bad edge because it points to a vertex
1042 // that isn't there anymore
1043 String thisEid = e.id().toString();
1044 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdO)) {
1045 boolean okFlag = true;
1046 if (!vIdO.equals("")) {
1047 // try to get rid of the corrupted vertex
1049 if( (ghost2 != null) && ghost2FixOn ){
1052 else if (vOut != null) {
1055 if (singleCommits) {
1056 // NOTE - the singleCommits option is not used in normal processing
1058 g = AAIGraph.getInstance().getGraph().newTransaction();
1061 } catch (Exception e1) {
1063 LoggingContext.statusCode(StatusCode.ERROR);
1064 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1065 logger.warn("WARNING when trying to delete bad-edge-connected VID = "
1069 logger.info(" DELETED vertex from bad edge = "
1073 // remove the edge if we couldn't get the
1077 if (singleCommits) {
1078 // NOTE - the singleCommits option is not used in normal processing
1080 g = AAIGraph.getInstance().getGraph().newTransaction();
1083 } catch (Exception ex) {
1084 // NOTE - often, the exception is just
1085 // that this edge has already been
1088 LoggingContext.statusCode(StatusCode.ERROR);
1089 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1090 logger.warn("WARNING when trying to delete edge = "
1094 logger.info(" DELETED edge = " + thisEid);
1098 oneArmedEdgeHash.put(thisEid, e);
1099 if ((vOut != null) && (vOut.id() != null)) {
1100 emptyVertexHash.put(thisEid, vOut.id()
1105 }// End of while-edges-loop
1106 } catch (Exception exx) {
1107 LoggingContext.statusCode(StatusCode.ERROR);
1108 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1109 logger.warn("WARNING from in the while-verts-loop ", exx);
1111 }// End of while-vertices-loop (the edge-checking)
1112 } // end of -- if we're not skipping the edge-checking
1115 deleteCount = deleteCount + dupeGrpsDeleted;
1116 if (!singleCommits && deleteCount > 0) {
1118 logger.info("About to do the commit for "
1119 + deleteCount + " removes. ");
1120 executeFinalCommit = true;
1121 logger.info("Commit was successful ");
1122 } catch (Exception excom) {
1123 LoggingContext.statusCode(StatusCode.ERROR);
1124 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1125 logger.error(" >>>> ERROR <<<< Could not commit changes. " + LogFormatTools.getStackTop(excom));
1130 int ghostNodeCount = ghostNodeHash.size();
1131 int orphanNodeCount = orphanNodeHash.size();
1132 int missingDepNodeCount = missingDepNodeHash.size();
1133 int oneArmedEdgeCount = oneArmedEdgeHash.size();
1134 int dupeCount = dupeGroups.size();
1136 deleteCount = deleteCount + dupeGrpsDeleted;
1138 bw.write("\n\n ============ Summary ==============\n");
1139 if( timeWindowMinutes == 0 ){
1140 bw.write("Ran FULL data grooming (no time-window). \n");
1143 bw.write("Ran PARTIAL data grooming just looking at data added/updated in the last " + timeWindowMinutes + " minutes. \n");
1146 bw.write("\nRan these nodeTypes: " + ntList + "\n\n");
1147 bw.write("There were this many delete candidates from previous run = "
1148 + deleteCandidateList.size() + "\n");
1149 if (dontFixOrphansFlag) {
1150 bw.write(" Note - we are not counting orphan nodes since the -dontFixOrphans parameter was used. \n");
1152 bw.write("Deleted this many delete candidates = " + deleteCount
1154 bw.write("Total number of nodes looked at = " + totalNodeCount
1156 bw.write("Ghost Nodes identified = " + ghostNodeCount + "\n");
1157 bw.write("Orphan Nodes identified = " + orphanNodeCount + "\n");
1158 bw.write("Bad Edges identified = " + oneArmedEdgeCount + "\n");
1159 bw.write("Missing Dependent Edge (but not orphaned) node count = "
1160 + missingDepNodeCount + "\n");
1161 bw.write("Duplicate Groups count = " + dupeCount + "\n");
1162 bw.write("MisMatching Label/aai-node-type count = "
1163 + misMatchedHash.size() + "\n");
1165 bw.write("\n ------------- Delete Candidates ---------\n");
1166 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1168 String vid = entry.getKey();
1169 bw.write("DeleteCandidate: Phantom Vid = [" + vid + "]\n");
1170 cleanupCandidateCount++;
1172 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1174 String vid = entry.getKey();
1175 bw.write("DeleteCandidate: OrphanDepNode Vid = [" + vid + "]\n");
1176 if (!dontFixOrphansFlag) {
1177 cleanupCandidateCount++;
1180 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1181 String eid = entry.getKey();
1182 bw.write("DeleteCandidate: Bad EDGE Edge-id = [" + eid + "]\n");
1183 cleanupCandidateCount++;
1185 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1187 String vid = entry.getKey();
1188 bw.write("DeleteCandidate: (maybe) missingDepNode Vid = ["
1190 cleanupCandidateCount++;
1192 bw.write("\n-- NOTE - To see DeleteCandidates for Duplicates, you need to look in the Duplicates Detail section below.\n");
1194 bw.write("\n ------------- GHOST NODES - detail ");
1195 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1198 String vid = entry.getKey();
1199 bw.write("\n ==> Phantom Vid = " + vid + "\n");
1200 ArrayList<String> retArr = showPropertiesForNode(
1201 TRANSID, FROMAPPID, entry.getValue());
1202 for (String info : retArr) {
1203 bw.write(info + "\n");
1206 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1208 for (String info : retArr) {
1209 bw.write(info + "\n");
1211 } catch (Exception dex) {
1212 LoggingContext.statusCode(StatusCode.ERROR);
1213 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1214 logger.error("error trying to print detail info for a ghost-node: " + LogFormatTools.getStackTop(dex));
1218 bw.write("\n ------------- Missing Dependent Edge ORPHAN NODES - detail: ");
1219 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1222 String vid = entry.getKey();
1223 bw.write("\n> Orphan Node Vid = " + vid + "\n");
1224 ArrayList<String> retArr = showPropertiesForNode(
1225 TRANSID, FROMAPPID, entry.getValue());
1226 for (String info : retArr) {
1227 bw.write(info + "\n");
1230 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1232 for (String info : retArr) {
1233 bw.write(info + "\n");
1235 } catch (Exception dex) {
1236 LoggingContext.statusCode(StatusCode.ERROR);
1237 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1238 logger.error("error trying to print detail info for a Orphan Node /missing dependent edge " + LogFormatTools.getStackTop(dex));
1242 bw.write("\n ------------- Missing Dependent Edge (but not orphan) NODES: ");
1243 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1246 String vid = entry.getKey();
1247 bw.write("\n> Missing edge to Dependent Node (but has edges) Vid = "
1249 ArrayList<String> retArr = showPropertiesForNode(
1250 TRANSID, FROMAPPID, entry.getValue());
1251 for (String info : retArr) {
1252 bw.write(info + "\n");
1255 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1257 for (String info : retArr) {
1258 bw.write(info + "\n");
1260 } catch (Exception dex) {
1261 LoggingContext.statusCode(StatusCode.ERROR);
1262 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1263 logger.error("error trying to print detail info for a node missing its dependent edge but not an orphan "
1264 + LogFormatTools.getStackTop(dex));
1268 bw.write("\n ------------- EDGES pointing to empty/bad vertices: ");
1269 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1271 String eid = entry.getKey();
1272 Edge thisE = entry.getValue();
1273 String badVid = emptyVertexHash.get(eid);
1274 bw.write("\n> Edge pointing to bad vertex (Vid = "
1275 + badVid + ") EdgeId = " + eid + "\n");
1276 bw.write("Label: [" + thisE.label() + "]\n");
1277 Iterator<Property<Object>> pI = thisE.properties();
1278 while (pI.hasNext()) {
1279 Property<Object> propKey = pI.next();
1280 bw.write("Prop: [" + propKey + "], val = ["
1281 + propKey.value() + "]\n");
1283 } catch (Exception pex) {
1284 LoggingContext.statusCode(StatusCode.ERROR);
1285 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1286 logger.error("error trying to print empty/bad vertex data: " + LogFormatTools.getStackTop(pex));
1290 bw.write("\n ------------- Duplicates: ");
1291 Iterator<String> dupeIter = dupeGroups.iterator();
1292 int dupeSetCounter = 0;
1293 while (dupeIter.hasNext()) {
1295 String dset = (String) dupeIter.next();
1297 bw.write("\n --- Duplicate Group # " + dupeSetCounter
1298 + " Detail -----------\n");
1300 // We expect each line to have at least two vid's, followed
1301 // by the preferred one to KEEP
1302 String[] dupeArr = dset.split("\\|");
1303 ArrayList<String> idArr = new ArrayList<>();
1304 int lastIndex = dupeArr.length - 1;
1305 for (int i = 0; i <= lastIndex; i++) {
1306 if (i < lastIndex) {
1307 // This is not the last entry, it is one of the
1308 // dupes, so we want to show all its info
1309 bw.write(" >> Duplicate Group # "
1310 + dupeSetCounter + " Node # " + i
1312 String vidString = dupeArr[i];
1313 idArr.add(vidString);
1314 long longVertId = Long.parseLong(vidString);
1315 Iterator<Vertex> vtxIterator = g.vertices(longVertId);
1317 if (vtxIterator.hasNext()) {
1318 vtx = vtxIterator.next();
1320 ArrayList<String> retArr = showPropertiesForNode(TRANSID, FROMAPPID, vtx);
1321 for (String info : retArr) {
1322 bw.write(info + "\n");
1325 retArr = showAllEdgesForNode(TRANSID,
1327 for (String info : retArr) {
1328 bw.write(info + "\n");
1331 // This is the last entry which should tell us if we
1332 // have a preferred keeper
1333 String prefString = dupeArr[i];
1334 if (prefString.equals("KeepVid=UNDETERMINED")) {
1335 bw.write("\n For this group of duplicates, could not tell which one to keep.\n");
1336 bw.write(" >>> This group needs to be taken care of with a manual/forced-delete.\n");
1338 // If we know which to keep, then the prefString
1339 // should look like, "KeepVid=12345"
1340 String[] prefArr = prefString.split("=");
1341 if (prefArr.length != 2
1342 || (!prefArr[0].equals("KeepVid"))) {
1343 throw new Exception("Bad format. Expecting KeepVid=999999");
1345 String keepVidStr = prefArr[1];
1346 if (idArr.contains(keepVidStr)) {
1347 bw.write("\n The vertex we want to KEEP has vertexId = "
1349 bw.write("\n The others become delete candidates: \n");
1350 idArr.remove(keepVidStr);
1351 for (int x = 0; x < idArr.size(); x++) {
1352 cleanupCandidateCount++;
1353 bw.write("DeleteCandidate: Duplicate Vid = ["
1354 + idArr.get(x) + "]\n");
1357 throw new Exception("ERROR - Vertex Id to keep not found in list of dupes. dset = ["
1361 }// else we know which one to keep
1363 }// for each vertex in a group
1364 } catch (Exception dex) {
1365 LoggingContext.statusCode(StatusCode.ERROR);
1366 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1367 logger.error("error trying to print duplicate vertex data " + LogFormatTools.getStackTop(dex));
1370 }// while - work on each group of dupes
1372 bw.write("\n ------------- Mis-matched Label/aai-node-type Nodes: \n ");
1373 for (Map.Entry<String, String> entry : misMatchedHash.entrySet()) {
1374 String msg = entry.getValue();
1375 bw.write("MixedMsg = " + msg + "\n");
1378 bw.write("\n ------------- Got these errors while processing: \n");
1379 Iterator<String> errIter = errArr.iterator();
1380 while (errIter.hasNext()) {
1381 String line = (String) errIter.next();
1382 bw.write(line + "\n");
1387 logger.info("\n ------------- Done doing all the checks ------------ ");
1388 logger.info("Output will be written to " + fullOutputFileName);
1390 if (cleanupCandidateCount > 0) {
1391 // Technically, this is not an error -- but we're throwing this
1392 // error so that hopefully a
1393 // monitoring system will pick it up and do something with it.
1394 throw new AAIException("AAI_6123", "See file: [" + fullOutputFileName
1395 + "] and investigate delete candidates. ");
1397 } catch (AAIException e) {
1398 LoggingContext.statusCode(StatusCode.ERROR);
1399 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1400 logger.error("Caught AAIException while grooming data");
1401 ErrorLogHelper.logException(e);
1402 } catch (Exception ex) {
1403 LoggingContext.statusCode(StatusCode.ERROR);
1404 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1405 logger.error("Caught exception while grooming data");
1406 ErrorLogHelper.logError("AAI_6128", ex.getMessage() + ", resolve and rerun dataGrooming");
1412 } catch (IOException iox) {
1413 LoggingContext.statusCode(StatusCode.ERROR);
1414 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1415 logger.warn("Got an IOException trying to close bufferedWriter() \n", iox);
1419 if (g != null && g.tx().isOpen()) {
1420 // Any changes that worked correctly should have already done
1423 if (executeFinalCommit) {
1427 } catch (Exception ex) {
1428 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed
1429 LoggingContext.statusCode(StatusCode.ERROR);
1430 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1431 logger.warn("WARNING from final graphTransaction.rollback()", ex);
1435 if (g2 != null && g2.tx().isOpen()) {
1436 // Any changes that worked correctly should have already done
1440 } catch (Exception ex) {
1441 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed
1442 LoggingContext.statusCode(StatusCode.ERROR);
1443 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1444 logger.warn("WARNING from final graphTransaction2.rollback()", ex);
1448 if( finalShutdownFlag ){
1450 if( graph != null && graph.isOpen() ){
1454 } catch (Exception ex) {
1455 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed{
1456 LoggingContext.statusCode(StatusCode.ERROR);
1457 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1458 logger.warn("WARNING from final graph.shutdown()", ex);
1462 if( graph2 != null && graph2.isOpen() ){
1463 graph2.tx().close();
1466 } catch (Exception ex) {
1467 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed{
1468 LoggingContext.statusCode(StatusCode.ERROR);
1469 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1470 logger.warn("WARNING from final graph2.shutdown()", ex);
1476 return cleanupCandidateCount;
1478 }// end of doTheGrooming()
1482 * Vertex has these keys.
1484 * @param tmpV the tmp V
1485 * @param propHashWithKeys the prop hash with keys
1486 * @return the boolean
// Checks whether the given vertex carries every key property listed in
// propHashWithKeys, comparing values as strings via toString().
// NOTE(review): this view of the file is fragmentary -- the return statements
// for the null-property and value-mismatch branches are not visible here, so
// the exact Boolean returned in each branch must be confirmed against the full source.
1488 private static Boolean vertexHasTheseKeys( Vertex tmpV, HashMap <String, Object> propHashWithKeys) {
// Walk each expected key/value pair from the hash.
1489 Iterator <?> it = propHashWithKeys.entrySet().iterator();
1490 while( it.hasNext() ){
1491 String propName = "";
1492 String propVal = "";
1493 Map.Entry <?,?>propEntry = (Map.Entry<?,?>)it.next();
1494 Object propNameObj = propEntry.getKey();
// Defensive null checks: a null key or value is treated as the empty string.
1495 if( propNameObj != null ){
1496 propName = propNameObj.toString();
1498 Object propValObj = propEntry.getValue();
1499 if( propValObj != null ){
1500 propVal = propValObj.toString();
// Read the corresponding property off the vertex; orElse(null) means
// "property absent" is represented as a Java null.
1502 Object checkValObj = tmpV.<Object>property(propName).orElse(null);
1503 if( checkValObj == null ) {
// Property present but its string form differs from the expected value.
1506 else if( !propVal.equals(checkValObj.toString()) ){
1515 * Any key fields missing.
1517 * @param nType the node type (aai-node-type value) of the vertex being checked
1519 * @return the boolean
// Returns whether the vertex is missing (or has an empty value for) any of the
// key properties required by its node type, as defined by the schema Loader.
// An unrecognized node type is converted to an AAIException ("AAI_6121") which
// is caught below so the method reports "not missing keys" rather than failing.
// NOTE(review): the return statements themselves fall in gaps of this view of
// the file; the per-branch return values should be confirmed against the full source.
1521 private static Boolean anyKeyFieldsMissing(String nType, Vertex v, Loader loader) {
1524 Introspector obj = null;
// Look up the schema introspector for this node type.
1526 obj = loader.introspectorFromName(nType);
1527 } catch (AAIUnknownObjectException e) {
1528 // They gave us a non-empty nodeType but our NodeKeyProps does
1529 // not have data for it. Since we do not know what the
1530 // key params are for this type of node, we will just
1532 String emsg = " -- WARNING -- Unrecognized nodeType: [" + nType
1533 + "]. We cannot determine required keys for this nType. ";
1534 // NOTE - this will be caught below and a "false" returned
1535 throw new AAIException("AAI_6121", emsg);
1538 // Determine what the key fields are for this nodeType
1539 Collection <String> keyPropNamesColl = obj.getKeys();
1540 Iterator<String> keyPropI = keyPropNamesColl.iterator();
1541 while (keyPropI.hasNext()) {
1542 String propName = keyPropI.next();
// Absent property -> null; present-but-empty string also counts as missing.
1543 Object ob = v.<Object>property(propName).orElse(null);
1544 if (ob == null || ob.toString().equals("")) {
1545 // It is missing a key property
1549 } catch (AAIException e) {
1550 // Something was wrong -- but since we weren't able to check
1551 // the keys, we will not declare that it is missing keys.
1559 * Gets the delete list.
1561 * @param targetDir the target dir
1562 * @param fileName the file name
1563 * @param edgesOnlyFlag the edges only flag
1564 * @param dontFixOrphans the dont fix orphans
1565 * @param dupeFixOn the dupe fix on
1566 * @return the delete list
1567 * @throws AAIException the AAI exception
// Reads a previous grooming run's output file and extracts the vertex/edge ids
// that were flagged as delete candidates, honoring the edges-only /
// dont-fix-orphans / dupe-fix filters.  Returns the ids as an insertion-ordered set.
// Expected input lines look like the ones this tool writes, e.g.
//   "DeleteCandidate: Phantom Vid = [12345]"
1569 private static Set<String> getDeleteList(String targetDir,
1570 String fileName, Boolean edgesOnlyFlag, Boolean dontFixOrphans,
1571 Boolean dupeFixOn) throws AAIException {
1573 // Look in the file for lines formated like we expect - pull out any
1574 // Vertex Id's to delete on this run
1575 Set<String> delList = new LinkedHashSet<>();
1576 String fullFileName = targetDir + AAIConstants.AAI_FILESEP + fileName;
// try-with-resources guarantees the reader is closed even on error.
1578 try(BufferedReader br = new BufferedReader(new FileReader(fullFileName))) {
1579 String line = br.readLine();
1580 while (line != null) {
1581 if (!"".equals(line) && line.startsWith("DeleteCandidate")) {
1582 if (edgesOnlyFlag && (!line.contains("Bad Edge"))) {
1583 // We're not going to process edge guys
1584 } else if (dontFixOrphans && line.contains("Orphan")) {
1585 // We're not going to process orphans
1586 } else if (!dupeFixOn && line.contains("Duplicate")) {
1587 // We're not going to process Duplicates
// begIndex+6 skips the 5 matched chars of "id = " plus the opening '[';
// endIndex is the closing ']' -- i.e. vidVal is the bracketed id.
1589 int begIndex = line.indexOf("id = ");
1590 int endIndex = line.indexOf("]");
1591 String vidVal = line.substring(begIndex + 6, endIndex);
1592 delList.add(vidVal);
1595 line = br.readLine();
1598 } catch (IOException e) {
1599 throw new AAIException("AAI_6124", e, "Could not open input-file [" + fullFileName
1600 + "], exception= " + e.getMessage());
1605 }// end of getDeleteList
1608 * Gets the preferred dupe.
1610 * @param transId the trans id
1611 * @param fromAppId the from app id
1613 * @param dupeVertexList the dupe vertex list
1614 * @param ver the version string passed through to the duplicate-resolution logic
1616 * @throws AAIException the AAI exception
// Given a list of vertices that appear to be duplicates of one another, picks
// the single vertex that should be kept by repeatedly comparing the current
// favorite against the next candidate via pickOneOfTwoDupes().  Returns null
// (nullVtx) when no preference can be determined.
1618 public static Vertex getPreferredDupe(String transId,
1619 String fromAppId, GraphTraversalSource g,
1620 ArrayList<Vertex> dupeVertexList, String ver, Loader loader)
1621 throws AAIException {
1623 // This method assumes that it is being passed a List of vertex objects
1625 // violate our uniqueness constraints.
1627 Vertex nullVtx = null;
// Degenerate inputs: null/empty list has no winner; a single entry wins trivially.
1629 if (dupeVertexList == null) {
1632 int listSize = dupeVertexList.size();
1633 if (listSize == 0) {
1636 if (listSize == 1) {
1637 return (dupeVertexList.get(0));
// Tournament-style reduction: compare the running favorite to each remaining
// candidate; if any pairwise comparison is undecidable, give up.
1640 Vertex vtxPreferred = null;
1641 Vertex currentFaveVtx = dupeVertexList.get(0);
1642 for (int i = 1; i < listSize; i++) {
1643 Vertex vtxB = dupeVertexList.get(i);
1644 vtxPreferred = pickOneOfTwoDupes(transId, fromAppId, g,
1645 currentFaveVtx, vtxB, ver, loader);
1646 if (vtxPreferred == null) {
1647 // We couldn't choose one
1650 currentFaveVtx = vtxPreferred;
1654 return (currentFaveVtx);
1656 } // end of getPreferredDupe()
1659 * Pick one of two dupes.
1661 * @param transId the trans id
1662 * @param fromAppId the from app id
1664 * @param vtxA the vtx A
1665 * @param vtxB the vtx B
1666 * @param ver the ver
1668 * @throws AAIException the AAI exception
// Decides which of two apparently-duplicate vertices should be kept.
// Returns the preferred vertex, or null when the pair is not safely decidable
// (different node types, mismatched key values, or mismatched dependent nodes).
// Decision order: (1) same dependent node required; (2) identical edge sets ->
// prefer the one the unique index points at, else the lower vertex id;
// (3)/(4) superset of the other's edges -> prefer the superset, unless the
// index points at the other vertex.
1670 public static Vertex pickOneOfTwoDupes(String transId,
1671 String fromAppId, GraphTraversalSource g, Vertex vtxA,
1672 Vertex vtxB, String ver, Loader loader) throws AAIException {
1674 Vertex nullVtx = null;
1675 Vertex preferredVtx = null;
// NOTE(review): new Long(String) is deprecated; Long.valueOf would be the
// modern equivalent (code intentionally left unchanged here).
1677 Long vidA = new Long(vtxA.id().toString());
1678 Long vidB = new Long(vtxB.id().toString());
// Both vertices must report the same non-empty aai-node-type to be comparable.
1680 String vtxANodeType = "";
1681 String vtxBNodeType = "";
1682 Object objType = vtxA.<Object>property("aai-node-type").orElse(null);
1683 if (objType != null) {
1684 vtxANodeType = objType.toString();
1686 objType = vtxB.<Object>property("aai-node-type").orElse(null);
1687 if (objType != null) {
1688 vtxBNodeType = objType.toString();
1691 if (vtxANodeType.equals("") || (!vtxANodeType.equals(vtxBNodeType))) {
1692 // Either they're not really dupes or there's some bad data - so
1697 // Check that node A and B both have the same key values (or else they
1699 // (We'll check dep-node later)
1700 // Determine what the key fields are for this nodeType
1701 Collection <String> keyProps = new ArrayList <>();
1702 HashMap <String,Object> keyPropValsHash = new HashMap <String,Object>();
1704 keyProps = loader.introspectorFromName(vtxANodeType).getKeys();
1705 } catch (AAIUnknownObjectException e) {
1706 logger.warn("Required property not found", e);
1707 throw new AAIException("AAI_6105", "Required Property name(s) not found for nodeType = " + vtxANodeType + ")");
// Compare every schema key property on A and B as strings; any empty or
// mismatched key value means we cannot call them duplicates.
1710 Iterator<String> keyPropI = keyProps.iterator();
1711 while (keyPropI.hasNext()) {
1712 String propName = keyPropI.next();
1713 String vtxAKeyPropVal = "";
1714 objType = vtxA.<Object>property(propName).orElse(null);
1715 if (objType != null) {
1716 vtxAKeyPropVal = objType.toString();
1718 String vtxBKeyPropVal = "";
1719 objType = vtxB.<Object>property(propName).orElse(null);
1720 if (objType != null) {
1721 vtxBKeyPropVal = objType.toString();
1724 if (vtxAKeyPropVal.equals("")
1725 || (!vtxAKeyPropVal.equals(vtxBKeyPropVal))) {
1726 // Either they're not really dupes or they are missing some key
1727 // data - so don't pick one
1731 // Keep these around for (potential) use later
1732 keyPropValsHash.put(propName, vtxAKeyPropVal);
1737 // Collect the vid's and aai-node-types of the vertices that each vertex
1738 // (A and B) is connected to.
1739 ArrayList<String> vtxIdsConn2A = new ArrayList<>();
1740 ArrayList<String> vtxIdsConn2B = new ArrayList<>();
1741 HashMap<String, String> nodeTypesConn2A = new HashMap<>();
1742 HashMap<String, String> nodeTypesConn2B = new HashMap<>();
1744 ArrayList<Vertex> vertListA = getConnectedNodes( g, vtxA );
1745 if (vertListA != null) {
1746 Iterator<Vertex> iter = vertListA.iterator();
1747 while (iter.hasNext()) {
1748 Vertex tvCon = iter.next();
1749 String conVid = tvCon.id().toString();
1751 objType = tvCon.<Object>property("aai-node-type").orElse(null);
1752 if (objType != null) {
1753 nt = objType.toString();
// Maps connected node-type -> connected vertex id (one per type per node).
1755 nodeTypesConn2A.put(nt, conVid);
1756 vtxIdsConn2A.add(conVid);
1760 ArrayList<Vertex> vertListB = getConnectedNodes( g, vtxB );
1761 if (vertListB != null) {
1762 Iterator<Vertex> iter = vertListB.iterator();
1763 while (iter.hasNext()) {
1764 Vertex tvCon = iter.next();
1765 String conVid = tvCon.id().toString();
1767 objType = tvCon.<Object>property("aai-node-type").orElse(null);
1768 if (objType != null) {
1769 nt = objType.toString();
1771 nodeTypesConn2B.put(nt, conVid);
1772 vtxIdsConn2B.add(conVid);
1776 // 1 - If this kind of node needs a dependent node for uniqueness, then
1777 // verify that they both nodes point to the same dependent
1778 // node (otherwise they're not really duplicates)
1779 // Note - there are sometimes more than one dependent node type since
1780 // one nodeType can be used in different ways. But for a
1781 // particular node, it will only have one dependent node that
1782 // it's connected to.
1783 String onlyNodeThatIndexPointsToVidStr = "";
1784 Collection<String> depNodeTypes = loader.introspectorFromName(vtxANodeType).getDependentOn();
1785 if (depNodeTypes.isEmpty()) {
1786 // This kind of node is not dependent on any other. That is ok.
1787 // We need to find out if the unique index info is good or not and
1788 // use that later when deciding if we can delete one.
1789 onlyNodeThatIndexPointsToVidStr = findJustOneUsingIndex( transId,
1790 fromAppId, g, keyPropValsHash, vtxANodeType, vidA, vidB, ver );
1792 String depNodeVtxId4A = "";
1793 String depNodeVtxId4B = "";
1794 Iterator<String> iter = depNodeTypes.iterator();
1795 while (iter.hasNext()) {
1796 String depNodeType = iter.next();
1797 if (nodeTypesConn2A.containsKey(depNodeType)) {
1798 // This is the dependent node type that vertex A is using
1799 depNodeVtxId4A = nodeTypesConn2A.get(depNodeType);
1801 if (nodeTypesConn2B.containsKey(depNodeType)) {
1802 // This is the dependent node type that vertex B is using
1803 depNodeVtxId4B = nodeTypesConn2B.get(depNodeType);
1806 if (depNodeVtxId4A.equals("")
1807 || (!depNodeVtxId4A.equals(depNodeVtxId4B))) {
1808 // Either they're not really dupes or there's some bad data - so
1809 // don't pick either one
1814 if (vtxIdsConn2A.size() == vtxIdsConn2B.size()) {
1815 // 2 - If they both have edges to all the same vertices,
1816 // then return the one that can be reached uniquely via the
1817 // key if that is the case or
1818 // else the one with the lower vertexId
1820 boolean allTheSame = true;
1821 Iterator<String> iter = vtxIdsConn2A.iterator();
1822 while (iter.hasNext()) {
1823 String vtxIdConn2A = iter.next();
1824 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1831 // If everything is the same, but one of the two has a good
1832 // pointer to it, then save that one. Otherwise, take the
1834 if( !onlyNodeThatIndexPointsToVidStr.equals("") ){
1835 // only one is reachable via the index - choose that one.
1836 if( onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1837 preferredVtx = vtxA;
1839 else if( onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1840 preferredVtx = vtxB;
// Tie-break on vertex id when the index gives no preference.
1843 else if (vidA < vidB) {
1844 preferredVtx = vtxA;
1846 preferredVtx = vtxB;
1849 } else if (vtxIdsConn2A.size() > vtxIdsConn2B.size()) {
1850 // 3 - VertexA is connected to more things than vtxB.
1851 // We'll pick VtxA if its edges are a superset of vtxB's edges
1852 // and it doesn't contradict the check for the index/key pointer.
1853 boolean missingOne = false;
1854 Iterator<String> iter = vtxIdsConn2B.iterator();
1855 while (iter.hasNext()) {
1856 String vtxIdConn2B = iter.next();
1857 if (!vtxIdsConn2A.contains(vtxIdConn2B)) {
1863 if( onlyNodeThatIndexPointsToVidStr.equals("")
1864 || onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1865 preferredVtx = vtxA;
1868 } else if (vtxIdsConn2B.size() > vtxIdsConn2A.size()) {
1869 // 4 - VertexB is connected to more things than vtxA.
1870 // We'll pick VtxB if its edges are a superset of vtxA's edges
1871 // and it doesn't contradict the check for the index/key pointer.
1872 boolean missingOne = false;
1873 Iterator<String> iter = vtxIdsConn2A.iterator();
1874 while (iter.hasNext()) {
1875 String vtxIdConn2A = iter.next();
1876 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1882 if( onlyNodeThatIndexPointsToVidStr.equals("")
1883 || onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1884 preferredVtx = vtxB;
// Fallback: no criterion applied, so return null (no preference).
1888 preferredVtx = nullVtx;
1891 return (preferredVtx);
1893 } // end of pickOneOfTwoDupes()
1896 * Check and process dupes.
1898 * @param transId the trans id
1899 * @param fromAppId the from app id
1901 * @param version the version
1902 * @param nType the node type (aai-node-type value) shared by the candidate duplicate vertices
1903 * @param passedVertList the passed vert list
1904 * @param dupeFixOn the dupe fix on
1905 * @param deleteCandidateList the delete candidate list
1906 * @param singleCommits the single commits
1907 * @param alreadyFoundDupeGroups the already found dupe groups
1908 * @return the array list
1910 private static List<String> checkAndProcessDupes(String transId,
1911 String fromAppId, Graph g, GraphTraversalSource source, String version, String nType,
1912 List<Vertex> passedVertList, Boolean dupeFixOn,
1913 Set<String> deleteCandidateList, Boolean singleCommits,
1914 ArrayList<String> alreadyFoundDupeGroups, Loader loader ) {
1916 ArrayList<String> returnList = new ArrayList<>();
1917 ArrayList<Vertex> checkVertList = new ArrayList<>();
1918 ArrayList<String> alreadyFoundDupeVidArr = new ArrayList<>();
1919 Boolean noFilterList = true;
1920 Iterator<String> afItr = alreadyFoundDupeGroups.iterator();
1921 while (afItr.hasNext()) {
1922 String dupeGrpStr = afItr.next();
1923 String[] dupeArr = dupeGrpStr.split("\\|");
1924 int lastIndex = dupeArr.length - 1;
1925 for (int i = 0; i < lastIndex; i++) {
1926 // Note: we don't want the last one...
1927 String vidString = dupeArr[i];
1928 alreadyFoundDupeVidArr.add(vidString);
1929 noFilterList = false;
1933 // For a given set of Nodes that were found with a set of KEY
1934 // Parameters, (nodeType + key data) we will
1935 // see if we find any duplicate nodes that need to be cleaned up. Note -
1936 // it's legit to have more than one
1937 // node with the same key data if the nodes depend on a parent for
1938 // uniqueness -- as long as the two nodes
1939 // don't hang off the same Parent.
1940 // If we find duplicates, and we can figure out which of each set of
1941 // duplicates is the one that we
1942 // think should be preserved, we will record that. Whether we can tell
1943 // which one should be
1944 // preserved or not, we will return info about any sets of duplicates
1947 // Each element in the returned arrayList might look like this:
1948 // "1234|5678|keepVid=UNDETERMINED" (if there were 2 dupes, and we
1949 // couldn't figure out which one to keep)
1950 // or, "100017|200027|30037|keepVid=30037" (if there were 3 dupes and we
1951 // thought the third one was the one that should survive)
1953 // Because of the way the calling code loops over stuff, we can get the
1954 // same data multiple times - so we should
1955 // not process any vertices that we've already seen.
1958 Iterator<Vertex> pItr = passedVertList.iterator();
1959 while (pItr.hasNext()) {
1960 Vertex tvx = pItr.next();
1961 String passedId = tvx.id().toString();
1962 if (noFilterList || !alreadyFoundDupeVidArr.contains(passedId)) {
1963 // We haven't seen this one before - so we should check it.
1964 checkVertList.add(tvx);
1968 if (checkVertList.size() < 2) {
1969 // Nothing new to check.
1973 if (loader.introspectorFromName(nType).isTopLevel()) {
1974 // If this was a node that does NOT depend on other nodes for
1975 // uniqueness, and we
1976 // found more than one node using its key -- record the found
1977 // vertices as duplicates.
1978 String dupesStr = "";
1979 for (int i = 0; i < checkVertList.size(); i++) {
1981 + ((checkVertList.get(i))).id()
1984 if (dupesStr != "") {
1985 Vertex prefV = getPreferredDupe(transId, fromAppId,
1986 source, checkVertList, version, loader);
1987 if (prefV == null) {
1988 // We could not determine which duplicate to keep
1989 dupesStr = dupesStr + "KeepVid=UNDETERMINED";
1990 returnList.add(dupesStr);
1992 dupesStr = dupesStr + "KeepVid=" + prefV.id();
1993 Boolean didRemove = false;
1995 didRemove = deleteNonKeepersIfAppropriate(g,
1996 dupesStr, prefV.id().toString(),
1997 deleteCandidateList, singleCommits);
2002 // keep them on our list
2003 returnList.add(dupesStr);
2008 // More than one node have the same key fields since they may
2009 // depend on a parent node for uniqueness. Since we're finding
2010 // more than one, we want to check to see if any of the
2011 // vertices that have this set of keys (and are the same nodeType)
2012 // are also pointing at the same 'parent' node.
2013 // Note: for a given set of key data, it is possible that there
2014 // could be more than one set of duplicates.
2015 HashMap<String, ArrayList<Vertex>> vertsGroupedByParentHash = groupVertsByDepNodes(
2016 transId, fromAppId, source, version, nType,
2017 checkVertList, loader);
2018 for (Map.Entry<String, ArrayList<Vertex>> entry : vertsGroupedByParentHash
2020 ArrayList<Vertex> thisParentsVertList = entry
2022 if (thisParentsVertList.size() > 1) {
2023 // More than one vertex found with the same key info
2024 // hanging off the same parent/dependent node
2025 String dupesStr = "";
2026 for (int i = 0; i < thisParentsVertList.size(); i++) {
2028 + ((thisParentsVertList
2029 .get(i))).id() + "|";
2031 if (dupesStr != "") {
2032 Vertex prefV = getPreferredDupe(transId,
2033 fromAppId, source, thisParentsVertList,
2036 if (prefV == null) {
2037 // We could not determine which duplicate to
2039 dupesStr = dupesStr + "KeepVid=UNDETERMINED";
2040 returnList.add(dupesStr);
2042 Boolean didRemove = false;
2043 dupesStr = dupesStr + "KeepVid="
2044 + prefV.id().toString();
2046 didRemove = deleteNonKeepersIfAppropriate(
2047 g, dupesStr, prefV.id()
2049 deleteCandidateList, singleCommits);
2054 // keep them on our list
2055 returnList.add(dupesStr);
2062 } catch (Exception e) {
2063 LoggingContext.statusCode(StatusCode.ERROR);
2064 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2065 logger.warn(" >>> Threw an error in checkAndProcessDupes - just absorb this error and move on. ", e);
2070 }// End of checkAndProcessDupes()
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: for a list of same-nodeType vertices that all share the same key
// data, bucket them by the id of the parent/dependent vertex they hang off.
// A bucket with more than one entry indicates duplicates under one parent.
// For top-level node types (no parent dependency) the map stays empty.
2073 * Group verts by dep nodes.
2075 * @param transId the trans id
2076 * @param fromAppId the from app id
2078 * @param version the version
2079 * @param nType the n type
2080 * @param passedVertList the passed vert list
2081 * @return the hash map
2082 * @throws AAIException the AAI exception
2084 private static HashMap<String, ArrayList<Vertex>> groupVertsByDepNodes(
2085 String transId, String fromAppId, GraphTraversalSource g, String version,
2086 String nType, ArrayList<Vertex> passedVertList, Loader loader)
2087 throws AAIException {
2088 // Given a list of JanusGraph Vertices of one nodeType (see AAI-8956), group
2089 // them together by the parent node they depend on.
2090 // Ie. if given a list of ip address nodes (assumed to all have the
2091 // same key info) they might sit under several different parent vertices.
2092 // Under Normal conditions, there would only be one per parent -- but
2093 // we're trying to find duplicates - so we
2094 // allow for the case where more than one is under the same parent node.
2096 HashMap<String, ArrayList<Vertex>> retHash = new HashMap<String, ArrayList<Vertex>>();
2097 if (loader.introspectorFromName(nType).isTopLevel()) {
2098 // This method really should not have been called if this is not the
2100 // that depends on a parent for uniqueness, so just return the empty
// Collect the node types this nodeType may depend on, per the schema loader.
2105 // Find out what types of nodes the passed in nodes can depend on
2106 ArrayList<String> depNodeTypeL = new ArrayList<>();
2107 Collection<String> depNTColl = loader.introspectorFromName(nType).getDependentOn();
2108 Iterator<String> ntItr = depNTColl.iterator();
2109 while (ntItr.hasNext()) {
2110 depNodeTypeL.add(ntItr.next());
2112 // For each vertex, we want find its depended-on/parent vertex so we
2113 // can track what other vertexes that are dependent on that same guy.
2114 if (passedVertList != null) {
2115 Iterator<Vertex> iter = passedVertList.iterator();
2116 while (iter.hasNext()) {
2117 Vertex thisVert = iter.next();
2118 Vertex tmpParentVtx = getConnectedParent( g, thisVert );
2119 if( tmpParentVtx != null ) {
2120 String parentNt = null;
2121 Object obj = tmpParentVtx.<Object>property("aai-node-type").orElse(null);
2123 parentNt = obj.toString();
// Only treat it as the parent if its node type is one this nodeType
// is declared to depend on.
2125 if (depNTColl.contains(parentNt)) {
2126 // This must be the parent/dependent node
2127 String parentVid = tmpParentVtx.id().toString();
2128 if (retHash.containsKey(parentVid)) {
2129 // add this vert to the list for this parent key
2130 retHash.get(parentVid).add(thisVert);
2132 // This is the first one we found on this parent
2133 ArrayList<Vertex> vList = new ArrayList<>();
2134 vList.add(thisVert);
2135 retHash.put(parentVid, vList);
2144 }// end of groupVertsByDepNodes()
// NOTE(review): line-sampled extract -- some original lines (including the
// vertex.remove()/commit calls around original lines 2212-2218) are elided.
//
// Purpose: given a dupe-group string "vid1|vid2|KeepVid=vidX", delete every
// non-keeper vid that is ALSO on deleteCandidateList (i.e. was flagged by a
// previous grooming run).  Returns true if anything was actually deleted.
// Nothing is deleted when the keeper is UNDETERMINED, when the candidate
// list is empty, or when the KeepVid is malformed / not in the group.
2147 * Delete non keepers if appropriate.
2150 * @param dupeInfoString the dupe info string
2151 * @param vidToKeep the vid to keep
2152 * @param deleteCandidateList the delete candidate list
2153 * @param singleCommits the single commits
2154 * @return the boolean
2156 private static Boolean deleteNonKeepersIfAppropriate(Graph g,
2157 String dupeInfoString, String vidToKeep,
2158 Set<String> deleteCandidateList, Boolean singleCommits) {
2160 Boolean deletedSomething = false;
2161 // This assumes that the dupeInfoString is in the format of
2162 // pipe-delimited vid's followed by
2163 // ie. "3456|9880|keepVid=3456"
2164 if (deleteCandidateList == null || deleteCandidateList.size() == 0) {
2165 // No vid's on the candidate list -- so no deleting will happen on
// Split the group: all entries except the last are dupe vids; the last
// entry carries the "KeepVid=..." preference marker.
2170 String[] dupeArr = dupeInfoString.split("\\|");
2171 ArrayList<String> idArr = new ArrayList<>();
2172 int lastIndex = dupeArr.length - 1;
2173 for (int i = 0; i <= lastIndex; i++) {
2174 if (i < lastIndex) {
2175 // This is not the last entry, it is one of the dupes,
2176 String vidString = dupeArr[i];
2177 idArr.add(vidString);
2179 // This is the last entry which should tell us if we have a
2181 String prefString = dupeArr[i];
2182 if (prefString.equals("KeepVid=UNDETERMINED")) {
2183 // They sent us a bad string -- nothing should be deleted if
2184 // no dupe could be tagged as preferred
2187 // If we know which to keep, then the prefString should look
2188 // like, "KeepVid=12345"
2189 String[] prefArr = prefString.split("=");
2190 if (prefArr.length != 2 || (!prefArr[0].equals("KeepVid"))) {
2191 LoggingContext.statusCode(StatusCode.ERROR);
2192 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2193 logger.error("Bad format. Expecting KeepVid=999999");
2196 String keepVidStr = prefArr[1];
2197 if (idArr.contains(keepVidStr)) {
// Remove the keeper from the list; whatever remains is delete-eligible.
2198 idArr.remove(keepVidStr);
2200 // So now, the idArr should just contain the vid's
2201 // that we want to remove.
2202 for (int x = 0; x < idArr.size(); x++) {
2203 boolean okFlag = true;
2204 String thisVid = idArr.get(x);
2205 if (deleteCandidateList.contains(thisVid)) {
2206 // This vid is a valid delete candidate from
2207 // a prev. run, so we can remove it.
2209 long longVertId = Long
2210 .parseLong(thisVid);
2212 .traversal().V(longVertId).next();
2214 if (singleCommits) {
2215 // NOTE - the singleCommits option is not used in normal processing
// NOTE(review): per-delete commit path; a fresh transaction is opened here,
// presumably after the previous one was committed on an elided line.
2217 g = AAIGraph.getInstance().getGraph().newTransaction();
2219 } catch (Exception e) {
2221 LoggingContext.statusCode(StatusCode.ERROR);
2222 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2223 logger.error("ERROR trying to delete VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
2226 logger.info(" DELETED VID = " + thisVid);
2227 deletedSomething = true;
2232 LoggingContext.statusCode(StatusCode.ERROR);
2233 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2234 logger.error("ERROR - Vertex Id to keep not found in list of dupes. dupeInfoString = ["
2235 + dupeInfoString + "]");
2239 }// else we know which one to keep
2241 }// for each vertex in a group
2243 return deletedSomething;
2245 }// end of deleteNonKeepersIfAppropriate()
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: look up vertices by nodeType plus 1-4 key properties using plain
// indexed traversal lookups (g.V().has(...)), without going through parent
// relationships.  Supports at most 4 key properties; more throws AAI_6114.
// Returns an empty list (with a debug log) when nothing matches.
2249 * Gets the node just using key params.
2251 * @param transId the trans id
2252 * @param fromAppId the from app id
2253 * @param graph the graph
2254 * @param nodeType the node type
2255 * @param keyPropsHash the key props hash
2256 * @param apiVersion the api version
2257 * @return the node just using key params
2258 * @throws AAIException the AAI exception
2260 public static List <Vertex> getNodeJustUsingKeyParams( String transId, String fromAppId, GraphTraversalSource graph, String nodeType,
2261 HashMap<String,Object> keyPropsHash, String apiVersion ) throws AAIException{
2263 List <Vertex> retVertList = new ArrayList <> ();
2265 // We assume that all NodeTypes have at least one key-property defined.
2266 // Note - instead of key-properties (the primary key properties), a user could pass
2267 // alternate-key values if they are defined for the nodeType.
2268 List<String> kName = new ArrayList<>();
2269 List<Object> kVal = new ArrayList<>();
2270 if( keyPropsHash == null || keyPropsHash.isEmpty() ) {
2271 throw new AAIException("AAI_6120", " NO key properties passed for this getNodeJustUsingKeyParams() request. NodeType = [" + nodeType + "]. ");
// Flatten the key map into parallel name/value lists so the traversal can
// be built positionally below (topPropIndex = number of keys - 1).
2275 for( Map.Entry<String, Object> entry : keyPropsHash.entrySet() ){
2277 kName.add(i, entry.getKey());
2278 kVal.add(i, entry.getValue());
2280 int topPropIndex = i;
2282 String propsAndValuesForMsg = "";
2283 Iterator <Vertex> verts = null;
// Build the lookup with one .has() step per key property; the explicit
// branches exist because the key count is bounded at 4.
2286 if( topPropIndex == 0 ){
2287 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ") ";
2288 verts= graph.V().has(kName.get(0),kVal.get(0)).has("aai-node-type",nodeType);
2290 else if( topPropIndex == 1 ){
2291 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2292 + kName.get(1) + " = " + kVal.get(1) + ") ";
2293 verts = graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has("aai-node-type",nodeType);
2295 else if( topPropIndex == 2 ){
2296 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2297 + kName.get(1) + " = " + kVal.get(1) + ", "
2298 + kName.get(2) + " = " + kVal.get(2) + ") ";
2299 verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has("aai-node-type",nodeType);
2301 else if( topPropIndex == 3 ){
2302 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2303 + kName.get(1) + " = " + kVal.get(1) + ", "
2304 + kName.get(2) + " = " + kVal.get(2) + ", "
2305 + kName.get(3) + " = " + kVal.get(3) + ") ";
2306 verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has(kName.get(3),kVal.get(3)).has("aai-node-type",nodeType);
2309 throw new AAIException("AAI_6114", " We only support 4 keys per nodeType for now \n");
// Lookup failures are logged and absorbed; the method then returns whatever
// (possibly empty) result list was accumulated.
2312 catch( Exception ex ){
2313 LoggingContext.statusCode(StatusCode.ERROR);
2314 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2315 logger.error( " ERROR trying to get node for: [" + propsAndValuesForMsg + "]" + LogFormatTools.getStackTop(ex));
2318 if( verts != null ){
2319 while( verts.hasNext() ){
2321 retVertList.add(tiV);
2325 if( retVertList.size() == 0 ){
2326 logger.debug("DEBUG No node found for nodeType = [" + nodeType +
2327 "], propsAndVal = " + propsAndValuesForMsg );
2332 }// End of getNodeJustUsingKeyParams()
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: build a human-readable report of every IN and OUT edge on a
// vertex; for each edge it records the label plus the node type and vertex
// id of the vertex on the other side.
2335 * Show all edges for node.
2337 * @param transId the trans id
2338 * @param fromAppId the from app id
2339 * @param tVert the t vert
2340 * @return the array list
2342 private static ArrayList <String> showAllEdgesForNode( String transId, String fromAppId, Vertex tVert ){
2344 ArrayList <String> retArr = new ArrayList <> ();
2345 Iterator <Edge> eI = tVert.edges(Direction.IN);
2346 if( ! eI.hasNext() ){
2347 retArr.add("No IN edges were found for this vertex. ");
2349 while( eI.hasNext() ){
2350 Edge ed = eI.next();
2351 String lab = ed.label();
// Pick whichever endpoint of the edge is NOT tVert.
2353 if (tVert.equals(ed.inVertex())) {
2354 vtx = ed.outVertex();
2356 vtx = ed.inVertex();
2359 retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
2362 String nType = vtx.<String>property("aai-node-type").orElse(null);
2363 String vid = vtx.id().toString();
2364 retArr.add("Found an IN edge (" + lab + ") to this vertex from a [" + nType + "] node with VtxId = " + vid );
// Same pass again for the OUT direction.
2369 eI = tVert.edges(Direction.OUT);
2370 if( ! eI.hasNext() ){
2371 retArr.add("No OUT edges were found for this vertex. ");
2373 while( eI.hasNext() ){
2374 Edge ed = eI.next();
2375 String lab = ed.label();
2377 if (tVert.equals(ed.inVertex())) {
2378 vtx = ed.outVertex();
2380 vtx = ed.inVertex();
2383 retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
2386 String nType = vtx.<String>property("aai-node-type").orElse(null);
2387 String vid = vtx.id().toString();
2388 retArr.add("Found an OUT edge (" + lab + ") from this vertex to a [" + nType + "] node with VtxId = " + vid );
// NOTE(review): line-sampled extract -- the trailing return/close of this
// method is elided from view.
//
// Purpose: build a human-readable dump of a vertex: its aai-node-type and
// vertex id header, followed by one line per property (key and value).
// A null vertex yields a single explanatory line instead.
2396 * Show properties for node.
2398 * @param transId the trans id
2399 * @param fromAppId the from app id
2400 * @param tVert the t vert
2401 * @return the array list
2403 private static ArrayList <String> showPropertiesForNode( String transId, String fromAppId, Vertex tVert ){
2405 ArrayList <String> retArr = new ArrayList <> ();
2406 if( tVert == null ){
2407 retArr.add("null Node object passed to showPropertiesForNode()\n")
2410 String nodeType = "";
2411 Object ob = tVert.<Object>property("aai-node-type").orElse(null);
2416 nodeType = ob.toString();
2419 retArr.add(" AAINodeType/VtxID for this Node = [" + nodeType + "/" + tVert.id() + "]");
2420 retArr.add(" Property Detail: ");
2421 Iterator<VertexProperty<Object>> pI = tVert.properties();
2422 while( pI.hasNext() ){
2423 VertexProperty<Object> tp = pI.next();
2424 Object val = tp.value();
2425 retArr.add("Prop: [" + tp.key() + "], val = [" + val + "] ");
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: return every vertex directly connected to startVtx in either
// direction (g.V(startVtx).both()).  A null startVtx short-circuits
// (the elided branch presumably returns the empty list -- TODO confirm).
2432 private static ArrayList <Vertex> getConnectedNodes(GraphTraversalSource g, Vertex startVtx )
2433 throws AAIException {
2435 ArrayList <Vertex> retArr = new ArrayList <> ();
2436 if( startVtx == null ){
2440 GraphTraversal<Vertex, Vertex> modPipe = null;
2441 modPipe = g.V(startVtx).both();
2442 if( modPipe != null && modPipe.hasNext() ){
2443 while( modPipe.hasNext() ){
2444 Vertex conVert = modPipe.next();
2445 retArr.add(conVert);
2451 }// End of getConnectedNodes()
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: collect child vertices of startVtx that have the requested
// aai-node-type.  "Child" is defined by the edge's CONTAINS property:
// either an OUT edge marked CONTAINS=OUT (child at inV) or an IN edge
// marked CONTAINS=IN (child at outV).
2454 private static ArrayList <Vertex> getConnectedChildrenOfOneType( GraphTraversalSource g,
2455 Vertex startVtx, String childNType ) throws AAIException{
2457 ArrayList <Vertex> childList = new ArrayList <> ();
2458 Iterator <Vertex> vertI = g.V(startVtx).union(__.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).inV(), __.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).outV());
2460 Vertex tmpVtx = null;
2461 while( vertI != null && vertI.hasNext() ){
2462 tmpVtx = vertI.next();
2463 Object ob = tmpVtx.<Object>property("aai-node-type").orElse(null);
// Keep only children whose node type matches the one requested.
2465 String tmpNt = ob.toString();
2466 if( tmpNt.equals(childNType)){
2467 childList.add(tmpVtx);
2474 }// End of getConnectedChildrenOfOneType()
// NOTE(review): line-sampled extract -- the return statement is elided.
//
// Purpose: find the parent of startVtx via the CONTAINS edge property:
// an IN edge marked CONTAINS=OUT (parent at outV) or an OUT edge marked
// CONTAINS=IN (parent at inV).  If several match, the last one iterated
// wins (the inline comment notes there should only be one).
2477 private static Vertex getConnectedParent( GraphTraversalSource g,
2478 Vertex startVtx ) throws AAIException{
2480 Vertex parentVtx = null;
2481 Iterator <Vertex> vertI = g.V(startVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
2483 while( vertI != null && vertI.hasNext() ){
2484 // Note - there better only be one!
2485 parentVtx = vertI.next();
2490 }// End of getConnectedParent()
// NOTE(review): line-sampled extract -- the body of the "no window" branch
// is elided (presumably it returns 0 or similar -- TODO confirm).
//
// Purpose: convert a look-back window in minutes into the epoch-millis
// timestamp where that window starts (now - window).  A non-positive
// window size means "no window".
2493 private static long figureWindowStartTime( int timeWindowMinutes ){
2494 // Given a window size, calculate what the start-timestamp would be.
2496 if( timeWindowMinutes <= 0 ){
2497 // This just means that there is no window...
2500 long unixTimeNow = System.currentTimeMillis();
// 60L forces long arithmetic so large windows do not overflow int math.
2501 long windowInMillis = timeWindowMinutes * 60L * 1000;
2503 long startTimeStamp = unixTimeNow - windowInMillis;
2505 return startTimeStamp;
2506 } // End of figureWindowStartTime()
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: for NON-dependent (top-level) node types, group the passed
// vertices by the concatenated string of their key-property values; any
// key string mapped to more than one vertex id is a duplicate set.  Each
// duplicate set is returned as its own ArrayList<Vertex>.
2510 * Collect Duplicate Sets for nodes that are NOT dependent on parent nodes.
2512 * @param transId the trans id
2513 * @param fromAppId the from app id
2515 * @param version the version
2516 * @param nType the n type
2517 * @param passedVertList the passed vert list
2518 * @return the array list
2520 private static ArrayList<ArrayList<Vertex>> getDupeSets4NonDepNodes( String transId,
2521 String fromAppId, Graph g, String version, String nType,
2522 ArrayList<Vertex> passedVertList,
2523 ArrayList <String> keyPropNamesArr,
2526 ArrayList<ArrayList<Vertex>> returnList = new ArrayList<ArrayList<Vertex>>();
2528 // We've been passed a set of nodes that we want to check.
2529 // They are all NON-DEPENDENT nodes of the same nodeType meaning that they should be
2530 // unique in the DB based on their KEY DATA alone. So, if
2531 // we group them by their key data - if any key has more than one
2532 // vertex mapped to it, those vertices are dupes.
2534 // When we find duplicates, we group them in an ArrayList (there can be
2535 // more than one duplicate for one set of key data)
2536 // Then these dupeSets are grouped up and returned.
// Phase 1: map key-value string -> list of vids, and vid -> vertex so the
// vertices can be recovered in phase 2.
2539 HashMap <String, ArrayList<String>> keyVals2VidHash = new HashMap <String, ArrayList<String>>();
2540 HashMap <String,Vertex> vtxHash = new HashMap <String,Vertex>();
2541 Iterator<Vertex> pItr = passedVertList.iterator();
2542 while (pItr.hasNext()) {
2544 Vertex tvx = pItr.next();
2545 String thisVid = tvx.id().toString();
2546 vtxHash.put(thisVid, tvx);
2548 // if there are more than one vertexId mapping to the same keyProps -- they are dupes
2549 // we dont check till later since a set can contain more than 2.
2550 String hKey = getNodeKeyValString( tvx, keyPropNamesArr );
2551 if( keyVals2VidHash.containsKey(hKey) ){
2552 // We've already seen this key
2553 ArrayList <String> tmpVL = (ArrayList <String>)keyVals2VidHash.get(hKey);
2555 keyVals2VidHash.put(hKey, tmpVL);
2558 // First time for this key
2559 ArrayList <String> tmpVL = new ArrayList <String>();
2561 keyVals2VidHash.put(hKey, tmpVL);
// Per-vertex errors are absorbed so one bad vertex cannot stop the scan.
2564 catch (Exception e) {
2565 logger.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);
// Phase 2: any key with >1 vid is a dupe set -- translate vids back into
// vertices and add the set to the return list.
2569 for( Map.Entry<String, ArrayList<String>> entry : keyVals2VidHash.entrySet() ){
2570 ArrayList <String> vidList = entry.getValue();
2572 if( !vidList.isEmpty() && vidList.size() > 1 ){
2573 // There are more than one vertex id's using the same key info
2574 ArrayList <Vertex> vertList = new ArrayList <Vertex> ();
2575 for (int i = 0; i < vidList.size(); i++) {
2576 String tmpVid = vidList.get(i);
2577 vertList.add(vtxHash.get(tmpVid));
2579 returnList.add(vertList);
2582 catch (Exception e) {
2583 logger.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);
2589 }// End of getDupeSets4NonDepNodes()
// NOTE(review): line-sampled extract -- the return statement and closing
// braces are elided from view.
//
// Purpose: concatenate the values of the given key properties of a vertex
// into one string, used as a grouping key by getDupeSets4NonDepNodes().
// Values are space-prefixed as they are appended.
2593 * Get values of the key properties for a node as a single string
2595 * @param tvx the vertex to pull the properties from
2596 * @param keyPropNamesArr collection of key prop names
2597 * @return a String of concatenated values
2599 private static String getNodeKeyValString( Vertex tvx,
2600 ArrayList <String> keyPropNamesArr ) {
2602 String retString = "";
2603 Iterator <String> propItr = keyPropNamesArr.iterator();
2604 while( propItr.hasNext() ){
2605 String propName = propItr.next();
// NOTE(review): propValObj.toString() would NPE if the property is absent
// (orElse(null)); an elided guard line may handle this -- TODO confirm.
2607 Object propValObj = tvx.property(propName).orElse(null);
2608 retString = " " + retString + propValObj.toString();
2613 }// End of getNodeKeyValString()
// NOTE(review): line-sampled extract -- some original lines are elided.
//
// Purpose: given two candidate duplicate vids (vidAL, vidBL), query the DB
// by key properties alone (which should hit the index).  If exactly one
// vertex comes back AND it is one of the two candidates, return its vid --
// that vertex is the one the index still points at; the other is stranded.
// Otherwise the empty string is returned.
2616 static private String findJustOneUsingIndex( String transId, String fromAppId,
2617 GraphTraversalSource gts, HashMap <String,Object> keyPropValsHash,
2618 String nType, Long vidAL, Long vidBL, String apiVer){
2620 // See if querying by JUST the key params (which should be indexed) brings back
2621 // ONLY one of the two vertices. Ie. the db still has a pointer to one of them
2622 // and the other one is sort of stranded.
2623 String returnVid = "";
2626 List <Vertex> tmpVertList = getNodeJustUsingKeyParams( transId, fromAppId, gts,
2627 nType, keyPropValsHash, apiVer );
2628 if( tmpVertList != null && tmpVertList.size() == 1 ){
2629 // We got just one - if it matches one of the ones we're looking
2630 // for, then return that VID
2631 Vertex tmpV = tmpVertList.get(0);
2632 String thisVid = tmpV.id().toString();
2633 if( thisVid.equals(vidAL.toString()) || thisVid.equals(vidBL.toString()) ){
2634 String msg = " vid = " + thisVid + " is one of two that the DB can retrieve directly ------";
2635 //System.out.println(msg);
2637 returnVid = thisVid;
// Lookup failure is non-fatal: log (elided) and fall through to return "".
2641 catch ( AAIException ae ){
2642 String emsg = "Error trying to get node just by key " + ae.getMessage();
2643 //System.out.println(emsg);
2649 }// End of findJustOneUsingIndex()