2 * ============LICENSE_START=======================================================
4 * ================================================================================
5 * Copyright © 2017-2018 AT&T Intellectual Property. All rights reserved.
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ============LICENSE_END=========================================================
20 package org.onap.aai.dbgen;
22 import java.io.BufferedReader;
23 import java.io.BufferedWriter;
25 import java.io.FileReader;
26 import java.io.FileWriter;
27 import java.io.IOException;
28 import java.util.ArrayList;
29 import java.util.Arrays;
30 import java.util.Collection;
31 import java.util.HashMap;
32 import java.util.Iterator;
33 import java.util.LinkedHashSet;
34 import java.util.List;
36 import java.util.Map.Entry;
37 import java.util.Properties;
39 import java.util.UUID;
41 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
42 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
43 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
44 import org.apache.tinkerpop.gremlin.structure.Direction;
45 import org.apache.tinkerpop.gremlin.structure.Edge;
46 import org.apache.tinkerpop.gremlin.structure.Graph;
47 import org.apache.tinkerpop.gremlin.structure.Property;
48 import org.apache.tinkerpop.gremlin.structure.Vertex;
49 import org.apache.tinkerpop.gremlin.structure.VertexProperty;
50 import org.onap.aai.db.props.AAIProperties;
51 import org.onap.aai.dbmap.AAIGraph;
52 import org.onap.aai.dbmap.AAIGraphConfig;
53 import org.onap.aai.exceptions.AAIException;
54 import org.onap.aai.introspection.Introspector;
55 import org.onap.aai.introspection.Loader;
56 import org.onap.aai.introspection.LoaderFactory;
57 import org.onap.aai.introspection.ModelType;
58 import org.onap.aai.introspection.exceptions.AAIUnknownObjectException;
59 import org.onap.aai.logging.ErrorLogHelper;
60 import org.onap.aai.logging.LogFormatTools;
61 import org.onap.aai.logging.LoggingContext;
62 import org.onap.aai.serialization.db.AAIDirection;
63 import org.onap.aai.serialization.db.EdgeProperty;
64 import org.onap.aai.util.*;
65 import org.onap.aai.logging.LoggingContext.StatusCode;
67 import com.att.eelf.configuration.Configuration;
68 import com.att.eelf.configuration.EELFLogger;
69 import com.att.eelf.configuration.EELFManager;
70 import org.janusgraph.core.JanusGraphFactory;
71 import org.janusgraph.core.JanusGraph;
74 public class DataGrooming {
// Error code passed to AAIException when a graph or graph-transaction object
// turns out to be null while grooming.
76 public static final String AAI_EXCEPTION_NUMBER = "AAI_6101";
// Assigned in main(), after the logging file properties have been set.
77 private static EELFLogger logger;
// Application id: used as the LoggingContext partner name and passed to the
// node-lookup / dupe-checking helper calls.
78 private static final String FROMAPPID = "AAI-DB";
// Random UUID generated once when the class is initialised; used as the
// LoggingContext request id and passed to the same helper calls as FROMAPPID.
79 private static final String TRANSID = UUID.randomUUID().toString();
// NOTE(review): not referenced in the visible part of the file -- presumably a
// running count of duplicate groups that were deleted; confirm against its users.
80 private static int dupeGrpsDeleted = 0;
// Name of the vertex property that holds a vertex's node type.
81 private static final String AAI_NODE_TYPE = "aai-node-type";
// NOTE(review): not referenced in the visible part of the file -- presumably a
// marker written when no vertex of a duplicate group can be chosen to keep; confirm.
82 private static final String KEEP_VID_UNDETERMINED ="KeepVid=UNDETERMINED";
87 * @param args the arguments
// Command-line entry point for the DataGrooming tool.
// Sets up EELF logging and the LoggingContext, picks up the default max-fix and
// sleep settings (AAIConstants values, optionally overridden through AAIConfig),
// parses the command-line options, and then calls doTheGrooming() in one of
// three ways: using a previous output file for fixing, as an auto-fix run
// (a look-only pass, a sleep, then a fix pass), or as a single plain pass.
// NOTE(review): this listing has gaps -- several original lines (for example the
// switch header, some case labels and some try/else keywords) are not present,
// so parts of the control flow cannot be seen here.
89 public static void main(String[] args) {
91 // Set the logging file properties to be used by EELFManager
92 System.setProperty("aai.service.name", DataGrooming.class.getSimpleName());
93 Properties props = System.getProperties();
94 props.setProperty(Configuration.PROPERTY_LOGGING_FILE_NAME, AAIConstants.AAI_DATA_GROOMING_LOGBACK_PROPS);
95 props.setProperty(Configuration.PROPERTY_LOGGING_FILE_PATH, AAIConstants.AAI_HOME_ETC_APP_PROPERTIES);
// The logger is only created once the logging properties above are in place.
96 logger = EELFManager.getInstance().getLogger(DataGrooming.class);
97 String ver = "version"; // Placeholder
// Processing options. Each starts out false; the ones with a visible case label
// below are switched on by the matching command-line flag.
98 Boolean doAutoFix = false;
99 Boolean edgesOnlyFlag = false;
100 Boolean dontFixOrphansFlag = false;
101 Boolean skipHostCheck = false;
102 Boolean singleCommits = false;
103 Boolean dupeCheckOff = false;
104 Boolean dupeFixOn = false;
105 Boolean ghost2CheckOff = false;
106 Boolean ghost2FixOn = false;
107 Boolean neverUseCache = false;
108 Boolean skipEdgeCheckFlag = false;
// Initialise the logging context for this run; the status starts out as
// COMPLETE / SUCCESS and is changed on the error paths.
110 LoggingContext.init();
111 LoggingContext.partnerName(FROMAPPID);
112 LoggingContext.serviceName(AAIConstants.AAI_RESOURCES_MS);
113 LoggingContext.component("dataGrooming");
114 LoggingContext.targetEntity(AAIConstants.AAI_RESOURCES_MS);
115 LoggingContext.targetServiceName("main");
116 LoggingContext.requestId(TRANSID);
117 LoggingContext.statusCode(StatusCode.COMPLETE);
118 LoggingContext.responseCode(LoggingContext.SUCCESS);
120 int timeWindowMinutes = 0; // A value of 0 means that we will not have a time-window -- we will look
121 // at all nodes of the passed-in nodeType.
// Start from the AAIConstants defaults, then override them with the AAIConfig
// values when those are present and non-empty.
124 int maxRecordsToFix = AAIConstants.AAI_GROOMING_DEFAULT_MAX_FIX;
125 int sleepMinutes = AAIConstants.AAI_GROOMING_DEFAULT_SLEEP_MINUTES;
127 String maxFixStr = AAIConfig.get("aai.grooming.default.max.fix");
128 if( maxFixStr != null && !maxFixStr.isEmpty() ){
129 maxRecordsToFix = Integer.parseInt(maxFixStr);
131 String sleepStr = AAIConfig.get("aai.grooming.default.sleep.minutes");
132 if( sleepStr != null && !sleepStr.isEmpty() ){
133 sleepMinutes = Integer.parseInt(sleepStr);
136 catch ( Exception e ){
137 // Don't worry, we'll just use the defaults that we got from AAIConstants
138 logger.warn("WARNING - could not pick up aai.grooming values from aaiconfig.properties file. ");
// Name of a previous grooming output file to use for fixing; empty means none was given.
141 String prevFileName = "";
// Timestamp (yyyyMMddHHmm, GMT) that becomes part of the output file name.
143 FormatDate fd = new FormatDate("yyyyMMddHHmm", "GMT");
144 String dteStr = fd.getDateTime();
146 if (args.length > 0) {
147 // They passed some arguments in that will affect processing
// Walk the arguments with an iterator so that options which take a value can
// consume the following argument.
149 List<String> arguments = Arrays.asList(args);
150 Iterator<String> argsIterator = arguments.iterator();
151 while (argsIterator.hasNext()){
152 String argument = argsIterator.next();
156 edgesOnlyFlag = true;
161 case "-skipHostCheck":
162 skipHostCheck = true;
164 case "-dontFixOrphans":
165 dontFixOrphansFlag = true;
167 case "-singleCommits":
168 singleCommits = true;
170 case "-dupeCheckOff":
176 case "-ghost2CheckOff":
177 ghost2CheckOff = true;
179 case "-neverUseCache":
180 neverUseCache = true;
185 case "-skipEdgeChecks":
186 skipEdgeCheckFlag = true;
// Options with a numeric value: handleNoPassedArgument() is called first to check
// that a next argument exists, then handleNumericArgument() parses it.
// NOTE(review): the case label for this first one is not present in the listing
// (presumably -maxFix).
189 handleNoPassedArgument(argsIterator, argument);
190 nextArg = argsIterator.next();
191 maxRecordsToFix = handleNumericArgument(maxRecordsToFix, nextArg, argument);
193 case "-sleepMinutes":
194 handleNoPassedArgument(argsIterator, argument);
195 nextArg = argsIterator.next();
196 sleepMinutes = handleNumericArgument(sleepMinutes, nextArg, argument);
198 case "-timeWindowMinutes":
199 handleNoPassedArgument(argsIterator, argument);
200 nextArg = argsIterator.next();
201 timeWindowMinutes = handleNumericArgument(timeWindowMinutes, nextArg, argument);
// The next argument is taken verbatim as the previous-output file name
// (case label not present in the listing; presumably -f).
204 handleNoPassedArgument(argsIterator, argument);
205 prevFileName = argsIterator.next();
208 handleUnrecognizedArguments(argument);
// The output file name records whether a time window was in effect (PARTIAL) or not (FULL).
213 String windowTag = "FULL";
214 if( timeWindowMinutes > 0 ){
215 windowTag = "PARTIAL";
217 String groomOutFileName = "dataGrooming." + windowTag + "." + dteStr + ".out";
// Create the loader up front; if that fails the error is logged and the
// exit helper is called with status 1.
220 LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
223 catch (Exception ex){
224 LoggingContext.statusCode(StatusCode.ERROR);
225 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
226 logger.error("ERROR - Could not create loader " + LogFormatTools.getStackTop(ex));
227 AAISystemExitUtil.systemExitCloseAAIGraph(1);
231 logger.info(" We will skip the HostCheck as requested. ");
// Mode 1: a previous output file was named, so run one pass that uses it for
// cleanup. This pass does not allow a cached DB connection and requests the
// final shutdown.
235 if (!prevFileName.isEmpty()) {
236 // They are trying to fix some data based on a data in a
238 logger.info(" Call doTheGrooming() with a previous fileName ["
239 + prevFileName + "] for cleanup. ");
240 Boolean finalShutdownFlag = true;
241 Boolean cacheDbOkFlag = false;
242 doTheGrooming(prevFileName, edgesOnlyFlag, dontFixOrphansFlag,
243 maxRecordsToFix, groomOutFileName, ver, singleCommits,
244 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
245 finalShutdownFlag, cacheDbOkFlag,
246 skipEdgeCheckFlag, timeWindowMinutes);
// Mode 2: auto-fix. The first pass only looks (no fix file, cached DB allowed,
// no final shutdown) and returns the number of fix candidates.
247 } else if (doAutoFix) {
248 // They want us to run the processing twice -- first to look for
249 // delete candidates, then after
250 // napping for a while, run it again and delete any candidates
251 // that were found by the first run.
252 // Note: we will produce a separate output file for each of the
254 logger.info(" Doing an auto-fix call to Grooming. ");
255 logger.info(" First, Call doTheGrooming() to look at what's out there. ");
256 Boolean finalShutdownFlag = false;
257 Boolean cacheDbOkFlag = true;
258 int fixCandCount = doTheGrooming("", edgesOnlyFlag,
259 dontFixOrphansFlag, maxRecordsToFix, groomOutFileName,
260 ver, singleCommits, dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
261 finalShutdownFlag, cacheDbOkFlag,
262 skipEdgeCheckFlag, timeWindowMinutes);
263 if (fixCandCount == 0) {
264 logger.info(" No fix-Candidates were found by the first pass, so no second/fix-pass is needed. ");
266 // We'll sleep a little and then run a fix-pass based on the
267 // first-run's output file.
269 logger.info("About to sleep for " + sleepMinutes
// NOTE(review): this is int arithmetic, so it overflows for sleepMinutes > 35791.
271 int sleepMsec = sleepMinutes * 60 * 1000;
272 Thread.sleep(sleepMsec);
// An interrupted sleep is logged and the exit helper is called with status 0;
// the second (fix) pass is presumably not run in that case -- confirm.
273 } catch (InterruptedException ie) {
274 logger.info("\n >>> Sleep Thread has been Interrupted <<< ");
275 AAISystemExitUtil.systemExitCloseAAIGraph(0);
// A fresh timestamp gives the second pass its own output file.
// NOTE(review): the visible part of this name has no windowTag, unlike the
// first file name; the rest of the expression is not present in the listing.
278 dteStr = fd.getDateTime();
279 String secondGroomOutFileName = "dataGrooming." + dteStr
281 logger.info(" Now, call doTheGrooming() a second time and pass in the name of the file "
282 + "generated by the first pass for fixing: ["
283 + groomOutFileName + "]");
// Second pass: uses the first pass's output file for fixing, does not allow a
// cached DB connection, and requests the final shutdown.
284 finalShutdownFlag = true;
285 cacheDbOkFlag = false;
286 doTheGrooming(groomOutFileName, edgesOnlyFlag,
287 dontFixOrphansFlag, maxRecordsToFix,
288 secondGroomOutFileName, ver, singleCommits,
289 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
290 finalShutdownFlag, cacheDbOkFlag,
291 skipEdgeCheckFlag, timeWindowMinutes);
// Mode 3: a single plain pass with no fix file. The cached DB connection is
// allowed unless the branch below turns it off (its condition is not present in
// the listing; presumably the -neverUseCache flag).
294 // Do the grooming - plain vanilla (no fix-it-file, no
296 Boolean finalShutdownFlag = true;
297 logger.info(" Call doTheGrooming() ");
298 Boolean cacheDbOkFlag = true;
300 // They have forbidden us from using a cached db connection.
301 cacheDbOkFlag = false;
303 doTheGrooming("", edgesOnlyFlag, dontFixOrphansFlag,
304 maxRecordsToFix, groomOutFileName, ver, singleCommits,
305 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
306 finalShutdownFlag, cacheDbOkFlag,
307 skipEdgeCheckFlag, timeWindowMinutes);
// Any exception from the grooming calls is logged as a DATA_ERROR.
309 } catch (Exception ex) {
310 LoggingContext.statusCode(StatusCode.ERROR);
311 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
312 logger.error("Exception while grooming data " + LogFormatTools.getStackTop(ex));
// NOTE(review): the exit helper is called with status 0 here; if this point is
// also reached after the catch above, a failed run reports success -- confirm.
315 logger.info(" Done! ");
316 AAISystemExitUtil.systemExitCloseAAIGraph(0);
320 private static void handleUnrecognizedArguments(String argument) {
321 LoggingContext.statusCode(StatusCode.ERROR);
322 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
323 logger.error(" Unrecognized argument passed to DataGrooming: ["
326 " Valid values are: -f -autoFix -maxFix -edgesOnly -skipEdgeChecks -dupeFixOn -donFixOrphans -timeWindowMinutes -sleepMinutes -neverUseCache");
327 AAISystemExitUtil.systemExitCloseAAIGraph(0);
330 private static int handleNumericArgument(int numericArgumentValue, String nextArg, String argument) {
332 numericArgumentValue = Integer.parseInt(nextArg);
333 } catch (Exception e) {
334 LoggingContext.statusCode(StatusCode.ERROR);
335 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
336 logger.error("Bad value passed with" + argument + "option: ["
338 AAISystemExitUtil.systemExitCloseAAIGraph(0);
340 return numericArgumentValue;
343 private static void handleNoPassedArgument(Iterator argsIterator, String argument) {
344 if (!argsIterator.hasNext()) {
345 LoggingContext.statusCode(StatusCode.ERROR);
346 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
347 logger.error("No value passed with" + argument + "option.");
348 AAISystemExitUtil.systemExitCloseAAIGraph(0);
355 * @param fileNameForFixing the file name for fixing
356 * @param edgesOnlyFlag the edges only flag
357 * @param dontFixOrphansFlag the dont fix orphans flag
358 * @param maxRecordsToFix the max records to fix
359 * @param groomOutFileName the groom out file name
360 * @param version the version
361 * @param singleCommits the single commits
362 * @param dupeCheckOff the dupe check off
363 * @param dupeFixOn the dupe fix on
364 * @param ghost2CheckOff the ghost 2 check off
365 * @param ghost2FixOn the ghost 2 fix on
366 * @param finalShutdownFlag the final shutdown flag
367 * @param cacheDbOkFlag the cacheDbOk flag
370 private static int doTheGrooming(String fileNameForFixing,
371 Boolean edgesOnlyFlag, Boolean dontFixOrphansFlag,
372 int maxRecordsToFix, String groomOutFileName, String version,
373 Boolean singleCommits,
374 Boolean dupeCheckOff, Boolean dupeFixOn,
375 Boolean ghost2CheckOff, Boolean ghost2FixOn,
376 Boolean finalShutdownFlag, Boolean cacheDbOkFlag,
377 Boolean skipEdgeCheckFlag, int timeWindowMinutes) {
379 logger.debug(" Entering doTheGrooming \n");
381 int cleanupCandidateCount = 0;
382 long windowStartTime = 0; // Translation of the window into a starting timestamp
383 BufferedWriter bw = null;
384 JanusGraph graph = null;
385 JanusGraph graph2 = null;
387 boolean executeFinalCommit = false;
388 Set<String> deleteCandidateList = new LinkedHashSet<>();
389 Set<String> processedVertices = new LinkedHashSet<>();
393 if( timeWindowMinutes > 0 ){
394 // Translate the window value (ie. 30 minutes) into a unix timestamp like
395 // we use in the db - so we can select data created after that time.
396 windowStartTime = figureWindowStartTime( timeWindowMinutes );
400 String targetDir = AAIConstants.AAI_HOME + AAIConstants.AAI_FILESEP
401 + "logs" + AAIConstants.AAI_FILESEP + "data"
402 + AAIConstants.AAI_FILESEP + "dataGrooming";
404 // Make sure the target directory exists
405 new File(targetDir).mkdirs();
407 if (!fileNameForFixing.isEmpty()) {
408 deleteCandidateList = getDeleteList(targetDir,
409 fileNameForFixing, edgesOnlyFlag, dontFixOrphansFlag,
413 if (deleteCandidateList.size() > maxRecordsToFix) {
414 LoggingContext.statusCode(StatusCode.ERROR);
415 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
416 logger.warn(" >> WARNING >> Delete candidate list size ("
417 + deleteCandidateList.size()
418 + ") is too big. The maxFix we are using is: "
420 + ". No candidates will be deleted. ");
421 // Clear out the list so it won't be processed below.
422 deleteCandidateList = new LinkedHashSet<>();
425 String fullOutputFileName = targetDir + AAIConstants.AAI_FILESEP
427 File groomOutFile = new File(fullOutputFileName);
429 groomOutFile.createNewFile();
430 } catch (IOException e) {
431 String emsg = " Problem creating output file ["
432 + fullOutputFileName + "], exception=" + e.getMessage();
433 throw new AAIException("AAI_6124", emsg);
436 logger.info(" Will write to " + fullOutputFileName );
437 bw = new BufferedWriter(new FileWriter(groomOutFile.getAbsoluteFile()));
438 ErrorLogHelper.loadProperties();
440 logger.info(" ---- NOTE --- about to open graph (takes a little while)--------\n");
443 // Since we're just reading (not deleting/fixing anything), we can use
444 // a cached connection to the DB
445 graph = JanusGraphFactory.open(new AAIGraphConfig.Builder(AAIConstants.CACHED_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("cached").buildConfiguration());
448 graph = JanusGraphFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime1").buildConfiguration());
451 String emsg = "null graph object in DataGrooming\n";
452 throw new AAIException(AAI_EXCEPTION_NUMBER, emsg);
455 logger.debug(" Got the graph object. ");
457 g = graph.newTransaction();
459 String emsg = "null graphTransaction object in DataGrooming\n";
460 throw new AAIException(AAI_EXCEPTION_NUMBER, emsg);
462 GraphTraversalSource source1 = g.traversal();
464 ArrayList<String> errArr = new ArrayList<>();
465 int totalNodeCount = 0;
466 HashMap<String, String> misMatchedHash = new HashMap<>();
467 HashMap<String, Vertex> orphanNodeHash = new HashMap<>();
468 HashMap<String, Vertex> missingDepNodeHash = new HashMap<>();
469 HashMap<String, Edge> oneArmedEdgeHash = new HashMap<>();
470 HashMap<String, String> emptyVertexHash = new HashMap<>();
471 HashMap<String, Vertex> ghostNodeHash = new HashMap<>();
472 ArrayList<String> dupeGroups = new ArrayList<>();
474 Loader loader = LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
476 Set<Entry<String, Introspector>> entrySet = loader.getAllObjects().entrySet();
479 logger.info(" Starting DataGrooming Processing ");
482 logger.info(" NOTE >> Skipping Node processing as requested. Will only process Edges. << ");
485 for (Entry<String, Introspector> entry : entrySet) {
486 String nType = entry.getKey();
488 int thisNtDeleteCount = 0;
490 logger.debug(" > Look at : [" + nType + "] ...");
491 ntList = ntList + "," + nType;
493 // Get a collection of the names of the key properties for this nodeType to use later
494 // Determine what the key fields are for this nodeType - use an arrayList so they
495 // can be gotten out in a consistent order.
496 Set <String> keyPropsSet = entry.getValue().getKeys();
497 ArrayList <String> keyProps = new ArrayList<>();
498 keyProps.addAll(keyPropsSet);
500 // Get the types of nodes that this nodetype depends on for uniqueness (if any)
501 Collection <String> depNodeTypes = loader.introspectorFromName(nType).getDependentOn();
503 // Loop through all the nodes of this Node type
504 int lastShownForNt = 0;
505 ArrayList <Vertex> tmpList = new ArrayList <> ();
506 Iterator <Vertex> iterv = source1.V().has(AAI_NODE_TYPE,nType);
507 while (iterv.hasNext()) {
508 // We put the nodes into an ArrayList because the graph.query iterator can time out
509 tmpList.add(iterv.next());
512 Iterator <Vertex> iter = tmpList.iterator();
513 while (iter.hasNext()) {
516 if( thisNtCount == lastShownForNt + 250 ){
517 lastShownForNt = thisNtCount;
518 logger.debug("count for " + nType + " so far = " + thisNtCount );
520 Vertex thisVtx = iter.next();
521 if( windowStartTime > 0 ){
522 // They are using the time-window, so we only want nodes that are updated after a
523 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
524 Object objModTimeStamp = thisVtx.property("aai-last-mod-ts").orElse(null);
525 if( objModTimeStamp != null ){
526 long thisNodeModTime = (long)objModTimeStamp;
527 if( thisNodeModTime < windowStartTime ){
528 // It has a last modified ts and is NOT in our window, so we can pass over it
534 String thisVid = thisVtx.id().toString();
535 if (processedVertices.contains(thisVid)) {
536 logger.debug("skipping already processed vertex: " + thisVid);
540 List <Vertex> secondGetList = new ArrayList <> ();
541 // -----------------------------------------------------------------------
542 // For each vertex of this nodeType, we want to:
543 // a) make sure that it can be retrieved using it's AAI defined key
544 // b) make sure that it is not a duplicate
545 // -----------------------------------------------------------------------
547 // For this instance of this nodeType, get the key properties
548 HashMap<String, Object> propHashWithKeys = new HashMap<>();
549 Iterator<String> keyPropI = keyProps.iterator();
550 while (keyPropI.hasNext()) {
551 String propName = keyPropI.next();
553 //delete an already deleted vertex
554 Object obj = thisVtx.<Object>property(propName).orElse(null);
556 propVal = obj.toString();
558 propHashWithKeys.put(propName, propVal);
561 // If this node is dependent on another for uniqueness, then do the query from that parent node
562 // Note - all of our nodes that are dependent on others for uniqueness are
563 // "children" of that node.
564 boolean depNodeOk = true;
565 if( depNodeTypes.isEmpty() ){
566 // This kind of node is not dependent on any other.
567 // Make sure we can get it back using it's key properties (that is the
568 // phantom checking) and that we only get one. Note - we also need
569 // to collect data for a second type of dupe-checking which is done later.
570 secondGetList = getNodeJustUsingKeyParams( TRANSID, FROMAPPID, source1, nType,
571 propHashWithKeys, version );
574 // This kind of node is dependent on another for uniqueness.
575 // Start at it's parent (the dependent vertex) and make sure we can get it
576 // back using it's key properties and that we only get one.
577 Iterator <Vertex> vertI2 = source1.V(thisVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
578 Vertex parentVtx = null;
580 while( vertI2 != null && vertI2.hasNext() ){
581 parentVtx = vertI2.next();
586 // It's Missing it's dependent/parent node
588 boolean zeroEdges = false;
590 Iterator<Edge> tmpEdgeIter = thisVtx.edges(Direction.BOTH);
592 while( tmpEdgeIter.hasNext() ){
596 if( edgeCount == 0 ){
599 } catch (Exception ex) {
600 LoggingContext.statusCode(StatusCode.ERROR);
601 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
602 logger.warn("WARNING from inside the for-each-vid-loop orphan-edges-check " + LogFormatTools.getStackTop(ex) );
605 if (deleteCandidateList.contains(thisVid)) {
606 boolean okFlag = true;
608 processedVertices.add(thisVtx.id().toString());
612 } catch (Exception e) {
614 LoggingContext.statusCode(StatusCode.ERROR);
615 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
616 logger.error("ERROR trying to delete missing-dep-node VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
619 logger.info(" DELETED missing-dep-node VID = " + thisVid);
622 // We count nodes missing their depNodes two ways - the first if it has
623 // at least some edges, and the second if it has zero edges. Either
624 // way, they are effectively orphaned.
625 // NOTE - Only nodes that have dependent nodes are ever considered "orphaned".
627 missingDepNodeHash.put(thisVid, thisVtx);
630 orphanNodeHash.put(thisVid, thisVtx);
634 else if ( pCount > 1 ){
635 // Not sure how this could happen? Should we do something here?
639 // We found the parent - so use it to do the second-look.
640 // NOTE --- We're just going to do the same check from the other direction - because
641 // there could be duplicates or the pointer going the other way could be broken
642 ArrayList <Vertex> tmpListSec = new ArrayList <> ();
644 tmpListSec = getConnectedChildrenOfOneType( source1, parentVtx, nType ) ;
645 Iterator<Vertex> vIter = tmpListSec.iterator();
646 while (vIter.hasNext()) {
647 Vertex tmpV = vIter.next();
648 if( vertexHasTheseKeys(tmpV, propHashWithKeys) ){
649 secondGetList.add(tmpV);
653 }// end of -- else this is a dependent node -- piece
655 if( depNodeOk && (secondGetList == null || secondGetList.isEmpty()) ){
656 // We could not get the node back using it's own key info.
657 // So, it's a PHANTOM
658 if (deleteCandidateList.contains(thisVid)) {
659 boolean okFlag = true;
664 } catch (Exception e) {
666 LoggingContext.statusCode(StatusCode.ERROR);
667 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
668 logger.error("ERROR trying to delete phantom VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
671 logger.info(" DELETED VID = " + thisVid);
674 ghostNodeHash.put(thisVid, thisVtx);
677 else if( (secondGetList.size() > 1) && depNodeOk && !dupeCheckOff ){
678 // Found some DUPLICATES - need to process them
679 logger.info(" - now check Dupes for this guy - ");
680 List<String> tmpDupeGroups = checkAndProcessDupes(
681 TRANSID, FROMAPPID, g, source1, version,
682 nType, secondGetList, dupeFixOn,
683 deleteCandidateList, singleCommits, dupeGroups, loader);
684 Iterator<String> dIter = tmpDupeGroups.iterator();
685 while (dIter.hasNext()) {
686 // Add in any newly found dupes to our running list
687 String tmpGrp = dIter.next();
688 logger.info("Found set of dupes: [" + tmpGrp + "]");
689 dupeGroups.add(tmpGrp);
693 catch (AAIException e1) {
694 LoggingContext.statusCode(StatusCode.ERROR);
695 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
696 logger.warn(" For nodeType = " + nType + " Caught exception", e1);
697 errArr.add(e1.getErrorObject().toString());
699 catch (Exception e2) {
700 LoggingContext.statusCode(StatusCode.ERROR);
701 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
702 logger.warn(" For nodeType = " + nType
703 + " Caught exception", e2);
704 errArr.add(e2.getMessage());
706 }// try block to enclose looping over each single vertex
707 catch (Exception exx) {
708 LoggingContext.statusCode(StatusCode.ERROR);
709 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
710 logger.warn("WARNING from inside the while-verts-loop ", exx);
713 } // while loop for each record of a nodeType
715 if( depNodeTypes.isEmpty() && !dupeCheckOff ){
716 // For this nodeType, we haven't looked at the possibility of a
717 // non-dependent node where two verts have same key info
718 ArrayList<ArrayList<Vertex>> nonDependentDupeSets = new ArrayList<ArrayList<Vertex>>();
719 nonDependentDupeSets = getDupeSets4NonDepNodes(
720 TRANSID, FROMAPPID, g,
721 version, nType, tmpList,
723 // For each set found (each set is for a unique instance of key-values),
724 // process the dupes found
725 Iterator<ArrayList<Vertex>> dsItr = nonDependentDupeSets.iterator();
726 while( dsItr.hasNext() ){
727 ArrayList<Vertex> dupeList = dsItr.next();
728 logger.info(" - now check Dupes for some non-dependent guys - ");
729 List<String> tmpDupeGroups = checkAndProcessDupes(
730 TRANSID, FROMAPPID, g, source1, version,
731 nType, dupeList, dupeFixOn,
732 deleteCandidateList, singleCommits, dupeGroups, loader);
733 Iterator<String> dIter = tmpDupeGroups.iterator();
734 while (dIter.hasNext()) {
735 // Add in any newly found dupes to our running list
736 String tmpGrp = dIter.next();
737 logger.info("Found set of dupes: [" + tmpGrp + "]");
738 dupeGroups.add(tmpGrp);
742 }// end of extra dupe check for non-dependent nodes
744 if ( (thisNtDeleteCount > 0) && singleCommits ) {
745 // NOTE - the singleCommits option is not used in normal processing
747 g = AAIGraph.getInstance().getGraph().newTransaction();
750 thisNtDeleteCount = 0;
751 logger.info( " Processed " + thisNtCount + " records for [" + nType + "], " + totalNodeCount + " total overall. " );
753 }// While-loop for each node type
755 }// end of check to make sure we weren't only supposed to do edges
758 if( !skipEdgeCheckFlag ){
759 // --------------------------------------------------------------------------------------
760 // Now, we're going to look for one-armed-edges. Ie. an edge that
762 // been deleted (because a vertex on one side was deleted) but
763 // somehow was not deleted.
764 // So the one end of it points to a vertexId -- but that vertex is
766 // --------------------------------------------------------------------------------------
768 // To do some strange checking - we need a second graph object
769 logger.debug(" ---- DEBUG --- about to open a SECOND graph (takes a little while)--------\n");
770 // Note - graph2 just reads - but we want it to use a fresh connection to
771 // the database, so we are NOT using the CACHED DB CONFIG here.
772 graph2 = JanusGraphFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime2").buildConfiguration());
773 if (graph2 == null) {
774 String emsg = "null graph2 object in DataGrooming\n";
775 throw new AAIException(AAI_EXCEPTION_NUMBER, emsg);
777 logger.debug("Got the graph2 object... \n");
779 g2 = graph2.newTransaction();
781 String emsg = "null graphTransaction2 object in DataGrooming\n";
782 throw new AAIException(AAI_EXCEPTION_NUMBER, emsg);
785 ArrayList<Vertex> vertList = new ArrayList<>();
786 Iterator<Vertex> vItor3 = g.traversal().V();
787 // Gotta hold these in a List - or else HBase times out as you cycle
789 while (vItor3.hasNext()) {
790 Vertex v = vItor3.next();
795 Iterator<Vertex> vItor2 = vertList.iterator();
796 logger.info(" Checking for bad edges --- ");
798 while (vItor2.hasNext()) {
803 } catch (Exception vex) {
804 LoggingContext.statusCode(StatusCode.ERROR);
805 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
806 logger.warn(">>> WARNING trying to get next vertex on the vItor2 ");
811 String thisVertId = "";
813 thisVertId = v.id().toString();
814 } catch (Exception ev) {
815 LoggingContext.statusCode(StatusCode.ERROR);
816 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
817 logger.warn("WARNING when doing getId() on a vertex from our vertex list. ");
820 if (ghostNodeHash.containsKey(thisVertId)) {
821 // This is a phantom node, so don't try to use it
822 logger.info(" >> Skipping edge check for edges from vertexId = "
824 + ", since that guy is a Phantom Node");
828 if( windowStartTime > 0 ){
829 // They are using the time-window, so we only want nodes that are updated after a
830 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
831 Object objModTimeStamp = v.property("aai-last-mod-ts").orElse(null);
832 if( objModTimeStamp != null ){
833 long thisNodeModTime = (long)objModTimeStamp;
834 if( thisNodeModTime < windowStartTime ){
835 // It has a last modified ts and is NOT in our window, so we can pass over it
841 if (counter == lastShown + 250) {
843 logger.info("... Checking edges for vertex # "
846 Iterator<Edge> eItor = v.edges(Direction.BOTH);
847 while (eItor.hasNext()) {
853 } catch (Exception iex) {
854 LoggingContext.statusCode(StatusCode.ERROR);
855 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
856 logger.warn(">>> WARNING trying to get next edge on the eItor ", iex);
862 } catch (Exception err) {
863 LoggingContext.statusCode(StatusCode.ERROR);
864 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
865 logger.warn(">>> WARNING trying to get edge's In-vertex ", err);
869 Vertex ghost2 = null;
871 Boolean keysMissing = true;
872 Boolean cantGetUsingVid = false;
875 Object ob = vIn.<Object>property(AAI_NODE_TYPE).orElse(null);
877 vNtI = ob.toString();
878 keysMissing = anyKeyFieldsMissing(vNtI, vIn, loader);
883 vIdI = ob.toString();
884 vIdLong = Long.parseLong(vIdI);
887 if( ! ghost2CheckOff ){
888 Vertex connectedVert = g2.traversal().V(vIdLong).next();
889 if( connectedVert == null ) {
890 LoggingContext.statusCode(StatusCode.ERROR);
891 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
892 logger.warn( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
893 cantGetUsingVid = true;
895 // If we can NOT get this ghost with the SECOND graph-object,
896 // it is still a ghost since even though we can get data about it using the FIRST graph
899 ghost2 = g.traversal().V(vIdLong).next();
901 catch( Exception ex){
902 LoggingContext.statusCode(StatusCode.ERROR);
903 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
904 logger.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
906 if( ghost2 != null ){
907 ghostNodeHash.put(vIdI, ghost2);
910 }// end of the ghost2 checking
912 catch (Exception err) {
913 LoggingContext.statusCode(StatusCode.ERROR);
914 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
915 logger.warn(">>> WARNING trying to get edge's In-vertex props ", err);
918 if (keysMissing || vIn == null || vNtI.equals("")
919 || cantGetUsingVid) {
920 // this is a bad edge because it points to a vertex
921 // that isn't there anymore or is corrupted
922 String thisEid = e.id().toString();
923 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdI)) {
924 boolean okFlag = true;
925 if (!vIdI.equals("")) {
926 // try to get rid of the corrupted vertex
928 if( (ghost2 != null) && ghost2FixOn ){
935 // NOTE - the singleCommits option is not used in normal processing
937 g = AAIGraph.getInstance().getGraph().newTransaction();
940 } catch (Exception e1) {
942 LoggingContext.statusCode(StatusCode.ERROR);
943 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
944 logger.warn("WARNING when trying to delete bad-edge-connected VERTEX VID = "
948 logger.info(" DELETED vertex from bad edge = "
952 // remove the edge if we couldn't get the
957 // NOTE - the singleCommits option is not used in normal processing
959 g = AAIGraph.getInstance().getGraph().newTransaction();
962 } catch (Exception ex) {
963 // NOTE - often, the exception is just
964 // that this edge has already been
967 LoggingContext.statusCode(StatusCode.ERROR);
968 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
969 logger.warn("WARNING when trying to delete edge = "
973 logger.info(" DELETED edge = " + thisEid);
977 oneArmedEdgeHash.put(thisEid, e);
978 if ((vIn != null) && (vIn.id() != null)) {
979 emptyVertexHash.put(thisEid, vIn.id()
986 vOut = e.outVertex();
987 } catch (Exception err) {
988 LoggingContext.statusCode(StatusCode.ERROR);
989 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
990 logger.warn(">>> WARNING trying to get edge's Out-vertex ");
996 cantGetUsingVid = false;
999 Object ob = vOut.<Object>property(AAI_NODE_TYPE).orElse(null);
1001 vNtO = ob.toString();
1002 keysMissing = anyKeyFieldsMissing(vNtO,
1008 vIdO = ob.toString();
1009 vIdLong = Long.parseLong(vIdO);
1012 if( ! ghost2CheckOff ){
1013 Vertex connectedVert = g2.traversal().V(vIdLong).next();
1014 if( connectedVert == null ) {
1015 cantGetUsingVid = true;
1016 logger.info( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
1017 // If we can get this ghost with the other graph-object, then get it -- it's still a ghost
1019 ghost2 = g.traversal().V(vIdLong).next();
1021 catch( Exception ex){
1022 LoggingContext.statusCode(StatusCode.ERROR);
1023 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1024 logger.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
1026 if( ghost2 != null ){
1027 ghostNodeHash.put(vIdO, ghost2);
1031 } catch (Exception err) {
1032 LoggingContext.statusCode(StatusCode.ERROR);
1033 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1034 logger.warn(">>> WARNING trying to get edge's Out-vertex props ", err);
1037 if (keysMissing || vOut == null || vNtO.isEmpty()
1038 || cantGetUsingVid) {
1039 // this is a bad edge because it points to a vertex
1040 // that isn't there anymore
1041 String thisEid = e.id().toString();
1042 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdO)) {
1043 boolean okFlag = true;
1044 if (!vIdO.isEmpty()) {
1045 // try to get rid of the corrupted vertex
1047 if( (ghost2 != null) && ghost2FixOn ){
1050 else if (vOut != null) {
1053 if (singleCommits) {
1054 // NOTE - the singleCommits option is not used in normal processing
1056 g = AAIGraph.getInstance().getGraph().newTransaction();
1059 } catch (Exception e1) {
1061 LoggingContext.statusCode(StatusCode.ERROR);
1062 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1063 logger.warn("WARNING when trying to delete bad-edge-connected VID = "
1067 logger.info(" DELETED vertex from bad edge = "
1071 // remove the edge if we couldn't get the
1075 if (singleCommits) {
1076 // NOTE - the singleCommits option is not used in normal processing
1078 g = AAIGraph.getInstance().getGraph().newTransaction();
1081 } catch (Exception ex) {
1082 // NOTE - often, the exception is just
1083 // that this edge has already been
1086 LoggingContext.statusCode(StatusCode.ERROR);
1087 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1088 logger.warn("WARNING when trying to delete edge = "
1092 logger.info(" DELETED edge = " + thisEid);
1096 oneArmedEdgeHash.put(thisEid, e);
1097 if ((vOut != null) && (vOut.id() != null)) {
1098 emptyVertexHash.put(thisEid, vOut.id()
1103 }// End of while-edges-loop
1104 } catch (Exception exx) {
1105 LoggingContext.statusCode(StatusCode.ERROR);
1106 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1107 logger.warn("WARNING from in the while-verts-loop ", exx);
1109 }// End of while-vertices-loop (the edge-checking)
1110 } // end of -- if we're not skipping the edge-checking
1113 deleteCount = deleteCount + dupeGrpsDeleted;
1114 if (!singleCommits && deleteCount > 0) {
1116 logger.info("About to do the commit for "
1117 + deleteCount + " removes. ");
1118 executeFinalCommit = true;
1119 logger.info("Commit was successful ");
1120 } catch (Exception excom) {
1121 LoggingContext.statusCode(StatusCode.ERROR);
1122 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1123 logger.error(" >>>> ERROR <<<< Could not commit changes. " + LogFormatTools.getStackTop(excom));
1128 int ghostNodeCount = ghostNodeHash.size();
1129 int orphanNodeCount = orphanNodeHash.size();
1130 int missingDepNodeCount = missingDepNodeHash.size();
1131 int oneArmedEdgeCount = oneArmedEdgeHash.size();
1132 int dupeCount = dupeGroups.size();
1134 deleteCount = deleteCount + dupeGrpsDeleted;
1136 bw.write("\n\n ============ Summary ==============\n");
1137 if( timeWindowMinutes == 0 ){
1138 bw.write("Ran FULL data grooming (no time-window). \n");
1141 bw.write("Ran PARTIAL data grooming just looking at data added/updated in the last " + timeWindowMinutes + " minutes. \n");
1144 bw.write("\nRan these nodeTypes: " + ntList + "\n\n");
1145 bw.write("There were this many delete candidates from previous run = "
1146 + deleteCandidateList.size() + "\n");
1147 if (dontFixOrphansFlag) {
1148 bw.write(" Note - we are not counting orphan nodes since the -dontFixOrphans parameter was used. \n");
1150 bw.write("Deleted this many delete candidates = " + deleteCount
1152 bw.write("Total number of nodes looked at = " + totalNodeCount
1154 bw.write("Ghost Nodes identified = " + ghostNodeCount + "\n");
1155 bw.write("Orphan Nodes identified = " + orphanNodeCount + "\n");
1156 bw.write("Bad Edges identified = " + oneArmedEdgeCount + "\n");
1157 bw.write("Missing Dependent Edge (but not orphaned) node count = "
1158 + missingDepNodeCount + "\n");
1159 bw.write("Duplicate Groups count = " + dupeCount + "\n");
1160 bw.write("MisMatching Label/aai-node-type count = "
1161 + misMatchedHash.size() + "\n");
1163 bw.write("\n ------------- Delete Candidates ---------\n");
1164 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1166 String vid = entry.getKey();
1167 bw.write("DeleteCandidate: Phantom Vid = [" + vid + "]\n");
1168 cleanupCandidateCount++;
1170 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1172 String vid = entry.getKey();
1173 bw.write("DeleteCandidate: OrphanDepNode Vid = [" + vid + "]\n");
1174 if (!dontFixOrphansFlag) {
1175 cleanupCandidateCount++;
1178 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1179 String eid = entry.getKey();
1180 bw.write("DeleteCandidate: Bad EDGE Edge-id = [" + eid + "]\n");
1181 cleanupCandidateCount++;
1183 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1185 String vid = entry.getKey();
1186 bw.write("DeleteCandidate: (maybe) missingDepNode Vid = ["
1188 cleanupCandidateCount++;
1190 bw.write("\n-- NOTE - To see DeleteCandidates for Duplicates, you need to look in the Duplicates Detail section below.\n");
1192 bw.write("\n ------------- GHOST NODES - detail ");
1193 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1196 String vid = entry.getKey();
1197 bw.write("\n ==> Phantom Vid = " + vid + "\n");
1198 ArrayList<String> retArr = showPropertiesForNode(
1199 TRANSID, FROMAPPID, entry.getValue());
1200 for (String info : retArr) {
1201 bw.write(info + "\n");
1204 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1206 for (String info : retArr) {
1207 bw.write(info + "\n");
1209 } catch (Exception dex) {
1210 LoggingContext.statusCode(StatusCode.ERROR);
1211 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1212 logger.error("error trying to print detail info for a ghost-node: " + LogFormatTools.getStackTop(dex));
1216 bw.write("\n ------------- Missing Dependent Edge ORPHAN NODES - detail: ");
1217 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1220 String vid = entry.getKey();
1221 bw.write("\n> Orphan Node Vid = " + vid + "\n");
1222 ArrayList<String> retArr = showPropertiesForNode(
1223 TRANSID, FROMAPPID, entry.getValue());
1224 for (String info : retArr) {
1225 bw.write(info + "\n");
1228 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1230 for (String info : retArr) {
1231 bw.write(info + "\n");
1233 } catch (Exception dex) {
1234 LoggingContext.statusCode(StatusCode.ERROR);
1235 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1236 logger.error("error trying to print detail info for a Orphan Node /missing dependent edge " + LogFormatTools.getStackTop(dex));
1240 bw.write("\n ------------- Missing Dependent Edge (but not orphan) NODES: ");
1241 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1244 String vid = entry.getKey();
1245 bw.write("\n> Missing edge to Dependent Node (but has edges) Vid = "
1247 ArrayList<String> retArr = showPropertiesForNode(
1248 TRANSID, FROMAPPID, entry.getValue());
1249 for (String info : retArr) {
1250 bw.write(info + "\n");
1253 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1255 for (String info : retArr) {
1256 bw.write(info + "\n");
1258 } catch (Exception dex) {
1259 LoggingContext.statusCode(StatusCode.ERROR);
1260 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1261 logger.error("error trying to print detail info for a node missing its dependent edge but not an orphan "
1262 + LogFormatTools.getStackTop(dex));
1266 bw.write("\n ------------- EDGES pointing to empty/bad vertices: ");
1267 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1269 String eid = entry.getKey();
1270 Edge thisE = entry.getValue();
1271 String badVid = emptyVertexHash.get(eid);
1272 bw.write("\n> Edge pointing to bad vertex (Vid = "
1273 + badVid + ") EdgeId = " + eid + "\n");
1274 bw.write("Label: [" + thisE.label() + "]\n");
1275 Iterator<Property<Object>> pI = thisE.properties();
1276 while (pI.hasNext()) {
1277 Property<Object> propKey = pI.next();
1278 bw.write("Prop: [" + propKey + "], val = ["
1279 + propKey.value() + "]\n");
1281 } catch (Exception pex) {
1282 LoggingContext.statusCode(StatusCode.ERROR);
1283 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1284 logger.error("error trying to print empty/bad vertex data: " + LogFormatTools.getStackTop(pex));
1288 bw.write("\n ------------- Duplicates: ");
1289 Iterator<String> dupeIter = dupeGroups.iterator();
1290 int dupeSetCounter = 0;
1291 while (dupeIter.hasNext()) {
1293 String dset = (String) dupeIter.next();
1295 bw.write("\n --- Duplicate Group # " + dupeSetCounter
1296 + " Detail -----------\n");
1298 // We expect each line to have at least two vid's, followed
1299 // by the preferred one to KEEP
1300 String[] dupeArr = dset.split("\\|");
1301 ArrayList<String> idArr = new ArrayList<>();
1302 int lastIndex = dupeArr.length - 1;
1303 for (int i = 0; i <= lastIndex; i++) {
1304 if (i < lastIndex) {
1305 // This is not the last entry, it is one of the
1306 // dupes, so we want to show all its info
1307 bw.write(" >> Duplicate Group # "
1308 + dupeSetCounter + " Node # " + i
1310 String vidString = dupeArr[i];
1311 idArr.add(vidString);
1312 long longVertId = Long.parseLong(vidString);
1313 Iterator<Vertex> vtxIterator = g.vertices(longVertId);
1315 if (vtxIterator.hasNext()) {
1316 vtx = vtxIterator.next();
1318 ArrayList<String> retArr = showPropertiesForNode(TRANSID, FROMAPPID, vtx);
1319 for (String info : retArr) {
1320 bw.write(info + "\n");
1323 retArr = showAllEdgesForNode(TRANSID,
1325 for (String info : retArr) {
1326 bw.write(info + "\n");
1329 // This is the last entry which should tell us if we
1330 // have a preferred keeper
1331 String prefString = dupeArr[i];
1332 if (KEEP_VID_UNDETERMINED.equals(prefString)) {
1333 bw.write("\n For this group of duplicates, could not tell which one to keep.\n");
1334 bw.write(" >>> This group needs to be taken care of with a manual/forced-delete.\n");
1336 // If we know which to keep, then the prefString
1337 // should look like, "KeepVid=12345"
1338 String[] prefArr = prefString.split("=");
1339 if (prefArr.length != 2
1340 || (!"KeepVid".equals(prefArr[0]))) {
1341 throw new Exception("Bad format. Expecting KeepVid=999999");
1343 String keepVidStr = prefArr[1];
1344 if (idArr.contains(keepVidStr)) {
1345 bw.write("\n The vertex we want to KEEP has vertexId = "
1347 bw.write("\n The others become delete candidates: \n");
1348 idArr.remove(keepVidStr);
1349 for (int x = 0; x < idArr.size(); x++) {
1350 cleanupCandidateCount++;
1351 bw.write("DeleteCandidate: Duplicate Vid = ["
1352 + idArr.get(x) + "]\n");
1355 throw new Exception("ERROR - Vertex Id to keep not found in list of dupes. dset = ["
1359 }// else we know which one to keep
1361 }// for each vertex in a group
1362 } catch (Exception dex) {
1363 LoggingContext.statusCode(StatusCode.ERROR);
1364 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1365 logger.error("error trying to print duplicate vertex data " + LogFormatTools.getStackTop(dex));
1368 }// while - work on each group of dupes
1370 bw.write("\n ------------- Mis-matched Label/aai-node-type Nodes: \n ");
1371 for (Map.Entry<String, String> entry : misMatchedHash.entrySet()) {
1372 String msg = entry.getValue();
1373 bw.write("MixedMsg = " + msg + "\n");
1376 bw.write("\n ------------- Got these errors while processing: \n");
1377 Iterator<String> errIter = errArr.iterator();
1378 while (errIter.hasNext()) {
1379 String line = errIter.next();
1380 bw.write(line + "\n");
1385 logger.info("\n ------------- Done doing all the checks ------------ ");
1386 logger.info("Output will be written to " + fullOutputFileName);
1388 if (cleanupCandidateCount > 0) {
1389 // Technically, this is not an error -- but we're throwing this
1390 // error so that hopefully a
1391 // monitoring system will pick it up and do something with it.
1392 throw new AAIException("AAI_6123", "See file: [" + fullOutputFileName
1393 + "] and investigate delete candidates. ");
1395 } catch (AAIException e) {
1396 LoggingContext.statusCode(StatusCode.ERROR);
1397 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1398 logger.error("Caught AAIException while grooming data");
1399 ErrorLogHelper.logException(e);
1400 } catch (Exception ex) {
1401 LoggingContext.statusCode(StatusCode.ERROR);
1402 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1403 logger.error("Caught exception while grooming data");
1404 ErrorLogHelper.logError("AAI_6128", ex.getMessage() + ", resolve and rerun dataGrooming");
1410 } catch (IOException iox) {
1411 LoggingContext.statusCode(StatusCode.ERROR);
1412 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1413 logger.warn("Got an IOException trying to close bufferedWriter() \n", iox);
1417 if (g != null && g.tx().isOpen()) {
1418 // Any changes that worked correctly should have already done
1421 if (executeFinalCommit) {
1425 } catch (Exception ex) {
1426 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed
1427 LoggingContext.statusCode(StatusCode.ERROR);
1428 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1429 logger.warn("WARNING from final graphTransaction.rollback()", ex);
1433 if (g2 != null && g2.tx().isOpen()) {
1434 // Any changes that worked correctly should have already done
1438 } catch (Exception ex) {
1439 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed
1440 LoggingContext.statusCode(StatusCode.ERROR);
1441 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1442 logger.warn("WARNING from final graphTransaction2.rollback()", ex);
1446 if( finalShutdownFlag ){
1448 if( graph != null && graph.isOpen() ){
1452 } catch (Exception ex) {
1453 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed{
1454 LoggingContext.statusCode(StatusCode.ERROR);
1455 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1456 logger.warn("WARNING from final graph.shutdown()", ex);
1460 if( graph2 != null && graph2.isOpen() ){
1461 graph2.tx().close();
1464 } catch (Exception ex) {
1465 // Don't throw anything because JanusGraph sometimes is just saying that the graph is already closed{
1466 LoggingContext.statusCode(StatusCode.ERROR);
1467 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1468 logger.warn("WARNING from final graph2.shutdown()", ex);
1474 return cleanupCandidateCount;
1476 }// end of doTheGrooming()
1480 * Vertex has these keys.
1482 * @param tmpV the tmp V
1483 * @param propHashWithKeys the prop hash with keys
1484 * @return the boolean
1486 private static Boolean vertexHasTheseKeys( Vertex tmpV, HashMap <String, Object> propHashWithKeys) {
1487 Iterator <?> it = propHashWithKeys.entrySet().iterator();
1488 while( it.hasNext() ){
1489 String propName = "";
1490 String propVal = "";
1491 Map.Entry <?,?>propEntry = (Map.Entry<?,?>)it.next();
1492 Object propNameObj = propEntry.getKey();
1493 if( propNameObj != null ){
1494 propName = propNameObj.toString();
1496 Object propValObj = propEntry.getValue();
1497 if( propValObj != null ){
1498 propVal = propValObj.toString();
1500 Object checkValObj = tmpV.<Object>property(propName).orElse(null);
1501 if( checkValObj == null ) {
1504 else if( !propVal.equals(checkValObj.toString()) ){
1513 * Any key fields missing.
1515 * @param nType the n type
1517 * @return the boolean
1519 private static Boolean anyKeyFieldsMissing(String nType, Vertex v, Loader loader) {
1522 Introspector obj = null;
1524 obj = loader.introspectorFromName(nType);
1525 } catch (AAIUnknownObjectException e) {
1526 // They gave us a non-empty nodeType but our NodeKeyProps does
1527 // not have data for it. Since we do not know what the
1528 // key params are for this type of node, we will just
1530 String emsg = " -- WARNING -- Unrecognized nodeType: [" + nType
1531 + "]. We cannot determine required keys for this nType. ";
1532 // NOTE - this will be caught below and a "false" returned
1533 throw new AAIException("AAI_6121", emsg);
1536 // Determine what the key fields are for this nodeType
1537 Collection <String> keyPropNamesColl = obj.getKeys();
1538 Iterator<String> keyPropI = keyPropNamesColl.iterator();
1539 while (keyPropI.hasNext()) {
1540 String propName = keyPropI.next();
1541 Object ob = v.<Object>property(propName).orElse(null);
1542 if (ob == null || ob.toString().isEmpty()) {
1543 // It is missing a key property
1547 } catch (AAIException e) {
1548 // Something was wrong -- but since we weren't able to check
1549 // the keys, we will not declare that it is missing keys.
1557 * Gets the delete list.
1559 * @param targetDir the target dir
1560 * @param fileName the file name
1561 * @param edgesOnlyFlag the edges only flag
1562 * @param dontFixOrphans the dont fix orphans
1563 * @param dupeFixOn the dupe fix on
1564 * @return the delete list
1565 * @throws AAIException the AAI exception
1567 private static Set<String> getDeleteList(String targetDir,
1568 String fileName, Boolean edgesOnlyFlag, Boolean dontFixOrphans,
1569 Boolean dupeFixOn) throws AAIException {
1571 // Look in the file for lines formated like we expect - pull out any
1572 // Vertex Id's to delete on this run
1573 Set<String> delList = new LinkedHashSet<>();
1574 String fullFileName = targetDir + AAIConstants.AAI_FILESEP + fileName;
1576 try(BufferedReader br = new BufferedReader(new FileReader(fullFileName))) {
1577 String line = br.readLine();
1578 while (line != null) {
1579 if (!"".equals(line) && line.startsWith("DeleteCandidate")) {
1580 if (edgesOnlyFlag && (!line.contains("Bad Edge"))) {
1581 // We're not going to process edge guys
1582 } else if (dontFixOrphans && line.contains("Orphan")) {
1583 // We're not going to process orphans
1584 } else if (!dupeFixOn && line.contains("Duplicate")) {
1585 // We're not going to process Duplicates
1587 int begIndex = line.indexOf("id = ");
1588 int endIndex = line.indexOf("]");
1589 String vidVal = line.substring(begIndex + 6, endIndex);
1590 delList.add(vidVal);
1593 line = br.readLine();
1596 } catch (IOException e) {
1597 throw new AAIException("AAI_6124", e, "Could not open input-file [" + fullFileName
1598 + "], exception= " + e.getMessage());
1603 }// end of getDeleteList
1606 * Gets the preferred dupe.
1608 * @param transId the trans id
1609 * @param fromAppId the from app id
1611 * @param dupeVertexList the dupe vertex list
1612 * @param ver the ver
1614 * @throws AAIException the AAI exception
1616 public static Vertex getPreferredDupe(String transId,
1617 String fromAppId, GraphTraversalSource g,
1618 ArrayList<Vertex> dupeVertexList, String ver, Loader loader)
1619 throws AAIException {
1621 // This method assumes that it is being passed a List of vertex objects
1623 // violate our uniqueness constraints.
1625 Vertex nullVtx = null;
1627 if (dupeVertexList == null) {
1630 int listSize = dupeVertexList.size();
1631 if (listSize == 0) {
1634 if (listSize == 1) {
1635 return (dupeVertexList.get(0));
1638 Vertex vtxPreferred = null;
1639 Vertex currentFaveVtx = dupeVertexList.get(0);
1640 for (int i = 1; i < listSize; i++) {
1641 Vertex vtxB = dupeVertexList.get(i);
1642 vtxPreferred = pickOneOfTwoDupes(transId, fromAppId, g,
1643 currentFaveVtx, vtxB, ver, loader);
1644 if (vtxPreferred == null) {
1645 // We couldn't choose one
1648 currentFaveVtx = vtxPreferred;
1652 return (currentFaveVtx);
1654 } // end of getPreferredDupe()
1657 * Pick one of two dupes.
1659 * @param transId the trans id
1660 * @param fromAppId the from app id
1662 * @param vtxA the vtx A
1663 * @param vtxB the vtx B
1664 * @param ver the ver
1666 * @throws AAIException the AAI exception
1668 public static Vertex pickOneOfTwoDupes(String transId,
1669 String fromAppId, GraphTraversalSource g, Vertex vtxA,
1670 Vertex vtxB, String ver, Loader loader) throws AAIException {
1672 Vertex nullVtx = null;
1673 Vertex preferredVtx = null;
1675 Long vidA = new Long(vtxA.id().toString());
1676 Long vidB = new Long(vtxB.id().toString());
1678 String vtxANodeType = "";
1679 String vtxBNodeType = "";
1680 Object objType = vtxA.<Object>property(AAI_NODE_TYPE).orElse(null);
1681 if (objType != null) {
1682 vtxANodeType = objType.toString();
1684 objType = vtxB.<Object>property(AAI_NODE_TYPE).orElse(null);
1685 if (objType != null) {
1686 vtxBNodeType = objType.toString();
1689 if (vtxANodeType.isEmpty() || (!vtxANodeType.equals(vtxBNodeType))) {
1690 // Either they're not really dupes or there's some bad data - so
1695 // Check that node A and B both have the same key values (or else they
1697 // (We'll check dep-node later)
1698 // Determine what the key fields are for this nodeType
1699 Collection <String> keyProps = new ArrayList <>();
1700 HashMap <String,Object> keyPropValsHash = new HashMap <String,Object>();
1702 keyProps = loader.introspectorFromName(vtxANodeType).getKeys();
1703 } catch (AAIUnknownObjectException e) {
1704 logger.warn("Required property not found", e);
1705 throw new AAIException("AAI_6105", "Required Property name(s) not found for nodeType = " + vtxANodeType + ")");
1708 Iterator<String> keyPropI = keyProps.iterator();
1709 while (keyPropI.hasNext()) {
1710 String propName = keyPropI.next();
1711 String vtxAKeyPropVal = "";
1712 objType = vtxA.<Object>property(propName).orElse(null);
1713 if (objType != null) {
1714 vtxAKeyPropVal = objType.toString();
1716 String vtxBKeyPropVal = "";
1717 objType = vtxB.<Object>property(propName).orElse(null);
1718 if (objType != null) {
1719 vtxBKeyPropVal = objType.toString();
1722 if (vtxAKeyPropVal.equals("")
1723 || (!vtxAKeyPropVal.equals(vtxBKeyPropVal))) {
1724 // Either they're not really dupes or they are missing some key
1725 // data - so don't pick one
1729 // Keep these around for (potential) use later
1730 keyPropValsHash.put(propName, vtxAKeyPropVal);
1735 // Collect the vid's and aai-node-types of the vertices that each vertex
1736 // (A and B) is connected to.
1737 ArrayList<String> vtxIdsConn2A = new ArrayList<>();
1738 ArrayList<String> vtxIdsConn2B = new ArrayList<>();
1739 HashMap<String, String> nodeTypesConn2A = new HashMap<>();
1740 HashMap<String, String> nodeTypesConn2B = new HashMap<>();
1742 ArrayList<Vertex> vertListA = getConnectedNodes( g, vtxA );
1743 if (vertListA != null) {
1744 Iterator<Vertex> iter = vertListA.iterator();
1745 while (iter.hasNext()) {
1746 Vertex tvCon = iter.next();
1747 String conVid = tvCon.id().toString();
1749 objType = tvCon.<Object>property(AAI_NODE_TYPE).orElse(null);
1750 if (objType != null) {
1751 nt = objType.toString();
1753 nodeTypesConn2A.put(nt, conVid);
1754 vtxIdsConn2A.add(conVid);
1758 ArrayList<Vertex> vertListB = getConnectedNodes( g, vtxB );
1759 if (vertListB != null) {
1760 Iterator<Vertex> iter = vertListB.iterator();
1761 while (iter.hasNext()) {
1762 Vertex tvCon = iter.next();
1763 String conVid = tvCon.id().toString();
1765 objType = tvCon.<Object>property(AAI_NODE_TYPE).orElse(null);
1766 if (objType != null) {
1767 nt = objType.toString();
1769 nodeTypesConn2B.put(nt, conVid);
1770 vtxIdsConn2B.add(conVid);
1774 // 1 - If this kind of node needs a dependent node for uniqueness, then
1775 // verify that they both nodes point to the same dependent
1776 // node (otherwise they're not really duplicates)
1777 // Note - there are sometimes more than one dependent node type since
1778 // one nodeType can be used in different ways. But for a
1779 // particular node, it will only have one dependent node that
1780 // it's connected to.
1781 String onlyNodeThatIndexPointsToVidStr = "";
1782 Collection<String> depNodeTypes = loader.introspectorFromName(vtxANodeType).getDependentOn();
1783 if (depNodeTypes.isEmpty()) {
1784 // This kind of node is not dependent on any other. That is ok.
1785 // We need to find out if the unique index info is good or not and
1786 // use that later when deciding if we can delete one.
1787 onlyNodeThatIndexPointsToVidStr = findJustOneUsingIndex( transId,
1788 fromAppId, g, keyPropValsHash, vtxANodeType, vidA, vidB, ver );
1790 String depNodeVtxId4A = "";
1791 String depNodeVtxId4B = "";
1792 Iterator<String> iter = depNodeTypes.iterator();
1793 while (iter.hasNext()) {
1794 String depNodeType = iter.next();
1795 if (nodeTypesConn2A.containsKey(depNodeType)) {
1796 // This is the dependent node type that vertex A is using
1797 depNodeVtxId4A = nodeTypesConn2A.get(depNodeType);
1799 if (nodeTypesConn2B.containsKey(depNodeType)) {
1800 // This is the dependent node type that vertex B is using
1801 depNodeVtxId4B = nodeTypesConn2B.get(depNodeType);
1804 if (depNodeVtxId4A.isEmpty()
1805 || (!depNodeVtxId4A.equals(depNodeVtxId4B))) {
1806 // Either they're not really dupes or there's some bad data - so
1807 // don't pick either one
1812 if (vtxIdsConn2A.size() == vtxIdsConn2B.size()) {
1813 // 2 - If they both have edges to all the same vertices,
1814 // then return the one that can be reached uniquely via the
1815 // key if that is the case or
1816 // else the one with the lower vertexId
1818 boolean allTheSame = true;
1819 Iterator<String> iter = vtxIdsConn2A.iterator();
1820 while (iter.hasNext()) {
1821 String vtxIdConn2A = iter.next();
1822 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1829 // If everything is the same, but one of the two has a good
1830 // pointer to it, then save that one. Otherwise, take the
1832 if( !onlyNodeThatIndexPointsToVidStr.isEmpty() ){
1833 // only one is reachable via the index - choose that one.
1834 if( onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1835 preferredVtx = vtxA;
1837 else if( onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1838 preferredVtx = vtxB;
1841 else if (vidA < vidB) {
1842 preferredVtx = vtxA;
1844 preferredVtx = vtxB;
1847 } else if (vtxIdsConn2A.size() > vtxIdsConn2B.size()) {
1848 // 3 - VertexA is connected to more things than vtxB.
1849 // We'll pick VtxA if its edges are a superset of vtxB's edges
1850 // and it doesn't contradict the check for the index/key pointer.
1851 boolean missingOne = false;
1852 Iterator<String> iter = vtxIdsConn2B.iterator();
1853 while (iter.hasNext()) {
1854 String vtxIdConn2B = iter.next();
1855 if (!vtxIdsConn2A.contains(vtxIdConn2B)) {
1861 if( onlyNodeThatIndexPointsToVidStr.isEmpty()
1862 || onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1863 preferredVtx = vtxA;
1866 } else if (vtxIdsConn2B.size() > vtxIdsConn2A.size()) {
1867 // 4 - VertexB is connected to more things than vtxA.
1868 // We'll pick VtxB if its edges are a superset of vtxA's edges
1869 // and it doesn't contradict the check for the index/key pointer.
1870 boolean missingOne = false;
1871 Iterator<String> iter = vtxIdsConn2A.iterator();
1872 while (iter.hasNext()) {
1873 String vtxIdConn2A = iter.next();
1874 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1880 if( onlyNodeThatIndexPointsToVidStr.isEmpty()
1881 || onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1882 preferredVtx = vtxB;
1886 preferredVtx = nullVtx;
1889 return (preferredVtx);
1891 } // end of pickOneOfTwoDupes()
1894 * Check and process dupes.
1896 * @param transId the trans id
1897 * @param fromAppId the from app id
1899 * @param version the version
1900 * @param nType the n type
1901 * @param passedVertList the passed vert list
1902 * @param dupeFixOn the dupe fix on
1903 * @param deleteCandidateList the delete candidate list
1904 * @param singleCommits the single commits
1905 * @param alreadyFoundDupeGroups the already found dupe groups
1906 * @return the array list
1908 private static List<String> checkAndProcessDupes(String transId,
1909 String fromAppId, Graph g, GraphTraversalSource source, String version, String nType,
1910 List<Vertex> passedVertList, Boolean dupeFixOn,
1911 Set<String> deleteCandidateList, Boolean singleCommits,
1912 ArrayList<String> alreadyFoundDupeGroups, Loader loader ) {
1914 ArrayList<String> returnList = new ArrayList<>();
1915 ArrayList<Vertex> checkVertList = new ArrayList<>();
1916 ArrayList<String> alreadyFoundDupeVidArr = new ArrayList<>();
1917 Boolean noFilterList = true;
1918 Iterator<String> afItr = alreadyFoundDupeGroups.iterator();
1919 while (afItr.hasNext()) {
1920 String dupeGrpStr = afItr.next();
1921 String[] dupeArr = dupeGrpStr.split("\\|");
1922 int lastIndex = dupeArr.length - 1;
1923 for (int i = 0; i < lastIndex; i++) {
1924 // Note: we don't want the last one...
1925 String vidString = dupeArr[i];
1926 alreadyFoundDupeVidArr.add(vidString);
1927 noFilterList = false;
1931 // For a given set of Nodes that were found with a set of KEY
1932 // Parameters, (nodeType + key data) we will
1933 // see if we find any duplicate nodes that need to be cleaned up. Note -
1934 // it's legit to have more than one
1935 // node with the same key data if the nodes depend on a parent for
1936 // uniqueness -- as long as the two nodes
1937 // don't hang off the same Parent.
1938 // If we find duplicates, and we can figure out which of each set of
1939 // duplicates is the one that we
1940 // think should be preserved, we will record that. Whether we can tell
1941 // which one should be
1942 // preserved or not, we will return info about any sets of duplicates
1945 // Each element in the returned arrayList might look like this:
1946 // "1234|5678|keepVid=UNDETERMINED" (if there were 2 dupes, and we
1947 // couldn't figure out which one to keep)
1948 // or, "100017|200027|30037|keepVid=30037" (if there were 3 dupes and we
1949 // thought the third one was the one that should survive)
1951 // Because of the way the calling code loops over stuff, we can get the
1952 // same data multiple times - so we should
1953 // not process any vertices that we've already seen.
1956 Iterator<Vertex> pItr = passedVertList.iterator();
1957 while (pItr.hasNext()) {
1958 Vertex tvx = pItr.next();
1959 String passedId = tvx.id().toString();
1960 if (noFilterList || !alreadyFoundDupeVidArr.contains(passedId)) {
1961 // We haven't seen this one before - so we should check it.
1962 checkVertList.add(tvx);
1966 if (checkVertList.size() < 2) {
1967 // Nothing new to check.
1971 if (loader.introspectorFromName(nType).isTopLevel()) {
1972 // If this was a node that does NOT depend on other nodes for
1973 // uniqueness, and we
1974 // found more than one node using its key -- record the found
1975 // vertices as duplicates.
1976 String dupesStr = "";
1977 for (int i = 0; i < checkVertList.size(); i++) {
1979 + checkVertList.get(i).id().toString() + "|";
1981 if (dupesStr != "") {
1982 Vertex prefV = getPreferredDupe(transId, fromAppId,
1983 source, checkVertList, version, loader);
1984 if (prefV == null) {
1985 // We could not determine which duplicate to keep
1986 dupesStr = dupesStr + KEEP_VID_UNDETERMINED;
1987 returnList.add(dupesStr);
1989 dupesStr = dupesStr + "KeepVid=" + prefV.id();
1990 Boolean didRemove = false;
1992 didRemove = deleteNonKeepersIfAppropriate(g,
1993 dupesStr, prefV.id().toString(),
1994 deleteCandidateList, singleCommits);
1999 // keep them on our list
2000 returnList.add(dupesStr);
2005 // More than one node have the same key fields since they may
2006 // depend on a parent node for uniqueness. Since we're finding
2007 // more than one, we want to check to see if any of the
2008 // vertices that have this set of keys (and are the same nodeType)
2009 // are also pointing at the same 'parent' node.
2010 // Note: for a given set of key data, it is possible that there
2011 // could be more than one set of duplicates.
2012 HashMap<String, ArrayList<Vertex>> vertsGroupedByParentHash = groupVertsByDepNodes(
2013 transId, fromAppId, source, version, nType,
2014 checkVertList, loader);
2015 for (Map.Entry<String, ArrayList<Vertex>> entry : vertsGroupedByParentHash
2017 ArrayList<Vertex> thisParentsVertList = entry
2019 if (thisParentsVertList.size() > 1) {
2020 // More than one vertex found with the same key info
2021 // hanging off the same parent/dependent node
2022 String dupesStr = "";
2023 for (int i = 0; i < thisParentsVertList.size(); i++) {
2025 + thisParentsVertList.get(i).id() + "|";
2027 if (dupesStr != "") {
2028 Vertex prefV = getPreferredDupe(transId,
2029 fromAppId, source, thisParentsVertList,
2032 if (prefV == null) {
2033 // We could not determine which duplicate to
2035 dupesStr = dupesStr + KEEP_VID_UNDETERMINED;
2036 returnList.add(dupesStr);
2038 Boolean didRemove = false;
2039 dupesStr = dupesStr + "KeepVid="
2040 + prefV.id().toString();
2042 didRemove = deleteNonKeepersIfAppropriate(
2043 g, dupesStr, prefV.id()
2045 deleteCandidateList, singleCommits);
2050 // keep them on our list
2051 returnList.add(dupesStr);
2058 } catch (Exception e) {
2059 LoggingContext.statusCode(StatusCode.ERROR);
2060 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2061 logger.warn(" >>> Threw an error in checkAndProcessDupes - just absorb this error and move on. ", e);
2066 }// End of checkAndProcessDupes()
2069 * Group verts by dep nodes.
2071 * @param transId the trans id
2072 * @param fromAppId the from app id
2074 * @param version the version
2075 * @param nType the n type
2076 * @param passedVertList the passed vert list
2077 * @return the hash map
2078 * @throws AAIException the AAI exception
2080 private static HashMap<String, ArrayList<Vertex>> groupVertsByDepNodes(
2081 String transId, String fromAppId, GraphTraversalSource g, String version,
2082 String nType, ArrayList<Vertex> passedVertList, Loader loader)
2083 throws AAIException {
2084 // Given a list of JanusGraph Vertices of one nodeType (see AAI-8956), group
2085 // them together by the parent node they depend on.
2086 // Ie. if given a list of ip address nodes (assumed to all have the
2087 // same key info) they might sit under several different parent vertices.
2088 // Under Normal conditions, there would only be one per parent -- but
2089 // we're trying to find duplicates - so we
2090 // allow for the case where more than one is under the same parent node.
2092 HashMap<String, ArrayList<Vertex>> retHash = new HashMap<String, ArrayList<Vertex>>();
2093 if (loader.introspectorFromName(nType).isTopLevel()) {
2094 // This method really should not have been called if this is not the
2096 // that depends on a parent for uniqueness, so just return the empty
2101 // Find out what types of nodes the passed in nodes can depend on
2102 ArrayList<String> depNodeTypeL = new ArrayList<>();
2103 Collection<String> depNTColl = loader.introspectorFromName(nType).getDependentOn();
2104 Iterator<String> ntItr = depNTColl.iterator();
2105 while (ntItr.hasNext()) {
2106 depNodeTypeL.add(ntItr.next());
2108 // For each vertex, we want find its depended-on/parent vertex so we
2109 // can track what other vertexes that are dependent on that same guy.
2110 if (passedVertList != null) {
2111 Iterator<Vertex> iter = passedVertList.iterator();
2112 while (iter.hasNext()) {
2113 Vertex thisVert = iter.next();
2114 Vertex tmpParentVtx = getConnectedParent( g, thisVert );
2115 if( tmpParentVtx != null ) {
2116 String parentNt = null;
2117 Object obj = tmpParentVtx.<Object>property(AAI_NODE_TYPE).orElse(null);
2119 parentNt = obj.toString();
2121 if (depNTColl.contains(parentNt)) {
2122 // This must be the parent/dependent node
2123 String parentVid = tmpParentVtx.id().toString();
2124 if (retHash.containsKey(parentVid)) {
2125 // add this vert to the list for this parent key
2126 retHash.get(parentVid).add(thisVert);
2128 // This is the first one we found on this parent
2129 ArrayList<Vertex> vList = new ArrayList<>();
2130 vList.add(thisVert);
2131 retHash.put(parentVid, vList);
2140 }// end of groupVertsByDepNodes()
2143 * Delete non keepers if appropriate.
2146 * @param dupeInfoString the dupe info string
2147 * @param vidToKeep the vid to keep
2148 * @param deleteCandidateList the delete candidate list
2149 * @param singleCommits the single commits
2150 * @return the boolean
2152 private static Boolean deleteNonKeepersIfAppropriate(Graph g,
2153 String dupeInfoString, String vidToKeep,
2154 Set<String> deleteCandidateList, Boolean singleCommits) {
2156 Boolean deletedSomething = false;
2157 // This assumes that the dupeInfoString is in the format of
2158 // pipe-delimited vid's followed by
2159 // ie. "3456|9880|keepVid=3456"
2160 if (deleteCandidateList == null || deleteCandidateList.isEmpty()) {
2161 // No vid's on the candidate list -- so no deleting will happen on
2166 String[] dupeArr = dupeInfoString.split("\\|");
2167 ArrayList<String> idArr = new ArrayList<>();
2168 int lastIndex = dupeArr.length - 1;
2169 for (int i = 0; i <= lastIndex; i++) {
2170 if (i < lastIndex) {
2171 // This is not the last entry, it is one of the dupes,
2172 String vidString = dupeArr[i];
2173 idArr.add(vidString);
2175 // This is the last entry which should tell us if we have a
2177 String prefString = dupeArr[i];
2178 if (prefString.equals(KEEP_VID_UNDETERMINED)) {
2179 // They sent us a bad string -- nothing should be deleted if
2180 // no dupe could be tagged as preferred
2183 // If we know which to keep, then the prefString should look
2184 // like, "KeepVid=12345"
2185 String[] prefArr = prefString.split("=");
2186 if (prefArr.length != 2 || (!"KeepVid".equals(prefArr[0]))) {
2187 LoggingContext.statusCode(StatusCode.ERROR);
2188 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2189 logger.error("Bad format. Expecting KeepVid=999999");
2192 String keepVidStr = prefArr[1];
2193 if (idArr.contains(keepVidStr)) {
2194 idArr.remove(keepVidStr);
2196 // So now, the idArr should just contain the vid's
2197 // that we want to remove.
2198 for (int x = 0; x < idArr.size(); x++) {
2199 boolean okFlag = true;
2200 String thisVid = idArr.get(x);
2201 if (deleteCandidateList.contains(thisVid)) {
2202 // This vid is a valid delete candidate from
2203 // a prev. run, so we can remove it.
2205 long longVertId = Long
2206 .parseLong(thisVid);
2208 .traversal().V(longVertId).next();
2210 if (singleCommits) {
2211 // NOTE - the singleCommits option is not used in normal processing
2213 g = AAIGraph.getInstance().getGraph().newTransaction();
2215 } catch (Exception e) {
2217 LoggingContext.statusCode(StatusCode.ERROR);
2218 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2219 logger.error("ERROR trying to delete VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
2222 logger.info(" DELETED VID = " + thisVid);
2223 deletedSomething = true;
2228 LoggingContext.statusCode(StatusCode.ERROR);
2229 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2230 logger.error("ERROR - Vertex Id to keep not found in list of dupes. dupeInfoString = ["
2231 + dupeInfoString + "]");
2235 }// else we know which one to keep
2237 }// for each vertex in a group
2239 return deletedSomething;
2241 }// end of deleteNonKeepersIfAppropriate()
2245 * Gets the node just using key params.
2247 * @param transId the trans id
2248 * @param fromAppId the from app id
2249 * @param graph the graph
2250 * @param nodeType the node type
2251 * @param keyPropsHash the key props hash
2252 * @param apiVersion the api version
2253 * @return the node just using key params
2254 * @throws AAIException the AAI exception
2256 public static List <Vertex> getNodeJustUsingKeyParams( String transId, String fromAppId, GraphTraversalSource graph, String nodeType,
2257 HashMap<String,Object> keyPropsHash, String apiVersion ) throws AAIException{
2259 List <Vertex> retVertList = new ArrayList <> ();
2261 // We assume that all NodeTypes have at least one key-property defined.
2262 // Note - instead of key-properties (the primary key properties), a user could pass
2263 // alternate-key values if they are defined for the nodeType.
2264 List<String> kName = new ArrayList<>();
2265 List<Object> kVal = new ArrayList<>();
2266 if( keyPropsHash == null || keyPropsHash.isEmpty() ) {
2267 throw new AAIException("AAI_6120", " NO key properties passed for this getNodeJustUsingKeyParams() request. NodeType = [" + nodeType + "]. ");
2271 for( Map.Entry<String, Object> entry : keyPropsHash.entrySet() ){
2273 kName.add(i, entry.getKey());
2274 kVal.add(i, entry.getValue());
2276 int topPropIndex = i;
2278 String propsAndValuesForMsg = "";
2279 Iterator <Vertex> verts = null;
2282 if( topPropIndex == 0 ){
2283 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ") ";
2284 verts= graph.V().has(kName.get(0),kVal.get(0)).has(AAI_NODE_TYPE,nodeType);
2286 else if( topPropIndex == 1 ){
2287 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2288 + kName.get(1) + " = " + kVal.get(1) + ") ";
2289 verts = graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(AAI_NODE_TYPE,nodeType);
2291 else if( topPropIndex == 2 ){
2292 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2293 + kName.get(1) + " = " + kVal.get(1) + ", "
2294 + kName.get(2) + " = " + kVal.get(2) + ") ";
2295 verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has(AAI_NODE_TYPE,nodeType);
2297 else if( topPropIndex == 3 ){
2298 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2299 + kName.get(1) + " = " + kVal.get(1) + ", "
2300 + kName.get(2) + " = " + kVal.get(2) + ", "
2301 + kName.get(3) + " = " + kVal.get(3) + ") ";
2302 verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has(kName.get(3),kVal.get(3)).has(AAI_NODE_TYPE,nodeType);
2305 throw new AAIException("AAI_6114", " We only support 4 keys per nodeType for now \n");
2308 catch( Exception ex ){
2309 LoggingContext.statusCode(StatusCode.ERROR);
2310 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2311 logger.error( " ERROR trying to get node for: [" + propsAndValuesForMsg + "]" + LogFormatTools.getStackTop(ex));
2314 if( verts != null ){
2315 while( verts.hasNext() ){
2317 retVertList.add(tiV);
2321 if( retVertList.isEmpty() ){
2322 logger.debug("DEBUG No node found for nodeType = [" + nodeType +
2323 "], propsAndVal = " + propsAndValuesForMsg );
2328 }// End of getNodeJustUsingKeyParams()
2331 * Show all edges for node.
2333 * @param transId the trans id
2334 * @param fromAppId the from app id
2335 * @param tVert the t vert
2336 * @return the array list
2338 private static ArrayList <String> showAllEdgesForNode( String transId, String fromAppId, Vertex tVert ){
2340 ArrayList <String> retArr = new ArrayList <> ();
2341 Iterator <Edge> eI = tVert.edges(Direction.IN);
2342 if( ! eI.hasNext() ){
2343 retArr.add("No IN edges were found for this vertex. ");
2345 while( eI.hasNext() ){
2346 Edge ed = eI.next();
2347 String lab = ed.label();
2349 if (tVert.equals(ed.inVertex())) {
2350 vtx = ed.outVertex();
2352 vtx = ed.inVertex();
2355 retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
2358 String nType = vtx.<String>property(AAI_NODE_TYPE).orElse(null);
2359 String vid = vtx.id().toString();
2360 retArr.add("Found an IN edge (" + lab + ") to this vertex from a [" + nType + "] node with VtxId = " + vid );
2365 eI = tVert.edges(Direction.OUT);
2366 if( ! eI.hasNext() ){
2367 retArr.add("No OUT edges were found for this vertex. ");
2369 while( eI.hasNext() ){
2370 Edge ed = eI.next();
2371 String lab = ed.label();
2373 if (tVert.equals(ed.inVertex())) {
2374 vtx = ed.outVertex();
2376 vtx = ed.inVertex();
2379 retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
2382 String nType = vtx.<String>property(AAI_NODE_TYPE).orElse(null);
2383 String vid = vtx.id().toString();
2384 retArr.add("Found an OUT edge (" + lab + ") from this vertex to a [" + nType + "] node with VtxId = " + vid );
2392 * Show properties for node.
2394 * @param transId the trans id
2395 * @param fromAppId the from app id
2396 * @param tVert the t vert
2397 * @return the array list
2399 private static ArrayList <String> showPropertiesForNode( String transId, String fromAppId, Vertex tVert ){
2401 ArrayList <String> retArr = new ArrayList <> ();
2402 if( tVert == null ){
2403 retArr.add("null Node object passed to showPropertiesForNode()\n");
2406 String nodeType = "";
2407 Object ob = tVert.<Object>property(AAI_NODE_TYPE).orElse(null);
2412 nodeType = ob.toString();
2415 retArr.add(" AAINodeType/VtxID for this Node = [" + nodeType + "/" + tVert.id() + "]");
2416 retArr.add(" Property Detail: ");
2417 Iterator<VertexProperty<Object>> pI = tVert.properties();
2418 while( pI.hasNext() ){
2419 VertexProperty<Object> tp = pI.next();
2420 Object val = tp.value();
2421 retArr.add("Prop: [" + tp.key() + "], val = [" + val + "] ");
2428 private static ArrayList <Vertex> getConnectedNodes(GraphTraversalSource g, Vertex startVtx )
2429 throws AAIException {
2431 ArrayList <Vertex> retArr = new ArrayList <> ();
2432 if( startVtx == null ){
2436 GraphTraversal<Vertex, Vertex> modPipe = null;
2437 modPipe = g.V(startVtx).both();
2438 if( modPipe != null && modPipe.hasNext() ){
2439 while( modPipe.hasNext() ){
2440 Vertex conVert = modPipe.next();
2441 retArr.add(conVert);
2447 }// End of getConnectedNodes()
2450 private static ArrayList <Vertex> getConnectedChildrenOfOneType( GraphTraversalSource g,
2451 Vertex startVtx, String childNType ) {
2453 ArrayList <Vertex> childList = new ArrayList <> ();
2454 Iterator <Vertex> vertI = g.V(startVtx).union(__.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).inV(), __.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).outV());
2456 Vertex tmpVtx = null;
2457 while( vertI != null && vertI.hasNext() ){
2458 tmpVtx = vertI.next();
2459 Object ob = tmpVtx.<Object>property(AAI_NODE_TYPE).orElse(null);
2461 String tmpNt = ob.toString();
2462 if( tmpNt.equals(childNType)){
2463 childList.add(tmpVtx);
2470 }// End of getConnectedChildrenOfOneType()
2473 private static Vertex getConnectedParent( GraphTraversalSource g,
2476 Vertex parentVtx = null;
2477 Iterator <Vertex> vertI = g.V(startVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
2479 while( vertI != null && vertI.hasNext() ){
2480 // Note - there better only be one!
2481 parentVtx = vertI.next();
2486 }// End of getConnectedParent()
2489 private static long figureWindowStartTime( int timeWindowMinutes ){
2490 // Given a window size, calculate what the start-timestamp would be.
2492 if( timeWindowMinutes <= 0 ){
2493 // This just means that there is no window...
2496 long unixTimeNow = System.currentTimeMillis();
2497 long windowInMillis = timeWindowMinutes * 60L * 1000;
2499 return unixTimeNow - windowInMillis;
2500 } // End of figureWindowStartTime()
2504 * Collect Duplicate Sets for nodes that are NOT dependent on parent nodes.
2506 * @param transId the trans id
2507 * @param fromAppId the from app id
2509 * @param version the version
2510 * @param nType the n type
2511 * @param passedVertList the passed vert list
2512 * @return the array list
2514 private static ArrayList<ArrayList<Vertex>> getDupeSets4NonDepNodes( String transId,
2515 String fromAppId, Graph g, String version, String nType,
2516 ArrayList<Vertex> passedVertList,
2517 ArrayList <String> keyPropNamesArr,
2520 ArrayList<ArrayList<Vertex>> returnList = new ArrayList<ArrayList<Vertex>>();
2522 // We've been passed a set of nodes that we want to check.
2523 // They are all NON-DEPENDENT nodes of the same nodeType meaning that they should be
2524 // unique in the DB based on their KEY DATA alone. So, if
2525 // we group them by their key data - if any key has more than one
2526 // vertex mapped to it, those vertices are dupes.
2528 // When we find duplicates, we group them in an ArrayList (there can be
2529 // more than one duplicate for one set of key data)
2530 // Then these dupeSets are grouped up and returned.
2533 HashMap <String, ArrayList<String>> keyVals2VidHash = new HashMap <String, ArrayList<String>>();
2534 HashMap <String,Vertex> vtxHash = new HashMap <String,Vertex>();
2535 Iterator<Vertex> pItr = passedVertList.iterator();
2536 while (pItr.hasNext()) {
2538 Vertex tvx = pItr.next();
2539 String thisVid = tvx.id().toString();
2540 vtxHash.put(thisVid, tvx);
2542 // if there are more than one vertexId mapping to the same keyProps -- they are dupes
2543 // we dont check till later since a set can contain more than 2.
2544 String hKey = getNodeKeyValString( tvx, keyPropNamesArr );
2545 if( keyVals2VidHash.containsKey(hKey) ){
2546 // We've already seen this key
2547 ArrayList <String> tmpVL = (ArrayList <String>)keyVals2VidHash.get(hKey);
2549 keyVals2VidHash.put(hKey, tmpVL);
2552 // First time for this key
2553 ArrayList <String> tmpVL = new ArrayList <String>();
2555 keyVals2VidHash.put(hKey, tmpVL);
2558 catch (Exception e) {
2559 logger.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);
2563 for( Map.Entry<String, ArrayList<String>> entry : keyVals2VidHash.entrySet() ){
2564 ArrayList <String> vidList = entry.getValue();
2566 if( !vidList.isEmpty() && vidList.size() > 1 ){
2567 // There are more than one vertex id's using the same key info
2568 ArrayList <Vertex> vertList = new ArrayList <Vertex> ();
2569 for (int i = 0; i < vidList.size(); i++) {
2570 String tmpVid = vidList.get(i);
2571 vertList.add(vtxHash.get(tmpVid));
2573 returnList.add(vertList);
2576 catch (Exception e) {
2577 logger.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);
2583 }// End of getDupeSets4NonDepNodes()
2587 * Get values of the key properties for a node as a single string
2589 * @param tvx the vertex to pull the properties from
2590 * @param keyPropNamesArr collection of key prop names
2591 * @return a String of concatenated values
2593 private static String getNodeKeyValString( Vertex tvx,
2594 ArrayList <String> keyPropNamesArr ) {
2596 String retString = "";
2597 Iterator <String> propItr = keyPropNamesArr.iterator();
2598 while( propItr.hasNext() ){
2599 String propName = propItr.next();
2601 Object propValObj = tvx.property(propName).orElse(null);
2602 retString = " " + retString + propValObj.toString();
2607 }// End of getNodeKeyValString()
2610 private static String findJustOneUsingIndex( String transId, String fromAppId,
2611 GraphTraversalSource gts, HashMap <String,Object> keyPropValsHash,
2612 String nType, Long vidAL, Long vidBL, String apiVer){
2614 // See if querying by JUST the key params (which should be indexed) brings back
2615 // ONLY one of the two vertices. Ie. the db still has a pointer to one of them
2616 // and the other one is sort of stranded.
2617 String returnVid = "";
2620 List <Vertex> tmpVertList = getNodeJustUsingKeyParams( transId, fromAppId, gts,
2621 nType, keyPropValsHash, apiVer );
2622 if( tmpVertList != null && tmpVertList.size() == 1 ){
2623 // We got just one - if it matches one of the ones we're looking
2624 // for, then return that VID
2625 Vertex tmpV = tmpVertList.get(0);
2626 String thisVid = tmpV.id().toString();
2627 if( thisVid.equals(vidAL.toString()) || thisVid.equals(vidBL.toString()) ){
2628 String msg = " vid = " + thisVid + " is one of two that the DB can retrieve directly ------";
2631 returnVid = thisVid;
2635 catch ( AAIException ae ){
2636 String emsg = "Error trying to get node just by key " + ae.getMessage();
2643 }// End of findJustOneUsingIndex()