2 * ============LICENSE_START=======================================================
4 * ================================================================================
5 * Copyright © 2017 AT&T Intellectual Property. All rights reserved.
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ============LICENSE_END=========================================================
20 * ECOMP is a trademark and service mark of AT&T Intellectual Property.
22 package org.onap.aai.dbgen;
24 import java.io.BufferedReader;
25 import java.io.BufferedWriter;
27 import java.io.FileReader;
28 import java.io.FileWriter;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.HashMap;
33 import java.util.Iterator;
34 import java.util.LinkedHashSet;
35 import java.util.List;
37 import java.util.Map.Entry;
38 import java.util.Properties;
40 import java.util.UUID;
42 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
43 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
44 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
45 import org.apache.tinkerpop.gremlin.structure.Direction;
46 import org.apache.tinkerpop.gremlin.structure.Edge;
47 import org.apache.tinkerpop.gremlin.structure.Graph;
48 import org.apache.tinkerpop.gremlin.structure.Property;
49 import org.apache.tinkerpop.gremlin.structure.Vertex;
50 import org.apache.tinkerpop.gremlin.structure.VertexProperty;
51 import org.onap.aai.db.props.AAIProperties;
52 import org.onap.aai.dbmap.AAIGraph;
53 import org.onap.aai.dbmap.AAIGraphConfig;
54 import org.onap.aai.exceptions.AAIException;
55 import org.onap.aai.introspection.Introspector;
56 import org.onap.aai.introspection.Loader;
57 import org.onap.aai.introspection.LoaderFactory;
58 import org.onap.aai.introspection.ModelType;
59 import org.onap.aai.introspection.exceptions.AAIUnknownObjectException;
60 import org.onap.aai.logging.ErrorLogHelper;
61 import org.onap.aai.logging.LogFormatTools;
62 import org.onap.aai.logging.LoggingContext;
63 import org.onap.aai.serialization.db.AAIDirection;
64 import org.onap.aai.serialization.db.EdgeProperty;
65 import org.onap.aai.util.*;
66 import org.onap.aai.logging.LoggingContext;
67 import org.onap.aai.logging.LoggingContext.StatusCode;
69 import com.att.eelf.configuration.Configuration;
70 import com.att.eelf.configuration.EELFLogger;
71 import com.att.eelf.configuration.EELFManager;
72 import com.thinkaurelius.titan.core.TitanFactory;
73 import com.thinkaurelius.titan.core.TitanGraph;
2 * ============LICENSE_START=======================================================
76 public class DataGrooming {
// NOTE(review): LOGGER is assigned in main() only after the EELF logging
// properties are pushed into System properties, so it cannot be final.
78 	private static EELFLogger LOGGER;
// Partner/app id stamped on every DB transaction this tool performs.
79 	private static final String FROMAPPID = "AAI-DB";
// One transaction id per JVM run; used for request correlation in logs.
80 	private static final String TRANSID = UUID.randomUUID().toString();
// Running count of duplicate-groups deleted during grooming
// (mutated elsewhere in this class; not visible in this extract).
81 	private static int dupeGrpsDeleted = 0;
86 * @param args the arguments
// Entry point for the data-grooming job.
// NOTE(review): this extract is missing many intermediate source lines (the
// embedded line numbers skip values); the comments below describe only the
// code that is visible here and hedge anything that depends on dropped lines.
88 	public static void main(String[] args) {
90 		// Set the logging file properties to be used by EELFManager
91 		System.setProperty("aai.service.name", DataGrooming.class.getSimpleName());
92 		Properties props = System.getProperties();
93 		props.setProperty(Configuration.PROPERTY_LOGGING_FILE_NAME, AAIConstants.AAI_DATA_GROOMING_LOGBACK_PROPS);
94 		props.setProperty(Configuration.PROPERTY_LOGGING_FILE_PATH, AAIConstants.AAI_HOME_ETC_APP_PROPERTIES);
95 		LOGGER = EELFManager.getInstance().getLogger(DataGrooming.class);
96 		String ver = "version"; // Placeholder
// Command-line option flags; defaults below, overridden by the arg loop.
97 		Boolean doAutoFix = false;
98 		Boolean edgesOnlyFlag = false;
99 		Boolean dontFixOrphansFlag = false;
100 		Boolean skipHostCheck = false;
101 		Boolean singleCommits = false;
102 		Boolean dupeCheckOff = false;
103 		Boolean dupeFixOn = false;
104 		Boolean ghost2CheckOff = false;
105 		Boolean ghost2FixOn = false;
106 		Boolean neverUseCache = false;
107 		Boolean skipEdgeCheckFlag = false;
// Initialize the per-run logging context so every log line carries the
// partner, service, and request (transaction) identifiers.
109 		LoggingContext.init();
110 		LoggingContext.partnerName(FROMAPPID);
111 		LoggingContext.serviceName(AAIConstants.AAI_RESOURCES_MS);
112 		LoggingContext.component("dataGrooming");
113 		LoggingContext.targetEntity(AAIConstants.AAI_RESOURCES_MS);
114 		LoggingContext.targetServiceName("main");
115 		LoggingContext.requestId(TRANSID);
116 		LoggingContext.statusCode(StatusCode.COMPLETE);
117 		LoggingContext.responseCode(LoggingContext.SUCCESS);
119 		int timeWindowMinutes = 0; // A value of 0 means that we will not have a time-window -- we will look
120 		                           // at all nodes of the passed-in nodeType.
// Tunables come from AAIConstants defaults, optionally overridden by
// aaiconfig.properties; a parse/lookup failure falls back to the defaults.
123 		int maxRecordsToFix = AAIConstants.AAI_GROOMING_DEFAULT_MAX_FIX;
124 		int sleepMinutes = AAIConstants.AAI_GROOMING_DEFAULT_SLEEP_MINUTES;
126 			String maxFixStr = AAIConfig.get("aai.grooming.default.max.fix");
127 			if( maxFixStr != null &&  !maxFixStr.equals("") ){
128 				maxRecordsToFix = Integer.parseInt(maxFixStr);
130 			String sleepStr = AAIConfig.get("aai.grooming.default.sleep.minutes");
131 			if( sleepStr != null &&  !sleepStr.equals("") ){
132 				sleepMinutes = Integer.parseInt(sleepStr);
135 		catch ( Exception e ){
136 			// Don't worry, we'll just use the defaults that we got from AAIConstants
137 			LOGGER.warn("WARNING - could not pick up aai.grooming values from aaiconfig.properties file. ");
140 		String prevFileName = "";
// Timestamp used to build unique output file names for this run.
142 		FormatDate fd = new FormatDate("yyyyMMddHHmm", "GMT");
143 		String dteStr = fd.getDateTime();
// ---- Command-line argument parsing -------------------------------------
// NOTE(review): several branches here appear to have lost their body lines
// in this extract (e.g. -autoFix, -dupeCheckOff, -dupeFixOn, -ghost2FixOn
// show no assignment); confirm against the full source before relying on
// the visible behavior.
145 		if (args.length > 0) {
146 			// They passed some arguments in that will affect processing
147 			for (int i = 0; i < args.length; i++) {
148 				String thisArg = args[i];
149 				if (thisArg.equals("-edgesOnly")) {
150 					edgesOnlyFlag = true;
151 				} else if (thisArg.equals("-autoFix")) {
153 				} else if (thisArg.equals("-skipHostCheck")) {
154 					skipHostCheck = true;
155 				} else if (thisArg.equals("-dontFixOrphans")) {
156 					dontFixOrphansFlag = true;
157 				} else if (thisArg.equals("-singleCommits")) {
158 					singleCommits = true;
159 				} else if (thisArg.equals("-dupeCheckOff")) {
161 				} else if (thisArg.equals("-dupeFixOn")) {
163 				} else if (thisArg.equals("-ghost2CheckOff")) {
164 					ghost2CheckOff = true;
165 				} else if (thisArg.equals("-neverUseCache")) {
166 					neverUseCache = true;
167 				} else if (thisArg.equals("-ghost2FixOn")) {
169 				} else if (thisArg.equals("-skipEdgeChecks")) {
170 					skipEdgeCheckFlag = true;
171 				} else if (thisArg.equals("-maxFix")) {
// NOTE(review): the `i++` that advances to this option's value looks like it
// is on a dropped line; as visible, args[i] would re-read the flag itself.
// Confirm against the full source.
173 					if (i >= args.length) {
174 						LoggingContext.statusCode(StatusCode.ERROR);
175 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
176 						LOGGER.error(" No value passed with -maxFix option. ");
// Exit code 0 even on bad usage — presumably deliberate so wrapper scripts
// don't treat usage errors as job failures; verify against operations docs.
177 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
179 					String nextArg = args[i];
181 						maxRecordsToFix = Integer.parseInt(nextArg);
182 					} catch (Exception e) {
183 						LoggingContext.statusCode(StatusCode.ERROR);
184 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
185 						LOGGER.error("Bad value passed with -maxFix option: ["
187 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
189 				} else if (thisArg.equals("-sleepMinutes")) {
191 					if (i >= args.length) {
192 						LoggingContext.statusCode(StatusCode.ERROR);
193 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
194 						LOGGER.error("No value passed with -sleepMinutes option.");
195 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
197 					String nextArg = args[i];
199 						sleepMinutes = Integer.parseInt(nextArg);
200 					} catch (Exception e) {
201 						LoggingContext.statusCode(StatusCode.ERROR);
202 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
203 						LOGGER.error("Bad value passed with -sleepMinutes option: ["
205 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
207 				} else if (thisArg.equals("-timeWindowMinutes")) {
209 					if (i >= args.length) {
210 						LoggingContext.statusCode(StatusCode.ERROR);
211 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
212 						LOGGER.error("No value passed with -timeWindowMinutes option.");
213 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
215 					String nextArg = args[i];
217 						timeWindowMinutes = Integer.parseInt(nextArg);
218 					} catch (Exception e) {
219 						LoggingContext.statusCode(StatusCode.ERROR);
220 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
221 						LOGGER.error("Bad value passed with -timeWindowMinutes option: ["
223 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
// -f <file>: run in fix mode against a previously generated grooming file.
226 				} else if (thisArg.equals("-f")) {
228 					if (i >= args.length) {
229 						LoggingContext.statusCode(StatusCode.ERROR);
230 						LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
231 						LOGGER.error(" No value passed with -f option. ");
232 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
234 					prevFileName = args[i];
236 					LoggingContext.statusCode(StatusCode.ERROR);
237 					LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
238 					LOGGER.error(" Unrecognized argument passed to DataGrooming: ["
// NOTE(review): help text below says "-donFixOrphans" but the flag parsed
// above is "-dontFixOrphans" — typo in the message (cannot be changed in a
// comments-only edit).
240 					LOGGER.error(" Valid values are: -f -autoFix -maxFix -edgesOnly -skipEdgeChecks -dupeFixOn -donFixOrphans -timeWindowMinutes -sleepMinutes -neverUseCache");
241 					AAISystemExitUtil.systemExitCloseAAIGraph(0);
// Output file name carries FULL vs PARTIAL depending on whether a
// time-window restricts which nodes are examined.
246 		String windowTag = "FULL";
247 		if( timeWindowMinutes > 0 ){
248 			windowTag = "PARTIAL";
250 		String groomOutFileName = "dataGrooming." + windowTag + "." + dteStr + ".out";
// Pre-flight: make sure a MOXY loader can be created before doing any work;
// failure here exits with code 1 (unlike the usage errors above).
253 			LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
256 		catch (Exception ex){
257 			LoggingContext.statusCode(StatusCode.ERROR);
258 			LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
259 			LOGGER.error("ERROR - Could not create loader " + LogFormatTools.getStackTop(ex));
260 			AAISystemExitUtil.systemExitCloseAAIGraph(1);
264 			LOGGER.info(" We will skip the HostCheck as requested. ");
// ---- Dispatch: three run modes ------------------------------------------
// 1) fix mode (-f <file>): single pass that deletes candidates listed in a
//    previous run's output file; uses a fresh (non-cached) DB connection.
268 			if (!prevFileName.equals("")) {
269 				// They are trying to fix some data based on a data in a
271 				LOGGER.info(" Call doTheGrooming() with a previous fileName ["
272 						+ prevFileName + "] for cleanup. ");
273 				Boolean finalShutdownFlag = true;
274 				Boolean cacheDbOkFlag = false;
275 				doTheGrooming(prevFileName, edgesOnlyFlag, dontFixOrphansFlag,
276 						maxRecordsToFix, groomOutFileName, ver, singleCommits,
277 						dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
278 						finalShutdownFlag, cacheDbOkFlag,
279 						skipEdgeCheckFlag, timeWindowMinutes);
// 2) auto-fix mode (-autoFix): scan pass, sleep, then fix pass driven by the
//    first pass's output file.
280 			} else if (doAutoFix) {
281 				// They want us to run the processing twice -- first to look for
282 				// delete candidates, then after
283 				// napping for a while, run it again and delete any candidates
284 				// that were found by the first run.
285 				// Note: we will produce a separate output file for each of the
287 				LOGGER.info(" Doing an auto-fix call to Grooming. ");
288 				LOGGER.info(" First, Call doTheGrooming() to look at what's out there. ");
289 				Boolean finalShutdownFlag = false;
290 				Boolean cacheDbOkFlag = true;
291 				int fixCandCount = doTheGrooming("", edgesOnlyFlag,
292 						dontFixOrphansFlag, maxRecordsToFix, groomOutFileName,
293 						ver, singleCommits, dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
294 						finalShutdownFlag, cacheDbOkFlag,
295 						skipEdgeCheckFlag, timeWindowMinutes);
296 				if (fixCandCount == 0) {
297 					LOGGER.info(" No fix-Candidates were found by the first pass, so no second/fix-pass is needed. ");
299 					// We'll sleep a little and then run a fix-pass based on the
300 					// first-run's output file.
302 						LOGGER.info("About to sleep for " + sleepMinutes
304 						int sleepMsec = sleepMinutes * 60 * 1000;
305 						Thread.sleep(sleepMsec);
306 					} catch (InterruptedException ie) {
// NOTE(review): exits on interrupt without re-asserting the interrupt flag;
// acceptable only because the JVM terminates here.
307 						LOGGER.info("\n >>> Sleep Thread has been Interrupted <<< ");
308 						AAISystemExitUtil.systemExitCloseAAIGraph(0);
// Second pass gets its own timestamped output file; the FIRST pass's file is
// passed in as the list of delete candidates.
311 					dteStr = fd.getDateTime();
312 					String secondGroomOutFileName = "dataGrooming." + dteStr
314 					LOGGER.info(" Now, call doTheGrooming() a second time and pass in the name of the file "
315 							+ "generated by the first pass for fixing: ["
316 							+ groomOutFileName + "]");
317 					finalShutdownFlag = true;
318 					cacheDbOkFlag = false;
319 					doTheGrooming(groomOutFileName, edgesOnlyFlag,
320 							dontFixOrphansFlag, maxRecordsToFix,
321 							secondGroomOutFileName, ver, singleCommits,
322 							dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
323 							finalShutdownFlag, cacheDbOkFlag,
324 							skipEdgeCheckFlag, timeWindowMinutes);
// 3) default mode: single read-only scan; a cached DB connection is OK
//    unless -neverUseCache was passed (the guard is on a dropped line here).
327 				// Do the grooming - plain vanilla (no fix-it-file, no
329 				Boolean finalShutdownFlag = true;
330 				LOGGER.info(" Call doTheGrooming() ");
331 				Boolean cacheDbOkFlag = true;
333 					// They have forbidden us from using a cached db connection.
334 					cacheDbOkFlag = false;
336 				doTheGrooming("", edgesOnlyFlag, dontFixOrphansFlag,
337 						maxRecordsToFix, groomOutFileName, ver, singleCommits,
338 						dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
339 						finalShutdownFlag, cacheDbOkFlag,
340 						skipEdgeCheckFlag, timeWindowMinutes);
342 		} catch (Exception ex) {
343 			LoggingContext.statusCode(StatusCode.ERROR);
344 			LoggingContext.responseCode(LoggingContext.DATA_ERROR);
345 			LOGGER.error("Exception while grooming data " + LogFormatTools.getStackTop(ex));
348 		LOGGER.info(" Done! ");
// Normal termination: close the graph and exit 0.
349 		AAISystemExitUtil.systemExitCloseAAIGraph(0);
356 * @param fileNameForFixing the file name for fixing
357 * @param edgesOnlyFlag the edges only flag
358 * @param dontFixOrphansFlag the dont fix orphans flag
359 * @param maxRecordsToFix the max records to fix
360 * @param groomOutFileName the groom out file name
361 * @param version the version
362 * @param singleCommits the single commits
363 * @param dupeCheckOff the dupe check off
364 * @param dupeFixOn the dupe fix on
365 * @param ghost2CheckOff the ghost 2 check off
366 * @param ghost2FixOn the ghost 2 fix on
367 * @param finalShutdownFlag the final shutdown flag
368 * @param cacheDbOkFlag the cacheDbOk flag
371 private static int doTheGrooming(String fileNameForFixing,
372 Boolean edgesOnlyFlag, Boolean dontFixOrphansFlag,
373 int maxRecordsToFix, String groomOutFileName, String version,
374 Boolean singleCommits,
375 Boolean dupeCheckOff, Boolean dupeFixOn,
376 Boolean ghost2CheckOff, Boolean ghost2FixOn,
377 Boolean finalShutdownFlag, Boolean cacheDbOkFlag,
378 Boolean skipEdgeCheckFlag, int timeWindowMinutes) {
380 LOGGER.debug(" Entering doTheGrooming \n");
382 int cleanupCandidateCount = 0;
383 long windowStartTime = 0; // Translation of the window into a starting timestamp
384 BufferedWriter bw = null;
385 TitanGraph graph = null;
386 TitanGraph graph2 = null;
388 boolean executeFinalCommit = false;
389 Set<String> deleteCandidateList = new LinkedHashSet<>();
390 Set<String> processedVertices = new LinkedHashSet<>();
394 if( timeWindowMinutes > 0 ){
395 // Translate the window value (ie. 30 minutes) into a unix timestamp like
396 // we use in the db - so we can select data created after that time.
397 windowStartTime = figureWindowStartTime( timeWindowMinutes );
401 String targetDir = AAIConstants.AAI_HOME + AAIConstants.AAI_FILESEP
402 + "logs" + AAIConstants.AAI_FILESEP + "data"
403 + AAIConstants.AAI_FILESEP + "dataGrooming";
405 // Make sure the target directory exists
406 new File(targetDir).mkdirs();
408 if (!fileNameForFixing.equals("")) {
409 deleteCandidateList = getDeleteList(targetDir,
410 fileNameForFixing, edgesOnlyFlag, dontFixOrphansFlag,
414 if (deleteCandidateList.size() > maxRecordsToFix) {
415 LoggingContext.statusCode(StatusCode.ERROR);
416 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
417 LOGGER.warn(" >> WARNING >> Delete candidate list size ("
418 + deleteCandidateList.size()
419 + ") is too big. The maxFix we are using is: "
421 + ". No candidates will be deleted. ");
422 // Clear out the list so it won't be processed below.
423 deleteCandidateList = new LinkedHashSet<>();
426 String fullOutputFileName = targetDir + AAIConstants.AAI_FILESEP
428 File groomOutFile = new File(fullOutputFileName);
430 groomOutFile.createNewFile();
431 } catch (IOException e) {
432 String emsg = " Problem creating output file ["
433 + fullOutputFileName + "], exception=" + e.getMessage();
434 throw new AAIException("AAI_6124", emsg);
437 LOGGER.info(" Will write to " + fullOutputFileName );
438 bw = new BufferedWriter(new FileWriter(groomOutFile.getAbsoluteFile()));
439 ErrorLogHelper.loadProperties();
441 LOGGER.info(" ---- NOTE --- about to open graph (takes a little while)--------\n");
444 // Since we're just reading (not deleting/fixing anything), we can use
445 // a cached connection to the DB
446 graph = TitanFactory.open(new AAIGraphConfig.Builder(AAIConstants.CACHED_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("cached").buildConfiguration());
449 graph = TitanFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime1").buildConfiguration());
452 String emsg = "null graph object in DataGrooming\n";
453 throw new AAIException("AAI_6101", emsg);
456 LOGGER.debug(" Got the graph object. ");
458 g = graph.newTransaction();
460 String emsg = "null graphTransaction object in DataGrooming\n";
461 throw new AAIException("AAI_6101", emsg);
463 GraphTraversalSource source1 = g.traversal();
465 ArrayList<String> errArr = new ArrayList<>();
466 int totalNodeCount = 0;
467 HashMap<String, String> misMatchedHash = new HashMap<String, String>();
468 HashMap<String, Vertex> orphanNodeHash = new HashMap<String, Vertex>();
469 HashMap<String, Vertex> missingDepNodeHash = new HashMap<String, Vertex>();
470 HashMap<String, Edge> oneArmedEdgeHash = new HashMap<String, Edge>();
471 HashMap<String, String> emptyVertexHash = new HashMap<String, String>();
472 HashMap<String, Vertex> ghostNodeHash = new HashMap<String, Vertex>();
473 ArrayList<String> dupeGroups = new ArrayList<>();
475 Loader loader = LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
477 Set<Entry<String, Introspector>> entrySet = loader.getAllObjects().entrySet();
480 LOGGER.info(" Starting DataGrooming Processing ");
483 LOGGER.info(" NOTE >> Skipping Node processing as requested. Will only process Edges. << ");
486 for (Entry<String, Introspector> entry : entrySet) {
487 String nType = entry.getKey();
489 int thisNtDeleteCount = 0;
491 LOGGER.debug(" > Look at : [" + nType + "] ...");
492 ntList = ntList + "," + nType;
494 // Get a collection of the names of the key properties for this nodeType to use later
495 // Determine what the key fields are for this nodeType - use an arrayList so they
496 // can be gotten out in a consistent order.
497 Set <String> keyPropsSet = entry.getValue().getKeys();
498 ArrayList <String> keyProps = new ArrayList <String> ();
499 keyProps.addAll(keyPropsSet);
501 // Get the types of nodes that this nodetype depends on for uniqueness (if any)
502 Collection <String> depNodeTypes = loader.introspectorFromName(nType).getDependentOn();
504 // Loop through all the nodes of this Node type
505 int lastShownForNt = 0;
506 ArrayList <Vertex> tmpList = new ArrayList <> ();
507 Iterator <Vertex> iterv = source1.V().has("aai-node-type",nType);
508 while (iterv.hasNext()) {
509 // We put the nodes into an ArrayList because the graph.query iterator can time out
510 tmpList.add(iterv.next());
513 Iterator <Vertex> iter = tmpList.iterator();
514 while (iter.hasNext()) {
517 if( thisNtCount == lastShownForNt + 250 ){
518 lastShownForNt = thisNtCount;
519 LOGGER.debug("count for " + nType + " so far = " + thisNtCount );
521 Vertex thisVtx = iter.next();
522 if( windowStartTime > 0 ){
523 // They are using the time-window, so we only want nodes that are updated after a
524 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
525 Object objModTimeStamp = thisVtx.property("aai-last-mod-ts").orElse(null);
526 if( objModTimeStamp != null ){
527 long thisNodeModTime = (long)objModTimeStamp;
528 if( thisNodeModTime < windowStartTime ){
529 // It has a last modified ts and is NOT in our window, so we can pass over it
535 String thisVid = thisVtx.id().toString();
536 if (processedVertices.contains(thisVid)) {
537 LOGGER.debug("skipping already processed vertex: " + thisVid);
541 List <Vertex> secondGetList = new ArrayList <> ();
542 // -----------------------------------------------------------------------
543 // For each vertex of this nodeType, we want to:
544 // a) make sure that it can be retrieved using it's AAI defined key
545 // b) make sure that it is not a duplicate
546 // -----------------------------------------------------------------------
548 // For this instance of this nodeType, get the key properties
549 HashMap<String, Object> propHashWithKeys = new HashMap<>();
550 Iterator<String> keyPropI = keyProps.iterator();
551 while (keyPropI.hasNext()) {
552 String propName = keyPropI.next();
554 //delete an already deleted vertex
555 Object obj = thisVtx.<Object>property(propName).orElse(null);
557 propVal = obj.toString();
559 propHashWithKeys.put(propName, propVal);
562 // If this node is dependent on another for uniqueness, then do the query from that parent node
563 // Note - all of our nodes that are dependent on others for uniqueness are
564 // "children" of that node.
565 boolean depNodeOk = true;
566 if( depNodeTypes.isEmpty() ){
567 // This kind of node is not dependent on any other.
568 // Make sure we can get it back using it's key properties (that is the
569 // phantom checking) and that we only get one. Note - we also need
570 // to collect data for a second type of dupe-checking which is done later.
571 secondGetList = getNodeJustUsingKeyParams( TRANSID, FROMAPPID, source1, nType,
572 propHashWithKeys, version );
575 // This kind of node is dependent on another for uniqueness.
576 // Start at it's parent (the dependent vertex) and make sure we can get it
577 // back using it's key properties and that we only get one.
578 Iterator <Vertex> vertI2 = source1.V(thisVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
579 Vertex parentVtx = null;
581 while( vertI2 != null && vertI2.hasNext() ){
582 parentVtx = vertI2.next();
588 //List<Vertex> vertI2 = g.traversal().V(thisVtx).union(__.outE().has("isParent-REV",true).outV(),__.inE().has("isParent",true).inV()).toList();
589 //if( vertI2.isEmpty()){
591 // It's Missing it's dependent/parent node
593 boolean zeroEdges = false;
595 Iterator<Edge> tmpEdgeIter = thisVtx.edges(Direction.BOTH);
597 while( tmpEdgeIter.hasNext() ){
601 if( edgeCount == 0 ){
604 } catch (Exception ex) {
605 LoggingContext.statusCode(StatusCode.ERROR);
606 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
607 LOGGER.warn("WARNING from inside the for-each-vid-loop orphan-edges-check " + LogFormatTools.getStackTop(ex) );
610 if (deleteCandidateList.contains(thisVid)) {
611 boolean okFlag = true;
613 processedVertices.add(thisVtx.id().toString());
617 } catch (Exception e) {
619 LoggingContext.statusCode(StatusCode.ERROR);
620 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
621 LOGGER.error("ERROR trying to delete missing-dep-node VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
624 LOGGER.info(" DELETED missing-dep-node VID = " + thisVid);
627 // We count nodes missing their depNodes two ways - the first if it has
628 // at least some edges, and the second if it has zero edges. Either
629 // way, they are effectively orphaned.
630 // NOTE - Only nodes that have dependent nodes are ever considered "orphaned".
632 missingDepNodeHash.put(thisVid, thisVtx);
635 orphanNodeHash.put(thisVid, thisVtx);
639 else if ( pCount > 1 ){
640 // Not sure how this could happen? Should we do something here?
644 // We found the parent - so use it to do the second-look.
645 // NOTE --- We're just going to do the same check from the other direction - because
646 // there could be duplicates or the pointer going the other way could be broken
647 ArrayList <Vertex> tmpListSec = new ArrayList <> ();
649 tmpListSec = getConnectedChildrenOfOneType( source1, parentVtx, nType ) ;
650 Iterator<Vertex> vIter = tmpListSec.iterator();
651 while (vIter.hasNext()) {
652 Vertex tmpV = vIter.next();
653 if( vertexHasTheseKeys(tmpV, propHashWithKeys) ){
654 secondGetList.add(tmpV);
658 }// end of -- else this is a dependent node -- piece
660 if( depNodeOk && (secondGetList == null || secondGetList.size() == 0) ){
661 // We could not get the node back using it's own key info.
662 // So, it's a PHANTOM
663 if (deleteCandidateList.contains(thisVid)) {
664 boolean okFlag = true;
669 } catch (Exception e) {
671 LoggingContext.statusCode(StatusCode.ERROR);
672 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
673 LOGGER.error("ERROR trying to delete phantom VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
676 LOGGER.info(" DELETED VID = " + thisVid);
679 ghostNodeHash.put(thisVid, thisVtx);
682 else if( (secondGetList.size() > 1) && depNodeOk && !dupeCheckOff ){
683 // Found some DUPLICATES - need to process them
684 LOGGER.info(" - now check Dupes for this guy - ");
685 List<String> tmpDupeGroups = checkAndProcessDupes(
686 TRANSID, FROMAPPID, g, source1, version,
687 nType, secondGetList, dupeFixOn,
688 deleteCandidateList, singleCommits, dupeGroups, loader);
689 Iterator<String> dIter = tmpDupeGroups.iterator();
690 while (dIter.hasNext()) {
691 // Add in any newly found dupes to our running list
692 String tmpGrp = dIter.next();
693 LOGGER.info("Found set of dupes: [" + tmpGrp + "]");
694 dupeGroups.add(tmpGrp);
698 catch (AAIException e1) {
699 LoggingContext.statusCode(StatusCode.ERROR);
700 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
701 LOGGER.warn(" For nodeType = " + nType + " Caught exception", e1);
702 errArr.add(e1.getErrorObject().toString());
704 catch (Exception e2) {
705 LoggingContext.statusCode(StatusCode.ERROR);
706 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
707 LOGGER.warn(" For nodeType = " + nType
708 + " Caught exception", e2);
709 errArr.add(e2.getMessage());
711 }// try block to enclose looping over each single vertex
712 catch (Exception exx) {
713 LoggingContext.statusCode(StatusCode.ERROR);
714 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
715 LOGGER.warn("WARNING from inside the while-verts-loop ", exx);
718 } // while loop for each record of a nodeType
720 if( depNodeTypes.isEmpty() && !dupeCheckOff ){
721 // For this nodeType, we haven't looked at the possibility of a
722 // non-dependent node where two verts have same key info
723 ArrayList<ArrayList<Vertex>> nonDependentDupeSets = new ArrayList<ArrayList<Vertex>>();
724 nonDependentDupeSets = getDupeSets4NonDepNodes(
725 TRANSID, FROMAPPID, g,
726 version, nType, tmpList,
728 // For each set found (each set is for a unique instance of key-values),
729 // process the dupes found
730 Iterator<ArrayList<Vertex>> dsItr = nonDependentDupeSets.iterator();
731 while( dsItr.hasNext() ){
732 ArrayList<Vertex> dupeList = dsItr.next();
733 LOGGER.info(" - now check Dupes for some non-dependent guys - ");
734 List<String> tmpDupeGroups = checkAndProcessDupes(
735 TRANSID, FROMAPPID, g, source1, version,
736 nType, dupeList, dupeFixOn,
737 deleteCandidateList, singleCommits, dupeGroups, loader);
738 Iterator<String> dIter = tmpDupeGroups.iterator();
739 while (dIter.hasNext()) {
740 // Add in any newly found dupes to our running list
741 String tmpGrp = dIter.next();
742 LOGGER.info("Found set of dupes: [" + tmpGrp + "]");
743 dupeGroups.add(tmpGrp);
747 }// end of extra dupe check for non-dependent nodes
749 if ( (thisNtDeleteCount > 0) && singleCommits ) {
750 // NOTE - the singleCommits option is not used in normal processing
752 g = AAIGraph.getInstance().getGraph().newTransaction();
755 thisNtDeleteCount = 0;
756 LOGGER.info( " Processed " + thisNtCount + " records for [" + nType + "], " + totalNodeCount + " total overall. " );
758 }// While-loop for each node type
760 }// end of check to make sure we weren't only supposed to do edges
763 if( !skipEdgeCheckFlag ){
764 // --------------------------------------------------------------------------------------
765 // Now, we're going to look for one-armed-edges. Ie. an edge that
767 // been deleted (because a vertex on one side was deleted) but
768 // somehow was not deleted.
769 // So the one end of it points to a vertexId -- but that vertex is
771 // --------------------------------------------------------------------------------------
773 // To do some strange checking - we need a second graph object
774 LOGGER.debug(" ---- DEBUG --- about to open a SECOND graph (takes a little while)--------\n");
775 // Note - graph2 just reads - but we want it to use a fresh connection to
776 // the database, so we are NOT using the CACHED DB CONFIG here.
777 graph2 = TitanFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime2").buildConfiguration());
778 if (graph2 == null) {
779 String emsg = "null graph2 object in DataGrooming\n";
780 throw new AAIException("AAI_6101", emsg);
782 LOGGER.debug("Got the graph2 object... \n");
784 g2 = graph2.newTransaction();
786 String emsg = "null graphTransaction2 object in DataGrooming\n";
787 throw new AAIException("AAI_6101", emsg);
790 ArrayList<Vertex> vertList = new ArrayList<>();
791 Iterator<Vertex> vItor3 = g.traversal().V();
792 // Gotta hold these in a List - or else HBase times out as you cycle
794 while (vItor3.hasNext()) {
795 Vertex v = vItor3.next();
800 Iterator<Vertex> vItor2 = vertList.iterator();
801 LOGGER.info(" Checking for bad edges --- ");
803 while (vItor2.hasNext()) {
808 } catch (Exception vex) {
809 LoggingContext.statusCode(StatusCode.ERROR);
810 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
811 LOGGER.warn(">>> WARNING trying to get next vertex on the vItor2 ");
816 String thisVertId = "";
818 thisVertId = v.id().toString();
819 } catch (Exception ev) {
820 LoggingContext.statusCode(StatusCode.ERROR);
821 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
822 LOGGER.warn("WARNING when doing getId() on a vertex from our vertex list. ");
825 if (ghostNodeHash.containsKey(thisVertId)) {
826 // This is a phantom node, so don't try to use it
827 LOGGER.info(" >> Skipping edge check for edges from vertexId = "
829 + ", since that guy is a Phantom Node");
833 if( windowStartTime > 0 ){
834 // They are using the time-window, so we only want nodes that are updated after a
835 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
836 Object objModTimeStamp = v.property("aai-last-mod-ts").orElse(null);
837 if( objModTimeStamp != null ){
838 long thisNodeModTime = (long)objModTimeStamp;
839 if( thisNodeModTime < windowStartTime ){
840 // It has a last modified ts and is NOT in our window, so we can pass over it
846 if (counter == lastShown + 250) {
848 LOGGER.info("... Checking edges for vertex # "
851 Iterator<Edge> eItor = v.edges(Direction.BOTH);
852 while (eItor.hasNext()) {
858 } catch (Exception iex) {
859 LoggingContext.statusCode(StatusCode.ERROR);
860 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
861 LOGGER.warn(">>> WARNING trying to get next edge on the eItor ", iex);
867 } catch (Exception err) {
868 LoggingContext.statusCode(StatusCode.ERROR);
869 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
870 LOGGER.warn(">>> WARNING trying to get edge's In-vertex ", err);
874 Vertex ghost2 = null;
876 Boolean keysMissing = true;
877 Boolean cantGetUsingVid = false;
880 Object ob = vIn.<Object>property("aai-node-type").orElse(null);
882 vNtI = ob.toString();
883 keysMissing = anyKeyFieldsMissing(vNtI, vIn, loader);
888 vIdI = ob.toString();
889 vIdLong = Long.parseLong(vIdI);
892 if( ! ghost2CheckOff ){
893 Vertex connectedVert = g2.traversal().V(vIdLong).next();
894 if( connectedVert == null ) {
895 LoggingContext.statusCode(StatusCode.ERROR);
896 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
897 LOGGER.warn( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
898 cantGetUsingVid = true;
900 // If we can NOT get this ghost with the SECOND graph-object,
901 // it is still a ghost since even though we can get data about it using the FIRST graph
904 ghost2 = g.traversal().V(vIdLong).next();
906 catch( Exception ex){
907 LoggingContext.statusCode(StatusCode.ERROR);
908 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
909 LOGGER.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
911 if( ghost2 != null ){
912 ghostNodeHash.put(vIdI, ghost2);
915 }// end of the ghost2 checking
917 catch (Exception err) {
918 LoggingContext.statusCode(StatusCode.ERROR);
919 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
920 LOGGER.warn(">>> WARNING trying to get edge's In-vertex props ", err);
923 if (keysMissing || vIn == null || vNtI.equals("")
924 || cantGetUsingVid) {
925 // this is a bad edge because it points to a vertex
926 // that isn't there anymore or is corrupted
927 String thisEid = e.id().toString();
928 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdI)) {
929 boolean okFlag = true;
930 if (!vIdI.equals("")) {
931 // try to get rid of the corrupted vertex
933 if( (ghost2 != null) && ghost2FixOn ){
940 // NOTE - the singleCommits option is not used in normal processing
942 g = AAIGraph.getInstance().getGraph().newTransaction();
945 } catch (Exception e1) {
947 LoggingContext.statusCode(StatusCode.ERROR);
948 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
949 LOGGER.warn("WARNING when trying to delete bad-edge-connected VERTEX VID = "
953 LOGGER.info(" DELETED vertex from bad edge = "
957 // remove the edge if we couldn't get the
962 // NOTE - the singleCommits option is not used in normal processing
964 g = AAIGraph.getInstance().getGraph().newTransaction();
967 } catch (Exception ex) {
968 // NOTE - often, the exception is just
969 // that this edge has already been
972 LoggingContext.statusCode(StatusCode.ERROR);
973 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
974 LOGGER.warn("WARNING when trying to delete edge = "
978 LOGGER.info(" DELETED edge = " + thisEid);
982 oneArmedEdgeHash.put(thisEid, e);
983 if ((vIn != null) && (vIn.id() != null)) {
984 emptyVertexHash.put(thisEid, vIn.id()
991 vOut = e.outVertex();
992 } catch (Exception err) {
993 LoggingContext.statusCode(StatusCode.ERROR);
994 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
995 LOGGER.warn(">>> WARNING trying to get edge's Out-vertex ");
1001 cantGetUsingVid = false;
1004 Object ob = vOut.<Object>property("aai-node-type").orElse(null);
1006 vNtO = ob.toString();
1007 keysMissing = anyKeyFieldsMissing(vNtO,
1013 vIdO = ob.toString();
1014 vIdLong = Long.parseLong(vIdO);
1017 if( ! ghost2CheckOff ){
1018 Vertex connectedVert = g2.traversal().V(vIdLong).next();
1019 if( connectedVert == null ) {
1020 cantGetUsingVid = true;
1021 LOGGER.info( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
1022 // If we can get this ghost with the other graph-object, then get it -- it's still a ghost
1024 ghost2 = g.traversal().V(vIdLong).next();
1026 catch( Exception ex){
1027 LoggingContext.statusCode(StatusCode.ERROR);
1028 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1029 LOGGER.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
1031 if( ghost2 != null ){
1032 ghostNodeHash.put(vIdO, ghost2);
1036 } catch (Exception err) {
1037 LoggingContext.statusCode(StatusCode.ERROR);
1038 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1039 LOGGER.warn(">>> WARNING trying to get edge's Out-vertex props ", err);
1042 if (keysMissing || vOut == null || vNtO.equals("")
1043 || cantGetUsingVid) {
1044 // this is a bad edge because it points to a vertex
1045 // that isn't there anymore
1046 String thisEid = e.id().toString();
1047 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdO)) {
1048 boolean okFlag = true;
1049 if (!vIdO.equals("")) {
1050 // try to get rid of the corrupted vertex
1052 if( (ghost2 != null) && ghost2FixOn ){
1055 else if (vOut != null) {
1058 if (singleCommits) {
1059 // NOTE - the singleCommits option is not used in normal processing
1061 g = AAIGraph.getInstance().getGraph().newTransaction();
1064 } catch (Exception e1) {
1066 LoggingContext.statusCode(StatusCode.ERROR);
1067 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1068 LOGGER.warn("WARNING when trying to delete bad-edge-connected VID = "
1072 LOGGER.info(" DELETED vertex from bad edge = "
1076 // remove the edge if we couldn't get the
1080 if (singleCommits) {
1081 // NOTE - the singleCommits option is not used in normal processing
1083 g = AAIGraph.getInstance().getGraph().newTransaction();
1086 } catch (Exception ex) {
1087 // NOTE - often, the exception is just
1088 // that this edge has already been
1091 LoggingContext.statusCode(StatusCode.ERROR);
1092 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1093 LOGGER.warn("WARNING when trying to delete edge = "
1097 LOGGER.info(" DELETED edge = " + thisEid);
1101 oneArmedEdgeHash.put(thisEid, e);
1102 if ((vOut != null) && (vOut.id() != null)) {
1103 emptyVertexHash.put(thisEid, vOut.id()
1108 }// End of while-edges-loop
1109 } catch (Exception exx) {
1110 LoggingContext.statusCode(StatusCode.ERROR);
1111 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1112 LOGGER.warn("WARNING from in the while-verts-loop ", exx);
1114 }// End of while-vertices-loop (the edge-checking)
1115 } // end of -- if we're not skipping the edge-checking
1118 deleteCount = deleteCount + dupeGrpsDeleted;
1119 if (!singleCommits && deleteCount > 0) {
1121 LOGGER.info("About to do the commit for "
1122 + deleteCount + " removes. ");
1123 executeFinalCommit = true;
1124 LOGGER.info("Commit was successful ");
1125 } catch (Exception excom) {
1126 LoggingContext.statusCode(StatusCode.ERROR);
1127 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1128 LOGGER.error(" >>>> ERROR <<<< Could not commit changes. " + LogFormatTools.getStackTop(excom));
1133 int ghostNodeCount = ghostNodeHash.size();
1134 int orphanNodeCount = orphanNodeHash.size();
1135 int missingDepNodeCount = missingDepNodeHash.size();
1136 int oneArmedEdgeCount = oneArmedEdgeHash.size();
1137 int dupeCount = dupeGroups.size();
1139 deleteCount = deleteCount + dupeGrpsDeleted;
1141 bw.write("\n\n ============ Summary ==============\n");
1142 if( timeWindowMinutes == 0 ){
1143 bw.write("Ran FULL data grooming (no time-window). \n");
1146 bw.write("Ran PARTIAL data grooming just looking at data added/updated in the last " + timeWindowMinutes + " minutes. \n");
1149 bw.write("\nRan these nodeTypes: " + ntList + "\n\n");
1150 bw.write("There were this many delete candidates from previous run = "
1151 + deleteCandidateList.size() + "\n");
1152 if (dontFixOrphansFlag) {
1153 bw.write(" Note - we are not counting orphan nodes since the -dontFixOrphans parameter was used. \n");
1155 bw.write("Deleted this many delete candidates = " + deleteCount
1157 bw.write("Total number of nodes looked at = " + totalNodeCount
1159 bw.write("Ghost Nodes identified = " + ghostNodeCount + "\n");
1160 bw.write("Orphan Nodes identified = " + orphanNodeCount + "\n");
1161 bw.write("Bad Edges identified = " + oneArmedEdgeCount + "\n");
1162 bw.write("Missing Dependent Edge (but not orphaned) node count = "
1163 + missingDepNodeCount + "\n");
1164 bw.write("Duplicate Groups count = " + dupeCount + "\n");
1165 bw.write("MisMatching Label/aai-node-type count = "
1166 + misMatchedHash.size() + "\n");
1168 bw.write("\n ------------- Delete Candidates ---------\n");
1169 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1171 String vid = entry.getKey();
1172 bw.write("DeleteCandidate: Phantom Vid = [" + vid + "]\n");
1173 cleanupCandidateCount++;
1175 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1177 String vid = entry.getKey();
1178 bw.write("DeleteCandidate: OrphanDepNode Vid = [" + vid + "]\n");
1179 if (!dontFixOrphansFlag) {
1180 cleanupCandidateCount++;
1183 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1184 String eid = entry.getKey();
1185 bw.write("DeleteCandidate: Bad EDGE Edge-id = [" + eid + "]\n");
1186 cleanupCandidateCount++;
1188 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1190 String vid = entry.getKey();
1191 bw.write("DeleteCandidate: (maybe) missingDepNode Vid = ["
1193 cleanupCandidateCount++;
1195 bw.write("\n-- NOTE - To see DeleteCandidates for Duplicates, you need to look in the Duplicates Detail section below.\n");
1197 bw.write("\n ------------- GHOST NODES - detail ");
1198 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1201 String vid = entry.getKey();
1202 bw.write("\n ==> Phantom Vid = " + vid + "\n");
1203 ArrayList<String> retArr = showPropertiesForNode(
1204 TRANSID, FROMAPPID, entry.getValue());
1205 for (String info : retArr) {
1206 bw.write(info + "\n");
1209 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1211 for (String info : retArr) {
1212 bw.write(info + "\n");
1214 } catch (Exception dex) {
1215 LoggingContext.statusCode(StatusCode.ERROR);
1216 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1217 LOGGER.error("error trying to print detail info for a ghost-node: " + LogFormatTools.getStackTop(dex));
1221 bw.write("\n ------------- Missing Dependent Edge ORPHAN NODES - detail: ");
1222 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1225 String vid = entry.getKey();
1226 bw.write("\n> Orphan Node Vid = " + vid + "\n");
1227 ArrayList<String> retArr = showPropertiesForNode(
1228 TRANSID, FROMAPPID, entry.getValue());
1229 for (String info : retArr) {
1230 bw.write(info + "\n");
1233 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1235 for (String info : retArr) {
1236 bw.write(info + "\n");
1238 } catch (Exception dex) {
1239 LoggingContext.statusCode(StatusCode.ERROR);
1240 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1241 LOGGER.error("error trying to print detail info for a Orphan Node /missing dependent edge " + LogFormatTools.getStackTop(dex));
1245 bw.write("\n ------------- Missing Dependent Edge (but not orphan) NODES: ");
1246 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1249 String vid = entry.getKey();
1250 bw.write("\n> Missing edge to Dependent Node (but has edges) Vid = "
1252 ArrayList<String> retArr = showPropertiesForNode(
1253 TRANSID, FROMAPPID, entry.getValue());
1254 for (String info : retArr) {
1255 bw.write(info + "\n");
1258 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1260 for (String info : retArr) {
1261 bw.write(info + "\n");
1263 } catch (Exception dex) {
1264 LoggingContext.statusCode(StatusCode.ERROR);
1265 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1266 LOGGER.error("error trying to print detail info for a node missing its dependent edge but not an orphan "
1267 + LogFormatTools.getStackTop(dex));
1271 bw.write("\n ------------- EDGES pointing to empty/bad vertices: ");
1272 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1274 String eid = entry.getKey();
1275 Edge thisE = entry.getValue();
1276 String badVid = emptyVertexHash.get(eid);
1277 bw.write("\n> Edge pointing to bad vertex (Vid = "
1278 + badVid + ") EdgeId = " + eid + "\n");
1279 bw.write("Label: [" + thisE.label() + "]\n");
1280 Iterator<Property<Object>> pI = thisE.properties();
1281 while (pI.hasNext()) {
1282 Property<Object> propKey = pI.next();
1283 bw.write("Prop: [" + propKey + "], val = ["
1284 + propKey.value() + "]\n");
1286 } catch (Exception pex) {
1287 LoggingContext.statusCode(StatusCode.ERROR);
1288 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1289 LOGGER.error("error trying to print empty/bad vertex data: " + LogFormatTools.getStackTop(pex));
1293 bw.write("\n ------------- Duplicates: ");
1294 Iterator<String> dupeIter = dupeGroups.iterator();
1295 int dupeSetCounter = 0;
1296 while (dupeIter.hasNext()) {
1298 String dset = (String) dupeIter.next();
1300 bw.write("\n --- Duplicate Group # " + dupeSetCounter
1301 + " Detail -----------\n");
1303 // We expect each line to have at least two vid's, followed
1304 // by the preferred one to KEEP
1305 String[] dupeArr = dset.split("\\|");
1306 ArrayList<String> idArr = new ArrayList<>();
1307 int lastIndex = dupeArr.length - 1;
1308 for (int i = 0; i <= lastIndex; i++) {
1309 if (i < lastIndex) {
1310 // This is not the last entry, it is one of the
1311 // dupes, so we want to show all its info
1312 bw.write(" >> Duplicate Group # "
1313 + dupeSetCounter + " Node # " + i
1315 String vidString = dupeArr[i];
1316 idArr.add(vidString);
1317 long longVertId = Long.parseLong(vidString);
1318 Iterator<Vertex> vtxIterator = g.vertices(longVertId);
1320 if (vtxIterator.hasNext()) {
1321 vtx = vtxIterator.next();
1323 ArrayList<String> retArr = showPropertiesForNode(TRANSID, FROMAPPID, vtx);
1324 for (String info : retArr) {
1325 bw.write(info + "\n");
1328 retArr = showAllEdgesForNode(TRANSID,
1330 for (String info : retArr) {
1331 bw.write(info + "\n");
1334 // This is the last entry which should tell us if we
1335 // have a preferred keeper
1336 String prefString = dupeArr[i];
1337 if (prefString.equals("KeepVid=UNDETERMINED")) {
1338 bw.write("\n For this group of duplicates, could not tell which one to keep.\n");
1339 bw.write(" >>> This group needs to be taken care of with a manual/forced-delete.\n");
1341 // If we know which to keep, then the prefString
1342 // should look like, "KeepVid=12345"
1343 String[] prefArr = prefString.split("=");
1344 if (prefArr.length != 2
1345 || (!prefArr[0].equals("KeepVid"))) {
1346 throw new Exception("Bad format. Expecting KeepVid=999999");
1348 String keepVidStr = prefArr[1];
1349 if (idArr.contains(keepVidStr)) {
1350 bw.write("\n The vertex we want to KEEP has vertexId = "
1352 bw.write("\n The others become delete candidates: \n");
1353 idArr.remove(keepVidStr);
1354 for (int x = 0; x < idArr.size(); x++) {
1355 cleanupCandidateCount++;
1356 bw.write("DeleteCandidate: Duplicate Vid = ["
1357 + idArr.get(x) + "]\n");
1360 throw new Exception("ERROR - Vertex Id to keep not found in list of dupes. dset = ["
1364 }// else we know which one to keep
1366 }// for each vertex in a group
1367 } catch (Exception dex) {
1368 LoggingContext.statusCode(StatusCode.ERROR);
1369 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1370 LOGGER.error("error trying to print duplicate vertex data " + LogFormatTools.getStackTop(dex));
1373 }// while - work on each group of dupes
1375 bw.write("\n ------------- Mis-matched Label/aai-node-type Nodes: \n ");
1376 for (Map.Entry<String, String> entry : misMatchedHash.entrySet()) {
1377 String msg = entry.getValue();
1378 bw.write("MixedMsg = " + msg + "\n");
1381 bw.write("\n ------------- Got these errors while processing: \n");
1382 Iterator<String> errIter = errArr.iterator();
1383 while (errIter.hasNext()) {
1384 String line = (String) errIter.next();
1385 bw.write(line + "\n");
1390 LOGGER.info("\n ------------- Done doing all the checks ------------ ");
1391 LOGGER.info("Output will be written to " + fullOutputFileName);
1393 if (cleanupCandidateCount > 0) {
1394 // Technically, this is not an error -- but we're throwing this
1395 // error so that hopefully a
1396 // monitoring system will pick it up and do something with it.
1397 throw new AAIException("AAI_6123", "See file: [" + fullOutputFileName
1398 + "] and investigate delete candidates. ");
1400 } catch (AAIException e) {
1401 LoggingContext.statusCode(StatusCode.ERROR);
1402 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1403 LOGGER.error("Caught AAIException while grooming data");
1404 ErrorLogHelper.logException(e);
1405 } catch (Exception ex) {
1406 LoggingContext.statusCode(StatusCode.ERROR);
1407 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1408 LOGGER.error("Caught exception while grooming data");
1409 ErrorLogHelper.logError("AAI_6128", ex.getMessage() + ", resolve and rerun dataGrooming");
1415 } catch (IOException iox) {
1416 LoggingContext.statusCode(StatusCode.ERROR);
1417 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1418 LOGGER.warn("Got an IOException trying to close bufferedWriter() \n", iox);
1422 if (g != null && g.tx().isOpen()) {
1423 // Any changes that worked correctly should have already done
1426 if (executeFinalCommit) {
1430 } catch (Exception ex) {
1431 // Don't throw anything because Titan sometimes is just saying that the graph is already closed
1432 LoggingContext.statusCode(StatusCode.ERROR);
1433 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1434 LOGGER.warn("WARNING from final graphTransaction.rollback()", ex);
1438 if (g2 != null && g2.tx().isOpen()) {
1439 // Any changes that worked correctly should have already done
1443 } catch (Exception ex) {
1444 // Don't throw anything because Titan sometimes is just saying that the graph is already closed
1445 LoggingContext.statusCode(StatusCode.ERROR);
1446 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1447 LOGGER.warn("WARNING from final graphTransaction2.rollback()", ex);
1451 if( finalShutdownFlag ){
1453 if( graph != null && graph.isOpen() ){
1457 } catch (Exception ex) {
1458 // Don't throw anything because Titan sometimes is just saying that the graph is already closed{
1459 LoggingContext.statusCode(StatusCode.ERROR);
1460 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1461 LOGGER.warn("WARNING from final graph.shutdown()", ex);
1465 if( graph2 != null && graph2.isOpen() ){
1466 graph2.tx().close();
1469 } catch (Exception ex) {
1470 // Don't throw anything because Titan sometimes is just saying that the graph is already closed{
1471 LoggingContext.statusCode(StatusCode.ERROR);
1472 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1473 LOGGER.warn("WARNING from final graph2.shutdown()", ex);
1479 return cleanupCandidateCount;
1481 }// end of doTheGrooming()
1485 * Vertex has these keys.
1487 * @param tmpV the tmp V
1488 * @param propHashWithKeys the prop hash with keys
1489 * @return the boolean
1491 private static Boolean vertexHasTheseKeys( Vertex tmpV, HashMap <String, Object> propHashWithKeys) {
1492 Iterator <?> it = propHashWithKeys.entrySet().iterator();
1493 while( it.hasNext() ){
1494 String propName = "";
1495 String propVal = "";
1496 Map.Entry <?,?>propEntry = (Map.Entry<?,?>)it.next();
1497 Object propNameObj = propEntry.getKey();
1498 if( propNameObj != null ){
1499 propName = propNameObj.toString();
1501 Object propValObj = propEntry.getValue();
1502 if( propValObj != null ){
1503 propVal = propValObj.toString();
1505 Object checkValObj = tmpV.<Object>property(propName).orElse(null);
1506 if( checkValObj == null ) {
1509 else if( !propVal.equals(checkValObj.toString()) ){
1518 * Any key fields missing.
1520 * @param nType the n type
1522 * @return the boolean
1524 private static Boolean anyKeyFieldsMissing(String nType, Vertex v, Loader loader) {
1527 Introspector obj = null;
1529 obj = loader.introspectorFromName(nType);
1530 } catch (AAIUnknownObjectException e) {
1531 // They gave us a non-empty nodeType but our NodeKeyProps does
1532 // not have data for it. Since we do not know what the
1533 // key params are for this type of node, we will just
1535 String emsg = " -- WARNING -- Unrecognized nodeType: [" + nType
1536 + "]. We cannot determine required keys for this nType. ";
1537 // NOTE - this will be caught below and a "false" returned
1538 throw new AAIException("AAI_6121", emsg);
1541 // Determine what the key fields are for this nodeType
1542 Collection <String> keyPropNamesColl = obj.getKeys();
1543 Iterator<String> keyPropI = keyPropNamesColl.iterator();
1544 while (keyPropI.hasNext()) {
1545 String propName = keyPropI.next();
1546 Object ob = v.<Object>property(propName).orElse(null);
1547 if (ob == null || ob.toString().equals("")) {
1548 // It is missing a key property
1552 } catch (AAIException e) {
1553 // Something was wrong -- but since we weren't able to check
1554 // the keys, we will not declare that it is missing keys.
1562 * Gets the delete list.
1564 * @param targetDir the target dir
1565 * @param fileName the file name
1566 * @param edgesOnlyFlag the edges only flag
1567 * @param dontFixOrphans the dont fix orphans
1568 * @param dupeFixOn the dupe fix on
1569 * @return the delete list
1570 * @throws AAIException the AAI exception
1572 private static Set<String> getDeleteList(String targetDir,
1573 String fileName, Boolean edgesOnlyFlag, Boolean dontFixOrphans,
1574 Boolean dupeFixOn) throws AAIException {
// Reads the output file of a previous grooming run and collects the ids found
// on "DeleteCandidate" lines, filtered by the passed-in flags (edges-only,
// skip-orphans, skip-duplicates).
// NOTE(review): interior source lines are missing from this listing (gaps in
// the embedded numbering: 1575, 1580, 1591, 1596-1600, 1604-1607), so the
// else-branch brace structure and the final "return delList;" are not visible
// here — confirm against the complete source file.
1576 // Look in the file for lines formatted like we expect - pull out any
1577 // Vertex Id's to delete on this run
1578 Set<String> delList = new LinkedHashSet<>();
1579 String fullFileName = targetDir + AAIConstants.AAI_FILESEP + fileName;
1581 try(BufferedReader br = new BufferedReader(new FileReader(fullFileName))) {
1582 String line = br.readLine();
1583 while (line != null) {
1584 if (!"".equals(line) && line.startsWith("DeleteCandidate")) {
1585 if (edgesOnlyFlag && (!line.contains("Bad Edge"))) {
1586 // We're not going to process edge guys
1587 } else if (dontFixOrphans && line.contains("Orphan")) {
1588 // We're not going to process orphans
1589 } else if (!dupeFixOn && line.contains("Duplicate")) {
1590 // We're not going to process Duplicates
// The surviving branch (original line ~1591, not visible here) extracts the id
// from lines shaped like "... Vid = [12345]" (see how the report is written
// with bw.write("DeleteCandidate: ... Vid = [" + vid + "]")): indexOf("id = ")
// plus 6 skips past "id = [" so the substring up to "]" is the bare id value.
1592 int begIndex = line.indexOf("id = ");
1593 int endIndex = line.indexOf("]");
1594 String vidVal = line.substring(begIndex + 6, endIndex);
1595 delList.add(vidVal);
1598 line = br.readLine();
1601 } catch (IOException e) {
1602 throw new AAIException("AAI_6124", e, "Could not open input-file [" + fullFileName
1603 + "], exception= " + e.getMessage());
1608 }// end of getDeleteList
1611 * Gets the preferred dupe.
1613 * @param transId the trans id
1614 * @param fromAppId the from app id
1616 * @param dupeVertexList the dupe vertex list
1617 * @param ver the ver
1619 * @throws AAIException the AAI exception
1621 public static Vertex getPreferredDupe(String transId,
1622 String fromAppId, GraphTraversalSource g,
1623 ArrayList<Vertex> dupeVertexList, String ver, Loader loader)
1624 throws AAIException {
// Given a list of vertices suspected to be duplicates of each other, reduces
// the list tournament-style via pickOneOfTwoDupes() and returns the single
// vertex that should be kept, or null when no clear winner can be chosen.
// NOTE(review): this listing is missing interior lines (embedded-numbering
// gaps such as 1633-1634, 1637-1638, 1641-1642, 1651-1656) — presumably the
// "return nullVtx;" bodies of the guard clauses below and the loop's early
// exit; confirm against the complete source file.
1626 // This method assumes that it is being passed a List of vertex objects
1628 // violate our uniqueness constraints.
1630 Vertex nullVtx = null;
1632 if (dupeVertexList == null) {
1635 int listSize = dupeVertexList.size();
1636 if (listSize == 0) {
1639 if (listSize == 1) {
// A single entry is trivially the one to keep.
1640 return (dupeVertexList.get(0));
1643 Vertex vtxPreferred = null;
1644 Vertex currentFaveVtx = dupeVertexList.get(0);
// Compare the current favorite against each remaining vertex; the pairwise
// winner becomes the new favorite.
1645 for (int i = 1; i < listSize; i++) {
1646 Vertex vtxB = dupeVertexList.get(i);
1647 vtxPreferred = pickOneOfTwoDupes(transId, fromAppId, g,
1648 currentFaveVtx, vtxB, ver, loader);
1649 if (vtxPreferred == null) {
1650 // We couldn't choose one
1653 currentFaveVtx = vtxPreferred;
1657 return (currentFaveVtx);
1659 } // end of getPreferredDupe()
1662 * Pick one of two dupes.
1664 * @param transId the trans id
1665 * @param fromAppId the from app id
1667 * @param vtxA the vtx A
1668 * @param vtxB the vtx B
1669 * @param ver the ver
1671 * @throws AAIException the AAI exception
1673 public static Vertex pickOneOfTwoDupes(String transId,
1674 String fromAppId, GraphTraversalSource g, Vertex vtxA,
1675 Vertex vtxB, String ver, Loader loader) throws AAIException {
// Decides which of two suspected-duplicate vertices should be kept.
// Returns the preferred vertex, or null when the pair cannot safely be judged
// (different aai-node-type, mismatched key values, different dependent
// parents, or edge sets that are neither equal nor a superset of each other).
// NOTE(review): this listing has gaps in the embedded line numbering (missing
// try statements, closing braces, and "return nullVtx;" lines), so some
// control flow described below is partly inferred — confirm against the
// complete source file.
1677 Vertex nullVtx = null;
1678 Vertex preferredVtx = null;
1680 Long vidA = new Long(vtxA.id().toString());
1681 Long vidB = new Long(vtxB.id().toString());
// Step 0 - both vertices must agree on aai-node-type, or we refuse to choose.
1683 String vtxANodeType = "";
1684 String vtxBNodeType = "";
1685 Object objType = vtxA.<Object>property("aai-node-type").orElse(null);
1686 if (objType != null) {
1687 vtxANodeType = objType.toString();
1689 objType = vtxB.<Object>property("aai-node-type").orElse(null);
1690 if (objType != null) {
1691 vtxBNodeType = objType.toString();
1694 if (vtxANodeType.equals("") || (!vtxANodeType.equals(vtxBNodeType))) {
1695 // Either they're not really dupes or there's some bad data - so
1700 // Check that node A and B both have the same key values (or else they
1702 // (We'll check dep-node later)
1703 // Determine what the key fields are for this nodeType
1704 Collection <String> keyProps = new ArrayList <>();
1705 HashMap <String,Object> keyPropValsHash = new HashMap <String,Object>();
1707 keyProps = loader.introspectorFromName(vtxANodeType).getKeys();
1708 } catch (AAIUnknownObjectException e) {
1709 LOGGER.warn("Required property not found", e);
1710 throw new AAIException("AAI_6105", "Required Property name(s) not found for nodeType = " + vtxANodeType + ")");
// Compare every key property on A and B; bail out (null) on any mismatch.
1713 Iterator<String> keyPropI = keyProps.iterator();
1714 while (keyPropI.hasNext()) {
1715 String propName = keyPropI.next();
1716 String vtxAKeyPropVal = "";
1717 objType = vtxA.<Object>property(propName).orElse(null);
1718 if (objType != null) {
1719 vtxAKeyPropVal = objType.toString();
1721 String vtxBKeyPropVal = "";
1722 objType = vtxB.<Object>property(propName).orElse(null);
1723 if (objType != null) {
1724 vtxBKeyPropVal = objType.toString();
1727 if (vtxAKeyPropVal.equals("")
1728 || (!vtxAKeyPropVal.equals(vtxBKeyPropVal))) {
1729 // Either they're not really dupes or they are missing some key
1730 // data - so don't pick one
1734 // Keep these around for (potential) use later
1735 keyPropValsHash.put(propName, vtxAKeyPropVal);
1740 // Collect the vid's and aai-node-types of the vertices that each vertex
1741 // (A and B) is connected to.
1742 ArrayList<String> vtxIdsConn2A = new ArrayList<>();
1743 ArrayList<String> vtxIdsConn2B = new ArrayList<>();
1744 HashMap<String, String> nodeTypesConn2A = new HashMap<>();
1745 HashMap<String, String> nodeTypesConn2B = new HashMap<>();
1747 ArrayList<Vertex> vertListA = getConnectedNodes( g, vtxA );
1748 if (vertListA != null) {
1749 Iterator<Vertex> iter = vertListA.iterator();
1750 while (iter.hasNext()) {
1751 Vertex tvCon = iter.next();
1752 String conVid = tvCon.id().toString();
1754 objType = tvCon.<Object>property("aai-node-type").orElse(null);
1755 if (objType != null) {
1756 nt = objType.toString();
1758 nodeTypesConn2A.put(nt, conVid);
1759 vtxIdsConn2A.add(conVid);
1763 ArrayList<Vertex> vertListB = getConnectedNodes( g, vtxB );
1764 if (vertListB != null) {
1765 Iterator<Vertex> iter = vertListB.iterator();
1766 while (iter.hasNext()) {
1767 Vertex tvCon = iter.next();
1768 String conVid = tvCon.id().toString();
1770 objType = tvCon.<Object>property("aai-node-type").orElse(null);
1771 if (objType != null) {
1772 nt = objType.toString();
1774 nodeTypesConn2B.put(nt, conVid);
1775 vtxIdsConn2B.add(conVid);
1779 // 1 - If this kind of node needs a dependent node for uniqueness, then
1780 // verify that both nodes point to the same dependent
1781 // node (otherwise they're not really duplicates)
1782 // Note - there is sometimes more than one dependent node type since
1783 // one nodeType can be used in different ways. But for a
1784 // particular node, it will only have one dependent node that
1785 // it's connected to.
// Holds the vid of whichever vertex the unique index resolves to, when it
// resolves to exactly one of the pair (presumably what findJustOneUsingIndex
// returns; empty string otherwise — confirm against that helper's source).
1786 String onlyNodeThatIndexPointsToVidStr = "";
1787 Collection<String> depNodeTypes = loader.introspectorFromName(vtxANodeType).getDependentOn();
1788 if (depNodeTypes.isEmpty()) {
1789 // This kind of node is not dependent on any other. That is ok.
1790 // We need to find out if the unique index info is good or not and
1791 // use that later when deciding if we can delete one.
1792 onlyNodeThatIndexPointsToVidStr = findJustOneUsingIndex( transId,
1793 fromAppId, g, keyPropValsHash, vtxANodeType, vidA, vidB, ver );
1795 String depNodeVtxId4A = "";
1796 String depNodeVtxId4B = "";
1797 Iterator<String> iter = depNodeTypes.iterator();
1798 while (iter.hasNext()) {
1799 String depNodeType = iter.next();
1800 if (nodeTypesConn2A.containsKey(depNodeType)) {
1801 // This is the dependent node type that vertex A is using
1802 depNodeVtxId4A = nodeTypesConn2A.get(depNodeType);
1804 if (nodeTypesConn2B.containsKey(depNodeType)) {
1805 // This is the dependent node type that vertex B is using
1806 depNodeVtxId4B = nodeTypesConn2B.get(depNodeType);
1809 if (depNodeVtxId4A.equals("")
1810 || (!depNodeVtxId4A.equals(depNodeVtxId4B))) {
1811 // Either they're not really dupes or there's some bad data - so
1812 // don't pick either one
1817 if (vtxIdsConn2A.size() == vtxIdsConn2B.size()) {
1818 // 2 - If they both have edges to all the same vertices,
1819 // then return the one that can be reached uniquely via the
1820 // key if that is the case or
1821 // else the one with the lower vertexId
1823 boolean allTheSame = true;
1824 Iterator<String> iter = vtxIdsConn2A.iterator();
1825 while (iter.hasNext()) {
1826 String vtxIdConn2A = iter.next();
1827 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1834 // If everything is the same, but one of the two has a good
1835 // pointer to it, then save that one. Otherwise, take the
1837 if( !onlyNodeThatIndexPointsToVidStr.equals("") ){
1838 // only one is reachable via the index - choose that one.
1839 if( onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1840 preferredVtx = vtxA;
1842 else if( onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1843 preferredVtx = vtxB;
// Tie-breaker when the index points at neither: keep the lower vertex id.
1846 else if (vidA < vidB) {
1847 preferredVtx = vtxA;
1849 preferredVtx = vtxB;
1852 } else if (vtxIdsConn2A.size() > vtxIdsConn2B.size()) {
1853 // 3 - VertexA is connected to more things than vtxB.
1854 // We'll pick VtxA if its edges are a superset of vtxB's edges
1855 // and it doesn't contradict the check for the index/key pointer.
1856 boolean missingOne = false;
1857 Iterator<String> iter = vtxIdsConn2B.iterator();
1858 while (iter.hasNext()) {
1859 String vtxIdConn2B = iter.next();
1860 if (!vtxIdsConn2A.contains(vtxIdConn2B)) {
1866 if( onlyNodeThatIndexPointsToVidStr.equals("")
1867 || onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1868 preferredVtx = vtxA;
1871 } else if (vtxIdsConn2B.size() > vtxIdsConn2A.size()) {
1872 // 4 - VertexB is connected to more things than vtxA.
1873 // We'll pick VtxB if its edges are a superset of vtxA's edges
1874 // and it doesn't contradict the check for the index/key pointer.
1875 boolean missingOne = false;
1876 Iterator<String> iter = vtxIdsConn2A.iterator();
1877 while (iter.hasNext()) {
1878 String vtxIdConn2A = iter.next();
1879 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
// Fallthrough: neither edge set dominates, so no preference can be made.
1885 if( onlyNodeThatIndexPointsToVidStr.equals("")
1886 || onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1887 preferredVtx = vtxB;
1891 preferredVtx = nullVtx;
1894 return (preferredVtx);
1896 } // end of pickOneOfTwoDupes()
/**
 * Check and process dupes.
 *
 * For a set of nodes that were all found using the same KEY parameters
 * (nodeType + key data), detect duplicates and, where possible, decide
 * which copy should be preserved.  Each element of the returned list is a
 * pipe-delimited group of vertex ids ending in either "KeepVid=&lt;vid&gt;"
 * (we know which copy to keep) or "KeepVid=UNDETERMINED".
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param version the version
 * @param nType the n type (node type shared by all passed vertices)
 * @param passedVertList the passed vert list (all share the same key data)
 * @param dupeFixOn the dupe fix on
 * @param deleteCandidateList vids pre-approved (from a previous run) for deletion
 * @param singleCommits the single commits
 * @param alreadyFoundDupeGroups dupe groups found earlier in this run, so the
 *        same vertices are not reported twice
 * @return the array list of dupe-group strings described above
 */
private static List<String> checkAndProcessDupes(String transId,
        String fromAppId, Graph g, GraphTraversalSource source, String version, String nType,
        List<Vertex> passedVertList, Boolean dupeFixOn,
        Set<String> deleteCandidateList, Boolean singleCommits,
        ArrayList<String> alreadyFoundDupeGroups, Loader loader ) {

    ArrayList<String> returnList = new ArrayList<>();
    ArrayList<Vertex> checkVertList = new ArrayList<>();
    ArrayList<String> alreadyFoundDupeVidArr = new ArrayList<>();
    Boolean noFilterList = true;
    // Flatten the previously-found dupe groups into a plain list of vids so
    // vertices already reported in this run can be skipped below.
    Iterator<String> afItr = alreadyFoundDupeGroups.iterator();
    while (afItr.hasNext()) {
        String dupeGrpStr = afItr.next();
        String[] dupeArr = dupeGrpStr.split("\\|");
        int lastIndex = dupeArr.length - 1;
        for (int i = 0; i < lastIndex; i++) {
            // Note: we don't want the last one...
            // (the last array entry is the "KeepVid=..." marker, not a vid)
            String vidString = dupeArr[i];
            alreadyFoundDupeVidArr.add(vidString);
            noFilterList = false;

    // For a given set of Nodes that were found with a set of KEY
    // Parameters, (nodeType + key data) we will
    // see if we find any duplicate nodes that need to be cleaned up. Note -
    // it's legit to have more than one
    // node with the same key data if the nodes depend on a parent for
    // uniqueness -- as long as the two nodes
    // don't hang off the same Parent.
    // If we find duplicates, and we can figure out which of each set of
    // duplicates is the one that we
    // think should be preserved, we will record that. Whether we can tell
    // which one should be
    // preserved or not, we will return info about any sets of duplicates
    // Each element in the returned arrayList might look like this:
    // "1234|5678|keepVid=UNDETERMINED" (if there were 2 dupes, and we
    // couldn't figure out which one to keep)
    // or, "100017|200027|30037|keepVid=30037" (if there were 3 dupes and we
    // thought the third one was the one that should survive)
    // Because of the way the calling code loops over stuff, we can get the
    // same data multiple times - so we should
    // not process any vertices that we've already seen.

    Iterator<Vertex> pItr = passedVertList.iterator();
    while (pItr.hasNext()) {
        Vertex tvx = pItr.next();
        String passedId = tvx.id().toString();
        if (noFilterList || !alreadyFoundDupeVidArr.contains(passedId)) {
            // We haven't seen this one before - so we should check it.
            checkVertList.add(tvx);

    if (checkVertList.size() < 2) {
        // Nothing new to check.

    if (loader.introspectorFromName(nType).isTopLevel()) {
        // If this was a node that does NOT depend on other nodes for
        // uniqueness, and we
        // found more than one node using its key -- record the found
        // vertices as duplicates.
        String dupesStr = "";
        for (int i = 0; i < checkVertList.size(); i++) {
            + ((checkVertList.get(i))).id()
        // NOTE(review): "!=" below is a reference comparison; dupesStr is
        // built by concatenation so it is never the interned "" literal and
        // this test is effectively always true.  !dupesStr.isEmpty() was
        // probably intended -- confirm before changing.
        if (dupesStr != "") {
            Vertex prefV = getPreferredDupe(transId, fromAppId,
                    source, checkVertList, version, loader);
            if (prefV == null) {
                // We could not determine which duplicate to keep
                dupesStr = dupesStr + "KeepVid=UNDETERMINED";
                returnList.add(dupesStr);
                // (else branch) a preferred vertex was chosen -- record it
                // and, when appropriate, delete the non-keepers.
                dupesStr = dupesStr + "KeepVid=" + prefV.id();
                Boolean didRemove = false;
                didRemove = deleteNonKeepersIfAppropriate(g,
                        dupesStr, prefV.id().toString(),
                        deleteCandidateList, singleCommits);
                // keep them on our list
                returnList.add(dupesStr);
        // More than one node have the same key fields since they may
        // depend on a parent node for uniqueness. Since we're finding
        // more than one, we want to check to see if any of the
        // vertices that have this set of keys (and are the same nodeType)
        // are also pointing at the same 'parent' node.
        // Note: for a given set of key data, it is possible that there
        // could be more than one set of duplicates.
        HashMap<String, ArrayList<Vertex>> vertsGroupedByParentHash = groupVertsByDepNodes(
                transId, fromAppId, source, version, nType,
                checkVertList, loader);
        for (Map.Entry<String, ArrayList<Vertex>> entry : vertsGroupedByParentHash
            ArrayList<Vertex> thisParentsVertList = entry
            if (thisParentsVertList.size() > 1) {
                // More than one vertex found with the same key info
                // hanging off the same parent/dependent node
                String dupesStr = "";
                for (int i = 0; i < thisParentsVertList.size(); i++) {
                    + ((thisParentsVertList
                    .get(i))).id() + "|";
                // NOTE(review): same always-true String "!=" comparison as above.
                if (dupesStr != "") {
                    Vertex prefV = getPreferredDupe(transId,
                            fromAppId, source, thisParentsVertList,
                    if (prefV == null) {
                        // We could not determine which duplicate to
                        dupesStr = dupesStr + "KeepVid=UNDETERMINED";
                        returnList.add(dupesStr);
                        Boolean didRemove = false;
                        dupesStr = dupesStr + "KeepVid="
                                + prefV.id().toString();
                        didRemove = deleteNonKeepersIfAppropriate(
                                g, dupesStr, prefV.id()
                                deleteCandidateList, singleCommits);
                        // keep them on our list
                        returnList.add(dupesStr);
} catch (Exception e) {
    // Deliberate catch-all: grooming keeps going even if one dupe group
    // blows up; the problem is logged and absorbed.
    LoggingContext.statusCode(StatusCode.ERROR);
    LoggingContext.responseCode(LoggingContext.DATA_ERROR);
    LOGGER.warn(" >>> Threw an error in checkAndProcessDupes - just absorb this error and move on. ", e);
}// End of checkAndProcessDupes()
/**
 * Group verts by dep nodes.
 *
 * Given vertices of a single (parent-dependent) node type that all carry
 * the same key data, group them by the parent/dependent vertex they hang
 * off of.  Normally there is one child per parent, but since we are hunting
 * duplicates, several vertices may map to the same parent vid.
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param g traversal source used to walk to each vertex's parent
 * @param version the version
 * @param nType the n type
 * @param passedVertList the passed vert list
 * @param loader introspection loader used for node-type metadata
 * @return map of parent-vertex-id to the list of passed vertices that
 *         depend on that parent
 * @throws AAIException the AAI exception
 */
private static HashMap<String, ArrayList<Vertex>> groupVertsByDepNodes(
        String transId, String fromAppId, GraphTraversalSource g, String version,
        String nType, ArrayList<Vertex> passedVertList, Loader loader)
        throws AAIException {
    // Given a list of Titan Vertices of one nodeType (see AAI-8956), group
    // them together by the parent node they depend on.
    // Ie. if given a list of ip address nodes (assumed to all have the
    // same key info) they might sit under several different parent vertices.
    // Under Normal conditions, there would only be one per parent -- but
    // we're trying to find duplicates - so we
    // allow for the case where more than one is under the same parent node.

    HashMap<String, ArrayList<Vertex>> retHash = new HashMap<String, ArrayList<Vertex>>();
    if (loader.introspectorFromName(nType).isTopLevel()) {
        // This method really should not have been called if this is not the
        // kind of node that depends on a parent for uniqueness, so just
        // return the empty hash.

    // Find out what types of nodes the passed in nodes can depend on
    ArrayList<String> depNodeTypeL = new ArrayList<>();
    Collection<String> depNTColl = loader.introspectorFromName(nType).getDependentOn();
    Iterator<String> ntItr = depNTColl.iterator();
    while (ntItr.hasNext()) {
        depNodeTypeL.add(ntItr.next());

    // For each vertex, we want find its depended-on/parent vertex so we
    // can track what other vertexes that are dependent on that same guy.
    if (passedVertList != null) {
        Iterator<Vertex> iter = passedVertList.iterator();
        while (iter.hasNext()) {
            Vertex thisVert = iter.next();
            Vertex tmpParentVtx = getConnectedParent( g, thisVert );
            if( tmpParentVtx != null ) {
                String parentNt = null;
                Object obj = tmpParentVtx.<Object>property("aai-node-type").orElse(null);
                // NOTE(review): obj can be null when the parent carries no
                // aai-node-type property; toString() would then NPE -- the
                // null-guard is presumably on an elided line, confirm.
                parentNt = obj.toString();
                if (depNTColl.contains(parentNt)) {
                    // This must be the parent/dependent node
                    String parentVid = tmpParentVtx.id().toString();
                    if (retHash.containsKey(parentVid)) {
                        // add this vert to the list for this parent key
                        retHash.get(parentVid).add(thisVert);
                        // (else) This is the first one we found on this parent
                        ArrayList<Vertex> vList = new ArrayList<>();
                        vList.add(thisVert);
                        retHash.put(parentVid, vList);
}// end of groupVertsByDepNodes()
/**
 * Delete non keepers if appropriate.
 *
 * Given a dupe-group string (pipe-delimited vids ending in "KeepVid=..."),
 * delete every vid except the keeper -- but only those vids that appear on
 * the pre-approved deleteCandidateList from a previous run.
 *
 * @param g the graph used to look up and drop the doomed vertices
 * @param dupeInfoString the dupe info string, e.g. "3456|9880|KeepVid=3456"
 * @param vidToKeep the vid to keep
 * @param deleteCandidateList the delete candidate list (vids approved for deletion)
 * @param singleCommits the single commits (commit after each delete; not
 *        used in normal processing)
 * @return true if at least one vertex was actually deleted
 */
private static Boolean deleteNonKeepersIfAppropriate(Graph g,
        String dupeInfoString, String vidToKeep,
        Set<String> deleteCandidateList, Boolean singleCommits) {

    Boolean deletedSomething = false;
    // This assumes that the dupeInfoString is in the format of
    // pipe-delimited vid's followed by
    // ie. "3456|9880|keepVid=3456"
    if (deleteCandidateList == null || deleteCandidateList.size() == 0) {
        // No vid's on the candidate list -- so no deleting will happen on

    String[] dupeArr = dupeInfoString.split("\\|");
    ArrayList<String> idArr = new ArrayList<>();
    int lastIndex = dupeArr.length - 1;
    for (int i = 0; i <= lastIndex; i++) {
        if (i < lastIndex) {
            // This is not the last entry, it is one of the dupes,
            String vidString = dupeArr[i];
            idArr.add(vidString);
            // This is the last entry which should tell us if we have a
            // preferred keeper.
            String prefString = dupeArr[i];
            if (prefString.equals("KeepVid=UNDETERMINED")) {
                // They sent us a bad string -- nothing should be deleted if
                // no dupe could be tagged as preferred
                // If we know which to keep, then the prefString should look
                // like, "KeepVid=12345"
                String[] prefArr = prefString.split("=");
                if (prefArr.length != 2 || (!prefArr[0].equals("KeepVid"))) {
                    LoggingContext.statusCode(StatusCode.ERROR);
                    LoggingContext.responseCode(LoggingContext.DATA_ERROR);
                    LOGGER.error("Bad format. Expecting KeepVid=999999");
                    String keepVidStr = prefArr[1];
                    if (idArr.contains(keepVidStr)) {
                        // Drop the keeper from the working list...
                        idArr.remove(keepVidStr);
                        // So now, the idArr should just contain the vid's
                        // that we want to remove.
                        for (int x = 0; x < idArr.size(); x++) {
                            boolean okFlag = true;
                            String thisVid = idArr.get(x);
                            if (deleteCandidateList.contains(thisVid)) {
                                // This vid is a valid delete candidate from
                                // a prev. run, so we can remove it.
                                long longVertId = Long
                                        .parseLong(thisVid);
                                .traversal().V(longVertId).next();
                                if (singleCommits) {
                                    // NOTE - the singleCommits option is not used in normal processing
                                    // (reassigns the parameter to a fresh transaction)
                                    g = AAIGraph.getInstance().getGraph().newTransaction();
                                } catch (Exception e) {
                                    LoggingContext.statusCode(StatusCode.ERROR);
                                    LoggingContext.responseCode(LoggingContext.DATA_ERROR);
                                    LOGGER.error("ERROR trying to delete VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
                                    LOGGER.info(" DELETED VID = " + thisVid);
                                    deletedSomething = true;
                                    LoggingContext.statusCode(StatusCode.ERROR);
                                    LoggingContext.responseCode(LoggingContext.DATA_ERROR);
                                    LOGGER.error("ERROR - Vertex Id to keep not found in list of dupes. dupeInfoString = ["
                                            + dupeInfoString + "]");
    }// else we know which one to keep
    }// for each vertex in a group

    return deletedSomething;

}// end of deleteNonKeepersIfAppropriate()
/**
 * Gets the node just using key params.
 *
 * Looks up vertices of the given nodeType using ONLY the supplied key
 * properties (which should be backed by a DB index) -- no parent-based
 * qualification.  Supports at most four key properties.
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param graph the graph (traversal source to query)
 * @param nodeType the node type
 * @param keyPropsHash the key props hash (property name to value; must be non-empty)
 * @param apiVersion the api version
 * @return the node just using key params (possibly empty list)
 * @throws AAIException if no key properties were passed, or more than 4 were
 */
public static List <Vertex> getNodeJustUsingKeyParams( String transId, String fromAppId, GraphTraversalSource graph, String nodeType,
        HashMap<String,Object> keyPropsHash, String apiVersion ) throws AAIException{

    List <Vertex> retVertList = new ArrayList <> ();

    // We assume that all NodeTypes have at least one key-property defined.
    // Note - instead of key-properties (the primary key properties), a user could pass
    // alternate-key values if they are defined for the nodeType.
    List<String> kName = new ArrayList<>();
    List<Object> kVal = new ArrayList<>();
    if( keyPropsHash == null || keyPropsHash.isEmpty() ) {
        throw new AAIException("AAI_6120", " NO key properties passed for this getNodeJustUsingKeyParams() request. NodeType = [" + nodeType + "]. ");

    // Copy the key props into parallel name/value lists so they can be
    // addressed positionally below.
    for( Map.Entry<String, Object> entry : keyPropsHash.entrySet() ){
        kName.add(i, entry.getKey());
        kVal.add(i, entry.getValue());

    int topPropIndex = i;    // index of the last key prop (count - 1)
    String propsAndValuesForMsg = "";
    Iterator <Vertex> verts = null;

    // Build the traversal with one .has() step per key property, always
    // constrained by aai-node-type.
    if( topPropIndex == 0 ){
        propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ") ";
        verts= graph.V().has(kName.get(0),kVal.get(0)).has("aai-node-type",nodeType);

    else if( topPropIndex == 1 ){
        propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
                + kName.get(1) + " = " + kVal.get(1) + ") ";
        verts = graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has("aai-node-type",nodeType);

    else if( topPropIndex == 2 ){
        propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
                + kName.get(1) + " = " + kVal.get(1) + ", "
                + kName.get(2) + " = " + kVal.get(2) + ") ";
        verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has("aai-node-type",nodeType);

    else if( topPropIndex == 3 ){
        propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
                + kName.get(1) + " = " + kVal.get(1) + ", "
                + kName.get(2) + " = " + kVal.get(2) + ", "
                + kName.get(3) + " = " + kVal.get(3) + ") ";
        verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has(kName.get(3),kVal.get(3)).has("aai-node-type",nodeType);

        throw new AAIException("AAI_6114", " We only support 4 keys per nodeType for now \n");

    catch( Exception ex ){
        LoggingContext.statusCode(StatusCode.ERROR);
        LoggingContext.responseCode(LoggingContext.DATA_ERROR);
        LOGGER.error( " ERROR trying to get node for: [" + propsAndValuesForMsg + "]" + LogFormatTools.getStackTop(ex));

    // Drain the traversal (if it ran) into the return list.
    if( verts != null ){
        while( verts.hasNext() ){
            retVertList.add(tiV);

    if( retVertList.size() == 0 ){
        LOGGER.debug("DEBUG No node found for nodeType = [" + nodeType +
                "], propsAndVal = " + propsAndValuesForMsg );

}// End of getNodeJustUsingKeyParams()
/**
 * Show all edges for node.
 *
 * Builds human-readable descriptions of every IN and OUT edge on the
 * given vertex: the edge label plus the aai-node-type/vid of the vertex
 * on the far side.
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param tVert the vertex whose edges are listed
 * @return the array list of description strings
 */
private static ArrayList <String> showAllEdgesForNode( String transId, String fromAppId, Vertex tVert ){

    ArrayList <String> retArr = new ArrayList <> ();

    // ---- IN edges ----
    Iterator <Edge> eI = tVert.edges(Direction.IN);
    if( ! eI.hasNext() ){
        retArr.add("No IN edges were found for this vertex. ");
    while( eI.hasNext() ){
        Edge ed = eI.next();
        String lab = ed.label();
        // Pick whichever endpoint is NOT the vertex we were given.
        if (tVert.equals(ed.inVertex())) {
            vtx = ed.outVertex();
            vtx = ed.inVertex();
            retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
        String nType = vtx.<String>property("aai-node-type").orElse(null);
        String vid = vtx.id().toString();
        retArr.add("Found an IN edge (" + lab + ") to this vertex from a [" + nType + "] node with VtxId = " + vid );

    // ---- OUT edges (same pattern as above) ----
    eI = tVert.edges(Direction.OUT);
    if( ! eI.hasNext() ){
        retArr.add("No OUT edges were found for this vertex. ");
    while( eI.hasNext() ){
        Edge ed = eI.next();
        String lab = ed.label();
        if (tVert.equals(ed.inVertex())) {
            vtx = ed.outVertex();
            vtx = ed.inVertex();
            retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
        String nType = vtx.<String>property("aai-node-type").orElse(null);
        String vid = vtx.id().toString();
        retArr.add("Found an OUT edge (" + lab + ") from this vertex to a [" + nType + "] node with VtxId = " + vid );
/**
 * Show properties for node.
 *
 * Builds a human-readable dump of the vertex: its aai-node-type/vid header
 * followed by one line per property key/value.
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param tVert the vertex to describe (null is reported, not thrown)
 * @return the array list of description strings
 */
private static ArrayList <String> showPropertiesForNode( String transId, String fromAppId, Vertex tVert ){

    ArrayList <String> retArr = new ArrayList <> ();
    if( tVert == null ){
        retArr.add("null Node object passed to showPropertiesForNode()\n");

    String nodeType = "";
    Object ob = tVert.<Object>property("aai-node-type").orElse(null);
    nodeType = ob.toString();

    retArr.add(" AAINodeType/VtxID for this Node = [" + nodeType + "/" + tVert.id() + "]");
    retArr.add(" Property Detail: ");
    Iterator<VertexProperty<Object>> pI = tVert.properties();
    while( pI.hasNext() ){
        VertexProperty<Object> tp = pI.next();
        Object val = tp.value();
        retArr.add("Prop: [" + tp.key() + "], val = [" + val + "] ");
/**
 * Return every vertex connected to startVtx by an edge in either direction
 * (no node-type or edge-label filtering).
 *
 * @param g traversal source to query
 * @param startVtx the vertex whose neighbors are wanted
 * @return list of adjacent vertices (empty when none found)
 * @throws AAIException the AAI exception
 */
private static ArrayList <Vertex> getConnectedNodes(GraphTraversalSource g, Vertex startVtx )
        throws AAIException {

    ArrayList <Vertex> retArr = new ArrayList <> ();
    if( startVtx == null ){

    // both() walks IN and OUT edges to the neighboring vertices.
    GraphTraversal<Vertex, Vertex> modPipe = null;
    modPipe = g.V(startVtx).both();
    if( modPipe != null && modPipe.hasNext() ){
        while( modPipe.hasNext() ){
            Vertex conVert = modPipe.next();
            retArr.add(conVert);

}// End of getConnectedNodes()
/**
 * Return the vertices connected to startVtx via CONTAINS-flagged edges
 * (checked in both edge orientations) whose aai-node-type equals the
 * requested child node type.
 *
 * @param g traversal source to query
 * @param startVtx the (parent) vertex to start from
 * @param childNType the aai-node-type the children must match
 * @return list of matching child vertices (empty when none found)
 * @throws AAIException the AAI exception
 */
private static ArrayList <Vertex> getConnectedChildrenOfOneType( GraphTraversalSource g,
        Vertex startVtx, String childNType ) throws AAIException{

    ArrayList <Vertex> childList = new ArrayList <> ();
    // Union of the two ways a containment edge can be stored: an OUT edge
    // whose CONTAINS property points OUT, or an IN edge whose CONTAINS
    // property points IN.
    Iterator <Vertex> vertI = g.V(startVtx).union(__.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).inV(), __.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).outV());

    Vertex tmpVtx = null;
    while( vertI != null && vertI.hasNext() ){
        tmpVtx = vertI.next();
        Object ob = tmpVtx.<Object>property("aai-node-type").orElse(null);
        String tmpNt = ob.toString();
        if( tmpNt.equals(childNType)){
            childList.add(tmpVtx);

}// End of getConnectedChildrenOfOneType()
/**
 * Return the vertex that contains (is the parent of) startVtx, found by
 * following CONTAINS-flagged edges in the orientation opposite to
 * getConnectedChildrenOfOneType().
 *
 * @param g traversal source to query
 * @param startVtx the (child) vertex whose parent is wanted
 * @return the parent vertex, or null when none is connected
 * @throws AAIException the AAI exception
 */
private static Vertex getConnectedParent( GraphTraversalSource g,
        Vertex startVtx ) throws AAIException{

    Vertex parentVtx = null;
    Iterator <Vertex> vertI = g.V(startVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());

    while( vertI != null && vertI.hasNext() ){
        // Note - there better only be one!
        // (if the data is bad and there are several, the last one wins)
        parentVtx = vertI.next();

}// End of getConnectedParent()
/**
 * Given a window size in minutes, calculate the epoch-millis timestamp at
 * which the window starts (i.e. "now" minus the window).  A non-positive
 * window size means there is no window.
 *
 * @param timeWindowMinutes size of the window in minutes
 * @return epoch-millis start of the window
 */
private static long figureWindowStartTime( int timeWindowMinutes ){
    // Given a window size, calculate what the start-timestamp would be.
    if( timeWindowMinutes <= 0 ){
        // This just means that there is no window...

    long unixTimeNow = System.currentTimeMillis();
    // 60L forces long arithmetic so a large minute count cannot overflow int.
    long windowInMillis = timeWindowMinutes * 60L * 1000;

    long startTimeStamp = unixTimeNow - windowInMillis;

    return startTimeStamp;
} // End of figureWindowStartTime()
/**
 * Collect Duplicate Sets for nodes that are NOT dependent on parent nodes.
 *
 * The passed vertices are all of one non-dependent node type, so they must
 * be unique in the DB on key data alone.  They are grouped by their
 * concatenated key values; any key string with more than one vertex mapped
 * to it is a duplicate set.
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param version the version
 * @param nType the n type
 * @param passedVertList the passed vert list
 * @param keyPropNamesArr names of this node type's key properties
 * @return list of duplicate sets; each inner list holds the vertices that
 *         share one set of key values
 */
private static ArrayList<ArrayList<Vertex>> getDupeSets4NonDepNodes( String transId,
        String fromAppId, Graph g, String version, String nType,
        ArrayList<Vertex> passedVertList,
        ArrayList <String> keyPropNamesArr,

    ArrayList<ArrayList<Vertex>> returnList = new ArrayList<ArrayList<Vertex>>();

    // We've been passed a set of nodes that we want to check.
    // They are all NON-DEPENDENT nodes of the same nodeType meaning that they should be
    // unique in the DB based on their KEY DATA alone. So, if
    // we group them by their key data - if any key has more than one
    // vertex mapped to it, those vertices are dupes.
    // When we find duplicates, we group them in an ArrayList (there can be
    // more than one duplicate for one set of key data)
    // Then these dupeSets are grouped up and returned.

    // keyVals2VidHash: concatenated-key-values -> vids sharing those values.
    // vtxHash: vid -> vertex, so dupe vids can be mapped back to vertices.
    HashMap <String, ArrayList<String>> keyVals2VidHash = new HashMap <String, ArrayList<String>>();
    HashMap <String,Vertex> vtxHash = new HashMap <String,Vertex>();
    Iterator<Vertex> pItr = passedVertList.iterator();
    while (pItr.hasNext()) {
        Vertex tvx = pItr.next();
        String thisVid = tvx.id().toString();
        vtxHash.put(thisVid, tvx);

        // if there are more than one vertexId mapping to the same keyProps -- they are dupes
        // we dont check till later since a set can contain more than 2.
        String hKey = getNodeKeyValString( tvx, keyPropNamesArr );
        if( keyVals2VidHash.containsKey(hKey) ){
            // We've already seen this key
            ArrayList <String> tmpVL = (ArrayList <String>)keyVals2VidHash.get(hKey);
            keyVals2VidHash.put(hKey, tmpVL);
            // (else) First time for this key
            ArrayList <String> tmpVL = new ArrayList <String>();
            keyVals2VidHash.put(hKey, tmpVL);

    catch (Exception e) {
        // Deliberate absorb-and-continue: one bad vertex should not stop
        // the grooming pass.
        LOGGER.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);

    // Now walk the key-value groups; any group with more than one vid is a
    // duplicate set -- map the vids back to vertices and collect them.
    for( Map.Entry<String, ArrayList<String>> entry : keyVals2VidHash.entrySet() ){
        ArrayList <String> vidList = entry.getValue();

        if( !vidList.isEmpty() && vidList.size() > 1 ){
            // There are more than one vertex id's using the same key info
            ArrayList <Vertex> vertList = new ArrayList <Vertex> ();
            for (int i = 0; i < vidList.size(); i++) {
                String tmpVid = vidList.get(i);
                vertList.add(vtxHash.get(tmpVid));

            returnList.add(vertList);

    catch (Exception e) {
        LOGGER.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);

}// End of getDupeSets4NonDepNodes()
/**
 * Get values of the key properties for a node as a single string
 *
 * The result is used as a grouping hash key, so for a fixed list of
 * property names it is stable per vertex.
 *
 * @param tvx the vertex to pull the properties from
 * @param keyPropNamesArr collection of key prop names
 * @return a String of concatenated values
 */
private static String getNodeKeyValString( Vertex tvx,
        ArrayList <String> keyPropNamesArr ) {

    String retString = "";
    Iterator <String> propItr = keyPropNamesArr.iterator();
    while( propItr.hasNext() ){
        String propName = propItr.next();

        Object propValObj = tvx.property(propName).orElse(null);
        // NOTE(review): if the property is absent, propValObj is null and
        // toString() will NPE -- confirm callers guarantee presence.
        retString = " " + retString + propValObj.toString();

}// End of getNodeKeyValString()
/**
 * See if querying by just the (indexed) key params brings back exactly one
 * of the two given vertices -- i.e. the DB index still points at one of
 * them while the other is stranded.
 *
 * @param transId the trans id
 * @param fromAppId the from app id
 * @param gts traversal source to query
 * @param keyPropValsHash key property names/values to look up with
 * @param nType the node type
 * @param vidAL vertex id of the first candidate
 * @param vidBL vertex id of the second candidate
 * @param apiVer the api version
 * @return the vid (as a String) of whichever candidate the index resolves
 *         to, or "" when neither could be singled out
 */
static private String findJustOneUsingIndex( String transId, String fromAppId,
        GraphTraversalSource gts, HashMap <String,Object> keyPropValsHash,
        String nType, Long vidAL, Long vidBL, String apiVer){

    // See if querying by JUST the key params (which should be indexed) brings back
    // ONLY one of the two vertices. Ie. the db still has a pointer to one of them
    // and the other one is sort of stranded.
    String returnVid = "";    // "" means "could not single one out"

    List <Vertex> tmpVertList = getNodeJustUsingKeyParams( transId, fromAppId, gts,
            nType, keyPropValsHash, apiVer );
    if( tmpVertList != null && tmpVertList.size() == 1 ){
        // We got just one - if it matches one of the ones we're looking
        // for, then return that VID
        Vertex tmpV = tmpVertList.get(0);
        String thisVid = tmpV.id().toString();
        if( thisVid.equals(vidAL.toString()) || thisVid.equals(vidBL.toString()) ){
            String msg = " vid = " + thisVid + " is one of two that the DB can retrieve directly ------";
            //System.out.println(msg);
            returnVid = thisVid;

    catch ( AAIException ae ){
        // Best-effort: a lookup failure just means the index cannot be used
        // to break the tie.
        String emsg = "Error trying to get node just by key " + ae.getMessage();
        //System.out.println(emsg);

}// End of findJustOneUsingIndex()