/*
 * ============LICENSE_START=======================================================
 * ================================================================================
 * Copyright © 2017 AT&T Intellectual Property. All rights reserved.
 * ================================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ============LICENSE_END=========================================================
 *
 * ECOMP is a trademark and service mark of AT&T Intellectual Property.
 */
22 package org.onap.aai.dbgen;
24 import java.io.BufferedReader;
25 import java.io.BufferedWriter;
27 import java.io.FileReader;
28 import java.io.FileWriter;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.HashMap;
33 import java.util.Iterator;
34 import java.util.LinkedHashSet;
35 import java.util.List;
37 import java.util.Map.Entry;
38 import java.util.Properties;
40 import java.util.UUID;
42 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
43 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
44 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
45 import org.apache.tinkerpop.gremlin.structure.Direction;
46 import org.apache.tinkerpop.gremlin.structure.Edge;
47 import org.apache.tinkerpop.gremlin.structure.Graph;
48 import org.apache.tinkerpop.gremlin.structure.Property;
49 import org.apache.tinkerpop.gremlin.structure.Vertex;
50 import org.apache.tinkerpop.gremlin.structure.VertexProperty;
51 import org.onap.aai.db.props.AAIProperties;
52 import org.onap.aai.dbmap.AAIGraph;
53 import org.onap.aai.dbmap.AAIGraphConfig;
54 import org.onap.aai.exceptions.AAIException;
55 import org.onap.aai.introspection.Introspector;
56 import org.onap.aai.introspection.Loader;
57 import org.onap.aai.introspection.LoaderFactory;
58 import org.onap.aai.introspection.ModelType;
59 import org.onap.aai.introspection.exceptions.AAIUnknownObjectException;
60 import org.onap.aai.logging.ErrorLogHelper;
61 import org.onap.aai.logging.LogFormatTools;
62 import org.onap.aai.logging.LoggingContext;
63 import org.onap.aai.serialization.db.AAIDirection;
64 import org.onap.aai.serialization.db.EdgeProperty;
65 import org.onap.aai.util.*;
66 import org.onap.aai.logging.LoggingContext.StatusCode;
68 import com.att.eelf.configuration.Configuration;
69 import com.att.eelf.configuration.EELFLogger;
70 import com.att.eelf.configuration.EELFManager;
71 import com.thinkaurelius.titan.core.TitanFactory;
72 import com.thinkaurelius.titan.core.TitanGraph;
75 public class DataGrooming {
77 private static EELFLogger logger;
78 private static final String FROMAPPID = "AAI-DB";
79 private static final String TRANSID = UUID.randomUUID().toString();
80 private static int dupeGrpsDeleted = 0;
85 * @param args the arguments
87 public static void main(String[] args) {
89 // Set the logging file properties to be used by EELFManager
90 System.setProperty("aai.service.name", DataGrooming.class.getSimpleName());
91 Properties props = System.getProperties();
92 props.setProperty(Configuration.PROPERTY_LOGGING_FILE_NAME, AAIConstants.AAI_DATA_GROOMING_LOGBACK_PROPS);
93 props.setProperty(Configuration.PROPERTY_LOGGING_FILE_PATH, AAIConstants.AAI_HOME_ETC_APP_PROPERTIES);
94 logger = EELFManager.getInstance().getLogger(DataGrooming.class);
95 String ver = "version"; // Placeholder
96 Boolean doAutoFix = false;
97 Boolean edgesOnlyFlag = false;
98 Boolean dontFixOrphansFlag = false;
99 Boolean skipHostCheck = false;
100 Boolean singleCommits = false;
101 Boolean dupeCheckOff = false;
102 Boolean dupeFixOn = false;
103 Boolean ghost2CheckOff = false;
104 Boolean ghost2FixOn = false;
105 Boolean neverUseCache = false;
106 Boolean skipEdgeCheckFlag = false;
108 LoggingContext.init();
109 LoggingContext.partnerName(FROMAPPID);
110 LoggingContext.serviceName(AAIConstants.AAI_RESOURCES_MS);
111 LoggingContext.component("dataGrooming");
112 LoggingContext.targetEntity(AAIConstants.AAI_RESOURCES_MS);
113 LoggingContext.targetServiceName("main");
114 LoggingContext.requestId(TRANSID);
115 LoggingContext.statusCode(StatusCode.COMPLETE);
116 LoggingContext.responseCode(LoggingContext.SUCCESS);
118 int timeWindowMinutes = 0; // A value of 0 means that we will not have a time-window -- we will look
119 // at all nodes of the passed-in nodeType.
122 int maxRecordsToFix = AAIConstants.AAI_GROOMING_DEFAULT_MAX_FIX;
123 int sleepMinutes = AAIConstants.AAI_GROOMING_DEFAULT_SLEEP_MINUTES;
125 String maxFixStr = AAIConfig.get("aai.grooming.default.max.fix");
126 if( maxFixStr != null && !maxFixStr.equals("") ){
127 maxRecordsToFix = Integer.parseInt(maxFixStr);
129 String sleepStr = AAIConfig.get("aai.grooming.default.sleep.minutes");
130 if( sleepStr != null && !sleepStr.equals("") ){
131 sleepMinutes = Integer.parseInt(sleepStr);
134 catch ( Exception e ){
135 // Don't worry, we'll just use the defaults that we got from AAIConstants
136 logger.warn("WARNING - could not pick up aai.grooming values from aaiconfig.properties file. ");
139 String prevFileName = "";
141 FormatDate fd = new FormatDate("yyyyMMddHHmm", "GMT");
142 String dteStr = fd.getDateTime();
144 if (args.length > 0) {
145 // They passed some arguments in that will affect processing
146 for (int i = 0; i < args.length; i++) {
147 String thisArg = args[i];
148 if (thisArg.equals("-edgesOnly")) {
149 edgesOnlyFlag = true;
150 } else if (thisArg.equals("-autoFix")) {
152 } else if (thisArg.equals("-skipHostCheck")) {
153 skipHostCheck = true;
154 } else if (thisArg.equals("-dontFixOrphans")) {
155 dontFixOrphansFlag = true;
156 } else if (thisArg.equals("-singleCommits")) {
157 singleCommits = true;
158 } else if (thisArg.equals("-dupeCheckOff")) {
160 } else if (thisArg.equals("-dupeFixOn")) {
162 } else if (thisArg.equals("-ghost2CheckOff")) {
163 ghost2CheckOff = true;
164 } else if (thisArg.equals("-neverUseCache")) {
165 neverUseCache = true;
166 } else if (thisArg.equals("-ghost2FixOn")) {
168 } else if (thisArg.equals("-skipEdgeChecks")) {
169 skipEdgeCheckFlag = true;
170 } else if (thisArg.equals("-maxFix")) {
172 if (i >= args.length) {
173 LoggingContext.statusCode(StatusCode.ERROR);
174 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
175 logger.error(" No value passed with -maxFix option. ");
176 AAISystemExitUtil.systemExitCloseAAIGraph(0);
178 String nextArg = args[i];
180 maxRecordsToFix = Integer.parseInt(nextArg);
181 } catch (Exception e) {
182 LoggingContext.statusCode(StatusCode.ERROR);
183 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
184 logger.error("Bad value passed with -maxFix option: ["
186 AAISystemExitUtil.systemExitCloseAAIGraph(0);
188 } else if (thisArg.equals("-sleepMinutes")) {
190 if (i >= args.length) {
191 LoggingContext.statusCode(StatusCode.ERROR);
192 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
193 logger.error("No value passed with -sleepMinutes option.");
194 AAISystemExitUtil.systemExitCloseAAIGraph(0);
196 String nextArg = args[i];
198 sleepMinutes = Integer.parseInt(nextArg);
199 } catch (Exception e) {
200 LoggingContext.statusCode(StatusCode.ERROR);
201 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
202 logger.error("Bad value passed with -sleepMinutes option: ["
204 AAISystemExitUtil.systemExitCloseAAIGraph(0);
206 } else if (thisArg.equals("-timeWindowMinutes")) {
208 if (i >= args.length) {
209 LoggingContext.statusCode(StatusCode.ERROR);
210 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
211 logger.error("No value passed with -timeWindowMinutes option.");
212 AAISystemExitUtil.systemExitCloseAAIGraph(0);
214 String nextArg = args[i];
216 timeWindowMinutes = Integer.parseInt(nextArg);
217 } catch (Exception e) {
218 LoggingContext.statusCode(StatusCode.ERROR);
219 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
220 logger.error("Bad value passed with -timeWindowMinutes option: ["
222 AAISystemExitUtil.systemExitCloseAAIGraph(0);
225 } else if (thisArg.equals("-f")) {
227 if (i >= args.length) {
228 LoggingContext.statusCode(StatusCode.ERROR);
229 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
230 logger.error(" No value passed with -f option. ");
231 AAISystemExitUtil.systemExitCloseAAIGraph(0);
233 prevFileName = args[i];
235 LoggingContext.statusCode(StatusCode.ERROR);
236 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
237 logger.error(" Unrecognized argument passed to DataGrooming: ["
239 logger.error(" Valid values are: -f -autoFix -maxFix -edgesOnly -skipEdgeChecks -dupeFixOn -donFixOrphans -timeWindowMinutes -sleepMinutes -neverUseCache");
240 AAISystemExitUtil.systemExitCloseAAIGraph(0);
245 String windowTag = "FULL";
246 if( timeWindowMinutes > 0 ){
247 windowTag = "PARTIAL";
249 String groomOutFileName = "dataGrooming." + windowTag + "." + dteStr + ".out";
252 LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
255 catch (Exception ex){
256 LoggingContext.statusCode(StatusCode.ERROR);
257 LoggingContext.responseCode(LoggingContext.BUSINESS_PROCESS_ERROR);
258 logger.error("ERROR - Could not create loader " + LogFormatTools.getStackTop(ex));
259 AAISystemExitUtil.systemExitCloseAAIGraph(1);
263 logger.info(" We will skip the HostCheck as requested. ");
267 if (!prevFileName.isEmpty()) {
268 // They are trying to fix some data based on a data in a
270 logger.info(" Call doTheGrooming() with a previous fileName ["
271 + prevFileName + "] for cleanup. ");
272 Boolean finalShutdownFlag = true;
273 Boolean cacheDbOkFlag = false;
274 doTheGrooming(prevFileName, edgesOnlyFlag, dontFixOrphansFlag,
275 maxRecordsToFix, groomOutFileName, ver, singleCommits,
276 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
277 finalShutdownFlag, cacheDbOkFlag,
278 skipEdgeCheckFlag, timeWindowMinutes);
279 } else if (doAutoFix) {
280 // They want us to run the processing twice -- first to look for
281 // delete candidates, then after
282 // napping for a while, run it again and delete any candidates
283 // that were found by the first run.
284 // Note: we will produce a separate output file for each of the
286 logger.info(" Doing an auto-fix call to Grooming. ");
287 logger.info(" First, Call doTheGrooming() to look at what's out there. ");
288 Boolean finalShutdownFlag = false;
289 Boolean cacheDbOkFlag = true;
290 int fixCandCount = doTheGrooming("", edgesOnlyFlag,
291 dontFixOrphansFlag, maxRecordsToFix, groomOutFileName,
292 ver, singleCommits, dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
293 finalShutdownFlag, cacheDbOkFlag,
294 skipEdgeCheckFlag, timeWindowMinutes);
295 if (fixCandCount == 0) {
296 logger.info(" No fix-Candidates were found by the first pass, so no second/fix-pass is needed. ");
298 // We'll sleep a little and then run a fix-pass based on the
299 // first-run's output file.
301 logger.info("About to sleep for " + sleepMinutes
303 int sleepMsec = sleepMinutes * 60 * 1000;
304 Thread.sleep(sleepMsec);
305 } catch (InterruptedException ie) {
306 logger.info("\n >>> Sleep Thread has been Interrupted <<< ");
307 AAISystemExitUtil.systemExitCloseAAIGraph(0);
310 dteStr = fd.getDateTime();
311 String secondGroomOutFileName = "dataGrooming." + dteStr
313 logger.info(" Now, call doTheGrooming() a second time and pass in the name of the file "
314 + "generated by the first pass for fixing: ["
315 + groomOutFileName + "]");
316 finalShutdownFlag = true;
317 cacheDbOkFlag = false;
318 doTheGrooming(groomOutFileName, edgesOnlyFlag,
319 dontFixOrphansFlag, maxRecordsToFix,
320 secondGroomOutFileName, ver, singleCommits,
321 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
322 finalShutdownFlag, cacheDbOkFlag,
323 skipEdgeCheckFlag, timeWindowMinutes);
326 // Do the grooming - plain vanilla (no fix-it-file, no
328 Boolean finalShutdownFlag = true;
329 logger.info(" Call doTheGrooming() ");
330 Boolean cacheDbOkFlag = true;
332 // They have forbidden us from using a cached db connection.
333 cacheDbOkFlag = false;
335 doTheGrooming("", edgesOnlyFlag, dontFixOrphansFlag,
336 maxRecordsToFix, groomOutFileName, ver, singleCommits,
337 dupeCheckOff, dupeFixOn, ghost2CheckOff, ghost2FixOn,
338 finalShutdownFlag, cacheDbOkFlag,
339 skipEdgeCheckFlag, timeWindowMinutes);
341 } catch (Exception ex) {
342 LoggingContext.statusCode(StatusCode.ERROR);
343 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
344 logger.error("Exception while grooming data " + LogFormatTools.getStackTop(ex));
347 logger.info(" Done! ");
348 AAISystemExitUtil.systemExitCloseAAIGraph(0);
355 * @param fileNameForFixing the file name for fixing
356 * @param edgesOnlyFlag the edges only flag
357 * @param dontFixOrphansFlag the dont fix orphans flag
358 * @param maxRecordsToFix the max records to fix
359 * @param groomOutFileName the groom out file name
360 * @param version the version
361 * @param singleCommits the single commits
362 * @param dupeCheckOff the dupe check off
363 * @param dupeFixOn the dupe fix on
364 * @param ghost2CheckOff the ghost 2 check off
365 * @param ghost2FixOn the ghost 2 fix on
366 * @param finalShutdownFlag the final shutdown flag
367 * @param cacheDbOkFlag the cacheDbOk flag
370 private static int doTheGrooming(String fileNameForFixing,
371 Boolean edgesOnlyFlag, Boolean dontFixOrphansFlag,
372 int maxRecordsToFix, String groomOutFileName, String version,
373 Boolean singleCommits,
374 Boolean dupeCheckOff, Boolean dupeFixOn,
375 Boolean ghost2CheckOff, Boolean ghost2FixOn,
376 Boolean finalShutdownFlag, Boolean cacheDbOkFlag,
377 Boolean skipEdgeCheckFlag, int timeWindowMinutes) {
379 logger.debug(" Entering doTheGrooming \n");
381 int cleanupCandidateCount = 0;
382 long windowStartTime = 0; // Translation of the window into a starting timestamp
383 BufferedWriter bw = null;
384 TitanGraph graph = null;
385 TitanGraph graph2 = null;
387 boolean executeFinalCommit = false;
388 Set<String> deleteCandidateList = new LinkedHashSet<>();
389 Set<String> processedVertices = new LinkedHashSet<>();
393 if( timeWindowMinutes > 0 ){
394 // Translate the window value (ie. 30 minutes) into a unix timestamp like
395 // we use in the db - so we can select data created after that time.
396 windowStartTime = figureWindowStartTime( timeWindowMinutes );
400 String targetDir = AAIConstants.AAI_HOME + AAIConstants.AAI_FILESEP
401 + "logs" + AAIConstants.AAI_FILESEP + "data"
402 + AAIConstants.AAI_FILESEP + "dataGrooming";
404 // Make sure the target directory exists
405 new File(targetDir).mkdirs();
407 if (!fileNameForFixing.isEmpty()) {
408 deleteCandidateList = getDeleteList(targetDir,
409 fileNameForFixing, edgesOnlyFlag, dontFixOrphansFlag,
413 if (deleteCandidateList.size() > maxRecordsToFix) {
414 LoggingContext.statusCode(StatusCode.ERROR);
415 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
416 logger.warn(" >> WARNING >> Delete candidate list size ("
417 + deleteCandidateList.size()
418 + ") is too big. The maxFix we are using is: "
420 + ". No candidates will be deleted. ");
421 // Clear out the list so it won't be processed below.
422 deleteCandidateList = new LinkedHashSet<>();
425 String fullOutputFileName = targetDir + AAIConstants.AAI_FILESEP
427 File groomOutFile = new File(fullOutputFileName);
429 groomOutFile.createNewFile();
430 } catch (IOException e) {
431 String emsg = " Problem creating output file ["
432 + fullOutputFileName + "], exception=" + e.getMessage();
433 throw new AAIException("AAI_6124", emsg);
436 logger.info(" Will write to " + fullOutputFileName );
437 bw = new BufferedWriter(new FileWriter(groomOutFile.getAbsoluteFile()));
438 ErrorLogHelper.loadProperties();
440 logger.info(" ---- NOTE --- about to open graph (takes a little while)--------\n");
443 // Since we're just reading (not deleting/fixing anything), we can use
444 // a cached connection to the DB
445 graph = TitanFactory.open(new AAIGraphConfig.Builder(AAIConstants.CACHED_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("cached").buildConfiguration());
448 graph = TitanFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime1").buildConfiguration());
451 String emsg = "null graph object in DataGrooming\n";
452 throw new AAIException("AAI_6101", emsg);
455 logger.debug(" Got the graph object. ");
457 g = graph.newTransaction();
459 String emsg = "null graphTransaction object in DataGrooming\n";
460 throw new AAIException("AAI_6101", emsg);
462 GraphTraversalSource source1 = g.traversal();
464 ArrayList<String> errArr = new ArrayList<>();
465 int totalNodeCount = 0;
466 HashMap<String, String> misMatchedHash = new HashMap<String, String>();
467 HashMap<String, Vertex> orphanNodeHash = new HashMap<String, Vertex>();
468 HashMap<String, Vertex> missingDepNodeHash = new HashMap<String, Vertex>();
469 HashMap<String, Edge> oneArmedEdgeHash = new HashMap<String, Edge>();
470 HashMap<String, String> emptyVertexHash = new HashMap<String, String>();
471 HashMap<String, Vertex> ghostNodeHash = new HashMap<String, Vertex>();
472 ArrayList<String> dupeGroups = new ArrayList<>();
474 Loader loader = LoaderFactory.createLoaderForVersion(ModelType.MOXY, AAIProperties.LATEST);
476 Set<Entry<String, Introspector>> entrySet = loader.getAllObjects().entrySet();
479 logger.info(" Starting DataGrooming Processing ");
482 logger.info(" NOTE >> Skipping Node processing as requested. Will only process Edges. << ");
485 for (Entry<String, Introspector> entry : entrySet) {
486 String nType = entry.getKey();
488 int thisNtDeleteCount = 0;
490 logger.debug(" > Look at : [" + nType + "] ...");
491 ntList = ntList + "," + nType;
493 // Get a collection of the names of the key properties for this nodeType to use later
494 // Determine what the key fields are for this nodeType - use an arrayList so they
495 // can be gotten out in a consistent order.
496 Set <String> keyPropsSet = entry.getValue().getKeys();
497 ArrayList <String> keyProps = new ArrayList <String> ();
498 keyProps.addAll(keyPropsSet);
500 // Get the types of nodes that this nodetype depends on for uniqueness (if any)
501 Collection <String> depNodeTypes = loader.introspectorFromName(nType).getDependentOn();
503 // Loop through all the nodes of this Node type
504 int lastShownForNt = 0;
505 ArrayList <Vertex> tmpList = new ArrayList <> ();
506 Iterator <Vertex> iterv = source1.V().has("aai-node-type",nType);
507 while (iterv.hasNext()) {
508 // We put the nodes into an ArrayList because the graph.query iterator can time out
509 tmpList.add(iterv.next());
512 Iterator <Vertex> iter = tmpList.iterator();
513 while (iter.hasNext()) {
516 if( thisNtCount == lastShownForNt + 250 ){
517 lastShownForNt = thisNtCount;
518 logger.debug("count for " + nType + " so far = " + thisNtCount );
520 Vertex thisVtx = iter.next();
521 if( windowStartTime > 0 ){
522 // They are using the time-window, so we only want nodes that are updated after a
523 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
524 Object objModTimeStamp = thisVtx.property("aai-last-mod-ts").orElse(null);
525 if( objModTimeStamp != null ){
526 long thisNodeModTime = (long)objModTimeStamp;
527 if( thisNodeModTime < windowStartTime ){
528 // It has a last modified ts and is NOT in our window, so we can pass over it
534 String thisVid = thisVtx.id().toString();
535 if (processedVertices.contains(thisVid)) {
536 logger.debug("skipping already processed vertex: " + thisVid);
540 List <Vertex> secondGetList = new ArrayList <> ();
541 // -----------------------------------------------------------------------
542 // For each vertex of this nodeType, we want to:
543 // a) make sure that it can be retrieved using it's AAI defined key
544 // b) make sure that it is not a duplicate
545 // -----------------------------------------------------------------------
547 // For this instance of this nodeType, get the key properties
548 HashMap<String, Object> propHashWithKeys = new HashMap<>();
549 Iterator<String> keyPropI = keyProps.iterator();
550 while (keyPropI.hasNext()) {
551 String propName = keyPropI.next();
553 //delete an already deleted vertex
554 Object obj = thisVtx.<Object>property(propName).orElse(null);
556 propVal = obj.toString();
558 propHashWithKeys.put(propName, propVal);
561 // If this node is dependent on another for uniqueness, then do the query from that parent node
562 // Note - all of our nodes that are dependent on others for uniqueness are
563 // "children" of that node.
564 boolean depNodeOk = true;
565 if( depNodeTypes.isEmpty() ){
566 // This kind of node is not dependent on any other.
567 // Make sure we can get it back using it's key properties (that is the
568 // phantom checking) and that we only get one. Note - we also need
569 // to collect data for a second type of dupe-checking which is done later.
570 secondGetList = getNodeJustUsingKeyParams( TRANSID, FROMAPPID, source1, nType,
571 propHashWithKeys, version );
574 // This kind of node is dependent on another for uniqueness.
575 // Start at it's parent (the dependent vertex) and make sure we can get it
576 // back using it's key properties and that we only get one.
577 Iterator <Vertex> vertI2 = source1.V(thisVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
578 Vertex parentVtx = null;
580 while( vertI2 != null && vertI2.hasNext() ){
581 parentVtx = vertI2.next();
587 //List<Vertex> vertI2 = g.traversal().V(thisVtx).union(__.outE().has("isParent-REV",true).outV(),__.inE().has("isParent",true).inV()).toList();
588 //if( vertI2.isEmpty()){
590 // It's Missing it's dependent/parent node
592 boolean zeroEdges = false;
594 Iterator<Edge> tmpEdgeIter = thisVtx.edges(Direction.BOTH);
596 while( tmpEdgeIter.hasNext() ){
600 if( edgeCount == 0 ){
603 } catch (Exception ex) {
604 LoggingContext.statusCode(StatusCode.ERROR);
605 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
606 logger.warn("WARNING from inside the for-each-vid-loop orphan-edges-check " + LogFormatTools.getStackTop(ex) );
609 if (deleteCandidateList.contains(thisVid)) {
610 boolean okFlag = true;
612 processedVertices.add(thisVtx.id().toString());
616 } catch (Exception e) {
618 LoggingContext.statusCode(StatusCode.ERROR);
619 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
620 logger.error("ERROR trying to delete missing-dep-node VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
623 logger.info(" DELETED missing-dep-node VID = " + thisVid);
626 // We count nodes missing their depNodes two ways - the first if it has
627 // at least some edges, and the second if it has zero edges. Either
628 // way, they are effectively orphaned.
629 // NOTE - Only nodes that have dependent nodes are ever considered "orphaned".
631 missingDepNodeHash.put(thisVid, thisVtx);
634 orphanNodeHash.put(thisVid, thisVtx);
638 else if ( pCount > 1 ){
639 // Not sure how this could happen? Should we do something here?
643 // We found the parent - so use it to do the second-look.
644 // NOTE --- We're just going to do the same check from the other direction - because
645 // there could be duplicates or the pointer going the other way could be broken
646 ArrayList <Vertex> tmpListSec = new ArrayList <> ();
648 tmpListSec = getConnectedChildrenOfOneType( source1, parentVtx, nType ) ;
649 Iterator<Vertex> vIter = tmpListSec.iterator();
650 while (vIter.hasNext()) {
651 Vertex tmpV = vIter.next();
652 if( vertexHasTheseKeys(tmpV, propHashWithKeys) ){
653 secondGetList.add(tmpV);
657 }// end of -- else this is a dependent node -- piece
659 if( depNodeOk && (secondGetList == null || secondGetList.size() == 0) ){
660 // We could not get the node back using it's own key info.
661 // So, it's a PHANTOM
662 if (deleteCandidateList.contains(thisVid)) {
663 boolean okFlag = true;
668 } catch (Exception e) {
670 LoggingContext.statusCode(StatusCode.ERROR);
671 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
672 logger.error("ERROR trying to delete phantom VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
675 logger.info(" DELETED VID = " + thisVid);
678 ghostNodeHash.put(thisVid, thisVtx);
681 else if( (secondGetList.size() > 1) && depNodeOk && !dupeCheckOff ){
682 // Found some DUPLICATES - need to process them
683 logger.info(" - now check Dupes for this guy - ");
684 List<String> tmpDupeGroups = checkAndProcessDupes(
685 TRANSID, FROMAPPID, g, source1, version,
686 nType, secondGetList, dupeFixOn,
687 deleteCandidateList, singleCommits, dupeGroups, loader);
688 Iterator<String> dIter = tmpDupeGroups.iterator();
689 while (dIter.hasNext()) {
690 // Add in any newly found dupes to our running list
691 String tmpGrp = dIter.next();
692 logger.info("Found set of dupes: [" + tmpGrp + "]");
693 dupeGroups.add(tmpGrp);
697 catch (AAIException e1) {
698 LoggingContext.statusCode(StatusCode.ERROR);
699 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
700 logger.warn(" For nodeType = " + nType + " Caught exception", e1);
701 errArr.add(e1.getErrorObject().toString());
703 catch (Exception e2) {
704 LoggingContext.statusCode(StatusCode.ERROR);
705 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
706 logger.warn(" For nodeType = " + nType
707 + " Caught exception", e2);
708 errArr.add(e2.getMessage());
710 }// try block to enclose looping over each single vertex
711 catch (Exception exx) {
712 LoggingContext.statusCode(StatusCode.ERROR);
713 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
714 logger.warn("WARNING from inside the while-verts-loop ", exx);
717 } // while loop for each record of a nodeType
719 if( depNodeTypes.isEmpty() && !dupeCheckOff ){
720 // For this nodeType, we haven't looked at the possibility of a
721 // non-dependent node where two verts have same key info
722 ArrayList<ArrayList<Vertex>> nonDependentDupeSets = new ArrayList<ArrayList<Vertex>>();
723 nonDependentDupeSets = getDupeSets4NonDepNodes(
724 TRANSID, FROMAPPID, g,
725 version, nType, tmpList,
727 // For each set found (each set is for a unique instance of key-values),
728 // process the dupes found
729 Iterator<ArrayList<Vertex>> dsItr = nonDependentDupeSets.iterator();
730 while( dsItr.hasNext() ){
731 ArrayList<Vertex> dupeList = dsItr.next();
732 logger.info(" - now check Dupes for some non-dependent guys - ");
733 List<String> tmpDupeGroups = checkAndProcessDupes(
734 TRANSID, FROMAPPID, g, source1, version,
735 nType, dupeList, dupeFixOn,
736 deleteCandidateList, singleCommits, dupeGroups, loader);
737 Iterator<String> dIter = tmpDupeGroups.iterator();
738 while (dIter.hasNext()) {
739 // Add in any newly found dupes to our running list
740 String tmpGrp = dIter.next();
741 logger.info("Found set of dupes: [" + tmpGrp + "]");
742 dupeGroups.add(tmpGrp);
746 }// end of extra dupe check for non-dependent nodes
748 if ( (thisNtDeleteCount > 0) && singleCommits ) {
749 // NOTE - the singleCommits option is not used in normal processing
751 g = AAIGraph.getInstance().getGraph().newTransaction();
754 thisNtDeleteCount = 0;
755 logger.info( " Processed " + thisNtCount + " records for [" + nType + "], " + totalNodeCount + " total overall. " );
757 }// While-loop for each node type
759 }// end of check to make sure we weren't only supposed to do edges
762 if( !skipEdgeCheckFlag ){
763 // --------------------------------------------------------------------------------------
764 // Now, we're going to look for one-armed-edges. Ie. an edge that
766 // been deleted (because a vertex on one side was deleted) but
767 // somehow was not deleted.
768 // So the one end of it points to a vertexId -- but that vertex is
770 // --------------------------------------------------------------------------------------
772 // To do some strange checking - we need a second graph object
773 logger.debug(" ---- DEBUG --- about to open a SECOND graph (takes a little while)--------\n");
774 // Note - graph2 just reads - but we want it to use a fresh connection to
775 // the database, so we are NOT using the CACHED DB CONFIG here.
776 graph2 = TitanFactory.open(new AAIGraphConfig.Builder(AAIConstants.REALTIME_DB_CONFIG).forService(DataGrooming.class.getSimpleName()).withGraphType("realtime2").buildConfiguration());
777 if (graph2 == null) {
778 String emsg = "null graph2 object in DataGrooming\n";
779 throw new AAIException("AAI_6101", emsg);
781 logger.debug("Got the graph2 object... \n");
783 g2 = graph2.newTransaction();
785 String emsg = "null graphTransaction2 object in DataGrooming\n";
786 throw new AAIException("AAI_6101", emsg);
789 ArrayList<Vertex> vertList = new ArrayList<>();
790 Iterator<Vertex> vItor3 = g.traversal().V();
791 // Gotta hold these in a List - or else HBase times out as you cycle
793 while (vItor3.hasNext()) {
794 Vertex v = vItor3.next();
799 Iterator<Vertex> vItor2 = vertList.iterator();
800 logger.info(" Checking for bad edges --- ");
802 while (vItor2.hasNext()) {
807 } catch (Exception vex) {
808 LoggingContext.statusCode(StatusCode.ERROR);
809 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
810 logger.warn(">>> WARNING trying to get next vertex on the vItor2 ");
815 String thisVertId = "";
817 thisVertId = v.id().toString();
818 } catch (Exception ev) {
819 LoggingContext.statusCode(StatusCode.ERROR);
820 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
821 logger.warn("WARNING when doing getId() on a vertex from our vertex list. ");
824 if (ghostNodeHash.containsKey(thisVertId)) {
825 // This is a phantom node, so don't try to use it
826 logger.info(" >> Skipping edge check for edges from vertexId = "
828 + ", since that guy is a Phantom Node");
832 if( windowStartTime > 0 ){
833 // They are using the time-window, so we only want nodes that are updated after a
834 // passed-in timestamp OR that have no last-modified-timestamp which means they are suspicious.
835 Object objModTimeStamp = v.property("aai-last-mod-ts").orElse(null);
836 if( objModTimeStamp != null ){
837 long thisNodeModTime = (long)objModTimeStamp;
838 if( thisNodeModTime < windowStartTime ){
839 // It has a last modified ts and is NOT in our window, so we can pass over it
845 if (counter == lastShown + 250) {
847 logger.info("... Checking edges for vertex # "
850 Iterator<Edge> eItor = v.edges(Direction.BOTH);
851 while (eItor.hasNext()) {
857 } catch (Exception iex) {
858 LoggingContext.statusCode(StatusCode.ERROR);
859 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
860 logger.warn(">>> WARNING trying to get next edge on the eItor ", iex);
866 } catch (Exception err) {
867 LoggingContext.statusCode(StatusCode.ERROR);
868 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
869 logger.warn(">>> WARNING trying to get edge's In-vertex ", err);
873 Vertex ghost2 = null;
875 Boolean keysMissing = true;
876 Boolean cantGetUsingVid = false;
879 Object ob = vIn.<Object>property("aai-node-type").orElse(null);
881 vNtI = ob.toString();
882 keysMissing = anyKeyFieldsMissing(vNtI, vIn, loader);
887 vIdI = ob.toString();
888 vIdLong = Long.parseLong(vIdI);
891 if( ! ghost2CheckOff ){
892 Vertex connectedVert = g2.traversal().V(vIdLong).next();
893 if( connectedVert == null ) {
894 LoggingContext.statusCode(StatusCode.ERROR);
895 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
896 logger.warn( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
897 cantGetUsingVid = true;
899 // If we can NOT get this ghost with the SECOND graph-object,
900 // it is still a ghost since even though we can get data about it using the FIRST graph
903 ghost2 = g.traversal().V(vIdLong).next();
905 catch( Exception ex){
906 LoggingContext.statusCode(StatusCode.ERROR);
907 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
908 logger.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
910 if( ghost2 != null ){
911 ghostNodeHash.put(vIdI, ghost2);
914 }// end of the ghost2 checking
916 catch (Exception err) {
917 LoggingContext.statusCode(StatusCode.ERROR);
918 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
919 logger.warn(">>> WARNING trying to get edge's In-vertex props ", err);
922 if (keysMissing || vIn == null || vNtI.equals("")
923 || cantGetUsingVid) {
924 // this is a bad edge because it points to a vertex
925 // that isn't there anymore or is corrupted
926 String thisEid = e.id().toString();
927 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdI)) {
928 boolean okFlag = true;
929 if (!vIdI.equals("")) {
930 // try to get rid of the corrupted vertex
932 if( (ghost2 != null) && ghost2FixOn ){
939 // NOTE - the singleCommits option is not used in normal processing
941 g = AAIGraph.getInstance().getGraph().newTransaction();
944 } catch (Exception e1) {
946 LoggingContext.statusCode(StatusCode.ERROR);
947 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
948 logger.warn("WARNING when trying to delete bad-edge-connected VERTEX VID = "
952 logger.info(" DELETED vertex from bad edge = "
956 // remove the edge if we couldn't get the
961 // NOTE - the singleCommits option is not used in normal processing
963 g = AAIGraph.getInstance().getGraph().newTransaction();
966 } catch (Exception ex) {
967 // NOTE - often, the exception is just
968 // that this edge has already been
971 LoggingContext.statusCode(StatusCode.ERROR);
972 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
973 logger.warn("WARNING when trying to delete edge = "
977 logger.info(" DELETED edge = " + thisEid);
981 oneArmedEdgeHash.put(thisEid, e);
982 if ((vIn != null) && (vIn.id() != null)) {
983 emptyVertexHash.put(thisEid, vIn.id()
990 vOut = e.outVertex();
991 } catch (Exception err) {
992 LoggingContext.statusCode(StatusCode.ERROR);
993 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
994 logger.warn(">>> WARNING trying to get edge's Out-vertex ");
1000 cantGetUsingVid = false;
1003 Object ob = vOut.<Object>property("aai-node-type").orElse(null);
1005 vNtO = ob.toString();
1006 keysMissing = anyKeyFieldsMissing(vNtO,
1012 vIdO = ob.toString();
1013 vIdLong = Long.parseLong(vIdO);
1016 if( ! ghost2CheckOff ){
1017 Vertex connectedVert = g2.traversal().V(vIdLong).next();
1018 if( connectedVert == null ) {
1019 cantGetUsingVid = true;
1020 logger.info( "GHOST2 -- got NULL when doing getVertex for vid = " + vIdLong);
1021 // If we can get this ghost with the other graph-object, then get it -- it's still a ghost
1023 ghost2 = g.traversal().V(vIdLong).next();
1025 catch( Exception ex){
1026 LoggingContext.statusCode(StatusCode.ERROR);
1027 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1028 logger.warn( "GHOST2 -- Could not get the ghost info for a bad edge for vtxId = " + vIdLong, ex);
1030 if( ghost2 != null ){
1031 ghostNodeHash.put(vIdO, ghost2);
1035 } catch (Exception err) {
1036 LoggingContext.statusCode(StatusCode.ERROR);
1037 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1038 logger.warn(">>> WARNING trying to get edge's Out-vertex props ", err);
1041 if (keysMissing || vOut == null || vNtO.equals("")
1042 || cantGetUsingVid) {
1043 // this is a bad edge because it points to a vertex
1044 // that isn't there anymore
1045 String thisEid = e.id().toString();
1046 if (deleteCandidateList.contains(thisEid) || deleteCandidateList.contains(vIdO)) {
1047 boolean okFlag = true;
1048 if (!vIdO.equals("")) {
1049 // try to get rid of the corrupted vertex
1051 if( (ghost2 != null) && ghost2FixOn ){
1054 else if (vOut != null) {
1057 if (singleCommits) {
1058 // NOTE - the singleCommits option is not used in normal processing
1060 g = AAIGraph.getInstance().getGraph().newTransaction();
1063 } catch (Exception e1) {
1065 LoggingContext.statusCode(StatusCode.ERROR);
1066 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1067 logger.warn("WARNING when trying to delete bad-edge-connected VID = "
1071 logger.info(" DELETED vertex from bad edge = "
1075 // remove the edge if we couldn't get the
1079 if (singleCommits) {
1080 // NOTE - the singleCommits option is not used in normal processing
1082 g = AAIGraph.getInstance().getGraph().newTransaction();
1085 } catch (Exception ex) {
1086 // NOTE - often, the exception is just
1087 // that this edge has already been
1090 LoggingContext.statusCode(StatusCode.ERROR);
1091 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1092 logger.warn("WARNING when trying to delete edge = "
1096 logger.info(" DELETED edge = " + thisEid);
1100 oneArmedEdgeHash.put(thisEid, e);
1101 if ((vOut != null) && (vOut.id() != null)) {
1102 emptyVertexHash.put(thisEid, vOut.id()
1107 }// End of while-edges-loop
1108 } catch (Exception exx) {
1109 LoggingContext.statusCode(StatusCode.ERROR);
1110 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1111 logger.warn("WARNING from in the while-verts-loop ", exx);
1113 }// End of while-vertices-loop (the edge-checking)
1114 } // end of -- if we're not skipping the edge-checking
1117 deleteCount = deleteCount + dupeGrpsDeleted;
1118 if (!singleCommits && deleteCount > 0) {
1120 logger.info("About to do the commit for "
1121 + deleteCount + " removes. ");
1122 executeFinalCommit = true;
1123 logger.info("Commit was successful ");
1124 } catch (Exception excom) {
1125 LoggingContext.statusCode(StatusCode.ERROR);
1126 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1127 logger.error(" >>>> ERROR <<<< Could not commit changes. " + LogFormatTools.getStackTop(excom));
1132 int ghostNodeCount = ghostNodeHash.size();
1133 int orphanNodeCount = orphanNodeHash.size();
1134 int missingDepNodeCount = missingDepNodeHash.size();
1135 int oneArmedEdgeCount = oneArmedEdgeHash.size();
1136 int dupeCount = dupeGroups.size();
1138 deleteCount = deleteCount + dupeGrpsDeleted;
1140 bw.write("\n\n ============ Summary ==============\n");
1141 if( timeWindowMinutes == 0 ){
1142 bw.write("Ran FULL data grooming (no time-window). \n");
1145 bw.write("Ran PARTIAL data grooming just looking at data added/updated in the last " + timeWindowMinutes + " minutes. \n");
1148 bw.write("\nRan these nodeTypes: " + ntList + "\n\n");
1149 bw.write("There were this many delete candidates from previous run = "
1150 + deleteCandidateList.size() + "\n");
1151 if (dontFixOrphansFlag) {
1152 bw.write(" Note - we are not counting orphan nodes since the -dontFixOrphans parameter was used. \n");
1154 bw.write("Deleted this many delete candidates = " + deleteCount
1156 bw.write("Total number of nodes looked at = " + totalNodeCount
1158 bw.write("Ghost Nodes identified = " + ghostNodeCount + "\n");
1159 bw.write("Orphan Nodes identified = " + orphanNodeCount + "\n");
1160 bw.write("Bad Edges identified = " + oneArmedEdgeCount + "\n");
1161 bw.write("Missing Dependent Edge (but not orphaned) node count = "
1162 + missingDepNodeCount + "\n");
1163 bw.write("Duplicate Groups count = " + dupeCount + "\n");
1164 bw.write("MisMatching Label/aai-node-type count = "
1165 + misMatchedHash.size() + "\n");
1167 bw.write("\n ------------- Delete Candidates ---------\n");
1168 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1170 String vid = entry.getKey();
1171 bw.write("DeleteCandidate: Phantom Vid = [" + vid + "]\n");
1172 cleanupCandidateCount++;
1174 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1176 String vid = entry.getKey();
1177 bw.write("DeleteCandidate: OrphanDepNode Vid = [" + vid + "]\n");
1178 if (!dontFixOrphansFlag) {
1179 cleanupCandidateCount++;
1182 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1183 String eid = entry.getKey();
1184 bw.write("DeleteCandidate: Bad EDGE Edge-id = [" + eid + "]\n");
1185 cleanupCandidateCount++;
1187 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1189 String vid = entry.getKey();
1190 bw.write("DeleteCandidate: (maybe) missingDepNode Vid = ["
1192 cleanupCandidateCount++;
1194 bw.write("\n-- NOTE - To see DeleteCandidates for Duplicates, you need to look in the Duplicates Detail section below.\n");
1196 bw.write("\n ------------- GHOST NODES - detail ");
1197 for (Map.Entry<String, Vertex> entry : ghostNodeHash
1200 String vid = entry.getKey();
1201 bw.write("\n ==> Phantom Vid = " + vid + "\n");
1202 ArrayList<String> retArr = showPropertiesForNode(
1203 TRANSID, FROMAPPID, entry.getValue());
1204 for (String info : retArr) {
1205 bw.write(info + "\n");
1208 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1210 for (String info : retArr) {
1211 bw.write(info + "\n");
1213 } catch (Exception dex) {
1214 LoggingContext.statusCode(StatusCode.ERROR);
1215 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1216 logger.error("error trying to print detail info for a ghost-node: " + LogFormatTools.getStackTop(dex));
1220 bw.write("\n ------------- Missing Dependent Edge ORPHAN NODES - detail: ");
1221 for (Map.Entry<String, Vertex> entry : orphanNodeHash
1224 String vid = entry.getKey();
1225 bw.write("\n> Orphan Node Vid = " + vid + "\n");
1226 ArrayList<String> retArr = showPropertiesForNode(
1227 TRANSID, FROMAPPID, entry.getValue());
1228 for (String info : retArr) {
1229 bw.write(info + "\n");
1232 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1234 for (String info : retArr) {
1235 bw.write(info + "\n");
1237 } catch (Exception dex) {
1238 LoggingContext.statusCode(StatusCode.ERROR);
1239 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1240 logger.error("error trying to print detail info for a Orphan Node /missing dependent edge " + LogFormatTools.getStackTop(dex));
1244 bw.write("\n ------------- Missing Dependent Edge (but not orphan) NODES: ");
1245 for (Map.Entry<String, Vertex> entry : missingDepNodeHash
1248 String vid = entry.getKey();
1249 bw.write("\n> Missing edge to Dependent Node (but has edges) Vid = "
1251 ArrayList<String> retArr = showPropertiesForNode(
1252 TRANSID, FROMAPPID, entry.getValue());
1253 for (String info : retArr) {
1254 bw.write(info + "\n");
1257 retArr = showAllEdgesForNode(TRANSID, FROMAPPID,
1259 for (String info : retArr) {
1260 bw.write(info + "\n");
1262 } catch (Exception dex) {
1263 LoggingContext.statusCode(StatusCode.ERROR);
1264 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1265 logger.error("error trying to print detail info for a node missing its dependent edge but not an orphan "
1266 + LogFormatTools.getStackTop(dex));
1270 bw.write("\n ------------- EDGES pointing to empty/bad vertices: ");
1271 for (Map.Entry<String, Edge> entry : oneArmedEdgeHash.entrySet()) {
1273 String eid = entry.getKey();
1274 Edge thisE = entry.getValue();
1275 String badVid = emptyVertexHash.get(eid);
1276 bw.write("\n> Edge pointing to bad vertex (Vid = "
1277 + badVid + ") EdgeId = " + eid + "\n");
1278 bw.write("Label: [" + thisE.label() + "]\n");
1279 Iterator<Property<Object>> pI = thisE.properties();
1280 while (pI.hasNext()) {
1281 Property<Object> propKey = pI.next();
1282 bw.write("Prop: [" + propKey + "], val = ["
1283 + propKey.value() + "]\n");
1285 } catch (Exception pex) {
1286 LoggingContext.statusCode(StatusCode.ERROR);
1287 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1288 logger.error("error trying to print empty/bad vertex data: " + LogFormatTools.getStackTop(pex));
1292 bw.write("\n ------------- Duplicates: ");
1293 Iterator<String> dupeIter = dupeGroups.iterator();
1294 int dupeSetCounter = 0;
1295 while (dupeIter.hasNext()) {
1297 String dset = (String) dupeIter.next();
1299 bw.write("\n --- Duplicate Group # " + dupeSetCounter
1300 + " Detail -----------\n");
1302 // We expect each line to have at least two vid's, followed
1303 // by the preferred one to KEEP
1304 String[] dupeArr = dset.split("\\|");
1305 ArrayList<String> idArr = new ArrayList<>();
1306 int lastIndex = dupeArr.length - 1;
1307 for (int i = 0; i <= lastIndex; i++) {
1308 if (i < lastIndex) {
1309 // This is not the last entry, it is one of the
1310 // dupes, so we want to show all its info
1311 bw.write(" >> Duplicate Group # "
1312 + dupeSetCounter + " Node # " + i
1314 String vidString = dupeArr[i];
1315 idArr.add(vidString);
1316 long longVertId = Long.parseLong(vidString);
1317 Iterator<Vertex> vtxIterator = g.vertices(longVertId);
1319 if (vtxIterator.hasNext()) {
1320 vtx = vtxIterator.next();
1322 ArrayList<String> retArr = showPropertiesForNode(TRANSID, FROMAPPID, vtx);
1323 for (String info : retArr) {
1324 bw.write(info + "\n");
1327 retArr = showAllEdgesForNode(TRANSID,
1329 for (String info : retArr) {
1330 bw.write(info + "\n");
1333 // This is the last entry which should tell us if we
1334 // have a preferred keeper
1335 String prefString = dupeArr[i];
1336 if (prefString.equals("KeepVid=UNDETERMINED")) {
1337 bw.write("\n For this group of duplicates, could not tell which one to keep.\n");
1338 bw.write(" >>> This group needs to be taken care of with a manual/forced-delete.\n");
1340 // If we know which to keep, then the prefString
1341 // should look like, "KeepVid=12345"
1342 String[] prefArr = prefString.split("=");
1343 if (prefArr.length != 2
1344 || (!prefArr[0].equals("KeepVid"))) {
1345 throw new Exception("Bad format. Expecting KeepVid=999999");
1347 String keepVidStr = prefArr[1];
1348 if (idArr.contains(keepVidStr)) {
1349 bw.write("\n The vertex we want to KEEP has vertexId = "
1351 bw.write("\n The others become delete candidates: \n");
1352 idArr.remove(keepVidStr);
1353 for (int x = 0; x < idArr.size(); x++) {
1354 cleanupCandidateCount++;
1355 bw.write("DeleteCandidate: Duplicate Vid = ["
1356 + idArr.get(x) + "]\n");
1359 throw new Exception("ERROR - Vertex Id to keep not found in list of dupes. dset = ["
1363 }// else we know which one to keep
1365 }// for each vertex in a group
1366 } catch (Exception dex) {
1367 LoggingContext.statusCode(StatusCode.ERROR);
1368 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1369 logger.error("error trying to print duplicate vertex data " + LogFormatTools.getStackTop(dex));
1372 }// while - work on each group of dupes
1374 bw.write("\n ------------- Mis-matched Label/aai-node-type Nodes: \n ");
1375 for (Map.Entry<String, String> entry : misMatchedHash.entrySet()) {
1376 String msg = entry.getValue();
1377 bw.write("MixedMsg = " + msg + "\n");
1380 bw.write("\n ------------- Got these errors while processing: \n");
1381 Iterator<String> errIter = errArr.iterator();
1382 while (errIter.hasNext()) {
1383 String line = (String) errIter.next();
1384 bw.write(line + "\n");
1389 logger.info("\n ------------- Done doing all the checks ------------ ");
1390 logger.info("Output will be written to " + fullOutputFileName);
1392 if (cleanupCandidateCount > 0) {
1393 // Technically, this is not an error -- but we're throwing this
1394 // error so that hopefully a
1395 // monitoring system will pick it up and do something with it.
1396 throw new AAIException("AAI_6123", "See file: [" + fullOutputFileName
1397 + "] and investigate delete candidates. ");
1399 } catch (AAIException e) {
1400 LoggingContext.statusCode(StatusCode.ERROR);
1401 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1402 logger.error("Caught AAIException while grooming data");
1403 ErrorLogHelper.logException(e);
1404 } catch (Exception ex) {
1405 LoggingContext.statusCode(StatusCode.ERROR);
1406 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
1407 logger.error("Caught exception while grooming data");
1408 ErrorLogHelper.logError("AAI_6128", ex.getMessage() + ", resolve and rerun dataGrooming");
1414 } catch (IOException iox) {
1415 LoggingContext.statusCode(StatusCode.ERROR);
1416 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1417 logger.warn("Got an IOException trying to close bufferedWriter() \n", iox);
1421 if (g != null && g.tx().isOpen()) {
1422 // Any changes that worked correctly should have already done
1425 if (executeFinalCommit) {
1429 } catch (Exception ex) {
1430 // Don't throw anything because Titan sometimes is just saying that the graph is already closed
1431 LoggingContext.statusCode(StatusCode.ERROR);
1432 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1433 logger.warn("WARNING from final graphTransaction.rollback()", ex);
1437 if (g2 != null && g2.tx().isOpen()) {
1438 // Any changes that worked correctly should have already done
1442 } catch (Exception ex) {
1443 // Don't throw anything because Titan sometimes is just saying that the graph is already closed
1444 LoggingContext.statusCode(StatusCode.ERROR);
1445 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1446 logger.warn("WARNING from final graphTransaction2.rollback()", ex);
1450 if( finalShutdownFlag ){
1452 if( graph != null && graph.isOpen() ){
1456 } catch (Exception ex) {
1457 // Don't throw anything because Titan sometimes is just saying that the graph is already closed{
1458 LoggingContext.statusCode(StatusCode.ERROR);
1459 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1460 logger.warn("WARNING from final graph.shutdown()", ex);
1464 if( graph2 != null && graph2.isOpen() ){
1465 graph2.tx().close();
1468 } catch (Exception ex) {
1469 // Don't throw anything because Titan sometimes is just saying that the graph is already closed{
1470 LoggingContext.statusCode(StatusCode.ERROR);
1471 LoggingContext.responseCode(LoggingContext.AVAILABILITY_TIMEOUT_ERROR);
1472 logger.warn("WARNING from final graph2.shutdown()", ex);
1478 return cleanupCandidateCount;
1480 }// end of doTheGrooming()
1484 * Vertex has these keys.
1486 * @param tmpV the vertex whose properties are being checked
1487 * @param propHashWithKeys map of required property names to the values they are expected to have
1488 * @return the boolean
1490 private static Boolean vertexHasTheseKeys( Vertex tmpV, HashMap <String, Object> propHashWithKeys) {
// Checks whether tmpV carries every property named in propHashWithKeys with a
// value whose toString() exactly matches the expected value from the map.
// NOTE(review): several original lines are elided in this excerpt (the return
// branches and closing braces); comments describe only what is visible here.
1491 Iterator <?> it = propHashWithKeys.entrySet().iterator();
1492 while( it.hasNext() ){
// Defaults used when the map entry's key or value is null.
1493 String propName = "";
1494 String propVal = "";
1495 Map.Entry <?,?>propEntry = (Map.Entry<?,?>)it.next();
1496 Object propNameObj = propEntry.getKey();
1497 if( propNameObj != null ){
1498 propName = propNameObj.toString();
1500 Object propValObj = propEntry.getValue();
1501 if( propValObj != null ){
1502 propVal = propValObj.toString();
// Look the property up on the vertex; orElse(null) means the property is absent.
1504 Object checkValObj = tmpV.<Object>property(propName).orElse(null);
1505 if( checkValObj == null ) {
// Property is missing from the vertex -- this vertex cannot match.
1508 else if( !propVal.equals(checkValObj.toString()) ){
// Property present but its value differs from the expected one.
1517 * Any key fields missing.
1519 * @param nType the node type (aai-node-type) of the vertex being checked
1521 * @return the boolean
1523 private static Boolean anyKeyFieldsMissing(String nType, Vertex v, Loader loader) {
// Determines whether vertex v is missing (or has an empty value for) any of the
// key properties that the schema requires for node-type nType.
// NOTE(review): the try opener, return statements and closing braces are elided
// from this excerpt; per the in-line note below, an unrecognized nType is
// converted to an AAIException that is caught locally and treated as "not missing".
1526 Introspector obj = null;
1528 obj = loader.introspectorFromName(nType);
1529 } catch (AAIUnknownObjectException e) {
1530 // They gave us a non-empty nodeType but our NodeKeyProps does
1531 // not have data for it. Since we do not know what the
1532 // key params are for this type of node, we will just
1534 String emsg = " -- WARNING -- Unrecognized nodeType: [" + nType
1535 + "]. We cannot determine required keys for this nType. ";
1536 // NOTE - this will be caught below and a "false" returned
1537 throw new AAIException("AAI_6121", emsg);
1540 // Determine what the key fields are for this nodeType
1541 Collection <String> keyPropNamesColl = obj.getKeys();
1542 Iterator<String> keyPropI = keyPropNamesColl.iterator();
1543 while (keyPropI.hasNext()) {
1544 String propName = keyPropI.next();
// orElse(null) -> property absent from the vertex.
1545 Object ob = v.<Object>property(propName).orElse(null);
1546 if (ob == null || ob.toString().equals("")) {
1547 // It is missing a key property
1551 } catch (AAIException e) {
1552 // Something was wrong -- but since we weren't able to check
1553 // the keys, we will not declare that it is missing keys.
1561 * Gets the delete list.
1563 * @param targetDir the target dir
1564 * @param fileName the file name
1565 * @param edgesOnlyFlag the edges only flag
1566 * @param dontFixOrphans the dont fix orphans
1567 * @param dupeFixOn the dupe fix on
1568 * @return the delete list
1569 * @throws AAIException the AAI exception
1571 private static Set<String> getDeleteList(String targetDir,
1572 String fileName, Boolean edgesOnlyFlag, Boolean dontFixOrphans,
1573 Boolean dupeFixOn) throws AAIException {
1575 // Look in the file for lines formated like we expect - pull out any
1576 // Vertex Id's to delete on this run
// Reads a previous grooming report and collects the ids from lines that start
// with "DeleteCandidate", filtered by the three flags. LinkedHashSet keeps
// insertion order and de-duplicates ids.
1577 Set<String> delList = new LinkedHashSet<>();
1578 String fullFileName = targetDir + AAIConstants.AAI_FILESEP + fileName;
// try-with-resources guarantees the reader is closed.
1580 try(BufferedReader br = new BufferedReader(new FileReader(fullFileName))) {
1581 String line = br.readLine();
1582 while (line != null) {
1583 if (!"".equals(line) && line.startsWith("DeleteCandidate")) {
1584 if (edgesOnlyFlag && (!line.contains("Bad Edge"))) {
1585 // We're not going to process edge guys
1586 } else if (dontFixOrphans && line.contains("Orphan")) {
1587 // We're not going to process orphans
1588 } else if (!dupeFixOn && line.contains("Duplicate")) {
1589 // We're not going to process Duplicates
// Otherwise extract the id between "id = [" and the closing "]".
// begIndex + 6 skips past "id = [" (5 chars of the search string plus the bracket).
1591 int begIndex = line.indexOf("id = ");
1592 int endIndex = line.indexOf("]");
1593 String vidVal = line.substring(begIndex + 6, endIndex);
1594 delList.add(vidVal);
1597 line = br.readLine();
1600 } catch (IOException e) {
// Wrap the I/O failure, preserving the cause, in the project's exception type.
1601 throw new AAIException("AAI_6124", e, "Could not open input-file [" + fullFileName
1602 + "], exception= " + e.getMessage());
1607 }// end of getDeleteList
1610 * Gets the preferred dupe.
1612 * @param transId the trans id
1613 * @param fromAppId the from app id
1615 * @param dupeVertexList the list of vertices suspected of being duplicates of one another
1616 * @param ver the ver
1618 * @throws AAIException the AAI exception
1620 public static Vertex getPreferredDupe(String transId,
1621 String fromAppId, GraphTraversalSource g,
1622 ArrayList<Vertex> dupeVertexList, String ver, Loader loader)
1623 throws AAIException {
1625 // This method assumes that it is being passed a List of vertex objects
1627 // violate our uniqueness constraints.
// Reduces a list of suspected-duplicate vertices to the single one worth
// keeping by pairwise tournament: compare the current favorite against each
// remaining vertex via pickOneOfTwoDupes(). Returns null-ish (nullVtx) when
// a winner cannot be determined.
// NOTE(review): the early-return lines for the null/empty cases are elided
// from this excerpt.
1629 Vertex nullVtx = null;
1631 if (dupeVertexList == null) {
1634 int listSize = dupeVertexList.size();
1635 if (listSize == 0) {
1638 if (listSize == 1) {
// A single vertex is trivially the one to keep.
1639 return (dupeVertexList.get(0));
1642 Vertex vtxPreferred = null;
1643 Vertex currentFaveVtx = dupeVertexList.get(0);
1644 for (int i = 1; i < listSize; i++) {
1645 Vertex vtxB = dupeVertexList.get(i);
1646 vtxPreferred = pickOneOfTwoDupes(transId, fromAppId, g,
1647 currentFaveVtx, vtxB, ver, loader);
1648 if (vtxPreferred == null) {
1649 // We couldn't choose one
1652 currentFaveVtx = vtxPreferred;
// The surviving favorite after all pairwise comparisons.
1656 return (currentFaveVtx);
1658 } // end of getPreferredDupe()
1661 * Pick one of two dupes.
1663 * @param transId the trans id
1664 * @param fromAppId the from app id
1666 * @param vtxA the vtx A
1667 * @param vtxB the vtx B
1668 * @param ver the ver
1670 * @throws AAIException the AAI exception
1672 public static Vertex pickOneOfTwoDupes(String transId,
1673 String fromAppId, GraphTraversalSource g, Vertex vtxA,
1674 Vertex vtxB, String ver, Loader loader) throws AAIException {
// Given two vertices suspected of being duplicates, decide which one should be
// kept. Returns the preferred vertex, or null (nullVtx) when no safe choice
// can be made. The decision uses, in order: matching node-types, matching key
// property values, matching dependent-node (for non-top-level types), and a
// comparison of the sets of connected vertex ids (cases 1-4 commented below).
// NOTE(review): many original lines are elided in this excerpt (early returns,
// some closing braces, and a few declarations such as the local "nt" variable);
// comments describe only what is visible here.
1676 Vertex nullVtx = null;
1677 Vertex preferredVtx = null;
// NOTE(review): new Long(String) is deprecated -- Long.valueOf(...) would be
// preferred in a code change.
1679 Long vidA = new Long(vtxA.id().toString());
1680 Long vidB = new Long(vtxB.id().toString());
1682 String vtxANodeType = "";
1683 String vtxBNodeType = "";
1684 Object objType = vtxA.<Object>property("aai-node-type").orElse(null);
1685 if (objType != null) {
1686 vtxANodeType = objType.toString();
1688 objType = vtxB.<Object>property("aai-node-type").orElse(null);
1689 if (objType != null) {
1690 vtxBNodeType = objType.toString();
// If either node-type is unknown or they differ, these are not comparable dupes.
1693 if (vtxANodeType.equals("") || (!vtxANodeType.equals(vtxBNodeType))) {
1694 // Either they're not really dupes or there's some bad data - so
1699 // Check that node A and B both have the same key values (or else they
1701 // (We'll check dep-node later)
1702 // Determine what the key fields are for this nodeType
1703 Collection <String> keyProps = new ArrayList <>();
1704 HashMap <String,Object> keyPropValsHash = new HashMap <String,Object>();
1706 keyProps = loader.introspectorFromName(vtxANodeType).getKeys();
1707 } catch (AAIUnknownObjectException e) {
1708 logger.warn("Required property not found", e);
1709 throw new AAIException("AAI_6105", "Required Property name(s) not found for nodeType = " + vtxANodeType + ")");
// Compare every schema key property of A against B; a blank or differing
// value means we cannot safely call them duplicates.
1712 Iterator<String> keyPropI = keyProps.iterator();
1713 while (keyPropI.hasNext()) {
1714 String propName = keyPropI.next();
1715 String vtxAKeyPropVal = "";
1716 objType = vtxA.<Object>property(propName).orElse(null);
1717 if (objType != null) {
1718 vtxAKeyPropVal = objType.toString();
1720 String vtxBKeyPropVal = "";
1721 objType = vtxB.<Object>property(propName).orElse(null);
1722 if (objType != null) {
1723 vtxBKeyPropVal = objType.toString();
1726 if (vtxAKeyPropVal.equals("")
1727 || (!vtxAKeyPropVal.equals(vtxBKeyPropVal))) {
1728 // Either they're not really dupes or they are missing some key
1729 // data - so don't pick one
1733 // Keep these around for (potential) use later
1734 keyPropValsHash.put(propName, vtxAKeyPropVal);
1739 // Collect the vid's and aai-node-types of the vertices that each vertex
1740 // (A and B) is connected to.
1741 ArrayList<String> vtxIdsConn2A = new ArrayList<>();
1742 ArrayList<String> vtxIdsConn2B = new ArrayList<>();
// node-type -> connected vertex-id maps, used for the dependent-node check.
1743 HashMap<String, String> nodeTypesConn2A = new HashMap<>();
1744 HashMap<String, String> nodeTypesConn2B = new HashMap<>();
1746 ArrayList<Vertex> vertListA = getConnectedNodes( g, vtxA );
1747 if (vertListA != null) {
1748 Iterator<Vertex> iter = vertListA.iterator();
1749 while (iter.hasNext()) {
1750 Vertex tvCon = iter.next();
1751 String conVid = tvCon.id().toString();
1753 objType = tvCon.<Object>property("aai-node-type").orElse(null);
1754 if (objType != null) {
1755 nt = objType.toString();
1757 nodeTypesConn2A.put(nt, conVid);
1758 vtxIdsConn2A.add(conVid);
// Same collection pass for vertex B.
1762 ArrayList<Vertex> vertListB = getConnectedNodes( g, vtxB );
1763 if (vertListB != null) {
1764 Iterator<Vertex> iter = vertListB.iterator();
1765 while (iter.hasNext()) {
1766 Vertex tvCon = iter.next();
1767 String conVid = tvCon.id().toString();
1769 objType = tvCon.<Object>property("aai-node-type").orElse(null);
1770 if (objType != null) {
1771 nt = objType.toString();
1773 nodeTypesConn2B.put(nt, conVid);
1774 vtxIdsConn2B.add(conVid);
1778 // 1 - If this kind of node needs a dependent node for uniqueness, then
1779 // verify that they both nodes point to the same dependent
1780 // node (otherwise they're not really duplicates)
1781 // Note - there are sometimes more than one dependent node type since
1782 // one nodeType can be used in different ways. But for a
1783 // particular node, it will only have one dependent node that
1784 // it's connected to.
1785 String onlyNodeThatIndexPointsToVidStr = "";
1786 Collection<String> depNodeTypes = loader.introspectorFromName(vtxANodeType).getDependentOn();
1787 if (depNodeTypes.isEmpty()) {
1788 // This kind of node is not dependent on any other. That is ok.
1789 // We need to find out if the unique index info is good or not and
1790 // use that later when deciding if we can delete one.
1791 onlyNodeThatIndexPointsToVidStr = findJustOneUsingIndex( transId,
1792 fromAppId, g, keyPropValsHash, vtxANodeType, vidA, vidB, ver );
1794 String depNodeVtxId4A = "";
1795 String depNodeVtxId4B = "";
1796 Iterator<String> iter = depNodeTypes.iterator();
1797 while (iter.hasNext()) {
1798 String depNodeType = iter.next();
1799 if (nodeTypesConn2A.containsKey(depNodeType)) {
1800 // This is the dependent node type that vertex A is using
1801 depNodeVtxId4A = nodeTypesConn2A.get(depNodeType);
1803 if (nodeTypesConn2B.containsKey(depNodeType)) {
1804 // This is the dependent node type that vertex B is using
1805 depNodeVtxId4B = nodeTypesConn2B.get(depNodeType);
1808 if (depNodeVtxId4A.equals("")
1809 || (!depNodeVtxId4A.equals(depNodeVtxId4B))) {
1810 // Either they're not really dupes or there's some bad data - so
1811 // don't pick either one
1816 if (vtxIdsConn2A.size() == vtxIdsConn2B.size()) {
1817 // 2 - If they both have edges to all the same vertices,
1818 // then return the one that can be reached uniquely via the
1819 // key if that is the case or
1820 // else the one with the lower vertexId
1822 boolean allTheSame = true;
1823 Iterator<String> iter = vtxIdsConn2A.iterator();
1824 while (iter.hasNext()) {
1825 String vtxIdConn2A = iter.next();
1826 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1833 // If everything is the same, but one of the two has a good
1834 // pointer to it, then save that one. Otherwise, take the
1836 if( !onlyNodeThatIndexPointsToVidStr.equals("") ){
1837 // only one is reachable via the index - choose that one.
1838 if( onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1839 preferredVtx = vtxA;
1841 else if( onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1842 preferredVtx = vtxB;
// Tie-breaker when the index points to neither/both: keep the lower vid.
1845 else if (vidA < vidB) {
1846 preferredVtx = vtxA;
1848 preferredVtx = vtxB;
1851 } else if (vtxIdsConn2A.size() > vtxIdsConn2B.size()) {
1852 // 3 - VertexA is connected to more things than vtxB.
1853 // We'll pick VtxA if its edges are a superset of vtxB's edges
1854 // and it doesn't contradict the check for the index/key pointer.
1855 boolean missingOne = false;
1856 Iterator<String> iter = vtxIdsConn2B.iterator();
1857 while (iter.hasNext()) {
1858 String vtxIdConn2B = iter.next();
1859 if (!vtxIdsConn2A.contains(vtxIdConn2B)) {
1865 if( onlyNodeThatIndexPointsToVidStr.equals("")
1866 || onlyNodeThatIndexPointsToVidStr.equals(vidA.toString()) ){
1867 preferredVtx = vtxA;
1870 } else if (vtxIdsConn2B.size() > vtxIdsConn2A.size()) {
1871 // 4 - VertexB is connected to more things than vtxA.
1872 // We'll pick VtxB if its edges are a superset of vtxA's edges
1873 // and it doesn't contradict the check for the index/key pointer.
1874 boolean missingOne = false;
1875 Iterator<String> iter = vtxIdsConn2A.iterator();
1876 while (iter.hasNext()) {
1877 String vtxIdConn2A = iter.next();
1878 if (!vtxIdsConn2B.contains(vtxIdConn2A)) {
1884 if( onlyNodeThatIndexPointsToVidStr.equals("")
1885 || onlyNodeThatIndexPointsToVidStr.equals(vidB.toString()) ){
1886 preferredVtx = vtxB;
// Fallthrough: no safe choice could be made.
1890 preferredVtx = nullVtx;
1893 return (preferredVtx);
1895 } // end of pickOneOfTwoDupes()
1898 * Check and process dupes.
1900 * @param transId the trans id
1901 * @param fromAppId the from app id
1903 * @param version the version
1904 * @param nType the node type (aai-node-type) shared by the passed vertices
1905 * @param passedVertList the passed vert list
1906 * @param dupeFixOn the dupe fix on
1907 * @param deleteCandidateList the delete candidate list
1908 * @param singleCommits the single commits
1909 * @param alreadyFoundDupeGroups the already found dupe groups
1910 * @return the array list
1912 private static List<String> checkAndProcessDupes(String transId,
1913 String fromAppId, Graph g, GraphTraversalSource source, String version, String nType,
1914 List<Vertex> passedVertList, Boolean dupeFixOn,
1915 Set<String> deleteCandidateList, Boolean singleCommits,
1916 ArrayList<String> alreadyFoundDupeGroups, Loader loader ) {
1918 ArrayList<String> returnList = new ArrayList<>();
1919 ArrayList<Vertex> checkVertList = new ArrayList<>();
1920 ArrayList<String> alreadyFoundDupeVidArr = new ArrayList<>();
1921 Boolean noFilterList = true;
1922 Iterator<String> afItr = alreadyFoundDupeGroups.iterator();
1923 while (afItr.hasNext()) {
1924 String dupeGrpStr = afItr.next();
1925 String[] dupeArr = dupeGrpStr.split("\\|");
1926 int lastIndex = dupeArr.length - 1;
1927 for (int i = 0; i < lastIndex; i++) {
1928 // Note: we don't want the last one...
1929 String vidString = dupeArr[i];
1930 alreadyFoundDupeVidArr.add(vidString);
1931 noFilterList = false;
1935 // For a given set of Nodes that were found with a set of KEY
1936 // Parameters, (nodeType + key data) we will
1937 // see if we find any duplicate nodes that need to be cleaned up. Note -
1938 // it's legit to have more than one
1939 // node with the same key data if the nodes depend on a parent for
1940 // uniqueness -- as long as the two nodes
1941 // don't hang off the same Parent.
1942 // If we find duplicates, and we can figure out which of each set of
1943 // duplicates is the one that we
1944 // think should be preserved, we will record that. Whether we can tell
1945 // which one should be
1946 // preserved or not, we will return info about any sets of duplicates
1949 // Each element in the returned arrayList might look like this:
1950 // "1234|5678|keepVid=UNDETERMINED" (if there were 2 dupes, and we
1951 // couldn't figure out which one to keep)
1952 // or, "100017|200027|30037|keepVid=30037" (if there were 3 dupes and we
1953 // thought the third one was the one that should survive)
1955 // Because of the way the calling code loops over stuff, we can get the
1956 // same data multiple times - so we should
1957 // not process any vertices that we've already seen.
1960 Iterator<Vertex> pItr = passedVertList.iterator();
1961 while (pItr.hasNext()) {
1962 Vertex tvx = pItr.next();
1963 String passedId = tvx.id().toString();
1964 if (noFilterList || !alreadyFoundDupeVidArr.contains(passedId)) {
1965 // We haven't seen this one before - so we should check it.
1966 checkVertList.add(tvx);
1970 if (checkVertList.size() < 2) {
1971 // Nothing new to check.
1975 if (loader.introspectorFromName(nType).isTopLevel()) {
1976 // If this was a node that does NOT depend on other nodes for
1977 // uniqueness, and we
1978 // found more than one node using its key -- record the found
1979 // vertices as duplicates.
1980 String dupesStr = "";
1981 for (int i = 0; i < checkVertList.size(); i++) {
1983 + ((checkVertList.get(i))).id()
1986 if (dupesStr != "") {
1987 Vertex prefV = getPreferredDupe(transId, fromAppId,
1988 source, checkVertList, version, loader);
1989 if (prefV == null) {
1990 // We could not determine which duplicate to keep
1991 dupesStr = dupesStr + "KeepVid=UNDETERMINED";
1992 returnList.add(dupesStr);
1994 dupesStr = dupesStr + "KeepVid=" + prefV.id();
1995 Boolean didRemove = false;
1997 didRemove = deleteNonKeepersIfAppropriate(g,
1998 dupesStr, prefV.id().toString(),
1999 deleteCandidateList, singleCommits);
2004 // keep them on our list
2005 returnList.add(dupesStr);
2010 // More than one node have the same key fields since they may
2011 // depend on a parent node for uniqueness. Since we're finding
2012 // more than one, we want to check to see if any of the
2013 // vertices that have this set of keys (and are the same nodeType)
2014 // are also pointing at the same 'parent' node.
2015 // Note: for a given set of key data, it is possible that there
2016 // could be more than one set of duplicates.
2017 HashMap<String, ArrayList<Vertex>> vertsGroupedByParentHash = groupVertsByDepNodes(
2018 transId, fromAppId, source, version, nType,
2019 checkVertList, loader);
2020 for (Map.Entry<String, ArrayList<Vertex>> entry : vertsGroupedByParentHash
2022 ArrayList<Vertex> thisParentsVertList = entry
2024 if (thisParentsVertList.size() > 1) {
2025 // More than one vertex found with the same key info
2026 // hanging off the same parent/dependent node
2027 String dupesStr = "";
2028 for (int i = 0; i < thisParentsVertList.size(); i++) {
2030 + ((thisParentsVertList
2031 .get(i))).id() + "|";
2033 if (dupesStr != "") {
2034 Vertex prefV = getPreferredDupe(transId,
2035 fromAppId, source, thisParentsVertList,
2038 if (prefV == null) {
2039 // We could not determine which duplicate to
2041 dupesStr = dupesStr + "KeepVid=UNDETERMINED";
2042 returnList.add(dupesStr);
2044 Boolean didRemove = false;
2045 dupesStr = dupesStr + "KeepVid="
2046 + prefV.id().toString();
2048 didRemove = deleteNonKeepersIfAppropriate(
2049 g, dupesStr, prefV.id()
2051 deleteCandidateList, singleCommits);
2056 // keep them on our list
2057 returnList.add(dupesStr);
2064 } catch (Exception e) {
2065 LoggingContext.statusCode(StatusCode.ERROR);
2066 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2067 logger.warn(" >>> Threw an error in checkAndProcessDupes - just absorb this error and move on. ", e);
2072 }// End of checkAndProcessDupes()
2075 * Group verts by dep nodes.
2077 * @param transId the trans id
2078 * @param fromAppId the from app id
2080 * @param version the version
2081 * @param nType the node type shared by all vertices in passedVertList
2082 * @param passedVertList the passed vert list
2083 * @return the hash map
2084 * @throws AAIException the AAI exception
2086 private static HashMap<String, ArrayList<Vertex>> groupVertsByDepNodes(
2087 String transId, String fromAppId, GraphTraversalSource g, String version,
2088 String nType, ArrayList<Vertex> passedVertList, Loader loader)
2089 throws AAIException {
2090 // Given a list of Titan Vertices of one nodeType (see AAI-8956), group
2091 // them together by the parent node they depend on.
2092 // Ie. if given a list of ip address nodes (assumed to all have the
2093 // same key info) they might sit under several different parent vertices.
2094 // Under Normal conditions, there would only be one per parent -- but
2095 // we're trying to find duplicates - so we
2096 // allow for the case where more than one is under the same parent node.
2098 HashMap<String, ArrayList<Vertex>> retHash = new HashMap<String, ArrayList<Vertex>>();
2099 if (loader.introspectorFromName(nType).isTopLevel()) {
2100 // This method really should not have been called if this is not the
2102 // that depends on a parent for uniqueness, so just return the empty
2107 // Find out what types of nodes the passed in nodes can depend on
2108 ArrayList<String> depNodeTypeL = new ArrayList<>();
2109 Collection<String> depNTColl = loader.introspectorFromName(nType).getDependentOn();
2110 Iterator<String> ntItr = depNTColl.iterator();
2111 while (ntItr.hasNext()) {
2112 depNodeTypeL.add(ntItr.next());
2114 // For each vertex, we want find its depended-on/parent vertex so we
2115 // can track what other vertexes that are dependent on that same guy.
2116 if (passedVertList != null) {
2117 Iterator<Vertex> iter = passedVertList.iterator();
2118 while (iter.hasNext()) {
2119 Vertex thisVert = iter.next();
2120 Vertex tmpParentVtx = getConnectedParent( g, thisVert );
2121 if( tmpParentVtx != null ) {
2122 String parentNt = null;
2123 Object obj = tmpParentVtx.<Object>property("aai-node-type").orElse(null);
2125 parentNt = obj.toString();
2127 if (depNTColl.contains(parentNt)) {
2128 // This must be the parent/dependent node
2129 String parentVid = tmpParentVtx.id().toString();
2130 if (retHash.containsKey(parentVid)) {
2131 // add this vert to the list for this parent key
2132 retHash.get(parentVid).add(thisVert);
2134 // This is the first one we found on this parent
2135 ArrayList<Vertex> vList = new ArrayList<>();
2136 vList.add(thisVert);
2137 retHash.put(parentVid, vList);
2146 }// end of groupVertsByDepNodes()
2149 * Delete non keepers if appropriate.
2152 * @param dupeInfoString the dupe info string
2153 * @param vidToKeep the vid to keep
2154 * @param deleteCandidateList the delete candidate list
2155 * @param singleCommits the single commits
2156 * @return the boolean
2158 private static Boolean deleteNonKeepersIfAppropriate(Graph g,
2159 String dupeInfoString, String vidToKeep,
2160 Set<String> deleteCandidateList, Boolean singleCommits) {
2162 Boolean deletedSomething = false;
2163 // This assumes that the dupeInfoString is in the format of
2164 // pipe-delimited vid's followed by
2165 // ie. "3456|9880|keepVid=3456"
2166 if (deleteCandidateList == null || deleteCandidateList.size() == 0) {
2167 // No vid's on the candidate list -- so no deleting will happen on
2172 String[] dupeArr = dupeInfoString.split("\\|");
2173 ArrayList<String> idArr = new ArrayList<>();
2174 int lastIndex = dupeArr.length - 1;
2175 for (int i = 0; i <= lastIndex; i++) {
2176 if (i < lastIndex) {
2177 // This is not the last entry, it is one of the dupes,
2178 String vidString = dupeArr[i];
2179 idArr.add(vidString);
2181 // This is the last entry which should tell us if we have a
2183 String prefString = dupeArr[i];
2184 if (prefString.equals("KeepVid=UNDETERMINED")) {
2185 // They sent us a bad string -- nothing should be deleted if
2186 // no dupe could be tagged as preferred
2189 // If we know which to keep, then the prefString should look
2190 // like, "KeepVid=12345"
2191 String[] prefArr = prefString.split("=");
2192 if (prefArr.length != 2 || (!prefArr[0].equals("KeepVid"))) {
2193 LoggingContext.statusCode(StatusCode.ERROR);
2194 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2195 logger.error("Bad format. Expecting KeepVid=999999");
2198 String keepVidStr = prefArr[1];
2199 if (idArr.contains(keepVidStr)) {
2200 idArr.remove(keepVidStr);
2202 // So now, the idArr should just contain the vid's
2203 // that we want to remove.
2204 for (int x = 0; x < idArr.size(); x++) {
2205 boolean okFlag = true;
2206 String thisVid = idArr.get(x);
2207 if (deleteCandidateList.contains(thisVid)) {
2208 // This vid is a valid delete candidate from
2209 // a prev. run, so we can remove it.
2211 long longVertId = Long
2212 .parseLong(thisVid);
2214 .traversal().V(longVertId).next();
2216 if (singleCommits) {
2217 // NOTE - the singleCommits option is not used in normal processing
2219 g = AAIGraph.getInstance().getGraph().newTransaction();
2221 } catch (Exception e) {
2223 LoggingContext.statusCode(StatusCode.ERROR);
2224 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2225 logger.error("ERROR trying to delete VID = " + thisVid + " " + LogFormatTools.getStackTop(e));
2228 logger.info(" DELETED VID = " + thisVid);
2229 deletedSomething = true;
2234 LoggingContext.statusCode(StatusCode.ERROR);
2235 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2236 logger.error("ERROR - Vertex Id to keep not found in list of dupes. dupeInfoString = ["
2237 + dupeInfoString + "]");
2241 }// else we know which one to keep
2243 }// for each vertex in a group
2245 return deletedSomething;
2247 }// end of deleteNonKeepersIfAppropriate()
2251 * Gets the node just using key params.
2253 * @param transId the trans id
2254 * @param fromAppId the from app id
2255 * @param graph the graph
2256 * @param nodeType the node type
2257 * @param keyPropsHash the key props hash
2258 * @param apiVersion the api version
2259 * @return the node just using key params
2260 * @throws AAIException the AAI exception
2262 public static List <Vertex> getNodeJustUsingKeyParams( String transId, String fromAppId, GraphTraversalSource graph, String nodeType,
2263 HashMap<String,Object> keyPropsHash, String apiVersion ) throws AAIException{
2265 List <Vertex> retVertList = new ArrayList <> ();
2267 // We assume that all NodeTypes have at least one key-property defined.
2268 // Note - instead of key-properties (the primary key properties), a user could pass
2269 // alternate-key values if they are defined for the nodeType.
2270 List<String> kName = new ArrayList<>();
2271 List<Object> kVal = new ArrayList<>();
2272 if( keyPropsHash == null || keyPropsHash.isEmpty() ) {
2273 throw new AAIException("AAI_6120", " NO key properties passed for this getNodeJustUsingKeyParams() request. NodeType = [" + nodeType + "]. ");
2277 for( Map.Entry<String, Object> entry : keyPropsHash.entrySet() ){
2279 kName.add(i, entry.getKey());
2280 kVal.add(i, entry.getValue());
2282 int topPropIndex = i;
2284 String propsAndValuesForMsg = "";
2285 Iterator <Vertex> verts = null;
2288 if( topPropIndex == 0 ){
2289 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ") ";
2290 verts= graph.V().has(kName.get(0),kVal.get(0)).has("aai-node-type",nodeType);
2292 else if( topPropIndex == 1 ){
2293 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2294 + kName.get(1) + " = " + kVal.get(1) + ") ";
2295 verts = graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has("aai-node-type",nodeType);
2297 else if( topPropIndex == 2 ){
2298 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2299 + kName.get(1) + " = " + kVal.get(1) + ", "
2300 + kName.get(2) + " = " + kVal.get(2) + ") ";
2301 verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has("aai-node-type",nodeType);
2303 else if( topPropIndex == 3 ){
2304 propsAndValuesForMsg = " (" + kName.get(0) + " = " + kVal.get(0) + ", "
2305 + kName.get(1) + " = " + kVal.get(1) + ", "
2306 + kName.get(2) + " = " + kVal.get(2) + ", "
2307 + kName.get(3) + " = " + kVal.get(3) + ") ";
2308 verts= graph.V().has(kName.get(0),kVal.get(0)).has(kName.get(1),kVal.get(1)).has(kName.get(2),kVal.get(2)).has(kName.get(3),kVal.get(3)).has("aai-node-type",nodeType);
2311 throw new AAIException("AAI_6114", " We only support 4 keys per nodeType for now \n");
2314 catch( Exception ex ){
2315 LoggingContext.statusCode(StatusCode.ERROR);
2316 LoggingContext.responseCode(LoggingContext.DATA_ERROR);
2317 logger.error( " ERROR trying to get node for: [" + propsAndValuesForMsg + "]" + LogFormatTools.getStackTop(ex));
2320 if( verts != null ){
2321 while( verts.hasNext() ){
2323 retVertList.add(tiV);
2327 if( retVertList.size() == 0 ){
2328 logger.debug("DEBUG No node found for nodeType = [" + nodeType +
2329 "], propsAndVal = " + propsAndValuesForMsg );
2334 }// End of getNodeJustUsingKeyParams()
2337 * Show all edges for node.
2339 * @param transId the trans id
2340 * @param fromAppId the from app id
2341 * @param tVert the t vert
2342 * @return the array list
2344 private static ArrayList <String> showAllEdgesForNode( String transId, String fromAppId, Vertex tVert ){
2346 ArrayList <String> retArr = new ArrayList <> ();
2347 Iterator <Edge> eI = tVert.edges(Direction.IN);
2348 if( ! eI.hasNext() ){
2349 retArr.add("No IN edges were found for this vertex. ");
2351 while( eI.hasNext() ){
2352 Edge ed = eI.next();
2353 String lab = ed.label();
2355 if (tVert.equals(ed.inVertex())) {
2356 vtx = ed.outVertex();
2358 vtx = ed.inVertex();
2361 retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
2364 String nType = vtx.<String>property("aai-node-type").orElse(null);
2365 String vid = vtx.id().toString();
2366 retArr.add("Found an IN edge (" + lab + ") to this vertex from a [" + nType + "] node with VtxId = " + vid );
2371 eI = tVert.edges(Direction.OUT);
2372 if( ! eI.hasNext() ){
2373 retArr.add("No OUT edges were found for this vertex. ");
2375 while( eI.hasNext() ){
2376 Edge ed = eI.next();
2377 String lab = ed.label();
2379 if (tVert.equals(ed.inVertex())) {
2380 vtx = ed.outVertex();
2382 vtx = ed.inVertex();
2385 retArr.add(" >>> COULD NOT FIND VERTEX on the other side of this edge edgeId = " + ed.id() + " <<< ");
2388 String nType = vtx.<String>property("aai-node-type").orElse(null);
2389 String vid = vtx.id().toString();
2390 retArr.add("Found an OUT edge (" + lab + ") from this vertex to a [" + nType + "] node with VtxId = " + vid );
2398 * Show properties for node.
2400 * @param transId the trans id
2401 * @param fromAppId the from app id
2402 * @param tVert the t vert
2403 * @return the array list
2405 private static ArrayList <String> showPropertiesForNode( String transId, String fromAppId, Vertex tVert ){
2407 ArrayList <String> retArr = new ArrayList <> ();
2408 if( tVert == null ){
2409 retArr.add("null Node object passed to showPropertiesForNode()\n");
2412 String nodeType = "";
2413 Object ob = tVert.<Object>property("aai-node-type").orElse(null);
2418 nodeType = ob.toString();
2421 retArr.add(" AAINodeType/VtxID for this Node = [" + nodeType + "/" + tVert.id() + "]");
2422 retArr.add(" Property Detail: ");
2423 Iterator<VertexProperty<Object>> pI = tVert.properties();
2424 while( pI.hasNext() ){
2425 VertexProperty<Object> tp = pI.next();
2426 Object val = tp.value();
2427 retArr.add("Prop: [" + tp.key() + "], val = [" + val + "] ");
2434 private static ArrayList <Vertex> getConnectedNodes(GraphTraversalSource g, Vertex startVtx )
2435 throws AAIException {
2437 ArrayList <Vertex> retArr = new ArrayList <> ();
2438 if( startVtx == null ){
2442 GraphTraversal<Vertex, Vertex> modPipe = null;
2443 modPipe = g.V(startVtx).both();
2444 if( modPipe != null && modPipe.hasNext() ){
2445 while( modPipe.hasNext() ){
2446 Vertex conVert = modPipe.next();
2447 retArr.add(conVert);
2453 }// End of getConnectedNodes()
2456 private static ArrayList <Vertex> getConnectedChildrenOfOneType( GraphTraversalSource g,
2457 Vertex startVtx, String childNType ) throws AAIException{
2459 ArrayList <Vertex> childList = new ArrayList <> ();
2460 Iterator <Vertex> vertI = g.V(startVtx).union(__.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).inV(), __.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).outV());
2462 Vertex tmpVtx = null;
2463 while( vertI != null && vertI.hasNext() ){
2464 tmpVtx = vertI.next();
2465 Object ob = tmpVtx.<Object>property("aai-node-type").orElse(null);
2467 String tmpNt = ob.toString();
2468 if( tmpNt.equals(childNType)){
2469 childList.add(tmpVtx);
2476 }// End of getConnectedChildrenOfOneType()
2479 private static Vertex getConnectedParent( GraphTraversalSource g,
2480 Vertex startVtx ) throws AAIException{
2482 Vertex parentVtx = null;
2483 Iterator <Vertex> vertI = g.V(startVtx).union(__.inE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.OUT.toString()).outV(), __.outE().has(EdgeProperty.CONTAINS.toString(), AAIDirection.IN.toString()).inV());
2485 while( vertI != null && vertI.hasNext() ){
2486 // Note - there better only be one!
2487 parentVtx = vertI.next();
2492 }// End of getConnectedParent()
2495 private static long figureWindowStartTime( int timeWindowMinutes ){
2496 // Given a window size, calculate what the start-timestamp would be.
2498 if( timeWindowMinutes <= 0 ){
2499 // This just means that there is no window...
2502 long unixTimeNow = System.currentTimeMillis();
2503 long windowInMillis = timeWindowMinutes * 60L * 1000;
2505 long startTimeStamp = unixTimeNow - windowInMillis;
2507 return startTimeStamp;
2508 } // End of figureWindowStartTime()
2512 * Collect Duplicate Sets for nodes that are NOT dependent on parent nodes.
2514 * @param transId the trans id
2515 * @param fromAppId the from app id
2517 * @param version the version
2518 * @param nType the n type
2519 * @param passedVertList the passed vert list
2520 * @return the array list
2522 private static ArrayList<ArrayList<Vertex>> getDupeSets4NonDepNodes( String transId,
2523 String fromAppId, Graph g, String version, String nType,
2524 ArrayList<Vertex> passedVertList,
2525 ArrayList <String> keyPropNamesArr,
2528 ArrayList<ArrayList<Vertex>> returnList = new ArrayList<ArrayList<Vertex>>();
2530 // We've been passed a set of nodes that we want to check.
2531 // They are all NON-DEPENDENT nodes of the same nodeType meaning that they should be
2532 // unique in the DB based on their KEY DATA alone. So, if
2533 // we group them by their key data - if any key has more than one
2534 // vertex mapped to it, those vertices are dupes.
2536 // When we find duplicates, we group them in an ArrayList (there can be
2537 // more than one duplicate for one set of key data)
2538 // Then these dupeSets are grouped up and returned.
2541 HashMap <String, ArrayList<String>> keyVals2VidHash = new HashMap <String, ArrayList<String>>();
2542 HashMap <String,Vertex> vtxHash = new HashMap <String,Vertex>();
2543 Iterator<Vertex> pItr = passedVertList.iterator();
2544 while (pItr.hasNext()) {
2546 Vertex tvx = pItr.next();
2547 String thisVid = tvx.id().toString();
2548 vtxHash.put(thisVid, tvx);
2550 // if there are more than one vertexId mapping to the same keyProps -- they are dupes
2551 // we dont check till later since a set can contain more than 2.
2552 String hKey = getNodeKeyValString( tvx, keyPropNamesArr );
2553 if( keyVals2VidHash.containsKey(hKey) ){
2554 // We've already seen this key
2555 ArrayList <String> tmpVL = (ArrayList <String>)keyVals2VidHash.get(hKey);
2557 keyVals2VidHash.put(hKey, tmpVL);
2560 // First time for this key
2561 ArrayList <String> tmpVL = new ArrayList <String>();
2563 keyVals2VidHash.put(hKey, tmpVL);
2566 catch (Exception e) {
2567 logger.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);
2571 for( Map.Entry<String, ArrayList<String>> entry : keyVals2VidHash.entrySet() ){
2572 ArrayList <String> vidList = entry.getValue();
2574 if( !vidList.isEmpty() && vidList.size() > 1 ){
2575 // There are more than one vertex id's using the same key info
2576 ArrayList <Vertex> vertList = new ArrayList <Vertex> ();
2577 for (int i = 0; i < vidList.size(); i++) {
2578 String tmpVid = vidList.get(i);
2579 vertList.add(vtxHash.get(tmpVid));
2581 returnList.add(vertList);
2584 catch (Exception e) {
2585 logger.warn(" >>> Threw an error in getDupeSets4NonDepNodes - just absorb this error and move on. ", e);
2591 }// End of getDupeSets4NonDepNodes()
2595 * Get values of the key properties for a node as a single string
2597 * @param tvx the vertex to pull the properties from
2598 * @param keyPropNamesArr collection of key prop names
2599 * @return a String of concatenated values
2601 private static String getNodeKeyValString( Vertex tvx,
2602 ArrayList <String> keyPropNamesArr ) {
2604 String retString = "";
2605 Iterator <String> propItr = keyPropNamesArr.iterator();
2606 while( propItr.hasNext() ){
2607 String propName = propItr.next();
2609 Object propValObj = tvx.property(propName).orElse(null);
2610 retString = " " + retString + propValObj.toString();
2615 }// End of getNodeKeyValString()
2618 static private String findJustOneUsingIndex( String transId, String fromAppId,
2619 GraphTraversalSource gts, HashMap <String,Object> keyPropValsHash,
2620 String nType, Long vidAL, Long vidBL, String apiVer){
2622 // See if querying by JUST the key params (which should be indexed) brings back
2623 // ONLY one of the two vertices. Ie. the db still has a pointer to one of them
2624 // and the other one is sort of stranded.
2625 String returnVid = "";
2628 List <Vertex> tmpVertList = getNodeJustUsingKeyParams( transId, fromAppId, gts,
2629 nType, keyPropValsHash, apiVer );
2630 if( tmpVertList != null && tmpVertList.size() == 1 ){
2631 // We got just one - if it matches one of the ones we're looking
2632 // for, then return that VID
2633 Vertex tmpV = tmpVertList.get(0);
2634 String thisVid = tmpV.id().toString();
2635 if( thisVid.equals(vidAL.toString()) || thisVid.equals(vidBL.toString()) ){
2636 String msg = " vid = " + thisVid + " is one of two that the DB can retrieve directly ------";
2637 //System.out.println(msg);
2639 returnVid = thisVid;
2643 catch ( AAIException ae ){
2644 String emsg = "Error trying to get node just by key " + ae.getMessage();
2645 //System.out.println(emsg);
2651 }// End of findJustOneUsingIndex()