Improve readiness image to allow for shorter check intervals 48/138648/2 6.1.0
authorFiete Ostkamp <Fiete.Ostkamp@telekom.de>
Thu, 8 Aug 2024 07:28:40 +0000 (09:28 +0200)
committerFiete Ostkamp <Fiete.Ostkamp@telekom.de>
Thu, 8 Aug 2024 07:48:01 +0000 (09:48 +0200)
- introduce interval parameter that can be passed to define check interval
- reduce default wait intervals from between 5-11 to 2-6 seconds
- move checks to separate methods [0]

[0] this prepares for executing the checks in parallel in a follow-up change,
once this one has proven to work fine

Issue-ID: INT-2284
Change-Id: Ie93360e700b3d1898bed51c0612e5430d7d502cc
Signed-off-by: Fiete Ostkamp <Fiete.Ostkamp@telekom.de>
ready.py
version.properties

index a8b1999..3544b1b 100755 (executable)
--- a/ready.py
+++ b/ready.py
@@ -433,7 +433,7 @@ USAGE = "Usage: ready.py [-t <timeout>] [-n <namespace>] -c <container_name> ..
 
 def main(argv):
     """
-    Checks if a container, pod or service is ready, 
+    Checks if a container, pod or service is ready,
     if a job is finished or if the main container of a job has completed.
-    The check is done according to the name of the container op pod,
+    The check is done according to the name of the container or pod,
     not the name of its parent (Job, Deployment, StatefulSet, DaemonSet).
@@ -452,8 +452,9 @@ def main(argv):
     timeout = DEF_TIMEOUT
     url = DEF_URL
     ns = ""
+    interval=None
     try:
-        opts, _args = getopt.getopt(argv, "hj:s:c:p:a:t:m:u:n:", ["service-name=",
+        opts, _args = getopt.getopt(argv, "hj:s:c:p:a:t:m:u:n:i:", ["service-name=",
                                                     "container-name=",
                                                     "pod-name=",
                                                     "app-name=",
@@ -461,7 +462,8 @@ def main(argv):
                                                     "service-mesh-check=",
                                                     "url=",
                                                     "job-name=",
-                                                    "namespace="
+                                                    "namespace=",
+                                                    "interval=",
                                                     "help"])
         for opt, arg in opts:
             if opt in ("-h", "--help"):
@@ -485,6 +487,8 @@ def main(argv):
                 ns = arg
             elif opt in ("-t", "--timeout"):
                 timeout = float(arg)
+            elif opt in ("-i", "--interval"):
+                interval = int(arg)
     except (getopt.GetoptError, ValueError) as exc:
         print("Error parsing input parameters: {}\n".format(exc))
         print(USAGE)
@@ -501,95 +505,128 @@ def main(argv):
     else:
         namespace = ns
 
-    for service_name in service_names:
+    check_service_readiness(service_names, timeout, interval)
+    check_container_readiness(container_names, timeout, interval)
+    check_pod_readiness(pod_names, timeout, interval)
+    check_app_readiness(app_names, timeout, interval)
+    check_job_readiness(job_names, timeout, interval)
+    check_service_mesh_job_readiness(service_mesh_job_container_names, timeout, url)
+
+def check_service_mesh_job_readiness(service_mesh_job_container_names, timeout, url, interval=None):
+    """
+    Waits until service_mesh_job_check() returns True for every given
+    container name; then calls quitquitquit_post(url) and logs whether the
+    side car was killed.
+
+    Every name gets its own deadline of `timeout` minutes; the process exits
+    with status 1 when that deadline expires. Between two checks the function
+    sleeps `interval` seconds, or a random 2-6 seconds when interval is None.
+    """
+    for service_mesh_job_container_name in service_mesh_job_container_names:
-        timeout = time.time() + timeout * 60
+        # keep the deadline in its own variable: overwriting `timeout` would
+        # derive the next name's deadline from this absolute timestamp
+        deadline = time.time() + timeout * 60
         while True:
-            ready = is_service_ready(service_name)
+            ready = service_mesh_job_check(service_mesh_job_container_name)
             if ready is True:
+                sidecar_killed = quitquitquit_post(url)
+                if sidecar_killed is True:
+                    log.info("Side Car Killed through QuitQuitQuit API")
+                else:
+                    log.info("Side Car Failed to be Killed through QuitQuitQuit API")
                 break
-            if time.time() > timeout:
+            if time.time() > deadline:
                 log.warning("timed out waiting for '%s' to be ready",
-                            service_name)
+                            service_mesh_job_container_name)
                 sys.exit(1)
             else:
-                # spread in time potentially parallel execution in multiple
-                # containers
-                time.sleep(random.randint(5, 11))
-    for container_name in container_names:
+                if interval is not None:
+                    time.sleep(interval)
+                else:
+                    # spread in time potentially parallel execution in multiple
+                    # containers
+                    time.sleep(random.randint(2, 6))
+
+def check_job_readiness(job_names, timeout, interval=None):
+    """
+    Waits until is_job_complete() returns True for every given job name.
+
+    Every job gets its own deadline of `timeout` minutes; the process exits
+    with status 1 when that deadline expires. Between two checks the function
+    sleeps `interval` seconds, or a random 2-6 seconds when interval is None.
+    """
+    for job_name in job_names:
-        timeout = time.time() + timeout * 60
+        # keep the deadline in its own variable: overwriting `timeout` would
+        # derive the next job's deadline from this absolute timestamp
+        deadline = time.time() + timeout * 60
         while True:
-            ready = is_ready(container_name)
+            ready = is_job_complete(job_name)
             if ready is True:
                 break
-            if time.time() > timeout:
+            if time.time() > deadline:
                 log.warning("timed out waiting for '%s' to be ready",
-                            container_name)
+                            job_name)
                 sys.exit(1)
             else:
-                # spread in time potentially parallel execution in multiple
-                # containers
-                time.sleep(random.randint(5, 11))
-    for pod_name in pod_names:
+                if interval is not None:
+                    time.sleep(interval)
+                else:
+                    # spread in time potentially parallel execution in multiple
+                    # containers
+                    time.sleep(random.randint(2, 6))
+
+def check_app_readiness(app_names, timeout, interval=None):
+    """
+    Waits until is_app_ready() returns True for every given app name.
+
+    Every app gets its own deadline of `timeout` minutes; the process exits
+    with status 1 when that deadline expires. Between two checks the function
+    sleeps `interval` seconds, or a random 2-6 seconds when interval is None.
+    """
+    for app_name in app_names:
-        timeout = time.time() + timeout * 60
+        # keep the deadline in its own variable: overwriting `timeout` would
+        # derive the next app's deadline from this absolute timestamp
+        deadline = time.time() + timeout * 60
         while True:
-            ready = is_pod_ready(pod_name)
+            ready = is_app_ready(app_name)
             if ready is True:
                 break
-            if time.time() > timeout:
+            if time.time() > deadline:
                 log.warning("timed out waiting for '%s' to be ready",
-                            pod_name)
+                            app_name)
                 sys.exit(1)
             else:
-                # spread in time potentially parallel execution in multiple
-                # containers
-                time.sleep(random.randint(5, 11))
-    for app_name in app_names:
+                if interval is not None:
+                    time.sleep(interval)
+                else:
+                    # spread in time potentially parallel execution in multiple
+                    # containers
+                    time.sleep(random.randint(2, 6))
+
+def check_pod_readiness(pod_names, timeout, interval=None):
+    """
+    Waits until is_pod_ready() returns True for every given pod name.
+
+    Every pod gets its own deadline of `timeout` minutes; the process exits
+    with status 1 when that deadline expires. Between two checks the function
+    sleeps `interval` seconds, or a random 2-6 seconds when interval is None.
+    """
+    for pod_name in pod_names:
-        timeout = time.time() + timeout * 60
+        # keep the deadline in its own variable: overwriting `timeout` would
+        # derive the next pod's deadline from this absolute timestamp
+        deadline = time.time() + timeout * 60
         while True:
-            ready = is_app_ready(app_name)
+            ready = is_pod_ready(pod_name)
             if ready is True:
                 break
-            if time.time() > timeout:
+            if time.time() > deadline:
                 log.warning("timed out waiting for '%s' to be ready",
-                            app_name)
+                            pod_name)
                 sys.exit(1)
             else:
-                # spread in time potentially parallel execution in multiple
-                # containers
-                time.sleep(random.randint(5, 11))
-    for job_name in job_names:
+                if interval is not None:
+                    time.sleep(interval)
+                else:
+                    # spread in time potentially parallel execution in multiple
+                    # containers
+                    time.sleep(random.randint(2, 6))
+
+def check_container_readiness(container_names, timeout, interval=None):
+    """
+    Waits until is_ready() returns True for every given container name.
+
+    Every container gets its own deadline of `timeout` minutes; the process
+    exits with status 1 when that deadline expires. Between two checks the
+    function sleeps `interval` seconds, or a random 2-6 seconds when interval
+    is None.
+    """
+    for container_name in container_names:
-        timeout = time.time() + timeout * 60
+        # keep the deadline in its own variable: overwriting `timeout` would
+        # derive the next container's deadline from this absolute timestamp
+        deadline = time.time() + timeout * 60
         while True:
-            ready = is_job_complete(job_name)
+            ready = is_ready(container_name)
             if ready is True:
                 break
-            if time.time() > timeout:
+            if time.time() > deadline:
                 log.warning("timed out waiting for '%s' to be ready",
-                            job_name)
+                            container_name)
                 sys.exit(1)
             else:
-                # spread in time potentially parallel execution in multiple
-                # containers
-                time.sleep(random.randint(5, 11))
-    for service_mesh_job_container_name in service_mesh_job_container_names:
+                if interval is not None:
+                    time.sleep(interval)
+                else:
+                    # spread in time potentially parallel execution in multiple
+                    # containers
+                    time.sleep(random.randint(2, 6))
+
+def check_service_readiness(service_names, timeout, interval=None):
+    """
+    Waits until is_service_ready() returns True for every given service name.
+
+    Every service gets its own deadline of `timeout` minutes; the process
+    exits with status 1 when that deadline expires. Between two checks the
+    function sleeps `interval` seconds, or a random 2-6 seconds when interval
+    is None.
+    """
+    for service_name in service_names:
-        timeout = time.time() + timeout * 60
+        # keep the deadline in its own variable: overwriting `timeout` would
+        # derive the next service's deadline from this absolute timestamp
+        deadline = time.time() + timeout * 60
         while True:
-            ready = service_mesh_job_check(service_mesh_job_container_name)
+            ready = is_service_ready(service_name)
             if ready is True:
-                sideCarKilled = quitquitquit_post(url)
-                if sideCarKilled is True:
-                    log.info("Side Car Killed through QuitQuitQuit API")
-                else:
-                    log.info("Side Car Failed to be Killed through QuitQuitQuit API")
                 break
-            if time.time() > timeout:
+            if time.time() > deadline:
                 log.warning("timed out waiting for '%s' to be ready",
-                            service_mesh_job_container_name)
+                            service_name)
                 sys.exit(1)
             else:
-                # spread in time potentially parallel execution in multiple
-                # containers
-                time.sleep(random.randint(5, 11))
+                if interval is not None:
+                    time.sleep(interval)
+                else:
+                    # spread in time potentially parallel execution in multiple
+                    # containers
+                    time.sleep(random.randint(2, 6))
 
 if __name__ == "__main__":
     main(sys.argv[1:])
index 9603e41..01ff003 100644 (file)
@@ -2,8 +2,8 @@
 # Note that these variables cannot be structured (e.g. : version.release or version.snapshot etc... )
 # because they are used in Jenkins, whose plug-in doesn't support
 
-major=3
-minor=0
+major=6
+minor=1
 patch=0
 
 base_version=${major}.${minor}.${patch}