Merge "Revert "basic auth for so-monitoring""
[oom.git] / kubernetes / readiness / src / main / scripts / ready.py
1 #!/usr/bin/env python
2 import getopt
3 import logging
4 import os
5 import sys
6 import time
7 import random
8
9 from kubernetes import client
10
# Settings injected through the environment. Each lookup raises KeyError
# when its variable is missing.
namespace = os.environ['NAMESPACE']
cert = os.environ['CERT']
host = os.environ['KUBERNETES_SERVICE_HOST']
token_path = os.environ['TOKEN']

# The API token is the content of the file named by TOKEN, newlines removed.
with open(token_path, 'r') as token_file:
    token = token_file.read().replace('\n', '')

# Logging: INFO and above, written to stdout with a timestamp prefix.
log = logging.getLogger(__name__)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
log.setLevel(logging.INFO)
log.addHandler(handler)

# Client configuration: HTTPS to the service host, CERT as the CA
# certificate setting, and the token sent with the "Bearer" prefix.
configuration = client.Configuration()
configuration.host = "https://%s" % host
configuration.ssl_ca_cert = cert
configuration.api_key_prefix['authorization'] = 'Bearer'
configuration.api_key['authorization'] = token

# One API wrapper per API group used below; each gets its own ApiClient.
coreV1Api = client.CoreV1Api(client.ApiClient(configuration))
api_instance = client.ExtensionsV1beta1Api(client.ApiClient(configuration))
api = client.AppsV1beta1Api(client.ApiClient(configuration))
batchV1Api = client.BatchV1Api(client.ApiClient(configuration))
38
39
def is_job_complete(job_name):
    """Report whether the named Job has completed.

    Reads the Job status in the configured namespace. The Job counts as
    complete when its status reports ``succeeded == 1`` and one of its
    conditions has type "Complete".

    :param job_name: name of the Job to look up.
    :return: True when the Job is complete, False otherwise. False is also
        returned (after logging the error) when the lookup raises.
    """
    log.info("Checking if %s  is complete", job_name)
    try:
        response = batchV1Api.read_namespaced_job_status(job_name, namespace)
        status = response.status

        # NOTE(review): "== 1" is kept from the original; it presumably
        # assumes single-completion Jobs -- confirm against the charts.
        if status.succeeded != 1:
            log.info("%s has not succeeded yet", job_name)
            return False

        # Do not rely on "Complete" being the first condition, and tolerate
        # a status whose condition list is unset.
        conditions = status.conditions or []
        if any(cond.type == "Complete" for cond in conditions):
            log.info("%s is complete", job_name)
            return True

        log.info("%s is not complete", job_name)
        return False
    except Exception as e:
        # Best effort: a failed lookup means "not complete", so the caller
        # can simply poll again.
        log.error("Exception when calling read_namespaced_job_status: %s\n", e)
        return False
57
58
def wait_for_statefulset_complete(statefulset_name):
    """Check once whether the named StatefulSet is fully rolled out.

    Despite the name this does not block; it performs a single status read.
    The StatefulSet is ready when its updated, total, ready and current
    replica counts all equal ``spec.replicas`` and the observed generation
    equals ``metadata.generation``.

    :param statefulset_name: name of the StatefulSet to look up.
    :return: True when ready, False otherwise. False is also returned
        (after logging the error) when the lookup raises.
    """
    try:
        response = api.read_namespaced_stateful_set(statefulset_name,
                                                    namespace)
        status = response.status
        wanted = response.spec.replicas
        replica_counts = (status.updated_replicas,
                          status.replicas,
                          status.ready_replicas,
                          status.current_replicas)
        rolled_out = (
            all(count == wanted for count in replica_counts) and
            status.observed_generation == response.metadata.generation)

        if rolled_out:
            log.info("Statefulset %s  is ready", statefulset_name)
            return True

        log.info("Statefulset %s  is not ready", statefulset_name)
        return False
    except Exception as e:
        log.error("Exception when waiting for Statefulset status: %s\n", e)
        # Explicit False keeps the return type boolean on every path.
        return False
75
76
def wait_for_deployment_complete(deployment_name):
    """Check once whether the named Deployment is fully rolled out.

    Despite the name this does not block; it performs a single status read.
    The Deployment is ready when all of the following hold:

    * ``status.unavailable_replicas`` is None;
    * ``status.updated_replicas`` is None or equals ``spec.replicas``;
    * ``status.replicas`` and ``status.ready_replicas`` equal
      ``spec.replicas``;
    * ``status.observed_generation`` equals ``metadata.generation``.

    :param deployment_name: name of the Deployment to look up.
    :return: True when ready, False otherwise. False is also returned
        (after logging the error) when the lookup raises.
    """
    try:
        response = api.read_namespaced_deployment(deployment_name, namespace)
        status = response.status
        wanted = response.spec.replicas

        updated_ok = (status.updated_replicas is None or
                      status.updated_replicas == wanted)
        rolled_out = (
            status.unavailable_replicas is None and
            updated_ok and
            status.replicas == wanted and
            status.ready_replicas == wanted and
            status.observed_generation == response.metadata.generation)

        if rolled_out:
            log.info("Deployment %s  is ready", deployment_name)
            return True

        log.info("Deployment %s  is not ready", deployment_name)
        return False
    except Exception as e:
        log.error("Exception when waiting for deployment status: %s\n", e)
        # Explicit False keeps the return type boolean on every path.
        return False
93
94
def wait_for_daemonset_complete(daemonset_name):
    """Check once whether the named DaemonSet is ready.

    Despite the name this does not block; it performs a single status read.
    The DaemonSet is ready when ``status.number_ready`` equals
    ``status.desired_number_scheduled``.

    :param daemonset_name: name of the DaemonSet to look up.
    :return: True when ready, False otherwise. False is also returned
        (after logging the error) when the lookup raises.
    """
    try:
        response = api_instance.read_namespaced_daemon_set(daemonset_name,
                                                           namespace)
        status = response.status
        is_complete = status.desired_number_scheduled == status.number_ready

        # Same progress line for both outcomes; only the verdict differs.
        verdict = "is ready" if is_complete else "is not ready"
        log.info("DaemonSet: %s/%s nodes ready --> %s %s",
                 status.number_ready,
                 status.desired_number_scheduled,
                 daemonset_name,
                 verdict)
        return is_complete
    except Exception as e:
        log.error("Exception when waiting for DaemonSet status: %s\n", e)
        # Explicit False keeps the return type boolean on every path.
        return False
107
108
def is_ready(container_name):
    """Check whether the workload owning ``container_name`` is ready.

    Lists the pods of the configured namespace and takes the first pod that
    reports a container status named ``container_name``. The kind of that
    pod's first owner reference selects the check:

    * "StatefulSet" -> wait_for_statefulset_complete
    * "ReplicaSet"  -> wait_for_deployment_complete, on the Deployment
      resolved through get_deployment_name
    * "Job"         -> is_job_complete
    * "DaemonSet"   -> wait_for_daemonset_complete

    Any other owner kind is reported as not ready.

    :param container_name: container name to search for.
    :return: True when the owning workload is ready. False when it is not,
        when no pod runs such a container, when the matching pod has no
        owner reference, or when an exception is raised (it is logged).
    """
    log.info("Checking if %s  is ready", container_name)
    try:
        pods = coreV1Api.list_namespaced_pod(namespace=namespace,
                                             watch=False)
        for pod in pods.items:
            # container_statuses can be None, which is non-iterable.
            statuses = pod.status.container_statuses or []
            if not any(cs.name == container_name for cs in statuses):
                continue

            # A pod without owner references cannot be mapped to a workload.
            # Report it explicitly instead of failing on the [0] index.
            owners = pod.metadata.owner_references
            if not owners:
                log.warning("Pod %s running %s has no owner reference; "
                            "treating it as not ready",
                            pod.metadata.name, container_name)
                return False

            owner = owners[0]
            ready = False
            if owner.kind == "StatefulSet":
                ready = wait_for_statefulset_complete(owner.name)
            elif owner.kind == "ReplicaSet":
                deployment_name = get_deployment_name(owner.name)
                ready = wait_for_deployment_complete(deployment_name)
            elif owner.kind == "Job":
                ready = is_job_complete(owner.name)
            elif owner.kind == "DaemonSet":
                ready = wait_for_daemonset_complete(owner.name)

            # Only the first matching pod is consulted. bool() normalizes a
            # None coming back from a helper.
            return bool(ready)

        return False
    except Exception as e:
        log.error("Exception when calling list_namespaced_pod: %s\n", e)
        return False
139
140
def read_name(item):
    """Return the name of the first owner reference on ``item.metadata``."""
    first_owner = item.metadata.owner_references[0]
    return first_owner.name
143
144
def get_deployment_name(replicaset):
    """Return the name of the first owner of the given ReplicaSet.

    Reads the ReplicaSet status in the configured namespace and returns the
    name from its first owner reference, which the caller uses as the
    Deployment name.

    :param replicaset: name of the ReplicaSet to look up.
    """
    replica_set = api_instance.read_namespaced_replica_set_status(
        replicaset, namespace)
    return read_name(replica_set)
150
151
# Default readiness timeout, in minutes.
DEF_TIMEOUT = 10
DESCRIPTION = "Kubernetes container readiness check utility"
# Help text printed for -h/--help and after argument errors.
USAGE = (
    "Usage: ready.py [-t <timeout>] -c <container_name> "
    "[-c <container_name> ...]\n"
    "where\n"
    "<timeout> - wait for container readiness timeout in min, "
    "default is %s\n"
    "<container_name> - name of the container to wait for\n"
) % DEF_TIMEOUT
160
161
def main(argv):
    """Wait until every requested container's workload is ready.

    Options:
      -c / --container-name  container to wait for (repeatable, required)
      -t / --timeout         timeout in minutes per container (float),
                             defaults to DEF_TIMEOUT
      -h / --help            print description and usage, then exit

    Containers are checked one after another by polling is_ready() with a
    random 5-11 second pause between attempts.

    :param argv: command-line arguments without the program name.
    :raises SystemExit: status 2 on invalid or missing arguments, status 1
        when a container is not ready before its deadline, and with no
        status argument after printing help.
    """
    container_names = []
    timeout = DEF_TIMEOUT
    try:
        opts, _ = getopt.getopt(argv, "hc:t:", ["container-name=",
                                                "timeout=",
                                                "help"])
        for opt, arg in opts:
            if opt in ("-h", "--help"):
                print("%s\n\n%s" % (DESCRIPTION, USAGE))
                sys.exit()
            elif opt in ("-c", "--container-name"):
                container_names.append(arg)
            elif opt in ("-t", "--timeout"):
                timeout = float(arg)
    except (getopt.GetoptError, ValueError) as e:
        print("Error parsing input parameters: %s\n" % e)
        print(USAGE)
        sys.exit(2)
    if not container_names:
        print("Missing required input parameter(s)\n")
        print(USAGE)
        sys.exit(2)

    for container_name in container_names:
        # Keep the deadline in its own variable. Overwriting `timeout` with
        # an absolute timestamp would make the next container's deadline
        # (now + epoch_seconds * 60), i.e. effectively never.
        deadline = time.time() + timeout * 60
        while True:
            # is_ready() may return None on errors, so compare with True.
            if is_ready(container_name) is True:
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '" + container_name +
                            "' to be ready")
                # sys.exit rather than the site-provided exit() helper.
                sys.exit(1)
            # spread in time potentially parallel execution in multiple
            # containers
            time.sleep(random.randint(5, 11))
201
202
# Script entry point: pass everything after the program name to main().
if __name__ == "__main__":
    main(argv=sys.argv[1:])
205