Fixing branch in docs
[oom/offline-installer.git] / helm_deployment_status.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 #   COPYRIGHT NOTICE STARTS HERE
5
6 #   Copyright 2019 © Samsung Electronics Co., Ltd.
7 #
8 #   Licensed under the Apache License, Version 2.0 (the "License");
9 #   you may not use this file except in compliance with the License.
10 #   You may obtain a copy of the License at
11 #
12 #       http://www.apache.org/licenses/LICENSE-2.0
13 #
14 #   Unless required by applicable law or agreed to in writing, software
15 #   distributed under the License is distributed on an "AS IS" BASIS,
16 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 #   See the License for the specific language governing permissions and
18 #   limitations under the License.
19
20 #   COPYRIGHT NOTICE ENDS HERE
21
22
23 from __future__ import print_function
24 import sys
25 import argparse
26 import yaml
27 import requests
28 from subprocess import Popen,STDOUT,PIPE,check_output
29 import datetime
30 from time import sleep
31 from os.path import expanduser
32 from itertools import chain
33 import csv
34 from requests.packages.urllib3.exceptions import InsecureRequestWarning
35 from base64 import b64decode
36 from tempfile import NamedTemporaryFile
37
def add_resource_kind(resources, kind):
    '''Tag every resource dict in ``resources`` with a ``kind`` entry.

    The dicts are modified in place and the very same ``resources``
    object is handed back, so the call can be used inline.
    '''
    for entry in resources:
        entry.update(kind=kind)
    return resources
42
def pods_by_parent(pods, parent):
    '''Yield the pods whose ``app`` label equals ``parent``.

    Pods that carry no labels at all, or no ``app`` label, are skipped
    rather than aborting the whole scan with a KeyError.
    '''
    for pod in pods:
        labels = (pod.get('metadata') or {}).get('labels') or {}
        # Explicit membership test so a missing label never equals a
        # ``parent`` of None.
        if 'app' in labels and labels['app'] == parent:
            yield pod
47
def k8s_controller_ready(k8s_controller):
    '''Tell whether a controller has reached its desired state.

    For a resource of kind ``Job`` the ``succeeded`` status counter is
    compared with ``spec.completions``; for any other kind the
    ``readyReplicas`` counter is compared with ``spec.replicas``.
    A counter absent from the status counts as 0.
    '''
    is_job = k8s_controller['kind'] == 'Job'
    status = k8s_controller['status']
    if is_job:
        achieved_key, wanted_key = 'succeeded', 'completions'
    else:
        achieved_key, wanted_key = 'readyReplicas', 'replicas'
    achieved = status.get(achieved_key, 0)
    return achieved == k8s_controller['spec'][wanted_key]
52
def get_not_ready(data):
    '''Return a list of the controllers in ``data`` that
    k8s_controller_ready() reports as not ready, in input order.'''
    pending = []
    for controller in data:
        if k8s_controller_ready(controller):
            continue
        pending.append(controller)
    return pending
55
def get_apps(data):
    '''Return a list with the ``app`` label of every resource in ``data``.'''
    apps = []
    for resource in data:
        apps.append(resource['metadata']['labels']['app'])
    return apps
58
def get_names(data):
    '''Return a list with ``metadata.name`` of every resource in ``data``.'''
    names = []
    for resource in data:
        names.append(resource['metadata']['name'])
    return names
61
def pod_ready(pod):
    '''Tell whether the pod's first ``Ready`` condition has status 'True'.

    Returns False when the pod has no status/conditions, when there is
    no ``Ready`` condition, or when any condition lacks a key that is
    looked up.
    '''
    try:
        ready_states = []
        # Walk every condition (no early exit) so a malformed entry
        # anywhere in the list still results in False.
        for condition in pod['status']['conditions']:
            if condition['type'] == 'Ready':
                ready_states.append(condition['status'])
        first_state = ready_states[0]
    except (KeyError, IndexError):
        return False
    return first_state == 'True'
68
def not_ready_pods(pods):
    '''Lazily yield the pods for which pod_ready() is false.'''
    for candidate in pods:
        if pod_ready(candidate):
            continue
        yield candidate
73
def analyze_k8s_controllers(resources_data):
    '''Summarize readiness of a list of controllers.

    Returns a dict with ``total_count`` (number of controllers),
    ``not_ready_list`` (``app`` labels of the controllers that are not
    ready) and ``ready_count`` (the difference of the two).
    '''
    total = len(resources_data)
    pending_apps = get_apps(get_not_ready(resources_data))
    return {
        'total_count': total,
        'not_ready_list': pending_apps,
        'ready_count': total - len(pending_apps),
    }
80
def get_k8s_controllers(k8s):
    '''Fetch deployments, statefulsets and jobs and summarize each group.

    ``k8s`` must provide ``get_resources(api, kind)``.  Returns a tuple
    of (dict keyed by resource group, each holding the raw ``data`` plus
    the analyze_k8s_controllers() summary; flat list of the
    ``not_ready_list`` entries of all groups).
    '''
    # (resource group, API path) in the order the groups are queried
    sources = (
        ('deployments', 'apis/apps/v1'),
        ('statefulsets', 'apis/apps/v1'),
        ('jobs', 'apis/batch/v1'),
    )

    k8s_controllers = {}
    for group, api in sources:
        summary = {'data': k8s.get_resources(api, group)}
        summary.update(analyze_k8s_controllers(summary['data']))
        k8s_controllers[group] = summary

    pending = []
    for group in k8s_controllers:
        pending.extend(k8s_controllers[group]['not_ready_list'])

    return k8s_controllers, pending
103
def exec_healthcheck(hp_script, namespace, hp_mode):
    '''Run the healthcheck script through ``sh`` and echo its output.

    The script is called as ``sh <hp_script> <namespace> <hp_mode>``.
    Each output line is printed (stripped) as soon as it is read.

    Returns the exit code of the script.
    '''
    # Spawn the healthcheck script and redirect its stderr to stdout.
    # universal_newlines=True makes the pipe yield text on both Python 2
    # and 3, so the '' sentinel below really matches end-of-file.
    hc = Popen(['sh', hp_script, namespace, hp_mode],
               stdout=PIPE, stderr=STDOUT, universal_newlines=True)
    # Trace the output of the subprocess until it closes its stdout
    for line in iter(hc.stdout.readline, ''):
        print(line.strip())
    hc.stdout.close()
    # wait() instead of poll(): the process may still be exiting after
    # closing stdout, and poll() would then report None (looks like success).
    return hc.wait()
112
def check_readiness(k8s, verbosity):
    '''Query the controllers once, print the status, report readiness.

    ``k8s`` must provide ``get_resources(api, kind)``.  Pods are only
    fetched when ``verbosity`` is greater than 1.

    Returns True when no controller is reported as not ready.
    '''
    k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)

    unready_pods = []
    # check pods only when it is explicitly wanted (judging readiness by deployment status)
    if verbosity > 1:
        pods = k8s.get_resources('api/v1', 'pods')
        # Collect into a real list: print_status() tests this value for
        # truthiness, and a lazy chain object is truthy even when empty.
        unready_pods = list(chain.from_iterable(
            get_names(not_ready_pods(pods_by_parent(pods, x)))
            for x in not_ready_controllers))

    print_status(verbosity, k8s_controllers, unready_pods)
    return not not_ready_controllers
128
def check_in_loop(k8s, max_time, sleep_time, verbosity):
    '''Repeat check_readiness() until it succeeds or time runs out.

    ``max_time`` is given in minutes, ``sleep_time`` (the pause after an
    unsuccessful check) in seconds.  Returns the result of the last
    check, or False if no check was made at all.
    '''
    deadline = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
    outcome = False
    while datetime.datetime.now() < deadline:
        outcome = check_readiness(k8s, verbosity)
        if outcome:
            break
        sleep(sleep_time)
    return outcome
138
def check_helm_releases():
    '''Run ``helm ls`` and look for releases in FAILED state.

    Exits the program with a message when helm prints nothing.

    Returns a tuple (raw ``helm ls`` output as text, list of NAMEs of
    the releases whose STATUS column is 'FAILED').
    '''
    # universal_newlines=True yields text on both Python 2 and 3; without
    # it Python 3 returns bytes, which neither equal '' nor split on '\n'.
    helm = check_output(['helm', 'ls'], universal_newlines=True)
    if not helm.strip():
        sys.exit('No Helm releases detected.')
    # The table is tab separated with space padding inside the cells, so
    # all spaces are dropped before the rows are handed to the csv reader.
    helm_releases = csv.DictReader(
            (row.replace(' ', '') for row in helm.split('\n')),
            delimiter='\t')
    failed_releases = [release['NAME'] for release in helm_releases
            if release['STATUS'] == 'FAILED']
    return helm, failed_releases
149
150
def create_ready_string(ready, total, prefix):
    '''Format one status line: ``prefix`` in a 12 character wide column
    followed by ``ready/total``.'''
    template = '{:12} {}/{}'
    return template.format(prefix, ready, total)
153
def print_status(verbosity, resources, not_ready_pods):
    '''Print the readiness summary.

    ``resources`` maps a resource group name to a dict holding at least
    ``ready_count`` and ``total_count``.  ``not_ready_pods`` is an
    iterable of pod names; it is only consumed when ``verbosity`` > 1.

    The overall 'Ready' counter is always printed, the per-group
    counters when ``verbosity`` > 0, the pod section when
    ``verbosity`` > 1.
    '''
    ready_strings = []
    ready = {k: v['ready_count'] for k, v in resources.items()}
    count = {k: v['total_count'] for k, v in resources.items()}
    if verbosity > 0:
        ready_strings += [
                create_ready_string(ready[k], count[k], k.capitalize()) for k in ready
                ]
    total_ready = sum(ready.values())
    total_count = sum(count.values())
    ready_strings.append(create_ready_string(total_ready, total_count, 'Ready'))
    status_strings = ['\n'.join(ready_strings)]
    if verbosity > 1:
        # Materialize first: a lazy iterator is truthy even when it yields
        # nothing, which would print an empty "Waiting for pods" section.
        waiting = list(not_ready_pods)
        if waiting:
            status_strings.append('\nWaiting for pods:\n{}'.format('\n'.join(waiting)))
        else:
            status_strings.append('\nAll pods are ready!')
    print('\n'.join(status_strings), '\n')
172
def parse_args():
    '''Define the command line interface and parse ``sys.argv``.

    Returns the argparse namespace with the parsed options.
    '''
    cli = argparse.ArgumentParser(
            description='Monitor ONAP deployment progress',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    default_kubeconfig = expanduser('~') + '/.kube/config'

    # (option strings, add_argument keywords) in the order shown by --help
    options = (
        (('--namespace', '-n'),
         dict(default='onap', help='Kubernetes namespace of ONAP')),
        (('--server', '-s'),
         dict(help='address of Kubernetes cluster')),
        (('--kubeconfig', '-c'),
         dict(default=default_kubeconfig, help='path to .kube/config file')),
        (('--health-path', '-hp'),
         dict(help='path to ONAP robot ete-k8s.sh')),
        (('--health-mode', '-hm'),
         dict(default='health', help='healthcheck mode')),
        (('--no-helm',),
         dict(action='store_true', help='Do not check Helm')),
        (('--check-frequency', '-w'),
         dict(default=300, type=int,
              help='time between readiness checks in seconds')),
        (('--max-time', '-t'),
         dict(default=120, type=int,
              help='max time to run readiness checks in minutes')),
        (('--single-run', '-1'),
         dict(action='store_true', help='run check loop only once')),
        (('-v',),
         dict(dest='verbosity', action='count', default=0,
              help='increase output verbosity, e.g. -vv is more verbose than -v')),
        (('--no-ssl-auth',),
         dict(action='store_true',
              help='Disable SSL certificate based authentication while connecting to server')),
    )
    for flags, settings in options:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
197
class Kubernetes(object):
    '''Class exposing get_resources() routine for connecting to kube API.
       It keeps all attributes required by that call as an internal
       object state.'''

    # Requests made with verify=False would otherwise emit a warning each time
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    def __init__(self, args):
        '''Build the client from parsed command line arguments.

        Reads ``kubeconfig``, ``server``, ``no_ssl_auth`` and
        ``namespace`` from ``args``.  The server URL is taken from the
        kubeconfig when ``args.server`` is None.
        '''
        self.config = args.kubeconfig
        self.url = args.server if args.server is not None else \
                   self._get_k8s_url()
        self.no_ssl_auth = args.no_ssl_auth
        # _get_k8s_certs() switches no_ssl_auth on itself when the
        # kubeconfig holds no usable certificate data
        self.certs = self._get_k8s_certs() if not self.no_ssl_auth else {}
        self.namespace = args.namespace

        # Setup tmp file with ca chain only if certs were gathered successfully
        # and --no-ssl-auth wasn't set
        if self.certs and not self.no_ssl_auth:
            self._setup_cert_files()

    def get_resources(self, api, kind):
        '''Performs actual API call.

        Requests ``<url>/<api>/namespaces/<namespace>/<kind>`` and returns
        the ``items`` of the reply, each tagged with its resource kind.
        Exits the program on a connection error or a non-200 reply.
        '''
        url = '/'.join([self.url, api, 'namespaces', self.namespace, kind])
        try:
            if self.no_ssl_auth:
                req = requests.get(url, verify=False)
            else:
                # The same temporary file serves as CA bundle and as
                # client certificate
                req = requests.get(url, verify=self.crt_tmp_file.name, cert=self.crt_tmp_file.name)
        except requests.exceptions.ConnectionError:
            sys.exit('Error: Could not connect to {}'.format(self.url))
        if req.status_code == 200:
            payload = req.json()
            # kind is <resource>List in response so [:-4] removes 'List' from value
            return add_resource_kind(payload['items'], payload['kind'][:-4])
        elif req.status_code == 401:
            sys.exit('Error: Server replied with "401 Unauthorized" while making connection')
        else:
            sys.exit("Error: There's been an unspecified issue while making a request to the API")

    def _setup_cert_files(self):
        '''Helper function to setup named file for requests.get() call
           in self.get_resources() which is able read certificate only
           from file'''
        ca_chain = NamedTemporaryFile()
        for crt in self.certs.values():
            ca_chain.write(crt)
        # Push the buffered data to disk so the file can be read by name
        ca_chain.flush()
        # Keep a reference on the instance; get_resources() reads the file by name
        self.crt_tmp_file = ca_chain

    def _load_kubeconfig(self):
        '''Helper function to parse the kubeconfig file into Python data'''
        with open(self.config) as f:
            # safe_load: kubeconfig is plain data, and yaml.load() without
            # an explicit Loader is rejected by current PyYAML releases
            return yaml.safe_load(f)

    def _get_k8s_url(self):
        '''Helper function to read the server URL of the first cluster
           listed in kube config'''
        # TODO: Get login info
        config = self._load_kubeconfig()
        # TODO: Support cluster by name
        return config['clusters'][0]['cluster']['server']

    def _get_k8s_certs(self):
        '''Helper function to read and decode certificates from kube config.

        Returns a dict with ``ca_cert``, ``client_cert`` and ``client_key``.
        When the data cannot be found a warning is printed, SSL
        authentication is turned off and an empty dict is returned.
        '''
        config = self._load_kubeconfig()
        try:
            cluster = config['clusters'][0]['cluster']
            user = config['users'][0]['user']
            encoded = (
                ('ca_cert', cluster['certificate-authority-data']),
                ('client_cert', user['client-certificate-data']),
                ('client_key', user['client-key-data']),
            )
        except (KeyError, IndexError, TypeError):
            # Missing key, empty clusters/users list or an empty config file
            print('Warning: could not get Kubernetes config for certificates. ' \
                      'Turning off SSL authentication.')
            self.no_ssl_auth = True
            return {}
        return {name: b64decode(value) for name, value in encoded}
272
def main():
    '''Entry point: check Helm releases, wait for the controllers to get
    ready and optionally run the healthcheck script.

    Exits with status 1 when a Helm release failed, with the healthcheck
    exit code when the healthcheck fails, and with an error message when
    the deployment is not ready.
    '''
    args = parse_args()

    if not args.no_helm:
        try:
            helm_output, failed_releases = check_helm_releases()
            if failed_releases:
                print('Deployment of {} failed.'.format(','.join(failed_releases)))
                sys.exit(1)
            elif args.verbosity > 1:
                print(helm_output)
        except (IOError, OSError) as err:
            # On Python 2 a missing helm binary raises OSError, which is
            # not an IOError there
            sys.exit(err.strerror)

    k8s = Kubernetes(args)

    if args.single_run:
        ready = check_readiness(k8s, args.verbosity)
    else:
        # Keep the result of the loop: a successful wait must count as ready
        ready = check_in_loop(k8s, args.max_time, args.check_frequency, args.verbosity)
        if not ready:
            # Double-check once more and write verbosely in case it is not ready
            ready = check_readiness(k8s, 2)

    if args.health_path is not None:
        hc_rc = exec_healthcheck(args.health_path, args.namespace, args.health_mode)
        if hc_rc:
            sys.exit(hc_rc)

    if not ready:
        sys.exit('Deployment is not ready')
302     if not ready:
303         sys.exit('Deployment is not ready')
304
# Run the deployment check only when executed as a script, not on import
if __name__ == '__main__':
    main()