Merge changes from topic "OOM-1915"
[oom/offline-installer.git] / helm_deployment_status.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 #   COPYRIGHT NOTICE STARTS HERE
5
6 #   Copyright 2019 © Samsung Electronics Co., Ltd.
7 #
8 #   Licensed under the Apache License, Version 2.0 (the "License");
9 #   you may not use this file except in compliance with the License.
10 #   You may obtain a copy of the License at
11 #
12 #       http://www.apache.org/licenses/LICENSE-2.0
13 #
14 #   Unless required by applicable law or agreed to in writing, software
15 #   distributed under the License is distributed on an "AS IS" BASIS,
16 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 #   See the License for the specific language governing permissions and
18 #   limitations under the License.
19
20 #   COPYRIGHT NOTICE ENDS HERE
21
22
23 from __future__ import print_function
24 import sys
25 import argparse
26 import yaml
27 import requests
28 from subprocess import Popen,STDOUT,PIPE
29 import datetime
30 from time import sleep
31 from os.path import expanduser
32 from itertools import chain
33 import csv
34 from requests.packages.urllib3.exceptions import InsecureRequestWarning
35 from base64 import b64decode
36 from tempfile import NamedTemporaryFile
37
def add_resource_kind(resources, kind):
    '''Stamp every resource dictionary in *resources* with its kind.

    The dictionaries are modified in place (their ``'kind'`` key is set to
    *kind*) and the very same container that was passed in is returned.
    '''
    for resource in resources:
        resource['kind'] = kind
    return resources
42
def pods_by_parent(pods, parent):
    '''Yield the pods whose ``app`` label equals *parent*.

    pods   -- iterable of pod dictionaries as returned by the kube API
    parent -- value of the ``app`` label to look for

    Pods that have no labels at all, or no ``app`` label, are skipped
    instead of aborting the whole scan with a KeyError.
    '''
    for pod in pods:
        labels = (pod.get('metadata') or {}).get('labels') or {}
        # Explicit membership test so that an unlabelled pod can never
        # match, whatever value *parent* has.
        if 'app' in labels and labels['app'] == parent:
            yield pod
47
def k8s_controller_ready(k8s_controller):
    '''Tell whether a controller has reached its desired state.

    A resource of kind ``Job`` is ready when its ``succeeded`` status
    counter equals ``spec.completions``; every other kind is ready when
    ``readyReplicas`` equals ``spec.replicas``. A missing status counter
    counts as 0.
    '''
    is_job = k8s_controller['kind'] == 'Job'
    if is_job:
        status_key, spec_key = 'succeeded', 'completions'
    else:
        status_key, spec_key = 'readyReplicas', 'replicas'
    observed = k8s_controller['status'].get(status_key, 0)
    wanted = k8s_controller['spec'][spec_key]
    return observed == wanted
52
def get_not_ready(data):
    '''Return a list with the controllers from *data* that are not ready.

    Readiness of each element is judged by k8s_controller_ready(); the
    original order of the elements is kept.
    '''
    pending = []
    for controller in data:
        if k8s_controller_ready(controller):
            continue
        pending.append(controller)
    return pending
55
def get_apps(data):
    '''Return the ``app`` label of every resource in *data*, in order.'''
    apps = []
    for resource in data:
        apps.append(resource['metadata']['labels']['app'])
    return apps
58
def get_names(data):
    '''Return the ``metadata.name`` of every resource in *data*, in order.

    *data* may be any iterable (a generator is fine); the result is always
    a list.
    '''
    names = []
    for resource in data:
        names.append(resource['metadata']['name'])
    return names
61
def pod_ready(pod):
    '''Return True when the pod's ``Ready`` condition has status 'True'.

    A pod that has no status, no conditions or no ``Ready`` condition is
    reported as not ready.
    '''
    try:
        conditions = pod['status']['conditions']
        ready_states = [cond['status'] for cond in conditions
                        if cond['type'] == 'Ready']
        verdict = ready_states[0]
    except (KeyError, IndexError):
        return False
    return verdict == 'True'
68
def not_ready_pods(pods):
    '''Lazily yield the pods from *pods* that pod_ready() rejects.'''
    for candidate in pods:
        if pod_ready(candidate):
            continue
        yield candidate
73
def analyze_k8s_controllers(resources_data):
    '''Summarise the readiness of a list of controllers.

    Returns a dictionary with three keys:
      total_count    -- number of controllers in *resources_data*
      not_ready_list -- ``app`` labels of the controllers that are not ready
      ready_count    -- total_count minus the length of not_ready_list
    '''
    total = len(resources_data)
    pending_apps = get_apps(get_not_ready(resources_data))
    return {
        'total_count': total,
        'not_ready_list': pending_apps,
        'ready_count': total - len(pending_apps),
    }
80
def get_k8s_controllers(k8s):
    '''Fetch and analyse deployments, statefulsets and jobs.

    k8s -- object providing get_resources(api, kind)

    Returns a tuple ``(controllers, not_ready)``. ``controllers`` maps each
    resource kind to a dictionary holding the raw API items under 'data'
    plus the summary produced by analyze_k8s_controllers(). ``not_ready``
    is one flat list with the ``app`` labels of every controller that is
    not ready.
    '''
    # (resource kind, API group path), queried in this order.
    queries = (
        ('deployments', 'apis/apps/v1'),
        ('statefulsets', 'apis/apps/v1'),
        ('jobs', 'apis/batch/v1'),
    )

    controllers = {}
    for kind, api in queries:
        entry = {'data': k8s.get_resources(api, kind)}
        entry.update(analyze_k8s_controllers(entry['data']))
        controllers[kind] = entry

    pending = []
    for kind in controllers:
        pending.extend(controllers[kind]['not_ready_list'])

    return controllers, pending
103
def exec_healthcheck(hp_script, namespace, hp_mode):
    '''Run the healthcheck script and echo its output line by line.

    hp_script -- path to the shell script; it is run through ``sh``
    namespace -- first argument passed to the script
    hp_mode   -- second argument passed to the script

    Returns the exit code of the script.
    '''
    # Spawn the healthcheck script and redirect its stderr to stdout.
    # universal_newlines=True makes the pipe deliver text on Python 2 and 3
    # alike; with a bytes pipe the '' sentinel below never matches b'' and
    # the loop would spin forever once the script has finished.
    hc = Popen(['sh', hp_script, namespace, hp_mode],
               stdout=PIPE, stderr=STDOUT, universal_newlines=True)
    try:
        # Trace the output of the subprocess until it has finished.
        for line in iter(hc.stdout.readline, ''):
            print(line.strip())
    finally:
        hc.stdout.close()
    # EOF on the pipe does not guarantee that the child has already exited,
    # so poll() could still report None here; wait() reaps the process and
    # always yields the real return code.
    return hc.wait()
112
def check_readiness(k8s, verbosity):
    '''Run one readiness check, print its status and return the verdict.

    k8s       -- object providing get_resources(api, kind)
    verbosity -- output level; pods are inspected only when it is above 1

    Returns True when no controller is reported as not ready.
    '''
    k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)

    # Check pods only when it is explicitly wanted (readiness itself is
    # judged by the controller status).
    unready_pods = []
    if verbosity > 1:
        pods = k8s.get_resources('api/v1', 'pods')
        # Build a real list: a lazy chain object is truthy even when it
        # yields nothing, which made the "All pods are ready!" branch of
        # print_status() unreachable.
        unready_pods = list(chain.from_iterable(
            get_names(not_ready_pods(pods_by_parent(pods, x)))
            for x in not_ready_controllers))

    print_status(verbosity, k8s_controllers, unready_pods)
    return not not_ready_controllers
128
def check_in_loop(k8s, max_time, sleep_time, verbosity):
    '''Repeat check_readiness() until it succeeds or the time runs out.

    k8s        -- object handed over to check_readiness()
    max_time   -- time budget in minutes
    sleep_time -- pause between two checks in seconds
    verbosity  -- output level handed over to check_readiness()

    Returns the result of the last check, or False when the budget was
    already used up before the first check could start.
    '''
    deadline = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
    ready = False
    while not ready and datetime.datetime.now() < deadline:
        ready = check_readiness(k8s, verbosity)
        if not ready:
            sleep(sleep_time)
    return ready
138
def check_helm_releases():
    '''List Helm releases and pick out the failed ones.

    Runs ``helm ls`` and parses its tab separated table.

    Returns a tuple ``(helm_output, failed_releases)``: the raw text that
    helm printed and the list of release names whose STATUS is FAILED.
    Exits the program when helm prints nothing. Errors from starting or
    running helm (OSError, subprocess.CalledProcessError) are passed on to
    the caller.
    '''
    # Local import: the module header only imports Popen, STDOUT and PIPE
    # from subprocess, so the bare module name is not defined otherwise.
    import subprocess

    # universal_newlines=True gives text instead of bytes on Python 3 too.
    helm = subprocess.check_output(['helm', 'ls'], universal_newlines=True)
    if not helm.strip():
        sys.exit('No Helm releases detected.')
    # Columns are padded with spaces for alignment; drop the padding so
    # that both the header names and the values compare cleanly.
    helm_releases = csv.DictReader(
        [line.replace(' ', '') for line in helm.splitlines()],
        delimiter='\t')
    failed_releases = [release['NAME'] for release in helm_releases
                       if release['STATUS'] == 'FAILED']
    return helm, failed_releases
149
150
def create_ready_string(ready, total, prefix):
    '''Format one status line: *prefix* padded to 12 columns, then
    ``ready/total``.'''
    template = '{label:12} {done}/{everything}'
    return template.format(label=prefix, done=ready, everything=total)
153
def print_status(verbosity, resources, not_ready_pods):
    '''Print a readiness summary.

    verbosity      -- 0 prints the overall counter only, above 0 adds one
                      counter per resource kind, above 1 adds the pod report
    resources      -- mapping of resource kind to a dictionary holding at
                      least 'ready_count' and 'total_count'
    not_ready_pods -- iterable with the names of the pods being waited for
    '''
    # Materialise the iterable first: generators and itertools objects are
    # truthy even when they yield nothing, so testing them directly would
    # never reach the "All pods are ready!" branch.
    not_ready_pods = list(not_ready_pods)

    ready_strings = []
    ready = {k: v['ready_count'] for k, v in resources.items()}
    count = {k: v['total_count'] for k, v in resources.items()}
    if verbosity > 0:
        ready_strings += [
            create_ready_string(ready[k], count[k], k.capitalize())
            for k in ready
        ]
    total_ready = sum(ready.values())
    total_count = sum(count.values())
    ready_strings.append(create_ready_string(total_ready, total_count, 'Ready'))
    status_strings = ['\n'.join(ready_strings)]
    if verbosity > 1:
        if not_ready_pods:
            status_strings.append('\nWaiting for pods:\n{}'.format('\n'.join(not_ready_pods)))
        else:
            status_strings.append('\nAll pods are ready!')
    print('\n'.join(status_strings), '\n')
172
def parse_args():
    '''Build the command line interface and parse ``sys.argv``.

    Returns the argparse namespace with the attributes namespace, server,
    kubeconfig, health_path, health_mode, no_helm, check_frequency,
    max_time, single_run, verbosity and no_ssl_auth.
    '''
    cli = argparse.ArgumentParser(
        description='Monitor ONAP deployment progress',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    default_kubeconfig = expanduser('~') + '/.kube/config'
    health_modes = ('health', 'healthdist', 'distribute', 'instantiate',
                    'instantiateVFWCL', 'instantiateDemoVFWCL', 'portal')

    # (option strings, keyword arguments) in the order shown by --help.
    options = (
        (('--namespace', '-n'),
         dict(default='onap', help='Kubernetes namespace of ONAP')),
        (('--server', '-s'),
         dict(help='address of Kubernetes cluster')),
        (('--kubeconfig', '-c'),
         dict(default=default_kubeconfig, help='path to .kube/config file')),
        (('--health-path', '-hp'),
         dict(help='path to ONAP robot ete-k8s.sh')),
        (('--health-mode',),
         dict(default='health', help='healthcheck mode',
              choices=health_modes)),
        (('--no-helm',),
         dict(action='store_true', help='Do not check Helm')),
        (('--check-frequency', '-w'),
         dict(default=300, type=int,
              help='time between readiness checks in seconds')),
        (('--max-time', '-t'),
         dict(default=120, type=int,
              help='max time to run readiness checks in minutes')),
        (('--single-run', '-1'),
         dict(action='store_true', help='run check loop only once')),
        (('-v',),
         dict(dest='verbosity', action='count', default=0,
              help='increase output verbosity, e.g. -vv is more verbose than -v')),
        (('--no-ssl-auth',),
         dict(action='store_true',
              help='Disable SSL certificate based authentication while connecting to server')),
    )
    for flags, settings in options:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
199
class Kubernetes:
    '''Class exposing get_resources() routine for connecting to kube API.
       It keeps all attributes required by that call as an internal
       object state.'''

    # Requests are sent with verify=False when SSL authentication is turned
    # off; silence the warning urllib3 would print for each of them.
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    def __init__(self, args):
        '''Collect the connection settings.

        args -- parsed command line; the attributes kubeconfig, server,
                no_ssl_auth and namespace are read
        '''
        self.config = args.kubeconfig
        # Named temporary file holding the certificates; stays None unless
        # _setup_cert_files() is called below.
        self.crt_tmp_file = None
        self.url = args.server if args.server is not None else \
                   self._get_k8s_url()
        self.no_ssl_auth = args.no_ssl_auth
        # _get_k8s_certs() switches self.no_ssl_auth on when the kube config
        # does not provide the certificates.
        self.certs = self._get_k8s_certs() if not self.no_ssl_auth else {}
        self.namespace = args.namespace

        # Setup tmp file with ca chain only if certs were gathered
        # successfully and --no-ssl-auth wasn't set
        if self.certs and not self.no_ssl_auth:
            self._setup_cert_files()

    def get_resources(self, api, kind):
        '''Performs actual API call

        api  -- API path prefix, e.g. 'apis/apps/v1'
        kind -- plural resource name, e.g. 'deployments'

        Returns the list of items from the response, each one tagged with
        its resource kind. Exits the program when the server cannot be
        reached or replies with anything else than 200.
        '''
        url = '/'.join([self.url, api, 'namespaces', self.namespace, kind])
        try:
            if self.no_ssl_auth:
                req = requests.get(url, verify=False)
            else:
                req = requests.get(url, verify=self.crt_tmp_file.name,
                                   cert=self.crt_tmp_file.name)
        except requests.exceptions.ConnectionError:
            sys.exit('Error: Could not connect to {}'.format(self.url))
        if req.status_code == 200:
            json = req.json()
            # kind is <resource>List in response so [:-4] removes 'List' from value
            return add_resource_kind(json['items'], json['kind'][:-4])
        elif req.status_code == 401:
            sys.exit('Error: Server replied with "401 Unauthorized" while making connection')
        else:
            sys.exit("Error: There's been an unspecified issue while making a request to the API")

    def _setup_cert_files(self):
        '''Helper function to setup named file for requests.get() call
           in self.get_resources() which is able to read certificates only
           from file'''
        ca_chain = NamedTemporaryFile()
        for crt in self.certs.values():
            ca_chain.write(crt)
        # Push the buffered data to disk so that requests, which opens the
        # file by name, sees the complete content.
        ca_chain.flush()
        # Keep a reference on the object: the temporary file is deleted as
        # soon as it gets closed.
        self.crt_tmp_file = ca_chain

    def _load_kube_config(self):
        '''Helper function to parse the kube config file'''
        with open(self.config) as f:
            # safe_load: the file holds plain data only, and a bare
            # yaml.load() without Loader is rejected by current PyYAML.
            return yaml.safe_load(f)

    def _get_k8s_url(self):
        '''Helper function to read the API server address from kube config'''
        # TODO: Get login info
        config = self._load_kube_config()
        # TODO: Support cluster by name
        return config['clusters'][0]['cluster']['server']

    def _get_k8s_certs(self):
        '''Helper function to read and decode certificates from kube config'''
        config = self._load_kube_config()
        certs = {}
        try:
            certs.update(dict(ca_cert=b64decode(
              config['clusters'][0]['cluster']['certificate-authority-data'])))
            certs.update(dict(client_cert=b64decode(
              config['users'][0]['user']['client-certificate-data'])))
            certs.update(dict(client_key=b64decode(
              config['users'][0]['user']['client-key-data'])))
        except (KeyError, IndexError):
            # IndexError covers an empty 'clusters' or 'users' list.
            print('Warning: could not get Kubernetes config for certificates. ' \
                      'Turning off SSL authentication.')
            self.no_ssl_auth = True
        return certs
274
def main():
    '''Entry point: check Helm releases, wait for the deployment to become
    ready and optionally run the healthcheck script.

    Exits with a non-zero status when a Helm release failed, when the
    healthcheck script fails (its return code is used) or when the
    deployment is not ready.
    '''
    args = parse_args()

    if not args.no_helm:
        try:
            helm_output, failed_releases = check_helm_releases()
            if failed_releases:
                print('Deployment of {} failed.'.format(','.join(failed_releases)))
                sys.exit(1)
            elif args.verbosity > 1:
                print(helm_output)
        except (IOError, OSError) as err:
            # A missing helm binary raises OSError, which is not a subclass
            # of IOError on Python 2.
            sys.exit(err.strerror)

    k8s = Kubernetes(args)

    if args.single_run:
        ready = check_readiness(k8s, args.verbosity)
    else:
        # Keep the verdict of the loop: it used to be thrown away, so a
        # deployment that became ready was still reported as not ready.
        ready = check_in_loop(k8s, args.max_time, args.check_frequency,
                              args.verbosity)
        if not ready:
            # Double-check once more and write verbosely in case it is
            # still not ready
            ready = check_readiness(k8s, 2)

    if args.health_path is not None:
        hc_rc = exec_healthcheck(args.health_path, args.namespace, args.health_mode)
        if hc_rc:
            sys.exit(hc_rc)

    if not ready:
        sys.exit('Deployment is not ready')
306
# Run the monitor only when the file is executed as a script.
if __name__ == "__main__":
    main()