[BUILD] Add option to define excluded private registries
[oom/offline-installer.git] / build / download / docker_downloader.py
1 #! /usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 #   COPYRIGHT NOTICE STARTS HERE
5
6 #   Copyright 2022 © Samsung Electronics Co., Ltd.
7 #
8 #   Licensed under the Apache License, Version 2.0 (the "License");
9 #   you may not use this file except in compliance with the License.
10 #   You may obtain a copy of the License at
11 #
12 #       http://www.apache.org/licenses/LICENSE-2.0
13 #
14 #   Unless required by applicable law or agreed to in writing, software
15 #   distributed under the License is distributed on an "AS IS" BASIS,
16 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 #   See the License for the specific language governing permissions and
18 #   limitations under the License.
19
20 #   COPYRIGHT NOTICE ENDS HERE
21
22 import argparse
23 import datetime
24 import itertools
25 import logging
26 import os
27 import sys
28 import timeit
29
30 import docker
31 from retrying import retry
32
33 from concurrent_downloader import ConcurrentDownloader
34
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
36
37
class DockerDownloader(ConcurrentDownloader):
    """
    Downloader that pulls docker images and optionally saves them as tar files.

    Images that originate from private registries can be pulled through
    a mirroring repository and retagged to their original names.
    """

    def __init__(self, save, *list_args, mirror=None, mirror_exclude=None, workers=3):
        """
        :param save: if True every pulled image is also saved to a tar file
        :param list_args: data lists handed over to ConcurrentDownloader; a list with
                          fewer than two items is padded with None
        :param mirror: private repository mirror address (ip:port)
        :param mirror_exclude: names of registries whose images must not be pulled
                               from the mirror (None means no exclusions)
        :param workers: number of workers handed over to ConcurrentDownloader
        :raises docker.errors.DockerException: if docker client can not be created
        """
        self._save = save
        self._mirror = mirror
        # None instead of a mutable default so that no list is shared between instances
        self._mirror_exclude = [] if mirror_exclude is None else mirror_exclude
        try:
            # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
            self._docker_client = docker.from_env(timeout=300)
        except docker.errors.DockerException:
            log.exception(
                'Error creating docker client. Check if docker is installed and running'
                ' or if you have right permissions.')
            raise
        # tags of all images known to the local docker daemon at start-up
        self._pulled_images = set(itertools.chain.from_iterable((image.tags for image
                                                                 in self._docker_client.images.list())))
        list_args = ([*x, None] if len(x) < 2 else x for x in list_args)
        super().__init__('docker images', *list_args, workers=workers)

    @staticmethod
    def _ensure_tag(image_name):
        """
        Append the default 'latest' tag when the image name carries no tag.
        :param image_name: name of the image, with or without tag
        :return: name of the image including a tag
        """
        # only the last path component is inspected, a registry may contain ':' (host:port)
        if ':' not in image_name.rsplit('/', 1)[-1]:
            return '{}:latest'.format(image_name)
        return image_name

    @staticmethod
    def image_registry_name(image_name):
        """
        Get the name as shown in local registry. Since some strings are not part of name
        when using default registry e.g. docker.io
        :param image_name: name of the image from the list
        :return: name of the image as it is shown by docker
        """
        name = image_name
        # strip only the leading prefix; the same text further inside the name is kept
        for prefix in ('docker.io/', 'library/'):
            if name.startswith(prefix):
                name = name[len(prefix):]
        return DockerDownloader._ensure_tag(name)

    @property
    def check_table(self):
        """
        Table showing information of which images are pulled/saved
        """
        # refresh self._missing, _table() reads it
        self.missing()
        return self._table(self._data_list)

    @property
    def fail_table(self):
        """
        Table showing information about state of download of images
        that encountered problems while downloading
        """
        return self._table(self.missing())

    @staticmethod
    def _image_filename(image_name):
        """
        Get a name of a file where image will be saved.
        :param image_name: Name of the image from list
        :return: Filename of the image
        """
        return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_'))

    def _table(self, images):
        """
        Get table in format for images
        :param images: images to put into table
        :return: check table format with specified images
        """
        header = ['Name', 'Pulled', 'Saved']
        data = []
        for item in images:
            if item not in self._missing:
                # not missing means pulled and, when saving is requested, saved as well
                data.append((item, True, True if self._save else 'N/A'))
            else:
                data.append((item, self._missing[item]['pulled'], self._missing[item]['saved']))
        return self._check_table(header, {'Name': 'l'}, data)

    def _is_pulled(self, image):
        """
        :param image: name of the image from the list
        :return: True if the image is among the locally known image tags
        """
        return self.image_registry_name(image) in self._pulled_images

    def _is_saved(self, image):
        """
        :param image: name of the image from the list
        :return: True if the tar file of the image exists in its destination directory
        """
        dst = '{}/{}'.format(self._data_list[image], self._image_filename(image))
        return os.path.isfile(dst)

    def _is_missing(self, item):
        """
        Missing docker images are checked slightly differently.
        """
        pass

    def missing(self):
        """
        Get dictionary of images not present locally.
        :return: dictionary image name -> {'dst', 'pulled', 'saved'} of images that
                 are not pulled or (when saving is requested) not saved
        """
        missing = dict()
        for image, dst in self._data_list.items():
            pulled = self._is_pulled(image)
            if self._save:
                # if pulling and save is True. Save every pulled image to assure parity
                saved = False if not pulled else self._is_saved(image)
            else:
                saved = 'N/A'
            if not pulled or not saved:
                missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved}
        self._missing = missing
        return self._missing

    def _use_mirror(self, image_name):
        """
        Decide whether the image has to be pulled from the mirroring repository.
        :param image_name: name of the image including tag
        :return: True if a mirror is set and the image originates from a private
                 registry that is neither docker.io nor excluded
        """
        if not self._mirror:
            return False
        registry, separator, _ = image_name.partition('/')
        # str.find() must not be used as a boolean here: it returns -1 (truthy)
        # when the dot is absent, which would send e.g. 'onap/foo' to the mirror
        return (bool(separator)
                and '.' in registry
                and registry != 'docker.io'
                and registry not in self._mirror_exclude)

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _pull_image(self, image_name):
        """
        Pull docker image.
        :param image_name: name of the image to be pulled
        :return: pulled image (image object)
        :raises docker.errors.APIError: after unsuccessful retries
        """
        image_name = self._ensure_tag(image_name)
        try:
            if self._use_mirror(image_name):
                # image originates from private registry
                # -> download image from docker mirror and retag it to its original name
                mirrored_image_name = '{}/{}'.format(self._mirror, image_name.split('/', 1)[1])
                img = self._docker_client.images.pull(mirrored_image_name)
                img.tag(image_name)
                # untag the image pulled from mirror
                self._docker_client.images.remove(mirrored_image_name)
                image = self._docker_client.images.get(image_name)
            else:
                image = self._docker_client.images.pull(image_name)
            log.info('Image {} pulled'.format(image_name))
            return image
        except docker.errors.APIError as err:
            log.warning('Failed: {}: {}. Retrying...'.format(image_name, err))
            raise

    def _save_image(self, image_name, image, output_dir):
        """
        Save image to tar.
        :param output_dir: path to destination directory
        :param image: image object from pull_image function
        :param image_name: name of the image from list
        """
        dst = '{}/{}'.format(output_dir, self._image_filename(image_name))
        os.makedirs(output_dir, exist_ok=True)
        try:
            with open(dst, 'wb') as f:
                for chunk in image.save(named=self.image_registry_name(image_name)):
                    f.write(chunk)
            log.info('Image {} saved as {}'.format(image_name, dst))
        except Exception:
            # do not leave a partially written archive behind
            if os.path.isfile(dst):
                os.remove(dst)
            raise

    def _download_item(self, image):
        """ Pull and save docker image from specified docker registry
        :param image: image to be downloaded, pair of image name and its
                      {'dst', 'pulled', 'saved'} dictionary as built by missing()
        """
        image_name, image_dict = image
        log.info('Downloading image: {}'.format(image_name))
        try:
            if image_dict['pulled']:
                image_to_save = self._docker_client.images.get(image_name)
            else:
                image_to_save = self._pull_image(image_name)
                # keep the set of local tags current, otherwise a later missing()
                # (e.g. from fail_table) still reports this image as not pulled
                self._pulled_images.add(self.image_registry_name(image_name))
            if self._save:
                self._save_image(image_name, image_to_save, image_dict['dst'])
        except Exception as err:
            log.exception('Error downloading {}: {}'.format(image_name, err))
            raise
222
223
def run_cli():
    """
    Command line entry point: parse arguments and download (or only check) docker images.

    Exits with status 0 after printing the check table in check mode and with
    status 1 when the download raises RuntimeError.
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--private-registry-mirror', default=None, metavar='HOST:PORT',
                        help='Address of docker mirroring repository that caches images'
                             ' from private registries to get those images from')
    parser.add_argument('--private-registry-exclude', action='append', default=[], metavar='REGISTRY_NAME',
                        help='The name of a private registry to exclude when using --private-registry-mirror.'
                             ' Images that originate from excluded registry will not be'
                             ' pulled from mirroring repository. This option can be used multiple times.')
    # the second literal starts with a space so the two sentences do not run together
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.'
                             ' Use in combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = DockerDownloader(args.save,
                                  [args.image_list, args.output_dir],
                                  mirror=args.private_registry_mirror,
                                  mirror_exclude=args.private_registry_exclude,
                                  workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        # runs on success as well as on failure, before the process exits
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))
269
270
# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    run_cli()