Merge "[GENERAL] Add Andreas Geissler as committer."
[oom/offline-installer.git] / build / download / docker_downloader.py
1 #! /usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 #   COPYRIGHT NOTICE STARTS HERE
5
6 #   Copyright 2022 © Samsung Electronics Co., Ltd.
7 #
8 #   Licensed under the Apache License, Version 2.0 (the "License");
9 #   you may not use this file except in compliance with the License.
10 #   You may obtain a copy of the License at
11 #
12 #       http://www.apache.org/licenses/LICENSE-2.0
13 #
14 #   Unless required by applicable law or agreed to in writing, software
15 #   distributed under the License is distributed on an "AS IS" BASIS,
16 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 #   See the License for the specific language governing permissions and
18 #   limitations under the License.
19
20 #   COPYRIGHT NOTICE ENDS HERE
21
22 import argparse
23 import datetime
24 import itertools
25 import logging
26 import os
27 import sys
28 import timeit
29
30 import docker
31 from retrying import retry
32
33 from concurrent_downloader import ConcurrentDownloader
34
# Module-level logger shared by DockerDownloader and run_cli; handlers and level
# are configured in run_cli() via logging.basicConfig.
log = logging.getLogger(__name__)
36
37
class DockerDownloader(ConcurrentDownloader):
    """
    Downloader of docker images. Images are pulled with a docker client created
    from the environment and, if requested, saved as tar files.
    """

    def __init__(self, save, *list_args, mirror=None, mirror_exclude=None, workers=3):
        """
        :param save: if true, images are also saved to tar files after the pull
        :param list_args: data list arguments passed to ConcurrentDownloader;
                          None is appended to any that has fewer than two items
        :param mirror: private repository mirror address (ip:port)
        :param mirror_exclude: names of registries whose images are never pulled
                               from the mirror (default: no exclusions)
        :param workers: passed to ConcurrentDownloader as workers
        :raises docker.errors.DockerException: if docker client creation fails
        """
        self._save = save
        self._mirror = mirror
        # Own copy: avoids a shared mutable default and aliasing the caller's list
        self._mirror_exclude = list(mirror_exclude) if mirror_exclude else []
        try:
            # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
            self._docker_client = docker.from_env(timeout=300)
        except docker.errors.DockerException:
            log.exception(
                'Error creating docker client. Check if docker is installed and running'
                ' or if you have right permissions.')
            raise
        # Tags of all images known to the local docker daemon at construction time
        self._pulled_images = set(itertools.chain.from_iterable(
            image.tags for image in self._docker_client.images.list()))
        list_args = ([*x, None] if len(x) < 2 else x for x in list_args)
        super().__init__('docker images', *list_args, workers=workers)

    @staticmethod
    def _with_default_tag(image_name):
        """
        Append ':latest' when the last path component of the name has no ':'.
        :param image_name: name of the image
        :return: image name that always carries a tag (or digest)
        """
        if ':' not in image_name.rsplit('/', 1)[-1]:
            return '{}:latest'.format(image_name)
        return image_name

    @staticmethod
    def image_registry_name(image_name):
        """
        Get the name as shown in local registry. Since some strings are not part of name
        when using default registry e.g. docker.io
        :param image_name: name of the image from the list
        :return: name of the image as it is shown by docker
        """
        name = image_name
        # Strip only the leading prefix; the same text deeper in the path is kept
        for prefix in ('docker.io/', 'library/'):
            if name.startswith(prefix):
                name = name[len(prefix):]
        return DockerDownloader._with_default_tag(name)

    @property
    def check_table(self):
        """
        Table showing information of which images are pulled/saved
        """
        # Refresh self._missing first; _table() reads it
        self.missing()
        return self._table(self._data_list)

    @property
    def fail_table(self):
        """
        Table showing information about state of download of images
        that encountered problems while downloading
        """
        return self._table(self.missing())

    @staticmethod
    def _image_filename(image_name):
        """
        Get a name of a file where image will be saved.
        :param image_name: Name of the image from list
        :return: Filename of the image
        """
        return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_'))

    def _image_path(self, output_dir, image_name):
        """
        Get path of the tar file for an image inside given directory.
        :param output_dir: path to destination directory
        :param image_name: name of the image from list
        :return: '<output_dir>/<image filename>'
        """
        return '{}/{}'.format(output_dir, self._image_filename(image_name))

    def _table(self, images):
        """
        Get table in format for images
        :param images: images to put into table
        :return: check table format with specified images
        """
        header = ['Name', 'Pulled', 'Saved']
        data = []
        for item in images:
            state = self._missing.get(item)
            if state is None:
                # Not in the missing dictionary: pulled, and saved if saving is on
                data.append((item, True, True if self._save else 'N/A'))
            else:
                data.append((item, state['pulled'], state['saved']))
        return self._check_table(header, {'Name': 'l'}, data)

    def _is_pulled(self, image):
        """
        :param image: name of the image from list
        :return: True if the image tag was listed by docker when this object was created
        """
        return self.image_registry_name(image) in self._pulled_images

    def _is_saved(self, image):
        """
        :param image: name of the image from list
        :return: True if tar file of the image exists in its destination directory
        """
        return os.path.isfile(self._image_path(self._data_list[image], image))

    def _is_missing(self, item):
        """
        Missing docker images are checked slightly differently.
        Intentionally does nothing here; missing() performs the check.
        """
        pass

    def missing(self):
        """
        Get dictionary of images not present locally.
        :return: dictionary image name -> {'dst', 'pulled', 'saved'}; 'saved' is
                 'N/A' when saving is off. Also stored in self._missing.
        """
        missing = dict()
        for image, dst in self._data_list.items():
            pulled = self._is_pulled(image)
            if self._save:
                # if pulling and save is True. Save every pulled image to assure parity
                saved = self._is_saved(image) if pulled else False
            else:
                saved = 'N/A'
            if not pulled or not saved:
                missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved}
        self._missing = missing
        return self._missing

    def _mirror_source(self, image_name):
        """
        Get the name under which an image should be pulled from the mirror.
        :param image_name: tagged name of the image
        :return: image name on the mirror, or None if the image has to be
                 pulled under its original name
        """
        if not self._mirror:
            return None
        registry, separator, remainder = image_name.partition('/')
        # Only names whose first component looks like a registry host (contains a dot)
        if not separator or '.' not in registry:
            return None
        # docker.io images and images already addressed to the mirror are pulled directly
        if image_name.startswith('docker.io/') or image_name.startswith(self._mirror):
            return None
        if registry in self._mirror_exclude:
            return None
        return '{}/{}'.format(self._mirror, remainder)

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _pull_image(self, image_name):
        """
        Pull docker image.
        :param image_name: name of the image to be pulled
        :return: pulled image (image object)
        :raises docker.errors.APIError: after unsuccessful retries
        """
        image_name = self._with_default_tag(image_name)
        mirrored_image_name = self._mirror_source(image_name)
        try:
            if mirrored_image_name is None:
                image = self._docker_client.images.pull(image_name)
            else:
                # download image from docker mirror and retag it to its original name
                img = self._docker_client.images.pull(mirrored_image_name)
                self._docker_client.images.model.tag(img, image_name)
                # untag the image pulled from mirror
                self._docker_client.images.remove(mirrored_image_name)
                image = self._docker_client.images.get(image_name)
            log.info('Image %s pulled', image_name)
            return image
        except docker.errors.APIError as err:
            log.warning('Failed: %s: %s. Retrying...', image_name, err)
            raise

    def _save_image(self, image_name, image, output_dir):
        """
        Save image to tar.
        :param output_dir: path to destination directory
        :param image: image object from pull_image function
        :param image_name: name of the image from list
        """
        dst = self._image_path(output_dir, image_name)
        os.makedirs(output_dir, exist_ok=True)
        try:
            with open(dst, 'wb') as f:
                for chunk in image.save(named=self.image_registry_name(image_name)):
                    f.write(chunk)
            log.info('Image %s saved as %s', image_name, dst)
        except Exception:
            # Do not leave a partially written tar behind
            if os.path.isfile(dst):
                os.remove(dst)
            raise

    def _download_item(self, image):
        """ Pull and save docker image from specified docker registry
        :param image: image to be downloaded, a pair (image name, dictionary
                      with 'pulled' and 'dst' keys as built by missing())
        """
        image_name, image_dict = image
        log.info('Downloading image: %s', image_name)
        try:
            if image_dict['pulled']:
                image_to_save = self._docker_client.images.get(image_name)
            else:
                image_to_save = self._pull_image(image_name)
            if self._save:
                self._save_image(image_name, image_to_save, image_dict['dst'])
        except Exception as err:
            log.exception('Error downloading %s: %s', image_name, err)
            raise
224
225
def run_cli():
    """
    Command line entry point.

    Parses the arguments and configures logging to stdout. With --check only
    the check table is logged and the process exits with status 0. Otherwise
    the images are downloaded; if the download raises RuntimeError the process
    exits with status 1. The elapsed download time is logged either way.
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--private-registry-mirror', default=None, metavar='HOST:PORT',
                        help='Address of docker mirroring repository that caches images'
                             ' from private registries to get those images from')
    parser.add_argument('--private-registry-exclude', action='append', default=[],
                        metavar='REGISTRY_NAME',
                        help='The name of a private registry to exclude when using'
                             ' --private-registry-mirror. Images that originate from excluded'
                             ' registry will not be pulled from mirroring repository.'
                             ' This option can be used multiple times.')
    # The two sentences used to be concatenated without a separating space
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.'
                             ' Use in combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = DockerDownloader(args.save,
                                  [args.image_list, args.output_dir],
                                  mirror=args.private_registry_mirror,
                                  mirror_exclude=args.private_registry_exclude,
                                  workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        # Runs for success and failure alike, including the sys.exit(1) path
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))
271
272
# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    run_cli()