[BUILD] Support docker mirror in download script
[oom/offline-installer.git] / build / download / docker_downloader.py
1 #! /usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 #   COPYRIGHT NOTICE STARTS HERE
5
6 #   Copyright 2022 © Samsung Electronics Co., Ltd.
7 #
8 #   Licensed under the Apache License, Version 2.0 (the "License");
9 #   you may not use this file except in compliance with the License.
10 #   You may obtain a copy of the License at
11 #
12 #       http://www.apache.org/licenses/LICENSE-2.0
13 #
14 #   Unless required by applicable law or agreed to in writing, software
15 #   distributed under the License is distributed on an "AS IS" BASIS,
16 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 #   See the License for the specific language governing permissions and
18 #   limitations under the License.
19
20 #   COPYRIGHT NOTICE ENDS HERE
21
22 import argparse
23 import datetime
24 import itertools
25 import logging
26 import os
27 import sys
28 import timeit
29
30 import docker
31 from retrying import retry
32
33 from concurrent_downloader import ConcurrentDownloader
34
35 log = logging.getLogger(__name__)
36
37
class DockerDownloader(ConcurrentDownloader):
    """
    Downloader pulling docker images through the docker daemon and optionally
    saving every pulled image into a tar archive.
    """

    def __init__(self, save, *list_args, mirror=None, workers=3):
        """
        :param save: if True, images are saved to tar files after being pulled
        :param list_args: data list arguments forwarded to the parent class;
                          entries with fewer than two items are padded with None
        :param mirror: private repository mirror address (ip:port)
        :param workers: worker count forwarded to the parent class
        :raises docker.errors.DockerException: if docker client can't be created
        """
        self._save = save
        self._mirror = mirror
        try:
            # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
            self._docker_client = docker.from_env(timeout=300)
        except docker.errors.DockerException:
            log.exception(
                'Error creating docker client. Check if docker is installed and running'
                ' or if you have right permissions.')
            raise
        # Tags of all images already present locally; consulted by _is_pulled()
        # and extended by _pull_image() after each successful pull.
        self._pulled_images = set(itertools.chain.from_iterable(
            image.tags for image in self._docker_client.images.list()))
        list_args = ([*x, None] if len(x) < 2 else x for x in list_args)
        super().__init__('docker images', *list_args, workers=workers)

    @staticmethod
    def _ensure_tag(image_name):
        """
        Append the default 'latest' tag when the last path component has no tag.
        :param image_name: name of the image
        :return: image name that always carries a tag (or digest)
        """
        if ':' not in image_name.rsplit('/', 1)[-1]:
            return '{}:latest'.format(image_name)
        return image_name

    @staticmethod
    def image_registry_name(image_name):
        """
        Get the name as shown in local registry. Since some strings are not part of name
        when using default registry e.g. docker.io
        :param image_name: name of the image from the list
        :return: name of the image as it is shown by docker
        """
        name = image_name
        # Strip only the leading prefixes (in this order); str.replace() would
        # also remove later occurrences inside the repository path.
        for prefix in ('docker.io/', 'library/'):
            if name.startswith(prefix):
                name = name[len(prefix):]
        return DockerDownloader._ensure_tag(name)

    @property
    def check_table(self):
        """
        Table showing information of which images are pulled/saved
        """
        # refresh self._missing so that _table() works with current state
        self.missing()
        return self._table(self._data_list)

    @property
    def fail_table(self):
        """
        Table showing information about state of download of images
        that encountered problems while downloading
        """
        return self._table(self.missing())

    @staticmethod
    def _image_filename(image_name):
        """
        Get a name of a file where image will be saved.
        :param image_name: Name of the image from list
        :return: Filename of the image
        """
        return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_'))

    def _table(self, images):
        """
        Get table in format for images
        :param images: images to put into table
        :return: check table format with specified images
        """
        header = ['Name', 'Pulled', 'Saved']
        data = []
        for item in images:
            state = self._missing.get(item)
            if state is None:
                # not missing: pulled, and saved as well when saving is requested
                data.append((item, True, True if self._save else 'N/A'))
            else:
                data.append((item, state['pulled'], state['saved']))
        return self._check_table(header, {'Name': 'l'}, data)

    def _is_pulled(self, image):
        """
        :param image: name of the image from the list
        :return: True if the image is among the locally known image tags
        """
        return self.image_registry_name(image) in self._pulled_images

    def _is_saved(self, image):
        """
        :param image: name of the image from the list
        :return: True if the tar file of the image exists in its destination
        """
        dst = '{}/{}'.format(self._data_list[image], self._image_filename(image))
        return os.path.isfile(dst)

    def _is_missing(self, item):
        """
        Missing docker images are checked slightly differently.
        Intentionally a no-op; missing() implements the check for this class.
        """
        pass

    def missing(self):
        """
        Get dictionary of images not present locally.
        :return: dict mapping image name to {'dst', 'pulled', 'saved'} for every
                 image that is not pulled or (when saving) not saved
        """
        missing = dict()
        for image, dst in self._data_list.items():
            pulled = self._is_pulled(image)
            if self._save:
                # if pulling and save is True. Save every pulled image to assure parity
                saved = self._is_saved(image) if pulled else False
            else:
                # 'N/A' is truthy, so it never marks an image as missing
                saved = 'N/A'
            if not pulled or not saved:
                missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved}
        self._missing = missing
        return self._missing

    def _mirrored_name(self, image_name):
        """
        Get the name under which the image should be pulled from the mirror.
        :param image_name: tagged name of the image
        :return: image name on the mirror, or None when the mirror is not set
                 or the image does not originate from a private registry
        """
        if not self._mirror:
            return None
        registry, separator, remainder = image_name.partition('/')
        if not separator or registry == 'docker.io':
            return None
        # The first path component is a registry host only if it looks like one
        # ('.' or ':' in it, or 'localhost'); otherwise it is a docker hub
        # namespace such as 'onap/...' and must not be rewritten.
        if '.' in registry or ':' in registry or registry == 'localhost':
            return '{}/{}'.format(self._mirror, remainder)
        return None

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _pull_image(self, image_name):
        """
        Pull docker image.
        :param image_name: name of the image to be pulled
        :return: pulled image (image object)
        :raises docker.errors.APIError: after unsuccessful retries
        """
        image_name = self._ensure_tag(image_name)
        mirrored_image_name = self._mirrored_name(image_name)
        try:
            if mirrored_image_name:
                # image originates from private registry: download it from
                # docker mirror and retag it to its original name
                mirrored_image = self._docker_client.images.pull(mirrored_image_name)
                mirrored_image.tag(image_name)
                # untag the image pulled from mirror
                self._docker_client.images.remove(mirrored_image_name)
                image = self._docker_client.images.get(image_name)
            else:
                image = self._docker_client.images.pull(image_name)
        except docker.errors.APIError as err:
            log.warning('Failed: {}: {}. Retrying...'.format(image_name, err))
            raise
        # remember the image so that later missing()/fail_table calls see it as pulled
        self._pulled_images.add(self.image_registry_name(image_name))
        log.info('Image {} pulled'.format(image_name))
        return image

    def _save_image(self, image_name, image, output_dir):
        """
        Save image to tar.
        :param output_dir: path to destination directory
        :param image: image object from pull_image function
        :param image_name: name of the image from list
        """
        dst = '{}/{}'.format(output_dir, self._image_filename(image_name))
        os.makedirs(output_dir, exist_ok=True)
        try:
            with open(dst, 'wb') as f:
                for chunk in image.save(named=self.image_registry_name(image_name)):
                    f.write(chunk)
            log.info('Image {} saved as {}'.format(image_name, dst))
        except Exception:
            # do not leave a partially written archive behind
            if os.path.isfile(dst):
                os.remove(dst)
            raise

    def _download_item(self, image):
        """ Pull and save docker image from specified docker registry
        :param image: image to be downloaded, as (name, state dict) pair
                      taken from the dictionary returned by missing()
        """
        image_name, image_dict = image
        log.info('Downloading image: {}'.format(image_name))
        try:
            if image_dict['pulled']:
                # already present locally, only the save step is pending
                image_to_save = self._docker_client.images.get(image_name)
            else:
                image_to_save = self._pull_image(image_name)
            if self._save:
                self._save_image(image_name, image_to_save, image_dict['dst'])
        except Exception as err:
            log.exception('Error downloading {}: {}'.format(image_name, err))
            raise
219
220
def run_cli():
    """
    Command line entry point: parse arguments and check or download images.

    Exits with status 0 after printing the check table in check mode and with
    status 1 when the download raises RuntimeError.
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--private-registry-mirror', default=None, metavar='IP:PORT',
                        help='Address of docker mirroring repository that caches images'
                             ' from private registries to get those images from')
    # adjacent literals are concatenated verbatim, so the separating space
    # has to be part of one of them
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.'
                             ' Use in combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = DockerDownloader(args.save,
                                  [args.image_list, args.output_dir],
                                  mirror=args.private_registry_mirror,
                                  workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        # reported for failed downloads as well
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()