1 #! /usr/bin/env python3
2 # -*- coding: utf-8 -*-
4 # COPYRIGHT NOTICE STARTS HERE
6 # Copyright 2022 © Samsung Electronics Co., Ltd.
8 # Licensed under the Apache License, Version 2.0 (the "License");
9 # you may not use this file except in compliance with the License.
10 # You may obtain a copy of the License at
12 # http://www.apache.org/licenses/LICENSE-2.0
14 # Unless required by applicable law or agreed to in writing, software
15 # distributed under the License is distributed on an "AS IS" BASIS,
16 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 # See the License for the specific language governing permissions and
18 # limitations under the License.
20 # COPYRIGHT NOTICE ENDS HERE
import argparse
import datetime
import itertools
import logging
import os
import sys
import timeit

import docker
from retrying import retry

from concurrent_downloader import ConcurrentDownloader
35 log = logging.getLogger(__name__)
class DockerDownloader(ConcurrentDownloader):
    """
    Pull docker images named in image lists and optionally save them as tar files.

    Tags already known to the local docker daemon are collected once at construction
    time and are used to decide which images still need to be pulled. When a private
    registry mirror is configured, images hosted on private registries are pulled
    through the mirror and retagged to their original name.
    """

    def __init__(self, save, *list_args, mirror=None, mirror_exclude=None, workers=3):
        """
        :param save: if true, pulled images are also saved to tar files
        :param list_args: (image list, destination directory) sequences; a missing
                          destination directory is filled in as None
        :param mirror: private repository mirror address (ip:port)
        :param mirror_exclude: names of private registries that must not be pulled
                               through the mirror (defaults to no exclusions)
        :param workers: maximum number of workers for parallel download
        :raises docker.errors.DockerException: if docker client cannot be created
        """
        self._save = save
        self._mirror = mirror
        # None instead of a shared mutable default list
        self._mirror_exclude = [] if mirror_exclude is None else mirror_exclude
        try:
            # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
            self._docker_client = docker.from_env(timeout=300)
        except docker.errors.DockerException:
            log.exception(
                'Error creating docker client. Check if docker is installed and running'
                ' or if you have right permissions.')
            raise
        # tags of every image the local docker daemon already has
        self._pulled_images = set(itertools.chain.from_iterable(
            image.tags for image in self._docker_client.images.list()))
        # lists given without a destination directory get None as destination
        list_args = ([*x, None] if len(x) < 2 else x for x in list_args)
        # NOTE(review): attributes above are set before the base initializer runs;
        # presumably the base class may already use them - confirm before reordering
        super().__init__('docker images', *list_args, workers=workers)

    @staticmethod
    def image_registry_name(image_name):
        """
        Get the name as shown in local registry. Since some strings are not part of name
        when using default registry e.g. docker.io
        :param image_name: name of the image from the list
        :return: name of the image as it is shown by docker
        """
        name = image_name

        # strip only the leading prefix; str.replace() would also remove
        # any later occurrence of the same text inside the name
        for prefix in ('docker.io/', 'library/'):
            if name.startswith(prefix):
                name = name[len(prefix):]

        # a ':' in the last path component means a tag is already present
        if ':' not in name.rsplit('/', 1)[-1]:
            name = '{}:latest'.format(name)

        return name

    @property
    def check_table(self):
        """
        Table showing information of which images are pulled/saved
        """
        # refresh self._missing, which _table() reads
        self.missing()
        return self._table(self._data_list)

    @property
    def fail_table(self):
        """
        Table showing information about state of download of images
        that encountered problems while downloading
        """
        return self._table(self.missing())

    @staticmethod
    def _image_filename(image_name):
        """
        Get a name of a file where image will be saved.
        :param image_name: Name of the image from list
        :return: Filename of the image
        """
        return '{}.tar'.format(image_name.replace(':', '_').replace('/', '_'))

    def _image_path(self, directory, image_name):
        """
        Get a path of a file where image is (to be) saved.
        :param directory: destination directory of the image
        :param image_name: Name of the image from list
        :return: path of the image tar file inside the directory
        """
        return '{}/{}'.format(directory, self._image_filename(image_name))

    def _table(self, images):
        """
        Get table in format for images
        :param images: images to put into table
        :return: check table format with specified images
        """
        header = ['Name', 'Pulled', 'Saved']
        data = []
        for item in images:
            if item not in self._missing:
                # not missing means pulled, and saved as well when saving is requested
                data.append((item, True, True if self._save else 'N/A'))
            else:
                state = self._missing[item]
                data.append((item, state['pulled'], state['saved']))
        return self._check_table(header, {'Name': 'l'}, data)

    def _is_pulled(self, image):
        """
        :param image: name of the image from list
        :return: True if the image tag is known to the local docker daemon
        """
        return self.image_registry_name(image) in self._pulled_images

    def _is_saved(self, image):
        """
        :param image: name of the image from list
        :return: True if the tar file of the image exists in its destination directory
        """
        return os.path.isfile(self._image_path(self._data_list[image], image))

    def _is_missing(self, item):
        """
        Missing docker images are checked slightly differently.
        """

    def missing(self):
        """
        Get dictionary of images not present locally.
        :return: dictionary mapping image name to its 'dst', 'pulled' and 'saved' state
        """
        missing = {}
        for image, dst in self._data_list.items():
            pulled = self._is_pulled(image)
            if self._save:
                # if pulling and save is True. Save every pulled image to assure parity
                saved = self._is_saved(image) if pulled else False
            else:
                # truthy marker, so only the pulled state decides when not saving
                saved = 'N/A'
            if not pulled or not saved:
                missing[image] = {'dst': dst, 'pulled': pulled, 'saved': saved}
        self._missing = missing
        return self._missing

    def _is_mirrored(self, image_name):
        """
        Tell whether image has to be pulled through the private registry mirror.
        :param image_name: name of the image to be pulled
        :return: True if docker mirroring repository is set, the image originates from
                 a private registry (first name component contains a dot), its name
                 starts neither with 'docker.io/' nor with the mirror address and its
                 registry is not excluded
        """
        if not self._mirror:
            return False
        registry, separator, _ = image_name.partition('/')
        return (bool(separator)
                and '.' in registry
                and not image_name.startswith('docker.io/')
                and not image_name.startswith(self._mirror)
                and registry not in self._mirror_exclude)

    @retry(stop_max_attempt_number=5, wait_fixed=5000)
    def _pull_image(self, image_name):
        """
        Pull docker image.
        :param image_name: name of the image to be pulled
        :return: pulled image (image object)
        :raises docker.errors.APIError: after unsuccessful retries
        """
        if ':' not in image_name.rsplit('/', 1)[-1]:
            image_name = '{}:latest'.format(image_name)
        try:
            if self._is_mirrored(image_name):
                # download image from docker mirror and retag it to its original name
                mirrored_image_name = '{}/{}'.format(self._mirror,
                                                     image_name.split('/', 1)[1])
                img = self._docker_client.images.pull(mirrored_image_name)
                self._docker_client.images.model.tag(img, image_name)
                # untag the image pulled from mirror
                self._docker_client.images.remove(mirrored_image_name)
                image = self._docker_client.images.get(image_name)
            else:
                image = self._docker_client.images.pull(image_name)
            log.info('Image %s pulled', image_name)
            return image
        except docker.errors.APIError as err:
            log.warning('Failed: %s: %s. Retrying...', image_name, err)
            # re-raise so the retry decorator can schedule another attempt
            raise

    def _save_image(self, image_name, image, output_dir):
        """
        Save image to tar.
        :param output_dir: path to destination directory
        :param image: image object from pull_image function
        :param image_name: name of the image from list
        """
        dst = self._image_path(output_dir, image_name)
        os.makedirs(output_dir, exist_ok=True)
        try:
            with open(dst, 'wb') as f:
                for chunk in image.save(named=self.image_registry_name(image_name)):
                    f.write(chunk)
            log.info('Image %s saved as %s', image_name, dst)
        except Exception:
            # do not leave a partially written tar file behind
            if os.path.isfile(dst):
                os.remove(dst)
            raise

    def _download_item(self, image):
        """ Pull and save docker image from specified docker registry
        :param image: image to be downloaded, as (image name, state dictionary) pair;
                      the dictionary provides the 'pulled' and 'dst' keys
        """
        image_name, image_dict = image
        log.info('Downloading image: %s', image_name)
        try:
            if image_dict['pulled']:
                # already present locally, only fetch the image object
                image_to_save = self._docker_client.images.get(image_name)
            else:
                image_to_save = self._pull_image(image_name)
            if self._save:
                self._save_image(image_name, image_to_save, image_dict['dst'])
        except Exception as err:
            log.exception('Error downloading %s: %s', image_name, err)
            raise
def run_cli():
    """
    Command line entry point: parse arguments and download docker images from list.

    In check mode only the check table is logged and the process exits with status 0.
    Otherwise images are downloaded; the elapsed time is always logged and the process
    exits with status 1 if the download raises RuntimeError.
    """
    parser = argparse.ArgumentParser(description='Download docker images from list')
    parser.add_argument('image_list', metavar='image-list',
                        help='File with list of images to download.')
    parser.add_argument('--save', '-s', action='store_true', default=False,
                        help='Save images (without it only pull is executed)')
    parser.add_argument('--output-dir', '-o', default=os.getcwd(),
                        help='Download destination')
    parser.add_argument('--private-registry-mirror', default=None, metavar='HOST:PORT',
                        help='Address of docker mirroring repository that caches images'
                             ' from private registries to get those images from')
    parser.add_argument('--private-registry-exclude', action='append', default=[],
                        metavar='REGISTRY_NAME',
                        help='The name of a private registry to exclude when using'
                             ' --private-registry-mirror.'
                             ' Images that originate from excluded registry will not be'
                             ' pulled from mirroring repository.'
                             ' This option can be used multiple times.')
    # the second literal starts with a space so the two sentences do not run together
    parser.add_argument('--check', '-c', action='store_true', default=False,
                        help='Check what is missing. No download.'
                             ' Use with combination with -s to check saved images as well.')
    parser.add_argument('--debug', action='store_true', default=False,
                        help='Turn on debug output')
    parser.add_argument('--workers', type=int, default=3,
                        help='Set maximum workers for parallel download (default: 3)')

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    downloader = DockerDownloader(args.save,
                                  [args.image_list, args.output_dir],
                                  mirror=args.private_registry_mirror,
                                  mirror_exclude=args.private_registry_exclude,
                                  workers=args.workers)

    if args.check:
        log.info('Check mode. No download will be executed.')
        log.info(downloader.check_table)
        sys.exit(0)

    timer_start = timeit.default_timer()
    try:
        downloader.download()
    except RuntimeError:
        sys.exit(1)
    finally:
        # logged on success and on failure alike
        log.info('Downloading finished in {}'.format(
            datetime.timedelta(seconds=timeit.default_timer() - timer_start)))


if __name__ == '__main__':
    run_cli()