Add base download script 45/88345/18
author Milan Verespej <m.verespej@partner.samsung.com>
Thu, 23 May 2019 12:21:19 +0000 (14:21 +0200)
committer Milan Verespej <m.verespej@partner.samsung.com>
Tue, 4 Jun 2019 13:46:42 +0000 (15:46 +0200)
This script is supposed to be used for convenience when
downloading data from multiple lists at once.

Issue-ID: OOM-1803

Change-Id: I4031ed3650f7880883e299b43c79e6bfd08c886c
Signed-off-by: Milan Verespej <m.verespej@partner.samsung.com>
build/download/base.py
build/download/docker_images.py
build/download/download.py [new file with mode: 0755]
build/download/git_repos.py
build/download/http_files.py
build/download/npm_packages.py
build/download/rpm_packages.py

index 5bcd0ef..d8b4483 100644 (file)
@@ -38,7 +38,8 @@ def load_list(item_list):
     :return: set of items from file
     """
     with open(item_list, 'r') as f:
-        return {item for item in (line.strip() for line in f) if item}
+        return {item for item in (line.strip() for line in f)
+                if item and not item.startswith('#')}
 
 
 def init_progress(items_name):
index e4e742b..d8138dd 100755 (executable)
@@ -180,7 +180,7 @@ def download_docker_image(image, save, output_dir, docker_client):
         if save:
             save_image(image, pulled_image, output_dir)
     except Exception as err:
-        log.error('Error downloading {}: {}'.format(image, err))
+        log.exception('Error downloading {}: {}'.format(image, err))
         raise err
 
 
@@ -195,10 +195,10 @@ def download(image_list, save, output_dir, check_mode, progress, workers=3):
     :return: None
     """
     try:
-        docker_client = docker.client.DockerClient(version='auto')
+        # big timeout in case of massive images like pnda-mirror-container:5.0.0 (11.4GB)
+        docker_client = docker.client.DockerClient(version='auto', timeout=300)
     except docker.errors.DockerException as err:
-        log.error(err)
-        log.error('Error creating docker client. Check if is docker installed and running'
+        log.exception('Error creating docker client. Check if is docker installed and running'
                   ' or if you have right permissions.')
         raise err
 
@@ -221,14 +221,12 @@ def download(image_list, save, output_dir, check_mode, progress, workers=3):
                                        missing_images['not_saved'] - missing_images['not_pulled'],
                                        None, output_dir, docker_client)
 
+    base.finish_progress(progress, error_count, log)
     if error_count > 0:
         log.error('{} images were not downloaded'.format(error_count))
         missing_images = missing(docker_client, target_images, save, output_dir)
         log.info(check_table(merge_dict_sets(missing_images), missing_images, save))
-
-    base.finish_progress(progress, error_count, log)
-
-    return error_count
+        raise RuntimeError()
 
 
 def run_cli():
@@ -256,11 +254,13 @@ def run_cli():
 
     progress = base.init_progress('Docker images') if not args.check else None
     try:
-        sys.exit(download(args.image_list, args.save, args.output_dir, args.check,
-                 progress, args.workers))
+        download(args.image_list, args.save, args.output_dir, args.check,
+                 progress, args.workers)
     except docker.errors.DockerException:
-        log.error('Irrecoverable error detected.')
+        log.exception('Irrecoverable error detected.')
         sys.exit(1)
+    except RuntimeError as err:
+        log.exception(err)
 
 
 if __name__ == '__main__':
diff --git a/build/download/download.py b/build/download/download.py
new file mode 100755 (executable)
index 0000000..ebce931
--- /dev/null
@@ -0,0 +1,158 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#   COPYRIGHT NOTICE STARTS HERE
+
+#   Copyright 2019 © Samsung Electronics Co., Ltd.
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#   COPYRIGHT NOTICE ENDS HERE
+
+import argparse
+import datetime
+import logging
+import subprocess
+import sys
+import timeit
+
+import docker
+
+import base
+import docker_images
+import git_repos
+import http_files
+import npm_packages
+import rpm_packages
+
+log = logging.getLogger(name=__name__)
+
+def parse_args():
+    """
+    Parse command line arguments of the download script.
+
+    At least one of --docker, --npm, --http, --rpm, --git has to be given,
+    otherwise an argparse error is reported and the script exits.
+
+    :return: namespace with parsed arguments
+    """
+    parser = argparse.ArgumentParser(description='Download data from lists')
+    value_names = ('list', 'dir-name')
+    list_group = parser.add_argument_group()
+
+    # Docker is the only type where the directory may be omitted.
+    list_group.add_argument('--docker', action='append', nargs='+', default=[],
+                            metavar=value_names,
+                            help='Docker type list. If second argument is specified '
+                                 'it is treated as directory where images will be saved '
+                                 'otherwise only pull operation is executed')
+
+    # Remaining types always take exactly a list and a target directory.
+    two_value_options = (
+        ('--http', 'Http type list and directory to save downloaded files'),
+        ('--npm', 'npm type list and directory to save downloaded files'),
+        ('--rpm', 'rpm type list and directory to save downloaded files'),
+        ('--git', 'git repo type list and directory to save downloaded files'),
+    )
+    for option, description in two_value_options:
+        list_group.add_argument(option, action='append', nargs=2, default=[],
+                                metavar=value_names, help=description)
+
+    parser.add_argument('--npm-registry', default='https://registry.npmjs.org',
+                        help='npm registry to use (default: https://registry.npmjs.org)')
+    parser.add_argument('--check', '-c', action='store_true', default=False,
+                        help='Check what is missing. No download.')
+    parser.add_argument('--debug', action='store_true', default=False,
+                        help='Turn on debug output')
+
+    args = parser.parse_args()
+
+    list_types = ('docker', 'npm', 'http', 'rpm', 'git')
+    if not any(getattr(args, list_type) for list_type in list_types):
+        # parser.error() prints usage and terminates the script.
+        parser.error('One of --docker, --npm, --http, --rpm, --git must be specified')
+
+    return args
+
+
+def run_cli():
+    """
+    Download (or check) data from all lists given on the command line.
+
+    Messages are printed to stdout and also written, in a more detailed
+    format, to a 'download_data-<timestamp>.log' file. Lists are processed
+    in the order docker, http, npm, rpm, git. A failure of one list is
+    recorded and processing continues with the next list.
+
+    Exits with status 1 if processing of any list failed.
+    """
+    args = parse_args()
+
+    # Plain messages on the console, detailed records in the log file.
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(logging.Formatter('%(message)s'))
+    now = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+    log_file = 'download_data-{}.log'.format(now)
+    file_format = "%(asctime)s: %(filename)s: %(levelname)s: %(message)s"
+    log_level = logging.DEBUG if args.debug else logging.INFO
+    logging.basicConfig(level=log_level, filename=log_file, format=file_format)
+    logging.getLogger().addHandler(console_handler)
+
+    # All download functions raise RuntimeError on failure. In addition
+    # rpm_packages.download re-raises subprocess.CalledProcessError and
+    # docker_images.download re-raises docker.errors.DockerException.
+    # They all have to be handled here, otherwise a single failing list
+    # would abort processing of the remaining ones.
+    download_errors = (RuntimeError, subprocess.CalledProcessError,
+                       docker.errors.DockerException)
+
+    list_with_errors = []
+    timer_start = timeit.default_timer()
+
+    for docker_list in args.docker:
+        log.info('Processing {}.'.format(docker_list[0]))
+        progress = None if args.check else base.init_progress('docker images')
+        # Optional second value is the directory where images are saved,
+        # without it images are only pulled.
+        save = len(docker_list) > 1
+        output_dir = docker_list[1] if save else None
+        try:
+            docker_images.download(docker_list[0], save,
+                                   output_dir, args.check, progress)
+        except download_errors:
+            list_with_errors.append(docker_list[0])
+
+    for http_list in args.http:
+        progress = None if args.check else base.init_progress('http files')
+        log.info('Processing {}.'.format(http_list[0]))
+        try:
+            http_files.download(http_list[0], http_list[1], args.check,
+                                progress)
+        except download_errors:
+            list_with_errors.append(http_list[0])
+
+    for npm_list in args.npm:
+        progress = None if args.check else base.init_progress('npm packages')
+        log.info('Processing {}.'.format(npm_list[0]))
+        try:
+            npm_packages.download(npm_list[0], args.npm_registry, npm_list[1],
+                                  args.check, progress)
+        except download_errors:
+            list_with_errors.append(npm_list[0])
+
+    for rpm_list in args.rpm:
+        if args.check:
+            # Report once and skip all rpm lists.
+            log.info('Check mode for rpm packages is not implemented')
+            break
+        log.info('Processing {}.'.format(rpm_list[0]))
+        try:
+            rpm_packages.download(rpm_list[0], rpm_list[1])
+        except download_errors:
+            list_with_errors.append(rpm_list[0])
+
+    for git_list in args.git:
+        if args.check:
+            # Report once and skip all git lists.
+            log.info('Check mode for git repositories is not implemented')
+            break
+        # Check mode never gets here so progress is always needed.
+        progress = base.init_progress('git repositories')
+        log.info('Processing {}.'.format(git_list[0]))
+        try:
+            git_repos.download(git_list[0], git_list[1], progress)
+        except download_errors:
+            list_with_errors.append(git_list[0])
+
+    e_time = datetime.timedelta(seconds=timeit.default_timer() - timer_start)
+    log.info('Execution ended. Total elapsed time {}'.format(e_time))
+
+    if list_with_errors:
+        log.error('Errors encountered while processing these lists:'
+                  '\n{}'.format('\n'.join(list_with_errors)))
+        sys.exit(1)
+
+
+
+if __name__ == '__main__':
+    run_cli()
index e388e94..aff01b8 100755 (executable)
@@ -45,10 +45,9 @@ def download(git_list, dst_dir, progress):
     if not base.check_tool('git'):
         log.error('ERROR: git is not installed')
         progress.finish(dirty=True)
-        return 1
+        raise RuntimeError('git missing')
 
-    git_set = {tuple(item.split()) for item in base.load_list(git_list)
-               if not item.startswith('#')}
+    git_set = {tuple(item.split()) for item in base.load_list(git_list)}
 
     error_count = 0
 
@@ -64,14 +63,13 @@ def download(git_list, dst_dir, progress):
             clone_repo(dst, *repo)
             progress.update(progress.value + 1)
         except subprocess.CalledProcessError as err:
-            log.error(err.output.decode())
+            log.exception(err.output.decode())
             error_count += 1
 
     base.finish_progress(progress, error_count, log)
     if error_count > 0:
         log.error('{} were not downloaded. Check logs for details'.format(error_count))
-    return error_count
-
+        raise RuntimeError('Download unsuccesfull')
 
 def run_cli():
     parser = argparse.ArgumentParser(description='Download git repositories from list')
@@ -85,8 +83,11 @@ def run_cli():
     logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
 
     progress = base.init_progress('git repositories')
-
-    sys.exit(download(args.git_list, args.output_dir, progress))
+    try:
+        download(args.git_list, args.output_dir, progress)
+    except RuntimeError as err:
+        log.exception(err)
+        sys.exit(1)
 
 
 if __name__ == '__main__':
index f5b1e59..c83158d 100755 (executable)
@@ -83,7 +83,7 @@ def download(data_list, dst_dir, check, progress, workers=None):
 
     if check:
         log.info(base.simple_check_table(file_set, missing_files))
-        return 0
+        return
 
     skipping = file_set - missing_files
 
@@ -91,12 +91,11 @@ def download(data_list, dst_dir, check, progress, workers=None):
 
     error_count = base.run_concurrent(workers, progress, download_file, missing_files, dst_dir)
 
+    base.finish_progress(progress, error_count, log)
     if error_count > 0:
         log.error('{} files were not downloaded. Check log for specific failures.'.format(error_count))
+        raise RuntimeError()
 
-    base.finish_progress(progress, error_count, log)
-
-    return error_count
 
 def run_cli():
     """
@@ -123,7 +122,10 @@ def run_cli():
 
     progress = base.init_progress('http files') if not args.check else None
 
-    sys.exit(download(args.file_list, args.output_dir, args.check, progress, args.workers))
+    try:
+        download(args.file_list, args.output_dir, args.check, progress, args.workers)
+    except RuntimeError:
+        sys.exit(1)
 
 
 if __name__ == '__main__':
index c174e2c..70c03ad 100755 (executable)
@@ -57,7 +57,7 @@ def download_npm(npm, registry, dst_dir):
     except Exception as err:
         if os.path.isfile(dst_path):
             os.remove(dst_path)
-        log.error('Failed: {}: {}'.format(npm, err))
+        log.exception('Failed: {}'.format(npm))
         raise err
     log.info('Downloaded: {}'.format(npm))
 
@@ -81,12 +81,10 @@ def download(npm_list, registry, dst_dir, check_mode, progress=None, workers=Non
     base.start_progress(progress, len(npm_set), skipping, log)
     error_count = base.run_concurrent(workers, progress, download_npm, missing_npms, registry, dst_dir)
 
+    base.finish_progress(progress, error_count, log)
     if error_count > 0:
         log.error('{} packages were not downloaded. Check log for specific failures.'.format(error_count))
-
-    base.finish_progress(progress, error_count, log)
-
-    return error_count
+        raise RuntimeError()
 
 
 def run_cli():
index 7f9700a..732af0e 100755 (executable)
@@ -33,7 +33,7 @@ log = logging.getLogger(name=__name__)
 def download(rpm_list, dst_dir):
     if not base.check_tool('yumdownloader'):
         log.error('ERROR: yumdownloader is not installed')
-        return 1
+        raise RuntimeError('yumdownloader missing')
 
     rpm_set = base.load_list(rpm_list)
 
@@ -41,11 +41,10 @@ def download(rpm_list, dst_dir):
     log.info('Running command: {}'.format(command))
     try:
         subprocess.check_call(command.split())
-        log.info('Downloaded')
     except subprocess.CalledProcessError as err:
-        log.error(err.output)
-        return err.returncode
-
+        log.exception(err.output)
+        raise err
+    log.info('Downloaded')
 
 
 def run_cli():
@@ -59,7 +58,11 @@ def run_cli():
 
     logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
 
-    sys.exit(download(args.rpm_list, args.output_dir))
+    try:
+        download(args.rpm_list, args.output_dir)
+    except (subprocess.CalledProcessError, RuntimeError):
+        sys.exit(1)
+
 
 
 if __name__ == '__main__':