[virt-tools-list] [virt-bootstrap] [PATCH v6 06/26] DockerSource: Split checksum and layers

Sat Aug 19 12:30:07 UTC 2017

On Thu, 2017-08-17 at 10:39 +0100, Radostin Stoyanov wrote:
> The current implementation store in one list:
>     - checksum
>     - checksum type
>     - file path
>     - file size
> However, the information about checksum and checksum type is only used
> to verify the content of tarball before it is being extracted. Splitting
> these into separate lists would allow us to reuse the function
> untar_layers() with FileSource.
> ---
>  src/virtBootstrap/sources/docker_source.py | 12 ++++++++++--
>  src/virtBootstrap/utils.py                 | 19 +++++++++----------
>  tests/docker_source.py                     | 16 +++++++++-------
>  3 files changed, 28 insertions(+), 19 deletions(-)
> 
> diff --git a/src/virtBootstrap/sources/docker_source.py b/src/virtBootstrap/sources/docker_source.py
> index 54d8903..246356a 100644
> --- a/src/virtBootstrap/sources/docker_source.py
> +++ b/src/virtBootstrap/sources/docker_source.py
> @@ -65,6 +65,7 @@ class DockerSource(object):
>          self.images_dir = utils.get_image_dir(self.no_cache)
>          self.manifest = None
>          self.layers = []
> +        self.checksums = []
>  
>          if self.username and not self.password:
>              self.password = getpass.getpass()
> @@ -94,9 +95,13 @@ class DockerSource(object):
>              layer_digest = layer[digest_field]
>              layer_size = layer['size'] if 'size' in layer else None
>  
> +            # Store checksums of layers
>              sum_type, layer_sum = layer_digest.split(':')
> +            self.checksums.append([sum_type, layer_sum])
> +
> +            # Store file path and size of each layer
>              file_path = os.path.join(self.images_dir, layer_sum + '.tar')
> -            self.layers.append([sum_type, layer_sum, file_path, layer_size])
> +            self.layers.append([file_path, layer_size])
>  
>      def gen_valid_uri(self, uri):
>          """
> @@ -230,7 +235,10 @@ class DockerSource(object):
>          and have valid hash sum.
>          """
>          self.progress("Checking cached layers", value=0, logger=logger)
> -        for sum_type, sum_expected, path, _ignore in self.layers:
> +        for index, checksum in enumerate(self.checksums):
> +            path = self.layers[index][0]
> +            sum_type, sum_expected = checksum
> +
>              logger.debug("Checking layer: %s", path)
>              if not (os.path.exists(path)
>                      and utils.checksum(path, sum_type, sum_expected)):
> diff --git a/src/virtBootstrap/utils.py b/src/virtBootstrap/utils.py
> index fff7b35..730f939 100644
> --- a/src/virtBootstrap/utils.py
> +++ b/src/virtBootstrap/utils.py
> @@ -143,17 +143,16 @@ def size_to_bytes(number, fmt):
>              else False)
>  
>  
> -def log_layer_extract(layer, current, total, progress):
> +def log_layer_extract(tar_file, tar_size, current, total, progress):
>      """
>      Create log message on layer extract.
>      """
> -    sum_type, sum_value, layer_file, layer_size = layer
>      msg = 'Extracting layer (%s/%s)' % (current, total)
>  
> -    if layer_size:
> -        msg += " with size: %s" % bytes_to_size(layer_size)
> +    if tar_size:
> +        msg += " with size: %s" % bytes_to_size(tar_size)
>      progress(msg, logger=logger)
> -    logger.debug('Untar layer: (%s:%s) %s', sum_type, sum_value, layer_file)
> +    logger.debug('Untar layer: %s', tar_file)
>  
>  
>  def untar_layers(layers_list, dest_dir, progress):
> @@ -162,11 +161,11 @@ def untar_layers(layers_list, dest_dir, progress):
>      """
>      nlayers = len(layers_list)
>      for index, layer in enumerate(layers_list):
> -        log_layer_extract(layer, index + 1, nlayers, progress)
> -        layer_file = layer[2]
> +        tar_file, tar_size = layer
> +        log_layer_extract(tar_file, tar_size, index + 1, nlayers, progress)
>  
>          # Extract layer tarball into destination directory
> -        safe_untar(layer_file, dest_dir)
> +        safe_untar(tar_file, dest_dir)
>  
>          # Update progress value
>          progress(value=(float(index + 1) / nlayers * 50) + 50)
> @@ -243,8 +242,8 @@ def extract_layers_in_qcow2(layers_list, dest_dir, progress):
>  
>      nlayers = len(layers_list)
>      for index, layer in enumerate(layers_list):
> -        log_layer_extract(layer, index + 1, nlayers, progress)
> -        tar_file = layer[2]
> +        tar_file, tar_size = layer
> +        log_layer_extract(tar_file, tar_size, index + 1, nlayers, progress)
>  
>          # Name format for the qcow2 image
>          qcow2_layer_file = "{}/layer-{}.qcow2".format(dest_dir, index)
> diff --git a/tests/docker_source.py b/tests/docker_source.py
> index cf2b2d2..ee294c1 100644
> --- a/tests/docker_source.py
> +++ b/tests/docker_source.py
> @@ -483,12 +483,14 @@ class TestDockerSource(unittest.TestCase):
>          }
>  
>          expected_result = [
> -            ['sha256', 'a7050fc1', '/images_path/a7050fc1.tar', None],
> -            ['sha256', 'c6ff40b6', '/images_path/c6ff40b6.tar', None],
> -            ['sha256', '75c416ea', '/images_path/75c416ea.tar', None]
> +            ['/images_path/a7050fc1.tar', None],
> +            ['/images_path/c6ff40b6.tar', None],
> +            ['/images_path/75c416ea.tar', None]
>          ]
>  
> -        src_instance = self._mock_retrieve_layers_info(manifest, kwargs)[0]
> +        with mock.patch('os.path.getsize') as m_getsize:
> +            m_getsize.return_value = None
> +            src_instance = self._mock_retrieve_layers_info(manifest, kwargs)[0]
>          self.assertEqual(src_instance.layers, expected_result)
>  
>      def test_retrieve_layers_info_schema_version_2(self):
> @@ -512,9 +514,9 @@ class TestDockerSource(unittest.TestCase):
>          }
>  
>          expected_result = [
> -            ['sha256', '75c416ea', '/images_path/75c416ea.tar', 47103294],
> -            ['sha256', 'c6ff40b6', '/images_path/c6ff40b6.tar', 814],
> -            ['sha256', 'a7050fc1', '/images_path/a7050fc1.tar', 513]
> +            ['/images_path/75c416ea.tar', 47103294],
> +            ['/images_path/c6ff40b6.tar', 814],
> +            ['/images_path/a7050fc1.tar', 513]
>          ]
>  
>          src_instance = self._mock_retrieve_layers_info(manifest, kwargs)[0]

Would be good to add the call to untar_layers() in the commit rather than have it
swamped in a future one.

ACK with this addition

--
Cedric