Merge pull request #5583 from docker/Rozelette-5465-no-image-error

Recover from ImageNotFound when recreating service
This commit is contained in:
Joffrey F 2018-01-19 15:37:08 -08:00 committed by GitHub
commit c97dbf260b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 138 additions and 63 deletions

View File

@ -971,25 +971,42 @@ class TopLevelCommand(object):
if ignore_orphans and remove_orphans: if ignore_orphans and remove_orphans:
raise UserError("COMPOSE_IGNORE_ORPHANS and --remove-orphans cannot be combined.") raise UserError("COMPOSE_IGNORE_ORPHANS and --remove-orphans cannot be combined.")
if no_start: opts = ['-d', '--abort-on-container-exit', '--exit-code-from']
for excluded in ['-d', '--abort-on-container-exit', '--exit-code-from']: for excluded in [x for x in opts if options.get(x) and no_start]:
if options.get(excluded): raise UserError('--no-start and {} cannot be combined.'.format(excluded))
raise UserError('--no-start and {} cannot be combined.'.format(excluded))
with up_shutdown_context(self.project, service_names, timeout, detached): with up_shutdown_context(self.project, service_names, timeout, detached):
to_attach = self.project.up( warn_for_swarm_mode(self.project.client)
service_names=service_names,
start_deps=start_deps, def up(rebuild):
strategy=convergence_strategy_from_opts(options), return self.project.up(
do_build=build_action_from_opts(options), service_names=service_names,
timeout=timeout, start_deps=start_deps,
detached=detached, strategy=convergence_strategy_from_opts(options),
remove_orphans=remove_orphans, do_build=build_action_from_opts(options),
ignore_orphans=ignore_orphans, timeout=timeout,
scale_override=parse_scale_args(options['--scale']), detached=detached,
start=not no_start, remove_orphans=remove_orphans,
always_recreate_deps=always_recreate_deps ignore_orphans=ignore_orphans,
) scale_override=parse_scale_args(options['--scale']),
start=not no_start,
always_recreate_deps=always_recreate_deps,
reset_container_image=rebuild,
)
try:
    # First attempt: recreate containers against their recorded image IDs.
    to_attach = up(False)
except docker.errors.ImageNotFound:
    # The message has no placeholder, so the previous
    # `.format(e.explanation)` call was a no-op and has been dropped.
    log.error(
        "The image for the service you're trying to recreate has been removed. "
        "If you continue, volume data could be lost. Consider backing up your data "
        "before continuing.\n"
    )
    # Anything other than an explicit "yes" (including no answer) aborts.
    if not yesno("Continue with the new image? [yN]", False):
        # Bare `raise` re-raises the active exception with its original
        # traceback intact.
        raise
    # Second attempt: up(True) forwards reset_container_image=True.
    to_attach = up(True)
if detached or no_start: if detached or no_start:
return return
@ -1412,3 +1429,19 @@ def build_filter(arg):
key, val = arg.split('=', 1) key, val = arg.split('=', 1)
filt[key] = val filt[key] = val
return filt return filt
def warn_for_swarm_mode(client):
    """Log a warning when the engine behind ``client`` runs in swarm mode.

    The result of ``client.info()`` is inspected: nothing is logged unless
    ``Swarm.LocalNodeState`` equals ``'active'``, and nothing is logged when
    ``ServerVersion`` starts with ``'ucp'``.
    """
    engine_info = client.info()

    node_state = engine_info.get('Swarm', {}).get('LocalNodeState')
    if node_state != 'active':
        return

    server_version = engine_info.get('ServerVersion', '')
    if server_version.startswith('ucp'):
        # UCP does multi-node scheduling with traditional Compose files.
        return

    log.warn(
        "The Docker Engine you're using is running in swarm mode.\n\n"
        "Compose does not use swarm mode to deploy services to multiple nodes in a swarm. "
        "All containers will be scheduled on the current node.\n\n"
        "To deploy your application across the swarm, "
        "use `docker stack deploy`.\n"
    )

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from functools import reduce from functools import reduce
import six import six
from docker.errors import ImageNotFound
from .const import LABEL_CONTAINER_NUMBER from .const import LABEL_CONTAINER_NUMBER
from .const import LABEL_PROJECT from .const import LABEL_PROJECT
@ -66,15 +67,17 @@ class Container(object):
def name(self): def name(self):
return self.dictionary['Name'][1:] return self.dictionary['Name'][1:]
@property
def project(self):
    """Value stored under the ``LABEL_PROJECT`` key of this container's labels.

    Looked up with ``labels.get``, so a missing label is not an error here.
    """
    project_label = self.labels.get(LABEL_PROJECT)
    return project_label
@property @property
def service(self): def service(self):
return self.labels.get(LABEL_SERVICE) return self.labels.get(LABEL_SERVICE)
@property @property
def name_without_project(self): def name_without_project(self):
project = self.labels.get(LABEL_PROJECT) if self.name.startswith('{0}_{1}'.format(self.project, self.service)):
if self.name.startswith('{0}_{1}'.format(project, self.service)):
return '{0}_{1}'.format(self.service, self.number) return '{0}_{1}'.format(self.service, self.number)
else: else:
return self.name return self.name
@ -230,10 +233,10 @@ class Container(object):
"""Rename the container to a hopefully unique temporary container name """Rename the container to a hopefully unique temporary container name
by prepending the short id. by prepending the short id.
""" """
self.client.rename( if not self.name.startswith(self.short_id):
self.id, self.client.rename(
'%s_%s' % (self.short_id, self.name) self.id, '{0}_{1}'.format(self.short_id, self.name)
) )
def inspect_if_not_inspected(self): def inspect_if_not_inspected(self):
if not self.has_been_inspected: if not self.has_been_inspected:
@ -250,6 +253,21 @@ class Container(object):
self.has_been_inspected = True self.has_been_inspected = True
return self.dictionary return self.dictionary
def image_exists(self):
    """Report whether this container's image can still be inspected.

    Returns ``False`` when ``client.inspect_image`` raises ``ImageNotFound``
    for ``self.image``, ``True`` when the call succeeds. Other exceptions
    propagate unchanged.
    """
    try:
        self.client.inspect_image(self.image)
    except ImageNotFound:
        found = False
    else:
        found = True
    return found
def reset_image(self, img_id):
    """Point this container object at ``img_id`` if its own image is gone.

    When ``image_exists()`` reports the image as present, nothing changes.
    Otherwise the ``'Image'`` entry of ``self.dictionary`` is overwritten
    with ``img_id``; only this object's dictionary is modified here.
    """
    if self.image_exists():
        return
    self.dictionary['Image'] = img_id
def attach(self, *args, **kwargs): def attach(self, *args, **kwargs):
return self.client.attach(self.id, *args, **kwargs) return self.client.attach(self.id, *args, **kwargs)

View File

@ -8,6 +8,7 @@ from threading import Semaphore
from threading import Thread from threading import Thread
from docker.errors import APIError from docker.errors import APIError
from docker.errors import ImageNotFound
from six.moves import _thread as thread from six.moves import _thread as thread
from six.moves.queue import Empty from six.moves.queue import Empty
from six.moves.queue import Queue from six.moves.queue import Queue
@ -53,10 +54,7 @@ def parallel_execute(objects, func, get_name, msg, get_deps=None, limit=None, pa
writer = ParallelStreamWriter(stream, msg) writer = ParallelStreamWriter(stream, msg)
if parent_objects: display_objects = list(parent_objects) if parent_objects else objects
display_objects = list(parent_objects)
else:
display_objects = objects
for obj in display_objects: for obj in display_objects:
writer.add_object(get_name(obj)) writer.add_object(get_name(obj))
@ -76,6 +74,12 @@ def parallel_execute(objects, func, get_name, msg, get_deps=None, limit=None, pa
if exception is None: if exception is None:
writer.write(get_name(obj), 'done', green) writer.write(get_name(obj), 'done', green)
results.append(result) results.append(result)
elif isinstance(exception, ImageNotFound):
# This is to bubble up ImageNotFound exceptions to the client so we
# can prompt the user if they want to rebuild.
errors[get_name(obj)] = exception.explanation
writer.write(get_name(obj), 'error', red)
error_to_reraise = exception
elif isinstance(exception, APIError): elif isinstance(exception, APIError):
errors[get_name(obj)] = exception.explanation errors[get_name(obj)] = exception.explanation
writer.write(get_name(obj), 'error', red) writer.write(get_name(obj), 'error', red)

View File

@ -444,9 +444,8 @@ class Project(object):
scale_override=None, scale_override=None,
rescale=True, rescale=True,
start=True, start=True,
always_recreate_deps=False): always_recreate_deps=False,
reset_container_image=False):
warn_for_swarm_mode(self.client)
self.initialize() self.initialize()
if not ignore_orphans: if not ignore_orphans:
@ -474,7 +473,8 @@ class Project(object):
scale_override=scale_override.get(service.name), scale_override=scale_override.get(service.name),
rescale=rescale, rescale=rescale,
start=start, start=start,
project_services=scaled_services project_services=scaled_services,
reset_container_image=reset_container_image
) )
def get_deps(service): def get_deps(service):
@ -686,22 +686,6 @@ def get_secrets(service, service_secrets, secret_defs):
return secrets return secrets
def warn_for_swarm_mode(client):
    """Warn that Compose schedules everything on the current node under swarm mode.

    Uses ``client.info()``: the warning is logged only when
    ``Swarm.LocalNodeState`` is ``'active'`` and ``ServerVersion`` does not
    start with ``'ucp'``.
    """
    info = client.info()
    swarm_active = info.get('Swarm', {}).get('LocalNodeState') == 'active'

    # UCP does multi-node scheduling with traditional Compose files, so it is
    # exempt. ServerVersion is only consulted once swarm mode is confirmed.
    if swarm_active and not info.get('ServerVersion', '').startswith('ucp'):
        log.warn(
            "The Docker Engine you're using is running in swarm mode.\n\n"
            "Compose does not use swarm mode to deploy services to multiple nodes in a swarm. "
            "All containers will be scheduled on the current node.\n\n"
            "To deploy your application across the swarm, "
            "use `docker stack deploy`.\n"
        )
class NoSuchService(Exception): class NoSuchService(Exception):
def __init__(self, name): def __init__(self, name):
if isinstance(name, six.binary_type): if isinstance(name, six.binary_type):

View File

@ -468,7 +468,9 @@ class Service(object):
) )
def execute_convergence_plan(self, plan, timeout=None, detached=False, def execute_convergence_plan(self, plan, timeout=None, detached=False,
start=True, scale_override=None, rescale=True, project_services=None): start=True, scale_override=None,
rescale=True, project_services=None,
reset_container_image=False):
(action, containers) = plan (action, containers) = plan
scale = scale_override if scale_override is not None else self.scale_num scale = scale_override if scale_override is not None else self.scale_num
containers = sorted(containers, key=attrgetter('number')) containers = sorted(containers, key=attrgetter('number'))
@ -486,6 +488,12 @@ class Service(object):
scale = None scale = None
if action == 'recreate': if action == 'recreate':
if reset_container_image:
# Updating the image ID on the container object lets us recover old volumes if
# the new image uses them as well
img_id = self.image()['Id']
for c in containers:
c.reset_image(img_id)
return self._execute_convergence_recreate( return self._execute_convergence_recreate(
containers, scale, timeout, detached, start containers, scale, timeout, detached, start
) )
@ -507,12 +515,7 @@ class Service(object):
raise Exception("Invalid action: {}".format(action)) raise Exception("Invalid action: {}".format(action))
def recreate_container( def recreate_container(self, container, timeout=None, attach_logs=False, start_new_container=True):
self,
container,
timeout=None,
attach_logs=False,
start_new_container=True):
"""Recreate a container. """Recreate a container.
The original container is renamed to a temporary name so that data The original container is renamed to a temporary name so that data
@ -1316,6 +1319,7 @@ def get_container_data_volumes(container, volumes_option, tmpfs_option, mounts_o
a mapping of volume bindings for those volumes. a mapping of volume bindings for those volumes.
Anonymous volume mounts are updated in place instead. Anonymous volume mounts are updated in place instead.
""" """
volumes = [] volumes = []
volumes_option = volumes_option or [] volumes_option = volumes_option or []

View File

@ -10,6 +10,7 @@ from os import path
import pytest import pytest
from docker.errors import APIError from docker.errors import APIError
from docker.errors import ImageNotFound
from six import StringIO from six import StringIO
from six import text_type from six import text_type
@ -659,6 +660,35 @@ class ServiceTest(DockerClientTestCase):
assert len(service_containers) == 1 assert len(service_containers) == 1
assert not service_containers[0].is_running assert not service_containers[0].is_running
def test_execute_convergence_plan_image_with_volume_is_removed(self):
    """Recreating after the image was removed keeps the original data volume.

    Without ``reset_container_image`` the recreate plan must raise
    ``ImageNotFound``; with it, the new container mounts ``/data`` from the
    same source path as the old one.
    """
    def mount_destinations(container):
        return [mount['Destination'] for mount in container.get('Mounts')]

    service = self.create_service(
        'db', build={'context': 'tests/fixtures/dockerfile-with-volume'}
    )
    original = create_and_start_container(service)
    assert mount_destinations(original) == ['/data']
    original_source = original.get_mount('/data')['Source']

    # Remove the image the stopped container refers to, then make sure the
    # service has an image again.
    original.stop()
    self.client.remove_image(service.image(), force=True)
    service.ensure_image_exists()

    with pytest.raises(ImageNotFound):
        service.execute_convergence_plan(
            ConvergencePlan('recreate', [original])
        )

    original.inspect()  # retrieve new name from server

    recreated = service.execute_convergence_plan(
        ConvergencePlan('recreate', [original]),
        reset_container_image=True
    )
    (replacement,) = recreated
    assert mount_destinations(replacement) == ['/data']
    assert replacement.get_mount('/data')['Source'] == original_source
def test_start_container_passes_through_options(self): def test_start_container_passes_through_options(self):
db = self.create_service('db') db = self.create_service('db')
create_and_start_container(db, environment={'FOO': 'BAR'}) create_and_start_container(db, environment={'FOO': 'BAR'})

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import logging import logging
import docker
import pytest import pytest
from compose import container from compose import container
@ -11,6 +12,7 @@ from compose.cli.formatter import ConsoleWarningFormatter
from compose.cli.main import convergence_strategy_from_opts from compose.cli.main import convergence_strategy_from_opts
from compose.cli.main import filter_containers_to_service_names from compose.cli.main import filter_containers_to_service_names
from compose.cli.main import setup_console_handler from compose.cli.main import setup_console_handler
from compose.cli.main import warn_for_swarm_mode
from compose.service import ConvergenceStrategy from compose.service import ConvergenceStrategy
from tests import mock from tests import mock
@ -54,6 +56,14 @@ class TestCLIMainTestCase(object):
actual = filter_containers_to_service_names(containers, service_names) actual = filter_containers_to_service_names(containers, service_names)
assert actual == containers assert actual == containers
def test_warning_in_swarm_mode(self):
    """An active swarm node state makes warn_for_swarm_mode log exactly one warning."""
    swarm_info = {'Swarm': {'LocalNodeState': 'active'}}
    client = mock.create_autospec(docker.APIClient)
    client.info.return_value = swarm_info

    with mock.patch('compose.cli.main.log') as patched_log:
        warn_for_swarm_mode(client)
        assert patched_log.warn.call_count == 1
class TestSetupConsoleHandlerTestCase(object): class TestSetupConsoleHandlerTestCase(object):

View File

@ -533,14 +533,6 @@ class ProjectTest(unittest.TestCase):
project.down(ImageType.all, True) project.down(ImageType.all, True)
self.mock_client.remove_image.assert_called_once_with("busybox:latest") self.mock_client.remove_image.assert_called_once_with("busybox:latest")
def test_warning_in_swarm_mode(self):
    """Project.up() logs exactly one warning when the engine reports an active swarm."""
    swarm_info = {'Swarm': {'LocalNodeState': 'active'}}
    self.mock_client.info.return_value = swarm_info
    swarm_project = Project('composetest', [], self.mock_client)

    with mock.patch('compose.project.log') as patched_log:
        swarm_project.up()
        assert patched_log.warn.call_count == 1
def test_no_warning_on_stop(self): def test_no_warning_on_stop(self):
self.mock_client.info.return_value = {'Swarm': {'LocalNodeState': 'active'}} self.mock_client.info.return_value = {'Swarm': {'LocalNodeState': 'active'}}
project = Project('composetest', [], self.mock_client) project = Project('composetest', [], self.mock_client)