fix race condition in Service.create_container()

The Service.create_container() method fetches a list of the current
containers in order to determine the next container number. In doing so,
it makes several API calls: one to fetch the list of containers, then
one per container in order to inspect it.

In some situations it can happen that a container is removed after
having been listed: in that case, the call to inspect will get a 404 and
raise a NotFound.

One situation in which this has been observed is when trying to
concurrently create multiple one-off containers for the same service
(using `docker-compose run` and a unique `--name`), as described in
more detail in gh-5179.

This patch adds a unit test that simulates the race between the
calls to list and to inspect, and changes Service._next_container_number
to skip removed containers instead of blowing up.

Fixes gh-5179

Signed-off-by: Alberto Piai <apiai@sensational.ch>
This commit is contained in:
Alberto Piai 2018-03-25 22:58:59 +02:00
parent 7d68d4bb44
commit 394c8efe98
2 changed files with 51 additions and 6 deletions

View File

@ -685,15 +685,27 @@ class Service(object):
# TODO: this would benefit from github.com/docker/docker/pull/14699
# to remove the need to inspect every container
def _next_container_number(self, one_off=False):
containers = filter(None, [
Container.from_ps(self.client, container)
for container in self.client.containers(
all=True,
filters={'label': self.labels(one_off=one_off)})
])
containers = self._fetch_containers(
all=True,
filters={'label': self.labels(one_off=one_off)}
)
numbers = [c.number for c in containers]
return 1 if not numbers else max(numbers) + 1
def _fetch_containers(self, **fetch_options):
# Account for containers that might have been removed since we fetched
# the list.
def soft_inspect(container):
try:
return Container.from_id(self.client, container['Id'])
except NotFound:
return None
return filter(None, [
soft_inspect(container)
for container in self.client.containers(**fetch_options)
])
def _get_aliases(self, network, container=None):
return list(
{self.name} |

View File

@ -5,6 +5,7 @@ import docker
import pytest
from docker.constants import DEFAULT_DOCKER_API_VERSION
from docker.errors import APIError
from docker.errors import NotFound
from .. import mock
from .. import unittest
@ -888,6 +889,38 @@ class ServiceTest(unittest.TestCase):
'ftp_proxy': override_options['environment']['FTP_PROXY'],
}))
def test_create_when_removed_containers_are_listed(self):
# This is aimed at simulating a race between the API call to list the
# containers, and the ones to inspect each of the listed containers.
# It can happen that a container has been removed after we listed it.
# containers() returns a container that is about to be removed
self.mock_client.containers.return_value = [
{'Id': 'rm_cont_id', 'Name': 'rm_cont', 'Image': 'img_id'},
]
# inspect_container() will raise a NotFound when trying to inspect
# rm_cont_id, which at this point has been removed
def inspect(name):
if name == 'rm_cont_id':
raise NotFound(message='Not Found')
if name == 'new_cont_id':
return {'Id': 'new_cont_id'}
raise NotImplementedError("incomplete mock")
self.mock_client.inspect_container.side_effect = inspect
self.mock_client.inspect_image.return_value = {'Id': 'imageid'}
self.mock_client.create_container.return_value = {'Id': 'new_cont_id'}
# We should nonetheless be able to create a new container
service = Service('foo', client=self.mock_client)
assert service.create_container().id == 'new_cont_id'
class TestServiceNetwork(unittest.TestCase):
def setUp(self):