Fix split buffer with inconsistently delimited json objects.

Signed-off-by: Daniel Nephin <dnephin@docker.com>
This commit is contained in:
Daniel Nephin 2015-10-05 12:56:10 -04:00
parent 3661e8bc74
commit 15d0c60a73
6 changed files with 62 additions and 22 deletions

View File

@@ -1,5 +1,3 @@
import json
from compose import utils from compose import utils
@@ -14,8 +12,7 @@ def stream_output(output, stream):
lines = {} lines = {}
diff = 0 diff = 0
for chunk in utils.stream_as_text(output): for event in utils.json_stream(output):
event = json.loads(chunk)
all_events.append(event) all_events.append(event)
if 'progress' in event or 'progressDetail' in event: if 'progress' in event or 'progressDetail' in event:

View File

@@ -33,7 +33,6 @@ from .progress_stream import stream_output
from .progress_stream import StreamOutputError from .progress_stream import StreamOutputError
from .utils import json_hash from .utils import json_hash
from .utils import parallel_execute from .utils import parallel_execute
from .utils import split_buffer
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -724,7 +723,7 @@ class Service(object):
) )
try: try:
all_events = stream_output(split_buffer(build_output), sys.stdout) all_events = stream_output(build_output, sys.stdout)
except StreamOutputError as e: except StreamOutputError as e:
raise BuildError(self, six.text_type(e)) raise BuildError(self, six.text_type(e))

View File

@@ -1,6 +1,7 @@
import codecs import codecs
import hashlib import hashlib
import json import json
import json.decoder
import logging import logging
import sys import sys
from threading import Thread from threading import Thread
@@ -13,6 +14,8 @@ from six.moves.queue import Queue
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
json_decoder = json.JSONDecoder()
def parallel_execute(objects, obj_callable, msg_index, msg): def parallel_execute(objects, obj_callable, msg_index, msg):
""" """
@@ -96,29 +99,56 @@ def stream_as_text(stream):
yield data yield data
def line_splitter(buffer, separator=u'\n'):
    """Split one chunk off the front of *buffer* at the first *separator*.

    Returns a ``(chunk, rest)`` pair where ``chunk`` includes the trailing
    separator, or ``(None, None)`` when the buffer contains no separator
    (i.e. more input is needed before a complete chunk is available).
    """
    # six.text_type coerces the separator so find() compares text-to-text
    # on both Python 2 and 3.
    index = buffer.find(six.text_type(separator))
    if index == -1:
        return None, None
    return buffer[:index + 1], buffer[index + 1:]
def split_buffer(stream, splitter=None, decoder=lambda a: a):
    """Given a generator which yields strings and a splitter function,
    joins all input, splits on the separator and yields each chunk.

    Unlike string.split(), each chunk includes the trailing
    separator, except for the last one if none was found on the end
    of the input.

    :param stream: iterable of text or byte chunks (bytes are decoded
        by ``stream_as_text``)
    :param splitter: callable ``buffer -> (item, rest)`` returning
        ``(None, None)`` when no complete item is buffered yet;
        defaults to newline splitting via ``line_splitter``
    :param decoder: applied to any trailing, separator-less remainder
        before it is yielded (identity by default)
    """
    splitter = splitter or line_splitter
    buffered = six.text_type('')

    for data in stream_as_text(stream):
        buffered += data
        # Drain every complete item currently in the buffer before
        # reading more input.
        while True:
            item, rest = splitter(buffered)
            if not item:
                break
            buffered = rest
            yield item

    # Whatever is left had no trailing separator; decode it explicitly
    # since the splitter never saw it as a complete item.
    if buffered:
        yield decoder(buffered)
def json_splitter(buffer):
    """Attempt to parse a json object from a buffer. If there is at least one
    object, return it and the rest of the buffer, otherwise return None.

    Returns ``(obj, rest)`` on success, where ``rest`` has any JSON
    whitespace (space, tab, CR, LF) between objects stripped from its
    front, or ``(None, None)`` when the buffer holds no complete object.
    """
    try:
        # raw_decode stops at the end of the first complete JSON value
        # and reports how many characters it consumed.
        obj, index = json.JSONDecoder().raw_decode(buffer)
        # Skip inter-object whitespace without relying on the private
        # json.decoder.WHITESPACE regex; the character set is identical.
        rest = buffer[index:].lstrip(' \t\n\r')
        return obj, rest
    except ValueError:
        # Incomplete or malformed object: signal "need more input".
        return None, None
def json_stream(stream):
    """Given a stream of text, return a stream of json objects.
    This handles streams which are inconsistently buffered (some entries may
    be newline delimited, and others are not).
    """
    text = stream_as_text(stream)
    return split_buffer(text, json_splitter, json_decoder.decode)
def write_out_msg(stream, lines, msg_index, msg, status="done"): def write_out_msg(stream, lines, msg_index, msg, status="done"):

View File

@@ -9,8 +9,6 @@ from compose.config.config import ServiceLoader
from compose.const import LABEL_PROJECT from compose.const import LABEL_PROJECT
from compose.progress_stream import stream_output from compose.progress_stream import stream_output
from compose.service import Service from compose.service import Service
from compose.utils import split_buffer
from compose.utils import stream_as_text
def pull_busybox(client): def pull_busybox(client):
@@ -73,5 +71,5 @@ class DockerClientTestCase(unittest.TestCase):
def check_build(self, *args, **kwargs): def check_build(self, *args, **kwargs):
kwargs.setdefault('rm', True) kwargs.setdefault('rm', True)
build_output = stream_as_text(self.client.build(*args, **kwargs)) build_output = self.client.build(*args, **kwargs)
stream_output(split_buffer(build_output), open('/dev/null', 'w')) stream_output(build_output, open('/dev/null', 'w'))

View File

@@ -47,7 +47,7 @@ class SplitBufferTest(unittest.TestCase):
self.assert_produces(reader, [string]) self.assert_produces(reader, [string])
def assert_produces(self, reader, expectations): def assert_produces(self, reader, expectations):
split = split_buffer(reader(), u'\n') split = split_buffer(reader())
for (actual, expected) in zip(split, expectations): for (actual, expected) in zip(split, expectations):
self.assertEqual(type(actual), type(expected)) self.assertEqual(type(actual), type(expected))

16
tests/unit/utils_test.py Normal file
View File

@@ -0,0 +1,16 @@
from .. import unittest
from compose import utils
class JsonSplitterTestCase(unittest.TestCase):
    """Unit tests for utils.json_splitter."""

    def test_json_splitter_no_object(self):
        # An unterminated object means "need more input": both the
        # parsed value and the remainder are None.
        incomplete = '{"foo": "bar'
        self.assertEqual((None, None), utils.json_splitter(incomplete))

    def test_json_splitter_with_object(self):
        # One complete object followed by whitespace and a second object:
        # the first is parsed, the rest is returned with leading
        # whitespace stripped.
        buffered = '{"foo": "bar"}\n \n{"next": "obj"}'
        parsed, remainder = utils.json_splitter(buffered)
        self.assertEqual(parsed, {'foo': 'bar'})
        self.assertEqual(remainder, '{"next": "obj"}')