Merge pull request #1217 from ZyX-I/ucs-2-python

Add support for UCS-2 Python builds
This commit is contained in:
Nikolai Aleksandrovich Pavlov 2014-12-05 23:46:40 +03:00
commit 432cc5031f
12 changed files with 416 additions and 60 deletions

View File

@ -7,6 +7,15 @@ Generic requirements
* Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only * Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only
non-optional requirement. non-optional requirement.
.. warning::
It is highly advised to use UCS-4 version of Python because UCS-2 version
uses significantly slower text processing (length determination and
non-printable character replacement) functions due to the need of
supporting unicode characters above U+FFFF which are represented as
surrogate pairs. This price will be paid even if configuration has no such
characters.
* C compiler. Required to build powerline client on linux. If it is not present * C compiler. Required to build powerline client on linux. If it is not present
then powerline will fall back to shell script or python client. then powerline will fall back to shell script or python client.
* ``socat`` program. Required for shell variant of client which runs a bit * ``socat`` program. Required for shell variant of client which runs a bit

View File

@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys import sys
import codecs import codecs
from unicodedata import east_asian_width, combining
from powerline.lib.encoding import get_preferred_output_encoding from powerline.lib.encoding import get_preferred_output_encoding
@ -128,3 +130,99 @@ def string(s):
return s.encode('utf-8') return s.encode('utf-8')
else: else:
return s return s
def surrogate_pair_to_character(high, low):
	'''Combine a pair of surrogate codepoints into the codepoint they encode

	:param int high:
		Codepoint of the high (leading) surrogate.
	:param int low:
		Codepoint of the low (trailing) surrogate.

	No range validation is performed on either argument.

	:return: integer: 0x10000 plus the offsets of both surrogates from their
	         range starts, the high one shifted left by ten bits.
	'''
	high_bits = high - 0xD800
	low_bits = low - 0xDC00
	return 0x10000 + (high_bits << 10) + low_bits
_strwidth_documentation = (
'''Compute string width in display cells
{0}
:param dict width_data:
Dictionary which maps east_asian_width property values to strings
lengths. It is expected to contain the following keys and values (from
`East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
=== ====== ===========================================================
Key Value Description
=== ====== ===========================================================
F 2 Fullwidth: all characters that are defined as Fullwidth in
the Unicode Standard [Unicode] by having a compatibility
decomposition of type <wide> to characters elsewhere in the
Unicode Standard that are implicitly narrow but unmarked.
H 1 Halfwidth: all characters that are explicitly defined as
Halfwidth in the Unicode Standard by having a compatibility
decomposition of type <narrow> to characters elsewhere in
the Unicode Standard that are implicitly wide but unmarked,
plus U+20A9 WON SIGN.
W 2 Wide: all other characters that are always wide. These
characters occur only in the context of East Asian
typography where they are wide characters (such as the
Unified Han Ideographs or Squared Katakana Symbols). This
category includes characters that have explicit halfwidth
counterparts.
Na 1 Narrow: characters that are always narrow and have explicit
fullwidth or wide counterparts. These characters are
implicitly narrow in East Asian typography and legacy
character sets because they have explicit fullwidth or wide
counterparts. All of ASCII is an example of East Asian
Narrow characters.
A 1 or 2 Ambigious: characters that may sometimes be wide and
sometimes narrow. Ambiguous characters require additional
information not contained in the character code to further
resolve their width. This information is usually defined in
terminal setting that should in turn respect glyphs widths
in used fonts. Also see :ref:`ambiwidth configuration
option <config-common-ambiwidth>`.
N 1 Neutral characters: character that does not occur in legacy
East Asian character sets.
=== ====== ===========================================================
:param unicode string:
String whose width will be calculated.
:return: unsigned integer.''')
def strwidth_ucs_4(width_data, string):
	# Documentation is attached through ``strwidth_ucs_4.__doc__`` right after
	# the definition.
	total = 0
	for symbol in string:
		# Symbols for which ``combining()`` is non-zero add nothing to the
		# width; everything else is looked up by its east_asian_width class.
		if not combining(symbol):
			total += width_data[east_asian_width(symbol)]
	return total
strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
'''This version of function expects that characters above 0xFFFF are
represented using one symbol. This is only the case in UCS-4 Python builds.
.. note:
Even in UCS-4 Python builds it is possible to represent characters above
0xFFFF using surrogate pairs. Characters represented this way are not
supported.''')
def strwidth_ucs_2(width_data, string):
	# Documentation is attached through ``strwidth_ucs_2.__doc__`` right after
	# the definition.
	#
	# A low surrogate is only combined with the preceding symbol when that
	# symbol really is a high surrogate. Previously ``string[i - 1]`` was used
	# unconditionally, so an unpaired low surrogate (including one at index 0,
	# where ``i - 1`` wraps around to the last symbol) handed a bogus
	# two-character string to ``east_asian_width`` and raised TypeError.
	narrow_build = sys.maxunicode < 0x10FFFF
	total = 0
	# Pending high surrogate that still waits for its low half, if any.
	high = None
	for symbol in string:
		code = ord(symbol)
		if 0xD800 <= code <= 0xDBFF:
			# High surrogates never add width on their own: the width of the
			# pair is accounted for when the low surrogate is seen.
			high = symbol
			continue
		if high is not None and 0xDC00 <= code <= 0xDFFF:
			if narrow_build:
				# Narrow builds: unicodedata is handed the surrogate pair as
				# a two-character string.
				character = high + symbol
			else:
				# Wide builds: unicodedata only accepts one character, so the
				# pair is joined first (same formula as
				# ``surrogate_pair_to_character``).
				character = unichr(
					0x10000 + ((ord(high) - 0xD800) << 10) + (code - 0xDC00))
			high = None
			total += width_data[east_asian_width(character)]
			continue
		# Regular symbol or unpaired low surrogate: handled like in
		# ``strwidth_ucs_4``.
		high = None
		if not combining(symbol):
			total += width_data[east_asian_width(symbol)]
	return total
strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
'''This version of function expects that characters above 0xFFFF are
represented using two symbols forming a surrogate pair, which is the only
option in UCS-2 Python builds. It still works correctly in UCS-4 Python
builds, but is slower then its UCS-4 counterpart.''')

View File

@ -41,7 +41,7 @@ def generate_json_config_loader(lhadproblem):
function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$' function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$'
divider_spec = Spec().type(unicode).len( divider_spec = Spec().printable().len(
'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy 'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy
ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy
top_theme_spec = Spec().type(unicode).func(check_top_theme).copy top_theme_spec = Spec().type(unicode).func(check_top_theme).copy
@ -211,12 +211,12 @@ segment_spec = Spec(
display=Spec().type(bool).optional(), display=Spec().type(bool).optional(),
module=segment_module_spec(), module=segment_module_spec(),
priority=Spec().type(int, float, type(None)).optional(), priority=Spec().type(int, float, type(None)).optional(),
after=Spec().type(unicode).optional(), after=Spec().printable().optional(),
before=Spec().type(unicode).optional(), before=Spec().printable().optional(),
width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(), width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(),
align=Spec().oneof(set('lr')).optional(), align=Spec().oneof(set('lr')).optional(),
args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)), args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)),
contents=Spec().type(unicode).optional(), contents=Spec().printable().optional(),
highlight_group=Spec().list( highlight_group=Spec().list(
highlight_group_spec().re( highlight_group_spec().re(
'^(?:(?!:divider$).)+$', '^(?:(?!:divider$).)+$',
@ -243,11 +243,11 @@ divside_spec = Spec(
soft=divider_spec(), soft=divider_spec(),
).copy ).copy
segment_data_value_spec = Spec( segment_data_value_spec = Spec(
after=Spec().type(unicode).optional(), after=Spec().printable().optional(),
before=Spec().type(unicode).optional(), before=Spec().printable().optional(),
display=Spec().type(bool).optional(), display=Spec().type(bool).optional(),
args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)), args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)),
contents=Spec().type(unicode).optional(), contents=Spec().printable().optional(),
).copy ).copy
dividers_spec = Spec( dividers_spec = Spec(
left=divside_spec(), left=divside_spec(),

View File

@ -7,7 +7,26 @@ import re
from powerline.lib.unicode import unichr from powerline.lib.unicode import unichr
# Source of the regular expression that matches one non-printable character.
# Kept as a string so that other modules can derive stricter variants from it.
NON_PRINTABLE_STR = (
	'[^'
	# ASCII control characters: 0x00-0x1F
	+ '\t\n'           # Tab, newline: allowed ASCII control characters
	+ '\x20-\x7E'      # ASCII printable characters
	# Unicode control characters: 0x7F-0x9F
	+ '\u0085'         # Allowed unicode control character: next line character
	+ '\u00A0-\uD7FF'
	# Surrogate escapes: 0xD800-0xDFFF
	+ '\uE000-\uFFFD'
	+ ((
		# Narrow builds: surrogates must not be rejected by the character
		# class itself, otherwise both halves of a valid pair are reported.
		# Unpaired ones are caught by the alternatives appended below.
		'\uD800-\uDFFF'
	) if sys.maxunicode < 0x10FFFF else (
		# Wide builds: characters above 0xFFFF are single symbols.
		'\U00010000-\U0010FFFF'
	))
	+ ']'
	+ ((
		# Paired surrogate escapes: allowed in UCS-2 builds as the only way to
		# represent characters above 0xFFFF. Only paired variant is allowed,
		# so match a low surrogate without preceding high surrogate and a high
		# surrogate without following low surrogate.
		'|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]'
		+ '|[\uD800-\uDBFF](?![\uDC00-\uDFFF])'
	) if sys.maxunicode < 0x10FFFF else (
		''
	))
)
NON_PRINTABLE_RE = re.compile(NON_PRINTABLE_STR)
def repl(s): def repl(s):
@ -15,7 +34,7 @@ def repl(s):
def strtrans(s): def strtrans(s):
return NON_PRINTABLE.sub(repl, s.replace('\t', '>---')) return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---'))
class Mark: class Mark:
@ -55,6 +74,13 @@ class Mark:
+ ' ' * (indent + len(head) + len(snippet[0])) + '^' + ' ' * (indent + len(head) + len(snippet[0])) + '^'
) )
def advance_string(self, diff):
	'''Return a copy of the mark moved ``diff`` positions forward

	The object returned by ``self.copy()`` gets both its ``column`` and
	``pointer`` increased by ``diff``.
	'''
	# FIXME Currently does not work properly with escaped strings.
	shifted = self.copy()
	shifted.column = shifted.column + diff
	shifted.pointer = shifted.pointer + diff
	return shifted
def __str__(self): def __str__(self):
snippet = self.get_snippet() snippet = self.get_snippet()
where = (' in "%s", line %d, column %d' % ( where = (' in "%s", line %d, column %d' % (

View File

@ -33,12 +33,7 @@ class MarkedUnicode(unicode):
pointdiff = 1 pointdiff = 1
r = [] r = []
for s in part_result: for s in part_result:
mark = self.mark.copy() r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff)))
# XXX Does not work properly with escaped strings, but this requires
# saving much more information in mark.
mark.column += pointdiff
mark.pointer += pointdiff
r.append(MarkedUnicode(s, mark))
pointdiff += len(s) pointdiff += len(s)
return tuple(r) return tuple(r)

View File

@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import codecs import codecs
from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE
from powerline.lib.unicode import unicode from powerline.lib.unicode import unicode
@ -84,7 +84,7 @@ class Reader(object):
return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer) return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer)
def check_printable(self, data): def check_printable(self, data):
match = NON_PRINTABLE.search(data) match = NON_PRINTABLE_RE.search(data)
if match: if match:
self.update_pointer(match.start()) self.update_pointer(match.start())
raise ReaderError( raise ReaderError(
@ -125,7 +125,12 @@ class Reader(object):
self.raw_buffer = None self.raw_buffer = None
break break
def update_raw(self, size=4096): def update_raw(self, size=-1):
# Was size=4096
assert(size < 0)
# WARNING: reading the whole stream at once. To change this behaviour to
# former reading N characters at once one must make sure that reading
# never ends at partial unicode character.
data = self.stream.read(size) data = self.stream.read(size)
if self.raw_buffer is None: if self.raw_buffer is None:
self.raw_buffer = data self.raw_buffer = data

View File

@ -1,9 +1,14 @@
# vim:fileencoding=utf-8:noet # vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
from string import hexdigits
from powerline.lint.markedjson.error import MarkedError from powerline.lint.markedjson.error import MarkedError
from powerline.lint.markedjson import tokens from powerline.lint.markedjson import tokens
from powerline.lib.unicode import unicode from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character
hexdigits_set = set(hexdigits)
# Scanner produces tokens of the following types: # Scanner produces tokens of the following types:
@ -415,7 +420,7 @@ class Scanner:
length = self.ESCAPE_CODES[ch] length = self.ESCAPE_CODES[ch]
self.forward() self.forward()
for k in range(length): for k in range(length):
if self.peek(k) not in '0123456789ABCDEFabcdef': if self.peek(k) not in hexdigits:
raise ScannerError( raise ScannerError(
'while scanning a double-quoted scalar', start_mark, 'while scanning a double-quoted scalar', start_mark,
'expected escape sequence of %d hexdecimal numbers, but found %r' % ( 'expected escape sequence of %d hexdecimal numbers, but found %r' % (
@ -423,8 +428,26 @@ class Scanner:
self.get_mark() self.get_mark()
) )
code = int(self.prefix(length), 16) code = int(self.prefix(length), 16)
chunks.append(chr(code))
self.forward(length) self.forward(length)
if 0xD800 <= code <= 0xDBFF:
	# Start of the surrogate pair. Only high surrogates (0xD800-0xDBFF) may
	# open a pair: 0xDC00 is already the first low surrogate, so the former
	# upper bound of 0xDC00 was off by one.
	next_char = self.prefix(6)
	low_digits = next_char[2:]
	if (
		# A prefix shorter than 6 characters cannot hold ``\uXXXX``.
		len(next_char) != 6
		or next_char[0] != '\\'
		or next_char[1] != 'u'
		or not hexdigits_set.issuperset(low_digits)
		or not (0xDC00 <= int(low_digits, 16) <= 0xDFFF)
	):
		raise ScannerError(
			'while scanning a double-quoted scalar', start_mark,
			'expected escape sequence with the next character in surrogate pair, but found %r' % (
				next_char
			),
			self.get_mark()
		)
	code = surrogate_pair_to_character(code, int(low_digits, 16))
	self.forward(6)
chunks.append(unichr(code))
else: else:
raise ScannerError( raise ScannerError(
'while scanning a double-quoted scalar', start_mark, 'while scanning a double-quoted scalar', start_mark,

View File

@ -7,10 +7,19 @@ import re
from copy import copy from copy import copy
from powerline.lib.unicode import unicode from powerline.lib.unicode import unicode
from powerline.lint.markedjson.error import echoerr, DelayedEchoErr from powerline.lint.markedjson.error import echoerr, DelayedEchoErr, NON_PRINTABLE_STR
from powerline.lint.selfcheck import havemarks from powerline.lint.selfcheck import havemarks
NON_PRINTABLE_RE = re.compile(
NON_PRINTABLE_STR.translate({
ord('\t'): None,
ord('\n'): None,
0x0085: None,
})
)
class Spec(object): class Spec(object):
'''Class that describes some JSON value '''Class that describes some JSON value
@ -342,6 +351,26 @@ class Spec(object):
return False, hadproblem return False, hadproblem
return True, hadproblem return True, hadproblem
def check_printable(self, value, context_mark, data, context, echoerr, _):
	'''Report every non-printable character found in the given string

	Each match of ``NON_PRINTABLE_RE`` in ``value`` is passed to ``echoerr``
	together with a mark obtained from ``value.mark``.

	:return:
		``(True, hadproblem)`` where ``hadproblem`` is true if at least one
		such character was found.
	'''
	offenders = list(NON_PRINTABLE_RE.finditer(value))
	for offender in offenders:
		echoerr(
			context=self.cmsg.format(key=context.key),
			context_mark=value.mark,
			problem='found not printable character U+{0:04x} in a configuration string'.format(
				ord(offender.group(0))),
			# NOTE(review): the extra 1 presumably skips the opening quote of
			# the string; confirm against the marks produced by the parser.
			problem_mark=value.mark.advance_string(offender.start() + 1)
		)
	return True, bool(offenders)
def printable(self, *args):
	'''Describe unicode string value checked by ``check_printable``

	Applies the ``unicode`` type requirement and then appends the
	``check_printable`` check with ``args`` as its arguments.

	:return: self.
	'''
	self.type(unicode)
	check = ('check_printable', args)
	self.checks.append(check)
	return self
def type(self, *args): def type(self, *args):
'''Describe value that has one of the types given in arguments '''Describe value that has one of the types given in arguments

View File

@ -1,18 +1,93 @@
# vim:fileencoding=utf-8:noet # vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
import sys
import os import os
import re
from unicodedata import east_asian_width, combining
from itertools import chain from itertools import chain
from powerline.theme import Theme from powerline.theme import Theme
from powerline.lib.unicode import unichr from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
NBSP = ' ' NBSP = ' '
np_control_character_translations = dict((
	# Control characters: ^@ … ^_
	(i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)
))
'''Control character translations

Dictionary that maps characters in range 0x00-0x1F (inclusive) to strings
``'^@'``, ``'^A'`` and so on.

.. note:: maps tab to ``^I`` and newline to ``^J``.
'''

np_invalid_character_translations = dict((
	# Invalid unicode characters obtained using 'surrogateescape' error
	# handler.
	(i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)
))
'''Invalid unicode character translations

When using ``surrogateescape`` encoding error handling method characters in
range 0x80-0xFF (inclusive) are transformed into unpaired surrogate escape
unicode codepoints 0xDC80-0xDCFF. This dictionary maps such characters to
``<80>``, ``<81>``, and so on: in Python-3 they cannot be printed or
converted to UTF-8 because UTF-8 standard does not allow surrogate escape
characters, not even paired ones. Python-2 contains a bug that allows such
action, but printing them in any case makes no sense.
'''

# XXX: not using `r` because it makes no sense.
#
# The upper bound is 0xDCFF: ``np_invalid_character_translations`` has no key
# for 0xDD00, so matching it would raise KeyError in ``translate_np``.
np_invalid_character_re = re.compile('(?<![\uD800-\uDBFF])[\uDC80-\uDCFF]')
'''Regex that finds unpaired surrogate escape characters

Search is only limited to the ones obtained from ``surrogateescape`` error
handling method. This regex is only used for UCS-2 Python variants because
in this case characters above 0xFFFF are represented as surrogate escapes
characters and are thus subject to partial transformation if
``np_invalid_character_translations`` translation table is used.
'''

np_character_translations = np_control_character_translations.copy()
'''Dictionary that contains non-printable character translations

In UCS-4 versions of Python this is a union of
``np_invalid_character_translations`` and ``np_control_character_translations``
dictionaries. In UCS-2 for technical reasons ``np_invalid_character_re`` is used
instead and this dictionary only contains items from
``np_control_character_translations``.
'''
if sys.maxunicode >= 0x10FFFF:
	# Build the union described above: without it surrogateescape characters
	# are not translated at all in UCS-4 builds.
	np_character_translations.update(np_invalid_character_translations)


if sys.maxunicode < 0x10FFFF:
	def translate_np(s):
		# Unpaired surrogate escapes are replaced by the regex first so that
		# halves of valid surrogate pairs are left intact.
		return np_invalid_character_re.sub(
			lambda match: np_invalid_character_translations[ord(match.group(0))],
			s
		).translate(np_character_translations)
else:
	def translate_np(s):
		return s.translate(np_character_translations)

translate_np.__doc__ = (
'''Function that translates non-printable characters into printable strings

Is used to translate control characters and surrogate escape characters
obtained from ``surrogateescape`` encoding errors handling method into some
printable sequences. See documentation for
``np_invalid_character_translations`` and
``np_control_character_translations`` for more details.
''')
def construct_returned_value(rendered_highlighted, segments, width, output_raw, output_width): def construct_returned_value(rendered_highlighted, segments, width, output_raw, output_width):
if not (output_raw or output_width): if not (output_raw or output_width):
return rendered_highlighted return rendered_highlighted
@ -75,25 +150,6 @@ class Renderer(object):
See documentation of ``unicode.translate`` for details. See documentation of ``unicode.translate`` for details.
''' '''
np_character_translations = dict(chain(
# Control characters: ^@ … ^Y
((i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)),
# Invalid unicode characters obtained using 'surrogateescape' error
# handler.
((i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)),
))
'''Non-printable character translations
These are used to transform characters in range 0x000x1F into ``^@``,
``^A`` and so on and characters in range 0xDC800xDCFF into ``<80>``,
``<81>`` and so on (latter are invalid characters obtained using
``surrogateescape`` error handling method used automatically in a number of
places in Python3). Unilke with ``.escape()`` method (and
``character_translations``) result is passed to ``.strwidth()`` method.
Note: transforms tab into ``^I``.
'''
def __init__(self, def __init__(self,
theme_config, theme_config,
local_themes, local_themes,
@ -120,7 +176,10 @@ class Renderer(object):
'F': 2, # Fullwidth 'F': 2, # Fullwidth
} }
def strwidth(self, string): strwidth = lambda self, s: (
(strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
self.width_data, s)
)
'''Function that returns string width. '''Function that returns string width.
Is used to calculate the place given string occupies when handling Is used to calculate the place given string occupies when handling
@ -132,7 +191,6 @@ class Renderer(object):
:return: unsigned integer. :return: unsigned integer.
''' '''
return sum((0 if combining(symbol) else self.width_data[east_asian_width(symbol)] for symbol in string))
def get_theme(self, matcher_info): def get_theme(self, matcher_info):
'''Get Theme object. '''Get Theme object.
@ -256,6 +314,8 @@ class Renderer(object):
current_width = 0 current_width = 0
self._prepare_segments(segments, output_width or width)
if not width: if not width:
# No width specified, so we dont need to crop or pad anything # No width specified, so we dont need to crop or pad anything
if output_width: if output_width:
@ -319,6 +379,15 @@ class Renderer(object):
return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width) return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width)
def _prepare_segments(self, segments, calculate_contents_len):
	'''Make segment contents printable and optionally measure them

	The first pass replaces ``contents`` of every segment with the result of
	``translate_np``. If ``calculate_contents_len`` is true, a second pass
	stores ``self.strwidth(contents)`` under the ``_contents_len`` key.
	'''
	for seg in segments:
		seg['contents'] = translate_np(seg['contents'])
	if not calculate_contents_len:
		return
	measure = self.strwidth
	for seg in segments:
		seg['_contents_len'] = measure(seg['contents'])
def _render_length(self, theme, segments, divider_widths): def _render_length(self, theme, segments, divider_widths):
'''Update segments lengths and return them '''Update segments lengths and return them
''' '''
@ -327,9 +396,6 @@ class Renderer(object):
divider_spaces = theme.get_spaces() divider_spaces = theme.get_spaces()
for index, segment in enumerate(segments): for index, segment in enumerate(segments):
side = segment['side'] side = segment['side']
if segment['_contents_len'] is None:
segment_len = segment['_contents_len'] = self.strwidth(segment['contents'])
else:
segment_len = segment['_contents_len'] segment_len = segment['_contents_len']
prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT
@ -381,8 +447,6 @@ class Renderer(object):
contents_highlighted = '' contents_highlighted = ''
draw_divider = segment['draw_' + divider_type + '_divider'] draw_divider = segment['draw_' + divider_type + '_divider']
contents_raw = contents_raw.translate(self.np_character_translations)
# XXX Make sure self.hl() calls are called in the same order # XXX Make sure self.hl() calls are called in the same order
# segments are displayed. This is needed for Vim renderer to work. # segments are displayed. This is needed for Vim renderer to work.
if draw_divider: if draw_divider:

View File

@ -3,6 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import threading import threading
import os import os
import sys
import re import re
import shutil import shutil
@ -16,7 +17,9 @@ from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment
from powerline.lib.monotonic import monotonic from powerline.lib.monotonic import monotonic
from powerline.lib.vcs.git import git_directory from powerline.lib.vcs.git import git_directory
from tests.lib import Pl import powerline.lib.unicode as plu
from tests.lib import Pl, replace_attr
from tests import TestCase, SkipTest from tests import TestCase, SkipTest
@ -397,6 +400,104 @@ class TestLib(TestCase):
self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB') self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB')
width_data = {
'N': 1, # Neutral
'Na': 1, # Narrow
'A': 1, # Ambigious
'H': 1, # Half-width
'W': 2, # Wide
'F': 2, # Fullwidth
}
class TestUnicode(TestCase):
	'''Tests for the helpers imported as ``plu`` (``powerline.lib.unicode``)

	Fullwidth and other characters that matter for the assertions are written
	as escape sequences so the expected values stay visible even when the
	source is displayed or copied by tools that drop non-ASCII characters.
	'''

	def assertStringsIdentical(self, s1, s2):
		'''Assert that both strings have the same type and the same value
		'''
		self.assertTrue(type(s1) is type(s2), msg='string types differ')
		self.assertEqual(s1, s2)

	def test_unicode(self):
		self.assertTrue(type('abc') is plu.unicode)

	def test_unichr(self):
		# U+10FFFF is only checked in UCS-4 builds.
		if not sys.maxunicode < 0x10FFFF:
			self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF))
		self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF))
		self.assertStringsIdentical('\x20', plu.unichr(0x20))

	def test_u(self):
		self.assertStringsIdentical('Test', plu.u('Test'))
		self.assertStringsIdentical('Test', plu.u(b'Test'))
		self.assertStringsIdentical('«»', plu.u(b'\xC2\xAB\xC2\xBB'))
		self.assertRaises(UnicodeDecodeError, plu.u, b'\xFF')

	def test_tointiter(self):
		self.assertEqual([1, 2, 3], list(plu.tointiter(b'\x01\x02\x03')))

	def test_decode_error(self):
		self.assertStringsIdentical('<FF>', b'\xFF'.decode('utf-8', 'powerline_decode_error'))
		self.assertStringsIdentical('abc', b'abc'.decode('utf-8', 'powerline_decode_error'))

	def test_register_strwidth_error(self):
		ename = plu.register_strwidth_error(lambda s: 3)
		# U+FF21 (fullwidth A) cannot be encoded as latin1, so the registered
		# handler is invoked; the width function above always reports 3.
		self.assertStringsIdentical(b'???', '\uFF21'.encode('latin1', ename))
		self.assertStringsIdentical(b'abc', 'abc'.encode('latin1', ename))

	def test_out_u(self):
		self.assertStringsIdentical('abc', plu.out_u('abc'))
		self.assertStringsIdentical('abc', plu.out_u(b'abc'))
		self.assertRaises(TypeError, plu.out_u, None)

	def test_safe_unicode(self):
		raise SkipTest('safe_unicode() function is buggy')
		self.assertStringsIdentical('abc', plu.safe_unicode('abc'))
		self.assertStringsIdentical('abc', plu.safe_unicode(b'abc'))
		self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb'))
		with replace_attr(plu, 'get_preferred_output_encoding', lambda: 'latin1'):
			self.assertStringsIdentical('ÿ', plu.safe_unicode(b'\xFF'))
		self.assertStringsIdentical('None', plu.safe_unicode(None))

		class FailingStr(object):
			def __str__(self):
				raise NotImplementedError('Fail!')

		self.assertStringsIdentical('Fail!', plu.safe_unicode(FailingStr()))

	def test_FailedUnicode(self):
		self.assertTrue(isinstance(plu.FailedUnicode('abc'), plu.unicode))
		self.assertEqual('abc', plu.FailedUnicode('abc'))

	def test_string(self):
		raise SkipTest('string() function is buggy')
		self.assertStringsIdentical(str('abc'), plu.string('abc'))
		self.assertStringsIdentical(str('abc'), plu.string(b'abc'))

	def test_surrogate_pair_to_character(self):
		self.assertEqual(0x1F48E, plu.surrogate_pair_to_character(0xD83D, 0xDC8E))

	def test_strwidth_ucs_4(self):
		self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'abcd'))
		# Two fullwidth characters (U+FF21, U+FF22): two cells each.
		self.assertEqual(4, plu.strwidth_ucs_4(width_data, '\uFF21\uFF22'))
		if sys.maxunicode < 0x10FFFF:
			raise SkipTest('Can only test strwidth_ucs_4 in UCS-4 Pythons')

		def east_asian_width(ch):
			assert (len(ch) == 1)
			assert ord(ch) == 0x1F48E
			return 'F'

		with replace_attr(plu, 'east_asian_width', east_asian_width):
			# Warning: travis unicodedata.east_asian_width for some reason
			# thinks this character is 5 symbols wide.
			self.assertEqual(2, plu.strwidth_ucs_4(width_data, '\U0001F48E'))

	def test_strwidth_ucs_2(self):
		self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'abcd'))
		# Two fullwidth characters (U+FF21, U+FF22): two cells each.
		self.assertEqual(4, plu.strwidth_ucs_2(width_data, '\uFF21\uFF22'))
		if not sys.maxunicode < 0x10FFFF:
			raise SkipTest('Can only test strwidth_ucs_2 in UCS-2 Pythons')
		self.assertEqual(2, plu.strwidth_ucs_2(width_data, '\ud83d\udc8e'))
class TestVCS(TestCase): class TestVCS(TestCase):
def do_branch_rename_test(self, repo, q): def do_branch_rename_test(self, repo, q):
st = monotonic() st = monotonic()

View File

@ -28,6 +28,7 @@ except IOError:
hostname = socket.gethostname() hostname = socket.gethostname()
user = os.environ['USER'] user = os.environ['USER']
REFS_RE = re.compile(r'^\[\d+ refs\]\n')
IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])') IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])')
with codecs.open(fname, 'r', encoding='utf-8') as R: with codecs.open(fname, 'r', encoding='utf-8') as R:
@ -42,6 +43,8 @@ with codecs.open(fname, 'r', encoding='utf-8') as R:
line = line.translate({ line = line.translate({
ord('\r'): None ord('\r'): None
}) })
if REFS_RE.match(line):
continue
line = line.replace(hostname, 'HOSTNAME') line = line.replace(hostname, 'HOSTNAME')
line = line.replace(user, 'USER') line = line.replace(user, 'USER')
if pid is not None: if pid is not None:

View File

@ -91,8 +91,11 @@ run_test() {
SH="$1" SH="$1"
SESNAME="powerline-shell-test-${SH}-$$" SESNAME="powerline-shell-test-${SH}-$$"
# Note: when running screen with setuid libc unsets LD_LIBRARY_PATH, so it
# cannot be added to the `env -i` call above.
run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \ run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \
screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \ screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \
env LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \
"$@" "$@"
while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do
sleep 0.1s sleep 0.1s