diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 6dcb0c29..c0033c0b 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -7,6 +7,15 @@ Generic requirements
* Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only
non-optional requirement.
+
+  .. warning::
+    It is highly advised to use a UCS-4 build of Python because the UCS-2 build
+    uses significantly slower text processing functions (length determination
+    and non-printable character replacement) due to the need to support unicode
+    characters above U+FFFF, which are represented as surrogate pairs. This
+    price is paid even if the configuration contains no such characters.
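+
+    One way to check which build variant is in use is to inspect
+    ``sys.maxunicode`` (the same check powerline itself performs)::
+
+        import sys
+        print('UCS-2' if sys.maxunicode < 0x10FFFF else 'UCS-4')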
+
* C compiler. Required to build powerline client on linux. If it is not present
then powerline will fall back to shell script or python client.
* ``socat`` program. Required for shell variant of client which runs a bit
diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py
index 32ea3afe..b100b18f 100644
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys
import codecs
+from unicodedata import east_asian_width, combining
+
from powerline.lib.encoding import get_preferred_output_encoding
@@ -128,3 +130,99 @@ def string(s):
return s.encode('utf-8')
else:
return s
+
+
+def surrogate_pair_to_character(high, low):
+ '''Transform a pair of surrogate codepoints to one codepoint
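+
+ Example (the same conversion exercised by ``tests/test_lib.py``):
+
+ >>> hex(surrogate_pair_to_character(0xD83D, 0xDC8E))
+ '0x1f48e'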
+ '''
+ return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
+
+
+_strwidth_documentation = (
+ '''Compute string width in display cells
+
+ {0}
+
+ :param dict width_data:
+ Dictionary which maps east_asian_width property values to string
+ lengths. It is expected to contain the following keys and values (from
+ `East Asian Width annex `_):
+
+ === ====== ===========================================================
+ Key Value Description
+ === ====== ===========================================================
+ F 2 Fullwidth: all characters that are defined as Fullwidth in
+ the Unicode Standard [Unicode] by having a compatibility
+ decomposition of type <wide> to characters elsewhere in the
+ Unicode Standard that are implicitly narrow but unmarked.
+ H 1 Halfwidth: all characters that are explicitly defined as
+ Halfwidth in the Unicode Standard by having a compatibility
+ decomposition of type <narrow> to characters elsewhere in
+ the Unicode Standard that are implicitly wide but unmarked,
+ plus U+20A9 ₩ WON SIGN.
+ W 2 Wide: all other characters that are always wide. These
+ characters occur only in the context of East Asian
+ typography where they are wide characters (such as the
+ Unified Han Ideographs or Squared Katakana Symbols). This
+ category includes characters that have explicit halfwidth
+ counterparts.
+ Na 1 Narrow: characters that are always narrow and have explicit
+ fullwidth or wide counterparts. These characters are
+ implicitly narrow in East Asian typography and legacy
+ character sets because they have explicit fullwidth or wide
+ counterparts. All of ASCII is an example of East Asian
+ Narrow characters.
+ A 1 or 2 Ambiguous: characters that may sometimes be wide and
+ sometimes narrow. Ambiguous characters require additional
+ information not contained in the character code to further
+ resolve their width. This information is usually defined in
+ the terminal settings, which should in turn respect the glyph
+ widths of the fonts in use. Also see :ref:`ambiwidth
+ configuration option `.
+ N 1 Neutral characters: characters that do not occur in legacy
+ East Asian character sets.
+ === ====== ===========================================================
+
+ :param unicode string:
+ String whose width will be calculated.
+
+ :return: unsigned integer.''')
+
+
+def strwidth_ucs_4(width_data, string):
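+ # Combining characters are assumed to occupy no display cells of their own.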
+ return sum(((
+ (
+ 0
+ ) if combining(symbol) else (
+ width_data[east_asian_width(symbol)]
+ )
+ ) for symbol in string))
+
+
+strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
+ '''This version of the function expects characters above 0xFFFF to be
+ represented using one symbol. This is only the case in UCS-4 Python builds.
+
+ .. note::
+ Even in UCS-4 Python builds it is possible to represent characters above
+ 0xFFFF using surrogate pairs. Characters represented this way are not
+ supported.''')
+
+
+def strwidth_ucs_2(width_data, string):
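+ # A high surrogate (U+D800–U+DBFF) contributes 0; the following low
+ # surrogate looks up the width of the whole pair, which narrow Python
+ # builds treat as a single character.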
+ return sum(((
+ (
+ width_data[east_asian_width(string[i - 1] + symbol)]
+ ) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
+ 0
+ ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
+ width_data[east_asian_width(symbol)]
+ )
+ ) for i, symbol in enumerate(string)))
+
+
+strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
+ '''This version of the function expects characters above 0xFFFF to be
+ represented using two symbols forming a surrogate pair, which is the only
+ option in UCS-2 Python builds. It still works correctly in UCS-4 Python
+ builds, but is slower than its UCS-4 counterpart.''')
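+
+
+# A minimal usage sketch (the width_data mapping below mirrors the one used
+# by powerline.renderer.Renderer and tests/test_lib.py):
+#
+#     width_data = {'N': 1, 'Na': 1, 'A': 1, 'H': 1, 'W': 2, 'F': 2}
+#     strwidth = strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4
+#     assert strwidth(width_data, 'abc') == 3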
diff --git a/powerline/lint/__init__.py b/powerline/lint/__init__.py
index 51c25860..d1797d99 100644
--- a/powerline/lint/__init__.py
+++ b/powerline/lint/__init__.py
@@ -41,7 +41,7 @@ def generate_json_config_loader(lhadproblem):
function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$'
-divider_spec = Spec().type(unicode).len(
+divider_spec = Spec().printable().len(
'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy
ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy
top_theme_spec = Spec().type(unicode).func(check_top_theme).copy
@@ -211,12 +211,12 @@ segment_spec = Spec(
display=Spec().type(bool).optional(),
module=segment_module_spec(),
priority=Spec().type(int, float, type(None)).optional(),
- after=Spec().type(unicode).optional(),
- before=Spec().type(unicode).optional(),
+ after=Spec().printable().optional(),
+ before=Spec().printable().optional(),
width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(),
align=Spec().oneof(set('lr')).optional(),
args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)),
- contents=Spec().type(unicode).optional(),
+ contents=Spec().printable().optional(),
highlight_group=Spec().list(
highlight_group_spec().re(
'^(?:(?!:divider$).)+$',
@@ -243,11 +243,11 @@ divside_spec = Spec(
soft=divider_spec(),
).copy
segment_data_value_spec = Spec(
- after=Spec().type(unicode).optional(),
- before=Spec().type(unicode).optional(),
+ after=Spec().printable().optional(),
+ before=Spec().printable().optional(),
display=Spec().type(bool).optional(),
args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)),
- contents=Spec().type(unicode).optional(),
+ contents=Spec().printable().optional(),
).copy
dividers_spec = Spec(
left=divside_spec(),
diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py
index ec86d14a..dfde4086 100644
--- a/powerline/lint/markedjson/error.py
+++ b/powerline/lint/markedjson/error.py
@@ -7,7 +7,26 @@ import re
from powerline.lib.unicode import unichr
-NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']')
+NON_PRINTABLE_STR = (
+ '[^'
+ # ASCII control characters: 0x00-0x1F
+ + '\t\n' # Tab, newline: allowed ASCII control characters
+ + '\x20-\x7E' # ASCII printable characters
+ # Unicode control characters: 0x7F-0x9F
+ + '\u0085' # Allowed unicode control character: next line character
+ + '\u00A0-\uD7FF'
+ # Surrogate escapes: 0xD800-0xDFFF
+ + '\uE000-\uFFFD'
+ + ']'
+ + ((
+ # Paired surrogate escapes: allowed in UCS-2 builds as the only way to
+ # represent characters above 0xFFFF. Only the paired variant is allowed.
+ '|[\uD800-\uDBFF][\uDC00-\uDFFF]'
+ ) if sys.maxunicode < 0x10FFFF else (
+ ''
+ ))
+)
+NON_PRINTABLE_RE = re.compile(NON_PRINTABLE_STR)
def repl(s):
@@ -15,7 +34,7 @@ def repl(s):
def strtrans(s):
- return NON_PRINTABLE.sub(repl, s.replace('\t', '>---'))
+ return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---'))
class Mark:
@@ -55,6 +74,13 @@ class Mark:
+ ' ' * (indent + len(head) + len(snippet[0])) + '^'
)
+ def advance_string(self, diff):
+ ret = self.copy()
+ # FIXME Currently does not work properly with escaped strings.
+ ret.column += diff
+ ret.pointer += diff
+ return ret
+
def __str__(self):
snippet = self.get_snippet()
where = (' in "%s", line %d, column %d' % (
diff --git a/powerline/lint/markedjson/markedvalue.py b/powerline/lint/markedjson/markedvalue.py
index 74a62b64..c17a8e35 100644
--- a/powerline/lint/markedjson/markedvalue.py
+++ b/powerline/lint/markedjson/markedvalue.py
@@ -33,12 +33,7 @@ class MarkedUnicode(unicode):
pointdiff = 1
r = []
for s in part_result:
- mark = self.mark.copy()
- # XXX Does not work properly with escaped strings, but this requires
- # saving much more information in mark.
- mark.column += pointdiff
- mark.pointer += pointdiff
- r.append(MarkedUnicode(s, mark))
+ r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff)))
pointdiff += len(s)
return tuple(r)
diff --git a/powerline/lint/markedjson/reader.py b/powerline/lint/markedjson/reader.py
index bb518b06..0ca45160 100644
--- a/powerline/lint/markedjson/reader.py
+++ b/powerline/lint/markedjson/reader.py
@@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import codecs
-from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE
+from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE
from powerline.lib.unicode import unicode
@@ -84,7 +84,7 @@ class Reader(object):
return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer)
def check_printable(self, data):
- match = NON_PRINTABLE.search(data)
+ match = NON_PRINTABLE_RE.search(data)
if match:
self.update_pointer(match.start())
raise ReaderError(
@@ -125,7 +125,12 @@ class Reader(object):
self.raw_buffer = None
break
- def update_raw(self, size=4096):
+ def update_raw(self, size=-1):
+ # Was size=4096
+ assert size < 0
+ # WARNING: this reads the whole stream at once. To restore the former
+ # behaviour of reading N characters at a time one must make sure that
+ # reading never ends with a partial unicode character.
data = self.stream.read(size)
if self.raw_buffer is None:
self.raw_buffer = data
diff --git a/powerline/lint/markedjson/scanner.py b/powerline/lint/markedjson/scanner.py
index 543d7298..b0bddf38 100644
--- a/powerline/lint/markedjson/scanner.py
+++ b/powerline/lint/markedjson/scanner.py
@@ -1,9 +1,14 @@
# vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function)
+from string import hexdigits
+
from powerline.lint.markedjson.error import MarkedError
from powerline.lint.markedjson import tokens
-from powerline.lib.unicode import unicode
+from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character
+
+
+hexdigits_set = set(hexdigits)
# Scanner produces tokens of the following types:
@@ -415,7 +420,7 @@ class Scanner:
length = self.ESCAPE_CODES[ch]
self.forward()
for k in range(length):
- if self.peek(k) not in '0123456789ABCDEFabcdef':
+ if self.peek(k) not in hexdigits:
raise ScannerError(
'while scanning a double-quoted scalar', start_mark,
'expected escape sequence of %d hexdecimal numbers, but found %r' % (
@@ -423,8 +428,26 @@ class Scanner:
self.get_mark()
)
code = int(self.prefix(length), 16)
- chunks.append(chr(code))
self.forward(length)
+ if 0xD800 <= code <= 0xDBFF:
+ # Start of a surrogate pair (high surrogate)
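+ # A high surrogate must be followed by an escaped low surrogate;
+ # e.g. '\uD83D\uDC8E' together encode U+1F48E.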
+ next_char = self.prefix(6)
+ if (
+ next_char[0] != '\\'
+ or next_char[1] != 'u'
+ or not (set(next_char[2:]) < hexdigits_set)
+ or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF)
+ ):
+ raise ScannerError(
+ 'while scanning a double-quoted scalar', start_mark,
+ 'expected escape sequence with the next character of the surrogate pair, but found %r' % (
+ next_char
+ ),
+ self.get_mark()
+ )
+ code = surrogate_pair_to_character(code, int(next_char[2:], 16))
+ self.forward(6)
+ chunks.append(unichr(code))
else:
raise ScannerError(
'while scanning a double-quoted scalar', start_mark,
diff --git a/powerline/lint/spec.py b/powerline/lint/spec.py
index 1d095721..6de14fea 100644
--- a/powerline/lint/spec.py
+++ b/powerline/lint/spec.py
@@ -7,10 +7,19 @@ import re
from copy import copy
from powerline.lib.unicode import unicode
-from powerline.lint.markedjson.error import echoerr, DelayedEchoErr
+from powerline.lint.markedjson.error import echoerr, DelayedEchoErr, NON_PRINTABLE_STR
from powerline.lint.selfcheck import havemarks
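+
+# NON_PRINTABLE_STR with the exceptions for tab, newline and the next line
+# character removed: inside configuration strings these characters are
+# treated as non-printable as well.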
+NON_PRINTABLE_RE = re.compile(
+ NON_PRINTABLE_STR.translate({
+ ord('\t'): None,
+ ord('\n'): None,
+ 0x0085: None,
+ })
+)
+
+
class Spec(object):
'''Class that describes some JSON value
@@ -342,6 +351,26 @@ class Spec(object):
return False, hadproblem
return True, hadproblem
+ def check_printable(self, value, context_mark, data, context, echoerr, _):
+ '''Check that the given unicode string contains only printable characters
+ '''
+ hadproblem = False
+ for match in NON_PRINTABLE_RE.finditer(value):
+ hadproblem = True
+ echoerr(
+ context=self.cmsg.format(key=context.key),
+ context_mark=value.mark,
+ problem='found non-printable character U+{0:04x} in a configuration string'.format(
+ ord(match.group(0))),
+ problem_mark=value.mark.advance_string(match.start() + 1)
+ )
+ return True, hadproblem
+
+ def printable(self, *args):
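+ '''Describe unicode value that must contain only printable characters
+ '''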
+ self.type(unicode)
+ self.checks.append(('check_printable', args))
+ return self
+
def type(self, *args):
'''Describe value that has one of the types given in arguments
diff --git a/powerline/renderer.py b/powerline/renderer.py
index 1ba4ec63..e69abca1 100644
--- a/powerline/renderer.py
+++ b/powerline/renderer.py
@@ -1,18 +1,93 @@
# vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function)
+import sys
import os
+import re
-from unicodedata import east_asian_width, combining
from itertools import chain
from powerline.theme import Theme
-from powerline.lib.unicode import unichr
+from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
NBSP = ' '
+np_control_character_translations = dict((
+ # Control characters: ^@ … ^_
+ (i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)
+))
+'''Control character translations
+
+Dictionary that maps characters in range 0x00–0x1F (inclusive) to strings
+``'^@'``, ``'^A'`` and so on.
+
+.. note:: Maps tab to ``^I`` and newline to ``^J``.
+'''
+
+np_invalid_character_translations = dict((
+ # Invalid unicode characters obtained using 'surrogateescape' error
+ # handler.
+ (i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)
+))
+'''Invalid unicode character translations
+
+When the ``surrogateescape`` encoding error handling method is used, bytes in
+range 0x80–0xFF (inclusive) that cannot be decoded are transformed into
+unpaired surrogate escape codepoints 0xDC80–0xDCFF. This dictionary maps such
+codepoints to ``<80>``, ``<81>``, and so on: in Python 3 they can be neither
+printed nor encoded to UTF-8 because the UTF-8 standard does not allow
+surrogate codepoints, not even paired ones. Python 2 contains a bug that
+allows encoding them anyway, but printing them makes no sense in any case.
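+
+For example, the undecodable byte 0xFF becomes the codepoint U+DCFF, which
+this dictionary renders as ``<ff>``.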
+'''
+
+# XXX: not using `r` because it makes no sense.
+np_invalid_character_re = re.compile('(?<![\uD800-\uDBFF])[\uDC80-\uDCFF]')
- ((i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)),
- ))
- '''Non-printable character translations
-
- These are used to transform characters in range 0x00—0x1F into ``^@``,
- ``^A`` and so on and characters in range 0xDC80—0xDCFF into ``<80>``,
- ``<81>`` and so on (latter are invalid characters obtained using
- ``surrogateescape`` error handling method used automatically in a number of
- places in Python3). Unilke with ``.escape()`` method (and
- ``character_translations``) result is passed to ``.strwidth()`` method.
-
- Note: transforms tab into ``^I``.
- '''
-
def __init__(self,
theme_config,
local_themes,
@@ -120,19 +176,21 @@ class Renderer(object):
'F': 2, # Fullwidth
}
- def strwidth(self, string):
- '''Function that returns string width.
+ strwidth = lambda self, s: (
+ (strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
+ self.width_data, s)
+ )
+ '''Function that returns string width.
- Is used to calculate the place given string occupies when handling
- ``width`` argument to ``.render()`` method. Must take east asian width
- into account.
+ Is used to calculate the place given string occupies when handling
+ ``width`` argument to ``.render()`` method. Must take east asian width
+ into account.
- :param unicode string:
- String whose width will be calculated.
+ :param unicode string:
+ String whose width will be calculated.
- :return: unsigned integer.
- '''
- return sum((0 if combining(symbol) else self.width_data[east_asian_width(symbol)] for symbol in string))
+ :return: unsigned integer.
+ '''
def get_theme(self, matcher_info):
'''Get Theme object.
@@ -256,6 +314,8 @@ class Renderer(object):
current_width = 0
+ self._prepare_segments(segments, output_width or width)
+
if not width:
# No width specified, so we don’t need to crop or pad anything
if output_width:
@@ -319,6 +379,15 @@ class Renderer(object):
return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width)
+ def _prepare_segments(self, segments, calculate_contents_len):
+ '''Translate non-printable characters and calculate segment width
+ '''
+ for segment in segments:
+ segment['contents'] = translate_np(segment['contents'])
+ if calculate_contents_len:
+ for segment in segments:
+ segment['_contents_len'] = self.strwidth(segment['contents'])
+
def _render_length(self, theme, segments, divider_widths):
'''Update segments lengths and return them
'''
@@ -327,10 +396,7 @@ class Renderer(object):
divider_spaces = theme.get_spaces()
for index, segment in enumerate(segments):
side = segment['side']
- if segment['_contents_len'] is None:
- segment_len = segment['_contents_len'] = self.strwidth(segment['contents'])
- else:
- segment_len = segment['_contents_len']
+ segment_len = segment['_contents_len']
prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT
next_segment = segments[index + 1] if index < segments_len - 1 else theme.EMPTY_SEGMENT
@@ -381,8 +447,6 @@ class Renderer(object):
contents_highlighted = ''
draw_divider = segment['draw_' + divider_type + '_divider']
- contents_raw = contents_raw.translate(self.np_character_translations)
-
# XXX Make sure self.hl() calls are called in the same order
# segments are displayed. This is needed for Vim renderer to work.
if draw_divider:
diff --git a/tests/test_lib.py b/tests/test_lib.py
index 4af20fa1..06b28fa7 100644
--- a/tests/test_lib.py
+++ b/tests/test_lib.py
@@ -3,6 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import threading
import os
+import sys
import re
import shutil
@@ -16,7 +17,9 @@ from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment
from powerline.lib.monotonic import monotonic
from powerline.lib.vcs.git import git_directory
-from tests.lib import Pl
+import powerline.lib.unicode as plu
+
+from tests.lib import Pl, replace_attr
from tests import TestCase, SkipTest
@@ -397,6 +400,104 @@ class TestLib(TestCase):
self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB')
+width_data = {
+ 'N': 1, # Neutral
+ 'Na': 1, # Narrow
+ 'A': 1, # Ambiguous
+ 'H': 1, # Half-width
+ 'W': 2, # Wide
+ 'F': 2, # Fullwidth
+}
+
+
+class TestUnicode(TestCase):
+ def assertStringsIdentical(self, s1, s2):
+ self.assertTrue(type(s1) is type(s2), msg='string types differ')
+ self.assertEqual(s1, s2)
+
+ def test_unicode(self):
+ self.assertTrue(type('abc') is plu.unicode)
+
+ def test_unichr(self):
+ if not sys.maxunicode < 0x10FFFF:
+ self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF))
+ self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF))
+ self.assertStringsIdentical('\x20', plu.unichr(0x20))
+
+ def test_u(self):
+ self.assertStringsIdentical('Test', plu.u('Test'))
+ self.assertStringsIdentical('Test', plu.u(b'Test'))
+ self.assertStringsIdentical('«»', plu.u(b'\xC2\xAB\xC2\xBB'))
+ self.assertRaises(UnicodeDecodeError, plu.u, b'\xFF')
+
+ def test_tointiter(self):
+ self.assertEqual([1, 2, 3], list(plu.tointiter(b'\x01\x02\x03')))
+
+ def test_decode_error(self):
+ self.assertStringsIdentical('', b'\xFF'.decode('utf-8', 'powerline_decode_error'))
+ self.assertStringsIdentical('abc', b'abc'.decode('utf-8', 'powerline_decode_error'))
+
+ def test_register_strwidth_error(self):
+ ename = plu.register_strwidth_error(lambda s: 3)
+ self.assertStringsIdentical(b'???', 'A'.encode('latin1', ename))
+ self.assertStringsIdentical(b'abc', 'abc'.encode('latin1', ename))
+
+ def test_out_u(self):
+ self.assertStringsIdentical('abc', plu.out_u('abc'))
+ self.assertStringsIdentical('abc', plu.out_u(b'abc'))
+ self.assertRaises(TypeError, plu.out_u, None)
+
+ def test_safe_unicode(self):
+ raise SkipTest('safe_unicode() function is buggy')
+ self.assertStringsIdentical('abc', plu.safe_unicode('abc'))
+ self.assertStringsIdentical('abc', plu.safe_unicode(b'abc'))
+ self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb'))
+ with replace_attr(plu, 'get_preferred_output_encoding', lambda: 'latin1'):
+ self.assertStringsIdentical('ÿ', plu.safe_unicode(b'\xFF'))
+ self.assertStringsIdentical('None', plu.safe_unicode(None))
+
+ class FailingStr(object):
+ def __str__(self):
+ raise NotImplementedError('Fail!')
+
+ self.assertStringsIdentical('Fail!', plu.safe_unicode(FailingStr()))
+
+ def test_FailedUnicode(self):
+ self.assertTrue(isinstance(plu.FailedUnicode('abc'), plu.unicode))
+ self.assertEqual('abc', plu.FailedUnicode('abc'))
+
+ def test_string(self):
+ raise SkipTest('string() function is buggy')
+ self.assertStringsIdentical(str('abc'), plu.string('abc'))
+ self.assertStringsIdentical(str('abc'), plu.string(b'abc'))
+
+ def test_surrogate_pair_to_character(self):
+ self.assertEqual(0x1F48E, plu.surrogate_pair_to_character(0xD83D, 0xDC8E))
+
+ def test_strwidth_ucs_4(self):
+ self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'abcd'))
+ self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'AB'))
+ if sys.maxunicode < 0x10FFFF:
+ raise SkipTest('Can only test strwidth_ucs_4 in UCS-4 Pythons')
+
+ def east_asian_width(ch):
+ assert (len(ch) == 1)
+ assert ord(ch) == 0x1F48E
+ return 'F'
+
+ with replace_attr(plu, 'east_asian_width', east_asian_width):
+ # Warning: travis unicodedata.east_asian_width for some reason
+ # thinks this character is 5 symbols wide.
+ self.assertEqual(2, plu.strwidth_ucs_4(width_data, '\U0001F48E'))
+
+ def test_strwidth_ucs_2(self):
+ self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'abcd'))
+ self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'AB'))
+ if not sys.maxunicode < 0x10FFFF:
+ raise SkipTest('Can only test strwidth_ucs_2 in UCS-2 Pythons')
+ self.assertEqual(2, plu.strwidth_ucs_2(width_data, '\ud83d\udc8e'))
+
+
class TestVCS(TestCase):
def do_branch_rename_test(self, repo, q):
st = monotonic()
diff --git a/tests/test_shells/postproc.py b/tests/test_shells/postproc.py
index 3ef35972..df8a6dad 100755
--- a/tests/test_shells/postproc.py
+++ b/tests/test_shells/postproc.py
@@ -28,6 +28,7 @@ except IOError:
hostname = socket.gethostname()
user = os.environ['USER']
+REFS_RE = re.compile(r'^\[\d+ refs\]\n')
IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])')
with codecs.open(fname, 'r', encoding='utf-8') as R:
@@ -42,6 +43,8 @@ with codecs.open(fname, 'r', encoding='utf-8') as R:
line = line.translate({
ord('\r'): None
})
+ if REFS_RE.match(line):
+ continue
line = line.replace(hostname, 'HOSTNAME')
line = line.replace(user, 'USER')
if pid is not None:
diff --git a/tests/test_shells/test.sh b/tests/test_shells/test.sh
index 275fa9c5..ddaafecf 100755
--- a/tests/test_shells/test.sh
+++ b/tests/test_shells/test.sh
@@ -91,8 +91,11 @@ run_test() {
SH="$1"
SESNAME="powerline-shell-test-${SH}-$$"
+ # Note: when screen is run with the setuid bit set, libc unsets
+ # LD_LIBRARY_PATH, so it cannot be added to the `env -i` call above.
run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \
screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \
+ env LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \
"$@"
while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do
sleep 0.1s