Merge pull request #1217 from ZyX-I/ucs-2-python
Add support for UCS-2 Python builds
This commit is contained in:
commit
432cc5031f
|
@ -7,6 +7,15 @@ Generic requirements
|
||||||
|
|
||||||
* Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only
|
* Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only
|
||||||
non-optional requirement.
|
non-optional requirement.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
It is highly advised to use UCS-4 version of Python because UCS-2 version
|
||||||
|
uses significantly slower text processing (length determination and
|
||||||
|
non-printable character replacement) functions due to the need of
|
||||||
|
supporting unicode characters above U+FFFF which are represented as
|
||||||
|
surrogate pairs. This price will be paid even if configuration has no such
|
||||||
|
characters.
|
||||||
|
|
||||||
* C compiler. Required to build powerline client on linux. If it is not present
|
* C compiler. Required to build powerline client on linux. If it is not present
|
||||||
then powerline will fall back to shell script or python client.
|
then powerline will fall back to shell script or python client.
|
||||||
* ``socat`` program. Required for shell variant of client which runs a bit
|
* ``socat`` program. Required for shell variant of client which runs a bit
|
||||||
|
|
|
@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
|
||||||
import sys
|
import sys
|
||||||
import codecs
|
import codecs
|
||||||
|
|
||||||
|
from unicodedata import east_asian_width, combining
|
||||||
|
|
||||||
from powerline.lib.encoding import get_preferred_output_encoding
|
from powerline.lib.encoding import get_preferred_output_encoding
|
||||||
|
|
||||||
|
|
||||||
|
@ -128,3 +130,99 @@ def string(s):
|
||||||
return s.encode('utf-8')
|
return s.encode('utf-8')
|
||||||
else:
|
else:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def surrogate_pair_to_character(high, low):
	'''Combine two surrogate codepoints into the codepoint they encode

	:param int high:
		Leading (high) surrogate, expected to be in range 0xD800 to 0xDBFF.
	:param int low:
		Trailing (low) surrogate, expected to be in range 0xDC00 to 0xDFFF.

	:return:
		Integer codepoint. Arguments are not validated: values outside of the
		expected ranges are processed by the same arithmetic.
	'''
	# Each surrogate carries ten bits of the offset from 0x10000.
	high_offset = (high - 0xD800) << 10
	low_offset = low - 0xDC00
	return 0x10000 + high_offset + low_offset
|
||||||
|
|
||||||
|
|
||||||
|
_strwidth_documentation = (
|
||||||
|
'''Compute string width in display cells
|
||||||
|
|
||||||
|
{0}
|
||||||
|
|
||||||
|
:param dict width_data:
|
||||||
|
Dictionary which maps east_asian_width property values to strings
|
||||||
|
lengths. It is expected to contain the following keys and values (from
|
||||||
|
`East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
|
||||||
|
|
||||||
|
=== ====== ===========================================================
|
||||||
|
Key Value Description
|
||||||
|
=== ====== ===========================================================
|
||||||
|
F 2 Fullwidth: all characters that are defined as Fullwidth in
|
||||||
|
the Unicode Standard [Unicode] by having a compatibility
|
||||||
|
decomposition of type <wide> to characters elsewhere in the
|
||||||
|
Unicode Standard that are implicitly narrow but unmarked.
|
||||||
|
H 1 Halfwidth: all characters that are explicitly defined as
|
||||||
|
Halfwidth in the Unicode Standard by having a compatibility
|
||||||
|
decomposition of type <narrow> to characters elsewhere in
|
||||||
|
the Unicode Standard that are implicitly wide but unmarked,
|
||||||
|
plus U+20A9 ₩ WON SIGN.
|
||||||
|
W 2 Wide: all other characters that are always wide. These
|
||||||
|
characters occur only in the context of East Asian
|
||||||
|
typography where they are wide characters (such as the
|
||||||
|
Unified Han Ideographs or Squared Katakana Symbols). This
|
||||||
|
category includes characters that have explicit halfwidth
|
||||||
|
counterparts.
|
||||||
|
Na 1 Narrow: characters that are always narrow and have explicit
|
||||||
|
fullwidth or wide counterparts. These characters are
|
||||||
|
implicitly narrow in East Asian typography and legacy
|
||||||
|
character sets because they have explicit fullwidth or wide
|
||||||
|
counterparts. All of ASCII is an example of East Asian
|
||||||
|
Narrow characters.
|
||||||
|
A 1 or 2 Ambiguous: characters that may sometimes be wide and
|
||||||
|
sometimes narrow. Ambiguous characters require additional
|
||||||
|
information not contained in the character code to further
|
||||||
|
resolve their width. This information is usually defined in
|
||||||
|
terminal setting that should in turn respect glyphs widths
|
||||||
|
in used fonts. Also see :ref:`ambiwidth configuration
|
||||||
|
option <config-common-ambiwidth>`.
|
||||||
|
N 1 Neutral characters: character that does not occur in legacy
|
||||||
|
East Asian character sets.
|
||||||
|
=== ====== ===========================================================
|
||||||
|
|
||||||
|
:param unicode string:
|
||||||
|
String whose width will be calculated.
|
||||||
|
|
||||||
|
:return: unsigned integer.''')
|
||||||
|
|
||||||
|
|
||||||
|
def strwidth_ucs_4(width_data, string):
	# The docstring is assigned to ``__doc__`` right after the definition.
	#
	# Combining characters occupy no display cells of their own; every other
	# symbol contributes the width registered for its east asian width
	# property.
	cells = 0
	for symbol in string:
		if not combining(symbol):
			cells += width_data[east_asian_width(symbol)]
	return cells
|
||||||
|
|
||||||
|
|
||||||
|
strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
|
||||||
|
'''This version of function expects that characters above 0xFFFF are
|
||||||
|
represented using one symbol. This is only the case in UCS-4 Python builds.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
Even in UCS-4 Python builds it is possible to represent characters above
|
||||||
|
0xFFFF using surrogate pairs. Characters represented this way are not
|
||||||
|
supported.''')
|
||||||
|
|
||||||
|
|
||||||
|
def strwidth_ucs_2(width_data, string):
	# The docstring is assigned to ``__doc__`` right after the definition.
	#
	# Width rules, applied to every symbol (UTF-16 code unit) of the string:
	#
	# * low surrogate directly preceded by a high surrogate: the width of the
	#   whole pair is accounted for here;
	# * high surrogate or combining character: zero width;
	# * anything else, including an unpaired low surrogate: width of the
	#   symbol itself.
	#
	# An unpaired low surrogate must not be glued to the previous symbol:
	# ``east_asian_width`` only accepts one character, and for the very first
	# symbol ``string[i - 1]`` would wrap around to the end of the string.
	cells = 0
	prev_is_high = False
	for i, symbol in enumerate(string):
		code = ord(symbol)
		is_high = 0xD800 <= code <= 0xDBFF
		if 0xDC00 <= code <= 0xDFFF and prev_is_high:
			cells += width_data[east_asian_width(string[i - 1] + symbol)]
		elif not (is_high or combining(symbol)):
			cells += width_data[east_asian_width(symbol)]
		prev_is_high = is_high
	return cells
|
||||||
|
|
||||||
|
|
||||||
|
strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
|
||||||
|
'''This version of function expects that characters above 0xFFFF are
|
||||||
|
represented using two symbols forming a surrogate pair, which is the only
|
||||||
|
option in UCS-2 Python builds. It still works correctly in UCS-4 Python
|
||||||
|
builds, but is slower than its UCS-4 counterpart.''')
|
||||||
|
|
|
@ -41,7 +41,7 @@ def generate_json_config_loader(lhadproblem):
|
||||||
function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$'
|
function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$'
|
||||||
|
|
||||||
|
|
||||||
divider_spec = Spec().type(unicode).len(
|
divider_spec = Spec().printable().len(
|
||||||
'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy
|
'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy
|
||||||
ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy
|
ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy
|
||||||
top_theme_spec = Spec().type(unicode).func(check_top_theme).copy
|
top_theme_spec = Spec().type(unicode).func(check_top_theme).copy
|
||||||
|
@ -211,12 +211,12 @@ segment_spec = Spec(
|
||||||
display=Spec().type(bool).optional(),
|
display=Spec().type(bool).optional(),
|
||||||
module=segment_module_spec(),
|
module=segment_module_spec(),
|
||||||
priority=Spec().type(int, float, type(None)).optional(),
|
priority=Spec().type(int, float, type(None)).optional(),
|
||||||
after=Spec().type(unicode).optional(),
|
after=Spec().printable().optional(),
|
||||||
before=Spec().type(unicode).optional(),
|
before=Spec().printable().optional(),
|
||||||
width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(),
|
width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(),
|
||||||
align=Spec().oneof(set('lr')).optional(),
|
align=Spec().oneof(set('lr')).optional(),
|
||||||
args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)),
|
args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)),
|
||||||
contents=Spec().type(unicode).optional(),
|
contents=Spec().printable().optional(),
|
||||||
highlight_group=Spec().list(
|
highlight_group=Spec().list(
|
||||||
highlight_group_spec().re(
|
highlight_group_spec().re(
|
||||||
'^(?:(?!:divider$).)+$',
|
'^(?:(?!:divider$).)+$',
|
||||||
|
@ -243,11 +243,11 @@ divside_spec = Spec(
|
||||||
soft=divider_spec(),
|
soft=divider_spec(),
|
||||||
).copy
|
).copy
|
||||||
segment_data_value_spec = Spec(
|
segment_data_value_spec = Spec(
|
||||||
after=Spec().type(unicode).optional(),
|
after=Spec().printable().optional(),
|
||||||
before=Spec().type(unicode).optional(),
|
before=Spec().printable().optional(),
|
||||||
display=Spec().type(bool).optional(),
|
display=Spec().type(bool).optional(),
|
||||||
args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)),
|
args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)),
|
||||||
contents=Spec().type(unicode).optional(),
|
contents=Spec().printable().optional(),
|
||||||
).copy
|
).copy
|
||||||
dividers_spec = Spec(
|
dividers_spec = Spec(
|
||||||
left=divside_spec(),
|
left=divside_spec(),
|
||||||
|
|
|
@ -7,7 +7,26 @@ import re
|
||||||
from powerline.lib.unicode import unichr
|
from powerline.lib.unicode import unichr
|
||||||
|
|
||||||
|
|
||||||
NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']')
|
# Regular expression source that matches one non-printable character.
#
# It is kept as a string next to the compiled regex so that other modules can
# derive stricter variants from it.
NON_PRINTABLE_STR = (
	'[^'
	# ASCII control characters: 0x00-0x1F
	+ '\t\n'             # Tab, newline: allowed ASCII control characters
	+ '\x20-\x7E'        # ASCII printable characters
	# Unicode control characters: 0x7F-0x9F
	+ '\u0085'           # Allowed unicode control character: next line character
	+ '\u00A0-\uD7FF'
	# Surrogate escapes: 0xD800-0xDFFF
	+ '\uE000-\uFFFD'
	+ ((
		# UCS-2 builds: surrogates are the only way to represent characters
		# above 0xFFFF, so the class lets them through and the alternatives
		# below catch the unpaired ones.
		'\uD800-\uDFFF'
	) if sys.maxunicode < 0x10FFFF else (
		# UCS-4 builds: characters above 0xFFFF are single symbols.
		'\U00010000-\U0010FFFF'
	))
	+ ']'
	+ ((
		# Only the paired variant is allowed: report a low surrogate that
		# does not follow a high one and a high surrogate that is not
		# followed by a low one. A plain ``|[high][low]`` alternative would
		# never be tried because the negated class above matches first.
		'|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]'
		'|[\uD800-\uDBFF](?![\uDC00-\uDFFF])'
	) if sys.maxunicode < 0x10FFFF else (
		''
	))
)
NON_PRINTABLE_RE = re.compile(NON_PRINTABLE_STR)
|
||||||
|
|
||||||
|
|
||||||
def repl(s):
|
def repl(s):
|
||||||
|
@ -15,7 +34,7 @@ def repl(s):
|
||||||
|
|
||||||
|
|
||||||
def strtrans(s):
|
def strtrans(s):
|
||||||
return NON_PRINTABLE.sub(repl, s.replace('\t', '>---'))
|
return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---'))
|
||||||
|
|
||||||
|
|
||||||
class Mark:
|
class Mark:
|
||||||
|
@ -55,6 +74,13 @@ class Mark:
|
||||||
+ ' ' * (indent + len(head) + len(snippet[0])) + '^'
|
+ ' ' * (indent + len(head) + len(snippet[0])) + '^'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def advance_string(self, diff):
	'''Return a copy of this mark moved forward by ``diff`` characters

	The mark itself is left untouched: both ``column`` and ``pointer`` are
	increased on the copy obtained from ``self.copy()``.

	:param int diff:
		Number of characters to advance by.

	:return: the advanced copy.
	'''
	advanced = self.copy()
	# FIXME Currently does not work properly with escaped strings.
	advanced.pointer += diff
	advanced.column += diff
	return advanced
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
snippet = self.get_snippet()
|
snippet = self.get_snippet()
|
||||||
where = (' in "%s", line %d, column %d' % (
|
where = (' in "%s", line %d, column %d' % (
|
||||||
|
|
|
@ -33,12 +33,7 @@ class MarkedUnicode(unicode):
|
||||||
pointdiff = 1
|
pointdiff = 1
|
||||||
r = []
|
r = []
|
||||||
for s in part_result:
|
for s in part_result:
|
||||||
mark = self.mark.copy()
|
r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff)))
|
||||||
# XXX Does not work properly with escaped strings, but this requires
|
|
||||||
# saving much more information in mark.
|
|
||||||
mark.column += pointdiff
|
|
||||||
mark.pointer += pointdiff
|
|
||||||
r.append(MarkedUnicode(s, mark))
|
|
||||||
pointdiff += len(s)
|
pointdiff += len(s)
|
||||||
return tuple(r)
|
return tuple(r)
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
|
|
||||||
from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE
|
from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE
|
||||||
from powerline.lib.unicode import unicode
|
from powerline.lib.unicode import unicode
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ class Reader(object):
|
||||||
return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer)
|
return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer)
|
||||||
|
|
||||||
def check_printable(self, data):
|
def check_printable(self, data):
|
||||||
match = NON_PRINTABLE.search(data)
|
match = NON_PRINTABLE_RE.search(data)
|
||||||
if match:
|
if match:
|
||||||
self.update_pointer(match.start())
|
self.update_pointer(match.start())
|
||||||
raise ReaderError(
|
raise ReaderError(
|
||||||
|
@ -125,7 +125,12 @@ class Reader(object):
|
||||||
self.raw_buffer = None
|
self.raw_buffer = None
|
||||||
break
|
break
|
||||||
|
|
||||||
def update_raw(self, size=4096):
|
def update_raw(self, size=-1):
|
||||||
|
# Was size=4096
|
||||||
|
assert(size < 0)
|
||||||
|
# WARNING: reading the whole stream at once. To change this behaviour to
|
||||||
|
# former reading N characters at once one must make sure that reading
|
||||||
|
# never ends at partial unicode character.
|
||||||
data = self.stream.read(size)
|
data = self.stream.read(size)
|
||||||
if self.raw_buffer is None:
|
if self.raw_buffer is None:
|
||||||
self.raw_buffer = data
|
self.raw_buffer = data
|
||||||
|
|
|
@ -1,9 +1,14 @@
|
||||||
# vim:fileencoding=utf-8:noet
|
# vim:fileencoding=utf-8:noet
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
from string import hexdigits
|
||||||
|
|
||||||
from powerline.lint.markedjson.error import MarkedError
|
from powerline.lint.markedjson.error import MarkedError
|
||||||
from powerline.lint.markedjson import tokens
|
from powerline.lint.markedjson import tokens
|
||||||
from powerline.lib.unicode import unicode
|
from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character
|
||||||
|
|
||||||
|
|
||||||
|
hexdigits_set = set(hexdigits)
|
||||||
|
|
||||||
|
|
||||||
# Scanner produces tokens of the following types:
|
# Scanner produces tokens of the following types:
|
||||||
|
@ -415,7 +420,7 @@ class Scanner:
|
||||||
length = self.ESCAPE_CODES[ch]
|
length = self.ESCAPE_CODES[ch]
|
||||||
self.forward()
|
self.forward()
|
||||||
for k in range(length):
|
for k in range(length):
|
||||||
if self.peek(k) not in '0123456789ABCDEFabcdef':
|
if self.peek(k) not in hexdigits:
|
||||||
raise ScannerError(
|
raise ScannerError(
|
||||||
'while scanning a double-quoted scalar', start_mark,
|
'while scanning a double-quoted scalar', start_mark,
|
||||||
'expected escape sequence of %d hexdecimal numbers, but found %r' % (
|
'expected escape sequence of %d hexdecimal numbers, but found %r' % (
|
||||||
|
@ -423,8 +428,26 @@ class Scanner:
|
||||||
self.get_mark()
|
self.get_mark()
|
||||||
)
|
)
|
||||||
code = int(self.prefix(length), 16)
|
code = int(self.prefix(length), 16)
|
||||||
chunks.append(chr(code))
|
|
||||||
self.forward(length)
|
self.forward(length)
|
||||||
|
if 0xD800 <= code <= 0xDC00:
|
||||||
|
# Start of the surrogate pair
|
||||||
|
next_char = self.prefix(6)
|
||||||
|
if (
|
||||||
|
next_char[0] != '\\'
|
||||||
|
or next_char[1] != 'u'
|
||||||
|
or not (set(next_char[2:]) < hexdigits_set)
|
||||||
|
or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF)
|
||||||
|
):
|
||||||
|
raise ScannerError(
|
||||||
|
'while scanning a double-quoted scalar', start_mark,
|
||||||
|
'expected escape sequence with the next character in surrogate pair, but found %r' % (
|
||||||
|
next_char
|
||||||
|
),
|
||||||
|
self.get_mark()
|
||||||
|
)
|
||||||
|
code = surrogate_pair_to_character(code, int(next_char[2:], 16))
|
||||||
|
self.forward(6)
|
||||||
|
chunks.append(unichr(code))
|
||||||
else:
|
else:
|
||||||
raise ScannerError(
|
raise ScannerError(
|
||||||
'while scanning a double-quoted scalar', start_mark,
|
'while scanning a double-quoted scalar', start_mark,
|
||||||
|
|
|
@ -7,10 +7,19 @@ import re
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
|
||||||
from powerline.lib.unicode import unicode
|
from powerline.lib.unicode import unicode
|
||||||
from powerline.lint.markedjson.error import echoerr, DelayedEchoErr
|
from powerline.lint.markedjson.error import echoerr, DelayedEchoErr, NON_PRINTABLE_STR
|
||||||
from powerline.lint.selfcheck import havemarks
|
from powerline.lint.selfcheck import havemarks
|
||||||
|
|
||||||
|
|
||||||
|
# Stricter variant of the non-printable regex: tab, newline and the next line
# character (U+0085) are deleted from the pattern source before compiling.
NON_PRINTABLE_RE = re.compile(
	NON_PRINTABLE_STR.translate(
		dict.fromkeys((ord('\t'), ord('\n'), 0x0085))
	)
)
|
||||||
|
|
||||||
|
|
||||||
class Spec(object):
|
class Spec(object):
|
||||||
'''Class that describes some JSON value
|
'''Class that describes some JSON value
|
||||||
|
|
||||||
|
@ -342,6 +351,26 @@ class Spec(object):
|
||||||
return False, hadproblem
|
return False, hadproblem
|
||||||
return True, hadproblem
|
return True, hadproblem
|
||||||
|
|
||||||
|
def check_printable(self, value, context_mark, data, context, echoerr, _):
	'''Check that given unicode string contains only printable characters

	Every match of ``NON_PRINTABLE_RE`` in ``value`` is reported through
	``echoerr`` with a mark pointing at the offending character.

	:return:
		Tuple ``(True, hadproblem)`` where ``hadproblem`` tells whether at
		least one character was reported.
	'''
	offenders = list(NON_PRINTABLE_RE.finditer(value))
	for match in offenders:
		context_message = self.cmsg.format(key=context.key)
		string_mark = value.mark
		problem = 'found not printable character U+{0:04x} in a configuration string'.format(
			ord(match.group(0)))
		# NOTE(review): the extra 1 presumably skips the opening quote of the
		# string in the configuration file; confirm against the scanner.
		problem_mark = value.mark.advance_string(match.start() + 1)
		echoerr(
			context=context_message,
			context_mark=string_mark,
			problem=problem,
			problem_mark=problem_mark
		)
	return True, bool(offenders)
|
||||||
|
|
||||||
|
def printable(self, *args):
	'''Describe value that is a unicode string with printable characters only

	Registers the ``unicode`` type requirement first and then queues the
	``check_printable`` check with the given arguments.

	:return: self.
	'''
	self.type(unicode)
	printable_check = ('check_printable', args)
	self.checks.append(printable_check)
	return self
|
||||||
|
|
||||||
def type(self, *args):
|
def type(self, *args):
|
||||||
'''Describe value that has one of the types given in arguments
|
'''Describe value that has one of the types given in arguments
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,93 @@
|
||||||
# vim:fileencoding=utf-8:noet
|
# vim:fileencoding=utf-8:noet
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
from unicodedata import east_asian_width, combining
|
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
from powerline.theme import Theme
|
from powerline.theme import Theme
|
||||||
from powerline.lib.unicode import unichr
|
from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
|
||||||
|
|
||||||
|
|
||||||
NBSP = ' '
|
NBSP = ' '
|
||||||
|
|
||||||
|
|
||||||
|
np_control_character_translations = dict((
|
||||||
|
# Control characters: ^@ … ^_
|
||||||
|
(i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)
|
||||||
|
))
|
||||||
|
'''Control character translations
|
||||||
|
|
||||||
|
Dictionary that maps characters in range 0x00–0x1F (inclusive) to strings
|
||||||
|
``'^@'``, ``'^A'`` and so on.
|
||||||
|
|
||||||
|
.. note:: maps tab to ``^I`` and newline to ``^J``.
|
||||||
|
'''
|
||||||
|
|
||||||
|
np_invalid_character_translations = dict((
|
||||||
|
# Invalid unicode characters obtained using 'surrogateescape' error
|
||||||
|
# handler.
|
||||||
|
(i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)
|
||||||
|
))
|
||||||
|
'''Invalid unicode character translations
|
||||||
|
|
||||||
|
When using ``surrogateescape`` encoding error handling method characters in
|
||||||
|
range 0x80–0xFF (inclusive) are transformed into unpaired surrogate escape
|
||||||
|
unicode codepoints 0xDC80–0xDCFF. This dictionary maps such characters to
|
||||||
|
``<80>``, ``<81>``, and so on: in Python-3 they cannot be printed or
|
||||||
|
converted to UTF-8 because UTF-8 standard does not allow surrogate escape
|
||||||
|
characters, not even paired ones. Python-2 contains a bug that allows such
|
||||||
|
action, but printing them in any case makes no sense.
|
||||||
|
'''
|
||||||
|
|
||||||
|
# XXX: not using `r` because it makes no sense.
# The class ends at 0xDCFF: np_invalid_character_translations only has keys
# 0xDC80..0xDCFF (``range(0xDC80, 0xDD00)``), so matching 0xDD00 would raise
# KeyError when the match is looked up in that dictionary.
np_invalid_character_re = re.compile('(?<![\uD800-\uDBFF])[\uDC80-\uDCFF]')
|
||||||
|
'''Regex that finds unpaired surrogate escape characters
|
||||||
|
|
||||||
|
Search is only limited to the ones obtained from ``surrogateescape`` error
|
||||||
|
handling method. This regex is only used for UCS-2 Python variants because
|
||||||
|
in this case characters above 0xFFFF are represented as surrogate escapes
|
||||||
|
characters and are thus subject to partial transformation if
|
||||||
|
``np_invalid_character_translations`` translation table is used.
|
||||||
|
'''
|
||||||
|
|
||||||
|
np_character_translations = np_control_character_translations.copy()
'''Dictionary that contains non-printable character translations

In UCS-4 versions of Python this is a union of
``np_invalid_character_translations`` and ``np_control_character_translations``
dictionaries. In UCS-2 for technical reasons ``np_invalid_character_re`` is used
instead and this dictionary only contains items from
``np_control_character_translations``.
'''
if not sys.maxunicode < 0x10FFFF:
	# UCS-4 build: no regex pass is applied here, so the surrogate escape
	# translations have to be part of the table given to ``.translate()``.
	np_character_translations.update(np_invalid_character_translations)
|
||||||
|
|
||||||
|
# The variant is picked once at import time from the build's unicode width.
translate_np = (
	(
		# UCS-2: unpaired surrogate escapes are replaced with the regex first,
		# then the translation table is applied to the result.
		lambda s: np_invalid_character_re.sub(
			lambda match: np_invalid_character_translations[ord(match.group(0))],
			s
		).translate(np_character_translations)
	) if sys.maxunicode < 0x10FFFF else (
		# UCS-4: a single table lookup pass.
		lambda s: s.translate(np_character_translations)
	)
)
|
||||||
|
'''Function that translates non-printable characters into printable strings
|
||||||
|
|
||||||
|
Is used to translate control characters and surrogate escape characters
|
||||||
|
obtained from ``surrogateescape`` encoding errors handling method into some
|
||||||
|
printable sequences. See documentation for
|
||||||
|
``np_invalid_character_translations`` and
|
||||||
|
``np_control_character_translations`` for more details.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
def construct_returned_value(rendered_highlighted, segments, width, output_raw, output_width):
|
def construct_returned_value(rendered_highlighted, segments, width, output_raw, output_width):
|
||||||
if not (output_raw or output_width):
|
if not (output_raw or output_width):
|
||||||
return rendered_highlighted
|
return rendered_highlighted
|
||||||
|
@ -75,25 +150,6 @@ class Renderer(object):
|
||||||
See documentation of ``unicode.translate`` for details.
|
See documentation of ``unicode.translate`` for details.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
np_character_translations = dict(chain(
|
|
||||||
# Control characters: ^@ … ^Y
|
|
||||||
((i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)),
|
|
||||||
# Invalid unicode characters obtained using 'surrogateescape' error
|
|
||||||
# handler.
|
|
||||||
((i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)),
|
|
||||||
))
|
|
||||||
'''Non-printable character translations
|
|
||||||
|
|
||||||
These are used to transform characters in range 0x00—0x1F into ``^@``,
|
|
||||||
``^A`` and so on and characters in range 0xDC80—0xDCFF into ``<80>``,
|
|
||||||
``<81>`` and so on (latter are invalid characters obtained using
|
|
||||||
``surrogateescape`` error handling method used automatically in a number of
|
|
||||||
places in Python3). Unilke with ``.escape()`` method (and
|
|
||||||
``character_translations``) result is passed to ``.strwidth()`` method.
|
|
||||||
|
|
||||||
Note: transforms tab into ``^I``.
|
|
||||||
'''
|
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
theme_config,
|
theme_config,
|
||||||
local_themes,
|
local_themes,
|
||||||
|
@ -120,7 +176,10 @@ class Renderer(object):
|
||||||
'F': 2, # Fullwidth
|
'F': 2, # Fullwidth
|
||||||
}
|
}
|
||||||
|
|
||||||
def strwidth(self, string):
|
strwidth = lambda self, s: (
|
||||||
|
(strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
|
||||||
|
self.width_data, s)
|
||||||
|
)
|
||||||
'''Function that returns string width.
|
'''Function that returns string width.
|
||||||
|
|
||||||
Is used to calculate the place given string occupies when handling
|
Is used to calculate the place given string occupies when handling
|
||||||
|
@ -132,7 +191,6 @@ class Renderer(object):
|
||||||
|
|
||||||
:return: unsigned integer.
|
:return: unsigned integer.
|
||||||
'''
|
'''
|
||||||
return sum((0 if combining(symbol) else self.width_data[east_asian_width(symbol)] for symbol in string))
|
|
||||||
|
|
||||||
def get_theme(self, matcher_info):
|
def get_theme(self, matcher_info):
|
||||||
'''Get Theme object.
|
'''Get Theme object.
|
||||||
|
@ -256,6 +314,8 @@ class Renderer(object):
|
||||||
|
|
||||||
current_width = 0
|
current_width = 0
|
||||||
|
|
||||||
|
self._prepare_segments(segments, output_width or width)
|
||||||
|
|
||||||
if not width:
|
if not width:
|
||||||
# No width specified, so we don’t need to crop or pad anything
|
# No width specified, so we don’t need to crop or pad anything
|
||||||
if output_width:
|
if output_width:
|
||||||
|
@ -319,6 +379,15 @@ class Renderer(object):
|
||||||
|
|
||||||
return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width)
|
return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width)
|
||||||
|
|
||||||
|
def _prepare_segments(self, segments, calculate_contents_len):
	'''Translate non-printable characters and calculate segment width

	:param segments:
		Segment dictionaries; their ``contents`` value is replaced in place
		with the result of ``translate_np``.
	:param calculate_contents_len:
		If true, ``_contents_len`` of every segment is set to the width of
		the translated contents as reported by ``self.strwidth``.
	'''
	for segment in segments:
		segment['contents'] = translate_np(segment['contents'])
	if not calculate_contents_len:
		return
	# Second pass is kept separate from the translation pass.
	for segment in segments:
		translated = segment['contents']
		segment['_contents_len'] = self.strwidth(translated)
|
||||||
|
|
||||||
def _render_length(self, theme, segments, divider_widths):
|
def _render_length(self, theme, segments, divider_widths):
|
||||||
'''Update segments lengths and return them
|
'''Update segments lengths and return them
|
||||||
'''
|
'''
|
||||||
|
@ -327,9 +396,6 @@ class Renderer(object):
|
||||||
divider_spaces = theme.get_spaces()
|
divider_spaces = theme.get_spaces()
|
||||||
for index, segment in enumerate(segments):
|
for index, segment in enumerate(segments):
|
||||||
side = segment['side']
|
side = segment['side']
|
||||||
if segment['_contents_len'] is None:
|
|
||||||
segment_len = segment['_contents_len'] = self.strwidth(segment['contents'])
|
|
||||||
else:
|
|
||||||
segment_len = segment['_contents_len']
|
segment_len = segment['_contents_len']
|
||||||
|
|
||||||
prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT
|
prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT
|
||||||
|
@ -381,8 +447,6 @@ class Renderer(object):
|
||||||
contents_highlighted = ''
|
contents_highlighted = ''
|
||||||
draw_divider = segment['draw_' + divider_type + '_divider']
|
draw_divider = segment['draw_' + divider_type + '_divider']
|
||||||
|
|
||||||
contents_raw = contents_raw.translate(self.np_character_translations)
|
|
||||||
|
|
||||||
# XXX Make sure self.hl() calls are called in the same order
|
# XXX Make sure self.hl() calls are called in the same order
|
||||||
# segments are displayed. This is needed for Vim renderer to work.
|
# segments are displayed. This is needed for Vim renderer to work.
|
||||||
if draw_divider:
|
if draw_divider:
|
||||||
|
|
|
@ -3,6 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
|
||||||
|
|
||||||
import threading
|
import threading
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
@ -16,7 +17,9 @@ from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment
|
||||||
from powerline.lib.monotonic import monotonic
|
from powerline.lib.monotonic import monotonic
|
||||||
from powerline.lib.vcs.git import git_directory
|
from powerline.lib.vcs.git import git_directory
|
||||||
|
|
||||||
from tests.lib import Pl
|
import powerline.lib.unicode as plu
|
||||||
|
|
||||||
|
from tests.lib import Pl, replace_attr
|
||||||
from tests import TestCase, SkipTest
|
from tests import TestCase, SkipTest
|
||||||
|
|
||||||
|
|
||||||
|
@ -397,6 +400,104 @@ class TestLib(TestCase):
|
||||||
self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB')
|
self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB')
|
||||||
|
|
||||||
|
|
||||||
|
# Maps east asian width property values to a number of display cells; passed
# as the first argument to the strwidth_ucs_* functions in the tests below.
width_data = {
	'N': 1,   # Neutral
	'Na': 1,  # Narrow
	'A': 1,   # Ambiguous
	'H': 1,   # Half-width
	'W': 2,   # Wide
	'F': 2,   # Fullwidth
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestUnicode(TestCase):
|
||||||
|
def assertStringsIdentical(self, s1, s2):
|
||||||
|
self.assertTrue(type(s1) is type(s2), msg='string types differ')
|
||||||
|
self.assertEqual(s1, s2)
|
||||||
|
|
||||||
|
def test_unicode(self):
|
||||||
|
self.assertTrue(type('abc') is plu.unicode)
|
||||||
|
|
||||||
|
def test_unichr(self):
|
||||||
|
if not sys.maxunicode < 0x10FFFF:
|
||||||
|
self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF))
|
||||||
|
self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF))
|
||||||
|
self.assertStringsIdentical('\x20', plu.unichr(0x20))
|
||||||
|
|
||||||
|
def test_u(self):
|
||||||
|
self.assertStringsIdentical('Test', plu.u('Test'))
|
||||||
|
self.assertStringsIdentical('Test', plu.u(b'Test'))
|
||||||
|
self.assertStringsIdentical('«»', plu.u(b'\xC2\xAB\xC2\xBB'))
|
||||||
|
self.assertRaises(UnicodeDecodeError, plu.u, b'\xFF')
|
||||||
|
|
||||||
|
def test_tointiter(self):
|
||||||
|
self.assertEqual([1, 2, 3], list(plu.tointiter(b'\x01\x02\x03')))
|
||||||
|
|
||||||
|
def test_decode_error(self):
|
||||||
|
self.assertStringsIdentical('<FF>', b'\xFF'.decode('utf-8', 'powerline_decode_error'))
|
||||||
|
self.assertStringsIdentical('abc', b'abc'.decode('utf-8', 'powerline_decode_error'))
|
||||||
|
|
||||||
|
def test_register_strwidth_error(self):
|
||||||
|
ename = plu.register_strwidth_error(lambda s: 3)
|
||||||
|
self.assertStringsIdentical(b'???', 'A'.encode('latin1', ename))
|
||||||
|
self.assertStringsIdentical(b'abc', 'abc'.encode('latin1', ename))
|
||||||
|
|
||||||
|
def test_out_u(self):
|
||||||
|
self.assertStringsIdentical('abc', plu.out_u('abc'))
|
||||||
|
self.assertStringsIdentical('abc', plu.out_u(b'abc'))
|
||||||
|
self.assertRaises(TypeError, plu.out_u, None)
|
||||||
|
|
||||||
|
def test_safe_unicode(self):
    """Currently skipped test for ``plu.safe_unicode``.

    The unconditional ``SkipTest`` below makes the rest of the body
    unreachable; the checks are kept for when the function is fixed.
    """
    raise SkipTest('safe_unicode() function is buggy')

    plain_cases = (
        ('abc', 'abc'),
        ('abc', b'abc'),
        ('«»', b'\xc2\xab\xc2\xbb'),
    )
    for expected, value in plain_cases:
        self.assertStringsIdentical(expected, plu.safe_unicode(value))

    # With latin1 as the preferred output encoding, byte 0xFF is expected
    # to come out as 'ÿ'.
    with replace_attr(plu, 'get_preferred_output_encoding', lambda: 'latin1'):
        self.assertStringsIdentical('ÿ', plu.safe_unicode(b'\xFF'))
    self.assertStringsIdentical('None', plu.safe_unicode(None))

    class FailingStr(object):
        # Object whose string conversion always fails.
        def __str__(self):
            raise NotImplementedError('Fail!')

    broken = FailingStr()
    self.assertStringsIdentical('Fail!', plu.safe_unicode(broken))
|
||||||
|
|
||||||
|
def test_FailedUnicode(self):
    """``plu.FailedUnicode`` is a ``plu.unicode`` that compares equal to text."""
    is_unicode = isinstance(plu.FailedUnicode('abc'), plu.unicode)
    self.assertTrue(is_unicode)
    value = plu.FailedUnicode('abc')
    self.assertEqual('abc', value)
|
||||||
|
|
||||||
|
def test_string(self):
    """Currently skipped test for ``plu.string``.

    Everything after the unconditional ``SkipTest`` is unreachable and is
    kept only for when the function is fixed.
    """
    raise SkipTest('string() function is buggy')

    expected = str('abc')
    for value in ('abc', b'abc'):
        self.assertStringsIdentical(expected, plu.string(value))
|
||||||
|
|
||||||
|
def test_surrogate_pair_to_character(self):
    """The pair (0xD83D, 0xDC8E) must combine into code point 0x1F48E."""
    high, low = 0xD83D, 0xDC8E
    combined = plu.surrogate_pair_to_character(high, low)
    self.assertEqual(0x1F48E, combined)
|
||||||
|
|
||||||
|
def test_strwidth_ucs_4(self):
    """Check ``plu.strwidth_ucs_4`` on narrow, fullwidth and astral input.

    The first two checks run on every build; the check with a character
    above U+FFFF only runs where a single ``str`` item can hold it.
    """
    self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'abcd'))
    # Two fullwidth letters (U+FF21, U+FF22) are expected to take four
    # cells. They are written as escapes: plain ASCII 'AB' is narrow, like
    # 'abcd' above, and so could never be four cells wide.
    self.assertEqual(4, plu.strwidth_ucs_4(width_data, '\uFF21\uFF22'))
    if sys.maxunicode < 0x10FFFF:
        raise SkipTest('Can only test strwidth_ucs_4 in UCS-4 Pythons')

    def east_asian_width(ch):
        # Stand-in for the real lookup: it insists on receiving U+1F48E as
        # one single item and reports it as fullwidth.
        assert (len(ch) == 1)
        assert ord(ch) == 0x1F48E
        return 'F'

    with replace_attr(plu, 'east_asian_width', east_asian_width):
        # Warning: travis unicodedata.east_asian_width for some reason
        # thinks this character is 5 symbols wide.
        self.assertEqual(2, plu.strwidth_ucs_4(width_data, '\U0001F48E'))
|
||||||
|
|
||||||
|
def test_strwidth_ucs_2(self):
    """Check ``plu.strwidth_ucs_2`` on narrow, fullwidth and surrogate input.

    The first two checks run on every build; the surrogate pair check only
    runs on builds where ``sys.maxunicode`` is below 0x10FFFF.
    """
    self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'abcd'))
    # Two fullwidth letters (U+FF21, U+FF22) are expected to take four
    # cells. They are written as escapes: plain ASCII 'AB' is narrow, like
    # 'abcd' above, and so could never be four cells wide.
    self.assertEqual(4, plu.strwidth_ucs_2(width_data, '\uFF21\uFF22'))
    if not sys.maxunicode < 0x10FFFF:
        raise SkipTest('Can only test strwidth_ucs_2 in UCS-2 Pythons')
    # Surrogate pair (D83D, DC8E): must be measured as one two-cell
    # character, not as two separate code units.
    self.assertEqual(2, plu.strwidth_ucs_2(width_data, '\ud83d\udc8e'))
|
||||||
|
|
||||||
|
|
||||||
class TestVCS(TestCase):
|
class TestVCS(TestCase):
|
||||||
def do_branch_rename_test(self, repo, q):
|
def do_branch_rename_test(self, repo, q):
|
||||||
st = monotonic()
|
st = monotonic()
|
||||||
|
|
|
@ -28,6 +28,7 @@ except IOError:
|
||||||
hostname = socket.gethostname()
|
hostname = socket.gethostname()
|
||||||
user = os.environ['USER']
|
user = os.environ['USER']
|
||||||
|
|
||||||
|
REFS_RE = re.compile(r'^\[\d+ refs\]\n')
|
||||||
IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])')
|
IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])')
|
||||||
|
|
||||||
with codecs.open(fname, 'r', encoding='utf-8') as R:
|
with codecs.open(fname, 'r', encoding='utf-8') as R:
|
||||||
|
@ -42,6 +43,8 @@ with codecs.open(fname, 'r', encoding='utf-8') as R:
|
||||||
line = line.translate({
|
line = line.translate({
|
||||||
ord('\r'): None
|
ord('\r'): None
|
||||||
})
|
})
|
||||||
|
if REFS_RE.match(line):
|
||||||
|
continue
|
||||||
line = line.replace(hostname, 'HOSTNAME')
|
line = line.replace(hostname, 'HOSTNAME')
|
||||||
line = line.replace(user, 'USER')
|
line = line.replace(user, 'USER')
|
||||||
if pid is not None:
|
if pid is not None:
|
||||||
|
|
|
@ -91,8 +91,11 @@ run_test() {
|
||||||
SH="$1"
|
SH="$1"
|
||||||
SESNAME="powerline-shell-test-${SH}-$$"
|
SESNAME="powerline-shell-test-${SH}-$$"
|
||||||
|
|
||||||
|
# Note: when running screen with setuid libc unsets LD_LIBRARY_PATH, so it
|
||||||
|
# cannot be added to the `env -i` call above.
|
||||||
run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \
|
run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \
|
||||||
screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \
|
screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \
|
||||||
|
env LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \
|
||||||
"$@"
|
"$@"
|
||||||
while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do
|
while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do
|
||||||
sleep 0.1s
|
sleep 0.1s
|
||||||
|
|
Loading…
Reference in New Issue