parent
61f0542946
commit
209d6be91e
|
@ -7,7 +7,25 @@ import re
|
|||
from powerline.lib.unicode import unichr
|
||||
|
||||
|
||||
NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']')
|
||||
NON_PRINTABLE = re.compile(
|
||||
'[^'
|
||||
# ASCII control characters: 0x00-0x1F
|
||||
+ '\t\n' # Tab, newline: allowed ASCII control characters
|
||||
+ '\x20-\x7E' # ASCII printable characters
|
||||
# Unicode control characters: 0x7F-0x9F
|
||||
+ '\u0085' # Allowed unicode control character: next line character
|
||||
+ '\u00A0-\uD7FF'
|
||||
# Surrogate escapes: 0xD800-0xDFFF
|
||||
+ '\uE000-\uFFFD'
|
||||
+ ']'
|
||||
+ ((
|
||||
# Paired surrogate escapes: allowed in UCS-2 builds as the only way to
|
||||
# represent characters above 0xFFFF. Only paired variant is allowed.
|
||||
'|[\uD800-\uDBFF][\uDC00-\uDFFF]'
|
||||
) if sys.maxunicode < 0x10FFFF else (
|
||||
''
|
||||
))
|
||||
)
|
||||
|
||||
|
||||
def repl(s):
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# vim:fileencoding=utf-8:noet
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
from unicodedata import east_asian_width, combining
|
||||
from itertools import chain
|
||||
|
@ -13,6 +15,80 @@ from powerline.lib.unicode import unichr
|
|||
NBSP = ' '
|
||||
|
||||
|
||||
np_control_character_translations = dict((
|
||||
# Control characters: ^@ … ^_
|
||||
(i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)
|
||||
))
|
||||
'''Control character translations
|
||||
|
||||
Dictionary that maps characters in range 0x00–0x1F (inclusive) to strings
|
||||
``'^@'``, ``'^A'`` and so on.
|
||||
|
||||
.. note:: maps tab to ``^I`` and newline to ``^J``.
|
||||
'''
|
||||
|
||||
np_invalid_character_translations = dict((
|
||||
# Invalid unicode characters obtained using 'surrogateescape' error
|
||||
# handler.
|
||||
(i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)
|
||||
))
|
||||
'''Invalid unicode character translations
|
||||
|
||||
When using ``surrogateescape`` encoding error handling method characters in
|
||||
range 0x80–0xFF (inclusive) are transformed into unpaired surrogate escape
|
||||
unicode codepoints 0xDC80–0xDCFF. This dictionary maps such characters to
|
||||
``<80>``, ``<81>``, and so on: in Python-3 they cannot be printed or
|
||||
converted to UTF-8 because UTF-8 standard does not allow surrogate escape
|
||||
characters, not even paired ones. Python-2 contains a bug that allows such
|
||||
action, but printing them in any case makes no sense.
|
||||
'''
|
||||
|
||||
# XXX: not using `r` because it makes no sense.
|
||||
# Upper bound is 0xDCFF (not 0xDD00): the class must cover exactly the keys of
# np_invalid_character_translations (range(0xDC80, 0xDD00)), otherwise the
# dictionary lookup performed for each match raises KeyError on U+DD00.
np_invalid_character_re = re.compile('(?<![\uD800-\uDBFF])[\uDC80-\uDCFF]')
|
||||
'''Regex that finds unpaired surrogate escape characters
|
||||
|
||||
Search is only limited to the ones obtained from ``surrogateescape`` error
|
||||
handling method. This regex is only used for UCS-2 Python variants because
|
||||
in this case characters above 0xFFFF are represented as surrogate escape
|
||||
characters and are thus subject to partial transformation if
|
||||
``np_invalid_character_translations`` translation table is used.
|
||||
'''
|
||||
|
||||
# On UCS-4 builds str.translate can handle surrogate escapes directly, so the
# invalid-character table is merged in here. On UCS-2 builds it is left out
# because np_invalid_character_re handles those characters instead (a plain
# translation table would also rewrite halves of valid surrogate pairs).
np_character_translations = dict(chain(
	np_control_character_translations.items(),
	np_invalid_character_translations.items() if sys.maxunicode >= 0x10FFFF else (),
))
|
||||
'''Dictionary that contains non-printable character translations
|
||||
|
||||
In UCS-4 versions of Python this is a union of
|
||||
``np_invalid_character_translations`` and ``np_control_character_translations``
|
||||
dictionaries. In UCS-2 for technical reasons ``np_invalid_character_re`` is used
|
||||
instead and this dictionary only contains items from
|
||||
``np_control_character_translations``.
|
||||
'''
|
||||
|
||||
translate_np = (
|
||||
(
|
||||
lambda s: (
|
||||
np_invalid_character_re.subn(
|
||||
lambda match: (
|
||||
np_invalid_character_translations[ord(match.group(0))]
|
||||
), s
|
||||
)[0].translate(np_character_translations)
|
||||
)
|
||||
) if sys.maxunicode < 0x10FFFF else (
|
||||
lambda s: (
|
||||
s.translate(np_character_translations)
|
||||
)
|
||||
)
|
||||
)
|
||||
'''Function that translates non-printable characters into printable strings
|
||||
|
||||
Is used to translate control characters and surrogate escape characters
|
||||
obtained from ``surrogateescape`` encoding errors handling method into some
|
||||
printable sequences. See documentation for
|
||||
``np_invalid_character_translations`` and
|
||||
``np_control_character_translations`` for more details.
|
||||
'''
|
||||
|
||||
|
||||
def construct_returned_value(rendered_highlighted, segments, width, output_raw, output_width):
|
||||
if not (output_raw or output_width):
|
||||
return rendered_highlighted
|
||||
|
@ -75,25 +151,6 @@ class Renderer(object):
|
|||
See documentation of ``unicode.translate`` for details.
|
||||
'''
|
||||
|
||||
np_character_translations = dict(chain(
|
||||
# Control characters: ^@ … ^_
|
||||
((i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)),
|
||||
# Invalid unicode characters obtained using 'surrogateescape' error
|
||||
# handler.
|
||||
((i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)),
|
||||
))
|
||||
'''Non-printable character translations
|
||||
|
||||
These are used to transform characters in range 0x00—0x1F into ``^@``,
|
||||
``^A`` and so on and characters in range 0xDC80—0xDCFF into ``<80>``,
|
||||
``<81>`` and so on (latter are invalid characters obtained using
|
||||
``surrogateescape`` error handling method used automatically in a number of
|
||||
places in Python3). Unlike with ``.escape()`` method (and
|
||||
``character_translations``) result is passed to ``.strwidth()`` method.
|
||||
|
||||
Note: transforms tab into ``^I``.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
theme_config,
|
||||
local_themes,
|
||||
|
@ -381,7 +438,7 @@ class Renderer(object):
|
|||
contents_highlighted = ''
|
||||
draw_divider = segment['draw_' + divider_type + '_divider']
|
||||
|
||||
contents_raw = contents_raw.translate(self.np_character_translations)
|
||||
contents_raw = translate_np(contents_raw)
|
||||
|
||||
# XXX Make sure self.hl() calls are called in the same order
|
||||
# segments are displayed. This is needed for Vim renderer to work.
|
||||
|
|
Loading…
Reference in New Issue