diff --git a/client/powerline.c b/client/powerline.c index f53e3457..d6858770 100644 --- a/client/powerline.c +++ b/client/powerline.c @@ -42,12 +42,12 @@ void do_write(int sd, const char *raw, size_t len) { } } -#ifdef __APPLE__ -# define ADDRESS_TEMPLATE "/tmp/powerline-ipc-%d" -# define A -#else +#ifdef __linux__ # define ADDRESS_TEMPLATE "powerline-ipc-%d" # define A +1 +#else +# define ADDRESS_TEMPLATE "/tmp/powerline-ipc-%d" +# define A #endif #define ADDRESS_SIZE sizeof(ADDRESS_TEMPLATE) + (sizeof(uid_t) * 4) diff --git a/client/powerline.py b/client/powerline.py index 78403887..28492c15 100755 --- a/client/powerline.py +++ b/client/powerline.py @@ -26,9 +26,7 @@ if len(sys.argv) < 2: print('Must provide at least one argument.', file=sys.stderr) raise SystemExit(1) -platform = sys.platform.lower() -use_filesystem = 'darwin' in platform -del platform +use_filesystem = not sys.platform.lower().startswith('linux') if sys.argv[1] == '--socket': address = sys.argv[2] diff --git a/client/powerline.sh b/client/powerline.sh index b112ec21..b8e37956 100755 --- a/client/powerline.sh +++ b/client/powerline.sh @@ -1,6 +1,22 @@ #!/bin/sh -test "${OSTYPE#darwin}" = "${OSTYPE}" && darwin=n || darwin=y +use_filesystem=1 +darwin= +if test -n "$OSTYPE" ; then + # OSTYPE variable is a shell feature. supported by bash and zsh, but not + # dash, busybox or (m)ksh. + if test "${OSTYPE#linux}" '!=' "${OSTYPE}" ; then + use_filesystem= + elif test "${OSTYPE#darwin}" ; then + darwin=1 + fi +elif which uname >/dev/null ; then + if uname -o | grep -iqF linux ; then + use_filesystem= + elif uname -o | grep -iqF darwin ; then + darwin=1 + fi +fi if test "$1" = "--socket" ; then shift @@ -8,13 +24,16 @@ if test "$1" = "--socket" ; then shift else ADDRESS="powerline-ipc-${UID:-`id -u`}" - test "$darwin" = y && ADDRESS="/tmp/$ADDRESS" + test -n "$use_filesystem" && ADDRESS="/tmp/$ADDRESS" fi -if test "$darwin" = y; then +if test -n "$darwin" ; then ENV=genv else ENV=env +fi + +if test -z "$use_filesystem" ; then ADDRESS="abstract-client:$ADDRESS" fi diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 089cd190..c0033c0b 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -7,6 +7,15 @@ Generic requirements * Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only non-optional requirement. + + .. warning: + It is highly advised to use UCS-4 version of Python because UCS-2 version + uses significantly slower text processing (length determination and + non-printable character replacement) functions due to the need of + supporting unicode characters above U+FFFF which are represented as + surrogate pairs. This price will be paid even if configuration has no such + characters. + * C compiler. Required to build powerline client on linux. If it is not present then powerline will fall back to shell script or python client. * ``socat`` program. Required for shell variant of client which runs a bit @@ -43,7 +52,7 @@ powerline with ``pip``: .. code-block:: sh - pip install -e --user {path_to_powerline} + pip install --user --editable={path_to_powerline} , but note that in this case ``pip`` will not install ``powerline`` executable and you will have to do something like diff --git a/powerline/bindings/config.py b/powerline/bindings/config.py index 9e46748a..ea961d38 100644 --- a/powerline/bindings/config.py +++ b/powerline/bindings/config.py @@ -145,6 +145,8 @@ def init_environment(pl, args): left_dividers = powerline.renderer.theme.dividers['left'] set_tmux_environment('_POWERLINE_LEFT_HARD_DIVIDER', left_dividers['hard']) set_tmux_environment('_POWERLINE_LEFT_SOFT_DIVIDER', left_dividers['soft']) + set_tmux_environment('_POWERLINE_LEFT_HARD_DIVIDER_SPACES', ( + ' ' * powerline.renderer.strwidth(left_dividers['hard']))) def get_main_config(args): diff --git a/powerline/bindings/tmux/powerline-base.conf b/powerline/bindings/tmux/powerline-base.conf index add06afa..38b5de60 100644 --- a/powerline/bindings/tmux/powerline-base.conf +++ b/powerline/bindings/tmux/powerline-base.conf @@ -4,7 +4,7 @@ set -g status-interval 2 set -g status-left-length 20 set -g status-right '#(eval $POWERLINE_COMMAND tmux right -R pane_id=`tmux display -p "#D"`)' set -g status-right-length 150 -set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR] #I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W " +set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER_SPACES#I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W $_POWERLINE_LEFT_HARD_DIVIDER_SPACES" set -g window-status-current-format "#[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER#[$_POWERLINE_WINDOW_CURRENT_COLOR]#I $_POWERLINE_LEFT_SOFT_DIVIDER#[$_POWERLINE_WINDOW_NAME_COLOR]#W #[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_NEXT_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER" # Legacy status-left definition to be overwritten for tmux Versions 1.8+ diff --git a/powerline/config_files/colorschemes/tmux/default.json b/powerline/config_files/colorschemes/tmux/default.json index 3745fd5a..e936f2ce 100644 --- a/powerline/config_files/colorschemes/tmux/default.json +++ b/powerline/config_files/colorschemes/tmux/default.json @@ -4,8 +4,8 @@ "window_status": {"fg": "gray70", "bg": "gray0", "attr": []}, "activity_status": {"fg": "yellow", "bg": "gray0", "attr": []}, "bell_status": {"fg": "red", "bg": "gray0", "attr": []}, - "window": {"fg": "gray6", "bg": "gray11", "attr": []}, - "window:divider": {"fg": "gray4", "bg": "gray11", "attr": []}, + "window": {"fg": "gray6", "bg": "gray0", "attr": []}, + "window:divider": {"fg": "gray4", "bg": "gray0", "attr": []}, "window:current": {"fg": "mediumcyan", "bg": "darkblue", "attr": []}, "window_name": {"fg": "white", "bg": "darkblue", "attr": ["bold"]}, "session": {"fg": "black", "bg": "gray90", "attr": ["bold"]}, diff --git a/powerline/lib/debug.py b/powerline/lib/debug.py index fc1ffeea..515e8c40 100755 --- a/powerline/lib/debug.py +++ b/powerline/lib/debug.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # vim:fileencoding=utf-8:noet from __future__ import (unicode_literals, division, absolute_import, print_function) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index 32ea3afe..152bacd3 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import codecs +from unicodedata import east_asian_width, combining + from powerline.lib.encoding import get_preferred_output_encoding @@ -19,6 +21,17 @@ except ImportError: unichr = chr +if sys.maxunicode < 0x10FFFF: + _unichr = unichr + + def unichr(ch): + if ch <= sys.maxunicode: + return _unichr(ch) + else: + ch -= 0x10000 + return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00) + + def u(s): '''Return unicode instance assuming UTF-8 encoded string. ''' @@ -56,6 +69,27 @@ last_swe_idx = 0 def register_strwidth_error(strwidth): + '''Create new encode errors handling method similar to ``replace`` + + Like ``replace`` this method uses question marks in place of the characters + that cannot be represented in the requested encoding. Unlike ``replace`` the + amount of question marks is identical to the amount of display cells + offending character occupies. Thus encoding ``…`` (U+2026, HORIZONTAL + ELLIPSIS) to ``latin1`` will emit one question mark, but encoding ``A`` + (U+FF21, FULLWIDTH LATIN CAPITAL LETTER A) will emit two question marks. + + Since width of some characters depends on the terminal settings and + powerline knows how to respect them a single error handling method cannot be + used. Instead of it the generator function is used which takes ``strwidth`` + function (function that knows how to compute string width respecting all + needed settings) and emits new error handling method name. + + :param function strwidth: + Function that computs string width measured in display cells the string + occupies when displayed. + + :return: New error handling method name. + ''' global last_swe_idx last_swe_idx += 1 @@ -98,7 +132,10 @@ def safe_unicode(s): ''' try: try: - return unicode(s) + if type(s) is bytes: + return unicode(s, 'ascii') + else: + return unicode(s) except UnicodeDecodeError: try: return unicode(s, 'utf-8') @@ -111,8 +148,7 @@ def safe_unicode(s): class FailedUnicode(unicode): - '''Builtin ``unicode`` (``str`` in python 3) subclass indicating fatal - error. + '''Builtin ``unicode`` subclass indicating fatal error If your code for some reason wants to determine whether `.render()` method failed it should check returned string for being a FailedUnicode instance. @@ -123,8 +159,125 @@ class FailedUnicode(unicode): pass -def string(s): - if type(s) is not str: - return s.encode('utf-8') - else: - return s +if sys.version_info < (3,): + def string(s): + if type(s) is not str: + return s.encode('utf-8') + else: + return s +else: + def string(s): + if type(s) is not str: + return s.decode('utf-8') + else: + return s + + +string.__doc__ = ( + '''Transform ``unicode`` or ``bytes`` object into ``str`` object + + On Python-2 this encodes ``unicode`` to ``bytes`` (which is ``str``) using + UTF-8 encoding; on Python-3 this decodes ``bytes`` to ``unicode`` (which is + ``str``) using UTF-8 encoding. + + Useful for functions that expect an ``str`` object in both unicode versions, + not caring about the semantic differences between them in Python-2 and + Python-3. + ''' +) + + +def surrogate_pair_to_character(high, low): + '''Transform a pair of surrogate codepoints to one codepoint + ''' + return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00) + + +_strwidth_documentation = ( + '''Compute string width in display cells + + {0} + + :param dict width_data: + Dictionary which maps east_asian_width property values to strings + lengths. It is expected to contain the following keys and values (from + `East Asian Width annex `_): + + === ====== =========================================================== + Key Value Description + === ====== =========================================================== + F 2 Fullwidth: all characters that are defined as Fullwidth in + the Unicode Standard [Unicode] by having a compatibility + decomposition of type to characters elsewhere in the + Unicode Standard that are implicitly narrow but unmarked. + H 1 Halfwidth: all characters that are explicitly defined as + Halfwidth in the Unicode Standard by having a compatibility + decomposition of type to characters elsewhere in + the Unicode Standard that are implicitly wide but unmarked, + plus U+20A9 ₩ WON SIGN. + W 2 Wide: all other characters that are always wide. These + characters occur only in the context of East Asian + typography where they are wide characters (such as the + Unified Han Ideographs or Squared Katakana Symbols). This + category includes characters that have explicit halfwidth + counterparts. + Na 1 Narrow: characters that are always narrow and have explicit + fullwidth or wide counterparts. These characters are + implicitly narrow in East Asian typography and legacy + character sets because they have explicit fullwidth or wide + counterparts. All of ASCII is an example of East Asian + Narrow characters. + A 1 or 2 Ambigious: characters that may sometimes be wide and + sometimes narrow. Ambiguous characters require additional + information not contained in the character code to further + resolve their width. This information is usually defined in + terminal setting that should in turn respect glyphs widths + in used fonts. Also see :ref:`ambiwidth configuration + option `. + N 1 Neutral characters: character that does not occur in legacy + East Asian character sets. + === ====== =========================================================== + + :param unicode string: + String whose width will be calculated. + + :return: unsigned integer.''') + + +def strwidth_ucs_4(width_data, string): + return sum((( + ( + 0 + ) if combining(symbol) else ( + width_data[east_asian_width(symbol)] + ) + ) for symbol in string)) + + +strwidth_ucs_4.__doc__ = _strwidth_documentation.format( + '''This version of function expects that characters above 0xFFFF are + represented using one symbol. This is only the case in UCS-4 Python builds. + + .. note: + Even in UCS-4 Python builds it is possible to represent characters above + 0xFFFF using surrogate pairs. Characters represented this way are not + supported.''') + + +def strwidth_ucs_2(width_data, string): + return sum((( + ( + width_data[east_asian_width(string[i - 1] + symbol)] + ) if 0xDC00 <= ord(symbol) <= 0xDFFF else ( + 0 + ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else ( + width_data[east_asian_width(symbol)] + ) + ) for i, symbol in enumerate(string))) + + +strwidth_ucs_2.__doc__ = _strwidth_documentation.format( + '''This version of function expects that characters above 0xFFFF are + represented using two symbols forming a surrogate pair, which is the only + option in UCS-2 Python builds. It still works correctly in UCS-4 Python + builds, but is slower then its UCS-4 counterpart.''') diff --git a/powerline/lint/__init__.py b/powerline/lint/__init__.py index 51c25860..d1797d99 100644 --- a/powerline/lint/__init__.py +++ b/powerline/lint/__init__.py @@ -41,7 +41,7 @@ def generate_json_config_loader(lhadproblem): function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$' -divider_spec = Spec().type(unicode).len( +divider_spec = Spec().printable().len( 'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy top_theme_spec = Spec().type(unicode).func(check_top_theme).copy @@ -211,12 +211,12 @@ segment_spec = Spec( display=Spec().type(bool).optional(), module=segment_module_spec(), priority=Spec().type(int, float, type(None)).optional(), - after=Spec().type(unicode).optional(), - before=Spec().type(unicode).optional(), + after=Spec().printable().optional(), + before=Spec().printable().optional(), width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(), align=Spec().oneof(set('lr')).optional(), args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)), - contents=Spec().type(unicode).optional(), + contents=Spec().printable().optional(), highlight_group=Spec().list( highlight_group_spec().re( '^(?:(?!:divider$).)+$', @@ -243,11 +243,11 @@ divside_spec = Spec( soft=divider_spec(), ).copy segment_data_value_spec = Spec( - after=Spec().type(unicode).optional(), - before=Spec().type(unicode).optional(), + after=Spec().printable().optional(), + before=Spec().printable().optional(), display=Spec().type(bool).optional(), args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)), - contents=Spec().type(unicode).optional(), + contents=Spec().printable().optional(), ).copy dividers_spec = Spec( left=divside_spec(), diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py index ec86d14a..7ee65217 100644 --- a/powerline/lint/markedjson/error.py +++ b/powerline/lint/markedjson/error.py @@ -4,10 +4,33 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import re -from powerline.lib.unicode import unichr - -NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']') +NON_PRINTABLE_STR = ( + '[^' + # ASCII control characters: 0x00-0x19 + + '\t\n' # Tab, newline: allowed ASCII control characters + + '\x20-\x7E' # ASCII printable characters + # Unicode control characters: 0x7F-0x9F + + '\u0085' # Allowed unicode control character: next line character + + '\u00A0-\uD7FF' + # Surrogate escapes: 0xD800-0xDFFF + + '\uE000-\uFFFD' + + (( + '\uD800-\uDFFF' + ) if sys.maxunicode < 0x10FFFF else ( + '\U00010000-\U0010FFFF' + )) + + ']' + + (( + # Paired surrogate escapes: allowed in UCS-2 builds as the only way to + # represent characters above 0xFFFF. Only paired variant is allowed. + '|(?---')) + return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---')) class Mark: @@ -55,6 +78,13 @@ class Mark: + ' ' * (indent + len(head) + len(snippet[0])) + '^' ) + def advance_string(self, diff): + ret = self.copy() + # FIXME Currently does not work properly with escaped strings. + ret.column += diff + ret.pointer += diff + return ret + def __str__(self): snippet = self.get_snippet() where = (' in "%s", line %d, column %d' % ( diff --git a/powerline/lint/markedjson/markedvalue.py b/powerline/lint/markedjson/markedvalue.py index 74a62b64..c17a8e35 100644 --- a/powerline/lint/markedjson/markedvalue.py +++ b/powerline/lint/markedjson/markedvalue.py @@ -33,12 +33,7 @@ class MarkedUnicode(unicode): pointdiff = 1 r = [] for s in part_result: - mark = self.mark.copy() - # XXX Does not work properly with escaped strings, but this requires - # saving much more information in mark. - mark.column += pointdiff - mark.pointer += pointdiff - r.append(MarkedUnicode(s, mark)) + r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff))) pointdiff += len(s) return tuple(r) diff --git a/powerline/lint/markedjson/reader.py b/powerline/lint/markedjson/reader.py index bb518b06..0ca45160 100644 --- a/powerline/lint/markedjson/reader.py +++ b/powerline/lint/markedjson/reader.py @@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import codecs -from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE +from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE from powerline.lib.unicode import unicode @@ -84,7 +84,7 @@ class Reader(object): return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer) def check_printable(self, data): - match = NON_PRINTABLE.search(data) + match = NON_PRINTABLE_RE.search(data) if match: self.update_pointer(match.start()) raise ReaderError( @@ -125,7 +125,12 @@ class Reader(object): self.raw_buffer = None break - def update_raw(self, size=4096): + def update_raw(self, size=-1): + # Was size=4096 + assert(size < 0) + # WARNING: reading the whole stream at once. To change this behaviour to + # former reading N characters at once one must make sure that reading + # never ends at partial unicode character. data = self.stream.read(size) if self.raw_buffer is None: self.raw_buffer = data diff --git a/powerline/lint/markedjson/scanner.py b/powerline/lint/markedjson/scanner.py index 543d7298..b0bddf38 100644 --- a/powerline/lint/markedjson/scanner.py +++ b/powerline/lint/markedjson/scanner.py @@ -1,9 +1,14 @@ # vim:fileencoding=utf-8:noet from __future__ import (unicode_literals, division, absolute_import, print_function) +from string import hexdigits + from powerline.lint.markedjson.error import MarkedError from powerline.lint.markedjson import tokens -from powerline.lib.unicode import unicode +from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character + + +hexdigits_set = set(hexdigits) # Scanner produces tokens of the following types: @@ -415,7 +420,7 @@ class Scanner: length = self.ESCAPE_CODES[ch] self.forward() for k in range(length): - if self.peek(k) not in '0123456789ABCDEFabcdef': + if self.peek(k) not in hexdigits: raise ScannerError( 'while scanning a double-quoted scalar', start_mark, 'expected escape sequence of %d hexdecimal numbers, but found %r' % ( @@ -423,8 +428,26 @@ class Scanner: self.get_mark() ) code = int(self.prefix(length), 16) - chunks.append(chr(code)) self.forward(length) + if 0xD800 <= code <= 0xDC00: + # Start of the surrogate pair + next_char = self.prefix(6) + if ( + next_char[0] != '\\' + or next_char[1] != 'u' + or not (set(next_char[2:]) < hexdigits_set) + or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF) + ): + raise ScannerError( + 'while scanning a double-quoted scalar', start_mark, + 'expected escape sequence with the next character in surrogate pair, but found %r' % ( + next_char + ), + self.get_mark() + ) + code = surrogate_pair_to_character(code, int(next_char[2:], 16)) + self.forward(6) + chunks.append(unichr(code)) else: raise ScannerError( 'while scanning a double-quoted scalar', start_mark, diff --git a/powerline/lint/spec.py b/powerline/lint/spec.py index 1d095721..6de14fea 100644 --- a/powerline/lint/spec.py +++ b/powerline/lint/spec.py @@ -7,10 +7,19 @@ import re from copy import copy from powerline.lib.unicode import unicode -from powerline.lint.markedjson.error import echoerr, DelayedEchoErr +from powerline.lint.markedjson.error import echoerr, DelayedEchoErr, NON_PRINTABLE_STR from powerline.lint.selfcheck import havemarks +NON_PRINTABLE_RE = re.compile( + NON_PRINTABLE_STR.translate({ + ord('\t'): None, + ord('\n'): None, + 0x0085: None, + }) +) + + class Spec(object): '''Class that describes some JSON value @@ -342,6 +351,26 @@ class Spec(object): return False, hadproblem return True, hadproblem + def check_printable(self, value, context_mark, data, context, echoerr, _): + '''Check that given unicode string contains only printable characters + ''' + hadproblem = False + for match in NON_PRINTABLE_RE.finditer(value): + hadproblem = True + echoerr( + context=self.cmsg.format(key=context.key), + context_mark=value.mark, + problem='found not printable character U+{0:04x} in a configuration string'.format( + ord(match.group(0))), + problem_mark=value.mark.advance_string(match.start() + 1) + ) + return True, hadproblem + + def printable(self, *args): + self.type(unicode) + self.checks.append(('check_printable', args)) + return self + def type(self, *args): '''Describe value that has one of the types given in arguments diff --git a/powerline/renderer.py b/powerline/renderer.py index 1ba4ec63..e69abca1 100644 --- a/powerline/renderer.py +++ b/powerline/renderer.py @@ -1,18 +1,93 @@ # vim:fileencoding=utf-8:noet from __future__ import (unicode_literals, division, absolute_import, print_function) +import sys import os +import re -from unicodedata import east_asian_width, combining from itertools import chain from powerline.theme import Theme -from powerline.lib.unicode import unichr +from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4 NBSP = ' ' +np_control_character_translations = dict(( + # Control characters: ^@ … ^Y + (i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20) +)) +'''Control character translations + +Dictionary that maps characters in range 0x00–0x1F (inclusive) to strings +``'^@'``, ``'^A'`` and so on. + +.. note: maps tab to ``^I`` and newline to ``^J``. +''' + +np_invalid_character_translations = dict(( + # Invalid unicode characters obtained using 'surrogateescape' error + # handler. + (i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00) +)) +'''Invalid unicode character translations + +When using ``surrogateescape`` encoding error handling method characters in +range 0x80–0xFF (inclusive) are transformed into unpaired surrogate escape +unicode codepoints 0xDC80–0xDD00. This dictionary maps such characters to +``<80>``, ``<81>``, and so on: in Python-3 they cannot be printed or +converted to UTF-8 because UTF-8 standard does not allow surrogate escape +characters, not even paired ones. Python-2 contains a bug that allows such +action, but printing them in any case makes no sense. +''' + +# XXX: not using `r` because it makes no sense. +np_invalid_character_re = re.compile('(?'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)), - )) - '''Non-printable character translations - - These are used to transform characters in range 0x00—0x1F into ``^@``, - ``^A`` and so on and characters in range 0xDC80—0xDCFF into ``<80>``, - ``<81>`` and so on (latter are invalid characters obtained using - ``surrogateescape`` error handling method used automatically in a number of - places in Python3). Unilke with ``.escape()`` method (and - ``character_translations``) result is passed to ``.strwidth()`` method. - - Note: transforms tab into ``^I``. - ''' - def __init__(self, theme_config, local_themes, @@ -120,19 +176,21 @@ class Renderer(object): 'F': 2, # Fullwidth } - def strwidth(self, string): - '''Function that returns string width. + strwidth = lambda self, s: ( + (strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)( + self.width_data, s) + ) + '''Function that returns string width. - Is used to calculate the place given string occupies when handling - ``width`` argument to ``.render()`` method. Must take east asian width - into account. + Is used to calculate the place given string occupies when handling + ``width`` argument to ``.render()`` method. Must take east asian width + into account. - :param unicode string: - String whose width will be calculated. + :param unicode string: + String whose width will be calculated. - :return: unsigned integer. - ''' - return sum((0 if combining(symbol) else self.width_data[east_asian_width(symbol)] for symbol in string)) + :return: unsigned integer. + ''' def get_theme(self, matcher_info): '''Get Theme object. @@ -256,6 +314,8 @@ class Renderer(object): current_width = 0 + self._prepare_segments(segments, output_width or width) + if not width: # No width specified, so we don’t need to crop or pad anything if output_width: @@ -319,6 +379,15 @@ class Renderer(object): return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width) + def _prepare_segments(self, segments, calculate_contents_len): + '''Translate non-printable characters and calculate segment width + ''' + for segment in segments: + segment['contents'] = translate_np(segment['contents']) + if calculate_contents_len: + for segment in segments: + segment['_contents_len'] = self.strwidth(segment['contents']) + def _render_length(self, theme, segments, divider_widths): '''Update segments lengths and return them ''' @@ -327,10 +396,7 @@ class Renderer(object): divider_spaces = theme.get_spaces() for index, segment in enumerate(segments): side = segment['side'] - if segment['_contents_len'] is None: - segment_len = segment['_contents_len'] = self.strwidth(segment['contents']) - else: - segment_len = segment['_contents_len'] + segment_len = segment['_contents_len'] prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT next_segment = segments[index + 1] if index < segments_len - 1 else theme.EMPTY_SEGMENT @@ -381,8 +447,6 @@ class Renderer(object): contents_highlighted = '' draw_divider = segment['draw_' + divider_type + '_divider'] - contents_raw = contents_raw.translate(self.np_character_translations) - # XXX Make sure self.hl() calls are called in the same order # segments are displayed. This is needed for Vim renderer to work. if draw_divider: diff --git a/powerline/segments/common/players.py b/powerline/segments/common/players.py index a53d36d9..c6922e03 100644 --- a/powerline/segments/common/players.py +++ b/powerline/segments/common/players.py @@ -237,10 +237,14 @@ else: return if not info: return - album = out_u(info.get('xesam:album')) - title = out_u(info.get('xesam:title')) + album = info.get('xesam:album') + title = info.get('xesam:title') artist = info.get('xesam:artist') state = _convert_state(status) + if album: + album = out_u(album) + if title: + title = out_u(title) if artist: artist = out_u(artist[0]) return { diff --git a/powerline/segments/vim/plugin/capslock.py b/powerline/segments/vim/plugin/capslock.py index 824d55b9..d2c474d5 100644 --- a/powerline/segments/vim/plugin/capslock.py +++ b/powerline/segments/vim/plugin/capslock.py @@ -14,7 +14,7 @@ from powerline.theme import requires_segment_info def capslock_indicator(pl, segment_info, text='CAPS'): '''Shows the indicator if tpope/vim-capslock plugin is enabled - .. _note:: + .. note:: In the current state plugin automatically disables itself when leaving insert mode. So trying to use this segment not in insert or replace modes is useless. diff --git a/powerline/segments/vim/plugin/commandt.py b/powerline/segments/vim/plugin/commandt.py index c51a9146..fcaa6712 100644 --- a/powerline/segments/vim/plugin/commandt.py +++ b/powerline/segments/vim/plugin/commandt.py @@ -55,7 +55,7 @@ def finder(pl): vim.command('ruby $powerline.commandt_set_active_finder') return [{ 'highlight_group': ['commandt:finder'], - 'contents': vim.eval('g:powerline_commandt_reply').replace('CommandT::', '') + 'contents': vim.eval('g:powerline_commandt_reply').replace('CommandT::', '').replace('Finder::', '') }] @@ -64,6 +64,10 @@ FINDERS_WITHOUT_PATH = set(( 'CommandT::BufferFinder', 'CommandT::TagFinder', 'CommandT::JumpFinder', + 'CommandT::Finder::MRUBufferFinder', + 'CommandT::Finder::BufferFinder', + 'CommandT::Finder::TagFinder', + 'CommandT::Finder::JumpFinder', )) diff --git a/scripts/powerline-daemon b/scripts/powerline-daemon index 62cb35f3..7e78b6b0 100755 --- a/scripts/powerline-daemon +++ b/scripts/powerline-daemon @@ -24,8 +24,7 @@ from powerline.commands.daemon import get_argparser as get_daemon_argparser is_daemon = False -platform = sys.platform.lower() -use_filesystem = 'darwin' in platform +use_filesystem = not sys.platform.lower().startswith('linux') address = None pidfile = None diff --git a/scripts/powerline-release.py b/scripts/powerline-release.py index 3a4c821d..0c3eba10 100755 --- a/scripts/powerline-release.py +++ b/scripts/powerline-release.py @@ -7,7 +7,7 @@ import codecs import os import re -from subprocess import check_output, check_call +from subprocess import check_output, check_call, CalledProcessError from getpass import getpass from github import Github @@ -50,7 +50,10 @@ def parse_version(s): def merge(version_string, rev, **kwargs): check_call(['git', 'checkout', 'master']) - check_call(['git', 'merge', '--no-ff', '--no-commit', '--log', rev]) + try: + check_call(['git', 'merge', '--no-ff', '--no-commit', '--log', rev]) + except CalledProcessError: + check_call(['git', 'mergetool', '--tool', 'vimdiff2']) with codecs.open('.setup.py.new', 'w', encoding='utf-8') as NS: with codecs.open('setup.py', 'r', encoding='utf-8') as OS: @@ -148,7 +151,7 @@ def create_ebuilds(version_string, overlay, user, **kwargs): check_call(['git', 'add', '--'] + new_files, cwd=overlay) check_call(['git', 'commit'] + new_files + ['-m', 'powerline*: Release {0}'.format(version_string)], cwd=overlay) - check_call(['git', 'push', 'git@github.com:{0}/{1}'.format(user, OVERLAY_NAME), branch], cwd=overlay) + check_call(['git', 'push', '-f', 'git@github.com:{0}/{1}'.format(user, OVERLAY_NAME), branch], cwd=overlay) def update_overlay(version_string, user, password, **kwargs): diff --git a/setup.py b/setup.py index 87e849bc..f95dcf99 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ def get_version(): setup( name='powerline-status', - version='1.3', + version='1.3.1', description='The ultimate statusline/prompt utility.', long_description=README, classifiers=[ diff --git a/tests/test_lib.py b/tests/test_lib.py index 4af20fa1..1cd91d04 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -3,6 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import threading import os +import sys import re import shutil @@ -16,7 +17,9 @@ from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment from powerline.lib.monotonic import monotonic from powerline.lib.vcs.git import git_directory -from tests.lib import Pl +import powerline.lib.unicode as plu + +from tests.lib import Pl, replace_attr from tests import TestCase, SkipTest @@ -397,6 +400,101 @@ class TestLib(TestCase): self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB') +width_data = { + 'N': 1, # Neutral + 'Na': 1, # Narrow + 'A': 1, # Ambigious + 'H': 1, # Half-width + 'W': 2, # Wide + 'F': 2, # Fullwidth +} + + +class TestUnicode(TestCase): + def assertStringsIdentical(self, s1, s2): + self.assertTrue(type(s1) is type(s2), msg='string types differ') + self.assertEqual(s1, s2) + + def test_unicode(self): + self.assertTrue(type('abc') is plu.unicode) + + def test_unichr(self): + self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF)) + self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF)) + self.assertStringsIdentical('\x20', plu.unichr(0x20)) + + def test_u(self): + self.assertStringsIdentical('Test', plu.u('Test')) + self.assertStringsIdentical('Test', plu.u(b'Test')) + self.assertStringsIdentical('«»', plu.u(b'\xC2\xAB\xC2\xBB')) + self.assertRaises(UnicodeDecodeError, plu.u, b'\xFF') + + def test_tointiter(self): + self.assertEqual([1, 2, 3], list(plu.tointiter(b'\x01\x02\x03'))) + + def test_decode_error(self): + self.assertStringsIdentical('', b'\xFF'.decode('utf-8', 'powerline_decode_error')) + self.assertStringsIdentical('abc', b'abc'.decode('utf-8', 'powerline_decode_error')) + + def test_register_strwidth_error(self): + ename = plu.register_strwidth_error(lambda s: 3) + self.assertStringsIdentical(b'???', 'A'.encode('latin1', ename)) + self.assertStringsIdentical(b'abc', 'abc'.encode('latin1', ename)) + + def test_out_u(self): + self.assertStringsIdentical('abc', plu.out_u('abc')) + self.assertStringsIdentical('abc', plu.out_u(b'abc')) + self.assertRaises(TypeError, plu.out_u, None) + + def test_safe_unicode(self): + self.assertStringsIdentical('abc', plu.safe_unicode('abc')) + self.assertStringsIdentical('abc', plu.safe_unicode(b'abc')) + self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb')) + with replace_attr(plu, 'get_preferred_output_encoding', lambda: 'latin1'): + self.assertStringsIdentical('ÿ', plu.safe_unicode(b'\xFF')) + self.assertStringsIdentical('None', plu.safe_unicode(None)) + + class FailingStr(object): + def __str__(self): + raise NotImplementedError('Fail!') + + self.assertStringsIdentical('Fail!', plu.safe_unicode(FailingStr())) + + def test_FailedUnicode(self): + self.assertTrue(isinstance(plu.FailedUnicode('abc'), plu.unicode)) + self.assertEqual('abc', plu.FailedUnicode('abc')) + + def test_string(self): + self.assertStringsIdentical(str('abc'), plu.string('abc')) + self.assertStringsIdentical(str('abc'), plu.string(b'abc')) + + def test_surrogate_pair_to_character(self): + self.assertEqual(0x1F48E, plu.surrogate_pair_to_character(0xD83D, 0xDC8E)) + + def test_strwidth_ucs_4(self): + self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'abcd')) + self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'AB')) + if sys.maxunicode < 0x10FFFF: + raise SkipTest('Can only test strwidth_ucs_4 in UCS-4 Pythons') + + def east_asian_width(ch): + assert (len(ch) == 1) + assert ord(ch) == 0x1F48E + return 'F' + + with replace_attr(plu, 'east_asian_width', east_asian_width): + # Warning: travis unicodedata.east_asian_width for some reason + # thinks this character is 5 symbols wide. + self.assertEqual(2, plu.strwidth_ucs_4(width_data, '\U0001F48E')) + + def test_strwidth_ucs_2(self): + self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'abcd')) + self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'AB')) + if not sys.maxunicode < 0x10FFFF: + raise SkipTest('Can only test strwidth_ucs_2 in UCS-2 Pythons') + self.assertEqual(2, plu.strwidth_ucs_2(width_data, '\ud83d\udc8e')) + + class TestVCS(TestCase): def do_branch_rename_test(self, repo, q): st = monotonic() diff --git a/tests/test_shells/postproc.py b/tests/test_shells/postproc.py index 3ef35972..df8a6dad 100755 --- a/tests/test_shells/postproc.py +++ b/tests/test_shells/postproc.py @@ -28,6 +28,7 @@ except IOError: hostname = socket.gethostname() user = os.environ['USER'] +REFS_RE = re.compile(r'^\[\d+ refs\]\n') IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])') with codecs.open(fname, 'r', encoding='utf-8') as R: @@ -42,6 +43,8 @@ with codecs.open(fname, 'r', encoding='utf-8') as R: line = line.translate({ ord('\r'): None }) + if REFS_RE.match(line): + continue line = line.replace(hostname, 'HOSTNAME') line = line.replace(user, 'USER') if pid is not None: diff --git a/tests/test_shells/test.sh b/tests/test_shells/test.sh index 275fa9c5..5ad1bbfe 100755 --- a/tests/test_shells/test.sh +++ b/tests/test_shells/test.sh @@ -91,8 +91,11 @@ run_test() { SH="$1" SESNAME="powerline-shell-test-${SH}-$$" + # Note: when running screen with setuid libc unsets LD_LIBRARY_PATH, so it + # cannot be added to the `env -i` call above. run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \ screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \ + env LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ "$@" while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do sleep 0.1s @@ -213,6 +216,7 @@ ln -s "$(which mktemp)" tests/shell/path ln -s "$(which grep)" tests/shell/path ln -s "$(which sed)" tests/shell/path ln -s "$(which rm)" tests/shell/path +ln -s "$(which uname)" tests/shell/path ln -s ../../test_shells/bgscript.sh tests/shell/path ln -s ../../test_shells/waitpid.sh tests/shell/path if which socat ; then