diff --git a/client/powerline.c b/client/powerline.c
index f53e3457..d6858770 100644
--- a/client/powerline.c
+++ b/client/powerline.c
@@ -42,12 +42,12 @@ void do_write(int sd, const char *raw, size_t len) {
}
}
-#ifdef __APPLE__
-# define ADDRESS_TEMPLATE "/tmp/powerline-ipc-%d"
-# define A
-#else
+#ifdef __linux__
# define ADDRESS_TEMPLATE "powerline-ipc-%d"
# define A +1
+#else
+# define ADDRESS_TEMPLATE "/tmp/powerline-ipc-%d"
+# define A
#endif
#define ADDRESS_SIZE sizeof(ADDRESS_TEMPLATE) + (sizeof(uid_t) * 4)
diff --git a/client/powerline.py b/client/powerline.py
index 78403887..28492c15 100755
--- a/client/powerline.py
+++ b/client/powerline.py
@@ -26,9 +26,7 @@ if len(sys.argv) < 2:
print('Must provide at least one argument.', file=sys.stderr)
raise SystemExit(1)
-platform = sys.platform.lower()
-use_filesystem = 'darwin' in platform
-del platform
+use_filesystem = not sys.platform.lower().startswith('linux')
if sys.argv[1] == '--socket':
address = sys.argv[2]
diff --git a/client/powerline.sh b/client/powerline.sh
index b112ec21..b8e37956 100755
--- a/client/powerline.sh
+++ b/client/powerline.sh
@@ -1,6 +1,22 @@
#!/bin/sh
-test "${OSTYPE#darwin}" = "${OSTYPE}" && darwin=n || darwin=y
+use_filesystem=1
+darwin=
+if test -n "$OSTYPE" ; then
+	# OSTYPE is a shell feature: set by bash and zsh, but not by dash, busybox
+	# or (m)ksh. A prefix is present iff stripping it changes the value.
+	if test "${OSTYPE#linux}" '!=' "${OSTYPE}" ; then
+		use_filesystem=
+	elif test "${OSTYPE#darwin}" '!=' "${OSTYPE}" ; then
+		darwin=1
+	fi
+elif command -v uname >/dev/null 2>&1 ; then
+	if uname -s | grep -iqF linux ; then
+		use_filesystem=
+	elif uname -s | grep -iqF darwin ; then
+		darwin=1
+	fi
+fi
if test "$1" = "--socket" ; then
shift
@@ -8,13 +24,16 @@ if test "$1" = "--socket" ; then
shift
else
ADDRESS="powerline-ipc-${UID:-`id -u`}"
- test "$darwin" = y && ADDRESS="/tmp/$ADDRESS"
+ test -n "$use_filesystem" && ADDRESS="/tmp/$ADDRESS"
fi
-if test "$darwin" = y; then
+if test -n "$darwin" ; then
ENV=genv
else
ENV=env
+fi
+
+if test -z "$use_filesystem" ; then
ADDRESS="abstract-client:$ADDRESS"
fi
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 089cd190..c0033c0b 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -7,6 +7,15 @@ Generic requirements
* Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only
non-optional requirement.
+
+ .. warning::
+ It is highly advised to use UCS-4 version of Python because UCS-2 version
+ uses significantly slower text processing (length determination and
+ non-printable character replacement) functions due to the need of
+ supporting unicode characters above U+FFFF which are represented as
+ surrogate pairs. This price will be paid even if configuration has no such
+ characters.
+
* C compiler. Required to build powerline client on linux. If it is not present
then powerline will fall back to shell script or python client.
* ``socat`` program. Required for shell variant of client which runs a bit
@@ -43,7 +52,7 @@ powerline with ``pip``:
.. code-block:: sh
- pip install -e --user {path_to_powerline}
+ pip install --user --editable={path_to_powerline}
, but note that in this case ``pip`` will not install ``powerline`` executable
and you will have to do something like
diff --git a/powerline/bindings/config.py b/powerline/bindings/config.py
index 9e46748a..ea961d38 100644
--- a/powerline/bindings/config.py
+++ b/powerline/bindings/config.py
@@ -145,6 +145,8 @@ def init_environment(pl, args):
left_dividers = powerline.renderer.theme.dividers['left']
set_tmux_environment('_POWERLINE_LEFT_HARD_DIVIDER', left_dividers['hard'])
set_tmux_environment('_POWERLINE_LEFT_SOFT_DIVIDER', left_dividers['soft'])
+ set_tmux_environment('_POWERLINE_LEFT_HARD_DIVIDER_SPACES', (
+ ' ' * powerline.renderer.strwidth(left_dividers['hard'])))
def get_main_config(args):
diff --git a/powerline/bindings/tmux/powerline-base.conf b/powerline/bindings/tmux/powerline-base.conf
index add06afa..38b5de60 100644
--- a/powerline/bindings/tmux/powerline-base.conf
+++ b/powerline/bindings/tmux/powerline-base.conf
@@ -4,7 +4,7 @@ set -g status-interval 2
set -g status-left-length 20
set -g status-right '#(eval $POWERLINE_COMMAND tmux right -R pane_id=`tmux display -p "#D"`)'
set -g status-right-length 150
-set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR] #I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W "
+set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER_SPACES#I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W $_POWERLINE_LEFT_HARD_DIVIDER_SPACES"
set -g window-status-current-format "#[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER#[$_POWERLINE_WINDOW_CURRENT_COLOR]#I $_POWERLINE_LEFT_SOFT_DIVIDER#[$_POWERLINE_WINDOW_NAME_COLOR]#W #[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_NEXT_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER"
# Legacy status-left definition to be overwritten for tmux Versions 1.8+
diff --git a/powerline/config_files/colorschemes/tmux/default.json b/powerline/config_files/colorschemes/tmux/default.json
index 3745fd5a..e936f2ce 100644
--- a/powerline/config_files/colorschemes/tmux/default.json
+++ b/powerline/config_files/colorschemes/tmux/default.json
@@ -4,8 +4,8 @@
"window_status": {"fg": "gray70", "bg": "gray0", "attr": []},
"activity_status": {"fg": "yellow", "bg": "gray0", "attr": []},
"bell_status": {"fg": "red", "bg": "gray0", "attr": []},
- "window": {"fg": "gray6", "bg": "gray11", "attr": []},
- "window:divider": {"fg": "gray4", "bg": "gray11", "attr": []},
+ "window": {"fg": "gray6", "bg": "gray0", "attr": []},
+ "window:divider": {"fg": "gray4", "bg": "gray0", "attr": []},
"window:current": {"fg": "mediumcyan", "bg": "darkblue", "attr": []},
"window_name": {"fg": "white", "bg": "darkblue", "attr": ["bold"]},
"session": {"fg": "black", "bg": "gray90", "attr": ["bold"]},
diff --git a/powerline/lib/debug.py b/powerline/lib/debug.py
index fc1ffeea..515e8c40 100755
--- a/powerline/lib/debug.py
+++ b/powerline/lib/debug.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function)
diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py
index 32ea3afe..152bacd3 100644
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys
import codecs
+from unicodedata import east_asian_width, combining
+
from powerline.lib.encoding import get_preferred_output_encoding
@@ -19,6 +21,17 @@ except ImportError:
unichr = chr
+if sys.maxunicode < 0x10FFFF:
+	_unichr = unichr
+
+	def unichr(ch):
+		'''Return unicode string for the given codepoint on narrow builds
+
+		Codepoints up to ``sys.maxunicode`` are passed to the original
+		``unichr``. Codepoints above it are returned as a surrogate pair,
+		i.e. a string consisting of two symbols.
+
+		:param int ch:
+			Unicode codepoint, at most 0x10FFFF.
+
+		:return: unicode string: one symbol or one surrogate pair.
+
+		:raise ValueError: if ``ch`` is above 0x10FFFF.
+		'''
+		if ch <= sys.maxunicode:
+			return _unichr(ch)
+		if ch > 0x10FFFF:
+			# Without this check the arithmetic below silently yields an
+			# invalid pair (e.g. two low surrogates for 0x110000).
+			raise ValueError('unichr() arg not in range(0x110000)')
+		ch -= 0x10000
+		# High surrogate carries the top ten bits, low surrogate the rest.
+		return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00)
+
+
def u(s):
'''Return unicode instance assuming UTF-8 encoded string.
'''
@@ -56,6 +69,27 @@ last_swe_idx = 0
def register_strwidth_error(strwidth):
+ '''Create new encode errors handling method similar to ``replace``
+
+ Like ``replace`` this method uses question marks in place of the characters
+ that cannot be represented in the requested encoding. Unlike ``replace`` the
+ amount of question marks is identical to the amount of display cells
+ offending character occupies. Thus encoding ``…`` (U+2026, HORIZONTAL
+ ELLIPSIS) to ``latin1`` will emit one question mark, but encoding ``Ａ``
+ (U+FF21, FULLWIDTH LATIN CAPITAL LETTER A) will emit two question marks.
+
+ Since width of some characters depends on the terminal settings and
+ powerline knows how to respect them a single error handling method cannot be
+ used. Instead of it the generator function is used which takes ``strwidth``
+ function (function that knows how to compute string width respecting all
+ needed settings) and emits new error handling method name.
+
+ :param function strwidth:
+ Function that computes string width measured in display cells the string
+ occupies when displayed.
+
+ :return: New error handling method name.
+ '''
global last_swe_idx
last_swe_idx += 1
@@ -98,7 +132,10 @@ def safe_unicode(s):
'''
try:
try:
- return unicode(s)
+ if type(s) is bytes:
+ return unicode(s, 'ascii')
+ else:
+ return unicode(s)
except UnicodeDecodeError:
try:
return unicode(s, 'utf-8')
@@ -111,8 +148,7 @@ def safe_unicode(s):
class FailedUnicode(unicode):
- '''Builtin ``unicode`` (``str`` in python 3) subclass indicating fatal
- error.
+ '''Builtin ``unicode`` subclass indicating fatal error
If your code for some reason wants to determine whether `.render()` method
failed it should check returned string for being a FailedUnicode instance.
@@ -123,8 +159,125 @@ class FailedUnicode(unicode):
pass
-def string(s):
- if type(s) is not str:
- return s.encode('utf-8')
- else:
- return s
+if sys.version_info < (3,):
+	def string(s):
+		# ``str`` is the byte string type here, so anything else is expected
+		# to be ``unicode``. isinstance() also lets ``str`` subclasses through
+		# unchanged instead of round-tripping them through ASCII.
+		if isinstance(s, str):
+			return s
+		return s.encode('utf-8')
+else:
+	def string(s):
+		# ``str`` is the unicode type here, so anything else is expected to
+		# be ``bytes``. isinstance() is required: ``str`` subclasses have no
+		# ``.decode()`` method and used to fail with AttributeError.
+		if isinstance(s, str):
+			return s
+		return s.decode('utf-8')
+
+
+string.__doc__ = (
+ '''Transform ``unicode`` or ``bytes`` object into ``str`` object
+
+ On Python-2 this encodes ``unicode`` to ``bytes`` (which is ``str``) using
+ UTF-8 encoding; on Python-3 this decodes ``bytes`` to ``unicode`` (which is
+ ``str``) using UTF-8 encoding.
+
+ Useful for functions that expect an ``str`` object in both unicode versions,
+ not caring about the semantic differences between them in Python-2 and
+ Python-3.
+ '''
+)
+
+
+def surrogate_pair_to_character(high, low):
+	'''Combine two surrogate codepoints into the codepoint they encode
+
+	:param int high:
+		First codepoint of the pair; its offset from 0xD800 supplies the upper
+		bits of the result.
+	:param int low:
+		Second codepoint of the pair; its offset from 0xDC00 supplies the
+		lower bits of the result.
+
+	:return: integer codepoint, 0x10000 plus the combined offsets.
+	'''
+	upper_bits = (high - 0xD800) << 10
+	lower_bits = low - 0xDC00
+	return 0x10000 + upper_bits + lower_bits
+
+
+_strwidth_documentation = (
+ '''Compute string width in display cells
+
+ {0}
+
+ :param dict width_data:
+ Dictionary which maps east_asian_width property values to strings
+ lengths. It is expected to contain the following keys and values (from
+ `East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
+
+ === ====== ===========================================================
+ Key Value Description
+ === ====== ===========================================================
+ F 2 Fullwidth: all characters that are defined as Fullwidth in
+ the Unicode Standard [Unicode] by having a compatibility
+ decomposition of type <wide> to characters elsewhere in the
+ Unicode Standard that are implicitly narrow but unmarked.
+ H 1 Halfwidth: all characters that are explicitly defined as
+ Halfwidth in the Unicode Standard by having a compatibility
+ decomposition of type <narrow> to characters elsewhere in
+ the Unicode Standard that are implicitly wide but unmarked,
+ plus U+20A9 ₩ WON SIGN.
+ W 2 Wide: all other characters that are always wide. These
+ characters occur only in the context of East Asian
+ typography where they are wide characters (such as the
+ Unified Han Ideographs or Squared Katakana Symbols). This
+ category includes characters that have explicit halfwidth
+ counterparts.
+ Na 1 Narrow: characters that are always narrow and have explicit
+ fullwidth or wide counterparts. These characters are
+ implicitly narrow in East Asian typography and legacy
+ character sets because they have explicit fullwidth or wide
+ counterparts. All of ASCII is an example of East Asian
+ Narrow characters.
+ A 1 or 2 Ambiguous: characters that may sometimes be wide and
+ sometimes narrow. Ambiguous characters require additional
+ information not contained in the character code to further
+ resolve their width. This information is usually defined in
+ terminal setting that should in turn respect glyphs widths
+ in used fonts. Also see :ref:`ambiwidth configuration
+ option <config-common-ambiwidth>`.
+ N 1 Neutral characters: character that does not occur in legacy
+ East Asian character sets.
+ === ====== ===========================================================
+
+ :param unicode string:
+ String whose width will be calculated.
+
+ :return: unsigned integer.''')
+
+
+def strwidth_ucs_4(width_data, string):
+	# Combining characters contribute zero cells, so they are filtered out;
+	# every other symbol is looked up by its east_asian_width class.
+	return sum(
+		width_data[east_asian_width(symbol)]
+		for symbol in string
+		if not combining(symbol)
+	)
+
+
+strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
+ '''This version of function expects that characters above 0xFFFF are
+ represented using one symbol. This is only the case in UCS-4 Python builds.
+
+ .. note::
+ Even in UCS-4 Python builds it is possible to represent characters above
+ 0xFFFF using surrogate pairs. Characters represented this way are not
+ supported.''')
+
+
+def strwidth_ucs_2(width_data, string):
+ # Sums per-symbol widths so that a surrogate pair counts as one character.
+ # The branches below are tried in this order for every symbol:
+ # 1. low surrogate (0xDC00-0xDFFF): width is looked up for the two-symbol
+ # string made of the preceding symbol (presumably its high surrogate)
+ # and this one;
+ # 2. combining character or high surrogate (0xD800-0xDBFF): zero width,
+ # the pair is accounted for when its low surrogate is reached;
+ # 3. anything else: width is looked up by the symbol's own class.
+ # NOTE(review): for a low surrogate at index 0 ``string[i - 1]`` is
+ # ``string[-1]``, i.e. the last symbol — confirm callers never pass
+ # strings that start with an unpaired low surrogate.
+ return sum(((
+ (
+ width_data[east_asian_width(string[i - 1] + symbol)]
+ ) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
+ 0
+ ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
+ width_data[east_asian_width(symbol)]
+ )
+ ) for i, symbol in enumerate(string)))
+
+
+strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
+ '''This version of function expects that characters above 0xFFFF are
+ represented using two symbols forming a surrogate pair, which is the only
+ option in UCS-2 Python builds. It still works correctly in UCS-4 Python
+ builds, but is slower than its UCS-4 counterpart.''')
diff --git a/powerline/lint/__init__.py b/powerline/lint/__init__.py
index 51c25860..d1797d99 100644
--- a/powerline/lint/__init__.py
+++ b/powerline/lint/__init__.py
@@ -41,7 +41,7 @@ def generate_json_config_loader(lhadproblem):
function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$'
-divider_spec = Spec().type(unicode).len(
+divider_spec = Spec().printable().len(
'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy
ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy
top_theme_spec = Spec().type(unicode).func(check_top_theme).copy
@@ -211,12 +211,12 @@ segment_spec = Spec(
display=Spec().type(bool).optional(),
module=segment_module_spec(),
priority=Spec().type(int, float, type(None)).optional(),
- after=Spec().type(unicode).optional(),
- before=Spec().type(unicode).optional(),
+ after=Spec().printable().optional(),
+ before=Spec().printable().optional(),
width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(),
align=Spec().oneof(set('lr')).optional(),
args=args_spec().func(lambda *args, **kwargs: check_args(get_one_segment_function, *args, **kwargs)),
- contents=Spec().type(unicode).optional(),
+ contents=Spec().printable().optional(),
highlight_group=Spec().list(
highlight_group_spec().re(
'^(?:(?!:divider$).)+$',
@@ -243,11 +243,11 @@ divside_spec = Spec(
soft=divider_spec(),
).copy
segment_data_value_spec = Spec(
- after=Spec().type(unicode).optional(),
- before=Spec().type(unicode).optional(),
+ after=Spec().printable().optional(),
+ before=Spec().printable().optional(),
display=Spec().type(bool).optional(),
args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)),
- contents=Spec().type(unicode).optional(),
+ contents=Spec().printable().optional(),
).copy
dividers_spec = Spec(
left=divside_spec(),
diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py
index ec86d14a..7ee65217 100644
--- a/powerline/lint/markedjson/error.py
+++ b/powerline/lint/markedjson/error.py
@@ -4,10 +4,33 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys
import re
-from powerline.lib.unicode import unichr
-
-NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']')
+NON_PRINTABLE_STR = (
+ '[^'
+ # ASCII control characters: 0x00-0x1F
+ + '\t\n' # Tab, newline: allowed ASCII control characters
+ + '\x20-\x7E' # ASCII printable characters
+ # Unicode control characters: 0x7F-0x9F
+ + '\u0085' # Allowed unicode control character: next line character
+ + '\u00A0-\uD7FF'
+ # Surrogate escapes: 0xD800-0xDFFF
+ + '\uE000-\uFFFD'
+ + ((
+ '\uD800-\uDFFF'
+ ) if sys.maxunicode < 0x10FFFF else (
+ '\U00010000-\U0010FFFF'
+ ))
+ + ']'
+ + ((
+ # Paired surrogate escapes: allowed in UCS-2 builds as the only way to
+ # represent characters above 0xFFFF. Only paired variant is allowed.
+ '|(?---'))
+ return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---'))
class Mark:
@@ -55,6 +78,13 @@ class Mark:
+ ' ' * (indent + len(head) + len(snippet[0])) + '^'
)
+ def advance_string(self, diff):
+ '''Return a copy of the mark moved forward by ``diff``
+
+ The adjustment is applied to the object returned by ``self.copy()``:
+ both its ``column`` and its ``pointer`` are increased by ``diff``.
+
+ :param diff:
+ Amount added to ``column`` and ``pointer`` (callers visible in this
+ change pass character offsets inside a string).
+
+ :return: the adjusted copy.
+ '''
+ ret = self.copy()
+ # FIXME Currently does not work properly with escaped strings.
+ ret.column += diff
+ ret.pointer += diff
+ return ret
+
def __str__(self):
snippet = self.get_snippet()
where = (' in "%s", line %d, column %d' % (
diff --git a/powerline/lint/markedjson/markedvalue.py b/powerline/lint/markedjson/markedvalue.py
index 74a62b64..c17a8e35 100644
--- a/powerline/lint/markedjson/markedvalue.py
+++ b/powerline/lint/markedjson/markedvalue.py
@@ -33,12 +33,7 @@ class MarkedUnicode(unicode):
pointdiff = 1
r = []
for s in part_result:
- mark = self.mark.copy()
- # XXX Does not work properly with escaped strings, but this requires
- # saving much more information in mark.
- mark.column += pointdiff
- mark.pointer += pointdiff
- r.append(MarkedUnicode(s, mark))
+ r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff)))
pointdiff += len(s)
return tuple(r)
diff --git a/powerline/lint/markedjson/reader.py b/powerline/lint/markedjson/reader.py
index bb518b06..0ca45160 100644
--- a/powerline/lint/markedjson/reader.py
+++ b/powerline/lint/markedjson/reader.py
@@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import codecs
-from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE
+from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE
from powerline.lib.unicode import unicode
@@ -84,7 +84,7 @@ class Reader(object):
return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer)
def check_printable(self, data):
- match = NON_PRINTABLE.search(data)
+ match = NON_PRINTABLE_RE.search(data)
if match:
self.update_pointer(match.start())
raise ReaderError(
@@ -125,7 +125,12 @@ class Reader(object):
self.raw_buffer = None
break
- def update_raw(self, size=4096):
+ def update_raw(self, size=-1):
+ # Was size=4096
+ assert(size < 0)
+ # WARNING: reading the whole stream at once. To change this behaviour to
+ # former reading N characters at once one must make sure that reading
+ # never ends at partial unicode character.
data = self.stream.read(size)
if self.raw_buffer is None:
self.raw_buffer = data
diff --git a/powerline/lint/markedjson/scanner.py b/powerline/lint/markedjson/scanner.py
index 543d7298..b0bddf38 100644
--- a/powerline/lint/markedjson/scanner.py
+++ b/powerline/lint/markedjson/scanner.py
@@ -1,9 +1,14 @@
# vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function)
+from string import hexdigits
+
from powerline.lint.markedjson.error import MarkedError
from powerline.lint.markedjson import tokens
-from powerline.lib.unicode import unicode
+from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character
+
+
+hexdigits_set = set(hexdigits)
# Scanner produces tokens of the following types:
@@ -415,7 +420,7 @@ class Scanner:
length = self.ESCAPE_CODES[ch]
self.forward()
for k in range(length):
- if self.peek(k) not in '0123456789ABCDEFabcdef':
+ if self.peek(k) not in hexdigits:
raise ScannerError(
'while scanning a double-quoted scalar', start_mark,
'expected escape sequence of %d hexdecimal numbers, but found %r' % (
@@ -423,8 +428,26 @@ class Scanner:
self.get_mark()
)
code = int(self.prefix(length), 16)
- chunks.append(chr(code))
self.forward(length)
+ if 0xD800 <= code <= 0xDBFF:
+ 	# High surrogate (0xD800-0xDBFF, 0xDC00 already is a low one): it
+ 	# must be followed by a ``\uXXXX`` escape holding a low surrogate.
+ 	next_char = self.prefix(6)
+ 	if (
+ 		# Length is checked first so that a short prefix is reported
+ 		# as ScannerError, not as IndexError or ValueError from int().
+ 		len(next_char) != 6
+ 		or not next_char.startswith('\\u')
+ 		or not (set(next_char[2:]) <= hexdigits_set)
+ 		or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF)
+ 	):
+ 		raise ScannerError(
+ 			'while scanning a double-quoted scalar', start_mark,
+ 			'expected escape sequence with the next character in surrogate pair, but found %r' % (
+ 				next_char
+ 			),
+ 			self.get_mark()
+ 		)
+ 	code = surrogate_pair_to_character(code, int(next_char[2:], 16))
+ 	self.forward(6)
+ chunks.append(unichr(code))
else:
raise ScannerError(
'while scanning a double-quoted scalar', start_mark,
diff --git a/powerline/lint/spec.py b/powerline/lint/spec.py
index 1d095721..6de14fea 100644
--- a/powerline/lint/spec.py
+++ b/powerline/lint/spec.py
@@ -7,10 +7,19 @@ import re
from copy import copy
from powerline.lib.unicode import unicode
-from powerline.lint.markedjson.error import echoerr, DelayedEchoErr
+from powerline.lint.markedjson.error import echoerr, DelayedEchoErr, NON_PRINTABLE_STR
from powerline.lint.selfcheck import havemarks
+# Stricter variant of the pattern described by NON_PRINTABLE_STR: tab, newline
+# and U+0085 are deleted from the pattern text before compiling (mapping
+# a character to None in ``.translate()`` removes it), so they are no longer
+# listed among the allowed characters and are matched as non-printable.
+NON_PRINTABLE_RE = re.compile(
+ NON_PRINTABLE_STR.translate({
+ ord('\t'): None,
+ ord('\n'): None,
+ 0x0085: None,
+ })
+)
+
+
class Spec(object):
'''Class that describes some JSON value
@@ -342,6 +351,26 @@ class Spec(object):
return False, hadproblem
return True, hadproblem
+ def check_printable(self, value, context_mark, data, context, echoerr, _):
+ 	'''Report every non-printable character found in the given string
+
+ 	Each ``NON_PRINTABLE_RE`` match in ``value`` is reported through
+ 	``echoerr``; the problem mark is ``value.mark`` advanced to the
+ 	position of the match.
+
+ 	:return:
+ 		Tuple ``(True, hadproblem)`` where ``hadproblem`` tells whether at
+ 		least one match was reported.
+ 	'''
+ 	offenders = list(NON_PRINTABLE_RE.finditer(value))
+ 	for offender in offenders:
+ 		echoerr(
+ 			context=self.cmsg.format(key=context.key),
+ 			context_mark=value.mark,
+ 			problem='found not printable character U+{0:04x} in a configuration string'.format(
+ 				ord(offender.group(0))),
+ 			problem_mark=value.mark.advance_string(offender.start() + 1)
+ 		)
+ 	return True, bool(offenders)
+
+ def printable(self, *args):
+ '''Describe unicode string that contains only printable characters
+
+ Calls ``.type(unicode)`` and then appends a ``check_printable`` entry to
+ ``self.checks``. Positional arguments, if any, are stored in that entry
+ next to the check name.
+
+ :return: self.
+ '''
+ self.type(unicode)
+ self.checks.append(('check_printable', args))
+ return self
+
def type(self, *args):
'''Describe value that has one of the types given in arguments
diff --git a/powerline/renderer.py b/powerline/renderer.py
index 1ba4ec63..e69abca1 100644
--- a/powerline/renderer.py
+++ b/powerline/renderer.py
@@ -1,18 +1,93 @@
# vim:fileencoding=utf-8:noet
from __future__ import (unicode_literals, division, absolute_import, print_function)
+import sys
import os
+import re
-from unicodedata import east_asian_width, combining
from itertools import chain
from powerline.theme import Theme
-from powerline.lib.unicode import unichr
+from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
NBSP = ' '
+np_control_character_translations = dict((
+ # Control characters: ^@ … ^_
+ (i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20)
+))
+'''Control character translations
+
+Dictionary that maps characters in range 0x00–0x1F (inclusive) to strings
+``'^@'``, ``'^A'`` and so on.
+
+.. note:: maps tab to ``^I`` and newline to ``^J``.
+'''
+
+np_invalid_character_translations = dict((
+ # Invalid unicode characters obtained using 'surrogateescape' error
+ # handler.
+ (i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)
+))
+'''Invalid unicode character translations
+
+When using ``surrogateescape`` encoding error handling method characters in
+range 0x80–0xFF (inclusive) are transformed into unpaired surrogate escape
+unicode codepoints 0xDC80–0xDCFF. This dictionary maps such characters to
+``<80>``, ``<81>``, and so on: in Python-3 they cannot be printed or
+converted to UTF-8 because UTF-8 standard does not allow surrogate escape
+characters, not even paired ones. Python-2 contains a bug that allows such
+action, but printing them in any case makes no sense.
+'''
+
+# XXX: not using `r` because it makes no sense.
+np_invalid_character_re = re.compile('(?'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)),
- ))
- '''Non-printable character translations
-
- These are used to transform characters in range 0x00—0x1F into ``^@``,
- ``^A`` and so on and characters in range 0xDC80—0xDCFF into ``<80>``,
- ``<81>`` and so on (latter are invalid characters obtained using
- ``surrogateescape`` error handling method used automatically in a number of
- places in Python3). Unilke with ``.escape()`` method (and
- ``character_translations``) result is passed to ``.strwidth()`` method.
-
- Note: transforms tab into ``^I``.
- '''
-
def __init__(self,
theme_config,
local_themes,
@@ -120,19 +176,21 @@ class Renderer(object):
'F': 2, # Fullwidth
}
- def strwidth(self, string):
- '''Function that returns string width.
+ strwidth = lambda self, s: (
+ (strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
+ self.width_data, s)
+ )
+ '''Function that returns string width.
- Is used to calculate the place given string occupies when handling
- ``width`` argument to ``.render()`` method. Must take east asian width
- into account.
+ Is used to calculate the place given string occupies when handling
+ ``width`` argument to ``.render()`` method. Must take east asian width
+ into account.
- :param unicode string:
- String whose width will be calculated.
+ :param unicode string:
+ String whose width will be calculated.
- :return: unsigned integer.
- '''
- return sum((0 if combining(symbol) else self.width_data[east_asian_width(symbol)] for symbol in string))
+ :return: unsigned integer.
+ '''
def get_theme(self, matcher_info):
'''Get Theme object.
@@ -256,6 +314,8 @@ class Renderer(object):
current_width = 0
+ self._prepare_segments(segments, output_width or width)
+
if not width:
# No width specified, so we don’t need to crop or pad anything
if output_width:
@@ -319,6 +379,15 @@ class Renderer(object):
return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width)
+ def _prepare_segments(self, segments, calculate_contents_len):
+ 	'''Replace non-printable characters and optionally store segment widths
+
+ 	:param segments:
+ 		Sequence of segment dictionaries, updated in place: ``contents`` of
+ 		every segment is replaced with ``translate_np()`` result.
+ 	:param calculate_contents_len:
+ 		If true then ``_contents_len`` of every segment is set to
+ 		``self.strwidth()`` of the already translated contents.
+ 	'''
+ 	for seg in segments:
+ 		seg['contents'] = translate_np(seg['contents'])
+ 	if not calculate_contents_len:
+ 		return
+ 	# Second pass: widths are measured only after all contents were replaced.
+ 	for seg in segments:
+ 		seg['_contents_len'] = self.strwidth(seg['contents'])
+
def _render_length(self, theme, segments, divider_widths):
'''Update segments lengths and return them
'''
@@ -327,10 +396,7 @@ class Renderer(object):
divider_spaces = theme.get_spaces()
for index, segment in enumerate(segments):
side = segment['side']
- if segment['_contents_len'] is None:
- segment_len = segment['_contents_len'] = self.strwidth(segment['contents'])
- else:
- segment_len = segment['_contents_len']
+ segment_len = segment['_contents_len']
prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT
next_segment = segments[index + 1] if index < segments_len - 1 else theme.EMPTY_SEGMENT
@@ -381,8 +447,6 @@ class Renderer(object):
contents_highlighted = ''
draw_divider = segment['draw_' + divider_type + '_divider']
- contents_raw = contents_raw.translate(self.np_character_translations)
-
# XXX Make sure self.hl() calls are called in the same order
# segments are displayed. This is needed for Vim renderer to work.
if draw_divider:
diff --git a/powerline/segments/common/players.py b/powerline/segments/common/players.py
index a53d36d9..c6922e03 100644
--- a/powerline/segments/common/players.py
+++ b/powerline/segments/common/players.py
@@ -237,10 +237,14 @@ else:
return
if not info:
return
- album = out_u(info.get('xesam:album'))
- title = out_u(info.get('xesam:title'))
+ album = info.get('xesam:album')
+ title = info.get('xesam:title')
artist = info.get('xesam:artist')
state = _convert_state(status)
+ if album:
+ album = out_u(album)
+ if title:
+ title = out_u(title)
if artist:
artist = out_u(artist[0])
return {
diff --git a/powerline/segments/vim/plugin/capslock.py b/powerline/segments/vim/plugin/capslock.py
index 824d55b9..d2c474d5 100644
--- a/powerline/segments/vim/plugin/capslock.py
+++ b/powerline/segments/vim/plugin/capslock.py
@@ -14,7 +14,7 @@ from powerline.theme import requires_segment_info
def capslock_indicator(pl, segment_info, text='CAPS'):
'''Shows the indicator if tpope/vim-capslock plugin is enabled
- .. _note::
+ .. note::
In the current state plugin automatically disables itself when leaving
insert mode. So trying to use this segment not in insert or replace
modes is useless.
diff --git a/powerline/segments/vim/plugin/commandt.py b/powerline/segments/vim/plugin/commandt.py
index c51a9146..fcaa6712 100644
--- a/powerline/segments/vim/plugin/commandt.py
+++ b/powerline/segments/vim/plugin/commandt.py
@@ -55,7 +55,7 @@ def finder(pl):
vim.command('ruby $powerline.commandt_set_active_finder')
return [{
'highlight_group': ['commandt:finder'],
- 'contents': vim.eval('g:powerline_commandt_reply').replace('CommandT::', '')
+ 'contents': vim.eval('g:powerline_commandt_reply').replace('CommandT::', '').replace('Finder::', '')
}]
@@ -64,6 +64,10 @@ FINDERS_WITHOUT_PATH = set((
'CommandT::BufferFinder',
'CommandT::TagFinder',
'CommandT::JumpFinder',
+ 'CommandT::Finder::MRUBufferFinder',
+ 'CommandT::Finder::BufferFinder',
+ 'CommandT::Finder::TagFinder',
+ 'CommandT::Finder::JumpFinder',
))
diff --git a/scripts/powerline-daemon b/scripts/powerline-daemon
index 62cb35f3..7e78b6b0 100755
--- a/scripts/powerline-daemon
+++ b/scripts/powerline-daemon
@@ -24,8 +24,7 @@ from powerline.commands.daemon import get_argparser as get_daemon_argparser
is_daemon = False
-platform = sys.platform.lower()
-use_filesystem = 'darwin' in platform
+use_filesystem = not sys.platform.lower().startswith('linux')
address = None
pidfile = None
diff --git a/scripts/powerline-release.py b/scripts/powerline-release.py
index 3a4c821d..0c3eba10 100755
--- a/scripts/powerline-release.py
+++ b/scripts/powerline-release.py
@@ -7,7 +7,7 @@ import codecs
import os
import re
-from subprocess import check_output, check_call
+from subprocess import check_output, check_call, CalledProcessError
from getpass import getpass
from github import Github
@@ -50,7 +50,10 @@ def parse_version(s):
def merge(version_string, rev, **kwargs):
check_call(['git', 'checkout', 'master'])
- check_call(['git', 'merge', '--no-ff', '--no-commit', '--log', rev])
+ try:
+ check_call(['git', 'merge', '--no-ff', '--no-commit', '--log', rev])
+ except CalledProcessError:
+ check_call(['git', 'mergetool', '--tool', 'vimdiff2'])
with codecs.open('.setup.py.new', 'w', encoding='utf-8') as NS:
with codecs.open('setup.py', 'r', encoding='utf-8') as OS:
@@ -148,7 +151,7 @@ def create_ebuilds(version_string, overlay, user, **kwargs):
check_call(['git', 'add', '--'] + new_files, cwd=overlay)
check_call(['git', 'commit'] + new_files + ['-m', 'powerline*: Release {0}'.format(version_string)],
cwd=overlay)
- check_call(['git', 'push', 'git@github.com:{0}/{1}'.format(user, OVERLAY_NAME), branch], cwd=overlay)
+ check_call(['git', 'push', '-f', 'git@github.com:{0}/{1}'.format(user, OVERLAY_NAME), branch], cwd=overlay)
def update_overlay(version_string, user, password, **kwargs):
diff --git a/setup.py b/setup.py
index 87e849bc..f95dcf99 100644
--- a/setup.py
+++ b/setup.py
@@ -62,7 +62,7 @@ def get_version():
setup(
name='powerline-status',
- version='1.3',
+ version='1.3.1',
description='The ultimate statusline/prompt utility.',
long_description=README,
classifiers=[
diff --git a/tests/test_lib.py b/tests/test_lib.py
index 4af20fa1..1cd91d04 100644
--- a/tests/test_lib.py
+++ b/tests/test_lib.py
@@ -3,6 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import threading
import os
+import sys
import re
import shutil
@@ -16,7 +17,9 @@ from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment
from powerline.lib.monotonic import monotonic
from powerline.lib.vcs.git import git_directory
-from tests.lib import Pl
+import powerline.lib.unicode as plu
+
+from tests.lib import Pl, replace_attr
from tests import TestCase, SkipTest
@@ -397,6 +400,101 @@ class TestLib(TestCase):
self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB')
+width_data = {
+ 'N': 1, # Neutral
+ 'Na': 1, # Narrow
+ 'A': 1, # Ambiguous
+ 'H': 1, # Half-width
+ 'W': 2, # Wide
+ 'F': 2, # Fullwidth
+}
+
+
+class TestUnicode(TestCase):
+ def assertStringsIdentical(self, s1, s2):
+ self.assertTrue(type(s1) is type(s2), msg='string types differ')
+ self.assertEqual(s1, s2)
+
+ def test_unicode(self):
+ self.assertTrue(type('abc') is plu.unicode)
+
+ def test_unichr(self):
+ self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF))
+ self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF))
+ self.assertStringsIdentical('\x20', plu.unichr(0x20))
+
+ def test_u(self):
+ self.assertStringsIdentical('Test', plu.u('Test'))
+ self.assertStringsIdentical('Test', plu.u(b'Test'))
+ self.assertStringsIdentical('«»', plu.u(b'\xC2\xAB\xC2\xBB'))
+ self.assertRaises(UnicodeDecodeError, plu.u, b'\xFF')
+
+ def test_tointiter(self):
+ self.assertEqual([1, 2, 3], list(plu.tointiter(b'\x01\x02\x03')))
+
+ def test_decode_error(self):
+ self.assertStringsIdentical('', b'\xFF'.decode('utf-8', 'powerline_decode_error'))
+ self.assertStringsIdentical('abc', b'abc'.decode('utf-8', 'powerline_decode_error'))
+
+ def test_register_strwidth_error(self):
+ ename = plu.register_strwidth_error(lambda s: 3)
+ self.assertStringsIdentical(b'???', 'Ａ'.encode('latin1', ename))
+ self.assertStringsIdentical(b'abc', 'abc'.encode('latin1', ename))
+
+ def test_out_u(self):
+ self.assertStringsIdentical('abc', plu.out_u('abc'))
+ self.assertStringsIdentical('abc', plu.out_u(b'abc'))
+ self.assertRaises(TypeError, plu.out_u, None)
+
+ def test_safe_unicode(self):
+ self.assertStringsIdentical('abc', plu.safe_unicode('abc'))
+ self.assertStringsIdentical('abc', plu.safe_unicode(b'abc'))
+ self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb'))
+ with replace_attr(plu, 'get_preferred_output_encoding', lambda: 'latin1'):
+ self.assertStringsIdentical('ÿ', plu.safe_unicode(b'\xFF'))
+ self.assertStringsIdentical('None', plu.safe_unicode(None))
+
+ class FailingStr(object):
+ def __str__(self):
+ raise NotImplementedError('Fail!')
+
+ self.assertStringsIdentical('Fail!', plu.safe_unicode(FailingStr()))
+
+ def test_FailedUnicode(self):
+ self.assertTrue(isinstance(plu.FailedUnicode('abc'), plu.unicode))
+ self.assertEqual('abc', plu.FailedUnicode('abc'))
+
+ def test_string(self):
+ self.assertStringsIdentical(str('abc'), plu.string('abc'))
+ self.assertStringsIdentical(str('abc'), plu.string(b'abc'))
+
+ def test_surrogate_pair_to_character(self):
+ self.assertEqual(0x1F48E, plu.surrogate_pair_to_character(0xD83D, 0xDC8E))
+
+ def test_strwidth_ucs_4(self):
+ self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'abcd'))
+ self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'ＡＢ'))
+ if sys.maxunicode < 0x10FFFF:
+ raise SkipTest('Can only test strwidth_ucs_4 in UCS-4 Pythons')
+
+ def east_asian_width(ch):
+ assert (len(ch) == 1)
+ assert ord(ch) == 0x1F48E
+ return 'F'
+
+ with replace_attr(plu, 'east_asian_width', east_asian_width):
+ # Warning: travis unicodedata.east_asian_width for some reason
+ # thinks this character is 5 symbols wide.
+ self.assertEqual(2, plu.strwidth_ucs_4(width_data, '\U0001F48E'))
+
+ def test_strwidth_ucs_2(self):
+ self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'abcd'))
+ self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'ＡＢ'))
+ if not sys.maxunicode < 0x10FFFF:
+ raise SkipTest('Can only test strwidth_ucs_2 in UCS-2 Pythons')
+ self.assertEqual(2, plu.strwidth_ucs_2(width_data, '\ud83d\udc8e'))
+
+
class TestVCS(TestCase):
def do_branch_rename_test(self, repo, q):
st = monotonic()
diff --git a/tests/test_shells/postproc.py b/tests/test_shells/postproc.py
index 3ef35972..df8a6dad 100755
--- a/tests/test_shells/postproc.py
+++ b/tests/test_shells/postproc.py
@@ -28,6 +28,7 @@ except IOError:
hostname = socket.gethostname()
user = os.environ['USER']
+REFS_RE = re.compile(r'^\[\d+ refs\]\n')
IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])')
with codecs.open(fname, 'r', encoding='utf-8') as R:
@@ -42,6 +43,8 @@ with codecs.open(fname, 'r', encoding='utf-8') as R:
line = line.translate({
ord('\r'): None
})
+ if REFS_RE.match(line):
+ continue
line = line.replace(hostname, 'HOSTNAME')
line = line.replace(user, 'USER')
if pid is not None:
diff --git a/tests/test_shells/test.sh b/tests/test_shells/test.sh
index 275fa9c5..5ad1bbfe 100755
--- a/tests/test_shells/test.sh
+++ b/tests/test_shells/test.sh
@@ -91,8 +91,11 @@ run_test() {
SH="$1"
SESNAME="powerline-shell-test-${SH}-$$"
+ # Note: when screen is run setuid, libc unsets LD_LIBRARY_PATH, so it
+ # cannot be added to the `env -i` call above.
run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \
screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \
+ env LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \
"$@"
while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do
sleep 0.1s
@@ -213,6 +216,7 @@ ln -s "$(which mktemp)" tests/shell/path
ln -s "$(which grep)" tests/shell/path
ln -s "$(which sed)" tests/shell/path
ln -s "$(which rm)" tests/shell/path
+ln -s "$(which uname)" tests/shell/path
ln -s ../../test_shells/bgscript.sh tests/shell/path
ln -s ../../test_shells/waitpid.sh tests/shell/path
if which socat ; then