From 508f8f5eaadf49b0c977dd408f0425d5089e7393 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 30 Nov 2014 03:42:17 +0300 Subject: [PATCH 01/30] Supply merge.tool option when merging --- scripts/powerline-release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/powerline-release.py b/scripts/powerline-release.py index 3a4c821d..39b1abc5 100755 --- a/scripts/powerline-release.py +++ b/scripts/powerline-release.py @@ -50,7 +50,7 @@ def parse_version(s): def merge(version_string, rev, **kwargs): check_call(['git', 'checkout', 'master']) - check_call(['git', 'merge', '--no-ff', '--no-commit', '--log', rev]) + check_call(['git', '-c', 'merge.tool=vimdiff', 'merge', '--no-ff', '--no-commit', '--log', rev]) with codecs.open('.setup.py.new', 'w', encoding='utf-8') as NS: with codecs.open('setup.py', 'r', encoding='utf-8') as OS: From c9964a12f513b80ae15a3ff57bb0cda01769fbfa Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 30 Nov 2014 03:46:30 +0300 Subject: [PATCH 02/30] Use `git mergetool` after error because `-c merge.tool` does not work --- scripts/powerline-release.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/powerline-release.py b/scripts/powerline-release.py index 39b1abc5..e916e74d 100755 --- a/scripts/powerline-release.py +++ b/scripts/powerline-release.py @@ -7,7 +7,7 @@ import codecs import os import re -from subprocess import check_output, check_call +from subprocess import check_output, check_call, CalledProcessError from getpass import getpass from github import Github @@ -50,7 +50,10 @@ def parse_version(s): def merge(version_string, rev, **kwargs): check_call(['git', 'checkout', 'master']) - check_call(['git', '-c', 'merge.tool=vimdiff', 'merge', '--no-ff', '--no-commit', '--log', rev]) + try: + check_call(['git', 'merge', '--no-ff', '--no-commit', '--log', rev]) + except CalledProcessError: + check_call(['git', 'mergetool', '--tool', 'vimdiff2']) with codecs.open('.setup.py.new', 'w', 
encoding='utf-8') as NS: with codecs.open('setup.py', 'r', encoding='utf-8') as OS: From 88233184eada78cb7672ed23720140e0a8ce0e2d Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 30 Nov 2014 04:02:56 +0300 Subject: [PATCH 03/30] Force push to my fork of the overlay --- scripts/powerline-release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/powerline-release.py b/scripts/powerline-release.py index e916e74d..0c3eba10 100755 --- a/scripts/powerline-release.py +++ b/scripts/powerline-release.py @@ -151,7 +151,7 @@ def create_ebuilds(version_string, overlay, user, **kwargs): check_call(['git', 'add', '--'] + new_files, cwd=overlay) check_call(['git', 'commit'] + new_files + ['-m', 'powerline*: Release {0}'.format(version_string)], cwd=overlay) - check_call(['git', 'push', 'git@github.com:{0}/{1}'.format(user, OVERLAY_NAME), branch], cwd=overlay) + check_call(['git', 'push', '-f', 'git@github.com:{0}/{1}'.format(user, OVERLAY_NAME), branch], cwd=overlay) def update_overlay(version_string, user, password, **kwargs): From ebe5fca3779662e6b222fce6c9cba42411812b79 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 30 Nov 2014 04:26:04 +0300 Subject: [PATCH 04/30] Reorder pip arguments in installation documentation `--editable`/`-e` pip argument is accepting an argument, not specifying the action. --- docs/source/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 089cd190..6dcb0c29 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -43,7 +43,7 @@ powerline with ``pip``: .. 
code-block:: sh - pip install -e --user {path_to_powerline} + pip install --user --editable={path_to_powerline} , but note that in this case ``pip`` will not install ``powerline`` executable and you will have to do something like From 1af127b02362e658a2cdef5286053bbaa46b1ea6 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 30 Nov 2014 19:34:27 +0300 Subject: [PATCH 05/30] CommandT has moved finders under CommandT::Finder module, respect this --- powerline/segments/vim/plugin/commandt.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/powerline/segments/vim/plugin/commandt.py b/powerline/segments/vim/plugin/commandt.py index c51a9146..fcaa6712 100644 --- a/powerline/segments/vim/plugin/commandt.py +++ b/powerline/segments/vim/plugin/commandt.py @@ -55,7 +55,7 @@ def finder(pl): vim.command('ruby $powerline.commandt_set_active_finder') return [{ 'highlight_group': ['commandt:finder'], - 'contents': vim.eval('g:powerline_commandt_reply').replace('CommandT::', '') + 'contents': vim.eval('g:powerline_commandt_reply').replace('CommandT::', '').replace('Finder::', '') }] @@ -64,6 +64,10 @@ FINDERS_WITHOUT_PATH = set(( 'CommandT::BufferFinder', 'CommandT::TagFinder', 'CommandT::JumpFinder', + 'CommandT::Finder::MRUBufferFinder', + 'CommandT::Finder::BufferFinder', + 'CommandT::Finder::TagFinder', + 'CommandT::Finder::JumpFinder', )) From e8a53ebd9e615e43a8988bb3f595eda4f9f1ba0c Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 1 Dec 2014 00:58:48 +0300 Subject: [PATCH 06/30] Fix typo in capslock_indicator docstring --- powerline/segments/vim/plugin/capslock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerline/segments/vim/plugin/capslock.py b/powerline/segments/vim/plugin/capslock.py index 824d55b9..d2c474d5 100644 --- a/powerline/segments/vim/plugin/capslock.py +++ b/powerline/segments/vim/plugin/capslock.py @@ -14,7 +14,7 @@ from powerline.theme import requires_segment_info def capslock_indicator(pl, segment_info, text='CAPS'): 
'''Shows the indicator if tpope/vim-capslock plugin is enabled - .. _note:: + .. note:: In the current state plugin automatically disables itself when leaving insert mode. So trying to use this segment not in insert or replace modes is useless. From 9281f4a6c42bc217c7e657d3abff3597cbe26355 Mon Sep 17 00:00:00 2001 From: Andreas Schneider Date: Tue, 2 Dec 2014 20:39:21 +0100 Subject: [PATCH 07/30] lib: Remove wrong shebang from debug.py. --- powerline/lib/debug.py | 1 - 1 file changed, 1 deletion(-) diff --git a/powerline/lib/debug.py b/powerline/lib/debug.py index fc1ffeea..515e8c40 100755 --- a/powerline/lib/debug.py +++ b/powerline/lib/debug.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # vim:fileencoding=utf-8:noet from __future__ import (unicode_literals, division, absolute_import, print_function) From d061deb7e5119ad43ec2c3988a787a9978196160 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Dec 2014 07:45:49 +0300 Subject: [PATCH 08/30] Only convert clementine title and album if they are true It appears that at least album may be None. 
Fixes #1207 --- powerline/segments/common/players.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/powerline/segments/common/players.py b/powerline/segments/common/players.py index a53d36d9..c6922e03 100644 --- a/powerline/segments/common/players.py +++ b/powerline/segments/common/players.py @@ -237,10 +237,14 @@ else: return if not info: return - album = out_u(info.get('xesam:album')) - title = out_u(info.get('xesam:title')) + album = info.get('xesam:album') + title = info.get('xesam:title') artist = info.get('xesam:artist') state = _convert_state(status) + if album: + album = out_u(album) + if title: + title = out_u(title) if artist: artist = out_u(artist[0]) return { From 209d6be91ec3ade3ff4c6798a71c74994a35d0ae Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 20:23:37 +0300 Subject: [PATCH 09/30] Add support for UCS-2 Python versions Fixes #1213 --- powerline/lint/markedjson/error.py | 20 +++++- powerline/renderer.py | 97 ++++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 21 deletions(-) diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py index ec86d14a..ad3fd875 100644 --- a/powerline/lint/markedjson/error.py +++ b/powerline/lint/markedjson/error.py @@ -7,7 +7,25 @@ import re from powerline.lib.unicode import unichr -NON_PRINTABLE = re.compile('[^\t\n\x20-\x7E' + unichr(0x85) + (unichr(0xA0) + '-' + unichr(0xD7FF)) + (unichr(0xE000) + '-' + unichr(0xFFFD)) + ']') +NON_PRINTABLE = re.compile( + '[^' + # ASCII control characters: 0x00-0x19 + + '\t\n' # Tab, newline: allowed ASCII control characters + + '\x20-\x7E' # ASCII printable characters + # Unicode control characters: 0x7F-0x9F + + '\u0085' # Allowed unicode control character: next line character + + '\u00A0-\uD7FF' + # Surrogate escapes: 0xD800-0xDFFF + + '\uE000-\uFFFD' + + ']' + + (( + # Paired surrogate escapes: allowed in UCS-2 builds as the only way to + # represent characters above 0xFFFF. 
Only paired variant is allowed. + '|[\uD800-\uDBFF][\uDC00-\uDFFF]' + ) if sys.maxunicode < 0x10FFFF else ( + '' + )) +) def repl(s): diff --git a/powerline/renderer.py b/powerline/renderer.py index 1ba4ec63..07736853 100644 --- a/powerline/renderer.py +++ b/powerline/renderer.py @@ -1,7 +1,9 @@ # vim:fileencoding=utf-8:noet from __future__ import (unicode_literals, division, absolute_import, print_function) +import sys import os +import re from unicodedata import east_asian_width, combining from itertools import chain @@ -13,6 +15,80 @@ from powerline.lib.unicode import unichr NBSP = ' ' +np_control_character_translations = dict(( + # Control characters: ^@ … ^Y + (i1, '^' + unichr(i1 + 0x40)) for i1 in range(0x20) +)) +'''Control character translations + +Dictionary that maps characters in range 0x00–0x1F (inclusive) to strings +``'^@'``, ``'^A'`` and so on. + +.. note: maps tab to ``^I`` and newline to ``^J``. +''' + +np_invalid_character_translations = dict(( + # Invalid unicode characters obtained using 'surrogateescape' error + # handler. + (i2, '<{0:02x}>'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00) +)) +'''Invalid unicode character translations + +When using ``surrogateescape`` encoding error handling method characters in +range 0x80–0xFF (inclusive) are transformed into unpaired surrogate escape +unicode codepoints 0xDC80–0xDD00. This dictionary maps such characters to +``<80>``, ``<81>``, and so on: in Python-3 they cannot be printed or +converted to UTF-8 because UTF-8 standard does not allow surrogate escape +characters, not even paired ones. Python-2 contains a bug that allows such +action, but printing them in any case makes no sense. +''' + +# XXX: not using `r` because it makes no sense. 
+np_invalid_character_re = re.compile('(?'.format(i2 - 0xDC00)) for i2 in range(0xDC80, 0xDD00)), - )) - '''Non-printable character translations - - These are used to transform characters in range 0x00—0x1F into ``^@``, - ``^A`` and so on and characters in range 0xDC80—0xDCFF into ``<80>``, - ``<81>`` and so on (latter are invalid characters obtained using - ``surrogateescape`` error handling method used automatically in a number of - places in Python3). Unilke with ``.escape()`` method (and - ``character_translations``) result is passed to ``.strwidth()`` method. - - Note: transforms tab into ``^I``. - ''' - def __init__(self, theme_config, local_themes, @@ -381,7 +438,7 @@ class Renderer(object): contents_highlighted = '' draw_divider = segment['draw_' + divider_type + '_divider'] - contents_raw = contents_raw.translate(self.np_character_translations) + contents_raw = translate_np(contents_raw) # XXX Make sure self.hl() calls are called in the same order # segments are displayed. This is needed for Vim renderer to work. From 3779ec5b29eef207c4c7c0c31f33faa9ad281cbc Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 20:30:53 +0300 Subject: [PATCH 10/30] Read the whole stream at once, not just 4096 characters This may cause problems in case UTF-8 was read partially. Dunno how this code survived in pyyaml. --- powerline/lint/markedjson/reader.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/powerline/lint/markedjson/reader.py b/powerline/lint/markedjson/reader.py index bb518b06..a17a736b 100644 --- a/powerline/lint/markedjson/reader.py +++ b/powerline/lint/markedjson/reader.py @@ -125,7 +125,12 @@ class Reader(object): self.raw_buffer = None break - def update_raw(self, size=4096): + def update_raw(self, size=-1): + # Was size=4096 + assert(size < 0) + # WARNING: reading the whole stream at once. 
To change this behaviour to + # former reading N characters at once one must make sure that reading + # never ends at partial unicode character. data = self.stream.read(size) if self.raw_buffer is None: self.raw_buffer = data From 2656953f249c28c6889666ffd0e5e68b54ced8d0 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 20:39:12 +0300 Subject: [PATCH 11/30] Move some calculations into a separate function Also moves non-printable character translation *before* width calculations which is correct. --- powerline/renderer.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/powerline/renderer.py b/powerline/renderer.py index 07736853..57fb4287 100644 --- a/powerline/renderer.py +++ b/powerline/renderer.py @@ -313,6 +313,8 @@ class Renderer(object): current_width = 0 + self._prepare_segments(segments, output_width or width) + if not width: # No width specified, so we don’t need to crop or pad anything if output_width: @@ -376,6 +378,15 @@ class Renderer(object): return construct_returned_value(rendered_highlighted, segments, current_width, output_raw, output_width) + def _prepare_segments(self, segments, calculate_contents_len): + '''Translate non-printable characters and calculate segment width + ''' + for segment in segments: + segment['contents'] = translate_np(segment['contents']) + if calculate_contents_len: + for segment in segments: + segment['_contents_len'] = self.strwidth(segment['contents']) + def _render_length(self, theme, segments, divider_widths): '''Update segments lengths and return them ''' @@ -384,10 +395,7 @@ class Renderer(object): divider_spaces = theme.get_spaces() for index, segment in enumerate(segments): side = segment['side'] - if segment['_contents_len'] is None: - segment_len = segment['_contents_len'] = self.strwidth(segment['contents']) - else: - segment_len = segment['_contents_len'] + segment_len = segment['_contents_len'] prev_segment = segments[index - 1] if index > 0 else theme.EMPTY_SEGMENT 
next_segment = segments[index + 1] if index < segments_len - 1 else theme.EMPTY_SEGMENT @@ -438,8 +446,6 @@ class Renderer(object): contents_highlighted = '' draw_divider = segment['draw_' + divider_type + '_divider'] - contents_raw = translate_np(contents_raw) - # XXX Make sure self.hl() calls are called in the same order # segments are displayed. This is needed for Vim renderer to work. if draw_divider: From f37efeac5b9bfa3cabf2da6717854eeb5416e43b Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 21:14:13 +0300 Subject: [PATCH 12/30] lint: Do not allow non-printable characters in configuration --- powerline/lint/__init__.py | 14 +++++------ powerline/lint/markedjson/error.py | 12 +++++++-- powerline/lint/markedjson/markedvalue.py | 7 +----- powerline/lint/markedjson/reader.py | 4 +-- powerline/lint/spec.py | 31 +++++++++++++++++++++++- 5 files changed, 50 insertions(+), 18 deletions(-) diff --git a/powerline/lint/__init__.py b/powerline/lint/__init__.py index 51c25860..d1797d99 100644 --- a/powerline/lint/__init__.py +++ b/powerline/lint/__init__.py @@ -41,7 +41,7 @@ def generate_json_config_loader(lhadproblem): function_name_re = '^(\w+\.)*[a-zA-Z_]\w*$' -divider_spec = Spec().type(unicode).len( +divider_spec = Spec().printable().len( 'le', 3, (lambda value: 'Divider {0!r} is too large!'.format(value))).copy ext_theme_spec = Spec().type(unicode).func(lambda *args: check_config('themes', *args)).copy top_theme_spec = Spec().type(unicode).func(check_top_theme).copy @@ -211,12 +211,12 @@ segment_spec = Spec( display=Spec().type(bool).optional(), module=segment_module_spec(), priority=Spec().type(int, float, type(None)).optional(), - after=Spec().type(unicode).optional(), - before=Spec().type(unicode).optional(), + after=Spec().printable().optional(), + before=Spec().printable().optional(), width=Spec().either(Spec().unsigned(), Spec().cmp('eq', 'auto')).optional(), align=Spec().oneof(set('lr')).optional(), args=args_spec().func(lambda *args, **kwargs: 
check_args(get_one_segment_function, *args, **kwargs)), - contents=Spec().type(unicode).optional(), + contents=Spec().printable().optional(), highlight_group=Spec().list( highlight_group_spec().re( '^(?:(?!:divider$).)+$', @@ -243,11 +243,11 @@ divside_spec = Spec( soft=divider_spec(), ).copy segment_data_value_spec = Spec( - after=Spec().type(unicode).optional(), - before=Spec().type(unicode).optional(), + after=Spec().printable().optional(), + before=Spec().printable().optional(), display=Spec().type(bool).optional(), args=args_spec().func(lambda *args, **kwargs: check_args(get_all_possible_functions, *args, **kwargs)), - contents=Spec().type(unicode).optional(), + contents=Spec().printable().optional(), ).copy dividers_spec = Spec( left=divside_spec(), diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py index ad3fd875..dfde4086 100644 --- a/powerline/lint/markedjson/error.py +++ b/powerline/lint/markedjson/error.py @@ -7,7 +7,7 @@ import re from powerline.lib.unicode import unichr -NON_PRINTABLE = re.compile( +NON_PRINTABLE_STR = ( '[^' # ASCII control characters: 0x00-0x19 + '\t\n' # Tab, newline: allowed ASCII control characters @@ -26,6 +26,7 @@ NON_PRINTABLE = re.compile( '' )) ) +NON_PRINTABLE_RE = re.compile(NON_PRINTABLE_STR) def repl(s): @@ -33,7 +34,7 @@ def repl(s): def strtrans(s): - return NON_PRINTABLE.sub(repl, s.replace('\t', '>---')) + return NON_PRINTABLE_RE.sub(repl, s.replace('\t', '>---')) class Mark: @@ -73,6 +74,13 @@ class Mark: + ' ' * (indent + len(head) + len(snippet[0])) + '^' ) + def advance_string(self, diff): + ret = self.copy() + # FIXME Currently does not work properly with escaped strings. 
+ ret.column += diff + ret.pointer += diff + return ret + def __str__(self): snippet = self.get_snippet() where = (' in "%s", line %d, column %d' % ( diff --git a/powerline/lint/markedjson/markedvalue.py b/powerline/lint/markedjson/markedvalue.py index 74a62b64..c17a8e35 100644 --- a/powerline/lint/markedjson/markedvalue.py +++ b/powerline/lint/markedjson/markedvalue.py @@ -33,12 +33,7 @@ class MarkedUnicode(unicode): pointdiff = 1 r = [] for s in part_result: - mark = self.mark.copy() - # XXX Does not work properly with escaped strings, but this requires - # saving much more information in mark. - mark.column += pointdiff - mark.pointer += pointdiff - r.append(MarkedUnicode(s, mark)) + r.append(MarkedUnicode(s, self.mark.advance_string(pointdiff))) pointdiff += len(s) return tuple(r) diff --git a/powerline/lint/markedjson/reader.py b/powerline/lint/markedjson/reader.py index a17a736b..0ca45160 100644 --- a/powerline/lint/markedjson/reader.py +++ b/powerline/lint/markedjson/reader.py @@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import codecs -from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE +from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE from powerline.lib.unicode import unicode @@ -84,7 +84,7 @@ class Reader(object): return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer) def check_printable(self, data): - match = NON_PRINTABLE.search(data) + match = NON_PRINTABLE_RE.search(data) if match: self.update_pointer(match.start()) raise ReaderError( diff --git a/powerline/lint/spec.py b/powerline/lint/spec.py index 1d095721..6de14fea 100644 --- a/powerline/lint/spec.py +++ b/powerline/lint/spec.py @@ -7,10 +7,19 @@ import re from copy import copy from powerline.lib.unicode import unicode -from powerline.lint.markedjson.error import echoerr, DelayedEchoErr +from powerline.lint.markedjson.error import echoerr, DelayedEchoErr, 
NON_PRINTABLE_STR from powerline.lint.selfcheck import havemarks +NON_PRINTABLE_RE = re.compile( + NON_PRINTABLE_STR.translate({ + ord('\t'): None, + ord('\n'): None, + 0x0085: None, + }) +) + + class Spec(object): '''Class that describes some JSON value @@ -342,6 +351,26 @@ class Spec(object): return False, hadproblem return True, hadproblem + def check_printable(self, value, context_mark, data, context, echoerr, _): + '''Check that given unicode string contains only printable characters + ''' + hadproblem = False + for match in NON_PRINTABLE_RE.finditer(value): + hadproblem = True + echoerr( + context=self.cmsg.format(key=context.key), + context_mark=value.mark, + problem='found not printable character U+{0:04x} in a configuration string'.format( + ord(match.group(0))), + problem_mark=value.mark.advance_string(match.start() + 1) + ) + return True, hadproblem + + def printable(self, *args): + self.type(unicode) + self.checks.append(('check_printable', args)) + return self + def type(self, *args): '''Describe value that has one of the types given in arguments From 1e4dee6e0f7f041cc4bc52c9aa1644a677b55816 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 21:26:01 +0300 Subject: [PATCH 13/30] Fix colors used in tmux left side Fixes #1214 --- powerline/config_files/colorschemes/tmux/default.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/powerline/config_files/colorschemes/tmux/default.json b/powerline/config_files/colorschemes/tmux/default.json index 3745fd5a..e936f2ce 100644 --- a/powerline/config_files/colorschemes/tmux/default.json +++ b/powerline/config_files/colorschemes/tmux/default.json @@ -4,8 +4,8 @@ "window_status": {"fg": "gray70", "bg": "gray0", "attr": []}, "activity_status": {"fg": "yellow", "bg": "gray0", "attr": []}, "bell_status": {"fg": "red", "bg": "gray0", "attr": []}, - "window": {"fg": "gray6", "bg": "gray11", "attr": []}, - "window:divider": {"fg": "gray4", "bg": "gray11", "attr": []}, + "window": {"fg": 
"gray6", "bg": "gray0", "attr": []}, + "window:divider": {"fg": "gray4", "bg": "gray0", "attr": []}, "window:current": {"fg": "mediumcyan", "bg": "darkblue", "attr": []}, "window_name": {"fg": "white", "bg": "darkblue", "attr": ["bold"]}, "session": {"fg": "black", "bg": "gray90", "attr": ["bold"]}, From 8d10664c6243b8fe036fc200baa66cba7f101f14 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 21:42:48 +0300 Subject: [PATCH 14/30] In place of hardcoding spaces compute them --- powerline/bindings/config.py | 2 ++ powerline/bindings/tmux/powerline-base.conf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/powerline/bindings/config.py b/powerline/bindings/config.py index 9e46748a..ea961d38 100644 --- a/powerline/bindings/config.py +++ b/powerline/bindings/config.py @@ -145,6 +145,8 @@ def init_environment(pl, args): left_dividers = powerline.renderer.theme.dividers['left'] set_tmux_environment('_POWERLINE_LEFT_HARD_DIVIDER', left_dividers['hard']) set_tmux_environment('_POWERLINE_LEFT_SOFT_DIVIDER', left_dividers['soft']) + set_tmux_environment('_POWERLINE_LEFT_HARD_DIVIDER_SPACES', ( + ' ' * powerline.renderer.strwidth(left_dividers['hard']))) def get_main_config(args): diff --git a/powerline/bindings/tmux/powerline-base.conf b/powerline/bindings/tmux/powerline-base.conf index add06afa..5998591a 100644 --- a/powerline/bindings/tmux/powerline-base.conf +++ b/powerline/bindings/tmux/powerline-base.conf @@ -4,7 +4,7 @@ set -g status-interval 2 set -g status-left-length 20 set -g status-right '#(eval $POWERLINE_COMMAND tmux right -R pane_id=`tmux display -p "#D"`)' set -g status-right-length 150 -set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR] #I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W " +set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER_SPACES#I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W " set -g window-status-current-format 
"#[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER#[$_POWERLINE_WINDOW_CURRENT_COLOR]#I $_POWERLINE_LEFT_SOFT_DIVIDER#[$_POWERLINE_WINDOW_NAME_COLOR]#W #[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_NEXT_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER" # Legacy status-left definition to be overwritten for tmux Versions 1.8+ From 52f3c838309bbba3ba99dd786f43520959ae3cec Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 21:44:33 +0300 Subject: [PATCH 15/30] Also add spaces to the end of the `window-status-format` --- powerline/bindings/tmux/powerline-base.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerline/bindings/tmux/powerline-base.conf b/powerline/bindings/tmux/powerline-base.conf index 5998591a..38b5de60 100644 --- a/powerline/bindings/tmux/powerline-base.conf +++ b/powerline/bindings/tmux/powerline-base.conf @@ -4,7 +4,7 @@ set -g status-interval 2 set -g status-left-length 20 set -g status-right '#(eval $POWERLINE_COMMAND tmux right -R pane_id=`tmux display -p "#D"`)' set -g status-right-length 150 -set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER_SPACES#I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W " +set -g window-status-format "#[$_POWERLINE_WINDOW_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER_SPACES#I #[$_POWERLINE_WINDOW_DIVIDER_COLOR]$_POWERLINE_LEFT_SOFT_DIVIDER#[default]#W $_POWERLINE_LEFT_HARD_DIVIDER_SPACES" set -g window-status-current-format "#[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER#[$_POWERLINE_WINDOW_CURRENT_COLOR]#I $_POWERLINE_LEFT_SOFT_DIVIDER#[$_POWERLINE_WINDOW_NAME_COLOR]#W #[$_POWERLINE_WINDOW_CURRENT_HARD_DIVIDER_NEXT_COLOR]$_POWERLINE_LEFT_HARD_DIVIDER" # Legacy status-left definition to be overwritten for tmux Versions 1.8+ From 9576738bfaecb35ff9e6aa933e7efd4bbf3b0a98 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 22:34:00 +0300 Subject: [PATCH 16/30] When parsing JSON join surrogate 
pairs Also closes #1211 --- powerline/lib/unicode.py | 6 ++++++ powerline/lint/markedjson/scanner.py | 29 +++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index 32ea3afe..ae8bf8f2 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -128,3 +128,9 @@ def string(s): return s.encode('utf-8') else: return s + + +def surrogate_pair_to_character(high, low): + '''Transform a pair of surrogate codepoints to one codepoint + ''' + return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00) diff --git a/powerline/lint/markedjson/scanner.py b/powerline/lint/markedjson/scanner.py index 543d7298..b0bddf38 100644 --- a/powerline/lint/markedjson/scanner.py +++ b/powerline/lint/markedjson/scanner.py @@ -1,9 +1,14 @@ # vim:fileencoding=utf-8:noet from __future__ import (unicode_literals, division, absolute_import, print_function) +from string import hexdigits + from powerline.lint.markedjson.error import MarkedError from powerline.lint.markedjson import tokens -from powerline.lib.unicode import unicode +from powerline.lib.unicode import unicode, unichr, surrogate_pair_to_character + + +hexdigits_set = set(hexdigits) # Scanner produces tokens of the following types: @@ -415,7 +420,7 @@ class Scanner: length = self.ESCAPE_CODES[ch] self.forward() for k in range(length): - if self.peek(k) not in '0123456789ABCDEFabcdef': + if self.peek(k) not in hexdigits: raise ScannerError( 'while scanning a double-quoted scalar', start_mark, 'expected escape sequence of %d hexdecimal numbers, but found %r' % ( @@ -423,8 +428,26 @@ class Scanner: self.get_mark() ) code = int(self.prefix(length), 16) - chunks.append(chr(code)) self.forward(length) + if 0xD800 <= code <= 0xDC00: + # Start of the surrogate pair + next_char = self.prefix(6) + if ( + next_char[0] != '\\' + or next_char[1] != 'u' + or not (set(next_char[2:]) < hexdigits_set) + or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF) + 
): + raise ScannerError( + 'while scanning a double-quoted scalar', start_mark, + 'expected escape sequence with the next character in surrogate pair, but found %r' % ( + next_char + ), + self.get_mark() + ) + code = surrogate_pair_to_character(code, int(next_char[2:], 16)) + self.forward(6) + chunks.append(unichr(code)) else: raise ScannerError( 'while scanning a double-quoted scalar', start_mark, From b5b033d5122d5f20745bb91ebe3f7d836d76e7eb Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 22:43:46 +0300 Subject: [PATCH 17/30] Fix width calculations when using UCS-2 Python --- powerline/renderer.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/powerline/renderer.py b/powerline/renderer.py index 57fb4287..2c333aed 100644 --- a/powerline/renderer.py +++ b/powerline/renderer.py @@ -9,7 +9,7 @@ from unicodedata import east_asian_width, combining from itertools import chain from powerline.theme import Theme -from powerline.lib.unicode import unichr +from powerline.lib.unicode import unichr, surrogate_pair_to_character NBSP = ' ' @@ -189,7 +189,34 @@ class Renderer(object): :return: unsigned integer. ''' - return sum((0 if combining(symbol) else self.width_data[east_asian_width(symbol)] for symbol in string)) + return sum((( + ( + 0 + ) if combining(symbol) else ( + self.width_data[east_asian_width(symbol)] + ) + ) for symbol in string)) + + if sys.maxunicode < 0x10FFFF: + old_strwidth = strwidth + + def strwidth(self, string): + return sum((( + ( + self.width_data[ + east_asian_width( + unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol))) + ) + ] + ) if 0xDC00 <= ord(symbol) <= 0xDFFF else ( + 0 + ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else ( + self.width_data[east_asian_width(symbol)] + ) + ) for i, symbol in enumerate(string))) + + strwidth.__doc__ = old_strwidth.__doc__ + del old_strwidth def get_theme(self, matcher_info): '''Get Theme object. 
From f3c8413043b817f3fc518f2f2aec1e439d8188f8 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 22:57:15 +0300 Subject: [PATCH 18/30] Add a warning about UCS-2 builds to the documentation --- docs/source/installation.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 6dcb0c29..c0033c0b 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -7,6 +7,15 @@ Generic requirements * Python 2.6 or later, 3.2 or later, PyPy 2.0 or later. It is the only non-optional requirement. + + .. warning:: + It is highly advised to use UCS-4 version of Python because UCS-2 version + uses significantly slower text processing (length determination and + non-printable character replacement) functions due to the need of + supporting unicode characters above U+FFFF which are represented as + surrogate pairs. This price will be paid even if configuration has no such + characters. + * C compiler. Required to build powerline client on linux. If it is not present then powerline will fall back to shell script or python client. * ``socat`` program. Required for shell variant of client which runs a bit From 6dc585b7ee82a251456a967cb40fa4c5c8713eca Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 4 Dec 2014 23:56:40 +0300 Subject: [PATCH 19/30] Move strwidth function variants to powerline.lib.unicode They can be tested thus. 
--- powerline/lib/unicode.py | 96 ++++++++++++++++++++++++++++++++++++++++ powerline/renderer.py | 52 ++++++---------------- 2 files changed, 109 insertions(+), 39 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index ae8bf8f2..13073416 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import codecs +from unicodedata import east_asian_width, combining + from powerline.lib.encoding import get_preferred_output_encoding @@ -134,3 +136,97 @@ def surrogate_pair_to_character(high, low): '''Transform a pair of surrogate codepoints to one codepoint ''' return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00) + + +_strwidth_documentation = ( + '''Compute string width in display cells + + {0} + + :param dict width_data: + Dictionary which maps east_asian_width property values to strings + lengths. It is expected to contain the following keys and values (from + `East Asian Width annex <http://www.unicode.org/reports/tr11/>`_): + + === ====== =========================================================== + Key Value Description + === ====== =========================================================== + F 2 Fullwidth: all characters that are defined as Fullwidth in + the Unicode Standard [Unicode] by having a compatibility + decomposition of type <wide> to characters elsewhere in the + Unicode Standard that are implicitly narrow but unmarked. + H 1 Halfwidth: all characters that are explicitly defined as + Halfwidth in the Unicode Standard by having a compatibility + decomposition of type <narrow> to characters elsewhere in + the Unicode Standard that are implicitly wide but unmarked, + plus U+20A9 ₩ WON SIGN. + W 2 Wide: all other characters that are always wide. These + characters occur only in the context of East Asian + typography where they are wide characters (such as the + Unified Han Ideographs or Squared Katakana Symbols). 
This + category includes characters that have explicit halfwidth + counterparts. + Na 1 Narrow: characters that are always narrow and have explicit + fullwidth or wide counterparts. These characters are + implicitly narrow in East Asian typography and legacy + character sets because they have explicit fullwidth or wide + counterparts. All of ASCII is an example of East Asian + Narrow characters. + A 1 or 2 Ambiguous: characters that may sometimes be wide and + sometimes narrow. Ambiguous characters require additional + information not contained in the character code to further + resolve their width. This information is usually defined in + terminal setting that should in turn respect glyphs widths + in used fonts. Also see :ref:`ambiwidth configuration + option <config-common-ambiwidth>`. + N 1 Neutral characters: character that does not occur in legacy + East Asian character sets. + === ====== =========================================================== + + :param unicode string: + String whose width will be calculated. + + :return: unsigned integer.''') + + +def strwidth_ucs_4(width_data, string): + return sum((( + ( + 0 + ) if combining(symbol) else ( + width_data[east_asian_width(symbol)] + ) + ) for symbol in string)) + + +strwidth_ucs_4.__doc__ = _strwidth_documentation.format( + '''This version of function expects that characters above 0xFFFF are + represented using one symbol. This is only the case in UCS-4 Python builds. + + .. note:: + Even in UCS-4 Python builds it is possible to represent characters above + 0xFFFF using surrogate pairs.
Characters represented this way are not + supported.''') + + +def strwidth_ucs_2(width_data, string): + return sum((( + ( + width_data[ + east_asian_width( + unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol))) + ) + ] + ) if 0xDC00 <= ord(symbol) <= 0xDFFF else ( + 0 + ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else ( + width_data[east_asian_width(symbol)] + ) + ) for i, symbol in enumerate(string))) + + +strwidth_ucs_2.__doc__ = _strwidth_documentation.format( + '''This version of function expects that characters above 0xFFFF are + represented using two symbols forming a surrogate pair, which is the only + option in UCS-2 Python builds. It still works correctly in UCS-4 Python + builds, but is slower then its UCS-4 counterpart.''') diff --git a/powerline/renderer.py b/powerline/renderer.py index 2c333aed..e69abca1 100644 --- a/powerline/renderer.py +++ b/powerline/renderer.py @@ -5,11 +5,10 @@ import sys import os import re -from unicodedata import east_asian_width, combining from itertools import chain from powerline.theme import Theme -from powerline.lib.unicode import unichr, surrogate_pair_to_character +from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4 NBSP = ' ' @@ -177,46 +176,21 @@ class Renderer(object): 'F': 2, # Fullwidth } - def strwidth(self, string): - '''Function that returns string width. + strwidth = lambda self, s: ( + (strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)( + self.width_data, s) + ) + '''Function that returns string width. - Is used to calculate the place given string occupies when handling - ``width`` argument to ``.render()`` method. Must take east asian width - into account. + Is used to calculate the place given string occupies when handling + ``width`` argument to ``.render()`` method. Must take east asian width + into account. - :param unicode string: - String whose width will be calculated. + :param unicode string: + String whose width will be calculated. 
- :return: unsigned integer. - ''' - return sum((( - ( - 0 - ) if combining(symbol) else ( - self.width_data[east_asian_width(symbol)] - ) - ) for symbol in string)) - - if sys.maxunicode < 0x10FFFF: - old_strwidth = strwidth - - def strwidth(self, string): - return sum((( - ( - self.width_data[ - east_asian_width( - unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol))) - ) - ] - ) if 0xDC00 <= ord(symbol) <= 0xDFFF else ( - 0 - ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else ( - self.width_data[east_asian_width(symbol)] - ) - ) for i, symbol in enumerate(string))) - - strwidth.__doc__ = old_strwidth.__doc__ - del old_strwidth + :return: unsigned integer. + ''' def get_theme(self, matcher_info): '''Get Theme object. From 531d3e60c654b4fab0acbc34589a4696697f7a4b Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 5 Dec 2014 22:24:13 +0300 Subject: [PATCH 20/30] Fix code: unichr on UCS-2 builds cannot emit surrogate pairs --- powerline/lib/unicode.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index 13073416..b100b18f 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -212,11 +212,7 @@ strwidth_ucs_4.__doc__ = _strwidth_documentation.format( def strwidth_ucs_2(width_data, string): return sum((( ( - width_data[ - east_asian_width( - unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol))) - ) - ] + width_data[east_asian_width(string[i - 1] + symbol)] ) if 0xDC00 <= ord(symbol) <= 0xDFFF else ( 0 ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else ( From df14aaaa9c43565f1751b6077ed4cc675099b525 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 5 Dec 2014 00:31:13 +0300 Subject: [PATCH 21/30] Add tests for unicode module Note: east_asian_width does not accept surrrogate pairs in UCS-4 Python builds. 
--- tests/test_lib.py | 101 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/tests/test_lib.py b/tests/test_lib.py index 4af20fa1..68e08a66 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -3,6 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import threading import os +import sys import re import shutil @@ -16,7 +17,9 @@ from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment from powerline.lib.monotonic import monotonic from powerline.lib.vcs.git import git_directory -from tests.lib import Pl +import powerline.lib.unicode as plu + +from tests.lib import Pl, replace_attr from tests import TestCase, SkipTest @@ -397,6 +400,102 @@ class TestLib(TestCase): self.assertEqual(humanize_bytes(1000000000, si_prefix=False), '953.7 MiB') +width_data = { + 'N': 1, # Neutral + 'Na': 1, # Narrow + 'A': 1, # Ambigious + 'H': 1, # Half-width + 'W': 2, # Wide + 'F': 2, # Fullwidth +} + + +class TestUnicode(TestCase): + def assertStringsIdentical(self, s1, s2): + self.assertTrue(type(s1) is type(s2), msg='string types differ') + self.assertEqual(s1, s2) + + def test_unicode(self): + self.assertTrue(type('abc') is plu.unicode) + + def test_unichr(self): + if not sys.maxunicode < 0x10FFFF: + self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF)) + self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF)) + self.assertStringsIdentical('\x20', plu.unichr(0x20)) + + def test_u(self): + self.assertStringsIdentical('Test', plu.u('Test')) + self.assertStringsIdentical('Test', plu.u(b'Test')) + self.assertStringsIdentical('«»', plu.u(b'\xC2\xAB\xC2\xBB')) + self.assertRaises(UnicodeDecodeError, plu.u, b'\xFF') + + def test_tointiter(self): + self.assertEqual([1, 2, 3], list(plu.tointiter(b'\x01\x02\x03'))) + + def test_decode_error(self): + self.assertStringsIdentical('', b'\xFF'.decode('utf-8', 'powerline_decode_error')) + self.assertStringsIdentical('abc', 
b'abc'.decode('utf-8', 'powerline_decode_error')) + + def test_register_strwidth_error(self): + ename = plu.register_strwidth_error(lambda s: 3) + self.assertStringsIdentical(b'???', 'A'.encode('latin1', ename)) + self.assertStringsIdentical(b'abc', 'abc'.encode('latin1', ename)) + + def test_out_u(self): + self.assertStringsIdentical('abc', plu.out_u('abc')) + self.assertStringsIdentical('abc', plu.out_u(b'abc')) + self.assertRaises(TypeError, plu.out_u, None) + + def test_safe_unicode(self): + self.assertStringsIdentical('abc', plu.safe_unicode('abc')) + self.assertStringsIdentical('abc', plu.safe_unicode(b'abc')) + self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb')) + with replace_attr(plu, 'get_preferred_output_encoding', lambda: 'latin1'): + self.assertStringsIdentical('ÿ', plu.safe_unicode(b'\xFF')) + self.assertStringsIdentical('None', plu.safe_unicode(None)) + + class FailingStr(object): + def __str__(self): + raise NotImplementedError('Fail!') + + self.assertStringsIdentical('Fail!', plu.safe_unicode(FailingStr())) + + def test_FailedUnicode(self): + self.assertTrue(isinstance(plu.FailedUnicode('abc'), plu.unicode)) + self.assertEqual('abc', plu.FailedUnicode('abc')) + + def test_string(self): + self.assertStringsIdentical(str('abc'), plu.string('abc')) + self.assertStringsIdentical(str('abc'), plu.string(b'abc')) + + def test_surrogate_pair_to_character(self): + self.assertEqual(0x1F48E, plu.surrogate_pair_to_character(0xD83D, 0xDC8E)) + + def test_strwidth_ucs_4(self): + self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'abcd')) + self.assertEqual(4, plu.strwidth_ucs_4(width_data, 'AB')) + if sys.maxunicode < 0x10FFFF: + raise SkipTest('Can only test strwidth_ucs_4 in UCS-4 Pythons') + + def east_asian_width(ch): + assert (len(ch) == 1) + assert ord(ch) == 0x1F48E + return 'F' + + with replace_attr(plu, 'east_asian_width', east_asian_width): + # Warning: travis unicodedata.east_asian_width for some reason + # thinks this character 
is 5 symbols wide. + self.assertEqual(2, plu.strwidth_ucs_4(width_data, '\U0001F48E')) + + def test_strwidth_ucs_2(self): + self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'abcd')) + self.assertEqual(4, plu.strwidth_ucs_2(width_data, 'AB')) + if not sys.maxunicode < 0x10FFFF: + raise SkipTest('Can only test strwidth_ucs_2 in UCS-2 Pythons') + self.assertEqual(2, plu.strwidth_ucs_2(width_data, '\ud83d\udc8e')) + + class TestVCS(TestCase): def do_branch_rename_test(self, repo, q): st = monotonic() From cdfe6f03b2d4396dd21de496c93e6fc0d5c1cea2 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 5 Dec 2014 23:09:21 +0300 Subject: [PATCH 22/30] Add support for running tests with debug Python versions They are failing due to `[NNNN refs]` entries in daemon log and when switching modes in zsh without daemon, but who cares. --- tests/test_shells/postproc.py | 3 +++ tests/test_shells/test.sh | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tests/test_shells/postproc.py b/tests/test_shells/postproc.py index 3ef35972..df8a6dad 100755 --- a/tests/test_shells/postproc.py +++ b/tests/test_shells/postproc.py @@ -28,6 +28,7 @@ except IOError: hostname = socket.gethostname() user = os.environ['USER'] +REFS_RE = re.compile(r'^\[\d+ refs\]\n') IPYPY_DEANSI_RE = re.compile(r'\033(?:\[(?:\?\d+[lh]|[^a-zA-Z]+[a-ln-zA-Z])|[=>])') with codecs.open(fname, 'r', encoding='utf-8') as R: @@ -42,6 +43,8 @@ with codecs.open(fname, 'r', encoding='utf-8') as R: line = line.translate({ ord('\r'): None }) + if REFS_RE.match(line): + continue line = line.replace(hostname, 'HOSTNAME') line = line.replace(user, 'USER') if pid is not None: diff --git a/tests/test_shells/test.sh b/tests/test_shells/test.sh index 275fa9c5..ddaafecf 100755 --- a/tests/test_shells/test.sh +++ b/tests/test_shells/test.sh @@ -91,8 +91,11 @@ run_test() { SH="$1" SESNAME="powerline-shell-test-${SH}-$$" + # Note: when running screen with setuid libc unsets LD_LIBRARY_PATH, so it + # cannot be added to the `env -i` call above. 
run "${TEST_TYPE}" "${TEST_CLIENT}" "${SH}" \ screen -L -c tests/test_shells/screenrc -d -m -S "$SESNAME" \ + env LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ "$@" while ! screen -S "$SESNAME" -X readreg a tests/test_shells/input.$SH ; do sleep 0.1s From f8270a8e49942b30d821e5ba882d9304255113f4 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 5 Dec 2014 23:15:52 +0300 Subject: [PATCH 23/30] =?UTF-8?q?Skip=20some=20functions=E2=80=99=20tests?= =?UTF-8?q?=20as=20they=20are=20buggy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They are buggy, but fixing them is out of the scope of this PR. --- tests/test_lib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_lib.py b/tests/test_lib.py index 68e08a66..06b28fa7 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -448,6 +448,7 @@ class TestUnicode(TestCase): self.assertRaises(TypeError, plu.out_u, None) def test_safe_unicode(self): + raise SkipTest('safe_unicode() function is buggy') self.assertStringsIdentical('abc', plu.safe_unicode('abc')) self.assertStringsIdentical('abc', plu.safe_unicode(b'abc')) self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb')) @@ -466,6 +467,7 @@ class TestUnicode(TestCase): self.assertEqual('abc', plu.FailedUnicode('abc')) def test_string(self): + raise SkipTest('string() function is buggy') self.assertStringsIdentical(str('abc'), plu.string('abc')) self.assertStringsIdentical(str('abc'), plu.string(b'abc')) From aaf52c92c56e4608704d9c5d916cee65c30dd965 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 00:05:25 +0300 Subject: [PATCH 24/30] Use sockets in abstract namespace only if running on linux Also adds fallback to shell client which uses `uname -o` in place of `ostype`. 
Closes #1215 --- client/powerline.c | 8 ++++---- client/powerline.py | 4 +--- client/powerline.sh | 25 ++++++++++++++++++++++--- scripts/powerline-daemon | 3 +-- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/client/powerline.c b/client/powerline.c index f53e3457..d6858770 100644 --- a/client/powerline.c +++ b/client/powerline.c @@ -42,12 +42,12 @@ void do_write(int sd, const char *raw, size_t len) { } } -#ifdef __APPLE__ -# define ADDRESS_TEMPLATE "/tmp/powerline-ipc-%d" -# define A -#else +#ifdef __linux__ # define ADDRESS_TEMPLATE "powerline-ipc-%d" # define A +1 +#else +# define ADDRESS_TEMPLATE "/tmp/powerline-ipc-%d" +# define A #endif #define ADDRESS_SIZE sizeof(ADDRESS_TEMPLATE) + (sizeof(uid_t) * 4) diff --git a/client/powerline.py b/client/powerline.py index 78403887..28492c15 100755 --- a/client/powerline.py +++ b/client/powerline.py @@ -26,9 +26,7 @@ if len(sys.argv) < 2: print('Must provide at least one argument.', file=sys.stderr) raise SystemExit(1) -platform = sys.platform.lower() -use_filesystem = 'darwin' in platform -del platform +use_filesystem = not sys.platform.lower().startswith('linux') if sys.argv[1] == '--socket': address = sys.argv[2] diff --git a/client/powerline.sh b/client/powerline.sh index b112ec21..b8e37956 100755 --- a/client/powerline.sh +++ b/client/powerline.sh @@ -1,6 +1,22 @@ #!/bin/sh -test "${OSTYPE#darwin}" = "${OSTYPE}" && darwin=n || darwin=y +use_filesystem=1 +darwin= +if test -n "$OSTYPE" ; then + # OSTYPE variable is a shell feature. supported by bash and zsh, but not + # dash, busybox or (m)ksh. 
+ if test "${OSTYPE#linux}" '!=' "${OSTYPE}" ; then + use_filesystem= + elif test "${OSTYPE#darwin}" ; then + darwin=1 + fi +elif which uname >/dev/null ; then + if uname -o | grep -iqF linux ; then + use_filesystem= + elif uname -o | grep -iqF darwin ; then + darwin=1 + fi +fi if test "$1" = "--socket" ; then shift @@ -8,13 +24,16 @@ if test "$1" = "--socket" ; then shift else ADDRESS="powerline-ipc-${UID:-`id -u`}" - test "$darwin" = y && ADDRESS="/tmp/$ADDRESS" + test -n "$use_filesystem" && ADDRESS="/tmp/$ADDRESS" fi -if test "$darwin" = y; then +if test -n "$darwin" ; then ENV=genv else ENV=env +fi + +if test -z "$use_filesystem" ; then ADDRESS="abstract-client:$ADDRESS" fi diff --git a/scripts/powerline-daemon b/scripts/powerline-daemon index 62cb35f3..7e78b6b0 100755 --- a/scripts/powerline-daemon +++ b/scripts/powerline-daemon @@ -24,8 +24,7 @@ from powerline.commands.daemon import get_argparser as get_daemon_argparser is_daemon = False -platform = sys.platform.lower() -use_filesystem = 'darwin' in platform +use_filesystem = not sys.platform.lower().startswith('linux') address = None pidfile = None From f0f6efcdbb7b33661b109136a1ca22dccc7ba927 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 00:52:17 +0300 Subject: [PATCH 25/30] Also have uname in $PATH Travis uses Ubuntu, Ubuntu is debian-based and debian uses dash as /bin/sh, so $OSTYPE is not available. . 
--- tests/test_shells/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_shells/test.sh b/tests/test_shells/test.sh index ddaafecf..5ad1bbfe 100755 --- a/tests/test_shells/test.sh +++ b/tests/test_shells/test.sh @@ -216,6 +216,7 @@ ln -s "$(which mktemp)" tests/shell/path ln -s "$(which grep)" tests/shell/path ln -s "$(which sed)" tests/shell/path ln -s "$(which rm)" tests/shell/path +ln -s "$(which uname)" tests/shell/path ln -s ../../test_shells/bgscript.sh tests/shell/path ln -s ../../test_shells/waitpid.sh tests/shell/path if which socat ; then From 305b65f3650188c94ee4d0ee4231861464eea479 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 14:23:21 +0300 Subject: [PATCH 26/30] Fix regex that searches for non-printable characters --- powerline/lint/markedjson/error.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py index dfde4086..7ee65217 100644 --- a/powerline/lint/markedjson/error.py +++ b/powerline/lint/markedjson/error.py @@ -4,8 +4,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import re -from powerline.lib.unicode import unichr - NON_PRINTABLE_STR = ( '[^' @@ -17,11 +15,17 @@ NON_PRINTABLE_STR = ( + '\u00A0-\uD7FF' # Surrogate escapes: 0xD800-0xDFFF + '\uE000-\uFFFD' + + (( + '\uD800-\uDFFF' + ) if sys.maxunicode < 0x10FFFF else ( + '\U00010000-\U0010FFFF' + )) + ']' + (( # Paired surrogate escapes: allowed in UCS-2 builds as the only way to # represent characters above 0xFFFF. Only paired variant is allowed. - '|[\uD800-\uDBFF][\uDC00-\uDFFF]' + '|(? 
Date: Sat, 6 Dec 2014 14:38:42 +0300 Subject: [PATCH 27/30] Make unichr() work with characters above U+10000 in UCS-2 builds --- powerline/lib/unicode.py | 11 +++++++++++ tests/test_lib.py | 3 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index b100b18f..8a720036 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -21,6 +21,17 @@ except ImportError: unichr = chr +if sys.maxunicode < 0x10FFFF: + _unichr = unichr + + def unichr(ch): + if ch <= sys.maxunicode: + return _unichr(ch) + else: + ch -= 0x10000 + return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00) + + def u(s): '''Return unicode instance assuming UTF-8 encoded string. ''' diff --git a/tests/test_lib.py b/tests/test_lib.py index 06b28fa7..1766f6a1 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -419,8 +419,7 @@ class TestUnicode(TestCase): self.assertTrue(type('abc') is plu.unicode) def test_unichr(self): - if not sys.maxunicode < 0x10FFFF: - self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF)) + self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF)) self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF)) self.assertStringsIdentical('\x20', plu.unichr(0x20)) From 8707f35bc936bc0d45359c57731b63c9fda06985 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 14:47:53 +0300 Subject: [PATCH 28/30] Fix safe_unicode and string unicode library functions --- powerline/lib/unicode.py | 22 ++++++++++++++++------ tests/test_lib.py | 2 -- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index b100b18f..93c097db 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -100,7 +100,10 @@ def safe_unicode(s): ''' try: try: - return unicode(s) + if type(s) is bytes: + return unicode(s, 'ascii') + else: + return unicode(s) except UnicodeDecodeError: try: return unicode(s, 'utf-8') @@ -125,11 
+128,18 @@ class FailedUnicode(unicode): pass -def string(s): - if type(s) is not str: - return s.encode('utf-8') - else: - return s +if sys.version_info < (3,): + def string(s): + if type(s) is not str: + return s.encode('utf-8') + else: + return s +else: + def string(s): + if type(s) is not str: + return s.decode('utf-8') + else: + return s def surrogate_pair_to_character(high, low): diff --git a/tests/test_lib.py b/tests/test_lib.py index 06b28fa7..68e08a66 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -448,7 +448,6 @@ class TestUnicode(TestCase): self.assertRaises(TypeError, plu.out_u, None) def test_safe_unicode(self): - raise SkipTest('safe_unicode() function is buggy') self.assertStringsIdentical('abc', plu.safe_unicode('abc')) self.assertStringsIdentical('abc', plu.safe_unicode(b'abc')) self.assertStringsIdentical('«»', plu.safe_unicode(b'\xc2\xab\xc2\xbb')) @@ -467,7 +466,6 @@ class TestUnicode(TestCase): self.assertEqual('abc', plu.FailedUnicode('abc')) def test_string(self): - raise SkipTest('string() function is buggy') self.assertStringsIdentical(str('abc'), plu.string('abc')) self.assertStringsIdentical(str('abc'), plu.string(b'abc')) From f697d9ef67ffe24d3bb3c73475c65f0068748f96 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 14:48:31 +0300 Subject: [PATCH 29/30] Make FailedUnicode summary not too lengthy --- powerline/lib/unicode.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index 93c097db..fd3f05f4 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -116,8 +116,7 @@ def safe_unicode(s): class FailedUnicode(unicode): - '''Builtin ``unicode`` (``str`` in python 3) subclass indicating fatal - error. + '''Builtin ``unicode`` subclass indicating fatal error If your code for some reason wants to determine whether `.render()` method failed it should check returned string for being a FailedUnicode instance. 
From b80da891a13916846a4ca8d4218460a6f457145a Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 15:02:07 +0300 Subject: [PATCH 30/30] Add documentation for `register_strwidth_error` and `string` functions --- powerline/lib/unicode.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index fd3f05f4..e5697e85 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -58,6 +58,27 @@ last_swe_idx = 0 def register_strwidth_error(strwidth): + '''Create new encode errors handling method similar to ``replace`` + + Like ``replace`` this method uses question marks in place of the characters + that cannot be represented in the requested encoding. Unlike ``replace`` the + amount of question marks is identical to the amount of display cells + offending character occupies. Thus encoding ``…`` (U+2026, HORIZONTAL + ELLIPSIS) to ``latin1`` will emit one question mark, but encoding ``Ａ`` + (U+FF21, FULLWIDTH LATIN CAPITAL LETTER A) will emit two question marks. + + Since width of some characters depends on the terminal settings and + powerline knows how to respect them a single error handling method cannot be + used. Instead of it the generator function is used which takes ``strwidth`` + function (function that knows how to compute string width respecting all + needed settings) and emits new error handling method name. + + :param function strwidth: + Function that computes string width measured in display cells the string + occupies when displayed. + + :return: New error handling method name. + ''' global last_swe_idx last_swe_idx += 1 @@ -141,6 +162,20 @@ else: return s +string.__doc__ = ( + '''Transform ``unicode`` or ``bytes`` object into ``str`` object + + On Python-2 this encodes ``unicode`` to ``bytes`` (which is ``str``) using + UTF-8 encoding; on Python-3 this decodes ``bytes`` to ``unicode`` (which is + ``str``) using UTF-8 encoding.
+ + Useful for functions that expect an ``str`` object in both unicode versions, + not caring about the semantic differences between them in Python-2 and + Python-3. + ''' +) + + def surrogate_pair_to_character(high, low): '''Transform a pair of surrogate codepoints to one codepoint '''