From a962f7eeababc341a22324a0ac31340c1f1c1262 Mon Sep 17 00:00:00 2001
From: ZyX
Date: Sun, 7 Sep 2014 20:56:35 +0400
Subject: [PATCH] Replace u() function with out_u for safer unicode conversion

---
 powerline/lib/unicode.py     | 42 ++++++++++++++++++++++++++++++++++++
 powerline/segments/common.py | 12 +++++------
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py
index cea8b05f..99c2b43e 100644
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@@ -1,6 +1,9 @@
 # vim:fileencoding=utf-8:noet
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 
+import sys
+import codecs
+
 from locale import getpreferredencoding
 
 
@@ -25,6 +28,45 @@ def u(s):
 		return unicode(s, 'utf-8')
 
 
+if sys.version_info < (3,):
+	def tointiter(s):
+		'''Convert a byte string to an iterator over its integer byte values (ord() of each char)
+		'''
+		return (ord(c) for c in s)
+else:
+	def tointiter(s):
+		'''Convert a byte string to an iterator over its integer byte values (plain iter() of bytes)
+		'''
+		return iter(s)
+
+
+def powerline_decode_error(e):  # decoding error handler, registered with codecs right below
+	if not isinstance(e, UnicodeDecodeError):
+		raise NotImplementedError  # only decoding errors are supported by this handler
+	return (''.join((
+		'<{0:02X}>'.format(c)  # each offending byte is rendered as <XX> (uppercase hex)
+		for c in tointiter(e.object[e.start:e.end])
+	)), e.end)  # (replacement text, position at which decoding resumes)
+
+
+codecs.register_error('powerline_decode_error', powerline_decode_error)  # usable as errors='powerline_decode_error'
+
+
+def out_u(s):
+	'''Return unicode string suitable for displaying
+
+	Decodes bytes assuming getpreferredencoding(). Unlike u() does not throw on
+	invalid input: undecodable bytes are shown as <XX> hex escapes. Unlike
+	safe_unicode() throws TypeError if object is neither unicode nor bytes (e.g. None).
+	'''
+	if isinstance(s, unicode):
+		return s
+	elif isinstance(s, bytes):
+		return unicode(s, getpreferredencoding(), 'powerline_decode_error')
+	else:
+		raise TypeError('Expected unicode or bytes instance, got {0}'.format(repr(type(s))))
+
+
 def safe_unicode(s):
 	'''Return unicode instance without raising an exception.
 
diff --git a/powerline/segments/common.py b/powerline/segments/common.py index 9f19b1d7..4227ff0b 100644 --- a/powerline/segments/common.py +++ b/powerline/segments/common.py @@ -17,7 +17,7 @@ from powerline.lib.vcs import guess, tree_status from powerline.lib.threaded import ThreadedSegment, KwThreadedSegment from powerline.lib.monotonic import monotonic from powerline.lib.humanize_bytes import humanize_bytes -from powerline.lib.unicode import u +from powerline.lib.unicode import out_u from powerline.theme import requires_segment_info, requires_filesystem_watcher from powerline.segments import Segment, with_docstring @@ -94,7 +94,7 @@ class CwdSegment(Segment): def get_shortened_path(self, pl, segment_info, shorten_home=True, **kwargs): try: - path = u(segment_info['getcwd']()) + path = out_u(segment_info['getcwd']()) except OSError as e: if e.errno == 2: # user most probably deleted the directory @@ -106,7 +106,7 @@ class CwdSegment(Segment): if shorten_home: home = segment_info['home'] if home: - home = u(home) + home = out_u(home) if path.startswith(home): path = '~' + path[len(home):] return path @@ -1123,12 +1123,12 @@ class NowPlayingSegment(Segment): return if not info: return - album = u(info.get('xesam:album')) - title = u(info.get('xesam:title')) + album = out_u(info.get('xesam:album')) + title = out_u(info.get('xesam:title')) artist = info.get('xesam:artist') state = self._convert_state(status) if artist: - artist = u(artist[0]) + artist = out_u(artist[0]) return { 'state': state, 'album': album,