diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py
index ae8bf8f2..13073416 100644
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
 import sys
 import codecs
 
+from unicodedata import east_asian_width, combining
+
 from powerline.lib.encoding import get_preferred_output_encoding
 
 
@@ -134,3 +136,107 @@ def surrogate_pair_to_character(high, low):
 	'''Transform a pair of surrogate codepoints to one codepoint
 	'''
 	return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
+
+
+_strwidth_documentation = (
+	'''Compute string width in display cells
+
+	{0}
+
+	:param dict width_data:
+		Dictionary which maps east_asian_width property values to string
+		lengths. It is expected to contain the following keys and values (from
+		`East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
+
+		=== ====== ===========================================================
+		Key Value  Description
+		=== ====== ===========================================================
+		F   2      Fullwidth: all characters that are defined as Fullwidth in
+		           the Unicode Standard [Unicode] by having a compatibility
+		           decomposition of type <wide> to characters elsewhere in the
+		           Unicode Standard that are implicitly narrow but unmarked.
+		H   1      Halfwidth: all characters that are explicitly defined as
+		           Halfwidth in the Unicode Standard by having a compatibility
+		           decomposition of type <narrow> to characters elsewhere in
+		           the Unicode Standard that are implicitly wide but unmarked,
+		           plus U+20A9 ₩ WON SIGN.
+		W   2      Wide: all other characters that are always wide. These
+		           characters occur only in the context of East Asian
+		           typography where they are wide characters (such as the
+		           Unified Han Ideographs or Squared Katakana Symbols). This
+		           category includes characters that have explicit halfwidth
+		           counterparts.
+		Na  1      Narrow: characters that are always narrow and have explicit
+		           fullwidth or wide counterparts. These characters are
+		           implicitly narrow in East Asian typography and legacy
+		           character sets because they have explicit fullwidth or wide
+		           counterparts. All of ASCII is an example of East Asian
+		           Narrow characters.
+		A   1 or 2 Ambiguous: characters that may sometimes be wide and
+		           sometimes narrow. Ambiguous characters require additional
+		           information not contained in the character code to further
+		           resolve their width. This information is usually defined in
+		           terminal settings that should in turn respect glyph widths
+		           in used fonts. Also see :ref:`ambiwidth configuration
+		           option <config-common-ambiwidth>`.
+		N   1      Neutral: characters that do not occur in legacy East Asian
+		           character sets.
+		=== ====== ===========================================================
+
+	:param unicode string:
+		String whose width will be calculated.
+
+	:return: unsigned integer.''')
+
+
+def strwidth_ucs_4(width_data, string):
+	# Symbols with a non-zero canonical combining class add no width.
+	return sum(((
+		(
+			0
+		) if combining(symbol) else (
+			width_data[east_asian_width(symbol)]
+		)
+	) for symbol in string))
+
+
+strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
+	'''This version of the function expects that characters above 0xFFFF are
+	represented using one symbol. This is only the case in UCS-4 Python builds.
+
+	.. note::
+		Even in UCS-4 Python builds it is possible to represent characters above
+		0xFFFF using surrogate pairs. Characters represented this way are not
+		supported.''')
+
+
+def strwidth_ucs_2(width_data, string):
+	return sum(((
+		(
+			width_data[
+				east_asian_width(
+					unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
+				)
+			]
+		) if (
+			# Only combine with string[i - 1] when it really is the high half:
+			# at i == 0 the index would wrap around to the last symbol, and a
+			# non-surrogate predecessor yields a bogus codepoint.
+			i > 0 and 0xDC00 <= ord(symbol) <= 0xDFFF
+			and 0xD800 <= ord(string[i - 1]) <= 0xDBFF
+		) else (
+			0
+		) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
+			width_data[east_asian_width(symbol)]
+		)
+	) for i, symbol in enumerate(string)))
+
+
+strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
+	'''This version of the function expects that characters above 0xFFFF are
+	represented using two symbols forming a surrogate pair, which is the only
+	option in UCS-2 Python builds. It still works correctly in UCS-4 Python
+	builds, but is slower than its UCS-4 counterpart.
+
+	A low surrogate that does not directly follow a high surrogate is measured
+	as a standalone symbol; a high surrogate on its own has zero width.''')
diff --git a/powerline/renderer.py b/powerline/renderer.py
index 2c333aed..e69abca1 100644
--- a/powerline/renderer.py
+++ b/powerline/renderer.py
@@ -5,11 +5,10 @@ import sys
 import os
 import re
 
-from unicodedata import east_asian_width, combining
 from itertools import chain
 
 from powerline.theme import Theme
-from powerline.lib.unicode import unichr, surrogate_pair_to_character
+from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
 
 
 NBSP = ' '
@@ -189,34 +188,10 @@ class Renderer(object):
 
 		:return: unsigned integer.
 		'''
-		return sum(((
-			(
-				0
-			) if combining(symbol) else (
-				self.width_data[east_asian_width(symbol)]
-			)
-		) for symbol in string))
-
-	if sys.maxunicode < 0x10FFFF:
-		old_strwidth = strwidth
-
-		def strwidth(self, string):
-			return sum(((
-				(
-					self.width_data[
-						east_asian_width(
-							unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
-						)
-					]
-				) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
-					0
-				) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
-					self.width_data[east_asian_width(symbol)]
-				)
-			) for i, symbol in enumerate(string)))
-
-		strwidth.__doc__ = old_strwidth.__doc__
-		del old_strwidth
+		# Narrow (UCS-2) builds store characters above 0xFFFF as surrogate pairs.
+		if sys.maxunicode < 0x10FFFF:
+			return strwidth_ucs_2(self.width_data, string)
+		return strwidth_ucs_4(self.width_data, string)
 
 	def get_theme(self, matcher_info):
 		'''Get Theme object.