diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py
index ae8bf8f2..13073416 100644
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys
import codecs
+from unicodedata import east_asian_width, combining
+
from powerline.lib.encoding import get_preferred_output_encoding
@@ -134,3 +136,97 @@ def surrogate_pair_to_character(high, low):
'''Transform a pair of surrogate codepoints to one codepoint
'''
return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
+
+
+_strwidth_documentation = (
+ '''Compute string width in display cells
+
+ {0}
+
+ :param dict width_data:
+ Dictionary which maps east_asian_width property values to strings
+ lengths. It is expected to contain the following keys and values (from
+ `East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
+
+ === ====== ===========================================================
+ Key Value Description
+ === ====== ===========================================================
+ F 2 Fullwidth: all characters that are defined as Fullwidth in
+ the Unicode Standard [Unicode] by having a compatibility
+ decomposition of type <wide> to characters elsewhere in the
+ Unicode Standard that are implicitly narrow but unmarked.
+ H 1 Halfwidth: all characters that are explicitly defined as
+ Halfwidth in the Unicode Standard by having a compatibility
+ decomposition of type <narrow> to characters elsewhere in
+ the Unicode Standard that are implicitly wide but unmarked,
+ plus U+20A9 ₩ WON SIGN.
+ W 2 Wide: all other characters that are always wide. These
+ characters occur only in the context of East Asian
+ typography where they are wide characters (such as the
+ Unified Han Ideographs or Squared Katakana Symbols). This
+ category includes characters that have explicit halfwidth
+ counterparts.
+ Na 1 Narrow: characters that are always narrow and have explicit
+ fullwidth or wide counterparts. These characters are
+ implicitly narrow in East Asian typography and legacy
+ character sets because they have explicit fullwidth or wide
+ counterparts. All of ASCII is an example of East Asian
+ Narrow characters.
+ A 1 or 2 Ambiguous: characters that may sometimes be wide and
+ sometimes narrow. Ambiguous characters require additional
+ information not contained in the character code to further
+ resolve their width. This information is usually defined in
+ terminal setting that should in turn respect glyphs widths
+ in used fonts. Also see :ref:`ambiwidth configuration
+ option <config-common-ambiwidth>`.
+ N 1 Neutral characters: characters that do not occur in legacy
+ East Asian character sets.
+ === ====== ===========================================================
+
+ :param unicode string:
+ String whose width will be calculated.
+
+ :return: unsigned integer.''')
+
+
+def strwidth_ucs_4(width_data, string):
+ return sum(((
+ (
+ 0
+ ) if combining(symbol) else (
+ width_data[east_asian_width(symbol)]
+ )
+ ) for symbol in string))
+
+
+strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
+ '''This version of the function expects that characters above 0xFFFF are
+ represented using one symbol. This is only the case in UCS-4 Python builds.
+
+ .. note::
+ Even in UCS-4 Python builds it is possible to represent characters above
+ 0xFFFF using surrogate pairs. Characters represented this way are not
+ supported.''')
+
+
+def strwidth_ucs_2(width_data, string):
+ return sum(((
+ (
+ width_data[
+ east_asian_width(
+ unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
+ )
+ ]
+ ) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
+ 0
+ ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
+ width_data[east_asian_width(symbol)]
+ )
+ ) for i, symbol in enumerate(string)))
+
+
+strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
+ '''This version of the function expects that characters above 0xFFFF are
+ represented using two symbols forming a surrogate pair, which is the only
+ option in UCS-2 Python builds. It still works correctly in UCS-4 Python
+ builds, but is slower than its UCS-4 counterpart.''')
diff --git a/powerline/renderer.py b/powerline/renderer.py
index 2c333aed..e69abca1 100644
--- a/powerline/renderer.py
+++ b/powerline/renderer.py
@@ -5,11 +5,10 @@ import sys
import os
import re
-from unicodedata import east_asian_width, combining
from itertools import chain
from powerline.theme import Theme
-from powerline.lib.unicode import unichr, surrogate_pair_to_character
+from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
NBSP = ' '
@@ -177,46 +176,21 @@ class Renderer(object):
'F': 2, # Fullwidth
}
- def strwidth(self, string):
- '''Function that returns string width.
+ strwidth = lambda self, s: (
+ (strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
+ self.width_data, s)
+ )
+ '''Function that returns string width.
- Is used to calculate the place given string occupies when handling
- ``width`` argument to ``.render()`` method. Must take east asian width
- into account.
+ Is used to calculate the place given string occupies when handling
+ ``width`` argument to ``.render()`` method. Must take east asian width
+ into account.
- :param unicode string:
- String whose width will be calculated.
+ :param unicode string:
+ String whose width will be calculated.
- :return: unsigned integer.
- '''
- return sum(((
- (
- 0
- ) if combining(symbol) else (
- self.width_data[east_asian_width(symbol)]
- )
- ) for symbol in string))
-
- if sys.maxunicode < 0x10FFFF:
- old_strwidth = strwidth
-
- def strwidth(self, string):
- return sum(((
- (
- self.width_data[
- east_asian_width(
- unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
- )
- ]
- ) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
- 0
- ) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
- self.width_data[east_asian_width(symbol)]
- )
- ) for i, symbol in enumerate(string)))
-
- strwidth.__doc__ = old_strwidth.__doc__
- del old_strwidth
+ :return: unsigned integer.
+ '''
def get_theme(self, matcher_info):
'''Get Theme object.