mirror of
https://github.com/powerline/powerline.git
synced 2025-07-29 00:34:49 +02:00
Move strwidth function variants to powerline.lib.unicode
They can be tested thus.
This commit is contained in:
parent
f3c8413043
commit
6dc585b7ee
@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
|
||||
import sys
|
||||
import codecs
|
||||
|
||||
from unicodedata import east_asian_width, combining
|
||||
|
||||
from powerline.lib.encoding import get_preferred_output_encoding
|
||||
|
||||
|
||||
@ -134,3 +136,97 @@ def surrogate_pair_to_character(high, low):
|
||||
'''Transform a pair of surrogate codepoints to one codepoint
|
||||
'''
|
||||
return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
|
||||
|
||||
|
||||
# Docstring template shared by the strwidth_* variants below. ``{0}`` is
# replaced via str.format() with a paragraph specific to each variant, so the
# template itself must not contain any other curly braces.
_strwidth_documentation = (
    '''Compute string width in display cells

    {0}

    :param dict width_data:
        Dictionary which maps east_asian_width property values to string
        lengths. It is expected to contain the following keys and values (from
        `East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):

        ===  ======  ===========================================================
        Key  Value   Description
        ===  ======  ===========================================================
        F    2       Fullwidth: all characters that are defined as Fullwidth in
                     the Unicode Standard [Unicode] by having a compatibility
                     decomposition of type <wide> to characters elsewhere in the
                     Unicode Standard that are implicitly narrow but unmarked.
        H    1       Halfwidth: all characters that are explicitly defined as
                     Halfwidth in the Unicode Standard by having a compatibility
                     decomposition of type <narrow> to characters elsewhere in
                     the Unicode Standard that are implicitly wide but unmarked,
                     plus U+20A9 ₩ WON SIGN.
        W    2       Wide: all other characters that are always wide. These
                     characters occur only in the context of East Asian
                     typography where they are wide characters (such as the
                     Unified Han Ideographs or Squared Katakana Symbols). This
                     category includes characters that have explicit halfwidth
                     counterparts.
        Na   1       Narrow: characters that are always narrow and have explicit
                     fullwidth or wide counterparts. These characters are
                     implicitly narrow in East Asian typography and legacy
                     character sets because they have explicit fullwidth or wide
                     counterparts. All of ASCII is an example of East Asian
                     Narrow characters.
        A    1 or 2  Ambiguous: characters that may sometimes be wide and
                     sometimes narrow. Ambiguous characters require additional
                     information not contained in the character code to further
                     resolve their width. This information is usually defined in
                     terminal setting that should in turn respect glyphs widths
                     in used fonts. Also see :ref:`ambiwidth configuration
                     option <config-common-ambiwidth>`.
        N    1       Neutral characters: character that does not occur in legacy
                     East Asian character sets.
        ===  ======  ===========================================================

    :param unicode string:
        String whose width will be calculated.

    :return: unsigned integer.''')
|
||||
|
||||
|
||||
def strwidth_ucs_4(width_data, string):
    # NOTE: no literal docstring here on purpose; ``__doc__`` is assigned from
    # the shared documentation template right after this definition.
    #
    # Walk the string one symbol at a time and accumulate the number of
    # display cells, looking each symbol's East Asian Width class up in
    # ``width_data``.
    cells = 0
    for symbol in string:
        # Symbols with a non-zero canonical combining class add no width.
        if combining(symbol):
            continue
        cells += width_data[east_asian_width(symbol)]
    return cells
|
||||
|
||||
|
||||
# Attach the shared documentation template, filled with the paragraph that
# describes the UCS-4 specific expectations. The admonition must be spelled
# ``.. note::`` (two colons): with a single colon reStructuredText parses the
# line as a comment and the note never reaches the rendered documentation.
strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
    '''This version of function expects that characters above 0xFFFF are
    represented using one symbol. This is only the case in UCS-4 Python builds.

    .. note::
        Even in UCS-4 Python builds it is possible to represent characters above
        0xFFFF using surrogate pairs. Characters represented this way are not
        supported.''')
|
||||
|
||||
|
||||
def strwidth_ucs_2(width_data, string):
    # NOTE: no literal docstring here on purpose; ``__doc__`` is assigned from
    # the shared documentation template right after this definition.
    #
    # Same accounting as the UCS-4 variant, except that a high/low surrogate
    # pair is first combined into the code point it encodes and measured as a
    # single character.
    cells = 0
    for i, symbol in enumerate(string):
        code = ord(symbol)
        if 0xD800 <= code <= 0xDBFF:
            # High surrogate: contributes nothing by itself, the pair is
            # measured when its low half is reached.
            continue
        if 0xDC00 <= code <= 0xDFFF:
            # Low surrogate: only combine it with the previous symbol when
            # that symbol really is a high surrogate. Guarding ``i`` avoids
            # ``string[-1]`` wrapping around to the end of the string, and
            # the range check keeps bogus (possibly negative) code points
            # away from unichr().
            previous = ord(string[i - 1]) if i else 0
            if 0xD800 <= previous <= 0xDBFF:
                symbol = unichr(surrogate_pair_to_character(previous, code))
            # Otherwise the surrogate is unpaired and is measured as is.
        # Combining characters occupy no cells; checking after the pair has
        # been decoded makes characters above 0xFFFF behave the same way as
        # they do in strwidth_ucs_4.
        if combining(symbol):
            continue
        cells += width_data[east_asian_width(symbol)]
    return cells
|
||||
|
||||
|
||||
# Attach the shared documentation template, filled with the paragraph that
# describes how this variant treats surrogate pairs.
strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
    '''This version of function expects that characters above 0xFFFF are
    represented using two symbols forming a surrogate pair, which is the only
    option in UCS-2 Python builds. It still works correctly in UCS-4 Python
    builds, but is slower than its UCS-4 counterpart.''')
|
||||
|
@ -5,11 +5,10 @@ import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
from unicodedata import east_asian_width, combining
|
||||
from itertools import chain
|
||||
|
||||
from powerline.theme import Theme
|
||||
from powerline.lib.unicode import unichr, surrogate_pair_to_character
|
||||
from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
|
||||
|
||||
|
||||
NBSP = ' '
|
||||
@ -177,46 +176,21 @@ class Renderer(object):
|
||||
'F': 2, # Fullwidth
|
||||
}
|
||||
|
||||
def strwidth(self, string):
|
||||
'''Function that returns string width.
|
||||
strwidth = lambda self, s: (
|
||||
(strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
|
||||
self.width_data, s)
|
||||
)
|
||||
'''Function that returns string width.
|
||||
|
||||
Is used to calculate the place given string occupies when handling
|
||||
``width`` argument to ``.render()`` method. Must take east asian width
|
||||
into account.
|
||||
Is used to calculate the place given string occupies when handling
|
||||
``width`` argument to ``.render()`` method. Must take east asian width
|
||||
into account.
|
||||
|
||||
:param unicode string:
|
||||
String whose width will be calculated.
|
||||
:param unicode string:
|
||||
String whose width will be calculated.
|
||||
|
||||
:return: unsigned integer.
|
||||
'''
|
||||
return sum(((
|
||||
(
|
||||
0
|
||||
) if combining(symbol) else (
|
||||
self.width_data[east_asian_width(symbol)]
|
||||
)
|
||||
) for symbol in string))
|
||||
|
||||
if sys.maxunicode < 0x10FFFF:
|
||||
old_strwidth = strwidth
|
||||
|
||||
def strwidth(self, string):
|
||||
return sum(((
|
||||
(
|
||||
self.width_data[
|
||||
east_asian_width(
|
||||
unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
|
||||
)
|
||||
]
|
||||
) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
|
||||
0
|
||||
) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
|
||||
self.width_data[east_asian_width(symbol)]
|
||||
)
|
||||
) for i, symbol in enumerate(string)))
|
||||
|
||||
strwidth.__doc__ = old_strwidth.__doc__
|
||||
del old_strwidth
|
||||
:return: unsigned integer.
|
||||
'''
|
||||
|
||||
def get_theme(self, matcher_info):
|
||||
'''Get Theme object.
|
||||
|
Loading…
x
Reference in New Issue
Block a user