mirror of
https://github.com/powerline/powerline.git
synced 2025-07-29 16:55:07 +02:00
Move strwidth function variants to powerline.lib.unicode
They can be tested thus.
This commit is contained in:
parent
f3c8413043
commit
6dc585b7ee
@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
|
|||||||
import sys
|
import sys
|
||||||
import codecs
|
import codecs
|
||||||
|
|
||||||
|
from unicodedata import east_asian_width, combining
|
||||||
|
|
||||||
from powerline.lib.encoding import get_preferred_output_encoding
|
from powerline.lib.encoding import get_preferred_output_encoding
|
||||||
|
|
||||||
|
|
||||||
@ -134,3 +136,97 @@ def surrogate_pair_to_character(high, low):
|
|||||||
'''Transform a pair of surrogate codepoints to one codepoint
|
'''Transform a pair of surrogate codepoints to one codepoint
|
||||||
'''
|
'''
|
||||||
return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
|
return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
|
||||||
|
|
||||||
|
|
||||||
|
_strwidth_documentation = (
|
||||||
|
'''Compute string width in display cells
|
||||||
|
|
||||||
|
{0}
|
||||||
|
|
||||||
|
:param dict width_data:
|
||||||
|
Dictionary which maps east_asian_width property values to strings
|
||||||
|
lengths. It is expected to contain the following keys and values (from
|
||||||
|
`East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
|
||||||
|
|
||||||
|
=== ====== ===========================================================
|
||||||
|
Key Value Description
|
||||||
|
=== ====== ===========================================================
|
||||||
|
F 2 Fullwidth: all characters that are defined as Fullwidth in
|
||||||
|
the Unicode Standard [Unicode] by having a compatibility
|
||||||
|
decomposition of type <wide> to characters elsewhere in the
|
||||||
|
Unicode Standard that are implicitly narrow but unmarked.
|
||||||
|
H 1 Halfwidth: all characters that are explicitly defined as
|
||||||
|
Halfwidth in the Unicode Standard by having a compatibility
|
||||||
|
decomposition of type <narrow> to characters elsewhere in
|
||||||
|
the Unicode Standard that are implicitly wide but unmarked,
|
||||||
|
plus U+20A9 ₩ WON SIGN.
|
||||||
|
W 2 Wide: all other characters that are always wide. These
|
||||||
|
characters occur only in the context of East Asian
|
||||||
|
typography where they are wide characters (such as the
|
||||||
|
Unified Han Ideographs or Squared Katakana Symbols). This
|
||||||
|
category includes characters that have explicit halfwidth
|
||||||
|
counterparts.
|
||||||
|
Na 1 Narrow: characters that are always narrow and have explicit
|
||||||
|
fullwidth or wide counterparts. These characters are
|
||||||
|
implicitly narrow in East Asian typography and legacy
|
||||||
|
character sets because they have explicit fullwidth or wide
|
||||||
|
counterparts. All of ASCII is an example of East Asian
|
||||||
|
Narrow characters.
|
||||||
|
A 1 or 2 Ambiguous: characters that may sometimes be wide and
|
||||||
|
sometimes narrow. Ambiguous characters require additional
|
||||||
|
information not contained in the character code to further
|
||||||
|
resolve their width. This information is usually defined in
|
||||||
|
terminal setting that should in turn respect glyphs widths
|
||||||
|
in used fonts. Also see :ref:`ambiwidth configuration
|
||||||
|
option <config-common-ambiwidth>`.
|
||||||
|
N 1 Neutral characters: characters that do not occur in legacy
|
||||||
|
East Asian character sets.
|
||||||
|
=== ====== ===========================================================
|
||||||
|
|
||||||
|
:param unicode string:
|
||||||
|
String whose width will be calculated.
|
||||||
|
|
||||||
|
:return: unsigned integer.''')
|
||||||
|
|
||||||
|
|
||||||
|
def strwidth_ucs_4(width_data, string):
    # The user-facing docstring is attached to this function right after its
    # definition (built from the shared ``_strwidth_documentation`` template),
    # so only implementation notes live here.
    #
    # Every element of ``string`` is treated as one complete character.
    cells = 0
    for char in string:
        if combining(char):
            # Combining marks are drawn over the preceding character and
            # therefore occupy no display cell of their own.
            continue
        # Everything else takes the width configured for its East Asian
        # Width class (one of the keys of ``width_data``).
        cells += width_data[east_asian_width(char)]
    return cells
|
||||||
|
|
||||||
|
|
||||||
|
strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
|
||||||
|
'''This version of function expects that characters above 0xFFFF are
|
||||||
|
represented using one symbol. This is only the case in UCS-4 Python builds.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
Even in UCS-4 Python builds it is possible to represent characters above
|
||||||
|
0xFFFF using surrogate pairs. Characters represented this way are not
|
||||||
|
supported.''')
|
||||||
|
|
||||||
|
|
||||||
|
def strwidth_ucs_2(width_data, string):
    # The user-facing docstring is attached to this function right after its
    # definition (built from the shared ``_strwidth_documentation`` template),
    # so only implementation notes live here.
    #
    # Characters above 0xFFFF arrive as two code units: a high surrogate
    # (0xD800..0xDBFF) followed by a low surrogate (0xDC00..0xDFFF). The high
    # half contributes nothing; the width of the whole character is added
    # when the low half is reached.
    cells = 0
    for i, symbol in enumerate(string):
        code = ord(symbol)
        if 0xDC00 <= code <= 0xDFFF and i > 0:
            previous = ord(string[i - 1])
            if 0xD800 <= previous <= 0xDBFF:
                # Low half of a well-formed pair: measure the character the
                # pair encodes.
                # NOTE(review): combining() is not consulted for the decoded
                # character here, unlike in strwidth_ucs_4.
                cells += width_data[
                    east_asian_width(
                        unichr(surrogate_pair_to_character(previous, code))
                    )
                ]
                continue
            # Otherwise this is an unpaired low surrogate. It must not be
            # decoded: pairing it with an arbitrary preceding unit produces
            # a codepoint outside the Unicode range. Fall through and
            # measure it like any other single character. The ``i > 0``
            # guard above also prevents ``string[i - 1]`` from wrapping
            # around to the end of the string.
        if combining(symbol) or 0xD800 <= code <= 0xDBFF:
            # Combining marks and high surrogates take no cell of their own.
            continue
        cells += width_data[east_asian_width(symbol)]
    return cells
|
||||||
|
|
||||||
|
|
||||||
|
strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
|
||||||
|
'''This version of function expects that characters above 0xFFFF are
|
||||||
|
represented using two symbols forming a surrogate pair, which is the only
|
||||||
|
option in UCS-2 Python builds. It still works correctly in UCS-4 Python
|
||||||
|
builds, but is slower than its UCS-4 counterpart.''')
|
||||||
|
@ -5,11 +5,10 @@ import sys
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from unicodedata import east_asian_width, combining
|
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
from powerline.theme import Theme
|
from powerline.theme import Theme
|
||||||
from powerline.lib.unicode import unichr, surrogate_pair_to_character
|
from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
|
||||||
|
|
||||||
|
|
||||||
NBSP = ' '
|
NBSP = ' '
|
||||||
@ -177,46 +176,21 @@ class Renderer(object):
|
|||||||
'F': 2, # Fullwidth
|
'F': 2, # Fullwidth
|
||||||
}
|
}
|
||||||
|
|
||||||
def strwidth(self, string):
|
strwidth = lambda self, s: (
|
||||||
'''Function that returns string width.
|
(strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
|
||||||
|
self.width_data, s)
|
||||||
|
)
|
||||||
|
'''Function that returns string width.
|
||||||
|
|
||||||
Is used to calculate the place given string occupies when handling
|
Is used to calculate the place given string occupies when handling
|
||||||
``width`` argument to ``.render()`` method. Must take east asian width
|
``width`` argument to ``.render()`` method. Must take east asian width
|
||||||
into account.
|
into account.
|
||||||
|
|
||||||
:param unicode string:
|
:param unicode string:
|
||||||
String whose width will be calculated.
|
String whose width will be calculated.
|
||||||
|
|
||||||
:return: unsigned integer.
|
:return: unsigned integer.
|
||||||
'''
|
'''
|
||||||
return sum(((
|
|
||||||
(
|
|
||||||
0
|
|
||||||
) if combining(symbol) else (
|
|
||||||
self.width_data[east_asian_width(symbol)]
|
|
||||||
)
|
|
||||||
) for symbol in string))
|
|
||||||
|
|
||||||
if sys.maxunicode < 0x10FFFF:
|
|
||||||
old_strwidth = strwidth
|
|
||||||
|
|
||||||
def strwidth(self, string):
|
|
||||||
return sum(((
|
|
||||||
(
|
|
||||||
self.width_data[
|
|
||||||
east_asian_width(
|
|
||||||
unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
|
|
||||||
)
|
|
||||||
]
|
|
||||||
) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
|
|
||||||
0
|
|
||||||
) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
|
|
||||||
self.width_data[east_asian_width(symbol)]
|
|
||||||
)
|
|
||||||
) for i, symbol in enumerate(string)))
|
|
||||||
|
|
||||||
strwidth.__doc__ = old_strwidth.__doc__
|
|
||||||
del old_strwidth
|
|
||||||
|
|
||||||
def get_theme(self, matcher_info):
|
def get_theme(self, matcher_info):
|
||||||
'''Get Theme object.
|
'''Get Theme object.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user