Move strwidth function variants to powerline.lib.unicode

They can be tested thus.
2025-07-29 16:55:07 +02:00 · 2014-12-04 23:56:40 +03:00 · 2014-12-04 23:56:40 +03:00 · 6dc585b7ee
commit 6dc585b7ee
parent f3c8413043
2 changed files with 109 additions and 39 deletions
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@ -4,6 +4,8 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
 import sys
 import codecs
 from unicodedata import east_asian_width, combining
 from powerline.lib.encoding import get_preferred_output_encoding
@ -134,3 +136,97 @@ def surrogate_pair_to_character(high, low):
 	'''Transform a pair of surrogate codepoints to one codepoint
 	'''
 	return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
 _strwidth_documentation = (
 	'''Compute string width in display cells
 	{0}
 	:param dict width_data:
 		Dictionary which maps east_asian_width property values to strings 
 		lengths. It is expected to contain the following keys and values (from 
 		`East Asian Width annex <http://www.unicode.org/reports/tr11/>`_):
 		===  ======  ===========================================================
 		Key  Value   Description
 		===  ======  ===========================================================
 		F    2       Fullwidth: all characters that are defined as Fullwidth in 
 		             the Unicode Standard [Unicode] by having a compatibility 
 		             decomposition of type <wide> to characters elsewhere in the 
 		             Unicode Standard that are implicitly narrow but unmarked.
 		H    1       Halfwidth: all characters that are explicitly defined as 
 		             Halfwidth in the Unicode Standard by having a compatibility 
 		             decomposition of type <narrow> to characters elsewhere in 
 		             the Unicode Standard that are implicitly wide but unmarked, 
 		             plus U+20A9 ₩ WON SIGN.
 		W    2       Wide: all other characters that are always wide. These 
 		             characters occur only in the context of East Asian 
 		             typography where they are wide characters (such as the 
 		             Unified Han Ideographs or Squared Katakana Symbols). This 
 		             category includes characters that have explicit halfwidth 
 		             counterparts.
 		Na   1       Narrow: characters that are always narrow and have explicit 
 		             fullwidth or wide counterparts. These characters are 
 		             implicitly narrow in East Asian typography and legacy 
 		             character sets because they have explicit fullwidth or wide 
 		             counterparts. All of ASCII is an example of East Asian 
 		             Narrow characters.
 		A    1 or 2  Ambigious: characters that may sometimes be wide and 
 		             sometimes narrow. Ambiguous characters require additional 
 		             information not contained in the character code to further 
 		             resolve their width. This information is usually defined in 
 		             terminal setting that should in turn respect glyphs widths 
 		             in used fonts. Also see :ref:`ambiwidth configuration 
 		             option <config-common-ambiwidth>`.
 		N    1       Neutral characters: character that does not occur in legacy 
 		             East Asian character sets.
 		===  ======  ===========================================================
 	:param unicode string:
 		String whose width will be calculated.
 	:return: unsigned integer.''')
 def strwidth_ucs_4(width_data, string):
 	# The docstring is attached right after the definition: it is built from 
 	# the shared ``_strwidth_documentation`` template.
 	cells = 0
 	for char in string:
 		if combining(char):
 			# Combining marks are drawn over the preceding character and do 
 			# not occupy a display cell of their own.
 			continue
 		cells += width_data[east_asian_width(char)]
 	return cells
 # Fill the shared documentation template with the UCS-4 specific description. 
 # The directive must be spelled ``.. note::``: with a single colon 
 # reStructuredText treats the block as a comment and drops it from the output.
 strwidth_ucs_4.__doc__ = _strwidth_documentation.format(
 	'''This version of the function expects that characters above 0xFFFF are 
 	represented using one symbol. This is only the case in UCS-4 Python builds.
 	.. note::
 		Even in UCS-4 Python builds it is possible to represent characters above 
 		0xFFFF using surrogate pairs. Characters represented this way are not 
 		supported.''')
 def strwidth_ucs_2(width_data, string):
 	# The docstring is attached right after the definition: it is built from 
 	# the shared ``_strwidth_documentation`` template.
 	width = 0
 	# Code unit of a high surrogate still waiting for its low half, if any.
 	pending_high = None
 	for symbol in string:
 		code = ord(symbol)
 		if 0xD800 <= code <= 0xDBFF:
 			# High surrogates take no cells on their own: the width of the pair 
 			# is accounted for once the low surrogate is reached.
 			pending_high = code
 			continue
 		if pending_high is not None and 0xDC00 <= code <= 0xDFFF:
 			# Complete pair: measure the character it encodes.
 			symbol = unichr(surrogate_pair_to_character(pending_high, code))
 		# A low surrogate without a preceding high one falls through and is 
 		# measured as an ordinary symbol instead of being combined with an 
 		# unrelated neighbour (``string[-1]`` for the very first symbol), which 
 		# produced a bogus code point.
 		pending_high = None
 		# Checked after decoding so that combining characters above 0xFFFF get 
 		# zero width, same as in strwidth_ucs_4.
 		if not combining(symbol):
 			width += width_data[east_asian_width(symbol)]
 	return width
 # Fill the shared documentation template with the UCS-2 specific description.
 strwidth_ucs_2.__doc__ = _strwidth_documentation.format(
 	'''This version of the function expects that characters above 0xFFFF are 
 	represented using two symbols forming a surrogate pair, which is the only 
 	option in UCS-2 Python builds. It still works correctly in UCS-4 Python 
 	builds, but is slower than its UCS-4 counterpart.''')
--- a/powerline/renderer.py
+++ b/powerline/renderer.py
@ -5,11 +5,10 @@ import sys
 import os
 import re
 from unicodedata import east_asian_width, combining
 from itertools import chain
 from powerline.theme import Theme
-from powerline.lib.unicode import unichr, surrogate_pair_to_character
+from powerline.lib.unicode import unichr, strwidth_ucs_2, strwidth_ucs_4
 NBSP = ' '
@ -177,46 +176,21 @@ class Renderer(object):
 			'F': 2,          # Fullwidth
 		}
-	def strwidth(self, string):
+	strwidth = lambda self, s: (
-		'''Function that returns string width.
+		(strwidth_ucs_2 if sys.maxunicode < 0x10FFFF else strwidth_ucs_4)(
 			self.width_data, s)
 	)
 	'''Function that returns string width.
-		Is used to calculate the place given string occupies when handling 
+	Is used to calculate the place given string occupies when handling 
-		``width`` argument to ``.render()`` method. Must take east asian width 
+	``width`` argument to ``.render()`` method. Must take east asian width 
-		into account.
+	into account.
-		:param unicode string:
+	:param unicode string:
-			String whose width will be calculated.
+		String whose width will be calculated.
-		:return: unsigned integer.
+	:return: unsigned integer.
-		'''
+	'''
 		return sum(((
 			(
 				0
 			) if combining(symbol) else (
 				self.width_data[east_asian_width(symbol)]
 			)
 		) for symbol in string))
 	if sys.maxunicode < 0x10FFFF:
 		old_strwidth = strwidth
 		def strwidth(self, string):
 			return sum(((
 				(
 					self.width_data[
 						east_asian_width(
 							unichr(surrogate_pair_to_character(ord(string[i - 1]), ord(symbol)))
 						)
 					]
 				) if 0xDC00 <= ord(symbol) <= 0xDFFF else (
 					0
 				) if combining(symbol) or 0xD800 <= ord(symbol) <= 0xDBFF else (
 					self.width_data[east_asian_width(symbol)]
 				)
 			) for i, symbol in enumerate(string)))
 		strwidth.__doc__ = old_strwidth.__doc__
 		del old_strwidth
 	def get_theme(self, matcher_info):
 		'''Get Theme object.