Merge pull request #1220 from ZyX-I/fix-higher-unicode

Fix higher unicode characters
This commit is contained in:
Nikolai Aleksandrovich Pavlov 2014-12-06 15:01:14 +03:00
commit 158b6d0177
3 changed files with 19 additions and 5 deletions

View File

@ -21,6 +21,17 @@ except ImportError:
unichr = chr
if sys.maxunicode < 0x10FFFF:
    # Build whose strings cannot hold code points above sys.maxunicode in
    # a single character: keep the existing function under a private name
    # and replace it with a wrapper that emits surrogate pairs.
    _unichr = unichr

    def unichr(ch):
        '''Return the unicode string for code point ``ch``.

        Code points up to ``sys.maxunicode`` are passed to the original
        function unchanged. Larger ones are returned as a two-character
        string holding the UTF-16 surrogate pair (high unit, then low unit).

        :param int ch:
            Code point to convert.

        :return:
            String of length one, or of length two when a surrogate pair
            is needed.

        :raises ValueError:
            If ``ch`` is above 0x10FFFF, the largest code point a surrogate
            pair can encode.
        '''
        if ch <= sys.maxunicode:
            return _unichr(ch)
        if ch > 0x10FFFF:
            # Without this check the arithmetic below would produce a
            # "high" unit outside 0xD800-0xDBFF instead of failing.
            raise ValueError('unichr() arg not in range(0x110000)')
        offset = ch - 0x10000
        high = 0xD800 + (offset >> 10)  # upper 10 bits of the offset
        low = 0xDC00 + (offset & 0x3FF)  # lower 10 bits of the offset
        return _unichr(high) + _unichr(low)
def u(s):
'''Return unicode instance assuming UTF-8 encoded string.
'''

View File

@ -4,8 +4,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys
import re
from powerline.lib.unicode import unichr
NON_PRINTABLE_STR = (
'[^'
@ -17,11 +15,17 @@ NON_PRINTABLE_STR = (
+ '\u00A0-\uD7FF'
# Surrogate escapes: 0xD800-0xDFFF
+ '\uE000-\uFFFD'
+ ((
'\uD800-\uDFFF'
) if sys.maxunicode < 0x10FFFF else (
'\U00010000-\U0010FFFF'
))
+ ']'
+ ((
# Surrogate escapes: in UCS-2 builds a surrogate pair is the only way to
# represent characters above 0xFFFF, so only the paired form is allowed.
'|[\uD800-\uDBFF][\uDC00-\uDFFF]'
'|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]'
+ '|[\uD800-\uDBFF](?![\uDC00-\uDFFF])'
) if sys.maxunicode < 0x10FFFF else (
''
))

View File

@ -419,7 +419,6 @@ class TestUnicode(TestCase):
self.assertTrue(type('abc') is plu.unicode)
def test_unichr(self):
# Compares the result of plu.unichr() with string literals for three code
# points: 0x10FFFF, 0xFFFF and 0x20.
# NOTE(review): indentation is not visible in this diff view, so it cannot
# be determined here which assertions the guard below covers. The hunk
# header (7 lines before, 6 after) suggests the guard is the line this
# commit removes, making the 0x10FFFF check unconditional -- confirm.
if not sys.maxunicode < 0x10FFFF:
self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF))
self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF))
self.assertStringsIdentical('\x20', plu.unichr(0x20))