Merge pull request #1220 from ZyX-I/fix-higher-unicode

Fix higher unicode characters
2025-09-25 19:09:10 +02:00 · 2014-12-06 15:01:14 +03:00 · 2014-12-06 15:01:14 +03:00 · 158b6d0177
commit 158b6d0177
parent 432cc5031f 87ca9bca5c
3 changed files with 19 additions and 5 deletions
--- a/powerline/lib/unicode.py
+++ b/powerline/lib/unicode.py
@@ -21,6 +21,17 @@ except ImportError:
 	unichr = chr


+if sys.maxunicode < 0x10FFFF:
+	_unichr = unichr
+
+	def unichr(ch):
+		if ch <= sys.maxunicode:
+			return _unichr(ch)
+		else:
+			ch -= 0x10000
+			return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00)
+
+
 def u(s):
 	'''Return unicode instance assuming UTF-8 encoded string.
 	'''
--- a/powerline/lint/markedjson/error.py
+++ b/powerline/lint/markedjson/error.py
@@ -4,8 +4,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
 import sys
 import re

-from powerline.lib.unicode import unichr
-

 NON_PRINTABLE_STR = (
 	'[^'
@@ -17,11 +15,17 @@ NON_PRINTABLE_STR = (
 	+ '\u00A0-\uD7FF'
 	# Surrogate escapes: 0xD800-0xDFFF
 	+ '\uE000-\uFFFD'
+	+ ((
+		'\uD800-\uDFFF'
+	) if sys.maxunicode < 0x10FFFF else (
+		'\U00010000-\U0010FFFF'
+	))
 	+ ']'
 	+ ((
 		# Paired surrogate escapes: allowed in UCS-2 builds as the only way to 
 		# represent characters above 0xFFFF. Only paired variant is allowed.
-		'|[\uD800-\uDBFF][\uDC00-\uDFFF]'
+		'|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]'
+		+ '|[\uD800-\uDBFF](?![\uDC00-\uDFFF])'
 	) if sys.maxunicode < 0x10FFFF else (
 		''
 	))
--- a/tests/test_lib.py
+++ b/tests/test_lib.py
@@ -419,7 +419,6 @@ class TestUnicode(TestCase):
 		self.assertTrue(type('abc') is plu.unicode)

 	def test_unichr(self):
-		if not sys.maxunicode < 0x10FFFF:
 		self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF))
 		self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF))
 		self.assertStringsIdentical('\x20', plu.unichr(0x20))