From 305b65f3650188c94ee4d0ee4231861464eea479 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Dec 2014 14:23:21 +0300 Subject: [PATCH 1/2] Fix regex that searches for non-printable characters --- powerline/lint/markedjson/error.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py index dfde4086..7ee65217 100644 --- a/powerline/lint/markedjson/error.py +++ b/powerline/lint/markedjson/error.py @@ -4,8 +4,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import re -from powerline.lib.unicode import unichr - NON_PRINTABLE_STR = ( '[^' @@ -17,11 +15,17 @@ NON_PRINTABLE_STR = ( + '\u00A0-\uD7FF' # Surrogate escapes: 0xD800-0xDFFF + '\uE000-\uFFFD' + + (( + '\uD800-\uDFFF' + ) if sys.maxunicode < 0x10FFFF else ( + '\U00010000-\U0010FFFF' + )) + ']' + (( # Paired surrogate escapes: allowed in UCS-2 builds as the only way to # represent characters above 0xFFFF. Only paired variant is allowed. - '|[\uD800-\uDBFF][\uDC00-\uDFFF]' + '|(? Date: Sat, 6 Dec 2014 14:38:42 +0300 Subject: [PATCH 2/2] Make unichr() work with characters above U+10000 in UCS-2 builds --- powerline/lib/unicode.py | 11 +++++++++++ tests/test_lib.py | 3 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index b100b18f..8a720036 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -21,6 +21,17 @@ except ImportError: unichr = chr +if sys.maxunicode < 0x10FFFF: + _unichr = unichr + + def unichr(ch): + if ch <= sys.maxunicode: + return _unichr(ch) + else: + ch -= 0x10000 + return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00) + + def u(s): '''Return unicode instance assuming UTF-8 encoded string. ''' diff --git a/tests/test_lib.py b/tests/test_lib.py index 06b28fa7..1766f6a1 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -419,8 +419,7 @@ class TestUnicode(TestCase): self.assertTrue(type('abc') is plu.unicode) def test_unichr(self): - if not sys.maxunicode < 0x10FFFF: - self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF)) + self.assertStringsIdentical('\U0010FFFF', plu.unichr(0x10FFFF)) self.assertStringsIdentical('\uFFFF', plu.unichr(0xFFFF)) self.assertStringsIdentical('\x20', plu.unichr(0x20))