diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index b100b18f..8a720036 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -21,6 +21,17 @@ except ImportError: unichr = chr +if sys.maxunicode < 0x10FFFF: + _unichr = unichr + + def unichr(ch): + if ch <= sys.maxunicode: + return _unichr(ch) + else: + ch -= 0x10000 + return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00) + + def u(s): '''Return unicode instance assuming UTF-8 encoded string. ''' diff --git a/powerline/lint/markedjson/error.py b/powerline/lint/markedjson/error.py index dfde4086..7ee65217 100644 --- a/powerline/lint/markedjson/error.py +++ b/powerline/lint/markedjson/error.py @@ -4,8 +4,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import re -from powerline.lib.unicode import unichr - NON_PRINTABLE_STR = ( '[^' @@ -17,11 +15,17 @@ NON_PRINTABLE_STR = ( + '\u00A0-\uD7FF' # Surrogate escapes: 0xD800-0xDFFF + '\uE000-\uFFFD' + + (( + '\uD800-\uDFFF' + ) if sys.maxunicode < 0x10FFFF else ( + '\U00010000-\U0010FFFF' + )) + ']' + (( # Paired surrogate escapes: allowed in UCS-2 builds as the only way to # represent characters above 0xFFFF. Only paired variant is allowed. - '|[\uD800-\uDBFF][\uDC00-\uDFFF]' + '|(?