Fix regex that searches for non-printable characters

This commit is contained in:
ZyX 2014-12-06 14:23:21 +03:00
parent 432cc5031f
commit 305b65f365
1 changed file with 7 additions and 3 deletions

View File

@@ -4,8 +4,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct
import sys import sys
import re import re
from powerline.lib.unicode import unichr
NON_PRINTABLE_STR = ( NON_PRINTABLE_STR = (
'[^' '[^'
@@ -17,11 +15,17 @@ NON_PRINTABLE_STR = (
+ '\u00A0-\uD7FF' + '\u00A0-\uD7FF'
# Surrogate escapes: 0xD800-0xDFFF # Surrogate escapes: 0xD800-0xDFFF
+ '\uE000-\uFFFD' + '\uE000-\uFFFD'
+ ((
'\uD800-\uDFFF'
) if sys.maxunicode < 0x10FFFF else (
'\U00010000-\U0010FFFF'
))
+ ']' + ']'
+ (( + ((
# Paired surrogate escapes: allowed in UCS-2 builds as the only way to # Paired surrogate escapes: allowed in UCS-2 builds as the only way to
# represent characters above 0xFFFF. Only paired variant is allowed. # represent characters above 0xFFFF. Only paired variant is allowed.
'|[\uD800-\uDBFF][\uDC00-\uDFFF]' '|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]'
+ '|[\uD800-\uDBFF](?![\uDC00-\uDFFF])'
) if sys.maxunicode < 0x10FFFF else ( ) if sys.maxunicode < 0x10FFFF else (
'' ''
)) ))