diff --git a/client/powerline.py b/client/powerline.py index 07a01cb6..e2b77eaf 100755 --- a/client/powerline.py +++ b/client/powerline.py @@ -7,13 +7,17 @@ import socket import errno import os -from locale import getpreferredencoding - try: from posix import environ except ImportError: from os import environ +try: + from powerline.lib.encoding import get_preferred_output_encoding +except ImportError: + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.realpath(__file__))))) + from powerline.lib.encoding import get_preferred_output_encoding + if len(sys.argv) < 2: print('Must provide at least one argument.', file=sys.stderr) @@ -51,7 +55,7 @@ except Exception: args = ['powerline-render'] + sys.argv[1:] os.execvp('powerline-render', args) -fenc = getpreferredencoding() or 'utf-8' +fenc = get_preferred_output_encoding() def tobytes(s): diff --git a/powerline/__init__.py b/powerline/__init__.py index 1d01d5d5..c36abd2b 100644 --- a/powerline/__init__.py +++ b/powerline/__init__.py @@ -5,7 +5,6 @@ import os import sys import logging -from locale import getpreferredencoding from threading import Lock, Event from powerline.colorscheme import Colorscheme @@ -13,6 +12,7 @@ from powerline.lib.config import ConfigLoader from powerline.lib.unicode import safe_unicode, FailedUnicode from powerline.config import DEFAULT_SYSTEM_CONFIG_DIR from powerline.lib import mergedicts +from powerline.lib.encoding import get_preferred_output_encoding def _config_loader_condition(path): @@ -413,7 +413,7 @@ class Powerline(object): self.setup_kwargs = {} self.imported_modules = set() - get_encoding = staticmethod(getpreferredencoding) + get_encoding = staticmethod(get_preferred_output_encoding) '''Get encoding used by the current application Usually returns encoding of the current locale. 
diff --git a/powerline/bindings/config.py b/powerline/bindings/config.py index 71afca84..bbd4974b 100644 --- a/powerline/bindings/config.py +++ b/powerline/bindings/config.py @@ -5,13 +5,12 @@ import os import re import sys -from locale import getpreferredencoding - from powerline.config import POWERLINE_ROOT, TMUX_CONFIG_DIRECTORY from powerline.lib.config import ConfigLoader from powerline import generate_config_finder, load_config, create_logger, PowerlineLogger, finish_common_config from powerline.lib.shell import which from powerline.bindings.tmux import TmuxVersionInfo, run_tmux_command, get_tmux_version +from powerline.lib.encoding import get_preferred_output_encoding CONFIG_FILE_NAME = re.compile(r'powerline_tmux_(?P<major>\d+)\.(?P<minor>\d+)(?P<suffix>[a-z]+)?(?:_(?P<mod>plus|minus))?\.conf') @@ -84,7 +83,7 @@ def get_main_config(args): def create_powerline_logger(args): config = get_main_config(args) - common_config = finish_common_config(getpreferredencoding(), config['common']) + common_config = finish_common_config(get_preferred_output_encoding(), config['common']) logger = create_logger(common_config) return PowerlineLogger(use_daemon_threads=True, logger=logger, ext='config') diff --git a/powerline/bindings/zsh/__init__.py b/powerline/bindings/zsh/__init__.py index 21b9f3fe..693f5186 100644 --- a/powerline/bindings/zsh/__init__.py +++ b/powerline/bindings/zsh/__init__.py @@ -4,13 +4,14 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import atexit from weakref import WeakValueDictionary, ref -from locale import getpreferredencoding import zsh from powerline.shell import ShellPowerline from powerline.lib import parsedotval from powerline.lib.unicode import unicode +from powerline.lib.encoding import (get_preferred_output_encoding, + get_preferred_environment_encoding) used_powerlines = WeakValueDictionary() @@ -66,7 +67,7 @@ class Args(object): def string(s): if type(s) is bytes: - return 
s.decode(getpreferredencoding(), 'replace') + return s.decode(get_preferred_environment_encoding(), 'replace') else: return str(s) @@ -155,9 +156,9 @@ class Prompt(object): ) if type(r) is not str: if type(r) is bytes: - return r.decode(getpreferredencoding(), 'replace') + return r.decode(get_preferred_output_encoding(), 'replace') else: - return r.encode(getpreferredencoding(), 'replace') + return r.encode(get_preferred_output_encoding(), 'replace') return r def __del__(self): diff --git a/powerline/lib/encoding.py b/powerline/lib/encoding.py new file mode 100644 index 00000000..24b0e4bd --- /dev/null +++ b/powerline/lib/encoding.py @@ -0,0 +1,84 @@ +# vim:fileencoding=utf-8:noet + +'''Encodings support + +This is the only module from which functions obtaining encoding should be +exported. Note: you should always care about errors= argument since it is not +guaranteed that encoding returned by some function can encode/decode given +string. + +All functions in this module must always return a valid encoding. Most of them +are not thread-safe. +''' + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +import sys +import locale + + +def get_preferred_file_name_encoding(): + '''Get preferred file name encoding + ''' + return ( + sys.getfilesystemencoding() + or locale.getpreferredencoding() + or 'utf-8' + ) + + +def get_preferred_file_contents_encoding(): + '''Get encoding preferred for file contents + ''' + return ( + locale.getpreferredencoding() + or 'utf-8' + ) + + +def _get_messages_locale_encoding(): + '''Get encoding of the LC_MESSAGES locale category, if any + + locale.LC_MESSAGES is not defined on non-POSIX platforms (most notably + Windows): None is returned there so that callers use their fallbacks. + ''' + if not hasattr(locale, 'LC_MESSAGES'): + return None + return locale.getlocale(locale.LC_MESSAGES)[1] + + +def get_preferred_output_encoding(): + '''Get encoding that should be used for printing strings + + .. warning:: + Falls back to ASCII, so that output is most likely to be displayed + correctly. + ''' + return ( + _get_messages_locale_encoding() + or locale.getdefaultlocale()[1] + or 'ascii' + ) + + +def get_preferred_input_encoding(): + '''Get encoding that should be used for reading shell command output + + ..
warning:: + Falls back to latin1 so that function is less likely to throw as decoded + output is primarily searched for ASCII values. + ''' + return ( + _get_messages_locale_encoding() + or locale.getdefaultlocale()[1] + or 'latin1' + ) + + +def get_preferred_environment_encoding(): + '''Get encoding that should be used for decoding environment variables + ''' + return ( + locale.getpreferredencoding() + or 'utf-8' + ) diff --git a/powerline/lib/inotify.py b/powerline/lib/inotify.py index 821401e3..d1c65716 100644 --- a/powerline/lib/inotify.py +++ b/powerline/lib/inotify.py @@ -9,6 +9,8 @@ import struct from ctypes.util import find_library +from powerline.lib.encoding import get_preferred_file_name_encoding + __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __docformat__ = 'restructuredtext en' @@ -121,7 +123,7 @@ class INotify(object): raise INotifyError(os.strerror(ctypes.get_errno())) self._buf = ctypes.create_string_buffer(5000) - self.fenc = sys.getfilesystemencoding() or 'utf-8' + self.fenc = get_preferred_file_name_encoding() self.hdr = struct.Struct(b'iIII') # We keep a reference to os to prevent it from being deleted # during interpreter shutdown, which would lead to errors in the diff --git a/powerline/lib/shell.py b/powerline/lib/shell.py index 3359c843..8dd4ca13 100644 --- a/powerline/lib/shell.py +++ b/powerline/lib/shell.py @@ -5,9 +5,10 @@ import sys import os from subprocess import Popen, PIPE -from locale import getlocale, getdefaultlocale, LC_MESSAGES from functools import partial +from powerline.lib.encoding import get_preferred_input_encoding + if sys.platform.startswith('win32'): # Prevent windows from launching consoles when calling commands @@ -15,10 +16,6 @@ if sys.platform.startswith('win32'): Popen = partial(Popen, creationflags=0x08000000) -def _get_shell_encoding(): - return getlocale(LC_MESSAGES)[1] or getdefaultlocale()[1] or 'utf-8' - - def run_cmd(pl, cmd, stdin=None): '''Run command and return its stdout,
stripped @@ -38,7 +35,7 @@ def run_cmd(pl, cmd, stdin=None): return None else: stdout, err = p.communicate(stdin) - stdout = stdout.decode(_get_shell_encoding()) + stdout = stdout.decode(get_preferred_input_encoding()) return stdout.strip() @@ -56,7 +53,7 @@ def readlines(cmd, cwd): Working directory of the command which will be run. ''' p = Popen(cmd, shell=False, stdout=PIPE, stderr=PIPE, cwd=cwd) - encoding = _get_shell_encoding() + encoding = get_preferred_input_encoding() p.stderr.close() with p.stdout: for line in p.stdout: diff --git a/powerline/lib/unicode.py b/powerline/lib/unicode.py index ed4ac2b4..32ea3afe 100644 --- a/powerline/lib/unicode.py +++ b/powerline/lib/unicode.py @@ -4,7 +4,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import sys import codecs -from locale import getpreferredencoding +from powerline.lib.encoding import get_preferred_output_encoding try: @@ -72,14 +72,14 @@ def register_strwidth_error(strwidth): def out_u(s): '''Return unicode string suitable for displaying - Unlike other functions assumes getpreferredencoding() first. Unlike u() does - not throw exceptions for invalid unicode strings. Unlike safe_unicode() does - throw an exception if object is not a string. + Unlike other functions assumes get_preferred_output_encoding() first. Unlike + u() does not throw exceptions for invalid unicode strings. Unlike + safe_unicode() does throw an exception if object is not a string. 
''' if isinstance(s, unicode): return s elif isinstance(s, bytes): - return unicode(s, getpreferredencoding(), 'powerline_decode_error') + return unicode(s, get_preferred_output_encoding(), 'powerline_decode_error') else: raise TypeError('Expected unicode or bytes instance, got {0}'.format(repr(type(s)))) @@ -92,7 +92,7 @@ def safe_unicode(s): * UTF-8 string * Object with __str__() or __repr__() method that returns UTF-8 string or unicode object (depending on python version) - * String in locale.getpreferredencoding() encoding + * String in powerline.lib.encoding.get_preferred_output_encoding() encoding * If everything failed use safe_unicode on last exception with which everything failed ''' @@ -105,7 +105,7 @@ def safe_unicode(s): except TypeError: return unicode(str(s), 'utf-8') except UnicodeDecodeError: - return unicode(s, getpreferredencoding()) + return unicode(s, get_preferred_output_encoding()) except Exception as e: return safe_unicode(e) diff --git a/powerline/lib/vcs/bzr.py b/powerline/lib/vcs/bzr.py index f8f5c397..ce2b26bf 100644 --- a/powerline/lib/vcs/bzr.py +++ b/powerline/lib/vcs/bzr.py @@ -5,18 +5,18 @@ import os import re from io import StringIO -from locale import getpreferredencoding from bzrlib import (workingtree, status, library_state, trace, ui) from powerline.lib.vcs import get_branch_name, get_file_status from powerline.lib.path import join +from powerline.lib.encoding import get_preferred_file_contents_encoding class CoerceIO(StringIO): def write(self, arg): if isinstance(arg, bytes): - arg = arg.decode(getpreferredencoding(), 'replace') + arg = arg.decode(get_preferred_file_contents_encoding(), 'replace') return super(CoerceIO, self).write(arg) @@ -30,7 +30,7 @@ def branch_name_from_config_file(directory, config_file): for line in f: m = nick_pat.match(line) if m is not None: - ans = m.group(1).strip().decode(getpreferredencoding(), 'replace') + ans = m.group(1).strip().decode(get_preferred_file_contents_encoding(), 'replace') break 
except Exception: pass diff --git a/powerline/lib/vcs/git.py b/powerline/lib/vcs/git.py index b700ad8f..60ee0fc0 100644 --- a/powerline/lib/vcs/git.py +++ b/powerline/lib/vcs/git.py @@ -2,14 +2,13 @@ from __future__ import (unicode_literals, division, absolute_import, print_function) import os -import sys import re -from locale import getpreferredencoding - from powerline.lib.vcs import get_branch_name, get_file_status from powerline.lib.shell import readlines from powerline.lib.path import join +from powerline.lib.encoding import (get_preferred_file_name_encoding, + get_preferred_file_contents_encoding) _ref_pat = re.compile(br'ref:\s*refs/heads/(.+)') @@ -23,7 +22,7 @@ def branch_name_from_config_file(directory, config_file): return os.path.basename(directory) m = _ref_pat.match(raw) if m is not None: - return m.group(1).decode(getpreferredencoding(), 'replace') + return m.group(1).decode(get_preferred_file_contents_encoding(), 'replace') return raw[:7] @@ -38,7 +37,7 @@ def git_directory(directory): if raw[-1:] == b'\n': raw = raw[:-1] if not isinstance(path, bytes): - raw = raw.decode(sys.getfilesystemencoding() or 'utf-8') + raw = raw.decode(get_preferred_file_name_encoding()) if not raw: raise IOError('no path in gitfile') return os.path.abspath(os.path.join(directory, raw)) diff --git a/powerline/lib/vcs/mercurial.py b/powerline/lib/vcs/mercurial.py index 4f38f245..829e5603 100644 --- a/powerline/lib/vcs/mercurial.py +++ b/powerline/lib/vcs/mercurial.py @@ -3,19 +3,18 @@ from __future__ import (unicode_literals, division, absolute_import, print_funct import os -from locale import getpreferredencoding - from mercurial import hg, ui, match from powerline.lib.vcs import get_branch_name, get_file_status from powerline.lib.path import join +from powerline.lib.encoding import get_preferred_file_contents_encoding def branch_name_from_config_file(directory, config_file): try: with open(config_file, 'rb') as f: raw = f.read() - return 
raw.decode(getpreferredencoding(), 'replace').strip() + return raw.decode(get_preferred_file_contents_encoding(), 'replace').strip() except Exception: return 'default' diff --git a/powerline/lib/watcher/uv.py b/powerline/lib/watcher/uv.py index 351c68c3..eba020e1 100644 --- a/powerline/lib/watcher/uv.py +++ b/powerline/lib/watcher/uv.py @@ -2,7 +2,6 @@ from __future__ import (unicode_literals, division, absolute_import, print_function) import os -import sys from collections import defaultdict from threading import RLock @@ -11,6 +10,7 @@ from threading import Thread from errno import ENOENT from powerline.lib.path import realpath +from powerline.lib.encoding import get_preferred_file_name_encoding class UvNotFound(NotImplementedError): @@ -71,7 +71,7 @@ class UvWatcher(object): self.watches = {} self.lock = RLock() self.loop = start_uv_thread() - self.fenc = sys.getfilesystemencoding() or 'utf-8' + self.fenc = get_preferred_file_name_encoding() def watch(self, path): path = normpath(path, self.fenc) diff --git a/scripts/powerline-daemon b/scripts/powerline-daemon index 6883509e..3a67d1b8 100755 --- a/scripts/powerline-daemon +++ b/scripts/powerline-daemon @@ -12,11 +12,11 @@ from select import select from signal import signal, SIGTERM from time import sleep from functools import partial -from locale import getpreferredencoding from io import BytesIO from powerline.shell import get_argparser, finish_args, ShellPowerline, write_output from powerline.lib.monotonic import monotonic +from powerline.lib.encoding import get_preferred_output_encoding is_daemon = False @@ -144,7 +144,7 @@ def do_write(conn, result): pass -encoding = getpreferredencoding() or 'UTF-8' +encoding = get_preferred_output_encoding() def safe_bytes(o, encoding=encoding): diff --git a/scripts/powerline-render b/scripts/powerline-render index 048c34ce..4d4903b9 100755 --- a/scripts/powerline-render +++ b/scripts/powerline-render @@ -8,14 +8,14 @@ from __future__ import (unicode_literals, division, 
absolute_import, print_funct import sys import os -from locale import getpreferredencoding - try: from powerline.shell import ShellPowerline, get_argparser, finish_args, write_output except ImportError: sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.realpath(__file__))))) from powerline.shell import ShellPowerline, get_argparser, finish_args, write_output +from powerline.lib.encoding import get_preferred_output_encoding + if sys.version_info < (3,): write = sys.stdout.write @@ -28,4 +28,4 @@ if __name__ == '__main__': finish_args(args) powerline = ShellPowerline(args, run_once=True) segment_info = {'args': args, 'environ': os.environ} - write_output(args, powerline, segment_info, write, getpreferredencoding()) + write_output(args, powerline, segment_info, write, get_preferred_output_encoding())