Various performance improvements for the vcs backend

1) Fix a bug where watching a directory would also report the directory
as changed if any files inside it were changed. This would cause
excessive polling by the branch segment.

2) Fix a bug where the watch for the repo dir in the branch segment
was being continually recreated, again causing excessive polling. This
was particularly noticeable with the patch to fix fugitive file status
updates.

3) Improve the performance of branch coloring for git repos by ignoring
change events for the index.lock file. This file is created/deleted
every time git accesses the index and would cause the entire working tree
status to be re-queried every time.
This commit is contained in:
Kovid Goyal 2013-06-14 13:38:24 +05:30
parent 0435d44984
commit 14bec211aa
4 changed files with 47 additions and 21 deletions

View File

@ -84,14 +84,18 @@ class INotifyWatch(INotify):
eno = ctypes.get_errno() eno = ctypes.get_errno()
if eno != errno.ENOTDIR: if eno != errno.ENOTDIR:
self.handle_error() self.handle_error()
# Try watching path as a file # Try watching path as a file
flags |= (self.MODIFY | self.ATTRIB) flags |= (self.MODIFY | self.ATTRIB)
wd = self._add_watch(self._inotify_fd, buf, flags) wd = self._add_watch(self._inotify_fd, buf, flags)
if wd == -1: if wd == -1:
self.handle_error() self.handle_error()
self.watches[path] = wd self.watches[path] = wd
self.modified[path] = False self.modified[path] = False
def is_watched(self, path):
    ''' Return True if the resolved form of path has an entry in
    self.watches, False otherwise. '''
    resolved = realpath(path)
    with self.lock:
        # Membership is tested under the lock that guards self.watches
        currently_watched = resolved in self.watches
    return currently_watched
def __call__(self, path): def __call__(self, path):
''' Return True if path has been modified since the last call. Can ''' Return True if path has been modified since the last call. Can
raise OSError if the path does not exist. ''' raise OSError if the path does not exist. '''
@ -141,6 +145,10 @@ class StatWatch(object):
with self.lock: with self.lock:
self.watches.pop(path, None) self.watches.pop(path, None)
def is_watched(self, path):
    ''' Report whether path (after realpath resolution) is a key of
    self.watches. '''
    target = realpath(path)
    with self.lock:
        # Hold the lock while consulting the shared watches mapping
        found = target in self.watches
    return found
def __call__(self, path): def __call__(self, path):
path = realpath(path) path = realpath(path)
with self.lock: with self.lock:

View File

@ -31,11 +31,12 @@ class INotifyTreeWatcher(INotify):
is_dummy = False is_dummy = False
def __init__(self, basedir): def __init__(self, basedir, ignore_event=None):
super(INotifyTreeWatcher, self).__init__() super(INotifyTreeWatcher, self).__init__()
self.basedir = realpath(basedir) self.basedir = realpath(basedir)
self.watch_tree() self.watch_tree()
self.modified = True self.modified = True
self.ignore_event = (lambda path, name: False) if ignore_event is None else ignore_event
def watch_tree(self): def watch_tree(self):
self.watched_dirs = {} self.watched_dirs = {}
@ -93,7 +94,7 @@ class INotifyTreeWatcher(INotify):
self.MODIFY | self.CREATE | self.DELETE | self.MODIFY | self.CREATE | self.DELETE |
self.MOVE_SELF | self.MOVED_FROM | self.MOVED_TO | self.MOVE_SELF | self.MOVED_FROM | self.MOVED_TO |
self.ATTRIB | self.MOVE_SELF | self.DELETE_SELF) self.ATTRIB | self.DELETE_SELF)
if wd == -1: if wd == -1:
eno = ctypes.get_errno() eno = ctypes.get_errno()
if eno == errno.ENOTDIR: if eno == errno.ENOTDIR:
@ -112,7 +113,7 @@ class INotifyTreeWatcher(INotify):
return return
path = self.watched_rmap.get(wd, None) path = self.watched_rmap.get(wd, None)
if path is not None: if path is not None:
self.modified = True self.modified = not self.ignore_event(path, name)
if mask & self.CREATE: if mask & self.CREATE:
# A new sub-directory might have been created, monitor it. # A new sub-directory might have been created, monitor it.
try: try:
@ -152,10 +153,10 @@ class TreeWatcher(object):
self.last_query_times = {} self.last_query_times = {}
self.expire_time = expire_time * 60 self.expire_time = expire_time * 60
def watch(self, path, logger=None): def watch(self, path, logger=None, ignore_event=None):
path = realpath(path) path = realpath(path)
try: try:
w = INotifyTreeWatcher(path) w = INotifyTreeWatcher(path, ignore_event=ignore_event)
except (INotifyError, DirTooLarge) as e: except (INotifyError, DirTooLarge) as e:
if logger is not None: if logger is not None:
logger.warn('Failed to watch path: {0} with error: {1}'.format(path, e)) logger.warn('Failed to watch path: {0} with error: {1}'.format(path, e))
@ -176,14 +177,14 @@ class TreeWatcher(object):
for path in pop: for path in pop:
del self.last_query_times[path] del self.last_query_times[path]
def __call__(self, path, logger=None): def __call__(self, path, logger=None, ignore_event=None):
path = realpath(path) path = realpath(path)
self.expire_old_queries() self.expire_old_queries()
self.last_query_times[path] = monotonic() self.last_query_times[path] = monotonic()
w = self.watches.get(path, None) w = self.watches.get(path, None)
if w is None: if w is None:
try: try:
self.watch(path) self.watch(path, logger=logger, ignore_event=ignore_event)
except NoSuchDir: except NoSuchDir:
pass pass
return True return True

View File

@ -39,8 +39,15 @@ def get_branch_name(directory, config_file, get_func):
global branch_name_cache global branch_name_cache
with branch_lock: with branch_lock:
# Check if the repo directory was moved/deleted # Check if the repo directory was moved/deleted
# We cannot use the file_watcher for this as the inotify based file
# watcher will mark a directory as changed if any files inside it have
# changed, this is a big performance hit in vim, which continuously
# changes files inside the repo dir (backup/swap files, for instance).
# Check if the repo directory was moved/deleted
fw = file_watcher()
is_watched = fw.is_watched(directory)
try: try:
changed = file_watcher()(directory) changed = fw(directory)
except OSError as e: except OSError as e:
if getattr(e, 'errno', None) != errno.ENOENT: if getattr(e, 'errno', None) != errno.ENOENT:
raise raise
@ -48,12 +55,13 @@ def get_branch_name(directory, config_file, get_func):
if changed: if changed:
branch_name_cache.pop(config_file, None) branch_name_cache.pop(config_file, None)
# Remove the watches for this repo # Remove the watches for this repo
file_watcher().unwatch(directory) if is_watched:
file_watcher().unwatch(config_file) fw.unwatch(directory)
fw.unwatch(config_file)
else: else:
# Check if the config file has changed # Check if the config file has changed
try: try:
changed = file_watcher()(config_file) changed = fw(config_file)
except OSError as e: except OSError as e:
if getattr(e, 'errno', None) != errno.ENOENT: if getattr(e, 'errno', None) != errno.ENOENT:
raise raise
@ -176,7 +184,7 @@ class TreeStatusCache(dict):
def __call__(self, repo, logger): def __call__(self, repo, logger):
key = repo.directory key = repo.directory
try: try:
if self.tw(key): if self.tw(key, logger=logger, ignore_event=getattr(repo, 'ignore_event', None)):
self.pop(key, None) self.pop(key, None)
except OSError as e: except OSError as e:
logger.warn('Failed to check %s for changes, with error: %s'% key, e) logger.warn('Failed to check %s for changes, with error: %s'% key, e)
@ -209,7 +217,7 @@ def debug():
''' To use run python -c "from powerline.lib.vcs import debug; debug()" some_file_to_watch ''' ''' To use run python -c "from powerline.lib.vcs import debug; debug()" some_file_to_watch '''
import sys import sys
dest = sys.argv[-1] dest = sys.argv[-1]
repo = guess(dest) repo = guess(os.path.abspath(dest))
if repo is None: if repo is None:
print ('%s is not a recognized vcs repo' % dest) print ('%s is not a recognized vcs repo' % dest)
raise SystemExit(1) raise SystemExit(1)
@ -224,3 +232,5 @@ def debug():
raw_input('Press Enter to check again: ') raw_input('Press Enter to check again: ')
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
except EOFError:
pass

View File

@ -41,18 +41,24 @@ def do_status(directory, path, func):
# for some reason I cannot be bothered to figure out. # for some reason I cannot be bothered to figure out.
return get_file_status( return get_file_status(
directory, os.path.join(gitd, 'index'), directory, os.path.join(gitd, 'index'),
path, '.gitignore', func, extra_ignore_files=tuple(os.path.join(gitd, x) for x in ('HEAD', 'info/exclude'))) path, '.gitignore', func, extra_ignore_files=tuple(os.path.join(gitd, x) for x in ('logs/HEAD', 'info/exclude')))
return func(directory, path) return func(directory, path)
def ignore_event(path, name):
    ''' Decide whether a change event should be ignored.

    path is the directory the event was reported for and name is the entry
    the event refers to (presumably the file name inside that directory;
    verify against the tree watcher that calls this).

    Returns True only for the index.lock file of a directory whose path
    ends with '.git', False for everything else. '''
    # Ignore changes to the index.lock file, since they happen frequently and
    # don't indicate an actual change in the working tree status.
    # A leftover unconditional "return False" used to sit here, which made
    # the check below unreachable so that no event was ever ignored.
    return path.endswith('.git') and name == 'index.lock'
try: try:
import pygit2 as git import pygit2 as git
class Repository(object): class Repository(object):
__slots__ = ('directory') __slots__ = ('directory', 'ignore_event')
def __init__(self, directory): def __init__(self, directory):
self.directory = os.path.abspath(directory) self.directory = os.path.abspath(directory)
self.ignore_event = ignore_event
def do_status(self, directory, path): def do_status(self, directory, path):
if path: if path:
@ -135,10 +141,11 @@ except ImportError:
yield line[:-1].decode('utf-8') yield line[:-1].decode('utf-8')
class Repository(object): class Repository(object):
__slots__ = ('directory',) __slots__ = ('directory', 'ignore_event')
def __init__(self, directory): def __init__(self, directory):
self.directory = os.path.abspath(directory) self.directory = os.path.abspath(directory)
self.ignore_event = ignore_event
def _gitcmd(self, directory, *args): def _gitcmd(self, directory, *args):
return readlines(('git',) + args, directory) return readlines(('git',) + args, directory)