Profiling showed that a lot of the time was spent in two helper functions:
- glob(), which scans the filesystem for filenames that match a pattern
- get_tags(), which parses the "@tags" section of a jstest

Printing the arguments of these functions showed that many of the calls are duplicates, because many suites use the same glob pattern and many suites want to know which tags a given test contains. Since we already assume the resmokeconfig is not changing out from under us, it makes sense to cache these results.

I don't think memory usage is a concern, because resmoke is either short-lived (as in find-suites) or it runs subprocesses that already use a lot of memory (mongod, or a whole sharded cluster).

On my machine these two changes brought the time down from around 1m 20s to 10s.

GitOrigin-RevId: f4538e44aaca8c1ae11c38cc48b6ec76d2a85d1b
41 lines
1.2 KiB
Python
41 lines
1.2 KiB
Python
"""Filename globbing utility."""
|
|
|
|
import glob as _glob
|
|
import os.path
|
|
import re
|
|
import functools
|
|
|
|
# Matches any single glob wildcard character: '*', '?' or '['.
_CONTAINS_GLOB_PATTERN = re.compile("[*?[]")


def is_glob_pattern(string):
    """Tell whether 'string' contains at least one glob wildcard character.

    Returns True when 'string' includes '*', '?' or '[', and False otherwise.
    """

    # Same check that glob.has_magic() performs.
    wildcard_match = _CONTAINS_GLOB_PATTERN.search(string)
    if wildcard_match is None:
        return False
    return True
|
|
|
|
|
|
@functools.cache
def _cached_glob_matches(globbed_pathname):
    """Return the matches for 'globbed_pathname' as a memoized, immutable tuple."""

    # A tuple is cached (rather than a list) so that no caller can alter the
    # memoized result in place and thereby change what later callers receive.
    return tuple(iglob(globbed_pathname))


def glob(globbed_pathname):
    """Return a list of pathnames matching the 'globbed_pathname' pattern.

    In addition to containing simple shell-style wildcards a la fnmatch,
    the pattern may also contain globstars ("**"), which is recursively
    expanded to match zero or more subdirectories.

    The filesystem is scanned only once per distinct pattern; later calls
    with the same pattern reuse the memoized matches and therefore do not
    observe files created or removed in the meantime. Every call returns a
    new list, so callers may freely sort, extend, or otherwise mutate it
    without affecting the cache.
    """

    return list(_cached_glob_matches(globbed_pathname))


# Keep the cache-management hooks that functools.cache exposed on glob() itself.
glob.cache_clear = _cached_glob_matches.cache_clear
glob.cache_info = _cached_glob_matches.cache_info
|
|
|
|
|
|
def iglob(globbed_pathname):
    """Lazily yield each pathname that matches the 'globbed_pathname' pattern.

    The pattern accepts the usual shell-style wildcards (as in fnmatch) and
    additionally the globstar ("**"), which expands recursively to zero or
    more subdirectories.
    """

    # Each match goes through os.path.normpath() so that callers can compare
    # the yielded pathnames with plain string equality afterwards.
    yield from map(os.path.normpath, _glob.iglob(globbed_pathname, recursive=True))
|