cpython/Lib/fnmatch.py

"""Filename matching with shell patterns.

fnmatch(FILENAME, PATTERN) matches according to the local convention.
fnmatchcase(FILENAME, PATTERN) always takes case in account.

The functions operate by translating the pattern into a regular
expression.  They cache the compiled regular expressions for speed.

The function translate(PATTERN) returns a regular expression
corresponding to PATTERN.  (It does not compile it.)
"""
import os
import posixpath
import re
import functools

__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]

def fnmatch(name, pat):
    """Report whether FILENAME matches the shell-style PATTERN.

    Recognised wildcards:

    *       any run of characters
    ?       exactly one character
    [seq]   one character that appears in seq
    [!seq]  one character that does not appear in seq

    A leading period in FILENAME gets no special treatment.
    FILENAME and PATTERN are both passed through os.path.normcase()
    before being compared, so the result follows the platform's
    case convention.  Call fnmatchcase(FILENAME, PATTERN) to skip
    that normalization.
    """
    # The name is normalized first, then the pattern.
    return fnmatchcase(os.path.normcase(name), os.path.normcase(pat))

@functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat):
    """Return the bound ``match`` method of the regex compiled from PAT.

    Results are memoized by lru_cache; typed=True keeps arguments of
    different types in separate cache entries.
    """
    if not isinstance(pat, bytes):
        regex = translate(pat)
    else:
        # translate() is fed a str, so a bytes pattern is decoded as
        # ISO-8859-1 first and the resulting regex is encoded back the
        # same way before compilation.
        regex = bytes(translate(str(pat, 'ISO-8859-1')), 'ISO-8859-1')
    return re.compile(regex).match

def filter(names, pat):
    """Return a list of the items of the iterable NAMES that match PAT."""
    matches = _compile_pattern(os.path.normcase(pat))
    if os.path is posixpath:
        # posixpath.normcase() does nothing, so skip the per-name call.
        return [name for name in names if matches(name)]
    # Match against the case-normalized name but hand back the original.
    return [name for name in names if matches(os.path.normcase(name))]

def fnmatchcase(name, pat):
    """Report whether FILENAME matches PATTERN, comparing case as given.

    Unlike fnmatch(), neither argument is case-normalized before
    the comparison.
    """
    return _compile_pattern(pat)(name) is not None


def translate(pat):
    """Convert a shell PATTERN into regular expression source text.

    Shell patterns provide no way to quote meta-characters.
    """
    # '*' is only a placeholder token here: _join_translated_parts()
    # locates it through the star indices and substitutes the real
    # wildcard regex.  '?' maps straight to '.'.
    return _join_translated_parts(*_translate(pat, '*', '.'))

# Substitution helper used to backslash-escape '&', '~' and '|' inside a
# character set, so that '&&', '~~' and '||' are not read as set operations.
_re_setops_sub = re.compile(r'([&~|])').sub
# Memoized re.escape(); _translate() calls it one character at a time.
_re_escape = functools.lru_cache(maxsize=512)(re.escape)


def _translate(pat, star, question_mark):
    """Split the shell pattern PAT into a list of regex fragments.

    STAR is the fragment emitted for each run of '*' characters and
    QUESTION_MARK is the fragment emitted for each '?'.

    Return a pair (fragments, star_indices), where star_indices holds
    the positions inside fragments at which STAR was emitted.
    """
    pieces = []
    emit = pieces.append
    star_indices = []

    pos, end = 0, len(pat)
    while pos < end:
        ch = pat[pos]
        pos += 1

        if ch == '*':
            # Remember where this wildcard lands in the output ...
            star_indices.append(len(pieces))
            emit(star)
            # ... and swallow any directly following `*` characters.
            while pos < end and pat[pos] == '*':
                pos += 1
            continue

        if ch == '?':
            emit(question_mark)
            continue

        if ch != '[':
            emit(_re_escape(ch))
            continue

        # Look for the closing bracket.  A '!' right after the '[' and
        # a ']' in first position are part of the set, so step over
        # them before scanning.
        close = pos
        if close < end and pat[close] == '!':
            close += 1
        if close < end and pat[close] == ']':
            close += 1
        while close < end and pat[close] != ']':
            close += 1
        if close >= end:
            # Unterminated set: the '[' is an ordinary character.
            emit('\\[')
            continue

        body = pat[pos:close]
        if '-' in body:
            # Cut the set at every hyphen that can act as a range
            # operator.  The first candidate may not be the leading
            # member of the set, and after a cut the search resumes
            # three characters on, which steps over the character that
            # closes the range just recorded.
            segments = []
            scan_from = pos + 2 if pat[pos] == '!' else pos + 1
            dash = pat.find('-', scan_from, close)
            while dash >= 0:
                segments.append(pat[pos:dash])
                pos = dash + 1
                dash = pat.find('-', dash + 3, close)
            tail = pat[pos:close]
            if tail:
                segments.append(tail)
            else:
                # The set ends with a hyphen: keep it as a literal.
                segments[-1] += '-'
            # Remove empty ranges -- invalid in RE.
            for idx in range(len(segments) - 1, 0, -1):
                left, right = segments[idx - 1], segments[idx]
                if left[-1] > right[0]:
                    segments[idx - 1] = left[:-1] + right[1:]
                    del segments[idx]
            # Escape backslashes and hyphens for set difference (--).
            # Hyphens that create ranges shouldn't be escaped.
            body = '-'.join(
                segment.replace('\\', r'\\').replace('-', r'\-')
                for segment in segments
            )
        else:
            body = body.replace('\\', r'\\')

        pos = close + 1
        if not body:
            # Empty range: never match.
            emit('(?!)')
        elif body == '!':
            # Negated empty range: match any character.
            emit('.')
        else:
            # Escape set operations (&&, ~~ and ||).
            body = _re_setops_sub(r'\\\1', body)
            lead = body[0]
            if lead == '!':
                body = '^' + body[1:]
            elif lead in ('^', '['):
                body = '\\' + body
            emit(f'[{body}]')

    assert pos == end
    return pieces, star_indices


def _join_translated_parts(parts, star_indices):
    """Assemble the fragments from _translate() into one regex string.

    PARTS is the fragment list and STAR_INDICES the positions in it
    that hold wildcard placeholders.  The placeholders themselves are
    dropped; each one is replaced by the wildcard regex chosen below.
    """
    if not star_indices:
        return fr'(?s:{"".join(parts)})\Z'

    prev = star_indices[0]
    chunks = parts[:prev]  # fixed pieces before the first wildcard
    for cur in star_indices[1:]:
        # Every wildcard that is followed by another one becomes a
        # minimal `.*?` match plus the fixed text up to that next
        # wildcard, wrapped in an atomic group ("(?>...)") so there is
        # no possibility of backtracking into it.
        # Note: people rely on the undocumented ability to join multiple
        # translate() results together via "|" to build large regexps
        # matching "one of many" shell patterns.
        chunks += ['(?>.*?', *parts[prev + 1:cur], ')']
        prev = cur
    # The last wildcard is a plain greedy match followed by the tail.
    chunks.append('.*')
    chunks += parts[prev + 1:]
    res = ''.join(chunks)
    return fr'(?s:{res})\Z'
changes for the Mac 1995-01-27 02:41:45 +00:00			`"""Filename matching with shell patterns.`
Rewritten using regex. 1992-01-12 23:29:29 +00:00
changes for the Mac 1995-01-27 02:41:45 +00:00			`fnmatch(FILENAME, PATTERN) matches according to the local convention.`
			`fnmatchcase(FILENAME, PATTERN) always takes case in account.`
Rewritten using regex. 1992-01-12 23:29:29 +00:00
changes for the Mac 1995-01-27 02:41:45 +00:00			`The functions operate by translating the pattern into a regular`
			`expression. They cache the compiled regular expressions for speed.`

			`The function translate(PATTERN) returns a regular expression`
			`corresponding to PATTERN. (It does not compile it.)`
			`"""`
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 16:22:25 +00:00			`import os`
			`import posixpath`
Convert all remaining simple cases of regex usage to re usage. 1997-10-22 21:00:49 +00:00			`import re`
Re-apply r83871. 2010-08-13 16:26:40 +00:00			`import functools`
Convert all remaining simple cases of regex usage to re usage. 1997-10-22 21:00:49 +00:00
Re-apply r83871. 2010-08-13 16:26:40 +00:00			`__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]`
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 16:22:25 +00:00
Initial revision 1991-01-01 18:11:14 +00:00			`def fnmatch(name, pat):`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`"""Test whether FILENAME matches PATTERN.`

			`Patterns are Unix shell style:`

			`* matches everything`
			`? matches any single character`
			`[seq] matches any character in seq`
			`[!seq] matches any char not in seq`

			`An initial period in FILENAME is not special.`
			`Both FILENAME and PATTERN are first case-normalized`
			`if the operating system requires it.`
			`If you don't want this, use fnmatchcase(FILENAME, PATTERN).`
			`"""`
			`name = os.path.normcase(name)`
			`pat = os.path.normcase(pat)`
			`return fnmatchcase(name, pat)`
changes for the Mac 1995-01-27 02:41:45 +00:00
bpo-42799: fnmatch module: bump up size of lru_cache for patterns (GH-27084) 2021-07-15 10:53:26 +00:00			`@functools.lru_cache(maxsize=32768, typed=True)`
Simplify calls in fnmatch. 2011-10-20 16:22:10 +00:00			`def _compile_pattern(pat):`
			`if isinstance(pat, bytes):`
Re-apply r83871. 2010-08-13 16:26:40 +00:00			`pat_str = str(pat, 'ISO-8859-1')`
			`res_str = translate(pat_str)`
			`res = bytes(res_str, 'ISO-8859-1')`
			`else:`
			`res = translate(pat)`
			`return re.compile(res).match`
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 16:22:25 +00:00
Patch #409973: Speedup glob.glob, add fnmatch.filter. 2001-06-06 06:24:38 +00:00			`def filter(names, pat):`
bpo-36769: Document that fnmatch.filter supports any kind of iterable (#13039) 2020-12-18 19:10:20 +00:00			`"""Construct a list from those elements of the iterable NAMES that match PAT."""`
Issue #3187: Better support for "undecodable" filenames. Code by Victor Stinner, with small tweaks by GvR. 2008-10-02 18:55:37 +00:00			`result = []`
			`pat = os.path.normcase(pat)`
Simplify calls in fnmatch. 2011-10-20 16:22:10 +00:00			`match = _compile_pattern(pat)`
Patch #409973: Speedup glob.glob, add fnmatch.filter. 2001-06-06 06:24:38 +00:00			`if os.path is posixpath:`
			`# normcase on posix is NOP. Optimize it away from the loop.`
			`for name in names:`
			`if match(name):`
			`result.append(name)`
			`else:`
			`for name in names:`
			`if match(os.path.normcase(name)):`
			`result.append(name)`
			`return result`

changes for the Mac 1995-01-27 02:41:45 +00:00			`def fnmatchcase(name, pat):`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`"""Test whether FILENAME matches PATTERN, including case.`

			`This is a version of fnmatch() which doesn't case-normalize`
			`its arguments.`
			`"""`
Simplify calls in fnmatch. 2011-10-20 16:22:10 +00:00			`match = _compile_pattern(pat)`
Issue #3187: Better support for "undecodable" filenames. Code by Victor Stinner, with small tweaks by GvR. 2008-10-02 18:55:37 +00:00			`return match(name) is not None`
Initial revision 1991-01-01 18:11:14 +00:00
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 16:22:25 +00:00
Rewritten using regex. 1992-01-12 23:29:29 +00:00			`def translate(pat):`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`"""Translate a shell PATTERN to a regular expression.`

			`There is no way to quote meta-characters.`
			`"""`

gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`parts, star_indices = _translate(pat, '*', '.')`
			`return _join_translated_parts(parts, star_indices)`
GH-72904: Add `glob.translate()` function (#106703) Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-11-13 17:15:56 +00:00
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`_re_setops_sub = re.compile(r'([&~\|])').sub`
			`_re_escape = functools.lru_cache(maxsize=512)(re.escape)`
GH-72904: Add `glob.translate()` function (#106703) Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-11-13 17:15:56 +00:00
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`def _translate(pat, star, question_mark):`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 02:28:24 +00:00			`res = []`
			`add = res.append`
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`star_indices = []`

Whitespace normalization. 2001-01-14 23:36:06 +00:00			`i, n = 0, len(pat)`
			`while i < n:`
			`c = pat[i]`
			`i = i+1`
			`if c == '*':`
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`# store the position of the wildcard`
			`star_indices.append(len(res))`
			`add(star)`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 02:28:24 +00:00			# compress consecutive `*` into one
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`while i < n and pat[i] == '*':`
			`i += 1`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`elif c == '?':`
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`add(question_mark)`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`elif c == '[':`
			`j = i`
			`if j < n and pat[j] == '!':`
			`j = j+1`
			`if j < n and pat[j] == ']':`
			`j = j+1`
			`while j < n and pat[j] != ']':`
			`j = j+1`
			`if j >= n:`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 02:28:24 +00:00			`add('\\[')`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`else:`
bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '\|\|' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. 2018-02-09 11:30:19 +00:00			`stuff = pat[i:j]`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 08:46:29 +00:00			`if '-' not in stuff:`
bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '\|\|' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. 2018-02-09 11:30:19 +00:00			`stuff = stuff.replace('\\', r'\\')`
			`else:`
			`chunks = []`
			`k = i+2 if pat[i] == '!' else i+1`
			`while True:`
			`k = pat.find('-', k, j)`
			`if k < 0:`
			`break`
			`chunks.append(pat[i:k])`
			`i = k+1`
			`k = k+3`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 08:46:29 +00:00			`chunk = pat[i:j]`
			`if chunk:`
			`chunks.append(chunk)`
			`else:`
			`chunks[-1] += '-'`
			`# Remove empty ranges -- invalid in RE.`
			`for k in range(len(chunks)-1, 0, -1):`
			`if chunks[k-1][-1] > chunks[k][0]:`
			`chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]`
			`del chunks[k]`
bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '\|\|' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. 2018-02-09 11:30:19 +00:00			`# Escape backslashes and hyphens for set difference (--).`
			`# Hyphens that create ranges shouldn't be escaped.`
			`stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')`
			`for s in chunks)`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`i = j+1`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 08:46:29 +00:00			`if not stuff:`
			`# Empty range: never match.`
			`add('(?!)')`
			`elif stuff == '!':`
			`# Negated empty range: match any character.`
			`add('.')`
			`else:`
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`# Escape set operations (&&, ~~ and \|\|).`
			`stuff = _re_setops_sub(r'\\\1', stuff)`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 08:46:29 +00:00			`if stuff[0] == '!':`
			`stuff = '^' + stuff[1:]`
			`elif stuff[0] in ('^', '['):`
			`stuff = '\\' + stuff`
			`add(f'[{stuff}]')`
Whitespace normalization. 2001-01-14 23:36:06 +00:00			`else:`
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`add(_re_escape(c))`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 02:28:24 +00:00			`assert i == n`
gh-122288: Improve performances of `fnmatch.translate` (#122289) Improve performance of this function by a factor of 1.7x. Co-authored-by: Barney Gale <barney.gale@gmail.com> 2024-11-27 16:42:45 +00:00			`return res, star_indices`


			`def _join_translated_parts(parts, star_indices):`
			`if not star_indices:`
			`return fr'(?s:{"".join(parts)})\Z'`
			`iter_star_indices = iter(star_indices)`
			`j = next(iter_star_indices)`
			`buffer = parts[:j] # fixed pieces at the start`
			`append, extend = buffer.append, buffer.extend`
			`i = j + 1`
			`for j in iter_star_indices:`
			`# Now deal with STAR fixed STAR fixed ...`
			# For an interior `STAR fixed` pairing, we want to do a minimal
			# .*? match followed by `fixed`, with no possibility of backtracking.
			`# Atomic groups ("(?>...)") allow us to spell that directly.`
			`# Note: people rely on the undocumented ability to join multiple`
			`# translate() results together via "\|" to build large regexps matching`
			`# "one of many" shell patterns.`
			`append('(?>.*?')`
			`extend(parts[i:j])`
			`append(')')`
			`i = j + 1`
			`append('.*')`
			`extend(parts[i:])`
			`res = ''.join(buffer)`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 02:28:24 +00:00			`return fr'(?s:{res})\Z'`