audk/BaseTools/Plugin/CodeQL/analyze/globber.py

# @file globber.py
#
# Provides glob pattern matching functionality for use by the CodeQL plugin.
#
# Copyright 2019 Jaakko Kangasharju
#
#            Apache License
#      Version 2.0, January 2004
#   http://www.apache.org/licenses/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file has been altered from its original form. Based on code in:
#   https://github.com/advanced-security/filter-sarif
#
# Specifically:
#   https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
#
# It primarily contains modifications made to integrate with the CodeQL plugin.
#
# SPDX-License-Identifier: Apache-2.0
##

import re

# Each expression below detects one way in which a '**' wildcard can appear
# in a pattern without being a complete path component on its own. match()
# rejects a pattern as soon as any one of them is found in it.

# '**' directly preceded by a character other than '/' or a backslash.
_double_star_after_invalid_regex = re.compile(
    r'[^/\\]\*\*')

# '**' at the very start of the pattern, followed by a character other
# than '/'.
_double_star_first_before_invalid_regex = re.compile(
    r'^\*\*[^/]')

# '**' preceded by anything but a backslash and followed by a character
# other than '/'.
_double_star_middle_before_invalid_regex = re.compile(
    r'[^\\]\*\*[^/]')


def _match_component(pattern_component: str,
                     file_name_component: str) -> bool:
    """Match one pattern component against one file name component.

    Neither argument is expected to contain a path separator. Within the
    pattern, '*' matches any run of characters (including none), '?' matches
    exactly one character, and a backslash makes the character that follows
    it match literally. A backslash that ends the pattern never matches.

    The answer is computed with a table over (pattern suffix, name suffix)
    pairs instead of recursive backtracking. The work is therefore bounded
    by len(pattern_component) * len(file_name_component) steps, even for
    patterns with many '*' that ultimately fail, and a long name cannot
    exhaust the interpreter's recursion limit.

    Args:
        pattern_component (str): A single component of a glob pattern.
        file_name_component (str): A single component of a file name.

    Returns:
        bool: True if the whole file name component is matched by the whole
              pattern component, False otherwise.
    """
    pattern_len = len(pattern_component)
    name_len = len(file_name_component)

    # A row answers, for one pattern suffix, "does it match the name suffix
    # starting at index j?" for every j in 0..name_len.
    #
    # next_row  -> pattern suffix starting one character further on
    # after_next -> pattern suffix starting two characters further on
    #
    # The initial row is the empty pattern suffix, which only matches the
    # empty name suffix.
    next_row = [False] * name_len + [True]
    after_next = None

    for i in range(pattern_len - 1, -1, -1):
        token = pattern_component[i]
        row = [False] * (name_len + 1)

        # With the name used up, the only pattern remainder accepted is
        # exactly one '*'.
        row[name_len] = token == '*' and i == pattern_len - 1

        for j in range(name_len - 1, -1, -1):
            if token == '*':
                # Either the '*' absorbs name[j], or it matches nothing.
                row[j] = row[j + 1] or next_row[j]
            elif token == '?':
                row[j] = next_row[j + 1]
            elif token == '\\':
                # The escaped character must exist and equal name[j]; the
                # bounds check short-circuits before after_next is read.
                row[j] = (i + 1 < pattern_len and
                          pattern_component[i + 1] ==
                          file_name_component[j] and
                          after_next[j + 1])
            else:
                row[j] = (token == file_name_component[j] and
                          next_row[j + 1])

        after_next, next_row = next_row, row

    return next_row[0]


def _match_components(pattern_components, file_name_components):
    """Match a sequence of pattern components against file name components.

    A pattern component equal to '**' matches zero or more whole file name
    components. Every other pattern component has to match exactly one file
    name component, which is decided by _match_component().

    The answer is computed with a table over (pattern suffix, name suffix)
    pairs instead of recursive backtracking, so patterns containing several
    '**' components cannot cause an exponential number of retries and deep
    paths cannot exhaust the interpreter's recursion limit.

    Args:
        pattern_components (list): The glob pattern split into components.
        file_name_components (list): The file name split into components.

    Returns:
        bool: True if all file name components are matched by all pattern
              components, False otherwise.
    """
    pattern_count = len(pattern_components)
    name_count = len(file_name_components)

    # tail_ok[j] tells whether the pattern components after the current one
    # match the file name components from index j onwards. Initially the
    # pattern tail is empty, which only matches an empty name tail.
    tail_ok = [False] * name_count + [True]

    for i in range(pattern_count - 1, -1, -1):
        component = pattern_components[i]
        is_double_star = component == '**'
        row = [False] * (name_count + 1)

        # With the file name used up, the only pattern remainder accepted
        # is exactly one '**'.
        row[name_count] = is_double_star and i == pattern_count - 1

        for j in range(name_count - 1, -1, -1):
            if is_double_star:
                # Either '**' absorbs name component j, or it matches
                # nothing.
                row[j] = row[j + 1] or tail_ok[j]
            else:
                # Only compare the components when the rest can match.
                row[j] = (tail_ok[j + 1] and
                          _match_component(
                              component, file_name_components[j]))

        tail_ok = row

    return tail_ok[0]


def match(pattern: str, file_name: str) -> bool:
    """Match a glob pattern against a file name.

    Glob pattern matching is for file names, which do not need to exist as
    files on the file system.

    A file name is a sequence of directory names, possibly followed by the name
    of a file, with the components separated by a path separator. A glob
    pattern is similar, except it may contain special characters: A '?' matches
    any character in a name. A '*' matches any sequence of characters (possibly
    empty) in a name. Both of these match only within a single component, i.e.,
    they will not match a path separator. A component in a pattern may also be
    a literal '**', which matches zero or more components in the complete file
    name. A backslash '\\' in a pattern acts as an escape character, and
    indicates that the following character is to be matched literally, even if
    it is a special character.

    Trailing path separators are ignored in both the pattern and the file
    name.

    Args:
        pattern (str): The pattern to match. The path separator in patterns is
                       always '/'.
        file_name (str): The file name to match against. Both '/' and '\\'
                         are accepted as path separators in file names.

    Returns:
        bool: True if the pattern matches, False otherwise.

    Raises:
        ValueError: If the pattern contains a '**' that is not alone between
                    path separators.
    """
    invalid_double_star_checks = (
        _double_star_after_invalid_regex,
        _double_star_first_before_invalid_regex,
        _double_star_middle_before_invalid_regex,
    )
    if any(check.search(pattern) is not None
           for check in invalid_double_star_checks):
        raise ValueError(
            '** in {} not alone between path separators'.format(pattern))

    pattern = pattern.rstrip('/')

    # The file name is split on both '/' and '\' below, so a trailing
    # separator of either kind is dropped here. Stripping only '/' would
    # leave an empty last component for names such as 'dir\'.
    file_name = file_name.rstrip('/\\')

    # Adjacent '**' components are equivalent to a single one.
    while '**/**' in pattern:
        pattern = pattern.replace('**/**', '**')

    pattern_components = pattern.split('/')

    # We split on '\' as well as '/' to support unix and windows-style paths
    file_name_components = re.split(r'[\\/]', file_name)

    return _match_components(pattern_components, file_name_components)
BaseTools/Plugin/CodeQL: Add CodeQL build plugin Adds a CodeQL plugin that supports CodeQL in the build system. 1. CodeQlBuildPlugin - Generates a CodeQL database for a given build. 2. CodeQlAnalyzePlugin - Analyzes a CodeQL database and interprets results. 3. External dependencies - Assist with downloading the CodeQL CLI and making it available to the CodeQL plugins. 4. CodeQlQueries.qls - A C/C++ CodeQL query set run against the code. 5. Readme.md - A comprehensive readme file to help: - Platform integrators understand how to configure the plugin - Developers understand how to modify the plugin - Users understand how to use the plugin Read Readme.md for additional details. Cc: Bob Feng <bob.c.feng@intel.com> Cc: Liming Gao <gaoliming@byosoft.com.cn> Cc: Michael D Kinney <michael.d.kinney@intel.com> Cc: Rebecca Cran <rebecca@bsdio.com> Cc: Sean Brogan <sean.brogan@microsoft.com> Cc: Yuwei Chen <yuwei.chen@intel.com> Signed-off-by: Michael Kubacki <michael.kubacki@microsoft.com> Reviewed-by: Yuwei Chen <yuwei.chen@intel.com> Reviewed-by: Sean Brogan <sean.brogan@microsoft.com> Acked-by: Laszlo Ersek <lersek@redhat.com> Acked-by: Michael D Kinney <michael.d.kinney@intel.com> 2023-09-25 18:11:13 +02:00			`# @file globber.py`
			`#`
			`# Provides global functionality for use by the CodeQL plugin.`
			`#`
			`# Copyright 2019 Jaakko Kangasharju`
			`#`
			`# Apache License`
			`# Version 2.0, January 2004`
			`# http://www.apache.org/licenses/`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`#`
			`# This file has been altered from its original form. Based on code in:`
			`# https://github.com/advanced-security/filter-sarif`
			`#`
			`# Specifically:`
			`# https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py`
			`#`
			`# It primarily contains modifications made to integrate with the CodeQL plugin.`
			`#`
			`# SPDX-License-Identifier: Apache-2.0`
			`##`

			`import re`

			`_double_star_after_invalid_regex = re.compile(r'[^/\\]\\')`
			`_double_star_first_before_invalid_regex = re.compile('^\\\\[^/]')`
			`_double_star_middle_before_invalid_regex = re.compile(r'[^\\]\\[^/]')`


			`def _match_component(pattern_component, file_name_component):`
			`if len(pattern_component) == 0 and len(file_name_component) == 0:`
			`return True`
			`elif len(pattern_component) == 0:`
			`return False`
			`elif len(file_name_component) == 0:`
			`return pattern_component == '*'`
			`elif pattern_component[0] == '*':`
			`return (_match_component(pattern_component, file_name_component[1:]) or`
			`_match_component(pattern_component[1:], file_name_component))`
			`elif pattern_component[0] == '?':`
			`return _match_component(pattern_component[1:], file_name_component[1:])`
			`elif pattern_component[0] == '\\':`
			`return (len(pattern_component) >= 2 and`
			`pattern_component[1] == file_name_component[0] and`
			`_match_component(`
			`pattern_component[2:], file_name_component[1:]))`
			`elif pattern_component[0] != file_name_component[0]:`
			`return False`
			`else:`
			`return _match_component(pattern_component[1:], file_name_component[1:])`


			`def _match_components(pattern_components, file_name_components):`
			`if len(pattern_components) == 0 and len(file_name_components) == 0:`
			`return True`
			`if len(pattern_components) == 0:`
			`return False`
			`if len(file_name_components) == 0:`
			`return len(pattern_components) == 1 and pattern_components[0] == '**'`
			`if pattern_components[0] == '**':`
			`return (_match_components(pattern_components, file_name_components[1:])`
			`or _match_components(`
			`pattern_components[1:], file_name_components))`
			`else:`
			`return (`
			`_match_component(`
			`pattern_components[0], file_name_components[0]) and`
			`_match_components(`
			`pattern_components[1:], file_name_components[1:]))`


			`def match(pattern: str, file_name: str):`
			`"""Match a glob pattern against a file name.`

			`Glob pattern matching is for file names, which do not need to exist as`
			`files on the file system.`

			`A file name is a sequence of directory names, possibly followed by the name`
			`of a file, with the components separated by a path separator. A glob`
			`pattern is similar, except it may contain special characters: A '?' matches`
			`any character in a name. A '*' matches any sequence of characters (possibly`
			`empty) in a name. Both of these match only within a single component, i.e.,`
			`they will not match a path separator. A component in a pattern may also be`
			`a literal '**', which matches zero or more components in the complete file`
			`name. A backslash '\\' in a pattern acts as an escape character, and`
			`indicates that the following character is to be matched literally, even if`
			`it is a special character.`

			`Args:`
			`pattern (str): The pattern to match. The path separator in patterns is`
			`always '/'.`
			`file_name (str): The file name to match against. The path separator in`
			`file names is the platform separator`

			`Returns:`
			`bool: True if the pattern matches, False otherwise.`
			`"""`
			`if (_double_star_after_invalid_regex.search(pattern) is not None or`
			`_double_star_first_before_invalid_regex.search(`
			`pattern) is not None or`
			`_double_star_middle_before_invalid_regex.search(pattern) is not None):`
			`raise ValueError(`
			`'** in {} not alone between path separators'.format(pattern))`

			`pattern = pattern.rstrip('/')`
			`file_name = file_name.rstrip('/')`

			`while '/' in pattern:`
			`pattern = pattern.replace('/', '**')`

			`pattern_components = pattern.split('/')`

			`# We split on '\' as well as '/' to support unix and windows-style paths`
			`file_name_components = re.split(r'[\\/]', file_name)`

			`return _match_components(pattern_components, file_name_components)`