Blame SOURCES/pyproject_save_files.py

7ef706
import argparse
7ef706
import fnmatch
7ef706
import json
7ef706
import os
7ef706
7ef706
from collections import defaultdict
7ef706
from keyword import iskeyword
7ef706
from pathlib import PosixPath, PurePosixPath
7ef706
from importlib.metadata import Distribution
7ef706
7ef706
7ef706
# Characters that split one %files line into several entries; a path that
# contains any of them has to be quoted (see escape_rpm_path).
# From RPM's build/files.c strtokWithQuotes delim argument
RPM_FILES_DELIMETERS = ' \n\t'

# RPM hardcodes the lists of manpage extensions and directories,
# so we have to maintain separate ones :(
# There is an issue for RPM to provide the lists as macros:
# https://github.com/rpm-software-management/rpm/issues/1865
# The original lists can be found here:
# https://github.com/rpm-software-management/rpm/blob/master/scripts/brp-compress

# Compression suffixes (without the leading dot) that are dropped from
# a manpage file name before the trailing "*" is appended.
MANPAGE_EXTENSIONS = ['gz', 'Z', 'bz2', 'xz', 'lzma', 'zst', 'zstd']
# fnmatch-style patterns of manpage directories, relative to the prefix;
# they are matched against the parent directory of each installed file.
MANDIRS = [
    '/man/man*',
    '/man/*/man*',
    '/info',
    '/share/man/man*',
    '/share/man/*/man*',
    '/share/info',
    '/kerberos/man',
    '/X11R6/man/man*',
    '/lib/perl5/man/man*',
    '/share/doc/*/man/man*',
    '/lib/*/man/man*',
    '/share/fish/man/man*',
]
7ef706
7ef706
7ef706
class BuildrootPath(PurePosixPath):
    """
    A pure POSIX path that is interpreted inside a buildroot.

    An absolute BuildrootPath is "relative" to the buildroot,
    e.g. /usr/lib stands for %{buildroot}/usr/lib.
    The object itself does not know where the buildroot is.
    """

    @staticmethod
    def from_real(realpath, *, root):
        """
        Convert a real on-disk path located under root to a BuildrootPath.

        Raises ValueError (from relative_to) when realpath is not under root.

        For example::

            >>> BuildrootPath.from_real(PosixPath('/tmp/buildroot/foo'), root=PosixPath('/tmp/buildroot'))
            BuildrootPath('/foo')
        """
        inside_root = realpath.relative_to(root)
        return BuildrootPath("/") / inside_root

    def to_real(self, root):
        """
        Convert this path to a real path located under root.

        The result has the type of root.

        For example::

            >>> BuildrootPath('/foo').to_real(PosixPath('/tmp/buildroot'))
            PosixPath('/tmp/buildroot/foo')
        """
        without_leading_slash = self.relative_to("/")
        return root / without_leading_slash

    def normpath(self):
        """
        Collapse the /../ (and similar) parts of the path lexically.

        PurePaths have no .resolve() and the .resolve() of concrete Paths
        touches the file system. This is purely textual instead, hence it
        assumes that there are no symbolic links.

        Example:

            >>> BuildrootPath('/usr/lib/python/../pypy').normpath()
            BuildrootPath('/usr/lib/pypy')
        """
        collapsed = os.path.normpath(self)
        return type(self)(collapsed)
7ef706
7ef706
7ef706
def pycache_dir(script):
    """
    Return the __pycache__ directory that sits next to the given script.

    Both the argument and the result are BuildrootPaths.

    Example:

        >>> pycache_dir(BuildrootPath('/whatever/bar.py'))
        BuildrootPath('/whatever/__pycache__')

        >>> pycache_dir(BuildrootPath('/opt/python3.10/foo.py'))
        BuildrootPath('/opt/python3.10/__pycache__')
    """
    containing_dir = script.parent
    return containing_dir / "__pycache__"
7ef706
7ef706
7ef706
def pycached(script, python_version):
    """
    Return a two-item list: the script itself and a glob for its bytecode files.
    This mirrors the %pycached macro.

    script is a BuildrootPath ending with .py; the glob is a BuildrootPath too.
    Only the first two components of python_version are used for the
    cpython-XY tag.

    Examples:

        >>> pycached(BuildrootPath('/whatever/bar.py'), '3.8')
        [BuildrootPath('/whatever/bar.py'), BuildrootPath('/whatever/__pycache__/bar.cpython-38{,.opt-?}.pyc')]

        >>> pycached(BuildrootPath('/opt/python3.10/foo.py'), '3.10')
        [BuildrootPath('/opt/python3.10/foo.py'), BuildrootPath('/opt/python3.10/__pycache__/foo.cpython-310{,.opt-?}.pyc')]
    """
    assert script.suffix == ".py"
    major_minor = python_version.split(".")[:2]
    tag = "".join(major_minor)
    # the doubled braces produce a literal {,.opt-?} in the resulting glob
    bytecode_glob = pycache_dir(script) / f"{script.stem}.cpython-{tag}{{,.opt-?}}.pyc"
    return [script, bytecode_glob]
7ef706
7ef706
7ef706
def add_file_to_module(paths, module_name, module_type, files_dirs, *files):
    """
    Helper procedure, records the given files under module_name / module_type.

    files_dirs is the key of the list to extend ("files" or "dirs").
    If module_name has no entry of module_type yet, a new one is created.
    Otherwise the files are appended, unless the first of them is already
    recorded (only the first one is checked).
    """
    entries = paths["modules"][module_name]
    existing = next((entry for entry in entries if entry["type"] == module_type), None)

    if existing is None:
        fresh = {"type": module_type, "files": [], "dirs": []}
        fresh[files_dirs] = list(files)
        entries.append(fresh)
        return

    recorded = existing[files_dirs]
    if files[0] not in recorded:
        recorded.extend(files)
7ef706
7ef706
7ef706
def add_py_file_to_module(paths, module_name, module_type, path, python_version,
                          *, include_pycache_dir):
    """
    Helper procedure, records a .py file under module_name / module_type.

    The bytecode cache glob is always recorded together with the file.
    The __pycache__ directory itself is recorded only when
    include_pycache_dir is true.
    """
    source_and_bytecode = pycached(path, python_version)
    add_file_to_module(paths, module_name, module_type, "files", *source_and_bytecode)
    if not include_pycache_dir:
        return
    add_file_to_module(paths, module_name, module_type, "dirs", pycache_dir(path))
7ef706
7ef706
7ef706
def add_lang_to_module(paths, module_name, path):
    """
    Helper procedure, files a lang file under its language for module_name.

    The language is taken from the directory that contains the closest
    LC_MESSAGES ancestor of path; a country suffix is dropped (en_US -> en).

    Returns True if the language code detection was successful,
    False (and records nothing) if path has no LC_MESSAGES ancestor.
    """
    ancestors = path.parents
    lc_messages_at = next(
        (pos for pos, ancestor in enumerate(ancestors) if ancestor.name == 'LC_MESSAGES'),
        None,
    )
    if lc_messages_at is None:
        return False

    # the locale directory is the one right above LC_MESSAGES
    locale_name = ancestors[lc_messages_at + 1].name
    language, _, _ = locale_name.partition('_')

    per_language = paths["lang"].setdefault(module_name, defaultdict(list))
    per_language[language].append(path)
    return True
7ef706
7ef706
7ef706
def prepend_mandirs(prefix):
    """
    Return the MANDIRS patterns as strings, each one prefixed with prefix.
    """
    prefix_text = str(prefix)
    return [prefix_text + mandir for mandir in MANDIRS]
7ef706
7ef706
7ef706
def normalize_manpage_filename(prefix, path):
    """
    Turn the path of a manpage into a glob that also matches its compressed form.

    If the path lives in a directory processed by RPM's brp-compress script,
    a known compression extension (if any) is stripped and '*' is appended
    to the file name. Any other path is returned unchanged.
    Rationale: https://docs.fedoraproject.org/en-US/packaging-guidelines/#_manpages

    Examples:

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/man/de/man1/linkchecker.1'))
        BuildrootPath('/usr/share/man/de/man1/linkchecker.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/doc/en/man/man1/getmac.1'))
        BuildrootPath('/usr/share/doc/en/man/man1/getmac.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/man/man8/abc.8.zstd'))
        BuildrootPath('/usr/share/man/man8/abc.8*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/kerberos/man/dir'))
        BuildrootPath('/usr/kerberos/man/dir')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/kerberos/man/dir.1'))
        BuildrootPath('/usr/kerberos/man/dir.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/bin/getmac'))
        BuildrootPath('/usr/bin/getmac')
    """
    directory = str(path.parent)
    in_mandir = any(
        fnmatch.fnmatch(directory, pattern) for pattern in prepend_mandirs(prefix)
    )

    # "dir" is explicitly excluded by RPM
    # https://github.com/rpm-software-management/rpm/blob/rpm-4.17.0-release/scripts/brp-compress#L24
    if not in_mandir or path.name == "dir":
        return path

    if path.suffix[1:] in MANPAGE_EXTENSIONS:
        # "abc.1.gz" -> "abc.1*"
        base = path.stem
    else:
        # "abc.1" -> "abc.1*"
        base = path.name
    return BuildrootPath(path.parent / (base + "*"))
7ef706
7ef706
7ef706
def is_valid_module_name(s):
    """Tell whether a string can be used as a (public) module name.

    The string has to be a valid Python identifier, must not be a Python
    keyword and must not start with an underscore - such names are
    treated as private.

    Examples:

        >>> is_valid_module_name('module_name')
        True

        >>> is_valid_module_name('12module_name')
        False

        >>> is_valid_module_name('module-name')
        False

        >>> is_valid_module_name('return')
        False

        >>> is_valid_module_name('_module_name')
        False
    """
    if not s.isidentifier():
        return False
    if iskeyword(s):
        return False
    return not s.startswith("_")
7ef706
7ef706
7ef706
def module_names_from_path(path):
    """Get all importable module names from given path.

    Only ".py" and ".so" files are considered importable modules, together
    with every directory leading to them (ie. "foo/bar/baz.py" gives "foo",
    "foo.bar" and "foo.bar.baz").
    If any part of the path is not a valid module name, the whole path
    is discarded.

    Return set of all valid possibilities (possibly empty).
    """
    suffix = path.suffix
    if suffix == ".py":
        leaf = path.stem
    elif suffix == ".so":
        # .so files can have two suffixes - cut both of them
        leaf = PosixPath(path.stem).stem
    else:
        # Not a module file at all
        return set()

    components = [*path.parts[:-1], leaf]

    # '__init__' stands for the package directory, it is not a module name itself.
    # It's unclear whether there can be __init__.so files in the Python packages.
    # The idea to implement this file was raised in 2008 on Python-ideas mailing list
    # (https://mail.python.org/pipermail/python-ideas/2008-October/002292.html)
    # and there are a few reports of people compiling their __init__.py to __init__.so.
    # However it's not officially documented nor forbidden,
    # so the check is done on the name with the suffix(es) already stripped.
    if leaf == "__init__":
        components.pop()

    if not all(is_valid_module_name(component) for component in components):
        return set()

    # Build "foo", "foo.bar", "foo.bar.baz", ... one component at a time
    qualified_names = set()
    dotted = ""
    for component in components:
        dotted = f"{dotted}.{component}" if dotted else component
        qualified_names.add(dotted)
    return qualified_names
7ef706
7ef706
5d3ed2
def is_license_file(path, license_files, license_directories):
    """
    Tell whether the given BuildrootPath corresponds to a "License-File" entry.

    The path matches when, taken relative to any of the license_directories,
    it is string-wise equal to one of the license_files entries.
    Without any license_files or license_directories nothing matches.

    Examples:
        >>> site_packages = BuildrootPath('/usr/lib/python3.12/site-packages')
        >>> distinfo = site_packages / 'foo-1.0.dist-info'
        >>> license_directories = [distinfo / 'licenses', distinfo]
        >>> license_files = ['LICENSE.txt', 'AUTHORS.md']
        >>> is_license_file(distinfo / 'AUTHORS.md', license_files, license_directories)
        True
        >>> is_license_file(distinfo / 'licenses/LICENSE.txt', license_files, license_directories)
        True
        >>> # we don't match based on directory only
        >>> is_license_file(distinfo / 'licenses/COPYING', license_files, license_directories)
        False
        >>> is_license_file(site_packages / 'foo/LICENSE.txt', license_files, license_directories)
        False
    """
    if not (license_files and license_directories):
        return False
    return any(
        path.is_relative_to(directory)
        and str(path.relative_to(directory)) in license_files
        for directory in license_directories
    )
5d3ed2
5d3ed2
7ef706
def classify_paths(
    record_path, parsed_record_content, metadata, sitedirs, python_version, prefix
):
    """
    Sort every BuildrootPath from parsed_record_content into a dict structure
    that makes it easier to filter the files for the %files and %check sections.

    The returned dict has these keys:

      "metadata"      dist-info content: "files", "dirs", "docs", "licenses"
      "lang"          .mo files: [module_name or None][language_code] -> list
      "modules"       module name -> list of module dicts
      "module_names"  set of qualified importable names ("foo.bar.baz")
      "other"         "files" that could not be assigned to any module

    Each "module" is a dict with "type" ("package", "script", "extension"), and "files" and "dirs".
    """
    distinfo = record_path.parent
    metadata_paths = {
        "files": [],  # regular %file entries with dist-info content
        "dirs": [distinfo],  # %dir %file entries with dist-info directory
        "docs": [],  # to be used once there is upstream way to recognize READMEs
        "licenses": [],  # %license entries parsed from dist-info METADATA file
    }
    other_files = []  # regular %file entries we could not parse :(
    paths = {
        "metadata": metadata_paths,
        "lang": {},  # %lang entries: [module_name or None][language_code] lists of .mo files
        "modules": defaultdict(list),  # each importable module (directory, .py, .so)
        "module_names": set(),  # qualified names of each importable module ("foo.bar.baz")
        "other": {"files": other_files},
    }

    license_files = metadata.get_all('License-File')
    # See PEP 639 "Root License Directory" for the licenses/ subdirectory.
    # setuptools was the first known build backend to implement License-File.
    # Unfortunately they don't put licenses to the license directory (yet):
    #     https://github.com/pypa/setuptools/issues/3596
    # Hence, we check licenses in both licenses and dist-info
    license_directories = (distinfo / 'licenses', distinfo)

    # In RECORDs generated by pip, there are no directories, only files.
    # The example RECORD from PEP 376 does not contain directories either.
    # Hence, we'll only assume files, but TODO get it officially documented.
    for path in parsed_record_content:
        if path.suffix == ".pyc":
            # we handle bytecode separately
            continue

        ancestors = path.parents

        # --- content of the dist-info directory ---
        if distinfo in ancestors:
            if path.parent == distinfo and path.name in ("RECORD", "REQUESTED"):
                # RECORD and REQUESTED files are removed in %pyproject_install
                # See PEP 627
                continue
            if is_license_file(path, license_files, license_directories):
                kind = "licenses"
            else:
                kind = "files"
            metadata_paths[kind].append(path)
            # nested directories within distinfo
            depth = ancestors.index(distinfo)
            for nested_dir in list(ancestors)[:depth]:  # no direct slice until Python 3.10
                if nested_dir not in metadata_paths["dirs"]:
                    metadata_paths["dirs"].append(nested_dir)
            continue

        # the first sitedir that contains the file wins
        sitedir = next((candidate for candidate in sitedirs if candidate in ancestors), None)

        # --- files outside of any sitedir ---
        if sitedir is None:
            if path.suffix == ".mo":
                if not add_lang_to_module(paths, None, path):
                    other_files.append(path)
            else:
                other_files.append(normalize_manpage_filename(prefix, path))
            continue

        # Get only the part without sitedir prefix to classify module names
        relative_path = path.relative_to(sitedir)
        paths["module_names"].update(module_names_from_path(relative_path))

        # --- files directly in a sitedir ---
        if path.parent == sitedir:
            if path.suffix == ".so":
                # extension modules can have 2 suffixes
                extension_name = BuildrootPath(path.stem).stem
                add_file_to_module(paths, extension_name, "extension", "files", path)
            elif path.suffix == ".py":
                # we add the .pyc files, but not top-level __pycache__
                add_py_file_to_module(
                    paths, path.stem, "script", path, python_version,
                    include_pycache_dir=False
                )
            else:
                other_files.append(path)
            continue

        # --- files inside a directory of a sitedir ---
        # we add all dirs upwards until sitedir; the topmost one names the module
        depth = ancestors.index(sitedir)
        top_level_name = ancestors[depth - 1].name
        for directory in list(ancestors)[:depth]:  # no direct slice until Python 3.10
            add_file_to_module(paths, top_level_name, "package", "dirs", directory)

        if path.suffix == ".mo" and add_lang_to_module(paths, top_level_name, path):
            # recorded as a %lang file, nothing more to do
            continue

        if path.suffix == ".py":
            # we add the .pyc files, and their __pycache__
            add_py_file_to_module(
                paths, top_level_name, "package", path, python_version,
                include_pycache_dir=True
            )
        else:
            add_file_to_module(paths, top_level_name, "package", "files", path)

    return paths
7ef706
7ef706
7ef706
def escape_rpm_path(path):
    """
    Escape special characters in string-paths or BuildrootPaths

    A path with a space (or another %files delimiter) would be read by RPM
    as several paths, so such path is wrapped in "quotes".
    A literal % symbol could be expanded as a macro, so it is escaped.

    Due to limitations in RPM,
    some paths with spaces and other special characters are not supported
    and NotImplementedError is raised for them.

    Examples:

        >>> escape_rpm_path(BuildrootPath('/usr/lib/python3.9/site-packages/setuptools'))
        '/usr/lib/python3.9/site-packages/setuptools'

        >>> escape_rpm_path('/usr/lib/python3.9/site-packages/setuptools/script (dev).tmpl')
        '"/usr/lib/python3.9/site-packages/setuptools/script (dev).tmpl"'

        >>> escape_rpm_path('/usr/share/data/100%valid.path')
        '/usr/share/data/100%%%%%%%%valid.path'

        >>> escape_rpm_path('/usr/share/data/100 % valid.path')
        '"/usr/share/data/100 %%%%%%%% valid.path"'

        >>> escape_rpm_path('/usr/share/data/1000 %% valid.path')
        '"/usr/share/data/1000 %%%%%%%%%%%%%%%% valid.path"'

        >>> escape_rpm_path('/usr/share/data/spaces and "quotes"')
        Traceback (most recent call last):
          ...
        NotImplementedError: ...

        >>> escape_rpm_path('/usr/share/data/spaces and [square brackets]')
        Traceback (most recent call last):
          ...
        NotImplementedError: ...
    """
    original = str(path)
    # Escaping by 8 %s has been verified in RPM 4.16 and 4.17, but probably not stable
    # See this thread http://lists.rpm.org/pipermail/rpm-list/2021-June/002048.html
    # On the CI, we build tests/escape_percentages.spec to verify this assumption
    escaped = original.replace("%", "%" * 8)

    needs_quotes = any(delimiter in escaped for delimiter in RPM_FILES_DELIMETERS)
    if not needs_quotes:
        return escaped

    if '"' in escaped:
        # As far as we know, RPM cannot list such file individually
        # See this thread http://lists.rpm.org/pipermail/rpm-list/2021-June/002048.html
        raise NotImplementedError(f'" symbol in path with spaces is not supported by %pyproject_save_files: {original!r}')
    if "[" in escaped or "]" in escaped:
        # See https://bugzilla.redhat.com/show_bug.cgi?id=1990879
        # and https://github.com/rpm-software-management/rpm/issues/1749
        raise NotImplementedError(f'[ or ] symbol in path with spaces is not supported by %pyproject_save_files: {original!r}')
    return f'"{escaped}"'
7ef706
7ef706
7ef706
def generate_file_list(paths_dict, module_globs, include_others=False):
    """
    This function takes the classified paths_dict and turns it into lines
    for the %files section. Returns sorted list with text lines, no Path objects.

    Only includes files from modules that match module_globs, metadata and
    optionally (include_others) all other files.

    module_globs is a set of shell-like globs matched case-sensitively
    against the top-level module names.

    Every glob has to match at least one module, ValueError is raised otherwise.
    Multiple globs matching identical module(s) are OK.
    When no modules were detected at all, globs that match an empty string
    (such as '*') are considered satisfied.
    """
    files = set()
    # A missing "lang" section or a module without lang files simply
    # contributes no %lang lines.
    lang_by_module = paths_dict.get("lang", {})

    def lang_lines(module_name):
        """Return the %lang(...) lines of module_name (None: outside of modules)."""
        return {
            f"%lang({lang_code}) {escape_rpm_path(p)}"
            for lang_code, lang_paths in lang_by_module.get(module_name, {}).items()
            for p in lang_paths
        }

    if include_others:
        files.update(f"{escape_rpm_path(p)}" for p in paths_dict["other"]["files"])
        files.update(lang_lines(None))

    files.update(f"{escape_rpm_path(p)}" for p in paths_dict["metadata"]["files"])
    for macro in "dir", "doc", "license":
        files.update(f"%{macro} {escape_rpm_path(p)}" for p in paths_dict["metadata"][f"{macro}s"])

    modules = paths_dict["modules"]
    done_modules = set()
    done_globs = set()

    for glob in module_globs:
        # Users using '*' don't care about the files in the package, so it's ok
        # not to fail the build when no modules are detected
        # There can be legitimate reasons to create a package without Python modules
        # This has to be evaluated for each glob: checking only the loop
        # variable left over after the loop would consider just one glob
        # and would fail on an unbound name when there are no globs at all.
        if not modules and fnmatch.fnmatchcase("", glob):
            done_globs.add(glob)
            continue

        for name in modules:
            if not fnmatch.fnmatchcase(name, glob):
                continue
            done_globs.add(glob)
            if name in done_modules:
                # already emitted thanks to another glob
                continue
            files.update(lang_lines(name))
            for module in modules[name]:
                files.update(f"%dir {escape_rpm_path(p)}" for p in module["dirs"])
                files.update(f"{escape_rpm_path(p)}" for p in module["files"])
            done_modules.add(name)

    missed = module_globs - done_globs
    if missed:
        missed_text = ", ".join(sorted(missed))
        raise ValueError(f"Globs did not match any module: {missed_text}")

    return sorted(files)
7ef706
7ef706
5d3ed2
def generate_module_list(paths_dict, module_globs):
    """
    This function reads the module names from the paths_dict created by
    the classify_paths() function and keeps those whose top-level module
    name matches any of the provided module_globs.

    Returns sorted list with matching qualified module names.

    Examples:

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'foo'})
        ['foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'*foo'})
        ['foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'foo', 'baz'})
        ['baz', 'foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'*'})
        ['baz', 'foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'bar'})
        []

    Submodules aren't discovered:

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'*bar'})
        []
    """

    def top_level_matches(qualified_name):
        # Match the top-level part of the qualified name, eg. 'foo.bar.baz' -> 'foo'
        top_level_name = qualified_name.partition('.')[0]
        return any(fnmatch.fnmatchcase(top_level_name, glob) for glob in module_globs)

    selected = {name for name in paths_dict['module_names'] if top_level_matches(name)}
    return sorted(selected)
5d3ed2
5d3ed2
7ef706
def parse_varargs(varargs):
    """
    Parse varargs from the %pyproject_save_files macro

    Arguments starting with + are treated as flags, everything else is a glob.
    The only known flag is +auto.

    Returns a set of globs and a boolean flag whether to include all the other files

    Raises ValueError for unknown flags and for globs with dots or slashes
    (namespace packages).

    Good examples:

        >>> parse_varargs(['*'])
        ({'*'}, False)

        >>> mods, auto = parse_varargs(['requests*', 'kerberos', '+auto'])
        >>> auto
        True
        >>> sorted(mods)
        ['kerberos', 'requests*']

        >>> mods, auto = parse_varargs(['tldr', 'tensorf*'])
        >>> auto
        False
        >>> sorted(mods)
        ['tensorf*', 'tldr']

        >>> parse_varargs(['+auto'])
        (set(), True)

    Bad examples:

        >>> parse_varargs(['+kinkdir'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +kinkdir

        >>> parse_varargs(['good', '+bad', '*ugly*'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +bad

        >>> parse_varargs(['+bad', 'my.bad'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +bad

        >>> parse_varargs(['mod', 'mod.*'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with . in the glob: mod.*. ...

        >>> parse_varargs(['my.bad', '+bad'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with . in the glob: my.bad. ...

        >>> parse_varargs(['mod/submod'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with / in the glob: mod/submod. ...
    """
    namespace_error_template = (
        "Attempted to use a namespaced package with {symbol} in the glob: {arg}. "
        "That is not (yet) supported. Use {top} instead and see "
        "https://bugzilla.redhat.com/1935266 for details."
    )
    globs = set()
    include_auto = False

    for arg in varargs:
        if arg.startswith("+"):
            if arg != "+auto":
                raise ValueError(f"Invalid argument: {arg}")
            include_auto = True
            continue

        # a dot is reported in preference to a slash
        for symbol in (".", "/"):
            if symbol in arg:
                top = arg.partition(symbol)[0]
                raise ValueError(namespace_error_template.format(symbol=symbol, arg=arg, top=top))

        globs.add(arg)

    return globs, include_auto
7ef706
7ef706
7ef706
def load_parsed_record(pyproject_record):
    """
    Load the JSON file with the preprocessed RECORD content.

    The file maps the path of a RECORD file to the list of the paths
    listed in it. All the paths are returned as BuildrootPaths:
    {BuildrootPath(record): [BuildrootPath(file), ...]}.

    Raises FileExistsError when the file describes more than one RECORD.
    """
    # JSON is read as UTF-8 explicitly rather than with the locale encoding
    # of the build environment (the output files are written as UTF-8 too).
    with open(pyproject_record, encoding="utf-8") as pyproject_record_file:
        content = json.load(pyproject_record_file)

    if len(content) > 1:
        raise FileExistsError("%pyproject install has found more than one *.dist-info/RECORD file. "
                              "Currently, %pyproject_save_files supports only one wheel → one file list mapping. "
                              "Feel free to open a bugzilla for pyproject-rpm-macros and describe your usecase.")

    # Redefine strings stored in JSON to BuildRootPaths
    return {
        BuildrootPath(record_path): [BuildrootPath(f) for f in files]
        for record_path, files in content.items()
    }
7ef706
7ef706
7ef706
def dist_metadata(buildroot, record_path):
    """
    Returns the metadata of the distribution whose RECORD is at record_path.

    record_path is a BuildrootPath; its parent (the dist-info directory)
    is looked up on disk under buildroot.
    The result is what importlib.metadata provides as Distribution.metadata
    (an email-message-like object, possibly empty).
    """
    distinfo_on_disk = record_path.parent.to_real(buildroot)
    return Distribution.at(distinfo_on_disk).metadata
7ef706
7ef706
7ef706
def pyproject_save_files_and_modules(buildroot, sitelib, sitearch, python_version, pyproject_record, prefix, varargs):
    """
    Takes arguments from the %{pyproject_save_files} macro

    Returns tuple: list of lines for the %files section and list of
    qualified module names for the %check section
    """
    # sitelib and sitearch may be the same directory (it is so on 32 bit
    # architectures); going through a set avoids browsing it twice
    sitedirs = sorted({sitelib, sitearch})

    globs, include_auto = parse_varargs(varargs)

    file_lines = []
    module_lines = []
    for record_path, files in load_parsed_record(pyproject_record).items():
        paths_dict = classify_paths(
            record_path,
            files,
            dist_metadata(buildroot, record_path),
            sitedirs,
            python_version,
            prefix,
        )
        file_lines += generate_file_list(paths_dict, globs, include_auto)
        module_lines += generate_module_list(paths_dict, globs)

    return file_lines, module_lines
7ef706
7ef706
7ef706
def main(cli_args):
    """
    Compute the %files lines and the module names for the parsed command
    line arguments and write each list to its output file, one item per
    line, UTF-8 encoded and terminated by a newline.
    """
    file_section, module_names = pyproject_save_files_and_modules(
        buildroot=cli_args.buildroot,
        sitelib=cli_args.sitelib,
        sitearch=cli_args.sitearch,
        python_version=cli_args.python_version,
        pyproject_record=cli_args.pyproject_record,
        prefix=cli_args.prefix,
        varargs=cli_args.varargs,
    )

    files_text = "\n".join(file_section) + "\n"
    cli_args.output_files.write_text(files_text, encoding="utf-8")

    modules_text = "\n".join(module_names) + "\n"
    cli_args.output_modules.write_text(modules_text, encoding="utf-8")
7ef706
7ef706
7ef706
def argparser():
    """
    Build the command line parser of %pyproject_save_files.

    The options are filled in by the macro, so they (and --help) are hidden
    from the help output; only the MODULE_GLOB arguments are documented.
    """
    parser = argparse.ArgumentParser(
        description="Create %{pyproject_files} for a Python project.",
        prog="%pyproject_save_files",
        add_help=False,
        # custom usage to add +auto
        usage="%(prog)s MODULE_GLOB [MODULE_GLOB ...] [+auto]",
    )
    parser.add_argument(
        '--help', action='help',
        default=argparse.SUPPRESS,
        help=argparse.SUPPRESS,
    )

    # (option, converter) pairs; all of them are mandatory and undocumented
    required_options = (
        ("--output-files", PosixPath),
        ("--output-modules", PosixPath),
        ("--buildroot", PosixPath),
        ("--sitelib", BuildrootPath),
        ("--sitearch", BuildrootPath),
        ("--python-version", str),
        ("--pyproject-record", PosixPath),
        ("--prefix", PosixPath),
    )
    required = parser.add_argument_group("required arguments")
    for option, converter in required_options:
        required.add_argument(option, type=converter, required=True, help=argparse.SUPPRESS)

    parser.add_argument(
        "varargs", nargs="+", metavar="MODULE_GLOB",
        help="Shell-like glob matching top-level module names to save into %%{pyproject_files}",
    )
    return parser
7ef706
7ef706
7ef706
if __name__ == "__main__":
    # Parse the command line and hand the result straight over to main()
    main(argparser().parse_args())