Blame SOURCES/pyproject_save_files.py

838e4d
import argparse
838e4d
import fnmatch
838e4d
import json
838e4d
import os
838e4d
838e4d
from collections import defaultdict
98862f
from keyword import iskeyword
838e4d
from pathlib import PosixPath, PurePosixPath
838e4d
from importlib.metadata import Distribution
838e4d
838e4d
838e4d
# Characters that separate entries in RPM's %files section;
# taken from the delim argument of strtokWithQuotes in RPM's build/files.c
RPM_FILES_DELIMETERS = ' \n\t'

# RPM hardcodes the lists of manpage extensions and directories,
# so we have to maintain separate ones :(
# There is an issue for RPM to provide the lists as macros:
# https://github.com/rpm-software-management/rpm/issues/1865
# The original lists can be found here:
# https://github.com/rpm-software-management/rpm/blob/master/scripts/brp-compress
MANPAGE_EXTENSIONS = [
    'gz',
    'Z',
    'bz2',
    'xz',
    'lzma',
    'zst',
    'zstd',
]
# fnmatch-style patterns, each one gets appended to a prefix (see prepend_mandirs)
MANDIRS = [
    '/man/man*', '/man/*/man*',
    '/info',
    '/share/man/man*', '/share/man/*/man*',
    '/share/info',
    '/kerberos/man',
    '/X11R6/man/man*',
    '/lib/perl5/man/man*',
    '/share/doc/*/man/man*',
    '/lib/*/man/man*',
    '/share/fish/man/man*',
]
98862f
838e4d
838e4d
class BuildrootPath(PurePosixPath):
    """
    A path located inside a buildroot.

    An absolute BuildrootPath is to be read as "relative" to the buildroot,
    e.g. /usr/lib stands for %{buildroot}/usr/lib.
    The object itself does not remember which buildroot it belongs to.
    """

    @staticmethod
    def from_real(realpath, *, root):
        """
        Convert a real on-disk path located under root to a BuildrootPath.

        For example::

            >>> BuildrootPath.from_real(PosixPath('/tmp/buildroot/foo'), root=PosixPath('/tmp/buildroot'))
            BuildrootPath('/foo')
        """
        inside_root = realpath.relative_to(root)
        return BuildrootPath("/") / inside_root

    def to_real(self, root):
        """
        Convert this path to a real path located under the given root.

        For example::

            >>> BuildrootPath('/foo').to_real(PosixPath('/tmp/buildroot'))
            PosixPath('/tmp/buildroot/foo')
        """
        # drop the leading "/" so the join does not discard root
        without_anchor = self.relative_to("/")
        return root / without_anchor

    def normpath(self):
        """
        Collapse the /../ parts of the path without looking at real files.

        PurePaths have no .resolve() and the .resolve() of Paths touches
        real files, so this is a purely textual alternative.
        It assumes there are no symbolic links.

        Example:

            >>> BuildrootPath('/usr/lib/python/../pypy').normpath()
            BuildrootPath('/usr/lib/pypy')
        """
        collapsed = os.path.normpath(self)
        return self.__class__(collapsed)
838e4d
838e4d
838e4d
def pycache_dir(script):
    """
    Return the __pycache__ directory that sits next to the given script.

    Both the argument and the result are BuildrootPaths.

    Example:

        >>> pycache_dir(BuildrootPath('/whatever/bar.py'))
        BuildrootPath('/whatever/__pycache__')

        >>> pycache_dir(BuildrootPath('/opt/python3.10/foo.py'))
        BuildrootPath('/opt/python3.10/__pycache__')
    """
    containing_dir = script.parent
    return containing_dir.joinpath("__pycache__")
838e4d
838e4d
838e4d
def pycached(script, python_version):
    """
    Return a two-item list: the script itself and a glob for its bytecode.
    Works like the %pycached macro.

    script is a BuildrootPath of a .py file,
    the returned glob is represented as a BuildrootPath as well.
    Only the first two components of python_version are used.

    Examples:

        >>> pycached(BuildrootPath('/whatever/bar.py'), '3.8')
        [BuildrootPath('/whatever/bar.py'), BuildrootPath('/whatever/__pycache__/bar.cpython-38{,.opt-?}.pyc')]

        >>> pycached(BuildrootPath('/opt/python3.10/foo.py'), '3.10')
        [BuildrootPath('/opt/python3.10/foo.py'), BuildrootPath('/opt/python3.10/__pycache__/foo.cpython-310{,.opt-?}.pyc')]
    """
    assert script.suffix == ".py"
    major_minor = python_version.split(".")[:2]
    interpreter_tag = "cpython-" + "".join(major_minor)
    # {,.opt-?} covers the plain and the optimized bytecode files at once
    bytecode_name = script.stem + "." + interpreter_tag + "{,.opt-?}.pyc"
    return [script, pycache_dir(script) / bytecode_name]
838e4d
838e4d
838e4d
def add_file_to_module(paths, module_name, module_type, files_dirs, *files):
    """
    Helper procedure, records files under the entry of the given module_type
    of module_name.

    files_dirs is the key of the list to extend ("files" or "dirs").
    If module_name has no entry of module_type yet, a new one is created.
    Otherwise the files are added only when the first of them is not
    recorded yet.
    """
    entries = paths["modules"][module_name]
    existing = next((entry for entry in entries if entry["type"] == module_type), None)

    if existing is None:
        created = {"type": module_type, "files": [], "dirs": []}
        created[files_dirs] = list(files)
        entries.append(created)
        return

    recorded = existing[files_dirs]
    # only the first file decides whether the whole group is already present
    if files[0] not in recorded:
        recorded.extend(files)
838e4d
838e4d
838e4d
def add_py_file_to_module(paths, module_name, module_type, path, python_version,
                          *, include_pycache_dir):
    """
    Helper procedure, records a .py file under the entry of the given
    module_type of module_name.

    The bytecode cache glob of the file is always recorded with it.
    The __pycache__ directory is recorded only if include_pycache_dir is set.
    """
    script_and_bytecode = pycached(path, python_version)
    add_file_to_module(paths, module_name, module_type, "files", *script_and_bytecode)
    if not include_pycache_dir:
        return
    add_file_to_module(paths, module_name, module_type, "dirs", pycache_dir(path))
838e4d
838e4d
838e4d
def add_lang_to_module(paths, module_name, path):
    """
    Helper procedure, files the given language file under its language code
    in paths["lang"][module_name].

    The language is taken from the name of the directory that sits directly
    inside the innermost 'locale' directory above the file.

    Returns True if the language code detection was successful,
    False (with paths left untouched) otherwise.
    """
    ancestors = list(path.parents)
    # walk the (directory, its parent) pairs from the file upwards
    for directory, its_parent in zip(ancestors, ancestors[1:]):
        if its_parent.name == 'locale':
            lang_country_code = directory.name
            break
    else:
        return False

    # convert potential en_US to plain en
    lang_code, _, _ = lang_country_code.partition('_')
    by_module = paths["lang"]
    if module_name not in by_module:
        by_module[module_name] = defaultdict(list)
    by_module[module_name][lang_code].append(path)
    return True
838e4d
838e4d
98862f
def prepend_mandirs(prefix):
    """
    Return a new list with every MANDIRS pattern prepended with the prefix.

    The prefix is converted to a string, the result is a list of strings.
    """
    base = str(prefix)
    prefixed = []
    for mandir in MANDIRS:
        prefixed.append(base + mandir)
    return prefixed
98862f
98862f
98862f
def normalize_manpage_filename(prefix, path):
    """
    If the path lives in one of the directories processed by RPM's
    brp-compress script, strip its extension (when it is one of those listed
    in MANPAGE_EXTENSIONS), append '*' to the filename and return the result.
    Any other path is returned unchanged.
    Rationale: https://docs.fedoraproject.org/en-US/packaging-guidelines/#_manpages

    Examples:

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/man/de/man1/linkchecker.1'))
        BuildrootPath('/usr/share/man/de/man1/linkchecker.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/doc/en/man/man1/getmac.1'))
        BuildrootPath('/usr/share/doc/en/man/man1/getmac.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/man/man8/abc.8.zstd'))
        BuildrootPath('/usr/share/man/man8/abc.8*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/kerberos/man/dir'))
        BuildrootPath('/usr/kerberos/man/dir')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/kerberos/man/dir.1'))
        BuildrootPath('/usr/kerberos/man/dir.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/bin/getmac'))
        BuildrootPath('/usr/bin/getmac')
    """
    mandir_patterns = prepend_mandirs(prefix)

    # "dir" is explicitly excluded by RPM
    # https://github.com/rpm-software-management/rpm/blob/rpm-4.17.0-release/scripts/brp-compress#L24
    if path.name == "dir":
        return path

    directory = str(path.parent)
    if not any(fnmatch.fnmatch(directory, pattern) for pattern in mandir_patterns):
        return path

    if path.suffix[1:] in MANPAGE_EXTENSIONS:
        # "abc.1.gz" -> "abc.1*"
        basename = path.stem
    else:
        # "abc.1" -> "abc.1*"
        basename = path.name
    return BuildrootPath(path.parent / (basename + "*"))
98862f
98862f
98862f
def is_valid_module_name(s):
    """Tell whether the string is considered a valid module name.

    To be valid, the string must be a Python identifier, must not be
    a Python keyword and must not start with an underscore
    (names with a leading underscore are treated as private).

    Examples:

        >>> is_valid_module_name('module_name')
        True

        >>> is_valid_module_name('12module_name')
        False

        >>> is_valid_module_name('module-name')
        False

        >>> is_valid_module_name('return')
        False

        >>> is_valid_module_name('_module_name')
        False
    """
    is_private = s.startswith("_")
    return s.isidentifier() and not iskeyword(s) and not is_private
98862f
98862f
98862f
def module_names_from_path(path):
    """Collect all importable module names the given path stands for.

    Only ".py" and ".so" files count as importable modules, together with
    all the directories they are in (ie. "foo/bar/baz.py": "foo", "foo.bar",
    "foo.bar.baz").
    If any part of the path is not a valid module name,
    the whole path is discarded.

    Return the set of all valid names, possibly empty.
    """
    suffix = path.suffix
    if suffix == ".py":
        leaf = path.stem
    elif suffix == ".so":
        # .so files can have two suffixes - cut both of them
        leaf = PurePosixPath(path.stem).stem
    else:
        # Not a module at all
        return set()

    components = [*path.parts[:-1], leaf]

    # '__init__' indicates a module but we don't want to import the actual file
    # It's unclear whether there can be __init__.so files in the Python packages.
    # The idea to implement this file was raised in 2008 on Python-ideas mailing list
    # (https://mail.python.org/pipermail/python-ideas/2008-October/002292.html)
    # and there are a few reports of people compiling their __init__.py to __init__.so.
    # However it's not officially documented nor forbidden,
    # so the name is compared only after the suffix was stripped.
    if leaf == "__init__":
        components.pop()

    # A single invalid part spoils the whole path
    if not all(is_valid_module_name(component) for component in components):
        return set()

    return {
        '.'.join(components[:length])
        for length in range(1, len(components) + 1)
    }
98862f
98862f
838e4d
def classify_paths(
    record_path, parsed_record_content, metadata, sitedirs, python_version, prefix
):
    """
    Sort every BuildrootPath from parsed_record_content into a dict structure
    that makes filtering the files for the %files and %check section easier.

    The dict structure is spelled out at the beginning of this function's code.

    Each "module" is a dict with "type" ("package", "script", "extension"), and "files" and "dirs".
    """
    distinfo = record_path.parent
    metadata_paths = {
        "files": [],  # regular %file entries with dist-info content
        "dirs": [distinfo],  # %dir %file entries with dist-info directory
        "docs": [],  # to be used once there is upstream way to recognize READMEs
        "licenses": [],  # %license entries parsed from dist-info METADATA file
    }
    other_files = []  # regular %file entries we could not parse :(
    paths = {
        "metadata": metadata_paths,
        "lang": {},  # %lang entries: [module_name or None][language_code] lists of .mo files
        "modules": defaultdict(list),  # each importable module (directory, .py, .so)
        "module_names": set(),  # qualified names of each importable module ("foo.bar.baz")
        "other": {"files": other_files},
    }

    # In RECORDs generated by pip, there are no directories, only files.
    # The example RECORD from PEP 376 does not contain directories either.
    # Hence, we'll only assume files, but TODO get it officially documented.
    license_files = metadata.get_all('License-File')

    for path in parsed_record_content:
        suffix = path.suffix

        if suffix == ".pyc":
            # we handle bytecode separately
            continue

        # 1) content of the dist-info directory
        if path.parent == distinfo:
            if path.name in ("RECORD", "REQUESTED"):
                # RECORD and REQUESTED files are removed in %pyproject_install
                # See PEP 627
                continue
            is_license = license_files and path.name in license_files
            metadata_paths["licenses" if is_license else "files"].append(path)
            continue

        # the first sitedir the file lives in (at any depth), if any
        sitedir = next(
            (candidate for candidate in sitedirs if candidate in path.parents),
            None,
        )

        # 2) files outside of all sitedirs
        if sitedir is None:
            if suffix == ".mo":
                if not add_lang_to_module(paths, None, path):
                    other_files.append(path)
            else:
                other_files.append(normalize_manpage_filename(prefix, path))
            continue

        # Get only the part without sitedir prefix to classify module names
        relative_path = path.relative_to(sitedir)
        paths["module_names"].update(module_names_from_path(relative_path))

        # 3) files directly in a sitedir
        if path.parent == sitedir:
            if suffix == ".so":
                # extension modules can have 2 suffixes
                name = BuildrootPath(path.stem).stem
                add_file_to_module(paths, name, "extension", "files", path)
            elif suffix == ".py":
                # we add the .pyc files, but not top-level __pycache__
                add_py_file_to_module(
                    paths, path.stem, "script", path, python_version,
                    include_pycache_dir=False
                )
            else:
                other_files.append(path)
            continue

        # 4) files inside a directory of a sitedir,
        #    we add all dirs upwards until sitedir
        ancestors = list(path.parents)
        depth = ancestors.index(sitedir)
        package = ancestors[depth - 1].name  # the directory right below sitedir
        for directory in ancestors[:depth]:
            add_file_to_module(paths, package, "package", "dirs", directory)

        if suffix == ".mo" and add_lang_to_module(paths, package, path):
            # recorded as a %lang entry, nothing more to do
            continue

        if suffix == ".py":
            # we add the .pyc files, and their __pycache__
            add_py_file_to_module(
                paths, package, "package", path, python_version,
                include_pycache_dir=True
            )
        else:
            add_file_to_module(paths, package, "package", "files", path)

    return paths
838e4d
838e4d
838e4d
def escape_rpm_path(path):
    """
    Escape special characters in a path given as a string or a BuildrootPath,
    always returns a string.

    E.g. a space in path otherwise makes RPM think it's multiple paths,
    unless we put it in "quotes".
    Or a literal % symbol in path might be expanded as a macro if not escaped.

    Due to limitations in RPM,
    some paths with spaces and other special characters are not supported
    and NotImplementedError is raised for them.

    Examples:

        >>> escape_rpm_path(BuildrootPath('/usr/lib/python3.9/site-packages/setuptools'))
        '/usr/lib/python3.9/site-packages/setuptools'

        >>> escape_rpm_path('/usr/lib/python3.9/site-packages/setuptools/script (dev).tmpl')
        '"/usr/lib/python3.9/site-packages/setuptools/script (dev).tmpl"'

        >>> escape_rpm_path('/usr/share/data/100%valid.path')
        '/usr/share/data/100%%%%%%%%valid.path'

        >>> escape_rpm_path('/usr/share/data/100 % valid.path')
        '"/usr/share/data/100 %%%%%%%% valid.path"'

        >>> escape_rpm_path('/usr/share/data/1000 %% valid.path')
        '"/usr/share/data/1000 %%%%%%%%%%%%%%%% valid.path"'

        >>> escape_rpm_path('/usr/share/data/spaces and "quotes"')
        Traceback (most recent call last):
          ...
        NotImplementedError: ...

        >>> escape_rpm_path('/usr/share/data/spaces and [square brackets]')
        Traceback (most recent call last):
          ...
        NotImplementedError: ...
    """
    unescaped = str(path)

    # Escaping by 8 %s has been verified in RPM 4.16 and 4.17, but probably not stable
    # See this thread http://lists.rpm.org/pipermail/rpm-list/2021-June/002048.html
    # On the CI, we build tests/escape_percentages.spec to verify this assumption
    escaped = unescaped.replace("%", "%" * 8)

    needs_quotes = any(delimiter in escaped for delimiter in RPM_FILES_DELIMETERS)
    if not needs_quotes:
        return escaped

    if '"' in escaped:
        # As far as we know, RPM cannot list such file individually
        # See this thread http://lists.rpm.org/pipermail/rpm-list/2021-June/002048.html
        raise NotImplementedError(f'" symbol in path with spaces is not supported by %pyproject_save_files: {unescaped!r}')
    if "[" in escaped or "]" in escaped:
        # See https://bugzilla.redhat.com/show_bug.cgi?id=1990879
        # and https://github.com/rpm-software-management/rpm/issues/1749
        raise NotImplementedError(f'[ or ] symbol in path with spaces is not supported by %pyproject_save_files: {unescaped!r}')
    return '"' + escaped + '"'
838e4d
838e4d
838e4d
def generate_file_list(paths_dict, module_globs, include_others=False):
    """
    Turn the classified paths_dict into lines for the %files section.
    Returns a sorted list of text lines, no Path objects.

    The metadata files are always included, files of modules only if the
    module name matches one of module_globs (a set of globs) and all the
    other files only if include_others is true.

    Every glob must match at least one module, ValueError is raised otherwise.
    Multiple globs matching identical module(s) are OK.
    """
    def lang_lines(module_name):
        # %lang lines of one module (None = files outside of any module),
        # an empty list when there are no language files recorded for it
        try:
            by_language = paths_dict["lang"][module_name]
        except KeyError:
            return []
        return [
            f"%lang({lang_code}) {escape_rpm_path(p)}"
            for lang_code, mo_files in by_language.items()
            for p in mo_files
        ]

    files = set()

    if include_others:
        files.update(escape_rpm_path(p) for p in paths_dict["other"]["files"])
        files.update(lang_lines(None))

    metadata = paths_dict["metadata"]
    files.update(escape_rpm_path(p) for p in metadata["files"])
    for macro in ("dir", "doc", "license"):
        files.update(f"%{macro} {escape_rpm_path(p)}" for p in metadata[f"{macro}s"])

    modules = paths_dict["modules"]
    emitted_modules = set()
    matched_globs = set()

    for glob in module_globs:
        matching_names = [name for name in modules if fnmatch.fnmatchcase(name, glob)]
        if matching_names:
            matched_globs.add(glob)
        for name in matching_names:
            if name in emitted_modules:
                # already emitted thanks to an earlier glob
                continue
            files.update(lang_lines(name))
            for module in modules[name]:
                files.update(f"%dir {escape_rpm_path(p)}" for p in module["dirs"])
                files.update(escape_rpm_path(p) for p in module["files"])
            emitted_modules.add(name)

    missed = module_globs - matched_globs
    if missed:
        missed_text = ", ".join(sorted(missed))
        raise ValueError(f"Globs did not match any module: {missed_text}")

    return sorted(files)
838e4d
838e4d
838e4d
def parse_varargs(varargs):
    """
    Parse varargs from the %pyproject_save_files macro

    Arguments starting with + are treated as flags (only +auto is known),
    everything else is a glob

    Returns a set of globs and a boolean flag whether to include all the other files

    Raises ValueError for unknown flags and for globs with dots or slashes
    (namespace packages).

    Good examples:

        >>> parse_varargs(['*'])
        ({'*'}, False)

        >>> mods, auto = parse_varargs(['requests*', 'kerberos', '+auto'])
        >>> auto
        True
        >>> sorted(mods)
        ['kerberos', 'requests*']

        >>> mods, auto = parse_varargs(['tldr', 'tensorf*'])
        >>> auto
        False
        >>> sorted(mods)
        ['tensorf*', 'tldr']

        >>> parse_varargs(['+auto'])
        (set(), True)

    Bad examples:

        >>> parse_varargs(['+kinkdir'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +kinkdir

        >>> parse_varargs(['good', '+bad', '*ugly*'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +bad

        >>> parse_varargs(['+bad', 'my.bad'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +bad

        >>> parse_varargs(['mod', 'mod.*'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with . in the glob: mod.*. ...

        >>> parse_varargs(['my.bad', '+bad'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with . in the glob: my.bad. ...

        >>> parse_varargs(['mod/submod'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with / in the glob: mod/submod. ...
    """
    namespace_error_template = (
        "Attempted to use a namespaced package with {symbol} in the glob: {arg}. "
        "That is not (yet) supported. Use {top} instead and see "
        "https://bugzilla.redhat.com/1935266 for details."
    )
    globs = set()
    include_auto = False

    for arg in varargs:
        if arg.startswith("+"):
            if arg != "+auto":
                raise ValueError(f"Invalid argument: {arg}")
            include_auto = True
            continue

        # the dot is checked first, so it wins when both symbols are present
        for symbol in (".", "/"):
            if symbol in arg:
                top = arg.partition(symbol)[0]
                raise ValueError(
                    namespace_error_template.format(symbol=symbol, arg=arg, top=top)
                )

        globs.add(arg)

    return globs, include_auto
838e4d
838e4d
838e4d
def load_parsed_record(pyproject_record):
    """
    Load the JSON file at pyproject_record and return its content as a dict
    mapping the BuildrootPath of a RECORD file to a list of BuildrootPaths.

    The JSON content is expected to be an object mapping a RECORD path
    to a list of file paths, all stored as strings.

    Raises FileExistsError if the file describes more than one RECORD.
    """
    # JSON is read as UTF-8 explicitly, so the result does not depend on
    # the locale the build happens to run in
    with open(pyproject_record, encoding="utf-8") as pyproject_record_file:
        content = json.load(pyproject_record_file)

    if len(content) > 1:
        raise FileExistsError("%pyproject install has found more than one *.dist-info/RECORD file. "
                              "Currently, %pyproject_save_files supports only one wheel → one file list mapping. "
                              "Feel free to open a bugzilla for pyproject-rpm-macros and describe your usecase.")

    # Redefine strings stored in JSON to BuildRootPaths
    return {
        BuildrootPath(record_path): [BuildrootPath(f) for f in files]
        for record_path, files in content.items()
    }
838e4d
838e4d
838e4d
def dist_metadata(buildroot, record_path):
    """
    Returns distribution metadata (email.message.EmailMessage), possibly empty

    The metadata is read from the directory containing record_path,
    looked up on disk inside the given buildroot.
    """
    dist_info_dir = record_path.parent
    on_disk = dist_info_dir.to_real(buildroot)
    return Distribution.at(on_disk).metadata
838e4d
98862f
98862f
def pyproject_save_files_and_modules(buildroot, sitelib, sitearch, python_version, pyproject_record, prefix, varargs):
    """
    Takes arguments from the %{pyproject_save_files} macro

    Returns tuple: list of paths for the %files section and sorted list
    of module names for the %check section
    """
    # On 32 bit architectures, sitelib equals to sitearch
    # Going through a set saves us browsing one directory twice
    sitedirs = sorted({sitelib, sitearch})

    globs, include_auto = parse_varargs(varargs)

    file_lines = []
    module_names = set()

    for record_path, files in load_parsed_record(pyproject_record).items():
        paths_dict = classify_paths(
            record_path,
            files,
            dist_metadata(buildroot, record_path),
            sitedirs,
            python_version,
            prefix,
        )
        file_lines += generate_file_list(paths_dict, globs, include_auto)
        module_names |= paths_dict["module_names"]

    # Sort values, so they are always checked in the same order
    return file_lines, sorted(module_names)
838e4d
838e4d
838e4d
def main(cli_args):
    """
    Compute the %files lines and the module names from the parsed command
    line arguments and write each list to its output file,
    one item per line, encoded as UTF-8.
    """
    file_section, module_names = pyproject_save_files_and_modules(
        buildroot=cli_args.buildroot,
        sitelib=cli_args.sitelib,
        sitearch=cli_args.sitearch,
        python_version=cli_args.python_version,
        pyproject_record=cli_args.pyproject_record,
        prefix=cli_args.prefix,
        varargs=cli_args.varargs,
    )

    outputs = (
        (cli_args.output_files, file_section),
        (cli_args.output_modules, module_names),
    )
    for destination, lines in outputs:
        destination.write_text("\n".join(lines) + "\n", encoding="utf-8")
838e4d
838e4d
838e4d
def argparser():
    """
    Build the command line parser: a group of required options
    followed by one or more positional varargs.
    """
    # (option, type the value is converted to), in the order of registration
    required_options = (
        ("--output-files", PosixPath),
        ("--output-modules", PosixPath),
        ("--buildroot", PosixPath),
        ("--sitelib", BuildrootPath),
        ("--sitearch", BuildrootPath),
        ("--python-version", str),
        ("--pyproject-record", PosixPath),
        ("--prefix", PosixPath),
    )

    parser = argparse.ArgumentParser()
    r = parser.add_argument_group("required arguments")
    for option, converter in required_options:
        r.add_argument(option, type=converter, required=True)
    parser.add_argument("varargs", nargs="+")
    return parser
838e4d
838e4d
838e4d
# Script entry point: parse the command line and run main() with the result
if __name__ == "__main__":
    main(argparser().parse_args())