|
|
838e4d |
import argparse
|
|
|
838e4d |
import fnmatch
|
|
|
838e4d |
import json
|
|
|
838e4d |
import os
|
|
|
838e4d |
|
|
|
838e4d |
from collections import defaultdict
|
|
|
98862f |
from keyword import iskeyword
|
|
|
838e4d |
from pathlib import PosixPath, PurePosixPath
|
|
|
838e4d |
from importlib.metadata import Distribution
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
# Characters that separate entries in an RPM %files list
# (the delim argument of strtokWithQuotes in RPM's build/files.c)
RPM_FILES_DELIMETERS = " \n\t"
|
|
|
838e4d |
|
|
|
98862f |
# The manpage extensions and directories are hardcoded in RPM itself,
# so separate copies have to be maintained here :(
# RPM issue asking for the lists to be exposed as macros:
# https://github.com/rpm-software-management/rpm/issues/1865
# Source of the original lists:
# https://github.com/rpm-software-management/rpm/blob/master/scripts/brp-compress
MANPAGE_EXTENSIONS = ["gz", "Z", "bz2", "xz", "lzma", "zst", "zstd"]
MANDIRS = [
    "/man/man*",
    "/man/*/man*",
    "/info",
    "/share/man/man*",
    "/share/man/*/man*",
    "/share/info",
    "/kerberos/man",
    "/X11R6/man/man*",
    "/lib/perl5/man/man*",
    "/share/doc/*/man/man*",
    "/lib/*/man/man*",
    "/share/fish/man/man*",
]
|
|
|
98862f |
|
|
|
838e4d |
|
|
|
838e4d |
class BuildrootPath(PurePosixPath):
    """
    A pure POSIX path that is interpreted inside a buildroot.

    An absolute BuildrootPath is anchored at the buildroot,
    not at the real filesystem root.

    E.g. /usr/lib means %{buildroot}/usr/lib
    The object itself does not remember which buildroot it belongs to.
    """

    @staticmethod
    def from_real(realpath, *, root):
        """
        Turn a real disk path located under root into a BuildrootPath.

        For example::

            >>> BuildrootPath.from_real(PosixPath('/tmp/buildroot/foo'), root=PosixPath('/tmp/buildroot'))
            BuildrootPath('/foo')
        """
        inside_root = realpath.relative_to(root)
        return BuildrootPath("/") / inside_root

    def to_real(self, root):
        """
        Place this path under the given root and return the resulting real path.

        For example::

            >>> BuildrootPath('/foo').to_real(PosixPath('/tmp/buildroot'))
            PosixPath('/tmp/buildroot/foo')
        """
        inside_root = self.relative_to("/")
        return root / inside_root

    def normpath(self):
        """
        Collapse the /../ (and similar) parts of the path purely lexically.

        PurePaths have no .resolve() and the .resolve() of concrete Paths
        touches real files, so os.path.normpath() is used instead.
        The result is only correct if there are no symbolic links involved.

        Example:

            >>> BuildrootPath('/usr/lib/python/../pypy').normpath()
            BuildrootPath('/usr/lib/pypy')
        """
        collapsed = os.path.normpath(self)
        return type(self)(collapsed)
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def pycache_dir(script):
    """
    Return the __pycache__ directory that sits next to the given script.

    Both the argument and the result are BuildrootPaths.

    Example:

        >>> pycache_dir(BuildrootPath('/whatever/bar.py'))
        BuildrootPath('/whatever/__pycache__')

        >>> pycache_dir(BuildrootPath('/opt/python3.10/foo.py'))
        BuildrootPath('/opt/python3.10/__pycache__')
    """
    containing_dir = script.parent
    return containing_dir / "__pycache__"
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def pycached(script, python_version):
    """
    Return a two-item list: the script path itself and a glob for its bytecode.

    This mirrors the %pycached macro.
    Both items are BuildrootPaths, the glob included.

    Only the first two dot-separated components of python_version are used.

    Examples:

        >>> pycached(BuildrootPath('/whatever/bar.py'), '3.8')
        [BuildrootPath('/whatever/bar.py'), BuildrootPath('/whatever/__pycache__/bar.cpython-38{,.opt-?}.pyc')]

        >>> pycached(BuildrootPath('/opt/python3.10/foo.py'), '3.10')
        [BuildrootPath('/opt/python3.10/foo.py'), BuildrootPath('/opt/python3.10/__pycache__/foo.cpython-310{,.opt-?}.pyc')]
    """
    assert script.suffix == ".py"
    major_minor = python_version.split(".")[:2]
    cache_tag = "cpython-" + "".join(major_minor)
    bytecode_glob = pycache_dir(script) / f"{script.stem}.{cache_tag}{{,.opt-?}}.pyc"
    return [script, bytecode_glob]
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def add_file_to_module(paths, module_name, module_type, files_dirs, *files):
    """
    Helper procedure, adds given files to the module_name of a given module_type

    paths is the classification dict; paths["modules"][module_name] is a list
    of module dicts with "type", "files" and "dirs" keys.
    files_dirs selects which of the two lists ("files" or "dirs") is extended.

    If a module dict of the given type already exists, every file that is not
    recorded yet is appended to it (each file is checked individually).
    Otherwise a new module dict holding the given files is created.
    Calling this without any files is allowed.
    """
    for module in paths["modules"][module_name]:
        if module["type"] == module_type:
            recorded = module[files_dirs]
            # Check every file, not only the first one: otherwise a new file
            # following an already recorded one would be silently dropped
            for item in files:
                if item not in recorded:
                    recorded.append(item)
            break
    else:
        paths["modules"][module_name].append(
            {"type": module_type, "files": [], "dirs": [], files_dirs: list(files)}
        )
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def add_py_file_to_module(paths, module_name, module_type, path, python_version,
                          *, include_pycache_dir):
    """
    Helper procedure, records a .py file in the module_name of a given module_type

    The bytecode cache glob of the file is always recorded together with it.
    The __pycache__ directory itself is recorded only if include_pycache_dir is set.
    """
    source_and_bytecode = pycached(path, python_version)
    add_file_to_module(paths, module_name, module_type, "files", *source_and_bytecode)
    if not include_pycache_dir:
        return
    add_file_to_module(paths, module_name, module_type, "dirs", pycache_dir(path))
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def add_lang_to_module(paths, module_name, path):
    """
    Helper procedure, divides lang files by language and adds them to the module_name

    The language is taken from the name of the directory that contains
    the innermost LC_MESSAGES directory of the path, e.g. "de" for
    .../de/LC_MESSAGES/foo.mo. A potential country suffix (en_US) is dropped.

    The path is appended to paths["lang"][module_name][language_code].

    Returns True if the language code detection was successful.
    Returns False (and records nothing) if there is no LC_MESSAGES directory
    in the path or if no language code can be derived from its parent.
    """
    ancestors = path.parents
    for depth, ancestor in enumerate(ancestors):
        if ancestor.name == 'LC_MESSAGES':
            # A named ancestor always has a parent of its own ("/" or "."),
            # so this lookup cannot run out of range
            lang_country_code = ancestors[depth + 1].name
            break
    else:
        return False
    # convert potential en_US to plain en
    lang_code = lang_country_code.partition('_')[0]
    if not lang_code:
        # LC_MESSAGES directly in the root (or a directory name starting
        # with "_") gives no usable code; don't produce an empty %lang()
        return False
    per_language = paths["lang"].setdefault(module_name, defaultdict(list))
    per_language[lang_code].append(path)
    return True
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
98862f |
def prepend_mandirs(prefix):
    """
    Return a new list with every MANDIRS pattern prefixed by the given prefix.

    The prefix is converted to a string first, the results are plain strings.
    """
    prefix_text = str(prefix)
    return [f"{prefix_text}{mandir}" for mandir in MANDIRS]
|
|
|
98862f |
|
|
|
98862f |
|
|
|
98862f |
def normalize_manpage_filename(prefix, path):
    """
    Replace the (possibly compressed) manpage filename with a glob.

    If the directory of the path matches one of the manpage directories
    processed by RPM's brp-compress script, a compression extension listed
    by brp-compress is stripped from the filename (if there is one)
    and '*' is appended to it. Any other path is returned unchanged.
    Rationale: https://docs.fedoraproject.org/en-US/packaging-guidelines/#_manpages

    Examples:

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/man/de/man1/linkchecker.1'))
        BuildrootPath('/usr/share/man/de/man1/linkchecker.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/doc/en/man/man1/getmac.1'))
        BuildrootPath('/usr/share/doc/en/man/man1/getmac.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/share/man/man8/abc.8.zstd'))
        BuildrootPath('/usr/share/man/man8/abc.8*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/kerberos/man/dir'))
        BuildrootPath('/usr/kerberos/man/dir')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/kerberos/man/dir.1'))
        BuildrootPath('/usr/kerberos/man/dir.1*')

        >>> normalize_manpage_filename(PosixPath('/usr'), BuildrootPath('/usr/bin/getmac'))
        BuildrootPath('/usr/bin/getmac')
    """
    # "dir" is explicitly excluded by RPM
    # https://github.com/rpm-software-management/rpm/blob/rpm-4.17.0-release/scripts/brp-compress#L24
    if path.name == "dir":
        return path

    directory = str(path.parent)
    in_mandir = any(
        fnmatch.fnmatch(directory, mandir) for mandir in prepend_mandirs(prefix)
    )
    if not in_mandir:
        return path

    if path.suffix[1:] in MANPAGE_EXTENSIONS:
        # known compression extension: "abc.1.gz" -> "abc.1*"
        basename = path.stem
    else:
        # anything else: "abc.1" -> "abc.1*"
        basename = path.name
    return BuildrootPath(path.parent / (basename + "*"))
|
|
|
98862f |
|
|
|
98862f |
|
|
|
98862f |
def is_valid_module_name(s):
    """Tell whether the given string can be used as a (public) module name.

    To qualify, the string has to be a Python identifier, it must not be
    a Python keyword and it must not begin with an underscore
    (such names are treated as private).

    Examples:

        >>> is_valid_module_name('module_name')
        True

        >>> is_valid_module_name('12module_name')
        False

        >>> is_valid_module_name('module-name')
        False

        >>> is_valid_module_name('return')
        False

        >>> is_valid_module_name('_module_name')
        False
    """
    return s.isidentifier() and not iskeyword(s) and not s.startswith("_")
|
|
|
98862f |
|
|
|
98862f |
|
|
|
98862f |
def module_names_from_path(path):
    """Compute all importable module names a given path gives rise to.

    Only ".py" and ".so" files count as importable modules. Such a file makes
    each of its directories importable as well (ie. "foo/bar/baz.py": "foo",
    "foo.bar", "foo.bar.baz").
    If any component of the path is not a valid module name,
    the whole path is discarded.

    Return set of all valid possibilities (an empty set if there are none).
    """
    suffix = path.suffix
    if suffix == ".py":
        leaf = path.stem
    elif suffix == ".so":
        # .so files can have two suffixes - cut both of them
        leaf = PosixPath(path.stem).stem
    else:
        # Not a file that could be a module at all
        return set()

    components = [*path.parts[:-1], leaf]

    # '__init__' indicates a module but we don't want to import the actual file
    # It's unclear whether there can be __init__.so files in the Python packages.
    # The idea to implement this file was raised in 2008 on Python-ideas mailing list
    # (https://mail.python.org/pipermail/python-ideas/2008-October/002292.html)
    # and there are a few reports of people compiling their __init__.py to __init__.so.
    # However it's not officially documented nor forbidden,
    # so the check is done on the name with the suffix(es) already stripped.
    if leaf == "__init__":
        components.pop()

    # A single invalid component disqualifies the entire path
    if not all(is_valid_module_name(component) for component in components):
        return set()

    return {
        ".".join(components[:length])
        for length in range(1, len(components) + 1)
    }
|
|
|
98862f |
|
|
|
98862f |
|
|
|
b6f61c |
def is_license_file(path, license_files, license_directories):
    """
    Tell whether the given BuildrootPath corresponds to a "License-File" entry.

    The path is a match if, expressed relative to any of the license_directories,
    it equals string-wise one of the "License-File" entries (license_files).
    When either license_files or license_directories is empty (or None),
    nothing matches.

    Examples:
        >>> site_packages = BuildrootPath('/usr/lib/python3.12/site-packages')
        >>> distinfo = site_packages / 'foo-1.0.dist-info'
        >>> license_directories = [distinfo / 'licenses', distinfo]
        >>> license_files = ['LICENSE.txt', 'AUTHORS.md']
        >>> is_license_file(distinfo / 'AUTHORS.md', license_files, license_directories)
        True
        >>> is_license_file(distinfo / 'licenses/LICENSE.txt', license_files, license_directories)
        True
        >>> # we don't match based on directory only
        >>> is_license_file(distinfo / 'licenses/COPYING', license_files, license_directories)
        False
        >>> is_license_file(site_packages / 'foo/LICENSE.txt', license_files, license_directories)
        False
    """
    if not (license_files and license_directories):
        return False
    return any(
        path.is_relative_to(directory)
        and str(path.relative_to(directory)) in license_files
        for directory in license_directories
    )
|
|
|
b6f61c |
|
|
|
b6f61c |
|
|
|
838e4d |
def classify_paths(
    record_path, parsed_record_content, metadata, sitedirs, python_version, prefix
):
    """
    Sort every BuildrootPath from parsed_record_content into a dict structure
    that makes it easy to pick the files for the %files and %check sections.

    The dict structure is defined at the beginning of this function's code.

    Each "module" is a dict with "type" ("package", "script", "extension"), and "files" and "dirs".

    Bytecode (.pyc) entries of the record are skipped, as are the RECORD and
    REQUESTED files of the dist-info directory.
    """
    distinfo = record_path.parent
    paths = {
        "metadata": {
            "files": [],  # regular %file entries with dist-info content
            "dirs": [distinfo],  # %dir %file entries with dist-info directory
            "docs": [],  # to be used once there is upstream way to recognize READMEs
            "licenses": [],  # %license entries parsed from dist-info METADATA file
        },
        "lang": {},  # %lang entries: [module_name or None][language_code] lists of .mo files
        "modules": defaultdict(list),  # each importable module (directory, .py, .so)
        "module_names": set(),  # qualified names of each importable module ("foo.bar.baz")
        "other": {"files": []},  # regular %file entries we could not parse :(
    }
    metadata_paths = paths["metadata"]
    other_files = paths["other"]["files"]

    license_files = metadata.get_all('License-File')
    # See PEP 639 "Root License Directory" for the licenses subdirectory.
    # setuptools was the first known build backend to implement License-File.
    # Unfortunately they don't put licenses to the license directory (yet):
    # https://github.com/pypa/setuptools/issues/3596
    # Hence, we check licenses in both licenses and dist-info
    license_directories = (distinfo / 'licenses', distinfo)

    # In RECORDs generated by pip, there are no directories, only files.
    # The example RECORD from PEP 376 does not contain directories either.
    # Hence, we'll only assume files, but TODO get it officially documented.
    for path in parsed_record_content:
        if path.suffix == ".pyc":
            # we handle bytecode separately
            continue

        # nearest parent first, see pathlib's PurePath.parents
        ancestors = list(path.parents)

        if distinfo in ancestors:
            if path.parent == distinfo and path.name in ("RECORD", "REQUESTED"):
                # RECORD and REQUESTED files are removed in %pyproject_install
                # See PEP 627
                continue
            if is_license_file(path, license_files, license_directories):
                metadata_paths["licenses"].append(path)
            else:
                metadata_paths["files"].append(path)
            # nested directories within distinfo
            for nested_dir in ancestors[:ancestors.index(distinfo)]:
                if nested_dir not in metadata_paths["dirs"]:
                    metadata_paths["dirs"].append(nested_dir)
            continue

        # the first sitedir that contains the file, if any
        sitedir = next((s for s in sitedirs if s in ancestors), None)

        if sitedir is None:
            if path.suffix != ".mo":
                other_files.append(normalize_manpage_filename(prefix, path))
            elif not add_lang_to_module(paths, None, path):
                # a .mo file without a detectable language
                other_files.append(path)
            continue

        # Get only the part without sitedir prefix to classify module names
        relative_path = path.relative_to(sitedir)
        paths["module_names"].update(module_names_from_path(relative_path))

        if path.parent == sitedir:
            if path.suffix == ".so":
                # extension modules can have 2 suffixes
                name = BuildrootPath(path.stem).stem
                add_file_to_module(paths, name, "extension", "files", path)
            elif path.suffix == ".py":
                # we add the .pyc files, but not top-level __pycache__
                add_py_file_to_module(
                    paths, path.stem, "script", path, python_version,
                    include_pycache_dir=False
                )
            else:
                other_files.append(path)
            continue

        # this file is inside a dir, we add all dirs upwards until sitedir
        depth = ancestors.index(sitedir)
        package_name = ancestors[depth - 1].name
        for package_dir in ancestors[:depth]:
            add_file_to_module(paths, package_name, "package", "dirs", package_dir)

        if path.suffix == ".mo" and add_lang_to_module(paths, package_name, path):
            continue

        if path.suffix == ".py":
            # we add the .pyc files, and their __pycache__
            add_py_file_to_module(
                paths, package_name, "package", path, python_version,
                include_pycache_dir=True
            )
        else:
            add_file_to_module(paths, package_name, "package", "files", path)

    return paths
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def escape_rpm_path(path):
    """
    Make a string-path or a BuildrootPath safe for use in the %files section

    A path with a space (or another delimiter) would be understood by RPM
    as several paths, unless it is put in "quotes".
    A literal % symbol could be expanded as a macro, unless it is escaped.

    Due to limitations in RPM,
    some paths with spaces and other special characters are not supported.

    Examples:

        >>> escape_rpm_path(BuildrootPath('/usr/lib/python3.9/site-packages/setuptools'))
        '/usr/lib/python3.9/site-packages/setuptools'

        >>> escape_rpm_path('/usr/lib/python3.9/site-packages/setuptools/script (dev).tmpl')
        '"/usr/lib/python3.9/site-packages/setuptools/script (dev).tmpl"'

        >>> escape_rpm_path('/usr/share/data/100%valid.path')
        '/usr/share/data/100%%%%%%%%valid.path'

        >>> escape_rpm_path('/usr/share/data/100 % valid.path')
        '"/usr/share/data/100 %%%%%%%% valid.path"'

        >>> escape_rpm_path('/usr/share/data/1000 %% valid.path')
        '"/usr/share/data/1000 %%%%%%%%%%%%%%%% valid.path"'

        >>> escape_rpm_path('/usr/share/data/spaces and "quotes"')
        Traceback (most recent call last):
          ...
        NotImplementedError: ...

        >>> escape_rpm_path('/usr/share/data/spaces and [square brackets]')
        Traceback (most recent call last):
          ...
        NotImplementedError: ...
    """
    original = str(path)

    # Escaping by 8 %s has been verified in RPM 4.16 and 4.17, but probably not stable
    # See this thread http://lists.rpm.org/pipermail/rpm-list/2021-June/002048.html
    # On the CI, we build tests/escape_percentages.spec to verify this assumption
    escaped = original.replace("%", "%" * 8)

    needs_quotes = any(delimiter in escaped for delimiter in RPM_FILES_DELIMETERS)
    if not needs_quotes:
        return escaped

    if '"' in escaped:
        # As far as we know, RPM cannot list such file individually
        # See this thread http://lists.rpm.org/pipermail/rpm-list/2021-June/002048.html
        raise NotImplementedError(f'" symbol in path with spaces is not supported by %pyproject_save_files: {original!r}')
    if "[" in escaped or "]" in escaped:
        # See https://bugzilla.redhat.com/show_bug.cgi?id=1990879
        # and https://github.com/rpm-software-management/rpm/issues/1749
        raise NotImplementedError(f'[ or ] symbol in path with spaces is not supported by %pyproject_save_files: {original!r}')
    return f'"{escaped}"'
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def generate_file_list(paths_dict, module_globs, include_others=False):
    """
    Convert the classified paths_dict to lines for the %files section.
    The result is a sorted list of text lines, it contains no Path objects.

    The lines cover the metadata, the files of modules whose names match
    any of the module_globs and optionally (include_others) all the other files.

    Every glob has to match at least one module, otherwise ValueError is raised.
    As an exception, a glob that matches an empty string is accepted
    when there are no modules at all.
    Multiple globs matching identical module(s) are OK.
    """
    files = set()

    def add_lang_entries(key):
        # A key with no recorded translations contributes nothing
        try:
            for lang_code in paths_dict["lang"][key]:
                files.update(f"%lang({lang_code}) {escape_rpm_path(p)}" for p in paths_dict["lang"][key][lang_code])
        except KeyError:
            pass

    if include_others:
        files.update(escape_rpm_path(p) for p in paths_dict["other"]["files"])
        add_lang_entries(None)

    metadata = paths_dict["metadata"]
    files.update(escape_rpm_path(p) for p in metadata["files"])
    for macro in "dir", "doc", "license":
        files.update(f"%{macro} {escape_rpm_path(p)}" for p in metadata[f"{macro}s"])

    modules = paths_dict["modules"]
    done_modules = set()
    done_globs = set()

    for glob in module_globs:
        matching_names = [name for name in modules if fnmatch.fnmatchcase(name, glob)]

        # Users using '*' don't care about the files in the package, so it's ok
        # not to fail the build when no modules are detected
        # There can be legitimate reasons to create a package without Python modules
        if matching_names or (not modules and fnmatch.fnmatchcase("", glob)):
            done_globs.add(glob)

        for name in matching_names:
            if name in done_modules:
                continue
            add_lang_entries(name)
            for module in modules[name]:
                files.update(f"%dir {escape_rpm_path(p)}" for p in module["dirs"])
                files.update(escape_rpm_path(p) for p in module["files"])
            done_modules.add(name)

    missed = module_globs - done_globs
    if missed:
        missed_text = ", ".join(sorted(missed))
        raise ValueError(f"Globs did not match any module: {missed_text}")

    return sorted(files)
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
b6f61c |
def generate_module_list(paths_dict, module_globs):
    """
    Pick the module names stored in paths_dict (as created by classify_paths())
    whose top-level part matches any of the provided module_globs.

    Returns a sorted list with the matching qualified module names.

    Examples:

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'foo'})
        ['foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'*foo'})
        ['foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'foo', 'baz'})
        ['baz', 'foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'*'})
        ['baz', 'foo', 'foo.bar']

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'bar'})
        []

    Submodules aren't discovered:

        >>> generate_module_list({'module_names': {'foo', 'foo.bar', 'baz'}}, {'*bar'})
        []
    """

    def top_level_matches(qualified_name):
        # Only the top-level part is matched, eg. 'foo.bar.baz' -> 'foo'
        top_level_name = qualified_name.partition('.')[0]
        return any(fnmatch.fnmatchcase(top_level_name, glob) for glob in module_globs)

    return sorted(
        {name for name in paths_dict['module_names'] if top_level_matches(name)}
    )
|
|
|
b6f61c |
|
|
|
b6f61c |
|
|
|
838e4d |
def parse_varargs(varargs):
    """
    Parse varargs from the %pyproject_save_files macro

    An argument starting with + is a flag, any other argument is a glob

    Returns a tuple: the set of globs and a boolean flag telling
    whether all the other files are to be included

    Raises ValueError for unknown flags and for globs containing
    a dot or a slash (namespace packages).

    Good examples:

        >>> parse_varargs(['*'])
        ({'*'}, False)

        >>> mods, auto = parse_varargs(['requests*', 'kerberos', '+auto'])
        >>> auto
        True
        >>> sorted(mods)
        ['kerberos', 'requests*']

        >>> mods, auto = parse_varargs(['tldr', 'tensorf*'])
        >>> auto
        False
        >>> sorted(mods)
        ['tensorf*', 'tldr']

        >>> parse_varargs(['+auto'])
        (set(), True)

    Bad examples:

        >>> parse_varargs(['+kinkdir'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +kinkdir

        >>> parse_varargs(['good', '+bad', '*ugly*'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +bad

        >>> parse_varargs(['+bad', 'my.bad'])
        Traceback (most recent call last):
          ...
        ValueError: Invalid argument: +bad

        >>> parse_varargs(['mod', 'mod.*'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with . in the glob: mod.*. ...

        >>> parse_varargs(['my.bad', '+bad'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with . in the glob: my.bad. ...

        >>> parse_varargs(['mod/submod'])
        Traceback (most recent call last):
          ...
        ValueError: Attempted to use a namespaced package with / in the glob: mod/submod. ...
    """
    namespace_error_template = (
        "Attempted to use a namespaced package with {symbol} in the glob: {arg}. "
        "That is not (yet) supported. Use {top} instead and see "
        "https://bugzilla.redhat.com/1935266 for details."
    )
    globs = set()
    include_auto = False

    for arg in varargs:
        if arg.startswith("+"):
            # +auto is the only known flag
            if arg != "+auto":
                raise ValueError(f"Invalid argument: {arg}")
            include_auto = True
            continue

        # The dot is reported in preference to the slash if both are present
        for symbol in (".", "/"):
            if symbol in arg:
                top = arg.partition(symbol)[0]
                raise ValueError(namespace_error_template.format(symbol=symbol, arg=arg, top=top))

        globs.add(arg)

    return globs, include_auto
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def load_parsed_record(pyproject_record):
    """
    Load the JSON file with the parsed content of the RECORD file(s)

    The JSON object maps the path of a RECORD file to the list of paths
    listed in it. The file is always read as UTF-8, independently of the locale.

    Returns a dict with the same mapping, with all the strings converted
    to BuildrootPaths.

    Raises FileExistsError if the JSON object has more than one key,
    i.e. more than one RECORD file was found.
    """
    # JSON is UTF-8; don't let the locale's preferred encoding break
    # the reading of non-ASCII paths
    with open(pyproject_record, encoding="utf-8") as pyproject_record_file:
        content = json.load(pyproject_record_file)

    if len(content) > 1:
        raise FileExistsError("%pyproject install has found more than one *.dist-info/RECORD file. "
                              "Currently, %pyproject_save_files supports only one wheel → one file list mapping. "
                              "Feel free to open a bugzilla for pyproject-rpm-macros and describe your usecase.")

    # Redefine strings stored in JSON to BuildRootPaths
    return {
        BuildrootPath(record_path): [BuildrootPath(f) for f in files]
        for record_path, files in content.items()
    }
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def dist_metadata(buildroot, record_path):
    """
    Returns distribution metadata (email.message.EmailMessage), possibly empty

    The distribution is looked up in the directory that contains record_path,
    converted to a real path inside the given buildroot.
    """
    dist_info_dir = record_path.parent
    return Distribution.at(dist_info_dir.to_real(buildroot)).metadata
|
|
|
838e4d |
|
|
|
98862f |
|
|
|
98862f |
def pyproject_save_files_and_modules(buildroot, sitelib, sitearch, python_version, pyproject_record, prefix, varargs):
    """
    Processes the arguments passed from the %{pyproject_save_files} macro

    Returns a tuple of two lists:
      - paths for the %files section
      - module names for the %check section
    """
    # On 32 bit architectures, sitelib equals to sitearch
    # Going through a set drops the duplicate, so one directory is not browsed twice
    sitedirs = sorted({sitelib, sitearch})

    globs, include_auto = parse_varargs(varargs)
    parsed_records = load_parsed_record(pyproject_record)

    file_list, module_list = [], []

    for record_path, files in parsed_records.items():
        paths_dict = classify_paths(
            record_path,
            files,
            dist_metadata(buildroot, record_path),
            sitedirs,
            python_version,
            prefix,
        )
        file_list.extend(generate_file_list(paths_dict, globs, include_auto))
        module_list.extend(generate_module_list(paths_dict, globs))

    return file_list, module_list
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def main(cli_args):
    """
    Computes the %files entries and the module names from the parsed command
    line arguments and writes them, one item per line, to the paths given by
    cli_args.output_files and cli_args.output_modules.
    """
    file_section, module_names = pyproject_save_files_and_modules(
        buildroot=cli_args.buildroot,
        sitelib=cli_args.sitelib,
        sitearch=cli_args.sitearch,
        python_version=cli_args.python_version,
        pyproject_record=cli_args.pyproject_record,
        prefix=cli_args.prefix,
        varargs=cli_args.varargs,
    )

    # Each output always ends with a trailing newline
    files_text = "\n".join(file_section) + "\n"
    cli_args.output_files.write_text(files_text, encoding="utf-8")

    modules_text = "\n".join(module_names) + "\n"
    cli_args.output_modules.write_text(modules_text, encoding="utf-8")
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
def argparser():
    """
    Builds the argparse.ArgumentParser for the %pyproject_save_files script.

    Only the positional MODULE_GLOB arguments show up in the help text;
    --help itself and all the required options are suppressed from it.
    """
    parser = argparse.ArgumentParser(
        description="Create %{pyproject_files} for a Python project.",
        prog="%pyproject_save_files",
        add_help=False,
        # custom usage to add +auto
        usage="%(prog)s MODULE_GLOB [MODULE_GLOB ...] [+auto]",
    )
    parser.add_argument(
        "--help",
        action="help",
        default=argparse.SUPPRESS,
        help=argparse.SUPPRESS,
    )

    # (option, type) pairs; every one of them is required and hidden from the help
    required_options = (
        ("--output-files", PosixPath),
        ("--output-modules", PosixPath),
        ("--buildroot", PosixPath),
        ("--sitelib", BuildrootPath),
        ("--sitearch", BuildrootPath),
        ("--python-version", str),
        ("--pyproject-record", PosixPath),
        ("--prefix", PosixPath),
    )
    required_group = parser.add_argument_group("required arguments")
    for option, converter in required_options:
        required_group.add_argument(option, type=converter, required=True, help=argparse.SUPPRESS)

    parser.add_argument(
        "varargs",
        nargs="+",
        metavar="MODULE_GLOB",
        help="Shell-like glob matching top-level module names to save into %%{pyproject_files}",
    )
    return parser
|
|
|
838e4d |
|
|
|
838e4d |
|
|
|
838e4d |
if __name__ == "__main__":
    # Script entry point: parse the command line, then write both output files
    cli_args = argparser().parse_args()
    main(cli_args)
|