Blame SOURCES/pyproject_requirements_txt.py

7ef706
"""Best-effort parser for requirements.txt files"""
7ef706
7ef706
import urllib.parse
7ef706
from pathlib import Path
7ef706
import sys
7ef706
import os
7ef706
import re
7ef706
7ef706
# `#` starts a comment only at end of line and after whitespace
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Assume URLs start with a scheme; don't look for "egg=" URLs otherwise
URL_START_RE = re.compile(r'^[-_+a-zA-Z0-9]+://')

# A `${NAME}` environment variable reference.
# Group `var` is the whole reference (used to leave unset variables as-is),
# group `name` is the variable name. Only upper-case ASCII letters, digits
# and underscores are accepted in the name.
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
# Leading run of characters accepted as a package name
# (applied to the value of an "egg=" URL fragment)
PKGNAME_RE = re.compile(r'^[-_a-zA-Z0-9]+')
7ef706
7ef706
# The requirements.txt format evolved rather organically; expect weirdness.
7ef706
7ef706
def convert_requirements_txt(lines, path:Path = None):
    """Turn the lines of a requirements file into a list of requirement strs

    This is a best-effort conversion: only pip itself understands every
    feature of the requirements.txt format. The returned strings are not
    validated, so they may be invalid (because the file is wrong, or because
    a special case was missed here).

    lines is an iterable of physical lines of the file.
    path is the path to the requirements file. It is used to resolve files
    included with `-r` and to name the file in error messages.

    Raises ValueError for any option line (starting with `-`) other than `-r`.
    """
    filename = path.name if path else '<requirements file>'
    converted = []
    # Join continuation lines, drop comments and blank lines,
    # then expand ${VAR} references.
    logical_lines = expand_env_vars(
        strip_comments(combine_logical_lines(lines))
    )
    for entry in logical_lines:
        if URL_START_RE.match(entry):
            # URLs may carry the package name in an "egg=..." fragment
            # see https://pip.pypa.io/en/stable/cli/pip_install/#vcs-support
            url_fragment = urllib.parse.urlparse(entry).fragment
            egg_values = urllib.parse.parse_qs(url_fragment).get('egg')
            name_match = PKGNAME_RE.match(egg_values[0]) if egg_values else None
            if name_match:
                # Prepend the package name to the URL.
                converted.append(f'{name_match[0]}@{entry}')
            else:
                # No usable name: pass the line on unchanged;
                # the caller will deal with invalid requirements
                converted.append(entry)
        elif entry.startswith('-r'):
            # Included file: resolved relative to the including file, if known
            included = entry[2:].strip()
            included = Path(path.parent / included if path else included)
            with included.open() as f:
                converted.extend(convert_requirements_txt(f, included))
        elif entry.startswith('-'):
            raise ValueError(f'{filename}: unsupported requirements file option: {entry}')
        else:
            converted.append(entry)
    return converted
7ef706
7ef706
def combine_logical_lines(lines):
    """Combine physical lines into logical lines (backslash line-continuation)

    lines is an iterable of strings, each optionally ending with a line
    terminator (LF or CRLF).

    Yields one string per logical line, with the continuation backslashes
    removed and trailing whitespace stripped from each completed logical line.
    Whole-line comments are dropped, even in the middle of a continued line.
    A final (possibly empty) string is always yielded for whatever remains
    after the last physical line.
    """
    pieces = []
    for line in lines:
        # Strip the line terminator. The carriage return of a CRLF ending is
        # removed too, so that a trailing backslash is still recognized
        # as a continuation marker.
        line = line.rstrip('\r\n')
        # Whole-line comments *only* are removed before line-continuation
        if COMMENT_RE.match(line):
            continue
        if line.endswith('\\'):
            pieces.append(line[:-1])
        else:
            # trailing whitespace is only removed from full logical lines
            pieces.append(line.rstrip())
            yield ''.join(pieces)
            pieces = []
    # Flush an unterminated continuation at the end of input.
    # This yields an empty string when nothing is pending.
    yield ''.join(pieces)
7ef706
7ef706
7ef706
def strip_comments(lines):
    """Yield each line without its comment and surrounding whitespace

    Lines that are empty after stripping are skipped.
    """
    for raw_line in lines:
        # Keep only the text before the comment (if there is one)
        before_comment = COMMENT_RE.split(raw_line, maxsplit=1)[0]
        stripped = before_comment.strip()
        if not stripped:
            continue
        yield stripped
7ef706
7ef706
7ef706
def expand_env_vars(lines):
    """Yield each line with `${NAME}` references replaced by environment values

    Every match of ENV_VAR_RE is replaced by the value of the environment
    variable it names. References to variables that are not set are left
    in the line unchanged.
    """
    def repl(match):
        value = os.getenv(match['name'])
        if value is None:
            # Unset variable: keep the original `${NAME}` text
            return match['var']
        return value

    for line in lines:
        yield ENV_VAR_RE.sub(repl, line)