Blame SOURCES/pyproject_requirements_txt.py

838e4d
"""Best-effort parser for requirements.txt files"""
838e4d
838e4d
import urllib.parse
838e4d
from pathlib import Path
838e4d
import sys
838e4d
import os
838e4d
import re
838e4d
838e4d
# `#` starts a comment only at end of line and after whitespace
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Assume URLs start with a scheme; don't look for "egg=" URLs otherwise
URL_START_RE = re.compile(r'^[-_+a-zA-Z0-9]+://')

# `${NAME}` references: group `var` is the whole reference, `name` is NAME
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
# Leading run of characters accepted as a package name
PKGNAME_RE = re.compile(r'^[-_a-zA-Z0-9]+')
838e4d
838e4d
# The requirements.txt format evolved rather organically; expect weirdness.
838e4d
838e4d
def _included_requirements_file(line):
    """Return the file named by a `-r`/`--requirement` line, or None"""
    long_option = '--requirement'
    if line.startswith(long_option):
        rest = line[len(long_option):]
        # The long option takes its value after `=` or whitespace;
        # anything else is some other (unsupported) option.
        if rest[:1] == '=':
            return rest[1:].strip()
        if rest[:1].isspace():
            return rest.strip()
        return None
    if line.startswith('-r'):
        return line[2:].strip()
    return None


def convert_requirements_txt(lines, path: Path = None):
    """Convert lines of a requirements file to PEP 508-style requirement strs

    This does NOT handle all of requirements.txt features (only pip can do
    that), but tries its best.

    The resulting requirements might not actually be valid (either because
    they're wrong in the file, or because we missed a special case).

    lines is an iterable of the file's physical lines (e.g. an open file).

    path is the path to the requirements.txt file, used for options like `-r`
    (included files are looked up relative to its parent directory) and to
    name the file in error messages.

    Returns a list of requirement strings.

    Raises ValueError for option lines other than `-r`/`--requirement`.
    Errors from opening an included file are not caught here.
    """
    filename = path.name if path else '<requirements file>'
    requirements = []
    lines = combine_logical_lines(lines)
    lines = strip_comments(lines)
    lines = expand_env_vars(lines)
    for line in lines:
        if URL_START_RE.match(line):
            # Handle URLs with "egg=..." fragments
            # see https://pip.pypa.io/en/stable/cli/pip_install/#vcs-support
            parsed_url = urllib.parse.urlparse(line)
            parsed_fragment = urllib.parse.parse_qs(parsed_url.fragment)
            if 'egg' in parsed_fragment:
                # Prepend the package name to the URL.
                match = PKGNAME_RE.match(parsed_fragment['egg'][0])
                if match:
                    requirements.append(f'{match[0]}@{line}')
                    continue
            # If that didn't work, pass the line on;
            # the caller will deal with invalid requirements
            requirements.append(line)
            continue

        included = _included_requirements_file(line)
        if included is not None:
            # Included files are relative to the including file, if known
            recursed_path = Path(included)
            if path:
                recursed_path = path.parent / recursed_path
            with recursed_path.open() as f:
                requirements.extend(convert_requirements_txt(f, recursed_path))
        elif line.startswith('-'):
            raise ValueError(f'{filename}: unsupported requirements file option: {line}')
        else:
            requirements.append(line)
    return requirements
838e4d
838e4d
def combine_logical_lines(lines):
    """Combine logical lines together (backslash line-continuation)

    Yields one string per logical line. Whole-line comments are dropped
    before continuation is resolved, so a comment line in the middle of a
    continued line does not end it.

    After the input is exhausted, whatever is still pending is yielded as
    one more line: the text of an unterminated continuation, or the empty
    string if nothing is pending.
    """
    pieces = []
    for line in lines:
        line = line.rstrip('\n')
        # Whole-line comments *only* are removed before line-continuation
        if COMMENT_RE.match(line):
            continue
        if line.endswith('\\'):
            # Drop the backslash; the next physical line continues this one
            pieces.append(line[:-1])
        else:
            # trailing whitespace is only removed from full logical lines
            pieces.append(line.rstrip())
            yield ''.join(pieces)
            pieces = []
    yield ''.join(pieces)
838e4d
838e4d
838e4d
def strip_comments(lines):
    """Remove trailing comments and surrounding whitespace from lines

    Yields only the lines that are non-empty after stripping.
    """
    for line in lines:
        # The first item of the split is the text before the comment
        # (or the whole line, if there is no comment)
        content = COMMENT_RE.split(line, maxsplit=1)[0].strip()
        if content:
            yield content
838e4d
838e4d
838e4d
def expand_env_vars(lines):
    """Replace `${NAME}` references with values of environment variables

    References to variables that are not set are left in the text as-is.
    """
    def repl(match):
        # match[0] is the whole `${NAME}` reference; keep it if NAME is unset
        return os.getenv(match['name'], match[0])

    for line in lines:
        yield ENV_VAR_RE.sub(repl, line)