From a11f2297410f7b7a577192ef5ef4bf9d5e897b87 Mon Sep 17 00:00:00 2001
From: Mark Wielaard
Date: Jun 23 2017 14:24:36 +0000
Subject: Backport parallel debuginfo processing.

---

diff --git a/find-debuginfo-process-files-in-parallel.patch b/find-debuginfo-process-files-in-parallel.patch
new file mode 100644
index 0000000..030c5b2
--- /dev/null
+++ b/find-debuginfo-process-files-in-parallel.patch
@@ -0,0 +1,113 @@
+commit 1b338aa84d4c67fefa957352a028eaca1a45d1f6
+Author: Michal Marek
+Date:   Sat Sep 10 23:13:25 2016 +0200
+
+    find-debuginfo.sh: Process files in parallel
+
+    Add a -j option, which, when used, will spawn processes to do the
+    debuginfo extraction in parallel. A pipe is used to dispatch the files among
+    the processes.
+
+    Signed-off-by: Michal Marek
+
+diff --git a/macros.in b/macros.in
+index b03c5a9..8bde2d7 100644
+--- a/macros.in
++++ b/macros.in
+@@ -180,7 +180,7 @@
+ # the script. See the script for details.
+ #
+ %__debug_install_post \
+-    %{_rpmconfigdir}/find-debuginfo.sh %{?_missing_build_ids_terminate_build:--strict-build-id} %{?_no_recompute_build_ids:-n} %{?_include_minidebuginfo:-m} %{?_include_gdb_index:-i} %{?_unique_build_ids:--ver-rel "%{VERSION}-%{RELEASE}"} %{?_unique_debug_names:--unique-debug-arch "%{_arch}"} %{?_unique_debug_srcs:--unique-debug-src-base "%{name}"} %{?_find_debuginfo_dwz_opts} %{?_find_debuginfo_opts} "%{_builddir}/%{?buildsubdir}"\
++    %{_rpmconfigdir}/find-debuginfo.sh %{?_smp_mflags} %{?_missing_build_ids_terminate_build:--strict-build-id} %{?_no_recompute_build_ids:-n} %{?_include_minidebuginfo:-m} %{?_include_gdb_index:-i} %{?_unique_build_ids:--ver-rel "%{VERSION}-%{RELEASE}"} %{?_unique_debug_names:--unique-debug-arch "%{_arch}"} %{?_unique_debug_srcs:--unique-debug-src-base "%{name}"} %{?_find_debuginfo_dwz_opts} %{?_find_debuginfo_opts} "%{_builddir}/%{?buildsubdir}"\
+ %{nil}
+
+ # Template for debug information sub-package.
+diff --git a/scripts/find-debuginfo.sh b/scripts/find-debuginfo.sh
+index 6dcd5a4..2016222 100644
+--- a/scripts/find-debuginfo.sh
++++ b/scripts/find-debuginfo.sh
+@@ -74,6 +74,9 @@
+ # Base given by --unique-debug-src-base
+ unique_debug_src_base=
+
++# Number of parallel jobs to spawn
++n_jobs=1
++
+ BUILDDIR=.
+ out=debugfiles.list
+ nout=0
+@@ -137,6 +140,13 @@
+   -r)
+     strip_r=true
+     ;;
++  -j)
++    n_jobs=$2
++    shift
++    ;;
++  -j*)
++    n_jobs=${1#-j}
++    ;;
+   *)
+     BUILDDIR=$1
+     shift
+@@ -389,9 +399,56 @@
+   fi
+ }
+
+-while read nlinks inum f; do
+-  do_file "$nlinks" "$inum" "$f"
+-done <"$temp/primary"
++# 16^6 - 1 or about 16 million files
++FILENUM_DIGITS=6
++run_job()
++{
++  local jobid=$1 filenum
++  local SOURCEFILE=$temp/debugsources.$jobid ELFBINSFILE=$temp/elfbins.$jobid
++
++  >"$SOURCEFILE"
++  >"$ELFBINSFILE"
++  # can't use read -n, because it reads bytes one by one, allowing for
++  # races
++  while :; do
++    filenum=$(dd bs=$(( FILENUM_DIGITS + 1 )) count=1 status=none)
++    if test -z "$filenum"; then
++      break
++    fi
++    do_file $(sed -n "$(( 0x$filenum )) p" "$temp/primary")
++  done
++  echo 0 >"$temp/res.$jobid"
++}
++
++n_files=$(wc -l <"$temp/primary")
++if [ $n_jobs -gt $n_files ]; then
++  n_jobs=$n_files
++fi
++if [ $n_jobs -le 1 ]; then
++  while read nlinks inum f; do
++    do_file "$nlinks" "$inum" "$f"
++  done <"$temp/primary"
++else
++  for ((i = 1; i <= n_files; i++)); do
++    printf "%0${FILENUM_DIGITS}x\\n" $i
++  done | (
++    exec 3<&0
++    for ((i = 0; i < n_jobs; i++)); do
++      # The shell redirects stdin to /dev/null for background jobs. Work
++      # around this by duplicating fd 0
++      run_job $i <&3 &
++    done
++    wait
++  )
++  for f in "$temp"/res.*; do
++    res=$(< "$f")
++    if [ "$res" != "0" ]; then
++      exit 1
++    fi
++  done
++  cat "$temp"/debugsources.* >"$SOURCEFILE"
++  cat "$temp"/elfbins.* >"$ELFBINSFILE"
++fi
+
+ # Invoke the DWARF Compressor utility.
+ if $run_dwz \
diff --git a/find-debuginfo-split-traversal-and-extraction.patch b/find-debuginfo-split-traversal-and-extraction.patch
new file mode 100644
index 0000000..6ceb7be
--- /dev/null
+++ b/find-debuginfo-split-traversal-and-extraction.patch
@@ -0,0 +1,100 @@
+commit 038bfe01796f751001e02de41c5d8678f511f366
+Author: Michal Marek
+Date:   Sat Sep 10 23:13:24 2016 +0200
+
+    find-debuginfo.sh: Split directory traversal and debuginfo extraction
+
+    This simplifies the handling of hardlinks a bit and allows a later patch
+    to parallelize the debuginfo extraction.
+
+    Signed-off-by: Michal Marek
+
+diff --git a/scripts/find-debuginfo.sh b/scripts/find-debuginfo.sh
+index d83c3e2..6dcd5a4 100644
+--- a/scripts/find-debuginfo.sh
++++ b/scripts/find-debuginfo.sh
+@@ -283,32 +283,36 @@
+ strict_error=ERROR
+ $strict || strict_error=WARNING
+
+-# Strip ELF binaries
++temp=$(mktemp -d ${TMPDIR:-/tmp}/find-debuginfo.XXXXXX)
++trap 'rm -rf "$temp"' EXIT
++
++# Build a list of unstripped ELF files and their hardlinks
++touch "$temp/primary"
+ find "$RPM_BUILD_ROOT" ! -path "${debugdir}/*.debug" -type f \
+      \( -perm -0100 -or -perm -0010 -or -perm -0001 \) \
+      -print |
+ file -N -f - | sed -n -e 's/^\(.*\):[ ]*.*ELF.*, not stripped.*/\1/p' |
+ xargs --no-run-if-empty stat -c '%h %D_%i %n' |
+ while read nlinks inum f; do
+-  get_debugfn "$f"
+-  [ -f "${debugfn}" ] && continue
+-
+-  # If this file has multiple links, keep track and make
+-  # the corresponding .debug files all links to one file too.
+   if [ $nlinks -gt 1 ]; then
+-    eval linked=\$linked_$inum
+-    if [ -n "$linked" ]; then
+-      eval id=\$linkedid_$inum
+-      link=$debugfn
+-      get_debugfn "$linked"
+-      echo "hard linked $link to $debugfn"
+-      mkdir -p "$(dirname "$link")" && ln -nf "$debugfn" "$link"
++    var=seen_$inum
++    if test -n "${!var}"; then
++      echo "$inum $f" >>"$temp/linked"
+       continue
+     else
+-      eval linked_$inum=\$f
+-      echo "file $f has $[$nlinks - 1] other hard links"
++      read "$var" < <(echo 1)
+     fi
+   fi
++  echo "$nlinks $inum $f" >>"$temp/primary"
++done
++
++# Strip ELF binaries
++do_file()
++{
++  local nlinks=$1 inum=$2 f=$3 id link linked
++
++  get_debugfn "$f"
++  [ -f "${debugfn}" ] && return
+
+   echo "extracting debug info from $f"
+   build_id_seed=
+@@ -328,9 +332,6 @@
+   fi
+   id=$(${lib_rpm_dir}/debugedit -b $debug_base_name -d $debug_dest_name \
+        $no_recompute -i $build_id_seed -l "$SOURCEFILE" "$f") || exit
+-  if [ $nlinks -gt 1 ]; then
+-    eval linkedid_$inum=\$id
+-  fi
+   if [ -z "$id" ]; then
+     echo >&2 "*** ${strict_error}: No build ID note found in $f"
+     $strict && exit 2
+@@ -376,7 +377,21 @@
+
+   echo "./${f#$RPM_BUILD_ROOT}" >> "$ELFBINSFILE"
+
+-done || exit
++  # If this file has multiple links, make the corresponding .debug files
++  # all links to one file too.
++  if [ $nlinks -gt 1 ]; then
++    grep "^$inum " "$temp/linked" | while read inum linked; do
++      link=$debugfn
++      get_debugfn "$linked"
++      echo "hard linked $link to $debugfn"
++      mkdir -p "$(dirname "$debugfn")" && ln -nf "$link" "$debugfn"
++    done
++  fi
++}
++
++while read nlinks inum f; do
++  do_file "$nlinks" "$inum" "$f"
++done <"$temp/primary"
+
+ # Invoke the DWARF Compressor utility.
+ if $run_dwz \
diff --git a/rpm.spec b/rpm.spec
index b413bf7..560c7d0 100644
--- a/rpm.spec
+++ b/rpm.spec
@@ -33,7 +33,7 @@
 Summary: The RPM package management system
 Name: rpm
 Version: %{rpmver}
-Release: %{?snapver:0.%{snapver}.}23%{?dist}
+Release: %{?snapver:0.%{snapver}.}24%{?dist}
 Group: System Environment/Base
 Url: http://www.rpm.org/
 Source0: http://ftp.rpm.org/releases/%{srcdir}/%{name}-%{srcver}.tar.bz2
@@ -93,6 +93,10 @@ Patch274: 0025-buildid-reset-attrs.patch
 # World writable empty (tmp) dirs in debuginfo packages (#641022)
 Patch280: rpm-4.13.x-writable-tmp-dir.patch
 
+# Parallel debuginfo processing
+Patch281: find-debuginfo-split-traversal-and-extraction.patch
+Patch282: find-debuginfo-process-files-in-parallel.patch
+
 # OpenSSL backend
 Patch300: 0001-Add-OpenSSL-support-for-digest-and-signatures.patch
@@ -594,6 +598,9 @@ exit 0
 %doc doc/librpm/html/*
 
 %changelog
+* Fri Jun 23 2017 Mark Wielaard - 4.13.0.1-24
+- Backport parallel debuginfo processing.
+
 * Tue May 30 2017 Mark Wielaard - 4.13.0.1-23
 - Fix resetting attr flags in buildid creation (#1449732)
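
Usage note: with these two patches applied, %__debug_install_post passes
%{?_smp_mflags} to find-debuginfo.sh, so debuginfo extraction runs with the
same -j<N> job count as the rest of the build; the new option parsing accepts
both "-j4" and "-j 4". A minimal sketch of tuning this per builder, assuming a
local ~/.rpmmacros and an example value of 4 jobs (not part of the patches
above):

    # ~/.rpmmacros: cap build parallelism (and debuginfo extraction) at 4 jobs
    %_smp_mflags -j4

    # One-off override on the command line, disabling parallel extraction:
    rpmbuild -ba --define '_smp_mflags %{nil}' rpm.spec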