From b4d103c6dec2aa0f8461e1ca78ad23d692e68d36 Mon Sep 17 00:00:00 2001
From: Matthew Almond <malmond@fb.com>
Date: Thu, 20 May 2021 13:35:13 -0700
Subject: [PATCH] Add option --skip-filelists
This is a site-local optimization. Some packages and repos include an
enormous number of files. Generating filelists for them is extremely
expensive if the repo is also fast-changing.
Impact of skipping filelists: it breaks resolution of file/path-based
dependencies, as well as the `-f` (file ownership) and `-l` (list) options
in repoquery.
---
doc/createrepo_c.8 | 3 +++
src/cmd_parser.c | 2 ++
src/cmd_parser.h | 1 +
src/createrepo_c.c | 1 +
src/dumper_thread.c | 5 +++++
src/dumper_thread.h | 1 +
src/parsehdr.c | 3 ++-
src/parsehdr.h | 1 +
8 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8
index e9b3fc2..10702f4 100644
--- a/doc/createrepo_c.8
+++ b/doc/createrepo_c.8
@@ -213,5 +213,8 @@ Exit with retval 2 if there were any errors during processing
.SS \-\-ignore\-lock
.sp
Expert (risky) option: Ignore an existing .repodata/. (Remove the existing .repodata/ and create an empty new one to serve as a lock for other createrepo instances. For the repodata generation, a different temporary dir with the name in format .repodata.time.microseconds.pid/ will be used). NOTE: Use this option on your own risk! If two createrepos run simultaneously, then the state of the generated metadata is not guaranteed \- it can be inconsistent and wrong.
+.SS \-\-skip\-filelists
+.sp
+Expert (risky) option: Skip filelist generation.
.\" Generated by docutils manpage writer.
.
diff --git a/src/cmd_parser.c b/src/cmd_parser.c
index bbefa08..0ecf7f9 100644
--- a/src/cmd_parser.c
+++ b/src/cmd_parser.c
@@ -224,6 +224,8 @@ static GOptionEntry expert_entries[] =
"own risk! If two createrepos run simultaneously, then the state of the "
"generated metadata is not guaranteed - it can be inconsistent and wrong.",
NULL },
+ { "skip-filelists", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.skip_filelists),
+ "Expert (risky) option: Skip filelist generation.", NULL},
{ NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL },
};
diff --git a/src/cmd_parser.h b/src/cmd_parser.h
index 32bcf99..5eff9d9 100644
--- a/src/cmd_parser.h
+++ b/src/cmd_parser.h
@@ -57,6 +57,7 @@ struct CmdOptions {
char *general_compress_type;/*!< which compression type to use (even for
primary, filelists and other xml) */
gboolean skip_symlinks; /*!< ignore symlinks of packages */
+ gboolean skip_filelists; /*!< Skip creating filelists */
gint changelog_limit; /*!< number of changelog messages in
other.(xml|sqlite) */
gboolean unique_md_filenames; /*!< include the file checksums in
diff --git a/src/createrepo_c.c b/src/createrepo_c.c
index f4f4544..9dd288e 100644
--- a/src/createrepo_c.c
+++ b/src/createrepo_c.c
@@ -1253,6 +1253,7 @@ main(int argc, char **argv)
user_data.checksum_type = cmd_options->checksum_type;
user_data.checksum_cachedir = cmd_options->checksum_cachedir;
user_data.skip_symlinks = cmd_options->skip_symlinks;
+ user_data.skip_filelists = cmd_options->skip_filelists;
user_data.repodir_name_len = strlen(in_dir);
user_data.task_count = task_count;
user_data.package_count = 0;
diff --git a/src/dumper_thread.c b/src/dumper_thread.c
index 119f3bd..f7c4e35 100644
--- a/src/dumper_thread.c
+++ b/src/dumper_thread.c
@@ -431,6 +431,11 @@ cr_dumper_thread(gpointer data, gpointer user_data)
if (udata->checksum_cachedir)
hdrrflags = CR_HDRR_LOADHDRID | CR_HDRR_LOADSIGNATURES;
+
+ // Load filelists, unless --skip-filelists is passed.
+ if (udata->skip_filelists)
+ hdrrflags |= CR_HDRR_SKIPFILES;
+
// Get stat info about file
if (udata->old_metadata && !(udata->skip_stat)) {
if (stat(task->full_path, &stat_buf) == -1) {
diff --git a/src/dumper_thread.h b/src/dumper_thread.h
index 60f984d..654991f 100644
--- a/src/dumper_thread.h
+++ b/src/dumper_thread.h
@@ -66,6 +66,7 @@ struct UserData {
cr_ChecksumType checksum_type; // Constant representing selected checksum
const char *checksum_cachedir; // Dir with cached checksums
gboolean skip_symlinks; // Skip symlinks
+ gboolean skip_filelists; // Skip filelists
long task_count; // Total number of task to process
long package_count; // Total number of packages processed
diff --git a/src/parsehdr.c b/src/parsehdr.c
index 2775bf3..97bb01e 100644
--- a/src/parsehdr.c
+++ b/src/parsehdr.c
@@ -253,7 +253,8 @@ cr_package_from_header(Header hdr,
assert(x == dir_count);
}
- if (headerGet(hdr, RPMTAG_FILENAMES, full_filenames, flags) &&
+ if (!(hdrrflags & CR_HDRR_SKIPFILES) &&
+ headerGet(hdr, RPMTAG_FILENAMES, full_filenames, flags) &&
headerGet(hdr, RPMTAG_DIRINDEXES, indexes, flags) &&
headerGet(hdr, RPMTAG_BASENAMES, filenames, flags) &&
headerGet(hdr, RPMTAG_FILEFLAGS, fileflags, flags) &&
diff --git a/src/parsehdr.h b/src/parsehdr.h
index 032acca..e7a4a4a 100644
--- a/src/parsehdr.h
+++ b/src/parsehdr.h
@@ -39,6 +39,7 @@ typedef enum {
CR_HDRR_NONE = (1 << 0),
CR_HDRR_LOADHDRID = (1 << 1), /*!< Load hdrid */
CR_HDRR_LOADSIGNATURES = (1 << 2), /*!< Load siggpg and siggpg */
+ CR_HDRR_SKIPFILES = (1 << 3), /*!< Skip filelists */
} cr_HeaderReadingFlags;
/** Read data from header and return filled cr_Package structure.