From 9419fc80e6a09561e8fb38fdea9727afdc2917e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ale=C5=A1=20Mat=C4=9Bj?= <amatej@redhat.com>
Date: Wed, 9 Mar 2022 15:24:05 +0100
Subject: [PATCH] Revert "Add c API for parsing metadata together"
This reverts commit ff0912d2a89722700f001ef462abd730dacaa8e7.
---
src/CMakeLists.txt | 1 -
src/xml_parser.h | 39 --
src/xml_parser_main_metadata_together.c | 468 ------------------------
3 files changed, 508 deletions(-)
delete mode 100644 src/xml_parser_main_metadata_together.c
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b3ca9e6..64de052 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -31,7 +31,6 @@ SET (createrepo_c_SRCS
xml_parser_primary.c
xml_parser_repomd.c
xml_parser_updateinfo.c
- xml_parser_main_metadata_together.c
koji.c)
SET(headers
diff --git a/src/xml_parser.h b/src/xml_parser.h
index 5ce7c0e..a31718d 100644
--- a/src/xml_parser.h
+++ b/src/xml_parser.h
@@ -278,45 +278,6 @@ cr_xml_parse_updateinfo(const char *path,
void *warningcb_data,
GError **err);
-/** Parse all 3 main metadata types (primary, filelists and other) at the same time.
- * Once a package is fully parsed pkgcb is called which transfers ownership of the package
- * to the user, cr_xml_parse_main_metadata_together no longer needs it and it can be freed.
- * This means we don't have store all the packages in memory at the same time, which
- * significantly reduces the memory footprint.
- * Input metadata files can be compressed.
- * @param primary_path Path to a primary xml file.
- * @param filelists_path Path to a filelists xml file.
- * @param other_path Path to an other xml file.
- * @param newpkgcb Callback for a new package. Called when the new package
- * xml chunk is found and a package object to store
- * the data is needed.
- * @param newpkgcb_data User data for the newpkgcb.
- * @param pkgcb Package callback. Called when a package is completely
- * parsed containing information from all 3 main metadata
- * files. Could be NULL if newpkgcb is not NULL.
- * @param pkgcb_data User data for the pkgcb.
- * @param warningcb Callback for warning messages.
- * @param warningcb_data User data for the warningcb.
- * @param allow_out_of_order Whether we should allow different order of packages
- * among the main metadata files. If allowed, the more
- * the order varies the more memory we will need to
- * store all the started but unfinished packages.
- * @param err GError **
- * @return cr_Error code.
- */
-int
-cr_xml_parse_main_metadata_together(const char *primary_path,
- const char *filelists_path,
- const char *other_path,
- cr_XmlParserNewPkgCb newpkgcb,
- void *newpkgcb_data,
- cr_XmlParserPkgCb pkgcb,
- void *pkgcb_data,
- cr_XmlParserWarningCb warningcb,
- void *warningcb_data,
- gboolean allow_out_of_order,
- GError **err);
-
/** @} */
#ifdef __cplusplus
diff --git a/src/xml_parser_main_metadata_together.c b/src/xml_parser_main_metadata_together.c
deleted file mode 100644
index 9347c79..0000000
--- a/src/xml_parser_main_metadata_together.c
+++ /dev/null
@@ -1,468 +0,0 @@
-/*
- * Copyright (C) 2021 Red Hat, Inc.
- *
- * Licensed under the GNU Lesser General Public License Version 2.1
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <glib.h>
-#include <glib/gprintf.h>
-#include <assert.h>
-#include <errno.h>
-#include "error.h"
-#include "xml_parser.h"
-#include "xml_parser_internal.h"
-#include "package_internal.h"
-#include "misc.h"
-
-#define ERR_DOMAIN CREATEREPO_C_ERROR
-
-typedef struct {
- GHashTable *in_progress_pkgs_hash; //used only when allowing out of order pkgs
- GSList *in_progress_pkgs_list; // used only when not allowing out of order pkgs
- int in_progress_count_primary;
- int in_progress_count_filelists;
- int in_progress_count_other;
- cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user
- void *newpkgcb_data;// newpkgcb data passed in from user
- cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user
- void *pkgcb_data; // pkgcb data passed in from user
-} cr_CbData;
-
-static int
-call_user_callback_if_package_finished(cr_Package *pkg, cr_CbData *cb_data, GError **err)
-{
- if (pkg && (pkg->loadingflags & CR_PACKAGE_LOADED_PRI) && (pkg->loadingflags & CR_PACKAGE_LOADED_OTH) &&
- (pkg->loadingflags & CR_PACKAGE_LOADED_FIL))
- {
- if (cb_data->in_progress_pkgs_hash) {
- g_hash_table_remove(cb_data->in_progress_pkgs_hash, pkg->pkgId);
- } else {
- //remove first element in the list
- cb_data->in_progress_pkgs_list = cb_data->in_progress_pkgs_list->next;
- }
-
- // One package was fully finished
- cb_data->in_progress_count_primary--;
- cb_data->in_progress_count_filelists--;
- cb_data->in_progress_count_other--;
-
- // call user package callback
- GError *tmp_err = NULL;
- if (cb_data->pkgcb && cb_data->pkgcb(pkg, cb_data->pkgcb_data, &tmp_err)) {
- if (tmp_err)
- g_propagate_prefixed_error(err, tmp_err, "Parsing interrupted: ");
- else
- g_set_error(err, ERR_DOMAIN, CRE_CBINTERRUPTED, "Parsing interrupted");
- return CR_CB_RET_ERR;
- } else {
- // If callback return CRE_OK but it simultaneously set
- // the tmp_err then it's a programming error.
- assert(tmp_err == NULL);
- };
- }
- return CR_CB_RET_OK;
-}
-
-static cr_Package*
-find_in_progress_pkg(cr_CbData *cb_data, const char *pkgId, int in_progress_pkg_index, GError **err)
-{
- gpointer pval = NULL;
- if (cb_data->in_progress_pkgs_hash) {
- if (!g_hash_table_lookup_extended(cb_data->in_progress_pkgs_hash, pkgId, NULL, &pval)) {
- pval = NULL;
- }
- } else {
- // This is checking out of order pkgs because if we don't have in_progress_pkgs_hash -> we enforce
- // order by using a list
- pval = g_slist_nth_data(cb_data->in_progress_pkgs_list, in_progress_pkg_index);
- if (pval && g_strcmp0(((cr_Package *) pval)->pkgId, pkgId)) {
- g_set_error(err, ERR_DOMAIN, CRE_XMLPARSER,
- "Out of order metadata: %s vs %s.", ((cr_Package *) pval)->pkgId, pkgId);
- pval = NULL;
- }
- }
-
- return pval;
-}
-
-static void
-store_in_progress_pkg(cr_CbData *cb_data, cr_Package *pkg, const char *pkgId)
-{
- if (!pkg) {
- return;
- }
- if (cb_data->in_progress_pkgs_hash) {
- g_hash_table_insert(cb_data->in_progress_pkgs_hash, g_strdup(pkgId), pkg);
- } else {
- cb_data->in_progress_pkgs_list = g_slist_append(cb_data->in_progress_pkgs_list, pkg);
- }
-}
-
-static int
-newpkgcb_primary(cr_Package **pkg,
- G_GNUC_UNUSED const char *pkgId,
- G_GNUC_UNUSED const char *name,
- G_GNUC_UNUSED const char *arch,
- G_GNUC_UNUSED void *cbdata,
- GError **err)
-{
- assert(pkg && *pkg == NULL);
- assert(!err || *err == NULL);
-
- // This callback is called when parsing of the opening element of a package
- // is done. However because the opening element doesn't contain pkgId
- // (instead it looks like: <package type="rpm">) we cannot check if we
- // already have this package.
- // The only option is to create a new package and after its fully
- // parsed (in pkgcb_primary) either use this package or copy its data
- // into an already existing one.
- // Filelists and other have pkgId present in the opening element so we can
- // avoid this overhead.
- *pkg = cr_package_new();
-
- return CR_CB_RET_OK;
-}
-
-static int
-newpkg_general(cr_Package **pkg,
- const char *pkgId,
- const char *name,
- const char *arch,
- void *cbdata,
- int in_progress_count,
- GError **err)
-{
- assert(pkg && *pkg == NULL);
- assert(!err || *err == NULL);
-
- cr_CbData *cb_data = cbdata;
-
- GError *out_of_order_err = NULL;
- *pkg = find_in_progress_pkg(cb_data, pkgId, in_progress_count, &out_of_order_err);
-
- if (!*pkg) {
- // we are handling never before seen package
-
- if (cb_data->newpkgcb) {
- // user specified their own new function: call it
- if (cb_data->newpkgcb(pkg, pkgId, name, arch, cb_data->newpkgcb_data, err)) {
- return CR_CB_RET_ERR;
- }
- if (!*pkg) {
- // when the user callback doesn't return a pkg we should skip it,
- // this means out of order error doesn't apply
- g_clear_error(&out_of_order_err);
- }
- } else {
- *pkg = cr_package_new();
- }
-
- store_in_progress_pkg(cb_data, *pkg, pkgId);
- }
-
- if (*err) {
- return CR_CB_RET_ERR;
- }
-
- if (out_of_order_err) {
- g_propagate_error(err, out_of_order_err);
- return CR_CB_RET_ERR;
- }
-
- return CR_CB_RET_OK;
-}
-
-static int
-newpkgcb_filelists(cr_Package **pkg,
- const char *pkgId,
- G_GNUC_UNUSED const char *name,
- G_GNUC_UNUSED const char *arch,
- void *cbdata,
- GError **err)
-{
- cr_CbData *cb_data = cbdata;
- return newpkg_general(pkg, pkgId, name, arch, cbdata, cb_data->in_progress_count_filelists, err);
-}
-
-static int
-newpkgcb_other(cr_Package **pkg,
- const char *pkgId,
- G_GNUC_UNUSED const char *name,
- G_GNUC_UNUSED const char *arch,
- void *cbdata,
- GError **err)
-{
- cr_CbData *cb_data = cbdata;
- return newpkg_general(pkg, pkgId, name, arch, cbdata, cb_data->in_progress_count_other, err);
-}
-
-static int
-pkgcb_filelists(cr_Package *pkg, void *cbdata, G_GNUC_UNUSED GError **err)
-{
- cr_CbData *cb_data = cbdata;
- cb_data->in_progress_count_filelists++;
- pkg->loadingflags |= CR_PACKAGE_LOADED_FIL;
- return call_user_callback_if_package_finished(pkg, cb_data, err);
-}
-
-static int
-pkgcb_other(cr_Package *pkg, void *cbdata, G_GNUC_UNUSED GError **err)
-{
- cr_CbData *cb_data = cbdata;
- cb_data->in_progress_count_other++;
- pkg->loadingflags |= CR_PACKAGE_LOADED_OTH;
- return call_user_callback_if_package_finished(pkg, cb_data, err);
-}
-
-static int
-pkgcb_primary(cr_Package *pkg, void *cbdata, G_GNUC_UNUSED GError **err)
-{
- cr_CbData *cb_data = cbdata;
-
- GError *out_of_order_err = NULL;
- cr_Package *in_progress_pkg = find_in_progress_pkg(cb_data, pkg->pkgId, cb_data->in_progress_count_primary,
- &out_of_order_err);
- if (in_progress_pkg) {
- // package was already encountered in some other metadata type
-
- cr_package_copy_into(pkg, in_progress_pkg);
- cr_package_free(pkg);
- pkg = in_progress_pkg;
- } else {
- // we are handling never before seen package
-
- if (cb_data->newpkgcb) {
- // user specified their own new function: call it and copy package data into user_created_pkg
- cr_Package *user_created_pkg = NULL;
- if (cb_data->newpkgcb(&user_created_pkg, pkg->pkgId, pkg->name, pkg->arch, cb_data->newpkgcb_data, err)) {
- return CR_CB_RET_ERR;
- } else {
- if (user_created_pkg) {
- cr_package_copy_into(pkg, user_created_pkg);
- }
- // user_created_pkg can be NULL if newpkgcb returns OK but
- // not an allocated pkg -> this means we should skip it
- store_in_progress_pkg(cb_data, user_created_pkg, pkg->pkgId);
- cr_package_free(pkg);
- pkg = user_created_pkg;
- }
- if (!pkg) {
- // when the user callback doesn't return a pkg we should skip it,
- // this means out of order error doesn't apply
- g_clear_error(&out_of_order_err);
- }
- } else {
- store_in_progress_pkg(cb_data, pkg, pkg->pkgId);
- }
-
- }
-
- if (*err) {
- return CR_CB_RET_ERR;
- }
-
- if (out_of_order_err) {
- g_propagate_error(err, out_of_order_err);
- return CR_CB_RET_ERR;
- }
-
-
- if (pkg) {
- cb_data->in_progress_count_primary++;
- pkg->loadingflags |= CR_PACKAGE_LOADED_PRI;
- pkg->loadingflags |= CR_PACKAGE_FROM_XML;
- }
-
- return call_user_callback_if_package_finished(pkg, cb_data, err);
-}
-
-static gboolean
-parse_next_section(CR_FILE *target_file, const char *path, cr_ParserData *pd, GError **err)
-{
- char buf[XML_BUFFER_SIZE];
- GError *tmp_err = NULL;
- int parsed_len = cr_read(target_file, buf, XML_BUFFER_SIZE, &tmp_err);
- if (tmp_err) {
- g_critical("%s: Error while reading xml '%s': %s", __func__, path, tmp_err->message);
- g_propagate_prefixed_error(err, tmp_err, "Read error: ");
- return FALSE;
- }
- int done = parsed_len == 0;
- if (xmlParseChunk(pd->parser, buf, parsed_len, done)) {
- xmlErrorPtr xml_err = xmlCtxtGetLastError(pd->parser);
- g_critical("%s: parsing error '%s': %s", __func__, path,
- (xml_err) ? xml_err->message : "UNKNOWN_ERROR");
- g_set_error(err, ERR_DOMAIN, CRE_XMLPARSER,
- "Parse error '%s' at line: %d (%s)",
- path,
- (xml_err) ? (int) xml_err->line : 0,
- (xml_err) ? (char *) xml_err->message : "UNKNOWN_ERROR");
- return FALSE;
- }
-
- if (pd->err) {
- g_propagate_error(err, pd->err);
- return FALSE;
- }
-
- return done;
-}
-
-//TODO(amatej): there is quite some overlap with this and cr_load_xml_files,
-// we could use this api and just wrap it in cr_loax_xml_files?
-int cr_xml_parse_main_metadata_together(const char *primary_path,
- const char *filelists_path,
- const char *other_path,
- cr_XmlParserNewPkgCb newpkgcb,
- void *newpkgcb_data,
- cr_XmlParserPkgCb pkgcb,
- void *pkgcb_data,
- cr_XmlParserWarningCb warningcb,
- void *warningcb_data,
- gboolean allow_out_of_order,
- GError **err)
-{
- int ret = CRE_OK;
- CR_FILE *primary_f = NULL;
- CR_FILE *filelists_f = NULL;
- CR_FILE *other_f = NULL;
- GError *tmp_err = NULL;
-
- cr_CbData cbdata;
- cbdata.in_progress_pkgs_list = NULL;
- cbdata.in_progress_pkgs_hash = NULL;
- cbdata.newpkgcb = newpkgcb;
- cbdata.newpkgcb_data = newpkgcb_data;
- cbdata.pkgcb = pkgcb;
- cbdata.pkgcb_data = pkgcb_data;
-
- if (allow_out_of_order) {
- cbdata.in_progress_pkgs_hash = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
- }
-
- assert(primary_path);
- assert(filelists_path);
- assert(other_path);
- assert(newpkgcb || pkgcb);
- assert(!err || *err == NULL);
-
- cr_ParserData *primary_pd = NULL;
- cr_ParserData *filelists_pd = NULL;
- cr_ParserData *other_pd = NULL;
-
- primary_f = cr_open(primary_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
- if (tmp_err) {
- ret = tmp_err->code;
- g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", primary_path);
- g_clear_error(&tmp_err);
- goto out;
- }
- filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
- if (tmp_err) {
- ret = tmp_err->code;
- g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path);
- g_clear_error(&tmp_err);
- goto out;
- }
- other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
- if (tmp_err) {
- ret = tmp_err->code;
- g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path);
- g_clear_error(&tmp_err);
- goto out;
- }
-
- //TODO(amatej): In the future we could make filelists/other optional if there is a need for it. That would mean we
- // should replace the last 0 in primary_parser_data_new depending on whether we have filelists or not.
- primary_pd = primary_parser_data_new(newpkgcb_primary, &cbdata, pkgcb_primary, &cbdata, warningcb, warningcb_data, 0);
- filelists_pd = filelists_parser_data_new(newpkgcb_filelists, &cbdata, pkgcb_filelists, &cbdata, warningcb, warningcb_data);
- other_pd = other_parser_data_new(newpkgcb_other, &cbdata, pkgcb_other, &cbdata, warningcb, warningcb_data);
-
- gboolean primary_is_done = 0;
- gboolean filelists_is_done = 0;
- gboolean other_is_done = 0;
- cbdata.in_progress_count_primary = 0;
- cbdata.in_progress_count_filelists = 0;
- cbdata.in_progress_count_other = 0;
- while (!primary_is_done || !filelists_is_done || !other_is_done) {
- while ((cbdata.in_progress_count_primary <= cbdata.in_progress_count_filelists ||
- cbdata.in_progress_count_primary <= cbdata.in_progress_count_other) &&
- !primary_is_done)
- {
- primary_is_done = parse_next_section(primary_f, primary_path, primary_pd, err);
- if (*err) {
- ret = (*err)->code;
- goto out;
- }
-
- }
-
- while ((cbdata.in_progress_count_filelists <= cbdata.in_progress_count_primary ||
- cbdata.in_progress_count_filelists <= cbdata.in_progress_count_other) &&
- !filelists_is_done)
- {
- filelists_is_done = parse_next_section(filelists_f, filelists_path, filelists_pd, err);
- if (*err) {
- ret = (*err)->code;
- goto out;
- }
- }
-
- while ((cbdata.in_progress_count_other <= cbdata.in_progress_count_filelists ||
- cbdata.in_progress_count_other <= cbdata.in_progress_count_primary) &&
- !other_is_done)
- {
- other_is_done = parse_next_section(other_f, other_path, other_pd, err);
- if (*err) {
- ret = (*err)->code;
- goto out;
- }
- }
- }
-
-out:
- if (ret != CRE_OK) {
- // An error already encountered
- // just close the file without error checking
- cr_close(primary_f, NULL);
- cr_close(filelists_f, NULL);
- cr_close(other_f, NULL);
- } else {
- // No error encountered yet
- cr_close(primary_f, &tmp_err);
- if (!tmp_err)
- cr_close(filelists_f, &tmp_err);
- if (!tmp_err)
- cr_close(other_f, &tmp_err);
- if (tmp_err) {
- ret = tmp_err->code;
- g_propagate_prefixed_error(err, tmp_err, "Error while closing: ");
- }
- }
-
- cr_xml_parser_data_free(primary_pd);
- cr_xml_parser_data_free(filelists_pd);
- cr_xml_parser_data_free(other_pd);
-
- if (allow_out_of_order) {
- g_hash_table_destroy(cbdata.in_progress_pkgs_hash);
- } else {
- cr_slist_free_full(cbdata.in_progress_pkgs_list, (GDestroyNotify) cr_package_free);
- }
-
- return ret;
-}
--
2.34.3