Skip to content

Commit

Permalink
Allow incremental parsing without filelists or other xml
Browse files Browse the repository at this point in the history
Supplying a NULL value as a filelists or other "path" argument ought to
work.

closes #423
  • Loading branch information
dralley committed Nov 11, 2024
1 parent ac62420 commit 47dc726
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 43 deletions.
4 changes: 2 additions & 2 deletions src/python/createrepo_c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ def _warningcb(warning_type, message):
metadata_files = {record.type: record for record in repomd.records}
parser = RepositoryReader()
parser._primary_xml_path = os.path.join(path, metadata_files["primary"].location_href)
parser._filelists_xml_path = os.path.join(path, metadata_files["filelists"].location_href)
parser._other_xml_path = os.path.join(path, metadata_files["other"].location_href)
parser._filelists_xml_path = os.path.join(path, metadata_files["filelists"].location_href) if metadata_files.get("filelists") else None
parser._other_xml_path = os.path.join(path, metadata_files["other"].location_href) if metadata_files.get("other") else None

if metadata_files.get("updateinfo"):
parser._updateinfo_path = os.path.join(path, metadata_files["updateinfo"].location_href)
Expand Down
6 changes: 3 additions & 3 deletions src/python/xml_parser-py.c
Original file line number Diff line number Diff line change
Expand Up @@ -780,14 +780,14 @@ pkg_iterator_init(_PkgIteratorObject *self, PyObject *args, PyObject *kwargs)
static char *kwlist[] = {"primary", "filelists", "other", "newpkgcb",
"warningcb", NULL};

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sssOO:pkg_iterator_init", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "szzOO:pkg_iterator_init", kwlist,
&primary_path, &filelists_path, &other_path, &py_newpkgcb,
&py_warningcb)) {
return -1;
}

if (!primary_path || !filelists_path || !other_path) {
PyErr_SetString(PyExc_TypeError, "file paths must be provided");
if (!primary_path) {
PyErr_SetString(PyExc_TypeError, "primary file path must be provided");
return -1;
}

Expand Down
82 changes: 44 additions & 38 deletions src/xml_parser_main_metadata_together.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@
#define ERR_DOMAIN CREATEREPO_C_ERROR

typedef struct {
GSList *in_progress_pkgs_list;
int in_progress_count_primary;
int in_progress_count_filelists;
int in_progress_count_other;
GQueue *finished_pkgs_queue;
cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user
void *newpkgcb_data;// newpkgcb data passed in from user
cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user
void *pkgcb_data; // pkgcb data passed in from user
GSList *in_progress_pkgs_list;
int in_progress_count_primary;
int in_progress_count_filelists;
int in_progress_count_other;
GQueue *finished_pkgs_queue;
cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user
void *newpkgcb_data;// newpkgcb data passed in from user
cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user
void *pkgcb_data; // pkgcb data passed in from user
cr_PackageLoadingFlags loadingflags; // which callbacks need to be called before the package is considered "finished" loading
} cr_CbData;

struct _cr_PkgIterator {
Expand Down Expand Up @@ -67,17 +68,19 @@ struct _cr_PkgIterator {
static int
queue_package_if_finished(cr_Package *pkg, cr_CbData *cb_data)
{
if (pkg && (pkg->loadingflags & CR_PACKAGE_LOADED_PRI) && (pkg->loadingflags & CR_PACKAGE_LOADED_OTH) &&
(pkg->loadingflags & CR_PACKAGE_LOADED_FIL))
if (pkg && ((pkg->loadingflags & cb_data->loadingflags) == cb_data->loadingflags))
{
//remove first element in the list
cb_data->in_progress_pkgs_list = g_slist_delete_link(cb_data->in_progress_pkgs_list, cb_data->in_progress_pkgs_list);

// One package was fully finished
cb_data->in_progress_count_primary--;
cb_data->in_progress_count_filelists--;
cb_data->in_progress_count_other--;

if (cb_data->loadingflags & CR_PACKAGE_LOADED_FIL) {
cb_data->in_progress_count_filelists--;
}
if (cb_data->loadingflags & CR_PACKAGE_LOADED_OTH) {
cb_data->in_progress_count_other--;
}
g_queue_push_tail(cb_data->finished_pkgs_queue, pkg);
}
return CR_CB_RET_OK;
Expand Down Expand Up @@ -319,8 +322,6 @@ parse_next_section(CR_FILE *target_file, const char *path, cr_ParserData *pd, GE

//TODO(amatej): there is quite some overlap with this and cr_load_xml_files,
// consider using this api to implement cr_load_xml_files?
// TODO: maybe whether or not individual files are parsed could be controlled by NULL paths? I think cr_load_xml_files
// already works that way.
cr_PkgIterator *
cr_PkgIterator_new(const char *primary_path,
const char *filelists_path,
Expand All @@ -332,8 +333,6 @@ cr_PkgIterator_new(const char *primary_path,
GError **err)
{
assert(primary_path);
assert(filelists_path);
assert(other_path);
assert(!err || *err == NULL);

cr_PkgIterator* new_iter = g_new0(cr_PkgIterator, 1);
Expand Down Expand Up @@ -370,30 +369,41 @@ cr_PkgIterator_new(const char *primary_path,
cbdata->newpkgcb_data = newpkgcb_data;

new_iter->tmp_err = NULL;
int parse_files_in_primary = 0;

GError* tmp_err = new_iter->tmp_err;
new_iter->primary_f = cr_open(primary_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
cbdata->loadingflags |= CR_PACKAGE_LOADED_PRI;
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", primary_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
}
new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
if (new_iter->filelists_path) {
new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
cbdata->loadingflags |= CR_PACKAGE_LOADED_FIL;
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
}
} else {
new_iter->filelists_is_done = 1;
parse_files_in_primary = 1;
}
new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
if (new_iter->other_path) {
new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err);
cbdata->loadingflags |= CR_PACKAGE_LOADED_OTH;
if (tmp_err) {
g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path);
cr_PkgIterator_free(new_iter, err);
return NULL;
}
} else {
new_iter->other_is_done = 1;
}

//TODO(amatej): In the future we could make filelists/other optional if there is a need for it. That would mean we
// should replace the last 0 in primary_parser_data_new depending on whether we have filelists or not.
new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, 0);
new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, parse_files_in_primary);
new_iter->filelists_pd = filelists_parser_data_new(newpkgcb_filelists, cbdata, pkgcb_filelists, cbdata, warningcb, warningcb_data);
new_iter->other_pd = other_parser_data_new(newpkgcb_other, cbdata, pkgcb_other, cbdata, warningcb, warningcb_data);
return new_iter;
Expand All @@ -404,18 +414,15 @@ cr_PkgIterator_parse_next(cr_PkgIterator *iter, GError **err) {
cr_CbData *cbdata = (cr_CbData*) iter->cbdata;

while (!cr_PkgIterator_is_finished(iter) && g_queue_is_empty(cbdata->finished_pkgs_queue)) {
while ((cbdata->in_progress_count_primary <= cbdata->in_progress_count_filelists ||
cbdata->in_progress_count_primary <= cbdata->in_progress_count_other) &&
!iter->primary_is_done)
while (!iter->primary_is_done)
{
iter->primary_is_done = parse_next_section(iter->primary_f, iter->primary_path, iter->primary_pd, err);
if (*err) {
return NULL;
}
}

while ((cbdata->in_progress_count_filelists <= cbdata->in_progress_count_primary ||
cbdata->in_progress_count_filelists <= cbdata->in_progress_count_other) &&
while (cbdata->in_progress_count_filelists <= cbdata->in_progress_count_primary &&
!iter->filelists_is_done)
{
iter->filelists_is_done = parse_next_section(iter->filelists_f, iter->filelists_path, iter->filelists_pd, err);
Expand All @@ -424,8 +431,7 @@ cr_PkgIterator_parse_next(cr_PkgIterator *iter, GError **err) {
}
}

while ((cbdata->in_progress_count_other <= cbdata->in_progress_count_filelists ||
cbdata->in_progress_count_other <= cbdata->in_progress_count_primary) &&
while (cbdata->in_progress_count_other <= cbdata->in_progress_count_primary &&
!iter->other_is_done)
{
iter->other_is_done = parse_next_section(iter->other_f, iter->other_path, iter->other_pd, err);
Expand Down
68 changes: 68 additions & 0 deletions tests/python/tests/test_xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,74 @@ def warningcb(warn_type, msg):
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())

def test_xml_parser_pkg_iterator_repo02_only_primary(self):
warnings = []
def warningcb(warn_type, msg):
warnings.append((warn_type, msg))

package_iterator = cr.PackageIterator(
primary_path=REPO_02_PRIXML, filelists_path=None, other_path=None,
warningcb=warningcb,
)
packages = list(package_iterator)

self.assertEqual(len(packages), 2)
self.assertEqual(packages[0].name, "fake_bash")
self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')]) # the files still get parsed - only from primary.xml
self.assertEqual(packages[0].changelogs, [])
self.assertListEqual(warnings, [])
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())


def test_xml_parser_pkg_iterator_repo02_no_primary(self):
def test():
cr.PackageIterator(
primary_path=None, filelists_path=REPO_02_FILXML, other_path=REPO_02_OTHXML,
)

self.assertRaises(TypeError, test)

def test_xml_parser_pkg_iterator_repo02_no_filelists(self):
warnings = []
def warningcb(warn_type, msg):
warnings.append((warn_type, msg))

package_iterator = cr.PackageIterator(
primary_path=REPO_02_PRIXML, filelists_path=None, other_path=REPO_02_OTHXML,
warningcb=warningcb,
)
packages = list(package_iterator)

self.assertEqual(len(packages), 2)
self.assertEqual(packages[0].name, "fake_bash")
self.assertEqual(packages[0].changelogs, [
('Tomas Mlcoch <[email protected]> - 1.1.1-1', 1334664000, '- First release'),
])
self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')]) # the files still get parsed - only from primary.xml
self.assertListEqual(warnings, [])
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())

def test_xml_parser_pkg_iterator_repo02_no_other(self):
warnings = []
def warningcb(warn_type, msg):
warnings.append((warn_type, msg))

package_iterator = cr.PackageIterator(
primary_path=REPO_02_PRIXML, filelists_path=REPO_02_FILXML, other_path=None,
warningcb=warningcb,
)
packages = list(package_iterator)

self.assertEqual(len(packages), 2)
self.assertEqual(packages[0].name, "fake_bash")
self.assertEqual(packages[0].changelogs, [])
self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')])
self.assertListEqual(warnings, [])
self.assertRaises(StopIteration, next, package_iterator)
self.assertTrue(package_iterator.is_finished())

def test_xml_parser_pkg_iterator_repo02_newpkgcb_as_filter(self):
def newpkgcb(pkgId, name, arch):
if name in {"fake_bash"}:
Expand Down
50 changes: 50 additions & 0 deletions tests/test_xml_parser_main_metadata_together.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,50 @@ test_cr_xml_package_iterator_00(void)
g_assert_cmpint(parsed, ==, 2);
}

static void
test_cr_xml_package_iterator_00_no_filelists(void)
{
int parsed = 0;
GError *tmp_err = NULL;
cr_Package *package = NULL;

cr_PkgIterator *pkg_iterator = cr_PkgIterator_new(
TEST_REPO_02_PRIMARY, NULL, TEST_REPO_02_OTHER, NULL, NULL, NULL, NULL, &tmp_err);

while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) {
parsed++;
cr_package_free(package);
}

g_assert(cr_PkgIterator_is_finished(pkg_iterator));
cr_PkgIterator_free(pkg_iterator, &tmp_err);

g_assert(tmp_err == NULL);
g_assert_cmpint(parsed, ==, 2);
}

static void
test_cr_xml_package_iterator_00_no_other(void)
{
int parsed = 0;
GError *tmp_err = NULL;
cr_Package *package = NULL;

cr_PkgIterator *pkg_iterator = cr_PkgIterator_new(
TEST_REPO_02_PRIMARY, TEST_REPO_02_FILELISTS, NULL, NULL, NULL, NULL, NULL, &tmp_err);

while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) {
parsed++;
cr_package_free(package);
}

g_assert(cr_PkgIterator_is_finished(pkg_iterator));
cr_PkgIterator_free(pkg_iterator, &tmp_err);

g_assert(tmp_err == NULL);
g_assert_cmpint(parsed, ==, 2);
}

static void
test_cr_xml_package_iterator_filelists_ext_00(void)
{
Expand Down Expand Up @@ -320,6 +364,12 @@ main(int argc, char *argv[])
g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00",
test_cr_xml_package_iterator_00);

g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00_no_filelists",
test_cr_xml_package_iterator_00_no_filelists);

g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00_no_other",
test_cr_xml_package_iterator_00_no_other);

g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_filelists_ext_00",
test_cr_xml_package_iterator_filelists_ext_00);

Expand Down

0 comments on commit 47dc726

Please sign in to comment.