Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split CPE file to reduce sync memory usage (9.0) #901

Merged
merged 9 commits into from
Dec 12, 2019
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Try importing private keys with libssh if GnuTLS fails [#841](https://github.com/greenbone/gvmd/pull/841)
- Allow resuming OSPd-based OpenVAS tasks [#869](https://github.com/greenbone/gvmd/pull/869)
- Require Postgres 9.6 as a minimum [#872](https://github.com/greenbone/gvmd/pull/872)
- Speed up the SCAP sync [#875](https://github.com/greenbone/gvmd/pull/875) [#877](https://github.com/greenbone/gvmd/pull/877) [#879](https://github.com/greenbone/gvmd/pull/879) [#881](https://github.com/greenbone/gvmd/pull/881) [#883](https://github.com/greenbone/gvmd/pull/883) [#887](https://github.com/greenbone/gvmd/pull/887) [#889](https://github.com/greenbone/gvmd/pull/889) [#890](https://github.com/greenbone/gvmd/pull/890) [#891](https://github.com/greenbone/gvmd/pull/891)
- Speed up the SCAP sync [#875](https://github.com/greenbone/gvmd/pull/875) [#877](https://github.com/greenbone/gvmd/pull/877) [#879](https://github.com/greenbone/gvmd/pull/879) [#881](https://github.com/greenbone/gvmd/pull/881) [#883](https://github.com/greenbone/gvmd/pull/883) [#887](https://github.com/greenbone/gvmd/pull/887) [#889](https://github.com/greenbone/gvmd/pull/889) [#890](https://github.com/greenbone/gvmd/pull/890) [#891](https://github.com/greenbone/gvmd/pull/891) [#901](https://github.com/greenbone/gvmd/pull/901)
- Change rows of built-in default filters to -2 (use "Rows Per Page" setting) [#896](https://github.com/greenbone/gvmd/pull/896)
- Force NVT update in migrate_219_to_220 [#895](https://github.com/greenbone/gvmd/pull/895)

Expand Down
1 change: 1 addition & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Prerequisites:
* PostgreSQL database >= 9.6
* pkg-config
* libical >= 1.0.0
* xml_split (optional, lowers sync memory usage, Debian package: xml-twig-tools)

Prerequisites for certificate generation:
* GnuTLS certtool
Expand Down
267 changes: 227 additions & 40 deletions src/manage_sql_secinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,127 @@ increment_transaction_size (int* current_size)
}
}

/**
 * @brief Split an XML file into roughly equal pieces using xml_split.
 *
 * The file is copied into a fresh temp dir as split.xml, and split there.
 * Each generated content file is then rewritten so that it is wrapped in
 * the root element of the original file instead of xml_split's own wrapper.
 *
 * @param[in]  path  Path to file.
 * @param[in]  size  Approx size of split files.  In same format that
 *                   xml_split accepts, eg "200Kb".  Interpolated into a
 *                   shell command without escaping, so it must be trusted.
 * @param[in]  tail  Text to replace last line of split files.  Interpolated
 *                   inside single quotes in a shell command without
 *                   escaping, so it must be trusted and free of "'".
 *
 * @return Temp dir holding split files, or NULL on error.  The string is in
 *         static storage: it must not be freed and is overwritten by the
 *         next call.  On success the caller is responsible for removing the
 *         directory; on error the directory has already been removed.
 */
static const gchar *
split_xml_file (gchar *path, const gchar *size, const gchar *tail)
{
  static const gchar dir_template[] = "/tmp/gvmd-split-xml-file-XXXXXX";
  static gchar dir[sizeof (dir_template)];
  gchar *previous_dir, *command;
  int ret;

  /* mkdtemp replaces the XXXXXX placeholder in place.  Restore the template
   * on every call, otherwise a second call would pass mkdtemp a name that no
   * longer ends in XXXXXX and it would fail. */
  g_strlcpy (dir, dir_template, sizeof (dir));

  if (mkdtemp (dir) == NULL)
    {
      g_warning ("%s: Failed to make temp dir: %s",
                 __func__,
                 strerror (errno));
      return NULL;
    }

  command = NULL;

  previous_dir = getcwd (NULL, 0);
  if (previous_dir == NULL)
    {
      g_warning ("%s: Failed to getcwd: %s",
                 __func__,
                 strerror (errno));
      goto fail;
    }

  if (chdir (dir))
    {
      g_warning ("%s: Failed to chdir: %s",
                 __func__,
                 strerror (errno));
      goto fail;
    }

  if (gvm_file_copy (path, "split.xml") == FALSE)
    goto fail_chdir_back;

  /* xml_split will chop split.xml into files that are roughly 'size' big.
   *
   * The generated files are always put in the directory that holds
   * split.xml, as follows:
   *
   *     split.xml      Source XML.
   *     split-00.xml   Master generated XML.  No content, just includes other
   *                    files.  The include statements are wrapped in the
   *                    root element from split.xml.
   *     split-01.xml   Generated XML content.  Wrapped in an <xml_split:root>
   *                    element.
   *     split-02.xml   Second generated content file.
   *     ...
   *     split-112.xml  Last content, for example.
   *
   * Parsing the generated files independently will only work if the files
   * contain the original root element (for example, because the parser
   * requires the namespace definitions to be present).
   *
   * So the command below needs to mess around a little bit to replace the
   * wrapper XML element in split-01.xml, split-02.xml, etc with the root
   * element from split-00.xml.
   *
   * Using tail and head is not super robust, but it's simple and it will
   * work as long as xml_split keeps the opening of the wrapper element
   * in split-00.xml on a dedicated line.  (It doesn't do this for the
   * closing element, so we use the tail argument instead.)
   */

  command = g_strdup_printf
             ("xml_split -s%s split.xml"
              " && head -n 2 split-00.xml > head.xml"
              " && echo '%s' > tail.xml"
              " && for F in split-*.xml; do"
              "    tail -n +3 $F"
              "    | head -n -1"
              "    | cat head.xml - tail.xml"
              "    > new.xml;"
              "    mv new.xml $F;"
              "    done",
              size,
              tail);

  g_debug ("%s: command: %s", __func__, command);
  ret = system (command);
  /* A shell that was killed by a signal has no meaningful exit status, so
   * treat anything other than a normal exit with status 0 as failure. */
  if ((ret == -1) || !WIFEXITED (ret) || WEXITSTATUS (ret))
    {
      g_warning ("%s: system failed with ret %i, %i, %s",
                 __func__,
                 ret,
                 WEXITSTATUS (ret),
                 command);
      goto fail_chdir_back;
    }

  g_free (command);

  if (chdir (previous_dir))
    g_warning ("%s: Failed to chdir back (will continue anyway)",
               __func__);

  g_free (previous_dir);

  return dir;

 fail_chdir_back:
  /* previous_dir must still be valid here, so it is only freed below. */
  if (chdir (previous_dir))
    g_warning ("%s: and failed to chdir back", __func__);

 fail:
  g_free (command);
  g_free (previous_dir);
  /* The caller only gets NULL, so it cannot clean up the temp dir itself. */
  gvm_file_remove_recurse (dir);
  return NULL;
}


/* Helper: buffer structure for INSERTs. */

Expand Down Expand Up @@ -2259,59 +2380,33 @@ insert_scap_cpe (inserts_t *inserts, element_t cpe_item, element_t item_metadata
}

/**
* @brief Update SCAP CPEs.
* @brief Update SCAP CPEs from a file.
*
* @param[in] last_scap_update Time of last SCAP update.
* @param[in] path Path to file.
* @param[in] last_cve_update Time of last CVE update.
*
* @return 0 nothing to do, 1 updated, -1 error.
*/
static int
update_scap_cpes (int last_scap_update)
update_scap_cpes_from_file (const gchar *path, int last_cve_update)
{
GError *error;
element_t element, cpe_list, cpe_item;
gchar *xml, *full_path;
gchar *xml;
gsize xml_len;
GStatBuf state;
int updated_scap_cpes, last_cve_update;
int updated_scap_cpes;
inserts_t inserts;

updated_scap_cpes = 0;
full_path = g_build_filename (GVM_SCAP_DATA_DIR,
"official-cpe-dictionary_v2.2.xml",
NULL);

if (g_stat (full_path, &state))
{
g_warning ("%s: No CPE dictionary found at %s",
__FUNCTION__,
strerror (errno));
return -1;
}

if ((state.st_mtime - (state.st_mtime % 60)) <= last_scap_update)
{
g_info ("Skipping CPEs, file is older than last revision"
" (this is not an error)");
g_free (full_path);
return -1;
}

g_info ("Updating CPEs");

/* This will be zero for an empty db, so everything will be added. */
last_cve_update = sql_int ("SELECT max (modification_time)"
" FROM scap.cves;");
g_debug ("%s: parsing %s", __func__, path);

g_debug ("%s: parsing %s", __FUNCTION__, full_path);
updated_scap_cpes = 0;

error = NULL;
g_file_get_contents (full_path, &xml, &xml_len, &error);
g_free (full_path);
g_file_get_contents (path, &xml, &xml_len, &error);
if (error)
{
g_warning ("%s: Failed to get contents: %s",
__FUNCTION__,
__func__,
error->message);
g_error_free (error);
return -1;
Expand All @@ -2320,7 +2415,7 @@ update_scap_cpes (int last_scap_update)
if (parse_element (xml, &element))
{
g_free (xml);
g_warning ("%s: Failed to parse element", __FUNCTION__);
g_warning ("%s: Failed to parse element", __func__);
return -1;
}
g_free (xml);
Expand All @@ -2329,7 +2424,7 @@ update_scap_cpes (int last_scap_update)
if (strcmp (element_name (cpe_list), "cpe-list"))
{
element_free (element);
g_warning ("%s: CPE dictionary missing CPE-LIST", __FUNCTION__);
g_warning ("%s: CPE dictionary missing CPE-LIST", __func__);
return -1;
}

Expand Down Expand Up @@ -2366,15 +2461,15 @@ update_scap_cpes (int last_scap_update)
item_metadata = element_child (cpe_item, "meta:item-metadata");
if (item_metadata == NULL)
{
g_warning ("%s: item-metadata missing", __FUNCTION__);
g_warning ("%s: item-metadata missing", __func__);
goto fail;
}

modification_date = element_attribute (item_metadata,
"modification-date");
if (modification_date == NULL)
{
g_warning ("%s: modification-date missing", __FUNCTION__);
g_warning ("%s: modification-date missing", __func__);
goto fail;
}

Expand Down Expand Up @@ -2407,6 +2502,91 @@ update_scap_cpes (int last_scap_update)
return -1;
}

/**
 * @brief Update SCAP CPEs.
 *
 * Checks whether the CPE dictionary in GVM_SCAP_DATA_DIR is newer than the
 * last SCAP update and, if so, loads it.  The dictionary is split into
 * smaller files first, and each piece is loaded in turn.  If splitting
 * fails the whole dictionary is loaded in one go instead.
 *
 * @param[in]  last_scap_update  Time of last SCAP update.
 *
 * @return 0 nothing to do, 1 updated, -1 error.
 */
static int
update_scap_cpes (int last_scap_update)
{
  gchar *full_path;
  const gchar *split_dir;
  GStatBuf state;
  int updated_scap_cpes, last_cve_update, index;

  updated_scap_cpes = 0;
  full_path = g_build_filename (GVM_SCAP_DATA_DIR,
                                "official-cpe-dictionary_v2.2.xml",
                                NULL);

  if (g_stat (full_path, &state))
    {
      g_warning ("%s: No CPE dictionary found at %s: %s",
                 __func__,
                 full_path,
                 strerror (errno));
      g_free (full_path);
      return -1;
    }

  /* The file time is rounded down to the minute before comparing. */
  if ((state.st_mtime - (state.st_mtime % 60)) <= last_scap_update)
    {
      g_info ("Skipping CPEs, file is older than last revision"
              " (this is not an error)");
      g_free (full_path);
      /* Nothing to do.  Must not be -1, which callers treat as failure. */
      return 0;
    }

  g_info ("Updating CPEs");

  /* This will be zero for an empty db, so everything will be added. */
  last_cve_update = sql_int ("SELECT max (modification_time)"
                             " FROM scap.cves;");

  split_dir = split_xml_file (full_path, "40Mb", "</cpe-list>");
  if (split_dir == NULL)
    {
      g_warning ("%s: Failed to split CPEs, attempting with full file",
                 __func__);
      updated_scap_cpes = update_scap_cpes_from_file (full_path,
                                                      last_cve_update);
      g_free (full_path);
      return updated_scap_cpes;
    }
  g_free (full_path);

  /* Content files are numbered from 01; split-00.xml is skipped.  Stop at
   * the first number for which no file exists. */
  for (index = 1; 1; index++)
    {
      int ret;
      gchar *path, *name;

      name = g_strdup_printf ("split-%02i.xml", index);
      path = g_build_filename (split_dir, name, NULL);
      g_free (name);

      if (g_stat (path, &state))
        {
          g_free (path);
          break;
        }

      ret = update_scap_cpes_from_file (path, last_cve_update);
      g_free (path);
      if (ret < 0)
        {
          gvm_file_remove_recurse (split_dir);
          return -1;
        }
      if (ret)
        updated_scap_cpes = 1;
    }

  gvm_file_remove_recurse (split_dir);

  return updated_scap_cpes;
}


/* SCAP update: CVEs. */

Expand Down Expand Up @@ -4839,6 +5019,7 @@ sync_scap (int lockfile)
g_info ("%s: Updating data from feed", __FUNCTION__);

g_debug ("%s: update cpes", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: Updating CPEs");

updated_scap_cpes = update_scap_cpes (last_scap_update);
if (updated_scap_cpes == -1)
Expand All @@ -4848,6 +5029,7 @@ sync_scap (int lockfile)
}

g_debug ("%s: update cves", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: Updating CVEs");

updated_scap_cves = update_scap_cves (last_scap_update);
if (updated_scap_cves == -1)
Expand All @@ -4857,6 +5039,7 @@ sync_scap (int lockfile)
}

g_debug ("%s: update ovaldefs", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: Updating OVALdefs");

updated_scap_ovaldefs = update_scap_ovaldefs (last_scap_update,
0 /* Feed data. */);
Expand All @@ -4867,6 +5050,7 @@ sync_scap (int lockfile)
}

g_debug ("%s: updating user defined data", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: Updating private OVALdefs");

switch (update_scap_ovaldefs (last_scap_update,
1 /* Private data. */))
Expand All @@ -4882,11 +5066,13 @@ sync_scap (int lockfile)
}

g_debug ("%s: update max cvss", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: Updating max CVSS");

update_scap_cvss (updated_scap_cves, updated_scap_cpes,
updated_scap_ovaldefs);

g_debug ("%s: update placeholders", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: Updating placeholders");

update_scap_placeholders (updated_scap_cves);

Expand All @@ -4899,6 +5085,7 @@ sync_scap (int lockfile)
}

g_info ("%s: Updating SCAP info succeeded", __FUNCTION__);
proctitle_set ("gvmd: Syncing SCAP: done");

manage_update_scap_db_cleanup ();

Expand Down