From 26d24c788ba0e0f67f2bb4f502c25b3d080b928e Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Tue, 6 Aug 2024 10:46:21 -0700 Subject: [PATCH 1/2] libhostlist: increase performance of hostlist_find() problem: hostrange_hn_within requires an allocated hostname object, which must be re-created for every query. This is very expensive when the queries are frequent or repeated, especially in hostlist_find_host solution: add a new function hostrange_str_within that uses the exact same logic without building a newly allocated hostname object Use the existing string lengths in more places, and instead of re-parsing the suffix find the power of 10 matching the length of the string, and mod by that to chop off the extra digit in the recursive case Finally, add an interface to do all the prep work and return an opaque handle to the hostname object hostlist uses internally so a caller can re-use a pre-built object with the new hostlist_find_hostname interface. The result comes out a bit over twice as fast as directly using the allocation-free interface. --- src/common/libhostlist/hostlist.c | 52 +++++++-- src/common/libhostlist/hostlist.h | 29 +++++ src/common/libhostlist/hostname.c | 137 +++++++++++++++++++++--- src/common/libhostlist/hostname.h | 40 +++++-- src/common/libhostlist/hostrange.c | 55 +++++----- src/common/libhostlist/hostrange.h | 5 +- src/common/libhostlist/test/hostname.c | 2 +- src/common/libhostlist/test/hostrange.c | 37 +++---- 8 files changed, 277 insertions(+), 80 deletions(-) diff --git a/src/common/libhostlist/hostlist.c b/src/common/libhostlist/hostlist.c index 3360321ff31b..6ce57b56495a 100644 --- a/src/common/libhostlist/hostlist.c +++ b/src/common/libhostlist/hostlist.c @@ -26,6 +26,7 @@ #include "hostlist.h" #include "hostrange.h" +#include "hostname.h" #include "util.h" /* number of elements to allocate when extending the hostlist array */ @@ -384,7 +385,7 @@ static int hostlist_append_host (struct hostlist *hl, const char *str) { int rc = -1; struct hostrange * hr; - struct hostname * hn; + struct hostlist_hostname * hn; assert (hl != NULL); @@ -687,16 +688,10 @@ int hostlist_count (struct hostlist *hl) } static int hostlist_find_host (struct hostlist *hl, - const char *hostname, + struct stack_hostname *hn, struct current *cur) { int i, count, ret = -1; - struct hostname * hn; - - hn = hostname_create (hostname); - if (!hn) - return -1; - for (i = 0, count = 0; i < hl->nranges; i++) { int offset = hostrange_hn_within (hl->hr[i], hn); if (offset >= 0) { @@ -708,7 +703,6 @@ static int hostlist_find_host (struct hostlist *hl, count += hostrange_count (hl->hr[i]); } - hostname_destroy (hn); if (ret < 0) errno = ENOENT; return ret; @@ -721,7 +715,38 @@ int hostlist_find (struct hostlist *hl, const char *hostname) errno = EINVAL; return -1; } - return hostlist_find_host (hl, hostname, &hl->current); + struct stack_hostname hn_storage; + + struct stack_hostname *hn = hostname_stack_create (&hn_storage, hostname); + if (!hn) + return -1; + return hostlist_find_host (hl, hn, &hl->current); +} + +int hostlist_find_hostname (struct hostlist *hl, struct hostlist_hostname *hn) +{ + struct stack_hostname hn_storage; + struct stack_hostname *shn; + + if (!hl || !hn) { + errno = EINVAL; + return -1; + } + + shn = hostname_stack_create_from_hostname (&hn_storage, hn); + if (!hn) + return -1; + return hostlist_find_host (hl, shn, &hl->current); +} + +struct hostlist_hostname *hostlist_hostname_create (const char *hn) +{ + return hostname_create (hn); +} + +void hostlist_hostname_destroy (struct hostlist_hostname *hn) +{ + hostname_destroy (hn); } /* Remove host at cursor 'cur'. If the current real cursor hl->current @@ -786,7 +811,12 @@ static int hostlist_remove_at (struct hostlist *hl, struct current *cur) static int hostlist_delete_host (struct hostlist *hl, const char *hostname) { struct current cur = { 0 }; - int n = hostlist_find_host (hl, hostname, &cur); + struct stack_hostname hn_storage; + + struct stack_hostname *hn = hostname_stack_create (&hn_storage, hostname); + if (!hn) + return -1; + int n = hostlist_find_host (hl, hn, &cur); if (n < 0) return errno == ENOENT ? 0 : -1; return hostlist_remove_at (hl, &cur); diff --git a/src/common/libhostlist/hostlist.h b/src/common/libhostlist/hostlist.h index c2f982afad62..3ed1c9d595df 100644 --- a/src/common/libhostlist/hostlist.h +++ b/src/common/libhostlist/hostlist.h @@ -67,6 +67,35 @@ const char * hostlist_nth (struct hostlist * hl, int n); */ int hostlist_find (struct hostlist * hl, const char *hostname); +/* + * Create an opaque hostname object which can be used with the + * more efficient hostlist_find_hostname() below. This interface + * should be used when matching the same hostname to many hostlists + * since it caches processing of the hostname instead of duplicating + * the work on each call to hostlist_find(). + * + * Caller must call hostlist_hostname_destroy() to free memory associated + * with the returns hostlist_hostname object. + * + * Returns NULL on error. + */ +struct hostlist_hostname *hostlist_hostname_create (const char *hostname); + +/* + * Free resources associated with hostname hn. + */ +void hostlist_hostname_destroy (struct hostlist_hostname *hn); + +/* + * Search hostlist hl for the first host matching hostlist_hostname `hn` + * and return position in list if found. + * + * Leaves cursor pointing to the matching host. + * + * Returns -1 if host is not found. + */ +int hostlist_find_hostname (struct hostlist *hl, struct hostlist_hostname *hn); + /* * Delete all hosts in the list represented by `hosts' * diff --git a/src/common/libhostlist/hostname.c b/src/common/libhostlist/hostname.c index 613cbf35d333..bf2b6c0aee43 100644 --- a/src/common/libhostlist/hostname.c +++ b/src/common/libhostlist/hostname.c @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include "hostname.h" @@ -25,9 +27,10 @@ /* * return the location of the last char in the hostname prefix */ -static int host_prefix_end (const char *hostname) +static int host_prefix_end (const char *hostname, int len) { - int len = strlen (hostname); + if (len < 0) + return -1; int idx = len - 1; /* Rewind to the last non-digit in hostname @@ -44,16 +47,101 @@ static int host_prefix_end (const char *hostname) static int hostname_len (const char *hostname) { int len = strlen (hostname); - for (int i = 0; i < len; i++) - if (strchr (INVALID_CHARS, hostname[i])) - return -1; + if (len != strcspn (hostname, INVALID_CHARS)) + return -1; return len; } -struct hostname * hostname_create_with_suffix (const char *hostname, - int idx) +struct stack_hostname *hostname_stack_create_from_hostname ( + struct stack_hostname *hn, + struct hostlist_hostname *src) +{ + if (!hn || !src) { + errno = EINVAL; + return NULL; + } + hn->hostname = src->hostname; + hn->suffix = src->suffix; + hn->len = src->len; + hn->len_prefix = src->len_prefix; + hn->width = src->width; + hn->num = src->num; + return hn; +} + +struct stack_hostname *hostname_stack_create_with_suffix ( + struct stack_hostname *hn, + const char *hostname, + int len, + int idx) { - struct hostname * hn = NULL; + if (!hostname || !hn || len < 0) { + errno = EINVAL; + return NULL; + } + hn->hostname = hostname; + hn->len = len; + hn->len_prefix = idx + 1; + hn->num = 0; + hn->suffix = NULL; + if (idx == hn->len - 1) { + return hn; + } + hn->suffix = hn->hostname + hn->len_prefix; + + char *p = NULL; + hn->num = strtoul (hn->suffix, &p, 10); + if (p == hn->suffix && hn->num == 0) { + errno = EINVAL; + return NULL; + } + hn->width = hn->len - hn->len_prefix; + return hn; +} + +struct stack_hostname *hostname_stack_copy_one_less_digit ( + struct stack_hostname *dst, + struct stack_hostname *src) +{ + + if (!dst || !src) { + errno = EINVAL; + return NULL; + } + *dst = *src; + dst->len_prefix = src->len_prefix + 1; + if (src->len_prefix == dst->len - 1) { + return dst; + } + dst->suffix = dst->hostname + dst->len_prefix; + + dst->width = dst->len - dst->len_prefix; + if (dst->width < 0 || dst->width > 10) { + errno = EINVAL; + return NULL; + } + + // remove the most significant decimal digit without reparsing + // lookup table because pow is slow + static const int pow10[10] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000}; + dst->num = src->num % pow10[dst->width]; + return dst; +} + +struct hostlist_hostname *hostname_create_with_suffix (const char *hostname, + int idx) +{ + struct hostlist_hostname *hn = NULL; int len; char *p = NULL; @@ -71,10 +159,13 @@ struct hostname * hostname_create_with_suffix (const char *hostname, } hn->num = 0; + hn->len = len; + hn->len_prefix = idx + 1; + hn->width = hn->len - hn->len_prefix; hn->prefix = NULL; hn->suffix = NULL; - if (idx == strlen (hostname) - 1) { + if (idx == len - 1) { if ((hn->prefix = strdup (hostname)) == NULL) { hostname_destroy (hn); return NULL; @@ -101,23 +192,37 @@ struct hostname * hostname_create_with_suffix (const char *hostname, return hn; } +struct stack_hostname *hostname_stack_create (struct stack_hostname *hn, + const char *hostname) +{ + int len = 0; + if (!hostname || (len = hostname_len (hostname)) < 0) { + errno = EINVAL; + return NULL; + } + return hostname_stack_create_with_suffix (hn, + hostname, + len, + host_prefix_end (hostname, len)); +} /* - * create a struct hostname * object from a string hostname + * create a struct hostlist_hostname * object from a string hostname */ -struct hostname * hostname_create (const char *hostname) +struct hostlist_hostname *hostname_create (const char *hostname) { + int end; if (!hostname) { errno = EINVAL; return NULL; } - return hostname_create_with_suffix (hostname, - host_prefix_end (hostname)); + end = host_prefix_end (hostname, hostname_len (hostname)); + return hostname_create_with_suffix (hostname, end); } /* free a hostname object */ -void hostname_destroy (struct hostname * hn) +void hostname_destroy (struct hostlist_hostname * hn) { int saved_errno = errno; if (hn) { @@ -131,14 +236,14 @@ void hostname_destroy (struct hostname * hn) /* return true if the hostname has a valid numeric suffix */ -int hostname_suffix_is_valid (struct hostname * hn) +int hostname_suffix_is_valid (struct hostlist_hostname * hn) { return (hn && hn->suffix != NULL); } /* return the width (in characters) of the numeric part of the hostname */ -int hostname_suffix_width (struct hostname * hn) +int hostname_suffix_width (struct hostlist_hostname * hn) { if (!hn) { errno = EINVAL; diff --git a/src/common/libhostlist/hostname.h b/src/common/libhostlist/hostname.h index 25c7f9c0c7c6..cb2856adcf97 100644 --- a/src/common/libhostlist/hostname.h +++ b/src/common/libhostlist/hostname.h @@ -15,9 +15,12 @@ * Convenience structure representing a hostname as a prefix, * optional numeric part, and suffix. */ -struct hostname { +struct hostlist_hostname { char *hostname; /* cache of initialized hostname */ char *prefix; /* hostname prefix */ + int len; /* length minus invalid characters */ + int len_prefix; /* length of the prefix */ + int width; /* length of the suffix */ unsigned long num; /* numeric suffix */ /* string representation of numeric suffix @@ -25,11 +28,36 @@ struct hostname { char *suffix; }; -struct hostname * hostname_create (const char *s); -struct hostname * hostname_create_with_suffix (const char *s, int i); -void hostname_destroy (struct hostname *hn); +struct stack_hostname { + const char *hostname; /* cache of initialized hostname */ + int len; /* length minus invalid characters */ + int len_prefix; /* length of the prefix */ + int width; /* length of the suffix */ + unsigned long num; /* numeric suffix */ + + /* string representation of numeric suffix + * points into `hostname' */ + const char *suffix; +}; + +struct hostlist_hostname * hostname_create (const char *s); +struct hostlist_hostname * hostname_create_with_suffix (const char *s, int i); +struct stack_hostname *hostname_stack_create (struct stack_hostname *hn, + const char *hostname); +struct stack_hostname *hostname_stack_create_from_hostname ( + struct stack_hostname *hn, + struct hostlist_hostname *hn_src); +struct stack_hostname *hostname_stack_create_with_suffix ( + struct stack_hostname *hn, + const char *hostname, + int len, + int idx); +struct stack_hostname *hostname_stack_copy_one_less_digit ( + struct stack_hostname *dst, + struct stack_hostname *src); +void hostname_destroy (struct hostlist_hostname *hn); -int hostname_suffix_is_valid (struct hostname *hn); -int hostname_suffix_width (struct hostname *hn); +int hostname_suffix_is_valid (struct hostlist_hostname *hn); +int hostname_suffix_width (struct hostlist_hostname *hn); #endif /* !HAVE_FLUX_HOSTLIST_HOSTNAME_H */ diff --git a/src/common/libhostlist/hostrange.c b/src/common/libhostlist/hostrange.c index 63e37719c3de..93b1c1ac8760 100644 --- a/src/common/libhostlist/hostrange.c +++ b/src/common/libhostlist/hostrange.c @@ -57,6 +57,7 @@ static struct hostrange * hostrange_new (const char *prefix) hostrange_destroy (new); return NULL; } + new->len_prefix = strlen (prefix); return new; } @@ -74,7 +75,7 @@ struct hostrange * hostrange_create_single (const char *prefix) /* Create a hostrange object with a prefix, hi, lo, and format width */ -struct hostrange * hostrange_create (char *prefix, +struct hostrange * hostrange_create (const char *prefix, unsigned long lo, unsigned long hi, int width) @@ -189,9 +190,18 @@ int hostrange_prefix_cmp (struct hostrange * h1, struct hostrange * h2) if (h2 == NULL) return 1; - retval = strcmp (h1->prefix, h2->prefix); + int min_len = h1->len_prefix < h2->len_prefix ? h1->len_prefix : h2->len_prefix; + retval = memcmp (h1->prefix, h2->prefix, min_len); - return retval == 0 ? h2->singlehost - h1->singlehost : retval; + if (retval == 0) { + if (h1->len_prefix < h2->len_prefix) + return -1; + if (h1->len_prefix > h2->len_prefix) + return 1; + retval = h2->singlehost - h1->singlehost; + } + + return retval; } @@ -335,12 +345,8 @@ struct hostrange * hostrange_intersect (struct hostrange * h1, /* return offset of hn if it is in the hostlist or * -1 if not. */ -int hostrange_hn_within (struct hostrange * hr, struct hostname * hn) +int hostrange_hn_within (struct hostrange * hr, struct stack_hostname * hn) { - int len_hr; - int len_hn; - int width; - if (hr->singlehost) { /* * If the current hostrange [hr] is a `singlehost' (no valid @@ -351,7 +357,9 @@ int hostrange_hn_within (struct hostrange * hr, struct hostname * hn) * which case we return true. Otherwise, there is no * possibility that [hn] matches [hr]. */ - if (strcmp (hn->hostname, hr->prefix) == 0) + if (hr->len_prefix != hn->len) + return -1; + if (memcmp (hn->hostname, hr->prefix, hr->len_prefix) == 0) return 0; else return -1; @@ -362,18 +370,18 @@ int hostrange_hn_within (struct hostrange * hr, struct hostname * hn) * better have a valid numeric suffix, or there is no * way we can match */ - if (!hostname_suffix_is_valid (hn)) + if (!hn || !hn->suffix) return -1; - len_hn = strlen (hn->prefix); - /* * If hostrange and hostname prefixes don't match to at least * the length of the hostname object (which will have the min * possible prefix length), then there is no way the hostname * falls within the range [hr]. */ - if (strncmp (hr->prefix, hn->prefix, len_hn) != 0) + if (hr->len_prefix < hn->len_prefix) + return -1; + if (memcmp (hr->prefix, hn->hostname, hn->len_prefix) != 0) return -1; /* @@ -387,24 +395,20 @@ int hostrange_hn_within (struct hostrange * hr, struct hostname * hn) * hostname with the longer prefix and calling this function * again with the new hostname. (Yes, this is ugly, sorry) */ - len_hr = strlen (hr->prefix); - width = hostname_suffix_width (hn); - - if ((len_hn < len_hr) - && (width > 1) - && (isdigit (hr->prefix [len_hr - 1])) - && (hr->prefix [len_hn] == hn->suffix[0]) ) { + if ((hn->len_prefix < hr->len_prefix) + && (hn->width > 1) + && (isdigit (hr->prefix [hr->len_prefix - 1])) + && (hr->prefix [hn->len_prefix] == hn->suffix[0]) ) { int rc; /* * Create new hostname object with its prefix offset by one */ - struct hostname * h = hostname_create_with_suffix (hn->hostname, - len_hn); + struct stack_hostname shn; + struct stack_hostname * h = hostname_stack_copy_one_less_digit (&shn, hn); /* * Recursive call :-o */ rc = hostrange_hn_within (hr, h); - hostname_destroy (h); return rc; } @@ -414,11 +418,10 @@ int hostrange_hn_within (struct hostrange * hr, struct hostname * hn) * falls within the range of [hr] if [hn] and [hr] prefix are * identical. */ - if ((len_hr == len_hn) - && (strcmp (hn->prefix, hr->prefix) == 0) + if ((hr->len_prefix == hn->len_prefix) && (hn->num <= hr->hi) && (hn->num >= hr->lo)) { - int width = hostname_suffix_width (hn); + int width = hn->width; if (!width_equiv (hr->lo, &hr->width, hn->num, &width)) return -1; return (hn->num - hr->lo); diff --git a/src/common/libhostlist/hostrange.h b/src/common/libhostlist/hostrange.h index 833f4b2ab771..1938946409e6 100644 --- a/src/common/libhostlist/hostrange.h +++ b/src/common/libhostlist/hostrange.h @@ -17,6 +17,7 @@ */ struct hostrange { char *prefix; /* alphanumeric prefix: */ + unsigned long len_prefix; /* length of the prefix */ /* beginning (lo) and end (hi) of suffix range */ unsigned long lo, hi; @@ -31,7 +32,7 @@ struct hostrange { struct hostrange * hostrange_create_single (const char *); -struct hostrange * hostrange_create (char *s, +struct hostrange * hostrange_create (const char *s, unsigned long lo, unsigned long hi, int width); @@ -56,7 +57,7 @@ int hostrange_join (struct hostrange *, struct hostrange *); struct hostrange * hostrange_intersect (struct hostrange *, struct hostrange *); -int hostrange_hn_within (struct hostrange *, struct hostname *); +int hostrange_hn_within (struct hostrange *, struct stack_hostname *); size_t hostrange_numstr(struct hostrange *, size_t, char *); diff --git a/src/common/libhostlist/test/hostname.c b/src/common/libhostlist/test/hostname.c index d0c531c5b01e..b65a1bc01221 100644 --- a/src/common/libhostlist/test/hostname.c +++ b/src/common/libhostlist/test/hostname.c @@ -48,7 +48,7 @@ int main (int argc, char *argv[]) t = hostname_tests; while (t && t->input != NULL) { - struct hostname *hn = hostname_create (t->input); + struct hostlist_hostname *hn = hostname_create (t->input); if (t->prefix == NULL) { /* Check expected failure */ ok (hn == NULL && errno == EINVAL, diff --git a/src/common/libhostlist/test/hostrange.c b/src/common/libhostlist/test/hostrange.c index 3af530ebe5e6..a203eaad4488 100644 --- a/src/common/libhostlist/test/hostrange.c +++ b/src/common/libhostlist/test/hostrange.c @@ -8,6 +8,7 @@ * SPDX-License-Identifier: LGPL-3.0 \************************************************************/ +#include "src/common/libhostlist/hostname.h" #if HAVE_CONFIG_H #include "config.h" #endif @@ -160,32 +161,32 @@ struct cmp_test { }; struct cmp_test cmp_tests[] = { - { { "foo", 0, 15, 0, 0 }, - { "foo", 1, 15, 0, 0 }, + { { "foo", 3, 0, 15, 0, 0 }, + { "foo", 3, 1, 15, 0, 0 }, -1, }, - { { "foo", 0, 15, 0, 0 }, - { "foo", 0, 15, 0, 0 }, + { { "foo", 3, 0, 15, 0, 0 }, + { "foo", 3, 0, 15, 0, 0 }, 0, }, - { { "foo", 0, 15, 0, 0 }, - { "foo", 0, 0, 0, 1 }, + { { "foo", 3, 0, 15, 0, 0 }, + { "foo", 3, 0, 0, 0, 1 }, 1, }, - { { "bar", 0, 0, 0, 1 }, - { "foo", 0, 0, 0, 1 }, + { { "bar", 3, 0, 0, 0, 1 }, + { "foo", 3, 0, 0, 0, 1 }, -1, }, - { { "", 0, 5, 0, 0 }, - { "", 5, 5, 0, 0 }, + { { "", 0, 0, 5, 0, 0 }, + { "", 0, 5, 5, 0, 0 }, -1, }, - { { "", 0, 5, 0, 0 }, - { "", 0, 5, 2, 0 }, + { { "", 0, 0, 5, 0, 0 }, + { "", 0, 0, 5, 2, 0 }, -1, }, - { { "", 12, 12, 0, 0 }, - { "", 15, 15, 0, 0 }, + { { "", 0, 12, 12, 0, 0 }, + { "", 0, 15, 15, 0, 0 }, -1, }, - { { "", 15, 15, 0, 0 }, - { "", 12, 12, 0, 0 }, + { { "", 0, 15, 15, 0, 0 }, + { "", 0, 12, 12, 0, 0 }, 1, }, { { 0 }, { 0 }, 0 }, @@ -405,7 +406,8 @@ void test_within () while (t && t->hostname) { int result; struct hostrange *hr; - struct hostname *hn = hostname_create (t->hostname); + struct stack_hostname hn_storage = {}; + struct stack_hostname *hn = hostname_stack_create (&hn_storage, t->hostname); if (hn == NULL) BAIL_OUT ("hostname_create failed!"); @@ -420,7 +422,6 @@ void test_within () "hostrange_hn_within (%s[%lu-%lu], %s) returned %d", t->prefix, t->lo, t->hi, t->hostname, result); - hostname_destroy (hn); hostrange_destroy (hr); t++; } From 92f327a602e15886d2e0fdbcb5e81fc86bd4d2b2 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Mon, 9 Sep 2024 19:56:58 -0700 Subject: [PATCH 2/2] libhostlist: add unit tests for hostlist_find_hostname() Problem: There are no tests of hostlist_find_hostname(). Add them using the existing hostlist_find() inputs. --- src/common/libhostlist/test/hostlist.c | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/common/libhostlist/test/hostlist.c b/src/common/libhostlist/test/hostlist.c index 019baf813ab9..fba8f49c4ee0 100644 --- a/src/common/libhostlist/test/hostlist.c +++ b/src/common/libhostlist/test/hostlist.c @@ -343,6 +343,35 @@ void test_find () } } +void test_find_hostname () +{ + struct find_test *t = find_tests; + + ok (hostlist_find_hostname (NULL, NULL) == -1 && errno == EINVAL, + "hostlist_find_hostname (NULL, NULL) returns EINVAL"); + + while (t && t->input) { + int rc; + struct hostlist_hostname *hn; + struct hostlist *hl = hostlist_decode (t->input); + if (!hl) + BAIL_OUT ("hostlist_decode (%s) failed!", t->input); + if (!(hn = hostlist_hostname_create (t->arg))) + BAIL_OUT ("hostlist_hostname_create (%s) failed!", t->arg); + rc = hostlist_find_hostname (hl, hn); + ok (rc == t->rc, + "hostlist_find_hostname ('%s', '%s') returned %d", + t->input, t->arg, rc); + if (t->rc >= 0) + is (hostlist_current (hl), t->arg, + "hostlist_find leaves cursor pointing to found host"); + hostlist_hostname_destroy (hn); + hostlist_destroy (hl); + t++; + } +} + + struct delete_test { char *input; char *delete; @@ -616,6 +645,7 @@ int main (int argc, char *argv[]) test_append (); test_nth (); test_find (); + test_find_hostname (); test_delete (); test_sortuniq (); test_iteration ();