-
-
Notifications
You must be signed in to change notification settings - Fork 30.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-97982: Remove asciilib_count() #98164
Conversation
I prefer to remove it to avoid people making the assumption that it's faster just because it exists: #98025 (comment)
|
Rationale for the removal: asciilib_count() is the same code than ucs1lib_count() and so has the same performance. See my benchmark: #98025 (comment) |
Objects/stringlib/count.h
Outdated
// gh-97982: Implementing asciilib_count() is not worth it, FASTSEARCH() does | ||
// not specialize the code for ASCII strings. Use ucs1lib_count() for ASCII and | ||
// UCS1 strings: it's the same than asciilib_count(). | ||
#if STRINGLIB_MAX_CHAR > 0x7Fu |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe !STRINGLIB_IS_UNICODE || STRINGLIB_MAX_CHAR > 0x7Fu
?
STRINGLIB_MAX_CHAR
was only used in codecs.h
when STRINGLIB_IS_UNICODE
is true.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh thanks, that makes my PR shorter!
asciilib_count() is the same than ucs1lib_count(): the code is not specialized for ASCII strings, so it's not worth it to have a separated function. Remove asciilib_count() function.
I compared the output of The only difference are the function names, the code is the same. --- ASCII 2022-10-11 17:58:22.182135206 +0200
+++ UCS1 2022-10-11 17:58:25.069159913 +0200
@@ -1,5 +1,5 @@
static inline Py_ssize_t
-asciilib_count(const Py_UCS1* str, Py_ssize_t str_len,
+ucs1lib_count(const Py_UCS1* str, Py_ssize_t str_len,
const Py_UCS1* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{
@@ -8,13 +8,13 @@
return 0;
if (sub_len == 0)
return (str_len < maxcount) ? str_len + 1 : maxcount;
- count = asciilib_fastsearch(str, str_len, sub, sub_len, maxcount, 0);
+ count = ucs1lib_fastsearch(str, str_len, sub, sub_len, maxcount, 0);
if (count < 0)
return 0;
return count;
}
static inline Py_ssize_t
-asciilib_find_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
+ucs1lib_find_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
{
const Py_UCS1 *p, *e;
p = s;
@@ -35,7 +35,7 @@
return -1;
}
static inline Py_ssize_t
-asciilib_rfind_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
+ucs1lib_rfind_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
{
const Py_UCS1 *p;
if (n > 15) {
@@ -55,7 +55,7 @@
return -1;
}
static inline Py_ssize_t
-asciilib__lex_search(const Py_UCS1 *needle, Py_ssize_t len_needle,
+ucs1lib__lex_search(const Py_UCS1 *needle, Py_ssize_t len_needle,
Py_ssize_t *return_period, int invert_alphabet)
{
Py_ssize_t max_suffix = 0;
@@ -90,13 +90,13 @@
return max_suffix;
}
static inline Py_ssize_t
-asciilib__factorize(const Py_UCS1 *needle,
+ucs1lib__factorize(const Py_UCS1 *needle,
Py_ssize_t len_needle,
Py_ssize_t *return_period)
{
Py_ssize_t cut1, period1, cut2, period2, cut, period;
- cut1 = asciilib__lex_search(needle, len_needle, &period1, 0);
- cut2 = asciilib__lex_search(needle, len_needle, &period2, 1);
+ cut1 = ucs1lib__lex_search(needle, len_needle, &period1, 0);
+ cut2 = ucs1lib__lex_search(needle, len_needle, &period2, 1);
if (cut1 > cut2) {
period = period1;
cut = cut1;
@@ -111,7 +111,7 @@
*return_period = period;
return cut;
}
-typedef struct asciilib__pre {
+typedef struct ucs1lib__pre {
const Py_UCS1 *needle;
Py_ssize_t len_needle;
Py_ssize_t cut;
@@ -119,14 +119,14 @@
Py_ssize_t gap;
int is_periodic;
uint8_t table[(1U << 6u)];
-} asciilib_prework;
+} ucs1lib_prework;
static void
-asciilib__preprocess(const Py_UCS1 *needle, Py_ssize_t len_needle,
- asciilib_prework *p)
+ucs1lib__preprocess(const Py_UCS1 *needle, Py_ssize_t len_needle,
+ ucs1lib_prework *p)
{
p->needle = needle;
p->len_needle = len_needle;
- p->cut = asciilib__factorize(needle, len_needle, &(p->period));
+ p->cut = ucs1lib__factorize(needle, len_needle, &(p->period));
((
p->period + p->cut <= len_needle
) ? (void) (0) : __assert_fail (
@@ -191,8 +191,8 @@
}
}
static Py_ssize_t
-asciilib__two_way(const Py_UCS1 *haystack, Py_ssize_t len_haystack,
- asciilib_prework *p)
+ucs1lib__two_way(const Py_UCS1 *haystack, Py_ssize_t len_haystack,
+ ucs1lib_prework *p)
{
const Py_ssize_t len_needle = p->len_needle;
const Py_ssize_t cut = p->cut;
@@ -333,30 +333,30 @@
return -1;
}
static Py_ssize_t
-asciilib__two_way_find(const Py_UCS1 *haystack,
+ucs1lib__two_way_find(const Py_UCS1 *haystack,
Py_ssize_t len_haystack,
const Py_UCS1 *needle,
Py_ssize_t len_needle)
{
;
- asciilib_prework p;
- asciilib__preprocess(needle, len_needle, &p);
- return asciilib__two_way(haystack, len_haystack, &p);
+ ucs1lib_prework p;
+ ucs1lib__preprocess(needle, len_needle, &p);
+ return ucs1lib__two_way(haystack, len_haystack, &p);
}
static Py_ssize_t
-asciilib__two_way_count(const Py_UCS1 *haystack,
+ucs1lib__two_way_count(const Py_UCS1 *haystack,
Py_ssize_t len_haystack,
const Py_UCS1 *needle,
Py_ssize_t len_needle,
Py_ssize_t maxcount)
{
;
- asciilib_prework p;
- asciilib__preprocess(needle, len_needle, &p);
+ ucs1lib_prework p;
+ ucs1lib__preprocess(needle, len_needle, &p);
Py_ssize_t index = 0, count = 0;
while (1) {
Py_ssize_t result;
- result = asciilib__two_way(haystack + index,
+ result = ucs1lib__two_way(haystack + index,
len_haystack - index, &p);
if (result == -1) {
return count;
@@ -370,7 +370,7 @@
return count;
}
static inline Py_ssize_t
-asciilib_default_find(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_default_find(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -422,7 +422,7 @@
return mode == 0 ? count : -1;
}
static Py_ssize_t
-asciilib_adaptive_find(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_adaptive_find(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -462,11 +462,11 @@
hits += j + 1;
if (hits > m / 4 && w - i > 2000) {
if (mode == 1) {
- res = asciilib__two_way_find(s + i, n - i, p, m);
+ res = ucs1lib__two_way_find(s + i, n - i, p, m);
return res == -1 ? -1 : res + i;
}
else {
- res = asciilib__two_way_count(s + i, n - i, p, m,
+ res = ucs1lib__two_way_count(s + i, n - i, p, m,
maxcount - count);
return res + count;
}
@@ -487,7 +487,7 @@
return mode == 0 ? count : -1;
}
static Py_ssize_t
-asciilib_default_rfind(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_default_rfind(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -526,7 +526,7 @@
return -1;
}
static inline Py_ssize_t
-asciilib_count_char(const Py_UCS1 *s, Py_ssize_t n,
+ucs1lib_count_char(const Py_UCS1 *s, Py_ssize_t n,
const Py_UCS1 p0, Py_ssize_t maxcount)
{
Py_ssize_t i, count = 0;
@@ -541,7 +541,7 @@
return count;
}
static inline Py_ssize_t
-asciilib_fastsearch(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_fastsearch(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -553,30 +553,30 @@
return -1;
}
if (mode == 1)
- return asciilib_find_char(s, n, p[0]);
+ return ucs1lib_find_char(s, n, p[0]);
else if (mode == 2)
- return asciilib_rfind_char(s, n, p[0]);
+ return ucs1lib_rfind_char(s, n, p[0]);
else {
- return asciilib_count_char(s, n, p[0], maxcount);
+ return ucs1lib_count_char(s, n, p[0], maxcount);
}
}
if (mode != 2) {
if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
- return asciilib_default_find(s, n, p, m, maxcount, mode);
+ return ucs1lib_default_find(s, n, p, m, maxcount, mode);
}
else if ((m >> 2) * 3 < (n >> 2)) {
if (mode == 1) {
- return asciilib__two_way_find(s, n, p, m);
+ return ucs1lib__two_way_find(s, n, p, m);
}
else {
- return asciilib__two_way_count(s, n, p, m, maxcount);
+ return ucs1lib__two_way_count(s, n, p, m, maxcount);
}
}
else {
- return asciilib_adaptive_find(s, n, p, m, maxcount, mode);
+ return ucs1lib_adaptive_find(s, n, p, m, maxcount, mode);
}
}
else {
- return asciilib_default_rfind(s, n, p, m, maxcount, mode);
+ return ucs1lib_default_rfind(s, n, p, m, maxcount, mode);
}
} |
* main: (38 commits) pythongh-98251: Allow venv to pass along PYTHON* variables to pip and ensurepip when they do not impact path resolution (pythonGH-98259) Bpo-41246: IOCP Proactor avoid callback code duplication (python#21399) bpo-46364: Use sockets for stdin of asyncio only on AIX (python#30596) pythongh-98178: syslog() is not thread-safe on macOS (python#98213) Mark all targets in `Doc/Makefile` as `PHONY` (pythonGH-98189) pythongh-97982: Factorize PyUnicode_Count() and unicode_count() code (python#98025) pythongh-96265: Formatting changes for faq/general (python#98129) tutorial: remove "with single quotes" (python#98204) pythongh-97669: Remove Tools/scripts/startuptime.py (python#98214) signalmodule.c uses _PyErr_WriteUnraisableMsg() (python#98217) pythongh-97669: Fix test_tools reference leak (python#98216) pythongh-97669: Create Tools/patchcheck/ directory (python#98186) pythongh-65046: Link to logging cookbook from asyncio docs (python#98207) Formatting fixes in contextlib docs (python#98111) pythongh-95276: Add callable entry to the glossary (python#95738) pythongh-96130: Rephrase use of "typecheck" verb for clarity (python#98144) Fix some incorrect indentation around the main switch (python#98177) pythongh-98172: Fix formatting in `except*` docs (python#98173) pythongh-97982: Remove asciilib_count() (python#98164) pythongh-95756: Free and NULL-out code caches when needed (pythonGH-98181) ...
* main: (37 commits) pythongh-98251: Allow venv to pass along PYTHON* variables to pip and ensurepip when they do not impact path resolution (pythonGH-98259) Bpo-41246: IOCP Proactor avoid callback code duplication (python#21399) bpo-46364: Use sockets for stdin of asyncio only on AIX (python#30596) pythongh-98178: syslog() is not thread-safe on macOS (python#98213) Mark all targets in `Doc/Makefile` as `PHONY` (pythonGH-98189) pythongh-97982: Factorize PyUnicode_Count() and unicode_count() code (python#98025) pythongh-96265: Formatting changes for faq/general (python#98129) tutorial: remove "with single quotes" (python#98204) pythongh-97669: Remove Tools/scripts/startuptime.py (python#98214) signalmodule.c uses _PyErr_WriteUnraisableMsg() (python#98217) pythongh-97669: Fix test_tools reference leak (python#98216) pythongh-97669: Create Tools/patchcheck/ directory (python#98186) pythongh-65046: Link to logging cookbook from asyncio docs (python#98207) Formatting fixes in contextlib docs (python#98111) pythongh-95276: Add callable entry to the glossary (python#95738) pythongh-96130: Rephrase use of "typecheck" verb for clarity (python#98144) Fix some incorrect indentation around the main switch (python#98177) pythongh-98172: Fix formatting in `except*` docs (python#98173) pythongh-97982: Remove asciilib_count() (python#98164) pythongh-95756: Free and NULL-out code caches when needed (pythonGH-98181) ...
asciilib_count() is the same than ucs1lib_count(): the code is not specialized for ASCII strings, so it's not worth it to have a separated function. Remove asciilib_count() function.
Define STRINGLIB_MAX_CHAR in C files of bytes strings.
PyUncode_Count
andunicode_count
#97982