Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-94808: Cover PyUnicode_Count in CAPI #96929

Merged
merged 1 commit into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2945,6 +2945,44 @@ def test_asutf8andsize(self):
self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')

# Test PyUnicode_Count()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_count(self):
    # Every check goes through the _testcapi wrapper, which is always
    # called here as unicode_count(str, substr, start, end).
    from _testcapi import unicode_count

    st = 'abcabd'
    size = len(st)

    # (substr, start, end, expected count) looked up in `st`
    for substr, start, end, expected in (
        # whole string
        ('a', 0, size, 2),
        ('ab', 0, size, 2),
        ('abc', 0, size, 1),
        ('а', 0, size, 0),  # cyrillic "a"
        # start < end
        ('a', 3, size, 1),
        ('a', 4, size, 0),
        ('a', 0, sys.maxsize, 2),
        # start >= end
        ('abc', 0, 0, 0),
        ('a', 3, 2, 0),
        ('a', sys.maxsize, 5, 0),
        # negative
        ('ab', -size, -1, 2),
        ('a', -size, -3, 1),
    ):
        self.assertEqual(unicode_count(st, substr, start, end), expected)

    # wrong args: too few arguments, or a non-str where a str is expected
    for bad_args in (
        ('a', 'a'),
        ('a', 'a', 1),
        (1, 'a', 0, 1),
        ('a', 1, 0, 1),
    ):
        self.assertRaises(TypeError, unicode_count, *bad_args)

    # empty string, either as the substring or as the searched string
    for text, substr, start, end, expected in (
        ('abc', '', 0, 3, 4),
        ('abc', '', 1, 3, 3),
        ('', '', 0, 1, 1),
        ('', 'a', 0, 1, 0),
    ):
        self.assertEqual(unicode_count(text, substr, start, end), expected)

    # different unicode kinds: each character occurs once in its own
    # string and never in the ASCII-only `st`
    samples = ("\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1")
    for text in samples:
        for char in text:
            self.assertEqual(unicode_count(text, char, 0, len(text)), 1)
            self.assertEqual(unicode_count(st, char, 0, len(st)), 0)

# Test PyUnicode_FindChar()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand Down
21 changes: 21 additions & 0 deletions Modules/_testcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, utf8_len);
}

/* Test wrapper around PyUnicode_Count().

   Parses the argument tuple with the "UUnn" format into
   (str, substr, start, end), forwards the four values to
   PyUnicode_Count(), and returns the resulting count as a Python int.
   Returns NULL when argument parsing fails or when PyUnicode_Count()
   returns -1. */
static PyObject *
unicode_count(PyObject *self, PyObject *args)
{
    PyObject *haystack;
    PyObject *needle;
    Py_ssize_t start;
    Py_ssize_t end;
    Py_ssize_t count;

    if (!PyArg_ParseTuple(args, "UUnn:unicode_count",
                          &haystack, &needle, &start, &end))
    {
        return NULL;
    }

    count = PyUnicode_Count(haystack, needle, start, end);
    if (count == -1) {
        /* -1 is the error return of PyUnicode_Count(); pass it on as NULL. */
        return NULL;
    }
    return PyLong_FromSsize_t(count);
}

static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
Expand Down Expand Up @@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_count", unicode_count, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{NULL},
Expand Down