Skip to content

Commit

Permalink
Fixed Python memory leak in map lookup.
Browse files Browse the repository at this point in the history
Previously we were allocating memory on the message's arena every time we performed a `map[key]` or `map.get(key)` operation.  This is unnecessary, as the key's data is only needed ephemerally, for the duration of the lookup, and we can therefore alias the Python object's string data instead of copying it.

This required fixing a bug in the convert.c operation.  Previously in the `arena==NULL` case, if the user passes a bytes object instead of a unicode string, the code would return a pointer to a temporary Python object that had already been freed, leading to use-after-free.  I fixed this by referencing the bytes object's data directly, and using utf8_range to verify the UTF-8.

Fixes: protocolbuffers#14571
PiperOrigin-RevId: 578563555
  • Loading branch information
haberman authored and copybara-github committed Nov 1, 2023
1 parent f083530 commit b0eeb35
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
1 change: 1 addition & 0 deletions python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -203,5 +203,6 @@ py_extension(
"//upb/util:compare",
"//upb/util:def_to_proto",
"//upb/util:required_fields",
"@utf8_range",
],
)
32 changes: 20 additions & 12 deletions python/convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "upb/message/map.h"
#include "upb/reflection/message.h"
#include "upb/util/compare.h"
#include "utf8_range.h"

// Must be last.
#include "upb/port/def.inc"
Expand Down Expand Up @@ -259,20 +260,27 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
}
case kUpb_CType_String: {
Py_ssize_t size;
const char* ptr;
PyObject* unicode = NULL;
if (PyBytes_Check(obj)) {
unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
if (!obj) return false;
// Use the object's bytes if they are valid UTF-8.
char* ptr;
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
if (utf8_range2((const unsigned char*)ptr, size) != 0) {
// Invalid UTF-8. Try to convert the message to a Python Unicode
// object, even though we know this will fail, just to get the
// idiomatic Python error message.
obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
assert(!obj);
return false;
}
*val = PyUpb_MaybeCopyString(ptr, size, arena);
return true;
} else {
const char* ptr;
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
if (PyErr_Occurred()) return false;
*val = PyUpb_MaybeCopyString(ptr, size, arena);
return true;
}
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
if (PyErr_Occurred()) {
Py_XDECREF(unicode);
return false;
}
*val = PyUpb_MaybeCopyString(ptr, size, arena);
Py_XDECREF(unicode);
return true;
}
case kUpb_CType_Message:
PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
Expand Down
1 change: 0 additions & 1 deletion python/google/protobuf/internal/message_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@

warnings.simplefilter('error', DeprecationWarning)


@_parameterized.named_parameters(('_proto2', unittest_pb2),
('_proto3', unittest_proto3_arena_pb2))
@testing_refleaks.TestCase
Expand Down
8 changes: 3 additions & 5 deletions python/map.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key,
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
upb_MessageValue u_key, u_val;
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return -1;
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return -1;

if (val) {
if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1;
Expand All @@ -200,9 +200,8 @@ PyObject* PyUpb_MapContainer_Subscript(PyObject* _self, PyObject* key) {
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
upb_MessageValue u_key, u_val;
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
if (!map || !upb_Map_Get(map, u_key, &u_val)) {
map = PyUpb_MapContainer_EnsureReified(_self);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
Expand Down Expand Up @@ -256,9 +255,8 @@ static PyObject* PyUpb_MapContainer_Get(PyObject* _self, PyObject* args,
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
upb_MessageValue u_key, u_val;
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
if (map && upb_Map_Get(map, u_key, &u_val)) {
return PyUpb_UpbToPy(u_val, val_f, self->arena);
}
Expand Down

0 comments on commit b0eeb35

Please sign in to comment.