diff --git a/python/BUILD b/python/BUILD index 92e9c496c5c3..ea1dac316044 100644 --- a/python/BUILD +++ b/python/BUILD @@ -203,5 +203,6 @@ py_extension( "//upb/util:compare", "//upb/util:def_to_proto", "//upb/util:required_fields", + "@utf8_range", ], ) diff --git a/python/convert.c b/python/convert.c index 413c42f7359d..0c1174ba6b7a 100644 --- a/python/convert.c +++ b/python/convert.c @@ -35,6 +35,7 @@ #include "upb/message/map.h" #include "upb/reflection/message.h" #include "upb/util/compare.h" +#include "utf8_range.h" // Must be last. #include "upb/port/def.inc" @@ -259,20 +260,27 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val, } case kUpb_CType_String: { Py_ssize_t size; - const char* ptr; - PyObject* unicode = NULL; if (PyBytes_Check(obj)) { - unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL); - if (!obj) return false; + // Use the object's bytes if they are valid UTF-8. + char* ptr; + if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false; + if (utf8_range2((const unsigned char*)ptr, size) != 0) { + // Invalid UTF-8. Try to convert the message to a Python Unicode + // object, even though we know this will fail, just to get the + // idiomatic Python error message. + obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL); + assert(!obj); + return false; + } + *val = PyUpb_MaybeCopyString(ptr, size, arena); + return true; + } else { + const char* ptr; + ptr = PyUnicode_AsUTF8AndSize(obj, &size); + if (PyErr_Occurred()) return false; + *val = PyUpb_MaybeCopyString(ptr, size, arena); + return true; } - ptr = PyUnicode_AsUTF8AndSize(obj, &size); - if (PyErr_Occurred()) { - Py_XDECREF(unicode); - return false; - } - *val = PyUpb_MaybeCopyString(ptr, size, arena); - Py_XDECREF(unicode); - return true; } case kUpb_CType_Message: PyErr_Format(PyExc_ValueError, "Message objects may not be assigned"); diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index 0bac00da023d..b0f1ae784fa6 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -48,7 +48,6 @@ warnings.simplefilter('error', DeprecationWarning) - @_parameterized.named_parameters(('_proto2', unittest_pb2), ('_proto3', unittest_proto3_arena_pb2)) @testing_refleaks.TestCase diff --git a/python/map.c b/python/map.c index a1d75de9a127..6bf12af43808 100644 --- a/python/map.c +++ b/python/map.c @@ -179,7 +179,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key, const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1); upb_Arena* arena = PyUpb_Arena_Get(self->arena); upb_MessageValue u_key, u_val; - if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return -1; + if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return -1; if (val) { if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1; @@ -200,9 +200,8 @@ PyObject* PyUpb_MapContainer_Subscript(PyObject* _self, PyObject* key) { const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f); const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0); const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1); - upb_Arena* arena = PyUpb_Arena_Get(self->arena); upb_MessageValue u_key, u_val; - if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL; + if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL; if (!map || !upb_Map_Get(map, u_key, &u_val)) { map = PyUpb_MapContainer_EnsureReified(_self); upb_Arena* arena = PyUpb_Arena_Get(self->arena); @@ -256,9 +255,8 @@ static PyObject* PyUpb_MapContainer_Get(PyObject* _self, PyObject* args, const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f); const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0); const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1); - upb_Arena* arena = PyUpb_Arena_Get(self->arena); upb_MessageValue u_key, u_val; - if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL; + if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL; if (map && upb_Map_Get(map, u_key, &u_val)) { return PyUpb_UpbToPy(u_val, val_f, self->arena); }