diff --git a/Include/bytesobject.h b/Include/bytesobject.h index b7a7c36bcbb..fbb63226f64 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -128,17 +128,21 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer, A _PyBytesWriter variable must be declared at the end of variables in a function to optimize the memory allocation on the stack. */ typedef struct { - /* bytes object */ + /* bytes, bytearray or NULL (when the small buffer is used) */ PyObject *buffer; - /* Number of allocated size */ + /* Number of allocated bytes. */ Py_ssize_t allocated; /* Minimum number of allocated bytes, incremented by _PyBytesWriter_Prepare() */ Py_ssize_t min_size; - /* If non-zero, overallocate the buffer (default: 0). */ + /* If non-zero, use a bytearray instead of a bytes object for buffer. */ + int use_bytearray; + + /* If non-zero, overallocate the buffer (default: 0). + This flag must be zero if use_bytearray is non-zero. */ int overallocate; /* Stack buffer */ @@ -153,7 +157,7 @@ typedef struct { PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer); /* Get the buffer content and reset the writer. - Return a bytes object. + Return a bytes object, or a bytearray object if use_bytearray is non-zero. Raise an exception and return NULL on error. 
*/ PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 189673c219b..a1f2958fb25 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3852,11 +3852,8 @@ bytes_iter(PyObject *seq) void _PyBytesWriter_Init(_PyBytesWriter *writer) { - writer->buffer = NULL; - writer->allocated = 0; - writer->min_size = 0; - writer->overallocate = 0; - writer->use_small_buffer = 0; + /* Set all attributes before small_buffer to 0 */ + memset(writer, 0, offsetof(_PyBytesWriter, small_buffer)); #ifdef Py_DEBUG memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer)); #endif @@ -3871,14 +3868,18 @@ _PyBytesWriter_Dealloc(_PyBytesWriter *writer) Py_LOCAL_INLINE(char*) _PyBytesWriter_AsString(_PyBytesWriter *writer) { - if (!writer->use_small_buffer) { - assert(writer->buffer != NULL); - return PyBytes_AS_STRING(writer->buffer); - } - else { + if (writer->use_small_buffer) { assert(writer->buffer == NULL); return writer->small_buffer; } + else if (writer->use_bytearray) { + assert(writer->buffer != NULL); + return PyByteArray_AS_STRING(writer->buffer); + } + else { + assert(writer->buffer != NULL); + return PyBytes_AS_STRING(writer->buffer); + } } Py_LOCAL_INLINE(Py_ssize_t) @@ -3897,18 +3898,28 @@ _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str) #ifdef Py_DEBUG char *start, *end; - if (!writer->use_small_buffer) { - assert(writer->buffer != NULL); - assert(PyBytes_CheckExact(writer->buffer)); - assert(Py_REFCNT(writer->buffer) == 1); - } - else { + if (writer->use_small_buffer) { assert(writer->buffer == NULL); } + else { + assert(writer->buffer != NULL); + if (writer->use_bytearray) + assert(PyByteArray_CheckExact(writer->buffer)); + else + assert(PyBytes_CheckExact(writer->buffer)); + assert(Py_REFCNT(writer->buffer) == 1); + } - start = _PyBytesWriter_AsString(writer); + if (writer->use_bytearray) { + /* bytearray has its own overallocation algorithm, + 
writer overallocation must be disabled */ + assert(!writer->overallocate); + } + + assert(0 <= writer->allocated); assert(0 <= writer->min_size && writer->min_size <= writer->allocated); /* the last byte must always be null */ + start = _PyBytesWriter_AsString(writer); assert(start[writer->allocated] == 0); end = start + writer->allocated; @@ -3932,8 +3943,7 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) if (writer->min_size > PY_SSIZE_T_MAX - size) { PyErr_NoMemory(); - _PyBytesWriter_Dealloc(writer); - return NULL; + goto error; } writer->min_size += size; @@ -3950,23 +3960,38 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) pos = _PyBytesWriter_GetPos(writer, str); if (!writer->use_small_buffer) { - /* Note: Don't use a bytearray object because the conversion from - byterray to bytes requires to copy all bytes. */ - if (_PyBytes_Resize(&writer->buffer, allocated)) { - assert(writer->buffer == NULL); - return NULL; + if (writer->use_bytearray) { + if (PyByteArray_Resize(writer->buffer, allocated)) + goto error; + /* writer->allocated can be smaller than writer->buffer->ob_alloc, + but we cannot use ob_alloc because bytes may need to be moved + to use the whole buffer. bytearray uses an internal optimization + to avoid moving or copying bytes when bytes are removed at the + beginning (ex: del bytearray[:1]). 
*/ + } + else { + if (_PyBytes_Resize(&writer->buffer, allocated)) + goto error; } } else { /* convert from stack buffer to bytes object buffer */ assert(writer->buffer == NULL); - writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); + if (writer->use_bytearray) + writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated); + else + writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); if (writer->buffer == NULL) - return NULL; + goto error; if (pos != 0) { - Py_MEMCPY(PyBytes_AS_STRING(writer->buffer), + char *dest; + if (writer->use_bytearray) + dest = PyByteArray_AS_STRING(writer->buffer); + else + dest = PyBytes_AS_STRING(writer->buffer); + Py_MEMCPY(dest, writer->small_buffer, pos); } @@ -3981,6 +4006,10 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) str = _PyBytesWriter_AsString(writer) + pos; _PyBytesWriter_CheckConsistency(writer, str); return str; + +error: + _PyBytesWriter_Dealloc(writer); + return NULL; } /* Allocate the buffer to write size bytes. @@ -4013,7 +4042,7 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) _PyBytesWriter_CheckConsistency(writer, str); pos = _PyBytesWriter_GetPos(writer, str); - if (pos == 0) { + if (pos == 0 && !writer->use_bytearray) { Py_CLEAR(writer->buffer); /* Get the empty byte string singleton */ result = PyBytes_FromStringAndSize(NULL, 0); @@ -4026,9 +4055,17 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) writer->buffer = NULL; if (pos != writer->allocated) { - if (_PyBytes_Resize(&result, pos)) { - assert(result == NULL); - return NULL; + if (writer->use_bytearray) { + if (PyByteArray_Resize(result, pos)) { + Py_DECREF(result); + return NULL; + } + } + else { + if (_PyBytes_Resize(&result, pos)) { + assert(result == NULL); + return NULL; + } } } }