Py_TYPE(self)->tp_free((PyObject *)self);
}
+ static int  /* Validate a decoder's return value: 0 if it is a ready str object, -1 otherwise. */
+ check_decoded(PyObject *decoded)  /* On the -1 paths with a non-NULL argument the reference is released here, so callers must not DECREF it again. */
+ {
+ if (decoded == NULL)  /* nothing to release; no exception is set here (presumably the failed call already set one -- confirm) */
+ return -1;
+ if (!PyUnicode_Check(decoded)) {  /* reject non-str results with a TypeError naming the actual type */
+ PyErr_Format(PyExc_TypeError,
+ "decoder should return a string result, not '%.200s'",
+ Py_TYPE(decoded)->tp_name);
+ Py_DECREF(decoded);
+ return -1;
+ }
++ if (PyUnicode_READY(decoded) < 0) {  /* readying failed: drop the reference, same as the type-error path */
++ Py_DECREF(decoded);
++ return -1;
++ }
+ return 0;  /* success: the reference to `decoded` is left untouched for the caller */
+ }
+
#define SEEN_CR 1
#define SEEN_LF 2
#define SEEN_CRLF 4
Py_INCREF(output);
}
- if (output == NULL)
+ if (check_decoded(output) < 0)
return NULL;
- if (!PyUnicode_Check(output)) {
- PyErr_SetString(PyExc_TypeError,
- "decoder should return a string result");
- goto error;
- }
-
- if (PyUnicode_READY(output) == -1)
- goto error;
-
- output_len = PyUnicode_GET_SIZE(output);
+ output_len = PyUnicode_GET_LENGTH(output);
if (self->pendingcr && (final || output_len > 0)) {
- Py_UNICODE *out;
- PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
+ /* Prefix output with CR */
+ int kind;
+ PyObject *modified;
+ char *out;
+
+ modified = PyUnicode_New(output_len + 1,
+ PyUnicode_MAX_CHAR_VALUE(output));
if (modified == NULL)
goto error;
- out = PyUnicode_AS_UNICODE(modified);
- out[0] = '\r';
- memcpy(out + 1, PyUnicode_AS_UNICODE(output),
- output_len * sizeof(Py_UNICODE));
+ kind = PyUnicode_KIND(modified);
+ out = PyUnicode_DATA(modified);
+ PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
+ memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Py_DECREF(output);
- output = modified;
+ output = modified; /* output remains ready */
self->pendingcr = 0;
output_len++;
}
Py_DECREF(chunk_size);
if (input_chunk == NULL)
goto fail;
- assert(PyBytes_Check(input_chunk));
+ if (!PyBytes_Check(input_chunk)) {
+ PyErr_Format(PyExc_TypeError,
+ "underlying %s() should have returned a bytes object, "
+ "not '%.200s'", (self->has_read1 ? "read1": "read"),
+ Py_TYPE(input_chunk)->tp_name);
+ goto fail;
+ }
- eof = (PyBytes_Size(input_chunk) == 0);
+ nbytes = PyBytes_Size(input_chunk);
+ eof = (nbytes == 0);
if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
decoded_chars = _PyIncrementalNewlineDecoder_decode(
_PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
}
- /* TODO sanity check: isinstance(decoded_chars, unicode) */
- if (decoded_chars == NULL)
- goto fail;
- if (PyUnicode_READY(decoded_chars) == -1)
+ if (check_decoded(decoded_chars) < 0)
goto fail;
textiowrapper_set_decoded_chars(self, decoded_chars);
- if (PyUnicode_GET_SIZE(decoded_chars) > 0)
+ nchars = PyUnicode_GET_LENGTH(decoded_chars);
+ if (nchars > 0)
+ self->b2cratio = (double) nbytes / nchars;
+ else
+ self->b2cratio = 0.0;
+ if (nchars > 0)
eof = 0;
if (self->telling) {
PyObject *decoded;
if (bytes == NULL)
goto fail;
- decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
- bytes, Py_True, NULL);
+
+ if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
+ decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
+ bytes, 1);
+ else
+ decoded = PyObject_CallMethodObjArgs(
+ self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Py_DECREF(bytes);
- if (decoded == NULL)
+ if (check_decoded(decoded) < 0)
goto fail;
result = textiowrapper_get_decoded_chars(self, -1);
goto fail;
}
- decoded = PyObject_CallMethod(self->decoder, "decode",
- "Oi", input_chunk, (int)cookie.need_eof);
+ decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
+ "Oi", input_chunk, (int)cookie.need_eof);
- if (decoded == NULL)
+ if (check_decoded(decoded) < 0)
goto fail;
- if (PyUnicode_READY(decoded) == -1) {
- Py_DECREF(decoded);
- goto fail;
- }
textiowrapper_set_decoded_chars(self, decoded);
if (saved_state == NULL)
goto fail;
- /* Note our initial start point. */
- if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
- goto fail;
+#define DECODER_GETSTATE() do { /* call the decoder method named by _PyIO_str_getstate and unpack the result into dec_buffer, dec_buffer_len and dec_flags; jumps to `fail` on any error */ \
+ PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
+ _PyIO_str_getstate, NULL); \
+ if (_state == NULL) \
+ goto fail; \
+ if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { /* state must unpack as a (bytes-like, int) 2-tuple */ \
+ Py_DECREF(_state); \
+ goto fail; \
+ } \
+ Py_DECREF(_state); /* NOTE(review): with "y#" dec_buffer presumably points into storage owned by _state; the visible code only uses dec_buffer_len afterwards -- confirm dec_buffer is never dereferenced */ \
+ } while (0)
+
- /* TODO: replace assert with exception */
+#define DECODER_DECODE(start, len, res) do { /* feed `len` bytes starting at `start` to the decoder's decode method and store the length of the decoded string in `res`; jumps to `fail` if the call fails or check_decoded rejects the result */ \
+ PyObject *_decoded = _PyObject_CallMethodId( \
+ self->decoder, &PyId_decode, "y#", start, len); \
- if (_decoded == NULL) \
++ if (check_decoded(_decoded) < 0) \
+ goto fail; \
- assert (PyUnicode_Check(_decoded)); \
+ res = PyUnicode_GET_LENGTH(_decoded); \
+ Py_DECREF(_decoded); /* only the length is needed; the decoded text itself is discarded */ \
+ } while (0)
+
+ /* Fast search for an acceptable start point, close to our
+ current pos */
+ skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
+ skip_back = 1;
+ assert(skip_back <= PyBytes_GET_SIZE(next_input));
+ input = PyBytes_AS_STRING(next_input);
+ while (skip_bytes > 0) {
+ /* Decode up to tentative start point */
+ if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
+ goto fail;
+ DECODER_DECODE(input, skip_bytes, chars_decoded);
+ if (chars_decoded <= chars_to_skip) {
+ DECODER_GETSTATE();
+ if (dec_buffer_len == 0) {
+ /* Before pos and no bytes buffered in decoder => OK */
+ cookie.dec_flags = dec_flags;
+ chars_to_skip -= chars_decoded;
+ break;
+ }
+ /* Skip back by buffered amount and reset heuristic */
+ skip_bytes -= dec_buffer_len;
+ skip_back = 1;
+ }
+ else {
+ /* We're too far ahead, skip back a bit */
+ skip_bytes -= skip_back;
+ skip_back *= 2;
+ }
+ }
+ if (skip_bytes <= 0) {
+ skip_bytes = 0;
+ if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
+ goto fail;
+ }
- /* Feed the decoder one byte at a time. As we go, note the
- * nearest "safe start point" before the current location
- * (a point where the decoder has nothing buffered, so seek()
+ /* Note our initial start point. */
+ cookie.start_pos += skip_bytes;
+ cookie.chars_to_skip = chars_to_skip;
+ if (chars_to_skip == 0)
+ goto finally;
+
+ /* We should be close to the desired position. Now feed the decoder one
+ * byte at a time until we reach the `chars_to_skip` target.
+ * As we go, note the nearest "safe start point" before the current
+ * location (a point where the decoder has nothing buffered, so seek()
* can safely start from there and advance to this location).
*/
chars_decoded = 0;
}
if (input == input_end) {
/* We didn't get enough decoded data; signal EOF to get more. */
- PyObject *decoded = PyObject_CallMethod(
- self->decoder, "decode", "yi", "", /* final = */ 1);
+ PyObject *decoded = _PyObject_CallMethodId(
+ self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
- if (decoded == NULL)
+ if (check_decoded(decoded) < 0)
goto fail;
- assert (PyUnicode_Check(decoded));
- chars_decoded += PyUnicode_GET_SIZE(decoded);
+ chars_decoded += PyUnicode_GET_LENGTH(decoded);
Py_DECREF(decoded);
cookie.need_eof = 1;