No issues found
1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13
14 /* TextIOBase */
15
16 PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24 static PyObject *
25 _unsupported(const char *message)
26 {
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29 }
30
31 PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38 static PyObject *
39 textiobase_detach(PyObject *self)
40 {
41 return _unsupported("detach");
42 }
43
44 PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51 static PyObject *
52 textiobase_read(PyObject *self, PyObject *args)
53 {
54 return _unsupported("read");
55 }
56
57 PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63 static PyObject *
64 textiobase_readline(PyObject *self, PyObject *args)
65 {
66 return _unsupported("readline");
67 }
68
69 PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75 static PyObject *
76 textiobase_write(PyObject *self, PyObject *args)
77 {
78 return _unsupported("write");
79 }
80
81 PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87 static PyObject *
88 textiobase_encoding_get(PyObject *self, void *context)
89 {
90 Py_RETURN_NONE;
91 }
92
93 PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101 static PyObject *
102 textiobase_newlines_get(PyObject *self, void *context)
103 {
104 Py_RETURN_NONE;
105 }
106
107 PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113 static PyObject *
114 textiobase_errors_get(PyObject *self, void *context)
115 {
116 Py_RETURN_NONE;
117 }
118
119
120 static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126 };
127
128 static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133 };
134
135 PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 textiobase_doc, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
163 textiobase_methods, /* tp_methods */
164 0, /* tp_members */
165 textiobase_getset, /* tp_getset */
166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174 };
175
176
177 /* IncrementalNewlineDecoder */
178
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189 typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
196 } nldecoder_object;
197
198 static int
199 incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201 {
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229 }
230
231 static void
232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233 {
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237 }
238
239 #define SEEN_CR 1
240 #define SEEN_LF 2
241 #define SEEN_CRLF 4
242 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244 PyObject *
245 _PyIncrementalNewlineDecoder_decode(PyObject *_self,
246 PyObject *input, int final)
247 {
248 PyObject *output;
249 Py_ssize_t output_len;
250 nldecoder_object *self = (nldecoder_object *) _self;
251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
291 }
292
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
295 */
296 if (!final) {
297 if (output_len > 0
298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
299
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
303 }
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
312 }
313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
324
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
327
328 if (len == 0)
329 return output;
330
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
333 */
334 if (seennl == SEEN_LF || seennl == 0) {
335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
336 }
337
338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
341 */
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
357 if (s > end)
358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
365 Py_UNICODE *s, *end;
366 /* We have already seen all newline types, no need to scan again */
367 if (seennl == SEEN_ALL)
368 goto endscan;
369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
383 }
384 else
385 seennl |= SEEN_CR;
386 }
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
395 else {
396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
411 }
412 else {
413 translated = output;
414 }
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
428 }
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
433 }
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
438 }
439 if (in > end)
440 break;
441 *out++ = c;
442 }
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
446 }
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
450 }
451 }
452 self->seennl |= seennl;
453 }
454
455 return output;
456
457 error:
458 Py_DECREF(output);
459 return NULL;
460 }
461
462 static PyObject *
463 incrementalnewlinedecoder_decode(nldecoder_object *self,
464 PyObject *args, PyObject *kwds)
465 {
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474 }
475
476 static PyObject *
477 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
478 {
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502 }
503
504 static PyObject *
505 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
506 {
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521 }
522
523 static PyObject *
524 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
525 {
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532 }
533
534 static PyObject *
535 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
536 {
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556 }
557
558
559 static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
564 {NULL}
565 };
566
567 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
569 {NULL}
570 };
571
572 PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
575 sizeof(nldecoder_object), /*tp_basicsize*/
576 0, /*tp_itemsize*/
577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
593 incrementalnewlinedecoder_doc, /* tp_doc */
594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
600 incrementalnewlinedecoder_methods, /* tp_methods */
601 0, /* tp_members */
602 incrementalnewlinedecoder_getset, /* tp_getset */
603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611 };
612
613
614 /* TextIOWrapper */
615
616 PyDoc_STRVAR(textiowrapper_doc,
617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
639 typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
641
642 typedef struct
643 {
644 PyObject_HEAD
645 int ok; /* initialized? */
646 int detached;
647 Py_ssize_t chunk_size;
648 PyObject *buffer;
649 PyObject *encoding;
650 PyObject *encoder;
651 PyObject *decoder;
652 PyObject *readnl;
653 PyObject *errors;
654 const char *writenl; /* utf-8 encoded, NULL stands for \n */
655 char line_buffering;
656 char readuniversal;
657 char readtranslate;
658 char writetranslate;
659 char seekable;
660 char telling;
661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
668
669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
677 PyObject *pending_bytes; /* list of bytes objects waiting to be
678 written, or NULL */
679 Py_ssize_t pending_bytes_count;
680 PyObject *snapshot;
681 /* snapshot is either None, or a tuple (dec_flags, next_input) where
682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
686
687 /* Cache raw object if it's a FileIO object */
688 PyObject *raw;
689
690 PyObject *weakreflist;
691 PyObject *dict;
692 } textio;
693
694
695 /* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
697
698 static PyObject *
699 ascii_encode(textio *self, PyObject *text)
700 {
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors));
704 }
705
706 static PyObject *
707 utf16be_encode(textio *self, PyObject *text)
708 {
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), 1);
712 }
713
714 static PyObject *
715 utf16le_encode(textio *self, PyObject *text)
716 {
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors), -1);
720 }
721
722 static PyObject *
723 utf16_encode(textio *self, PyObject *text)
724 {
725 if (!self->encoding_start_of_stream) {
726 /* Skip the BOM and use native byte ordering */
727 #if defined(WORDS_BIGENDIAN)
728 return utf16be_encode(self, text);
729 #else
730 return utf16le_encode(self, text);
731 #endif
732 }
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 0);
736 }
737
738 static PyObject *
739 utf32be_encode(textio *self, PyObject *text)
740 {
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), 1);
744 }
745
746 static PyObject *
747 utf32le_encode(textio *self, PyObject *text)
748 {
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), -1);
752 }
753
754 static PyObject *
755 utf32_encode(textio *self, PyObject *text)
756 {
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759 #if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self, text);
761 #else
762 return utf32le_encode(self, text);
763 #endif
764 }
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), 0);
768 }
769
770 static PyObject *
771 utf8_encode(textio *self, PyObject *text)
772 {
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774 PyUnicode_GET_SIZE(text),
775 PyBytes_AS_STRING(self->errors));
776 }
777
778 static PyObject *
779 latin1_encode(textio *self, PyObject *text)
780 {
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors));
784 }
785
786 /* Map normalized encoding names onto the specialized encoding funcs */
787
788 typedef struct {
789 const char *name;
790 encodefunc_t encodefunc;
791 } encodefuncentry;
792
793 static encodefuncentry encodefuncs[] = {
794 {"ascii", (encodefunc_t) ascii_encode},
795 {"iso8859-1", (encodefunc_t) latin1_encode},
796 {"utf-8", (encodefunc_t) utf8_encode},
797 {"utf-16-be", (encodefunc_t) utf16be_encode},
798 {"utf-16-le", (encodefunc_t) utf16le_encode},
799 {"utf-16", (encodefunc_t) utf16_encode},
800 {"utf-32-be", (encodefunc_t) utf32be_encode},
801 {"utf-32-le", (encodefunc_t) utf32le_encode},
802 {"utf-32", (encodefunc_t) utf32_encode},
803 {NULL, NULL}
804 };
805
806
807 static int
808 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
809 {
810 char *kwlist[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
812 NULL};
813 PyObject *buffer, *raw;
814 char *encoding = NULL;
815 char *errors = NULL;
816 char *newline = NULL;
817 int line_buffering = 0;
818
819 PyObject *res;
820 int r;
821
822 self->ok = 0;
823 self->detached = 0;
824 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
825 kwlist, &buffer, &encoding, &errors,
826 &newline, &line_buffering))
827 return -1;
828
829 if (newline && newline[0] != '\0'
830 && !(newline[0] == '\n' && newline[1] == '\0')
831 && !(newline[0] == '\r' && newline[1] == '\0')
832 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
833 PyErr_Format(PyExc_ValueError,
834 "illegal newline value: %s", newline);
835 return -1;
836 }
837
838 Py_CLEAR(self->buffer);
839 Py_CLEAR(self->encoding);
840 Py_CLEAR(self->encoder);
841 Py_CLEAR(self->decoder);
842 Py_CLEAR(self->readnl);
843 Py_CLEAR(self->decoded_chars);
844 Py_CLEAR(self->pending_bytes);
845 Py_CLEAR(self->snapshot);
846 Py_CLEAR(self->errors);
847 Py_CLEAR(self->raw);
848 self->decoded_chars_used = 0;
849 self->pending_bytes_count = 0;
850 self->encodefunc = NULL;
851 self->writenl = NULL;
852
853 if (encoding == NULL && self->encoding == NULL) {
854 if (_PyIO_locale_module == NULL) {
855 _PyIO_locale_module = PyImport_ImportModule("locale");
856 if (_PyIO_locale_module == NULL)
857 goto catch_ImportError;
858 else
859 goto use_locale;
860 }
861 else {
862 use_locale:
863 self->encoding = PyObject_CallMethod(
864 _PyIO_locale_module, "getpreferredencoding", NULL);
865 if (self->encoding == NULL) {
866 catch_ImportError:
867 /*
868 Importing locale can raise a ImportError because of
869 _functools, and locale.getpreferredencoding can raise a
870 ImportError if _locale is not available. These will happen
871 during module building.
872 */
873 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
874 PyErr_Clear();
875 self->encoding = PyString_FromString("ascii");
876 }
877 else
878 goto error;
879 }
880 else if (!PyString_Check(self->encoding))
881 Py_CLEAR(self->encoding);
882 }
883 }
884 if (self->encoding != NULL)
885 encoding = PyString_AsString(self->encoding);
886 else if (encoding != NULL) {
887 self->encoding = PyString_FromString(encoding);
888 if (self->encoding == NULL)
889 goto error;
890 }
891 else {
892 PyErr_SetString(PyExc_IOError,
893 "could not determine default encoding");
894 }
895
896 if (errors == NULL)
897 errors = "strict";
898 self->errors = PyBytes_FromString(errors);
899 if (self->errors == NULL)
900 goto error;
901
902 self->chunk_size = 8192;
903 self->readuniversal = (newline == NULL || newline[0] == '\0');
904 self->line_buffering = line_buffering;
905 self->readtranslate = (newline == NULL);
906 if (newline) {
907 self->readnl = PyString_FromString(newline);
908 if (self->readnl == NULL)
909 return -1;
910 }
911 self->writetranslate = (newline == NULL || newline[0] != '\0');
912 if (!self->readuniversal && self->writetranslate) {
913 self->writenl = PyString_AsString(self->readnl);
914 if (!strcmp(self->writenl, "\n"))
915 self->writenl = NULL;
916 }
917 #ifdef MS_WINDOWS
918 else
919 self->writenl = "\r\n";
920 #endif
921
922 /* Build the decoder object */
923 res = PyObject_CallMethod(buffer, "readable", NULL);
924 if (res == NULL)
925 goto error;
926 r = PyObject_IsTrue(res);
927 Py_DECREF(res);
928 if (r == -1)
929 goto error;
930 if (r == 1) {
931 self->decoder = PyCodec_IncrementalDecoder(
932 encoding, errors);
933 if (self->decoder == NULL)
934 goto error;
935
936 if (self->readuniversal) {
937 PyObject *incrementalDecoder = PyObject_CallFunction(
938 (PyObject *)&PyIncrementalNewlineDecoder_Type,
939 "Oi", self->decoder, (int)self->readtranslate);
940 if (incrementalDecoder == NULL)
941 goto error;
942 Py_CLEAR(self->decoder);
943 self->decoder = incrementalDecoder;
944 }
945 }
946
947 /* Build the encoder object */
948 res = PyObject_CallMethod(buffer, "writable", NULL);
949 if (res == NULL)
950 goto error;
951 r = PyObject_IsTrue(res);
952 Py_DECREF(res);
953 if (r == -1)
954 goto error;
955 if (r == 1) {
956 PyObject *ci;
957 self->encoder = PyCodec_IncrementalEncoder(
958 encoding, errors);
959 if (self->encoder == NULL)
960 goto error;
961 /* Get the normalized named of the codec */
962 ci = _PyCodec_Lookup(encoding);
963 if (ci == NULL)
964 goto error;
965 res = PyObject_GetAttrString(ci, "name");
966 Py_DECREF(ci);
967 if (res == NULL) {
968 if (PyErr_ExceptionMatches(PyExc_AttributeError))
969 PyErr_Clear();
970 else
971 goto error;
972 }
973 else if (PyString_Check(res)) {
974 encodefuncentry *e = encodefuncs;
975 while (e->name != NULL) {
976 if (!strcmp(PyString_AS_STRING(res), e->name)) {
977 self->encodefunc = e->encodefunc;
978 break;
979 }
980 e++;
981 }
982 }
983 Py_XDECREF(res);
984 }
985
986 self->buffer = buffer;
987 Py_INCREF(buffer);
988
989 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
990 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
991 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
992 raw = PyObject_GetAttrString(buffer, "raw");
993 /* Cache the raw FileIO object to speed up 'closed' checks */
994 if (raw == NULL) {
995 if (PyErr_ExceptionMatches(PyExc_AttributeError))
996 PyErr_Clear();
997 else
998 goto error;
999 }
1000 else if (Py_TYPE(raw) == &PyFileIO_Type)
1001 self->raw = raw;
1002 else
1003 Py_DECREF(raw);
1004 }
1005
1006 res = PyObject_CallMethod(buffer, "seekable", NULL);
1007 if (res == NULL)
1008 goto error;
1009 self->seekable = self->telling = PyObject_IsTrue(res);
1010 Py_DECREF(res);
1011
1012 self->encoding_start_of_stream = 0;
1013 if (self->seekable && self->encoder) {
1014 PyObject *cookieObj;
1015 int cmp;
1016
1017 self->encoding_start_of_stream = 1;
1018
1019 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1020 if (cookieObj == NULL)
1021 goto error;
1022
1023 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1024 Py_DECREF(cookieObj);
1025 if (cmp < 0) {
1026 goto error;
1027 }
1028
1029 if (cmp == 0) {
1030 self->encoding_start_of_stream = 0;
1031 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1032 _PyIO_zero, NULL);
1033 if (res == NULL)
1034 goto error;
1035 Py_DECREF(res);
1036 }
1037 }
1038
1039 self->ok = 1;
1040 return 0;
1041
1042 error:
1043 return -1;
1044 }
1045
1046 static int
1047 _textiowrapper_clear(textio *self)
1048 {
1049 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1050 return -1;
1051 self->ok = 0;
1052 Py_CLEAR(self->buffer);
1053 Py_CLEAR(self->encoding);
1054 Py_CLEAR(self->encoder);
1055 Py_CLEAR(self->decoder);
1056 Py_CLEAR(self->readnl);
1057 Py_CLEAR(self->decoded_chars);
1058 Py_CLEAR(self->pending_bytes);
1059 Py_CLEAR(self->snapshot);
1060 Py_CLEAR(self->errors);
1061 Py_CLEAR(self->raw);
1062 return 0;
1063 }
1064
1065 static void
1066 textiowrapper_dealloc(textio *self)
1067 {
1068 if (_textiowrapper_clear(self) < 0)
1069 return;
1070 _PyObject_GC_UNTRACK(self);
1071 if (self->weakreflist != NULL)
1072 PyObject_ClearWeakRefs((PyObject *)self);
1073 Py_CLEAR(self->dict);
1074 Py_TYPE(self)->tp_free((PyObject *)self);
1075 }
1076
1077 static int
1078 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1079 {
1080 Py_VISIT(self->buffer);
1081 Py_VISIT(self->encoding);
1082 Py_VISIT(self->encoder);
1083 Py_VISIT(self->decoder);
1084 Py_VISIT(self->readnl);
1085 Py_VISIT(self->decoded_chars);
1086 Py_VISIT(self->pending_bytes);
1087 Py_VISIT(self->snapshot);
1088 Py_VISIT(self->errors);
1089 Py_VISIT(self->raw);
1090
1091 Py_VISIT(self->dict);
1092 return 0;
1093 }
1094
1095 static int
1096 textiowrapper_clear(textio *self)
1097 {
1098 if (_textiowrapper_clear(self) < 0)
1099 return -1;
1100 Py_CLEAR(self->dict);
1101 return 0;
1102 }
1103
1104 static PyObject *
1105 textiowrapper_closed_get(textio *self, void *context);
1106
1107 /* This macro takes some shortcuts to make the common case faster. */
1108 #define CHECK_CLOSED(self) \
1109 do { \
1110 int r; \
1111 PyObject *_res; \
1112 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1113 if (self->raw != NULL) \
1114 r = _PyFileIO_closed(self->raw); \
1115 else { \
1116 _res = textiowrapper_closed_get(self, NULL); \
1117 if (_res == NULL) \
1118 return NULL; \
1119 r = PyObject_IsTrue(_res); \
1120 Py_DECREF(_res); \
1121 if (r < 0) \
1122 return NULL; \
1123 } \
1124 if (r > 0) { \
1125 PyErr_SetString(PyExc_ValueError, \
1126 "I/O operation on closed file."); \
1127 return NULL; \
1128 } \
1129 } \
1130 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1131 return NULL; \
1132 } while (0)
1133
/* Return NULL (with ValueError set) from the enclosing function when the
   wrapper is not usable; the message distinguishes a detached buffer from
   an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            self->detached \
                ? "underlying buffer has been detached" \
                : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1145
/* Same check as CHECK_INITIALIZED, for functions that signal errors by
   returning -1. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            self->detached \
                ? "underlying buffer has been detached" \
                : "I/O operation on uninitialized object"); \
        return -1; \
    }
1157
1158
1159 static PyObject *
1160 textiowrapper_detach(textio *self)
1161 {
1162 PyObject *buffer, *res;
1163 CHECK_INITIALIZED(self);
1164 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1165 if (res == NULL)
1166 return NULL;
1167 Py_DECREF(res);
1168 buffer = self->buffer;
1169 self->buffer = NULL;
1170 self->detached = 1;
1171 self->ok = 0;
1172 return buffer;
1173 }
1174
1175 Py_LOCAL_INLINE(const Py_UNICODE *)
1176 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1177 {
1178 /* like wcschr, but doesn't stop at NULL characters */
1179 while (size-- > 0) {
1180 if (*s == ch)
1181 return s;
1182 s++;
1183 }
1184 return NULL;
1185 }
1186
1187 /* Flush the internal write buffer. This doesn't explicitly flush the
1188 underlying buffered object, though. */
1189 static int
1190 _textiowrapper_writeflush(textio *self)
1191 {
1192 PyObject *pending, *b, *ret;
1193
1194 if (self->pending_bytes == NULL)
1195 return 0;
1196
1197 pending = self->pending_bytes;
1198 Py_INCREF(pending);
1199 self->pending_bytes_count = 0;
1200 Py_CLEAR(self->pending_bytes);
1201
1202 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1203 Py_DECREF(pending);
1204 if (b == NULL)
1205 return -1;
1206 ret = PyObject_CallMethodObjArgs(self->buffer,
1207 _PyIO_str_write, b, NULL);
1208 Py_DECREF(b);
1209 if (ret == NULL)
1210 return -1;
1211 Py_DECREF(ret);
1212 return 0;
1213 }
1214
1215 static PyObject *
1216 textiowrapper_write(textio *self, PyObject *args)
1217 {
1218 PyObject *ret;
1219 PyObject *text; /* owned reference */
1220 PyObject *b;
1221 Py_ssize_t textlen;
1222 int haslf = 0;
1223 int needflush = 0;
1224
1225 CHECK_INITIALIZED(self);
1226
1227 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1228 return NULL;
1229 }
1230
1231 CHECK_CLOSED(self);
1232
1233 if (self->encoder == NULL) {
1234 PyErr_SetString(PyExc_IOError, "not writable");
1235 return NULL;
1236 }
1237
1238 Py_INCREF(text);
1239
1240 textlen = PyUnicode_GetSize(text);
1241
1242 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1243 if (findchar(PyUnicode_AS_UNICODE(text),
1244 PyUnicode_GET_SIZE(text), '\n'))
1245 haslf = 1;
1246
1247 if (haslf && self->writetranslate && self->writenl != NULL) {
1248 PyObject *newtext = PyObject_CallMethod(
1249 text, "replace", "ss", "\n", self->writenl);
1250 Py_DECREF(text);
1251 if (newtext == NULL)
1252 return NULL;
1253 text = newtext;
1254 }
1255
1256 if (self->line_buffering &&
1257 (haslf ||
1258 findchar(PyUnicode_AS_UNICODE(text),
1259 PyUnicode_GET_SIZE(text), '\r')))
1260 needflush = 1;
1261
1262 /* XXX What if we were just reading? */
1263 if (self->encodefunc != NULL) {
1264 b = (*self->encodefunc)((PyObject *) self, text);
1265 self->encoding_start_of_stream = 0;
1266 }
1267 else
1268 b = PyObject_CallMethodObjArgs(self->encoder,
1269 _PyIO_str_encode, text, NULL);
1270 Py_DECREF(text);
1271 if (b == NULL)
1272 return NULL;
1273
1274 if (self->pending_bytes == NULL) {
1275 self->pending_bytes = PyList_New(0);
1276 if (self->pending_bytes == NULL) {
1277 Py_DECREF(b);
1278 return NULL;
1279 }
1280 self->pending_bytes_count = 0;
1281 }
1282 if (PyList_Append(self->pending_bytes, b) < 0) {
1283 Py_DECREF(b);
1284 return NULL;
1285 }
1286 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1287 Py_DECREF(b);
1288 if (self->pending_bytes_count > self->chunk_size || needflush) {
1289 if (_textiowrapper_writeflush(self) < 0)
1290 return NULL;
1291 }
1292
1293 if (needflush) {
1294 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1295 if (ret == NULL)
1296 return NULL;
1297 Py_DECREF(ret);
1298 }
1299
1300 Py_CLEAR(self->snapshot);
1301
1302 if (self->decoder) {
1303 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1304 if (ret == NULL)
1305 return NULL;
1306 Py_DECREF(ret);
1307 }
1308
1309 return PyLong_FromSsize_t(textlen);
1310 }
1311
1312 /* Steal a reference to chars and store it in the decoded_char buffer;
1313 */
1314 static void
1315 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1316 {
1317 Py_CLEAR(self->decoded_chars);
1318 self->decoded_chars = chars;
1319 self->decoded_chars_used = 0;
1320 }
1321
1322 static PyObject *
1323 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1324 {
1325 PyObject *chars;
1326 Py_ssize_t avail;
1327
1328 if (self->decoded_chars == NULL)
1329 return PyUnicode_FromStringAndSize(NULL, 0);
1330
1331 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1332 - self->decoded_chars_used);
1333
1334 assert(avail >= 0);
1335
1336 if (n < 0 || n > avail)
1337 n = avail;
1338
1339 if (self->decoded_chars_used > 0 || n < avail) {
1340 chars = PyUnicode_FromUnicode(
1341 PyUnicode_AS_UNICODE(self->decoded_chars)
1342 + self->decoded_chars_used, n);
1343 if (chars == NULL)
1344 return NULL;
1345 }
1346 else {
1347 chars = self->decoded_chars;
1348 Py_INCREF(chars);
1349 }
1350
1351 self->decoded_chars_used += n;
1352 return chars;
1353 }
1354
1355 /* Read and decode the next chunk of data from the BufferedReader.
1356 */
1357 static int
1358 textiowrapper_read_chunk(textio *self)
1359 {
1360 PyObject *dec_buffer = NULL;
1361 PyObject *dec_flags = NULL;
1362 PyObject *input_chunk = NULL;
1363 PyObject *decoded_chars, *chunk_size;
1364 int eof;
1365
1366 /* The return value is True unless EOF was reached. The decoded string is
1367 * placed in self._decoded_chars (replacing its previous value). The
1368 * entire input chunk is sent to the decoder, though some of it may remain
1369 * buffered in the decoder, yet to be converted.
1370 */
1371
1372 if (self->decoder == NULL) {
1373 PyErr_SetString(PyExc_IOError, "not readable");
1374 return -1;
1375 }
1376
1377 if (self->telling) {
1378 /* To prepare for tell(), we need to snapshot a point in the file
1379 * where the decoder's input buffer is empty.
1380 */
1381
1382 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1383 _PyIO_str_getstate, NULL);
1384 if (state == NULL)
1385 return -1;
1386 /* Given this, we know there was a valid snapshot point
1387 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1388 */
1389 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1390 Py_DECREF(state);
1391 return -1;
1392 }
1393 Py_INCREF(dec_buffer);
1394 Py_INCREF(dec_flags);
1395 Py_DECREF(state);
1396 }
1397
1398 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1399 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1400 if (chunk_size == NULL)
1401 goto fail;
1402 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1403 _PyIO_str_read1, chunk_size, NULL);
1404 Py_DECREF(chunk_size);
1405 if (input_chunk == NULL)
1406 goto fail;
1407 assert(PyBytes_Check(input_chunk));
1408
1409 eof = (PyBytes_Size(input_chunk) == 0);
1410
1411 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1412 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1413 self->decoder, input_chunk, eof);
1414 }
1415 else {
1416 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1417 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1418 }
1419
1420 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1421 if (decoded_chars == NULL)
1422 goto fail;
1423 textiowrapper_set_decoded_chars(self, decoded_chars);
1424 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1425 eof = 0;
1426
1427 if (self->telling) {
1428 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1429 * next input to be decoded is dec_buffer + input_chunk.
1430 */
1431 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1432 if (next_input == NULL)
1433 goto fail;
1434 assert (PyBytes_Check(next_input));
1435 Py_DECREF(dec_buffer);
1436 Py_CLEAR(self->snapshot);
1437 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1438 }
1439 Py_DECREF(input_chunk);
1440
1441 return (eof == 0);
1442
1443 fail:
1444 Py_XDECREF(dec_buffer);
1445 Py_XDECREF(dec_flags);
1446 Py_XDECREF(input_chunk);
1447 return -1;
1448 }
1449
1450 static PyObject *
1451 textiowrapper_read(textio *self, PyObject *args)
1452 {
1453 Py_ssize_t n = -1;
1454 PyObject *result = NULL, *chunks = NULL;
1455
1456 CHECK_INITIALIZED(self);
1457
1458 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1459 return NULL;
1460
1461 CHECK_CLOSED(self);
1462
1463 if (self->decoder == NULL) {
1464 PyErr_SetString(PyExc_IOError, "not readable");
1465 return NULL;
1466 }
1467
1468 if (_textiowrapper_writeflush(self) < 0)
1469 return NULL;
1470
1471 if (n < 0) {
1472 /* Read everything */
1473 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1474 PyObject *decoded, *final;
1475 if (bytes == NULL)
1476 goto fail;
1477 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1478 bytes, Py_True, NULL);
1479 Py_DECREF(bytes);
1480 if (decoded == NULL)
1481 goto fail;
1482
1483 result = textiowrapper_get_decoded_chars(self, -1);
1484
1485 if (result == NULL) {
1486 Py_DECREF(decoded);
1487 return NULL;
1488 }
1489
1490 final = PyUnicode_Concat(result, decoded);
1491 Py_DECREF(result);
1492 Py_DECREF(decoded);
1493 if (final == NULL)
1494 goto fail;
1495
1496 Py_CLEAR(self->snapshot);
1497 return final;
1498 }
1499 else {
1500 int res = 1;
1501 Py_ssize_t remaining = n;
1502
1503 result = textiowrapper_get_decoded_chars(self, n);
1504 if (result == NULL)
1505 goto fail;
1506 remaining -= PyUnicode_GET_SIZE(result);
1507
1508 /* Keep reading chunks until we have n characters to return */
1509 while (remaining > 0) {
1510 res = textiowrapper_read_chunk(self);
1511 if (res < 0)
1512 goto fail;
1513 if (res == 0) /* EOF */
1514 break;
1515 if (chunks == NULL) {
1516 chunks = PyList_New(0);
1517 if (chunks == NULL)
1518 goto fail;
1519 }
1520 if (PyList_Append(chunks, result) < 0)
1521 goto fail;
1522 Py_DECREF(result);
1523 result = textiowrapper_get_decoded_chars(self, remaining);
1524 if (result == NULL)
1525 goto fail;
1526 remaining -= PyUnicode_GET_SIZE(result);
1527 }
1528 if (chunks != NULL) {
1529 if (result != NULL && PyList_Append(chunks, result) < 0)
1530 goto fail;
1531 Py_CLEAR(result);
1532 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1533 if (result == NULL)
1534 goto fail;
1535 Py_CLEAR(chunks);
1536 }
1537 return result;
1538 }
1539 fail:
1540 Py_XDECREF(result);
1541 Py_XDECREF(chunks);
1542 return NULL;
1543 }
1544
1545
1546 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1547 that is to the NUL character. Otherwise the function will produce
1548 incorrect results. */
1549 static Py_UNICODE *
1550 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1551 {
1552 Py_UNICODE *s = start;
1553 for (;;) {
1554 while (*s > ch)
1555 s++;
1556 if (*s == ch)
1557 return s;
1558 if (s == end)
1559 return NULL;
1560 s++;
1561 }
1562 }
1563
1564 Py_ssize_t
1565 _PyIO_find_line_ending(
1566 int translated, int universal, PyObject *readnl,
1567 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1568 {
1569 Py_ssize_t len = end - start;
1570
1571 if (translated) {
1572 /* Newlines are already translated, only search for \n */
1573 Py_UNICODE *pos = find_control_char(start, end, '\n');
1574 if (pos != NULL)
1575 return pos - start + 1;
1576 else {
1577 *consumed = len;
1578 return -1;
1579 }
1580 }
1581 else if (universal) {
1582 /* Universal newline search. Find any of \r, \r\n, \n
1583 * The decoder ensures that \r\n are not split in two pieces
1584 */
1585 Py_UNICODE *s = start;
1586 for (;;) {
1587 Py_UNICODE ch;
1588 /* Fast path for non-control chars. The loop always ends
1589 since the Py_UNICODE storage is NUL-terminated. */
1590 while (*s > '\r')
1591 s++;
1592 if (s >= end) {
1593 *consumed = len;
1594 return -1;
1595 }
1596 ch = *s++;
1597 if (ch == '\n')
1598 return s - start;
1599 if (ch == '\r') {
1600 if (*s == '\n')
1601 return s - start + 1;
1602 else
1603 return s - start;
1604 }
1605 }
1606 }
1607 else {
1608 /* Non-universal mode. */
1609 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1610 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1611 if (readnl_len == 1) {
1612 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1613 if (pos != NULL)
1614 return pos - start + 1;
1615 *consumed = len;
1616 return -1;
1617 }
1618 else {
1619 Py_UNICODE *s = start;
1620 Py_UNICODE *e = end - readnl_len + 1;
1621 Py_UNICODE *pos;
1622 if (e < s)
1623 e = s;
1624 while (s < e) {
1625 Py_ssize_t i;
1626 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1627 if (pos == NULL || pos >= e)
1628 break;
1629 for (i = 1; i < readnl_len; i++) {
1630 if (pos[i] != nl[i])
1631 break;
1632 }
1633 if (i == readnl_len)
1634 return pos - start + readnl_len;
1635 s = pos + 1;
1636 }
1637 pos = find_control_char(e, end, nl[0]);
1638 if (pos == NULL)
1639 *consumed = len;
1640 else
1641 *consumed = pos - start;
1642 return -1;
1643 }
1644 }
1645 }
1646
/* Core of readline(): return the next line as a unicode string.

   limit: maximum number of characters to return; a negative value means
          no limit.

   Pending written data is flushed first.  Data is pulled chunk by chunk
   through textiowrapper_read_chunk() until a line ending is found, the
   limit is reached, or EOF is hit.  Returns a new reference (an empty
   string at EOF), or NULL with an exception set.

   Working variables:
     line             - string currently searched (the decoded buffer, or
                        `remaining` + the decoded buffer)
     chunks           - list of pieces already put aside for the result
     remaining        - tail of the previous buffer that was not put aside
                        and must be prepended to the next chunk
     chunked          - number of characters stored in `chunks`
     start, endpos    - bounds of the current piece inside `line`
     offset_to_buffer - number of leading characters of `line` that come
                        from `remaining` instead of self->decoded_chars */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer, from the first
               unread character. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Glue the leftover of the previous buffer in front of the
               freshly decoded one. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found; convert to an offset inside `line` and
               clip to the length limit. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* Translate the end offset in `line` back into an offset in
           self->decoded_chars. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover that was never merged into a later buffer (loop left
           at EOF): it is part of the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1800
1801 static PyObject *
1802 textiowrapper_readline(textio *self, PyObject *args)
1803 {
1804 PyObject *limitobj = NULL;
1805 Py_ssize_t limit = -1;
1806
1807 CHECK_INITIALIZED(self);
1808 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1809 return NULL;
1810 }
1811 if (limitobj) {
1812 if (!PyNumber_Check(limitobj)) {
1813 PyErr_Format(PyExc_TypeError,
1814 "integer argument expected, got '%.200s'",
1815 Py_TYPE(limitobj)->tp_name);
1816 return NULL;
1817 }
1818 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1819 if (limit == -1 && PyErr_Occurred())
1820 return NULL;
1821 }
1822 return _textiowrapper_readline(self, limit);
1823 }
1824
1825 /* Seek and Tell */
1826
/* Unpacked form of the integer "cookie" produced by tell() and consumed by
   seek().  The fields are packed into / unpacked from a single Python long
   by textiowrapper_build_cookie() / textiowrapper_parse_cookie(). */
typedef struct {
    Py_off_t start_pos;   /* position in the underlying buffer of the safe
                             start point (decoder input buffer empty) */
    int dec_flags;        /* decoder flags to restore at start_pos */
    int bytes_to_feed;    /* number of bytes to read from start_pos and
                             feed to the decoder */
    int chars_to_skip;    /* number of decoded characters to skip after
                             feeding those bytes */
    char need_eof;        /* nonzero if the decoder must be fed with the
                             "final" flag set */
} cookie_type;
1834
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).  The following macros define at which offsets in that
   intermediary byte buffer the various cookie_type fields are stored.
*/

/* Total size of the intermediary buffer: one Py_off_t, three ints and one
   char, with no padding between the fields. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1872
1873 static int
1874 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1875 {
1876 unsigned char buffer[COOKIE_BUF_LEN];
1877 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1878 if (cookieLong == NULL)
1879 return -1;
1880
1881 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1882 IS_LITTLE_ENDIAN, 0) < 0) {
1883 Py_DECREF(cookieLong);
1884 return -1;
1885 }
1886 Py_DECREF(cookieLong);
1887
1888 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1889 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1890 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1891 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1892 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1893
1894 return 0;
1895 }
1896
1897 static PyObject *
1898 textiowrapper_build_cookie(cookie_type *cookie)
1899 {
1900 unsigned char buffer[COOKIE_BUF_LEN];
1901
1902 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1903 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1904 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1905 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1906 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1907
1908 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1909 }
1910 #undef IS_LITTLE_ENDIAN
1911
1912 static int
1913 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1914 {
1915 PyObject *res;
1916 /* When seeking to the start of the stream, we call decoder.reset()
1917 rather than decoder.getstate().
1918 This is for a few decoders such as utf-16 for which the state value
1919 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1920 utf-16, that we are expecting a BOM).
1921 */
1922 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1923 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1924 else
1925 res = PyObject_CallMethod(self->decoder, "setstate",
1926 "((si))", "", cookie->dec_flags);
1927 if (res == NULL)
1928 return -1;
1929 Py_DECREF(res);
1930 return 0;
1931 }
1932
1933 static int
1934 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1935 {
1936 PyObject *res;
1937 /* Same as _textiowrapper_decoder_setstate() above. */
1938 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1939 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1940 self->encoding_start_of_stream = 1;
1941 }
1942 else {
1943 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1944 _PyIO_zero, NULL);
1945 self->encoding_start_of_stream = 0;
1946 }
1947 if (res == NULL)
1948 return -1;
1949 Py_DECREF(res);
1950 return 0;
1951 }
1952
1953 static PyObject *
1954 textiowrapper_seek(textio *self, PyObject *args)
1955 {
1956 PyObject *cookieObj, *posobj;
1957 cookie_type cookie;
1958 int whence = 0;
1959 PyObject *res;
1960 int cmp;
1961
1962 CHECK_INITIALIZED(self);
1963
1964 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1965 return NULL;
1966 CHECK_CLOSED(self);
1967
1968 Py_INCREF(cookieObj);
1969
1970 if (!self->seekable) {
1971 PyErr_SetString(PyExc_IOError,
1972 "underlying stream is not seekable");
1973 goto fail;
1974 }
1975
1976 if (whence == 1) {
1977 /* seek relative to current position */
1978 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1979 if (cmp < 0)
1980 goto fail;
1981
1982 if (cmp == 0) {
1983 PyErr_SetString(PyExc_IOError,
1984 "can't do nonzero cur-relative seeks");
1985 goto fail;
1986 }
1987
1988 /* Seeking to the current position should attempt to
1989 * sync the underlying buffer with the current position.
1990 */
1991 Py_DECREF(cookieObj);
1992 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1993 if (cookieObj == NULL)
1994 goto fail;
1995 }
1996 else if (whence == 2) {
1997 /* seek relative to end of file */
1998
1999 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2000 if (cmp < 0)
2001 goto fail;
2002
2003 if (cmp == 0) {
2004 PyErr_SetString(PyExc_IOError,
2005 "can't do nonzero end-relative seeks");
2006 goto fail;
2007 }
2008
2009 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2010 if (res == NULL)
2011 goto fail;
2012 Py_DECREF(res);
2013
2014 textiowrapper_set_decoded_chars(self, NULL);
2015 Py_CLEAR(self->snapshot);
2016 if (self->decoder) {
2017 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2018 if (res == NULL)
2019 goto fail;
2020 Py_DECREF(res);
2021 }
2022
2023 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2024 Py_XDECREF(cookieObj);
2025 return res;
2026 }
2027 else if (whence != 0) {
2028 PyErr_Format(PyExc_ValueError,
2029 "invalid whence (%d, should be 0, 1 or 2)", whence);
2030 goto fail;
2031 }
2032
2033 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2034 if (cmp < 0)
2035 goto fail;
2036
2037 if (cmp == 1) {
2038 PyObject *repr = PyObject_Repr(cookieObj);
2039 if (repr != NULL) {
2040 PyErr_Format(PyExc_ValueError,
2041 "negative seek position %s",
2042 PyString_AS_STRING(repr));
2043 Py_DECREF(repr);
2044 }
2045 goto fail;
2046 }
2047
2048 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2049 if (res == NULL)
2050 goto fail;
2051 Py_DECREF(res);
2052
2053 /* The strategy of seek() is to go back to the safe start point
2054 * and replay the effect of read(chars_to_skip) from there.
2055 */
2056 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2057 goto fail;
2058
2059 /* Seek back to the safe start point. */
2060 posobj = PyLong_FromOff_t(cookie.start_pos);
2061 if (posobj == NULL)
2062 goto fail;
2063 res = PyObject_CallMethodObjArgs(self->buffer,
2064 _PyIO_str_seek, posobj, NULL);
2065 Py_DECREF(posobj);
2066 if (res == NULL)
2067 goto fail;
2068 Py_DECREF(res);
2069
2070 textiowrapper_set_decoded_chars(self, NULL);
2071 Py_CLEAR(self->snapshot);
2072
2073 /* Restore the decoder to its state from the safe start point. */
2074 if (self->decoder) {
2075 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2076 goto fail;
2077 }
2078
2079 if (cookie.chars_to_skip) {
2080 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2081 PyObject *input_chunk = PyObject_CallMethod(
2082 self->buffer, "read", "i", cookie.bytes_to_feed);
2083 PyObject *decoded;
2084
2085 if (input_chunk == NULL)
2086 goto fail;
2087
2088 assert (PyBytes_Check(input_chunk));
2089
2090 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2091 if (self->snapshot == NULL) {
2092 Py_DECREF(input_chunk);
2093 goto fail;
2094 }
2095
2096 decoded = PyObject_CallMethod(self->decoder, "decode",
2097 "Oi", input_chunk, (int)cookie.need_eof);
2098
2099 if (decoded == NULL)
2100 goto fail;
2101
2102 textiowrapper_set_decoded_chars(self, decoded);
2103
2104 /* Skip chars_to_skip of the decoded characters. */
2105 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2106 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2107 goto fail;
2108 }
2109 self->decoded_chars_used = cookie.chars_to_skip;
2110 }
2111 else {
2112 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2113 if (self->snapshot == NULL)
2114 goto fail;
2115 }
2116
2117 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2118 if (self->encoder) {
2119 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2120 goto fail;
2121 }
2122 return cookieObj;
2123 fail:
2124 Py_XDECREF(cookieObj);
2125 return NULL;
2126
2127 }
2128
2129 static PyObject *
2130 textiowrapper_tell(textio *self, PyObject *args)
2131 {
2132 PyObject *res;
2133 PyObject *posobj = NULL;
2134 cookie_type cookie = {0,0,0,0,0};
2135 PyObject *next_input;
2136 Py_ssize_t chars_to_skip, chars_decoded;
2137 PyObject *saved_state = NULL;
2138 char *input, *input_end;
2139
2140 CHECK_INITIALIZED(self);
2141 CHECK_CLOSED(self);
2142
2143 if (!self->seekable) {
2144 PyErr_SetString(PyExc_IOError,
2145 "underlying stream is not seekable");
2146 goto fail;
2147 }
2148 if (!self->telling) {
2149 PyErr_SetString(PyExc_IOError,
2150 "telling position disabled by next() call");
2151 goto fail;
2152 }
2153
2154 if (_textiowrapper_writeflush(self) < 0)
2155 return NULL;
2156 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2157 if (res == NULL)
2158 goto fail;
2159 Py_DECREF(res);
2160
2161 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2162 if (posobj == NULL)
2163 goto fail;
2164
2165 if (self->decoder == NULL || self->snapshot == NULL) {
2166 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2167 return posobj;
2168 }
2169
2170 #if defined(HAVE_LARGEFILE_SUPPORT)
2171 cookie.start_pos = PyLong_AsLongLong(posobj);
2172 #else
2173 cookie.start_pos = PyLong_AsLong(posobj);
2174 #endif
2175 if (PyErr_Occurred())
2176 goto fail;
2177
2178 /* Skip backward to the snapshot point (see _read_chunk). */
2179 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2180 goto fail;
2181
2182 assert (PyBytes_Check(next_input));
2183
2184 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2185
2186 /* How many decoded characters have been used up since the snapshot? */
2187 if (self->decoded_chars_used == 0) {
2188 /* We haven't moved from the snapshot point. */
2189 Py_DECREF(posobj);
2190 return textiowrapper_build_cookie(&cookie);
2191 }
2192
2193 chars_to_skip = self->decoded_chars_used;
2194
2195 /* Starting from the snapshot position, we will walk the decoder
2196 * forward until it gives us enough decoded characters.
2197 */
2198 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2199 _PyIO_str_getstate, NULL);
2200 if (saved_state == NULL)
2201 goto fail;
2202
2203 /* Note our initial start point. */
2204 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2205 goto fail;
2206
2207 /* Feed the decoder one byte at a time. As we go, note the
2208 * nearest "safe start point" before the current location
2209 * (a point where the decoder has nothing buffered, so seek()
2210 * can safely start from there and advance to this location).
2211 */
2212 chars_decoded = 0;
2213 input = PyBytes_AS_STRING(next_input);
2214 input_end = input + PyBytes_GET_SIZE(next_input);
2215 while (input < input_end) {
2216 PyObject *state;
2217 char *dec_buffer;
2218 Py_ssize_t dec_buffer_len;
2219 int dec_flags;
2220
2221 PyObject *decoded = PyObject_CallMethod(
2222 self->decoder, "decode", "s#", input, 1);
2223 if (decoded == NULL)
2224 goto fail;
2225 assert (PyUnicode_Check(decoded));
2226 chars_decoded += PyUnicode_GET_SIZE(decoded);
2227 Py_DECREF(decoded);
2228
2229 cookie.bytes_to_feed += 1;
2230
2231 state = PyObject_CallMethodObjArgs(self->decoder,
2232 _PyIO_str_getstate, NULL);
2233 if (state == NULL)
2234 goto fail;
2235 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2236 Py_DECREF(state);
2237 goto fail;
2238 }
2239 Py_DECREF(state);
2240
2241 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2242 /* Decoder buffer is empty, so this is a safe start point. */
2243 cookie.start_pos += cookie.bytes_to_feed;
2244 chars_to_skip -= chars_decoded;
2245 cookie.dec_flags = dec_flags;
2246 cookie.bytes_to_feed = 0;
2247 chars_decoded = 0;
2248 }
2249 if (chars_decoded >= chars_to_skip)
2250 break;
2251 input++;
2252 }
2253 if (input == input_end) {
2254 /* We didn't get enough decoded data; signal EOF to get more. */
2255 PyObject *decoded = PyObject_CallMethod(
2256 self->decoder, "decode", "si", "", /* final = */ 1);
2257 if (decoded == NULL)
2258 goto fail;
2259 assert (PyUnicode_Check(decoded));
2260 chars_decoded += PyUnicode_GET_SIZE(decoded);
2261 Py_DECREF(decoded);
2262 cookie.need_eof = 1;
2263
2264 if (chars_decoded < chars_to_skip) {
2265 PyErr_SetString(PyExc_IOError,
2266 "can't reconstruct logical file position");
2267 goto fail;
2268 }
2269 }
2270
2271 /* finally */
2272 Py_XDECREF(posobj);
2273 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2274 Py_DECREF(saved_state);
2275 if (res == NULL)
2276 return NULL;
2277 Py_DECREF(res);
2278
2279 /* The returned cookie corresponds to the last safe start point. */
2280 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2281 return textiowrapper_build_cookie(&cookie);
2282
2283 fail:
2284 Py_XDECREF(posobj);
2285 if (saved_state) {
2286 PyObject *type, *value, *traceback;
2287 PyErr_Fetch(&type, &value, &traceback);
2288
2289 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2290 Py_DECREF(saved_state);
2291 if (res == NULL)
2292 return NULL;
2293 Py_DECREF(res);
2294
2295 PyErr_Restore(type, value, traceback);
2296 }
2297 return NULL;
2298 }
2299
2300 static PyObject *
2301 textiowrapper_truncate(textio *self, PyObject *args)
2302 {
2303 PyObject *pos = Py_None;
2304 PyObject *res;
2305
2306 CHECK_INITIALIZED(self)
2307 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2308 return NULL;
2309 }
2310
2311 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2312 if (res == NULL)
2313 return NULL;
2314 Py_DECREF(res);
2315
2316 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2317 }
2318
2319 static PyObject *
2320 textiowrapper_repr(textio *self)
2321 {
2322 PyObject *nameobj, *res;
2323 PyObject *namerepr = NULL, *encrepr = NULL;
2324
2325 CHECK_INITIALIZED(self);
2326
2327 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2328 if (nameobj == NULL) {
2329 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2330 PyErr_Clear();
2331 else
2332 goto error;
2333 encrepr = PyObject_Repr(self->encoding);
2334 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2335 PyString_AS_STRING(encrepr));
2336 }
2337 else {
2338 encrepr = PyObject_Repr(self->encoding);
2339 namerepr = PyObject_Repr(nameobj);
2340 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2341 PyString_AS_STRING(namerepr),
2342 PyString_AS_STRING(encrepr));
2343 Py_DECREF(nameobj);
2344 }
2345 Py_XDECREF(namerepr);
2346 Py_XDECREF(encrepr);
2347 return res;
2348
2349 error:
2350 Py_XDECREF(namerepr);
2351 Py_XDECREF(encrepr);
2352 return NULL;
2353 }
2354
2355
2356 /* Inquiries */
2357
2358 static PyObject *
2359 textiowrapper_fileno(textio *self, PyObject *args)
2360 {
2361 CHECK_INITIALIZED(self);
2362 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2363 }
2364
2365 static PyObject *
2366 textiowrapper_seekable(textio *self, PyObject *args)
2367 {
2368 CHECK_INITIALIZED(self);
2369 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2370 }
2371
2372 static PyObject *
2373 textiowrapper_readable(textio *self, PyObject *args)
2374 {
2375 CHECK_INITIALIZED(self);
2376 return PyObject_CallMethod(self->buffer, "readable", NULL);
2377 }
2378
2379 static PyObject *
2380 textiowrapper_writable(textio *self, PyObject *args)
2381 {
2382 CHECK_INITIALIZED(self);
2383 return PyObject_CallMethod(self->buffer, "writable", NULL);
2384 }
2385
2386 static PyObject *
2387 textiowrapper_isatty(textio *self, PyObject *args)
2388 {
2389 CHECK_INITIALIZED(self);
2390 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2391 }
2392
2393 static PyObject *
2394 textiowrapper_flush(textio *self, PyObject *args)
2395 {
2396 CHECK_INITIALIZED(self);
2397 CHECK_CLOSED(self);
2398 self->telling = self->seekable;
2399 if (_textiowrapper_writeflush(self) < 0)
2400 return NULL;
2401 return PyObject_CallMethod(self->buffer, "flush", NULL);
2402 }
2403
2404 static PyObject *
2405 textiowrapper_close(textio *self, PyObject *args)
2406 {
2407 PyObject *res;
2408 int r;
2409 CHECK_INITIALIZED(self);
2410
2411 res = textiowrapper_closed_get(self, NULL);
2412 if (res == NULL)
2413 return NULL;
2414 r = PyObject_IsTrue(res);
2415 Py_DECREF(res);
2416 if (r < 0)
2417 return NULL;
2418
2419 if (r > 0) {
2420 Py_RETURN_NONE; /* stream already closed */
2421 }
2422 else {
2423 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2424 if (res == NULL) {
2425 return NULL;
2426 }
2427 else
2428 Py_DECREF(res);
2429
2430 return PyObject_CallMethod(self->buffer, "close", NULL);
2431 }
2432 }
2433
2434 static PyObject *
2435 textiowrapper_iternext(textio *self)
2436 {
2437 PyObject *line;
2438
2439 CHECK_INITIALIZED(self);
2440
2441 self->telling = 0;
2442 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2443 /* Skip method call overhead for speed */
2444 line = _textiowrapper_readline(self, -1);
2445 }
2446 else {
2447 line = PyObject_CallMethodObjArgs((PyObject *)self,
2448 _PyIO_str_readline, NULL);
2449 if (line && !PyUnicode_Check(line)) {
2450 PyErr_Format(PyExc_IOError,
2451 "readline() should have returned an str object, "
2452 "not '%.200s'", Py_TYPE(line)->tp_name);
2453 Py_DECREF(line);
2454 return NULL;
2455 }
2456 }
2457
2458 if (line == NULL)
2459 return NULL;
2460
2461 if (PyUnicode_GET_SIZE(line) == 0) {
2462 /* Reached EOF or would have blocked */
2463 Py_DECREF(line);
2464 Py_CLEAR(self->snapshot);
2465 self->telling = self->seekable;
2466 return NULL;
2467 }
2468
2469 return line;
2470 }
2471
2472 static PyObject *
2473 textiowrapper_name_get(textio *self, void *context)
2474 {
2475 CHECK_INITIALIZED(self);
2476 return PyObject_GetAttrString(self->buffer, "name");
2477 }
2478
2479 static PyObject *
2480 textiowrapper_closed_get(textio *self, void *context)
2481 {
2482 CHECK_INITIALIZED(self);
2483 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2484 }
2485
2486 static PyObject *
2487 textiowrapper_newlines_get(textio *self, void *context)
2488 {
2489 PyObject *res;
2490 CHECK_INITIALIZED(self);
2491 if (self->decoder == NULL)
2492 Py_RETURN_NONE;
2493 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2494 if (res == NULL) {
2495 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2496 PyErr_Clear();
2497 Py_RETURN_NONE;
2498 }
2499 else {
2500 return NULL;
2501 }
2502 }
2503 return res;
2504 }
2505
2506 static PyObject *
2507 textiowrapper_errors_get(textio *self, void *context)
2508 {
2509 CHECK_INITIALIZED(self);
2510 Py_INCREF(self->errors);
2511 return self->errors;
2512 }
2513
2514 static PyObject *
2515 textiowrapper_chunk_size_get(textio *self, void *context)
2516 {
2517 CHECK_INITIALIZED(self);
2518 return PyLong_FromSsize_t(self->chunk_size);
2519 }
2520
2521 static int
2522 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2523 {
2524 Py_ssize_t n;
2525 CHECK_INITIALIZED_INT(self);
2526 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2527 if (n == -1 && PyErr_Occurred())
2528 return -1;
2529 if (n <= 0) {
2530 PyErr_SetString(PyExc_ValueError,
2531 "a strictly positive integer is required");
2532 return -1;
2533 }
2534 self->chunk_size = n;
2535 return 0;
2536 }
2537
/* Method table for _io.TextIOWrapper.
 *
 * Each entry is {python name, C implementation, calling convention}.
 * METH_NOARGS entries take no Python arguments; METH_VARARGS entries
 * receive a positional-argument tuple.  The {NULL, NULL} entry terminates
 * the table.
 */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries, each forwarded to the underlying buffer */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}
};
2557
/* Read-only attributes exposed directly from fields of the textio struct.
 * Each entry is {python name, member type, field offset, flags}; the {NULL}
 * entry terminates the table.
 */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}
};
2564
/* Computed attributes of _io.TextIOWrapper.
 *
 * Each entry is {python name, getter, setter, doc}.  A NULL setter makes
 * the attribute read-only; only _CHUNK_SIZE has a setter.  The {NULL} entry
 * terminates the table.
 */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
2576
/* Type object for _io.TextIOWrapper.
 *
 * Slots are filled positionally, so the order of the entries below must not
 * change.  The type is subclassable (Py_TPFLAGS_BASETYPE), participates in
 * cyclic garbage collection (traverse/clear are provided), supports weak
 * references and carries a per-instance __dict__.
 *
 * NOTE(review): tp_base and tp_iter are left 0 here although tp_iternext is
 * set -- presumably the base type is assigned (and tp_iter inherited) during
 * module initialisation; confirm against the module init code.
 */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};