Python-2.7.3/Modules/_io/textio.c

No issues found

   1 /*
   2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
   3 
   4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
   5 
   6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
   7 */
   8 
   9 #define PY_SSIZE_T_CLEAN
  10 #include "Python.h"
  11 #include "structmember.h"
  12 #include "_iomodule.h"
  13 
  14 /* TextIOBase */
  15 
  16 PyDoc_STRVAR(textiobase_doc,
  17     "Base class for text I/O.\n"
  18     "\n"
  19     "This class provides a character and line based interface to stream\n"
  20     "I/O. There is no readinto method because Python's character strings\n"
  21     "are immutable. There is no public constructor.\n"
  22     );
  23 
  24 static PyObject *
  25 _unsupported(const char *message)
  26 {
  27     PyErr_SetString(_PyIO_unsupported_operation, message);
  28     return NULL;
  29 }
  30 
  31 PyDoc_STRVAR(textiobase_detach_doc,
  32     "Separate the underlying buffer from the TextIOBase and return it.\n"
  33     "\n"
  34     "After the underlying buffer has been detached, the TextIO is in an\n"
  35     "unusable state.\n"
  36     );
  37 
  38 static PyObject *
  39 textiobase_detach(PyObject *self)
  40 {
  41     return _unsupported("detach");
  42 }
  43 
  44 PyDoc_STRVAR(textiobase_read_doc,
  45     "Read at most n characters from stream.\n"
  46     "\n"
  47     "Read from underlying buffer until we have n characters or we hit EOF.\n"
  48     "If n is negative or omitted, read until EOF.\n"
  49     );
  50 
  51 static PyObject *
  52 textiobase_read(PyObject *self, PyObject *args)
  53 {
  54     return _unsupported("read");
  55 }
  56 
  57 PyDoc_STRVAR(textiobase_readline_doc,
  58     "Read until newline or EOF.\n"
  59     "\n"
  60     "Returns an empty string if EOF is hit immediately.\n"
  61     );
  62 
  63 static PyObject *
  64 textiobase_readline(PyObject *self, PyObject *args)
  65 {
  66     return _unsupported("readline");
  67 }
  68 
  69 PyDoc_STRVAR(textiobase_write_doc,
  70     "Write string to stream.\n"
  71     "Returns the number of characters written (which is always equal to\n"
  72     "the length of the string).\n"
  73     );
  74 
  75 static PyObject *
  76 textiobase_write(PyObject *self, PyObject *args)
  77 {
  78     return _unsupported("write");
  79 }
  80 
  81 PyDoc_STRVAR(textiobase_encoding_doc,
  82     "Encoding of the text stream.\n"
  83     "\n"
  84     "Subclasses should override.\n"
  85     );
  86 
  87 static PyObject *
  88 textiobase_encoding_get(PyObject *self, void *context)
  89 {
  90     Py_RETURN_NONE;
  91 }
  92 
  93 PyDoc_STRVAR(textiobase_newlines_doc,
  94     "Line endings translated so far.\n"
  95     "\n"
  96     "Only line endings translated during reading are considered.\n"
  97     "\n"
  98     "Subclasses should override.\n"
  99     );
 100 
 101 static PyObject *
 102 textiobase_newlines_get(PyObject *self, void *context)
 103 {
 104     Py_RETURN_NONE;
 105 }
 106 
 107 PyDoc_STRVAR(textiobase_errors_doc,
 108     "The error setting of the decoder or encoder.\n"
 109     "\n"
 110     "Subclasses should override.\n"
 111     );
 112 
 113 static PyObject *
 114 textiobase_errors_get(PyObject *self, void *context)
 115 {
 116     Py_RETURN_NONE;
 117 }
 118 
 119 
 120 static PyMethodDef textiobase_methods[] = {
 121     {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
 122     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
 123     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
 124     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
 125     {NULL, NULL}
 126 };
 127 
 128 static PyGetSetDef textiobase_getset[] = {
 129     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
 130     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
 131     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
 132     {NULL}
 133 };
 134 
 135 PyTypeObject PyTextIOBase_Type = {
 136     PyVarObject_HEAD_INIT(NULL, 0)
 137     "_io._TextIOBase",          /*tp_name*/
 138     0,                          /*tp_basicsize*/
 139     0,                          /*tp_itemsize*/
 140     0,                          /*tp_dealloc*/
 141     0,                          /*tp_print*/
 142     0,                          /*tp_getattr*/
 143     0,                          /*tp_setattr*/
 144     0,                          /*tp_compare */
 145     0,                          /*tp_repr*/
 146     0,                          /*tp_as_number*/
 147     0,                          /*tp_as_sequence*/
 148     0,                          /*tp_as_mapping*/
 149     0,                          /*tp_hash */
 150     0,                          /*tp_call*/
 151     0,                          /*tp_str*/
 152     0,                          /*tp_getattro*/
 153     0,                          /*tp_setattro*/
 154     0,                          /*tp_as_buffer*/
 155     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
 156     textiobase_doc,             /* tp_doc */
 157     0,                          /* tp_traverse */
 158     0,                          /* tp_clear */
 159     0,                          /* tp_richcompare */
 160     0,                          /* tp_weaklistoffset */
 161     0,                          /* tp_iter */
 162     0,                          /* tp_iternext */
 163     textiobase_methods,         /* tp_methods */
 164     0,                          /* tp_members */
 165     textiobase_getset,          /* tp_getset */
 166     &PyIOBase_Type,             /* tp_base */
 167     0,                          /* tp_dict */
 168     0,                          /* tp_descr_get */
 169     0,                          /* tp_descr_set */
 170     0,                          /* tp_dictoffset */
 171     0,                          /* tp_init */
 172     0,                          /* tp_alloc */
 173     0,                          /* tp_new */
 174 };
 175 
 176 
 177 /* IncrementalNewlineDecoder */
 178 
 179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
 180     "Codec used when reading a file in universal newlines mode.  It wraps\n"
 181     "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
 182     "records the types of newlines encountered.  When used with\n"
 183     "translate=False, it ensures that the newline sequence is returned in\n"
 184     "one piece. When used with decoder=None, it expects unicode strings as\n"
 185     "decode input and translates newlines without first invoking an external\n"
 186     "decoder.\n"
 187     );
 188 
 189 typedef struct {
 190     PyObject_HEAD
 191     PyObject *decoder;
 192     PyObject *errors;
 193     signed int pendingcr: 1;
 194     signed int translate: 1;
 195     unsigned int seennl: 3;
 196 } nldecoder_object;
 197 
 198 static int
 199 incrementalnewlinedecoder_init(nldecoder_object *self,
 200                                PyObject *args, PyObject *kwds)
 201 {
 202     PyObject *decoder;
 203     int translate;
 204     PyObject *errors = NULL;
 205     char *kwlist[] = {"decoder", "translate", "errors", NULL};
 206 
 207     if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
 208                                      kwlist, &decoder, &translate, &errors))
 209         return -1;
 210 
 211     self->decoder = decoder;
 212     Py_INCREF(decoder);
 213 
 214     if (errors == NULL) {
 215         self->errors = PyUnicode_FromString("strict");
 216         if (self->errors == NULL)
 217             return -1;
 218     }
 219     else {
 220         Py_INCREF(errors);
 221         self->errors = errors;
 222     }
 223 
 224     self->translate = translate;
 225     self->seennl = 0;
 226     self->pendingcr = 0;
 227 
 228     return 0;
 229 }
 230 
 231 static void
 232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
 233 {
 234     Py_CLEAR(self->decoder);
 235     Py_CLEAR(self->errors);
 236     Py_TYPE(self)->tp_free((PyObject *)self);
 237 }
 238 
 239 #define SEEN_CR   1
 240 #define SEEN_LF   2
 241 #define SEEN_CRLF 4
 242 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
 243 
 244 PyObject *
 245 _PyIncrementalNewlineDecoder_decode(PyObject *_self,
 246                                     PyObject *input, int final)
 247 {
 248     PyObject *output;
 249     Py_ssize_t output_len;
 250     nldecoder_object *self = (nldecoder_object *) _self;
 251 
 252     if (self->decoder == NULL) {
 253         PyErr_SetString(PyExc_ValueError,
 254                         "IncrementalNewlineDecoder.__init__ not called");
 255         return NULL;
 256     }
 257 
 258     /* decode input (with the eventual \r from a previous pass) */
 259     if (self->decoder != Py_None) {
 260         output = PyObject_CallMethodObjArgs(self->decoder,
 261             _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
 262     }
 263     else {
 264         output = input;
 265         Py_INCREF(output);
 266     }
 267 
 268     if (output == NULL)
 269         return NULL;
 270 
 271     if (!PyUnicode_Check(output)) {
 272         PyErr_SetString(PyExc_TypeError,
 273                         "decoder should return a string result");
 274         goto error;
 275     }
 276 
 277     output_len = PyUnicode_GET_SIZE(output);
 278     if (self->pendingcr && (final || output_len > 0)) {
 279         Py_UNICODE *out;
 280         PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
 281         if (modified == NULL)
 282             goto error;
 283         out = PyUnicode_AS_UNICODE(modified);
 284         out[0] = '\r';
 285         memcpy(out + 1, PyUnicode_AS_UNICODE(output),
 286                output_len * sizeof(Py_UNICODE));
 287         Py_DECREF(output);
 288         output = modified;
 289         self->pendingcr = 0;
 290         output_len++;
 291     }
 292 
 293     /* retain last \r even when not translating data:
 294      * then readline() is sure to get \r\n in one pass
 295      */
 296     if (!final) {
 297         if (output_len > 0
 298             && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
 299 
 300             if (Py_REFCNT(output) == 1) {
 301                 if (PyUnicode_Resize(&output, output_len - 1) < 0)
 302                     goto error;
 303             }
 304             else {
 305                 PyObject *modified = PyUnicode_FromUnicode(
 306                     PyUnicode_AS_UNICODE(output),
 307                     output_len - 1);
 308                 if (modified == NULL)
 309                     goto error;
 310                 Py_DECREF(output);
 311                 output = modified;
 312             }
 313             self->pendingcr = 1;
 314         }
 315     }
 316 
 317     /* Record which newlines are read and do newline translation if desired,
 318        all in one pass. */
 319     {
 320         Py_UNICODE *in_str;
 321         Py_ssize_t len;
 322         int seennl = self->seennl;
 323         int only_lf = 0;
 324 
 325         in_str = PyUnicode_AS_UNICODE(output);
 326         len = PyUnicode_GET_SIZE(output);
 327 
 328         if (len == 0)
 329             return output;
 330 
 331         /* If, up to now, newlines are consistently \n, do a quick check
 332            for the \r *byte* with the libc's optimized memchr.
 333            */
 334         if (seennl == SEEN_LF || seennl == 0) {
 335             only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
 336         }
 337 
 338         if (only_lf) {
 339             /* If not already seen, quick scan for a possible "\n" character.
 340                (there's nothing else to be done, even when in translation mode)
 341             */
 342             if (seennl == 0 &&
 343                 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
 344                 Py_UNICODE *s, *end;
 345                 s = in_str;
 346                 end = in_str + len;
 347                 for (;;) {
 348                     Py_UNICODE c;
 349                     /* Fast loop for non-control characters */
 350                     while (*s > '\n')
 351                         s++;
 352                     c = *s++;
 353                     if (c == '\n') {
 354                         seennl |= SEEN_LF;
 355                         break;
 356                     }
 357                     if (s > end)
 358                         break;
 359                 }
 360             }
 361             /* Finished: we have scanned for newlines, and none of them
 362                need translating */
 363         }
 364         else if (!self->translate) {
 365             Py_UNICODE *s, *end;
 366             /* We have already seen all newline types, no need to scan again */
 367             if (seennl == SEEN_ALL)
 368                 goto endscan;
 369             s = in_str;
 370             end = in_str + len;
 371             for (;;) {
 372                 Py_UNICODE c;
 373                 /* Fast loop for non-control characters */
 374                 while (*s > '\r')
 375                     s++;
 376                 c = *s++;
 377                 if (c == '\n')
 378                     seennl |= SEEN_LF;
 379                 else if (c == '\r') {
 380                     if (*s == '\n') {
 381                         seennl |= SEEN_CRLF;
 382                         s++;
 383                     }
 384                     else
 385                         seennl |= SEEN_CR;
 386                 }
 387                 if (s > end)
 388                     break;
 389                 if (seennl == SEEN_ALL)
 390                     break;
 391             }
 392         endscan:
 393             ;
 394         }
 395         else {
 396             PyObject *translated = NULL;
 397             Py_UNICODE *out_str;
 398             Py_UNICODE *in, *out, *end;
 399             if (Py_REFCNT(output) != 1) {
 400                 /* We could try to optimize this so that we only do a copy
 401                    when there is something to translate. On the other hand,
 402                    most decoders should only output non-shared strings, i.e.
 403                    translation is done in place. */
 404                 translated = PyUnicode_FromUnicode(NULL, len);
 405                 if (translated == NULL)
 406                     goto error;
 407                 assert(Py_REFCNT(translated) == 1);
 408                 memcpy(PyUnicode_AS_UNICODE(translated),
 409                        PyUnicode_AS_UNICODE(output),
 410                        len * sizeof(Py_UNICODE));
 411             }
 412             else {
 413                 translated = output;
 414             }
 415             out_str = PyUnicode_AS_UNICODE(translated);
 416             in = in_str;
 417             out = out_str;
 418             end = in_str + len;
 419             for (;;) {
 420                 Py_UNICODE c;
 421                 /* Fast loop for non-control characters */
 422                 while ((c = *in++) > '\r')
 423                     *out++ = c;
 424                 if (c == '\n') {
 425                     *out++ = c;
 426                     seennl |= SEEN_LF;
 427                     continue;
 428                 }
 429                 if (c == '\r') {
 430                     if (*in == '\n') {
 431                         in++;
 432                         seennl |= SEEN_CRLF;
 433                     }
 434                     else
 435                         seennl |= SEEN_CR;
 436                     *out++ = '\n';
 437                     continue;
 438                 }
 439                 if (in > end)
 440                     break;
 441                 *out++ = c;
 442             }
 443             if (translated != output) {
 444                 Py_DECREF(output);
 445                 output = translated;
 446             }
 447             if (out - out_str != len) {
 448                 if (PyUnicode_Resize(&output, out - out_str) < 0)
 449                     goto error;
 450             }
 451         }
 452         self->seennl |= seennl;
 453     }
 454 
 455     return output;
 456 
 457   error:
 458     Py_DECREF(output);
 459     return NULL;
 460 }
 461 
 462 static PyObject *
 463 incrementalnewlinedecoder_decode(nldecoder_object *self,
 464                                  PyObject *args, PyObject *kwds)
 465 {
 466     char *kwlist[] = {"input", "final", NULL};
 467     PyObject *input;
 468     int final = 0;
 469 
 470     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
 471                                      kwlist, &input, &final))
 472         return NULL;
 473     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
 474 }
 475 
 476 static PyObject *
 477 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
 478 {
 479     PyObject *buffer;
 480     unsigned PY_LONG_LONG flag;
 481 
 482     if (self->decoder != Py_None) {
 483         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
 484            _PyIO_str_getstate, NULL);
 485         if (state == NULL)
 486             return NULL;
 487         if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
 488             Py_DECREF(state);
 489             return NULL;
 490         }
 491         Py_INCREF(buffer);
 492         Py_DECREF(state);
 493     }
 494     else {
 495         buffer = PyBytes_FromString("");
 496         flag = 0;
 497     }
 498     flag <<= 1;
 499     if (self->pendingcr)
 500         flag |= 1;
 501     return Py_BuildValue("NK", buffer, flag);
 502 }
 503 
 504 static PyObject *
 505 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
 506 {
 507     PyObject *buffer;
 508     unsigned PY_LONG_LONG flag;
 509 
 510     if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
 511         return NULL;
 512 
 513     self->pendingcr = (int) flag & 1;
 514     flag >>= 1;
 515 
 516     if (self->decoder != Py_None)
 517         return PyObject_CallMethod(self->decoder,
 518                                    "setstate", "((OK))", buffer, flag);
 519     else
 520         Py_RETURN_NONE;
 521 }
 522 
 523 static PyObject *
 524 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
 525 {
 526     self->seennl = 0;
 527     self->pendingcr = 0;
 528     if (self->decoder != Py_None)
 529         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
 530     else
 531         Py_RETURN_NONE;
 532 }
 533 
 534 static PyObject *
 535 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
 536 {
 537     switch (self->seennl) {
 538     case SEEN_CR:
 539         return PyUnicode_FromString("\r");
 540     case SEEN_LF:
 541         return PyUnicode_FromString("\n");
 542     case SEEN_CRLF:
 543         return PyUnicode_FromString("\r\n");
 544     case SEEN_CR | SEEN_LF:
 545         return Py_BuildValue("ss", "\r", "\n");
 546     case SEEN_CR | SEEN_CRLF:
 547         return Py_BuildValue("ss", "\r", "\r\n");
 548     case SEEN_LF | SEEN_CRLF:
 549         return Py_BuildValue("ss", "\n", "\r\n");
 550     case SEEN_CR | SEEN_LF | SEEN_CRLF:
 551         return Py_BuildValue("sss", "\r", "\n", "\r\n");
 552     default:
 553         Py_RETURN_NONE;
 554    }
 555 
 556 }
 557 
 558 
 559 static PyMethodDef incrementalnewlinedecoder_methods[] = {
 560     {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
 561     {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
 562     {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
 563     {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
 564     {NULL}
 565 };
 566 
 567 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
 568     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
 569     {NULL}
 570 };
 571 
 572 PyTypeObject PyIncrementalNewlineDecoder_Type = {
 573     PyVarObject_HEAD_INIT(NULL, 0)
 574     "_io.IncrementalNewlineDecoder", /*tp_name*/
 575     sizeof(nldecoder_object), /*tp_basicsize*/
 576     0,                          /*tp_itemsize*/
 577     (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
 578     0,                          /*tp_print*/
 579     0,                          /*tp_getattr*/
 580     0,                          /*tp_setattr*/
 581     0,                          /*tp_compare */
 582     0,                          /*tp_repr*/
 583     0,                          /*tp_as_number*/
 584     0,                          /*tp_as_sequence*/
 585     0,                          /*tp_as_mapping*/
 586     0,                          /*tp_hash */
 587     0,                          /*tp_call*/
 588     0,                          /*tp_str*/
 589     0,                          /*tp_getattro*/
 590     0,                          /*tp_setattro*/
 591     0,                          /*tp_as_buffer*/
 592     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
 593     incrementalnewlinedecoder_doc,          /* tp_doc */
 594     0,                          /* tp_traverse */
 595     0,                          /* tp_clear */
 596     0,                          /* tp_richcompare */
 597     0,                          /*tp_weaklistoffset*/
 598     0,                          /* tp_iter */
 599     0,                          /* tp_iternext */
 600     incrementalnewlinedecoder_methods, /* tp_methods */
 601     0,                          /* tp_members */
 602     incrementalnewlinedecoder_getset, /* tp_getset */
 603     0,                          /* tp_base */
 604     0,                          /* tp_dict */
 605     0,                          /* tp_descr_get */
 606     0,                          /* tp_descr_set */
 607     0,                          /* tp_dictoffset */
 608     (initproc)incrementalnewlinedecoder_init, /* tp_init */
 609     0,                          /* tp_alloc */
 610     PyType_GenericNew,          /* tp_new */
 611 };
 612 
 613 
 614 /* TextIOWrapper */
 615 
 616 PyDoc_STRVAR(textiowrapper_doc,
 617     "Character and line based layer over a BufferedIOBase object, buffer.\n"
 618     "\n"
 619     "encoding gives the name of the encoding that the stream will be\n"
 620     "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
 621     "\n"
 622     "errors determines the strictness of encoding and decoding (see the\n"
 623     "codecs.register) and defaults to \"strict\".\n"
 624     "\n"
 625     "newline can be None, '', '\\n', '\\r', or '\\r\\n'.  It controls the\n"
 626     "handling of line endings. If it is None, universal newlines is\n"
 627     "enabled.  With this enabled, on input, the lines endings '\\n', '\\r',\n"
 628     "or '\\r\\n' are translated to '\\n' before being returned to the\n"
 629     "caller. Conversely, on output, '\\n' is translated to the system\n"
 630     "default line seperator, os.linesep. If newline is any other of its\n"
 631     "legal values, that newline becomes the newline when the file is read\n"
 632     "and it is returned untranslated. On output, '\\n' is converted to the\n"
 633     "newline.\n"
 634     "\n"
 635     "If line_buffering is True, a call to flush is implied when a call to\n"
 636     "write contains a newline character."
 637     );
 638 
 639 typedef PyObject *
 640         (*encodefunc_t)(PyObject *, PyObject *);
 641 
 642 typedef struct
 643 {
 644     PyObject_HEAD
 645     int ok; /* initialized? */
 646     int detached;
 647     Py_ssize_t chunk_size;
 648     PyObject *buffer;
 649     PyObject *encoding;
 650     PyObject *encoder;
 651     PyObject *decoder;
 652     PyObject *readnl;
 653     PyObject *errors;
 654     const char *writenl; /* utf-8 encoded, NULL stands for \n */
 655     char line_buffering;
 656     char readuniversal;
 657     char readtranslate;
 658     char writetranslate;
 659     char seekable;
 660     char telling;
 661     /* Specialized encoding func (see below) */
 662     encodefunc_t encodefunc;
 663     /* Whether or not it's the start of the stream */
 664     char encoding_start_of_stream;
 665 
 666     /* Reads and writes are internally buffered in order to speed things up.
 667        However, any read will first flush the write buffer if itsn't empty.
 668 
 669        Please also note that text to be written is first encoded before being
 670        buffered. This is necessary so that encoding errors are immediately
 671        reported to the caller, but it unfortunately means that the
 672        IncrementalEncoder (whose encode() method is always written in Python)
 673        becomes a bottleneck for small writes.
 674     */
 675     PyObject *decoded_chars;       /* buffer for text returned from decoder */
 676     Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
 677     PyObject *pending_bytes;       /* list of bytes objects waiting to be
 678                                       written, or NULL */
 679     Py_ssize_t pending_bytes_count;
 680     PyObject *snapshot;
 681     /* snapshot is either None, or a tuple (dec_flags, next_input) where
 682      * dec_flags is the second (integer) item of the decoder state and
 683      * next_input is the chunk of input bytes that comes next after the
 684      * snapshot point.  We use this to reconstruct decoder states in tell().
 685      */
 686 
 687     /* Cache raw object if it's a FileIO object */
 688     PyObject *raw;
 689 
 690     PyObject *weakreflist;
 691     PyObject *dict;
 692 } textio;
 693 
 694 
 695 /* A couple of specialized cases in order to bypass the slow incremental
 696    encoding methods for the most popular encodings. */
 697 
 698 static PyObject *
 699 ascii_encode(textio *self, PyObject *text)
 700 {
 701     return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
 702                                  PyUnicode_GET_SIZE(text),
 703                                  PyBytes_AS_STRING(self->errors));
 704 }
 705 
 706 static PyObject *
 707 utf16be_encode(textio *self, PyObject *text)
 708 {
 709     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 710                                  PyUnicode_GET_SIZE(text),
 711                                  PyBytes_AS_STRING(self->errors), 1);
 712 }
 713 
 714 static PyObject *
 715 utf16le_encode(textio *self, PyObject *text)
 716 {
 717     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 718                                  PyUnicode_GET_SIZE(text),
 719                                  PyBytes_AS_STRING(self->errors), -1);
 720 }
 721 
 722 static PyObject *
 723 utf16_encode(textio *self, PyObject *text)
 724 {
 725     if (!self->encoding_start_of_stream) {
 726         /* Skip the BOM and use native byte ordering */
 727 #if defined(WORDS_BIGENDIAN)
 728         return utf16be_encode(self, text);
 729 #else
 730         return utf16le_encode(self, text);
 731 #endif
 732     }
 733     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 734                                  PyUnicode_GET_SIZE(text),
 735                                  PyBytes_AS_STRING(self->errors), 0);
 736 }
 737 
 738 static PyObject *
 739 utf32be_encode(textio *self, PyObject *text)
 740 {
 741     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 742                                  PyUnicode_GET_SIZE(text),
 743                                  PyBytes_AS_STRING(self->errors), 1);
 744 }
 745 
 746 static PyObject *
 747 utf32le_encode(textio *self, PyObject *text)
 748 {
 749     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 750                                  PyUnicode_GET_SIZE(text),
 751                                  PyBytes_AS_STRING(self->errors), -1);
 752 }
 753 
 754 static PyObject *
 755 utf32_encode(textio *self, PyObject *text)
 756 {
 757     if (!self->encoding_start_of_stream) {
 758         /* Skip the BOM and use native byte ordering */
 759 #if defined(WORDS_BIGENDIAN)
 760         return utf32be_encode(self, text);
 761 #else
 762         return utf32le_encode(self, text);
 763 #endif
 764     }
 765     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 766                                  PyUnicode_GET_SIZE(text),
 767                                  PyBytes_AS_STRING(self->errors), 0);
 768 }
 769 
 770 static PyObject *
 771 utf8_encode(textio *self, PyObject *text)
 772 {
 773     return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
 774                                 PyUnicode_GET_SIZE(text),
 775                                 PyBytes_AS_STRING(self->errors));
 776 }
 777 
 778 static PyObject *
 779 latin1_encode(textio *self, PyObject *text)
 780 {
 781     return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
 782                                   PyUnicode_GET_SIZE(text),
 783                                   PyBytes_AS_STRING(self->errors));
 784 }
 785 
 786 /* Map normalized encoding names onto the specialized encoding funcs */
 787 
 788 typedef struct {
 789     const char *name;
 790     encodefunc_t encodefunc;
 791 } encodefuncentry;
 792 
 793 static encodefuncentry encodefuncs[] = {
 794     {"ascii",       (encodefunc_t) ascii_encode},
 795     {"iso8859-1",   (encodefunc_t) latin1_encode},
 796     {"utf-8",       (encodefunc_t) utf8_encode},
 797     {"utf-16-be",   (encodefunc_t) utf16be_encode},
 798     {"utf-16-le",   (encodefunc_t) utf16le_encode},
 799     {"utf-16",      (encodefunc_t) utf16_encode},
 800     {"utf-32-be",   (encodefunc_t) utf32be_encode},
 801     {"utf-32-le",   (encodefunc_t) utf32le_encode},
 802     {"utf-32",      (encodefunc_t) utf32_encode},
 803     {NULL, NULL}
 804 };
 805 
 806 
 807 static int
 808 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
 809 {
 810     char *kwlist[] = {"buffer", "encoding", "errors",
 811                       "newline", "line_buffering",
 812                       NULL};
 813     PyObject *buffer, *raw;
 814     char *encoding = NULL;
 815     char *errors = NULL;
 816     char *newline = NULL;
 817     int line_buffering = 0;
 818 
 819     PyObject *res;
 820     int r;
 821 
 822     self->ok = 0;
 823     self->detached = 0;
 824     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
 825                                      kwlist, &buffer, &encoding, &errors,
 826                                      &newline, &line_buffering))
 827         return -1;
 828 
 829     if (newline && newline[0] != '\0'
 830         && !(newline[0] == '\n' && newline[1] == '\0')
 831         && !(newline[0] == '\r' && newline[1] == '\0')
 832         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
 833         PyErr_Format(PyExc_ValueError,
 834                      "illegal newline value: %s", newline);
 835         return -1;
 836     }
 837 
 838     Py_CLEAR(self->buffer);
 839     Py_CLEAR(self->encoding);
 840     Py_CLEAR(self->encoder);
 841     Py_CLEAR(self->decoder);
 842     Py_CLEAR(self->readnl);
 843     Py_CLEAR(self->decoded_chars);
 844     Py_CLEAR(self->pending_bytes);
 845     Py_CLEAR(self->snapshot);
 846     Py_CLEAR(self->errors);
 847     Py_CLEAR(self->raw);
 848     self->decoded_chars_used = 0;
 849     self->pending_bytes_count = 0;
 850     self->encodefunc = NULL;
 851     self->writenl = NULL;
 852 
 853     if (encoding == NULL && self->encoding == NULL) {
 854         if (_PyIO_locale_module == NULL) {
 855             _PyIO_locale_module = PyImport_ImportModule("locale");
 856             if (_PyIO_locale_module == NULL)
 857                 goto catch_ImportError;
 858             else
 859                 goto use_locale;
 860         }
 861         else {
 862           use_locale:
 863             self->encoding = PyObject_CallMethod(
 864                 _PyIO_locale_module, "getpreferredencoding", NULL);
 865             if (self->encoding == NULL) {
 866               catch_ImportError:
 867                 /*
 868                  Importing locale can raise a ImportError because of
 869                  _functools, and locale.getpreferredencoding can raise a
 870                  ImportError if _locale is not available.  These will happen
 871                  during module building.
 872                 */
 873                 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 874                     PyErr_Clear();
 875                     self->encoding = PyString_FromString("ascii");
 876                 }
 877                 else
 878                     goto error;
 879             }
 880             else if (!PyString_Check(self->encoding))
 881                 Py_CLEAR(self->encoding);
 882         }
 883     }
 884     if (self->encoding != NULL)
 885         encoding = PyString_AsString(self->encoding);
 886     else if (encoding != NULL) {
 887         self->encoding = PyString_FromString(encoding);
 888         if (self->encoding == NULL)
 889             goto error;
 890     }
 891     else {
 892         PyErr_SetString(PyExc_IOError,
 893                         "could not determine default encoding");
 894     }
 895 
 896     if (errors == NULL)
 897         errors = "strict";
 898     self->errors = PyBytes_FromString(errors);
 899     if (self->errors == NULL)
 900         goto error;
 901 
 902     self->chunk_size = 8192;
 903     self->readuniversal = (newline == NULL || newline[0] == '\0');
 904     self->line_buffering = line_buffering;
 905     self->readtranslate = (newline == NULL);
 906     if (newline) {
 907         self->readnl = PyString_FromString(newline);
 908         if (self->readnl == NULL)
 909             return -1;
 910     }
 911     self->writetranslate = (newline == NULL || newline[0] != '\0');
 912     if (!self->readuniversal && self->writetranslate) {
 913         self->writenl = PyString_AsString(self->readnl);
 914         if (!strcmp(self->writenl, "\n"))
 915             self->writenl = NULL;
 916     }
 917 #ifdef MS_WINDOWS
 918     else
 919         self->writenl = "\r\n";
 920 #endif
 921 
 922     /* Build the decoder object */
 923     res = PyObject_CallMethod(buffer, "readable", NULL);
 924     if (res == NULL)
 925         goto error;
 926     r = PyObject_IsTrue(res);
 927     Py_DECREF(res);
 928     if (r == -1)
 929         goto error;
 930     if (r == 1) {
 931         self->decoder = PyCodec_IncrementalDecoder(
 932             encoding, errors);
 933         if (self->decoder == NULL)
 934             goto error;
 935 
 936         if (self->readuniversal) {
 937             PyObject *incrementalDecoder = PyObject_CallFunction(
 938                 (PyObject *)&PyIncrementalNewlineDecoder_Type,
 939                 "Oi", self->decoder, (int)self->readtranslate);
 940             if (incrementalDecoder == NULL)
 941                 goto error;
 942             Py_CLEAR(self->decoder);
 943             self->decoder = incrementalDecoder;
 944         }
 945     }
 946 
 947     /* Build the encoder object */
 948     res = PyObject_CallMethod(buffer, "writable", NULL);
 949     if (res == NULL)
 950         goto error;
 951     r = PyObject_IsTrue(res);
 952     Py_DECREF(res);
 953     if (r == -1)
 954         goto error;
 955     if (r == 1) {
 956         PyObject *ci;
 957         self->encoder = PyCodec_IncrementalEncoder(
 958             encoding, errors);
 959         if (self->encoder == NULL)
 960             goto error;
 961         /* Get the normalized named of the codec */
 962         ci = _PyCodec_Lookup(encoding);
 963         if (ci == NULL)
 964             goto error;
 965         res = PyObject_GetAttrString(ci, "name");
 966         Py_DECREF(ci);
 967         if (res == NULL) {
 968             if (PyErr_ExceptionMatches(PyExc_AttributeError))
 969                 PyErr_Clear();
 970             else
 971                 goto error;
 972         }
 973         else if (PyString_Check(res)) {
 974             encodefuncentry *e = encodefuncs;
 975             while (e->name != NULL) {
 976                 if (!strcmp(PyString_AS_STRING(res), e->name)) {
 977                     self->encodefunc = e->encodefunc;
 978                     break;
 979                 }
 980                 e++;
 981             }
 982         }
 983         Py_XDECREF(res);
 984     }
 985 
 986     self->buffer = buffer;
 987     Py_INCREF(buffer);
 988 
 989     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
 990         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
 991         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
 992         raw = PyObject_GetAttrString(buffer, "raw");
 993         /* Cache the raw FileIO object to speed up 'closed' checks */
 994         if (raw == NULL) {
 995             if (PyErr_ExceptionMatches(PyExc_AttributeError))
 996                 PyErr_Clear();
 997             else
 998                 goto error;
 999         }
1000         else if (Py_TYPE(raw) == &PyFileIO_Type)
1001             self->raw = raw;
1002         else
1003             Py_DECREF(raw);
1004     }
1005 
1006     res = PyObject_CallMethod(buffer, "seekable", NULL);
1007     if (res == NULL)
1008         goto error;
1009     self->seekable = self->telling = PyObject_IsTrue(res);
1010     Py_DECREF(res);
1011 
1012     self->encoding_start_of_stream = 0;
1013     if (self->seekable && self->encoder) {
1014         PyObject *cookieObj;
1015         int cmp;
1016 
1017         self->encoding_start_of_stream = 1;
1018 
1019         cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1020         if (cookieObj == NULL)
1021             goto error;
1022 
1023         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1024         Py_DECREF(cookieObj);
1025         if (cmp < 0) {
1026             goto error;
1027         }
1028 
1029         if (cmp == 0) {
1030             self->encoding_start_of_stream = 0;
1031             res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1032                                              _PyIO_zero, NULL);
1033             if (res == NULL)
1034                 goto error;
1035             Py_DECREF(res);
1036         }
1037     }
1038 
1039     self->ok = 1;
1040     return 0;
1041 
1042   error:
1043     return -1;
1044 }
1045 
/* Shared teardown used by both tp_clear and tp_dealloc.
 *
 * If the object was successfully initialized (self->ok), it is first passed
 * to _PyIOBase_finalize(); a negative result from that call aborts the
 * teardown and is reported as -1.  Otherwise every owned object reference
 * is released, self->ok is reset, and 0 is returned.  self->dict is NOT
 * released here; callers handle it.
 */
static int
_textiowrapper_clear(textio *self)
{
    if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
        return -1;
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);
    return 0;
}
1064 
1065 static void
1066 textiowrapper_dealloc(textio *self)
1067 {
1068     if (_textiowrapper_clear(self) < 0)
1069         return;
1070     _PyObject_GC_UNTRACK(self);
1071     if (self->weakreflist != NULL)
1072         PyObject_ClearWeakRefs((PyObject *)self);
1073     Py_CLEAR(self->dict);
1074     Py_TYPE(self)->tp_free((PyObject *)self);
1075 }
1076 
/* tp_traverse slot: report every object reference held by `self` to the
 * cyclic garbage collector through `visit`.  The set of members visited
 * here matches the members released by _textiowrapper_clear(), plus
 * self->dict. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1094 
1095 static int
1096 textiowrapper_clear(textio *self)
1097 {
1098     if (_textiowrapper_clear(self) < 0)
1099         return -1;
1100     Py_CLEAR(self->dict);
1101     return 0;
1102 }
1103 
1104 static PyObject *
1105 textiowrapper_closed_get(textio *self, void *context);
1106 
/* Raise ValueError and make the *enclosing function* return NULL if the
   stream is closed.  Because it expands to `return NULL`, it is only usable
   in functions returning a pointer, and `self` must be a `textio *`.

   This macro takes some shortcuts to make the common case faster:
   - when `self` is exactly a TextIOWrapper (not a subclass) and a raw
     FileIO object was cached in self->raw, its closed state is queried
     directly through _PyFileIO_closed();
   - when `self` is exactly a TextIOWrapper without a cached raw object, the
     `closed` getter is called and its truth value tested;
   - for any other type, _PyIOBase_check_closed() performs the check. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1133 
/* Make the enclosing function return NULL with ValueError set when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.
   NOTE: this expands to a bare `if` statement (no do { } while (0)
   wrapper), so do not use it as the body of an unbraced if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else {                                   \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1145 
/* Same check as CHECK_INITIALIZED, for functions that return an int:
   the enclosing function returns -1 with ValueError set when
   self->ok <= 0.
   NOTE: this expands to a bare `if` statement (no do { } while (0)
   wrapper), so do not use it as the body of an unbraced if/else. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else {                                   \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1157 
1158 
1159 static PyObject *
1160 textiowrapper_detach(textio *self)
1161 {
1162     PyObject *buffer, *res;
1163     CHECK_INITIALIZED(self);
1164     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1165     if (res == NULL)
1166         return NULL;
1167     Py_DECREF(res);
1168     buffer = self->buffer;
1169     self->buffer = NULL;
1170     self->detached = 1;
1171     self->ok = 0;
1172     return buffer;
1173 }
1174 
1175 Py_LOCAL_INLINE(const Py_UNICODE *)
1176 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1177 {
1178     /* like wcschr, but doesn't stop at NULL characters */
1179     while (size-- > 0) {
1180         if (*s == ch)
1181             return s;
1182         s++;
1183     }
1184     return NULL;
1185 }
1186 
1187 /* Flush the internal write buffer. This doesn't explicitly flush the
1188    underlying buffered object, though. */
1189 static int
1190 _textiowrapper_writeflush(textio *self)
1191 {
1192     PyObject *pending, *b, *ret;
1193 
1194     if (self->pending_bytes == NULL)
1195         return 0;
1196 
1197     pending = self->pending_bytes;
1198     Py_INCREF(pending);
1199     self->pending_bytes_count = 0;
1200     Py_CLEAR(self->pending_bytes);
1201 
1202     b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1203     Py_DECREF(pending);
1204     if (b == NULL)
1205         return -1;
1206     ret = PyObject_CallMethodObjArgs(self->buffer,
1207                                      _PyIO_str_write, b, NULL);
1208     Py_DECREF(b);
1209     if (ret == NULL)
1210         return -1;
1211     Py_DECREF(ret);
1212     return 0;
1213 }
1214 
1215 static PyObject *
1216 textiowrapper_write(textio *self, PyObject *args)
1217 {
1218     PyObject *ret;
1219     PyObject *text; /* owned reference */
1220     PyObject *b;
1221     Py_ssize_t textlen;
1222     int haslf = 0;
1223     int needflush = 0;
1224 
1225     CHECK_INITIALIZED(self);
1226 
1227     if (!PyArg_ParseTuple(args, "U:write", &text)) {
1228         return NULL;
1229     }
1230 
1231     CHECK_CLOSED(self);
1232 
1233     if (self->encoder == NULL) {
1234         PyErr_SetString(PyExc_IOError, "not writable");
1235         return NULL;
1236     }
1237 
1238     Py_INCREF(text);
1239 
1240     textlen = PyUnicode_GetSize(text);
1241 
1242     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1243         if (findchar(PyUnicode_AS_UNICODE(text),
1244                      PyUnicode_GET_SIZE(text), '\n'))
1245             haslf = 1;
1246 
1247     if (haslf && self->writetranslate && self->writenl != NULL) {
1248         PyObject *newtext = PyObject_CallMethod(
1249             text, "replace", "ss", "\n", self->writenl);
1250         Py_DECREF(text);
1251         if (newtext == NULL)
1252             return NULL;
1253         text = newtext;
1254     }
1255 
1256     if (self->line_buffering &&
1257         (haslf ||
1258          findchar(PyUnicode_AS_UNICODE(text),
1259                   PyUnicode_GET_SIZE(text), '\r')))
1260         needflush = 1;
1261 
1262     /* XXX What if we were just reading? */
1263     if (self->encodefunc != NULL) {
1264         b = (*self->encodefunc)((PyObject *) self, text);
1265         self->encoding_start_of_stream = 0;
1266     }
1267     else
1268         b = PyObject_CallMethodObjArgs(self->encoder,
1269                                        _PyIO_str_encode, text, NULL);
1270     Py_DECREF(text);
1271     if (b == NULL)
1272         return NULL;
1273 
1274     if (self->pending_bytes == NULL) {
1275         self->pending_bytes = PyList_New(0);
1276         if (self->pending_bytes == NULL) {
1277             Py_DECREF(b);
1278             return NULL;
1279         }
1280         self->pending_bytes_count = 0;
1281     }
1282     if (PyList_Append(self->pending_bytes, b) < 0) {
1283         Py_DECREF(b);
1284         return NULL;
1285     }
1286     self->pending_bytes_count += PyBytes_GET_SIZE(b);
1287     Py_DECREF(b);
1288     if (self->pending_bytes_count > self->chunk_size || needflush) {
1289         if (_textiowrapper_writeflush(self) < 0)
1290             return NULL;
1291     }
1292 
1293     if (needflush) {
1294         ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1295         if (ret == NULL)
1296             return NULL;
1297         Py_DECREF(ret);
1298     }
1299 
1300     Py_CLEAR(self->snapshot);
1301 
1302     if (self->decoder) {
1303         ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1304         if (ret == NULL)
1305             return NULL;
1306         Py_DECREF(ret);
1307     }
1308 
1309     return PyLong_FromSsize_t(textlen);
1310 }
1311 
/* Steal a reference to chars and store it in the decoded_chars buffer,
 * releasing whatever was stored there before and resetting the read offset
 * (decoded_chars_used) to 0.  `chars` may be NULL to empty the buffer.
 */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
{
    Py_CLEAR(self->decoded_chars);
    self->decoded_chars = chars;
    self->decoded_chars_used = 0;
}
1321 
1322 static PyObject *
1323 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1324 {
1325     PyObject *chars;
1326     Py_ssize_t avail;
1327 
1328     if (self->decoded_chars == NULL)
1329         return PyUnicode_FromStringAndSize(NULL, 0);
1330 
1331     avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1332              - self->decoded_chars_used);
1333 
1334     assert(avail >= 0);
1335 
1336     if (n < 0 || n > avail)
1337         n = avail;
1338 
1339     if (self->decoded_chars_used > 0 || n < avail) {
1340         chars = PyUnicode_FromUnicode(
1341             PyUnicode_AS_UNICODE(self->decoded_chars)
1342             + self->decoded_chars_used, n);
1343         if (chars == NULL)
1344             return NULL;
1345     }
1346     else {
1347         chars = self->decoded_chars;
1348         Py_INCREF(chars);
1349     }
1350 
1351     self->decoded_chars_used += n;
1352     return chars;
1353 }
1354 
1355 /* Read and decode the next chunk of data from the BufferedReader.
1356  */
1357 static int
1358 textiowrapper_read_chunk(textio *self)
1359 {
1360     PyObject *dec_buffer = NULL;
1361     PyObject *dec_flags = NULL;
1362     PyObject *input_chunk = NULL;
1363     PyObject *decoded_chars, *chunk_size;
1364     int eof;
1365 
1366     /* The return value is True unless EOF was reached.  The decoded string is
1367      * placed in self._decoded_chars (replacing its previous value).  The
1368      * entire input chunk is sent to the decoder, though some of it may remain
1369      * buffered in the decoder, yet to be converted.
1370      */
1371 
1372     if (self->decoder == NULL) {
1373         PyErr_SetString(PyExc_IOError, "not readable");
1374         return -1;
1375     }
1376 
1377     if (self->telling) {
1378         /* To prepare for tell(), we need to snapshot a point in the file
1379          * where the decoder's input buffer is empty.
1380          */
1381 
1382         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1383                                                      _PyIO_str_getstate, NULL);
1384         if (state == NULL)
1385             return -1;
1386         /* Given this, we know there was a valid snapshot point
1387          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1388          */
1389         if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1390             Py_DECREF(state);
1391             return -1;
1392         }
1393         Py_INCREF(dec_buffer);
1394         Py_INCREF(dec_flags);
1395         Py_DECREF(state);
1396     }
1397 
1398     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1399     chunk_size = PyLong_FromSsize_t(self->chunk_size);
1400     if (chunk_size == NULL)
1401         goto fail;
1402     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1403         _PyIO_str_read1, chunk_size, NULL);
1404     Py_DECREF(chunk_size);
1405     if (input_chunk == NULL)
1406         goto fail;
1407     assert(PyBytes_Check(input_chunk));
1408 
1409     eof = (PyBytes_Size(input_chunk) == 0);
1410 
1411     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1412         decoded_chars = _PyIncrementalNewlineDecoder_decode(
1413             self->decoder, input_chunk, eof);
1414     }
1415     else {
1416         decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1417             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1418     }
1419 
1420     /* TODO sanity check: isinstance(decoded_chars, unicode) */
1421     if (decoded_chars == NULL)
1422         goto fail;
1423     textiowrapper_set_decoded_chars(self, decoded_chars);
1424     if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1425         eof = 0;
1426 
1427     if (self->telling) {
1428         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1429          * next input to be decoded is dec_buffer + input_chunk.
1430          */
1431         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1432         if (next_input == NULL)
1433             goto fail;
1434         assert (PyBytes_Check(next_input));
1435         Py_DECREF(dec_buffer);
1436         Py_CLEAR(self->snapshot);
1437         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1438     }
1439     Py_DECREF(input_chunk);
1440 
1441     return (eof == 0);
1442 
1443   fail:
1444     Py_XDECREF(dec_buffer);
1445     Py_XDECREF(dec_flags);
1446     Py_XDECREF(input_chunk);
1447     return -1;
1448 }
1449 
1450 static PyObject *
1451 textiowrapper_read(textio *self, PyObject *args)
1452 {
1453     Py_ssize_t n = -1;
1454     PyObject *result = NULL, *chunks = NULL;
1455 
1456     CHECK_INITIALIZED(self);
1457 
1458     if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1459         return NULL;
1460 
1461     CHECK_CLOSED(self);
1462 
1463     if (self->decoder == NULL) {
1464         PyErr_SetString(PyExc_IOError, "not readable");
1465         return NULL;
1466     }
1467 
1468     if (_textiowrapper_writeflush(self) < 0)
1469         return NULL;
1470 
1471     if (n < 0) {
1472         /* Read everything */
1473         PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1474         PyObject *decoded, *final;
1475         if (bytes == NULL)
1476             goto fail;
1477         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1478                                              bytes, Py_True, NULL);
1479         Py_DECREF(bytes);
1480         if (decoded == NULL)
1481             goto fail;
1482 
1483         result = textiowrapper_get_decoded_chars(self, -1);
1484 
1485         if (result == NULL) {
1486             Py_DECREF(decoded);
1487             return NULL;
1488         }
1489 
1490         final = PyUnicode_Concat(result, decoded);
1491         Py_DECREF(result);
1492         Py_DECREF(decoded);
1493         if (final == NULL)
1494             goto fail;
1495 
1496         Py_CLEAR(self->snapshot);
1497         return final;
1498     }
1499     else {
1500         int res = 1;
1501         Py_ssize_t remaining = n;
1502 
1503         result = textiowrapper_get_decoded_chars(self, n);
1504         if (result == NULL)
1505             goto fail;
1506         remaining -= PyUnicode_GET_SIZE(result);
1507 
1508         /* Keep reading chunks until we have n characters to return */
1509         while (remaining > 0) {
1510             res = textiowrapper_read_chunk(self);
1511             if (res < 0)
1512                 goto fail;
1513             if (res == 0)  /* EOF */
1514                 break;
1515             if (chunks == NULL) {
1516                 chunks = PyList_New(0);
1517                 if (chunks == NULL)
1518                     goto fail;
1519             }
1520             if (PyList_Append(chunks, result) < 0)
1521                 goto fail;
1522             Py_DECREF(result);
1523             result = textiowrapper_get_decoded_chars(self, remaining);
1524             if (result == NULL)
1525                 goto fail;
1526             remaining -= PyUnicode_GET_SIZE(result);
1527         }
1528         if (chunks != NULL) {
1529             if (result != NULL && PyList_Append(chunks, result) < 0)
1530                 goto fail;
1531             Py_CLEAR(result);
1532             result = PyUnicode_Join(_PyIO_empty_str, chunks);
1533             if (result == NULL)
1534                 goto fail;
1535             Py_CLEAR(chunks);
1536         }
1537         return result;
1538     }
1539   fail:
1540     Py_XDECREF(result);
1541     Py_XDECREF(chunks);
1542     return NULL;
1543 }
1544 
1545 
1546 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1547    that is to the NUL character. Otherwise the function will produce
1548    incorrect results. */
1549 static Py_UNICODE *
1550 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1551 {
1552     Py_UNICODE *s = start;
1553     for (;;) {
1554         while (*s > ch)
1555             s++;
1556         if (*s == ch)
1557             return s;
1558         if (s == end)
1559             return NULL;
1560         s++;
1561     }
1562 }
1563 
/* Locate the end of the first line in the Py_UNICODE range [start, end).
 *
 * Exactly one of three search modes is used:
 *   - translated: only "\n" is looked for;
 *   - universal:  any of "\r", "\r\n" or "\n" ends the line;
 *   - otherwise:  the byte string `readnl` is the line terminator.
 *
 * Returns the offset (relative to `start`) just past the line ending when
 * one is found; *consumed is not written in that case.  Returns -1 when no
 * complete line ending is present, and stores in *consumed how many
 * characters from `start` may be set aside by the caller.  For a
 * multi-character `readnl` this stops short of a trailing character that
 * could be the beginning of a terminator split across two chunks.
 *
 * The scanning loops rely on the storage being NUL-terminated at `end`.
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = end - start;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        Py_UNICODE *pos = find_control_char(start, end, '\n');
        if (pos != NULL)
            return pos - start + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        Py_UNICODE *s = start;
        for (;;) {
            Py_UNICODE ch;
            /* Fast path for non-control chars. The loop always ends
               since the Py_UNICODE storage is NUL-terminated. */
            while (*s > '\r')
                s++;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = *s++;
            if (ch == '\n')
                return s - start;
            if (ch == '\r') {
                /* Treat "\r\n" as a single line ending. */
                if (*s == '\n')
                    return s - start + 1;
                else
                    return s - start;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
        unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
        if (readnl_len == 1) {
            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
            if (pos != NULL)
                return pos - start + 1;
            *consumed = len;
            return -1;
        }
        else {
            Py_UNICODE *s = start;
            /* `e` is one past the last position where a complete
               terminator could still start. */
            Py_UNICODE *e = end - readnl_len + 1;
            Py_UNICODE *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE: this declaration shadows the outer `pos`. */
                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                for (i = 1; i < readnl_len; i++) {
                    if (pos[i] != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return pos - start + readnl_len;
                s = pos + 1;
            }
            /* No full terminator found.  If the tail contains the first
               terminator character, keep everything from there on for the
               next chunk. */
            pos = find_control_char(e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = pos - start;
            return -1;
        }
    }
}
1646 
/* Core of readline(): return the next line as a new unicode reference.
 *
 * `limit` is the maximum number of characters to return; a negative value
 * means no limit.  Pending writes are flushed first.  Decoded data is
 * pulled chunk by chunk via textiowrapper_read_chunk() until a line ending
 * is found, the limit is reached, or EOF is hit.  An empty unicode object
 * is returned when nothing could be read.
 *
 * Returns NULL with an exception set on error (including when the stream
 * is closed).
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    /* line:      the buffer currently being scanned (owned reference)
       chunks:    list of line pieces set aside from earlier buffers
       remaining: unscanned tail of the previous buffer, to be prepended to
                  the next one */
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    /* start/endpos index into `line`; chunked counts the characters already
       stored in `chunks`; offset_to_buffer is the length of the `remaining`
       prefix that was prepended to decoded_chars to form `line`. */
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer in place, from its read offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Glue the leftover of the previous buffer in front of the
               freshly decoded data. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Found a line ending; truncate to the limit if needed. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* Convert the position in `line` back to an offset into
           decoded_chars by removing the prepended prefix length. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Reached only via the EOF break: the leftover becomes part of the
           result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces set aside earlier plus the final piece. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1800 
1801 static PyObject *
1802 textiowrapper_readline(textio *self, PyObject *args)
1803 {
1804     PyObject *limitobj = NULL;
1805     Py_ssize_t limit = -1;
1806 
1807     CHECK_INITIALIZED(self);
1808     if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1809         return NULL;
1810     }
1811     if (limitobj) {
1812         if (!PyNumber_Check(limitobj)) {
1813             PyErr_Format(PyExc_TypeError,
1814                          "integer argument expected, got '%.200s'",
1815                          Py_TYPE(limitobj)->tp_name);
1816             return NULL;
1817         }
1818         limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1819         if (limit == -1 && PyErr_Occurred())
1820             return NULL;
1821     }
1822     return _textiowrapper_readline(self, limit);
1823 }
1824 
1825 /* Seek and Tell */
1826 
/* Unpacked form of the opaque integer "cookie" exchanged by tell() and
   seek().  The field descriptions below reflect how seek() consumes them. */
typedef struct {
    Py_off_t start_pos;     /* position seek() moves the underlying buffer to */
    int dec_flags;          /* flags handed to decoder.setstate() */
    int bytes_to_feed;      /* size passed to buffer.read() to refill the decoder */
    int chars_to_skip;      /* decoded characters to skip after re-decoding */
    char need_eof;          /* passed as the `final` argument of decoder.decode() */
} cookie_type;
1834 
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() (to pack) or
   _PyLong_AsByteArray() (to unpack).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */

/* Size of the intermediary byte string: one Py_off_t, three ints, one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Byte-order flag passed to the _PyLong_*ByteArray() calls. */
# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

/* Byte-order flag passed to the _PyLong_*ByteArray() calls. */
# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1872 
1873 static int
1874 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1875 {
1876     unsigned char buffer[COOKIE_BUF_LEN];
1877     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1878     if (cookieLong == NULL)
1879         return -1;
1880 
1881     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1882                             IS_LITTLE_ENDIAN, 0) < 0) {
1883         Py_DECREF(cookieLong);
1884         return -1;
1885     }
1886     Py_DECREF(cookieLong);
1887 
1888     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1889     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1890     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1891     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1892     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1893 
1894     return 0;
1895 }
1896 
1897 static PyObject *
1898 textiowrapper_build_cookie(cookie_type *cookie)
1899 {
1900     unsigned char buffer[COOKIE_BUF_LEN];
1901 
1902     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1903     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1904     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1905     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1906     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1907 
1908     return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1909 }
1910 #undef IS_LITTLE_ENDIAN
1911 
1912 static int
1913 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1914 {
1915     PyObject *res;
1916     /* When seeking to the start of the stream, we call decoder.reset()
1917        rather than decoder.getstate().
1918        This is for a few decoders such as utf-16 for which the state value
1919        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1920        utf-16, that we are expecting a BOM).
1921     */
1922     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1923         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1924     else
1925         res = PyObject_CallMethod(self->decoder, "setstate",
1926                                   "((si))", "", cookie->dec_flags);
1927     if (res == NULL)
1928         return -1;
1929     Py_DECREF(res);
1930     return 0;
1931 }
1932 
1933 static int
1934 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1935 {
1936     PyObject *res;
1937     /* Same as _textiowrapper_decoder_setstate() above. */
1938     if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1939         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1940         self->encoding_start_of_stream = 1;
1941     }
1942     else {
1943         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1944                                          _PyIO_zero, NULL);
1945         self->encoding_start_of_stream = 0;
1946     }
1947     if (res == NULL)
1948         return -1;
1949     Py_DECREF(res);
1950     return 0;
1951 }
1952 
1953 static PyObject *
1954 textiowrapper_seek(textio *self, PyObject *args)
1955 {
1956     PyObject *cookieObj, *posobj;
1957     cookie_type cookie;
1958     int whence = 0;
1959     PyObject *res;
1960     int cmp;
1961 
1962     CHECK_INITIALIZED(self);
1963 
1964     if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1965         return NULL;
1966     CHECK_CLOSED(self);
1967 
1968     Py_INCREF(cookieObj);
1969 
1970     if (!self->seekable) {
1971         PyErr_SetString(PyExc_IOError,
1972                         "underlying stream is not seekable");
1973         goto fail;
1974     }
1975 
1976     if (whence == 1) {
1977         /* seek relative to current position */
1978         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1979         if (cmp < 0)
1980             goto fail;
1981 
1982         if (cmp == 0) {
1983             PyErr_SetString(PyExc_IOError,
1984                             "can't do nonzero cur-relative seeks");
1985             goto fail;
1986         }
1987 
1988         /* Seeking to the current position should attempt to
1989          * sync the underlying buffer with the current position.
1990          */
1991         Py_DECREF(cookieObj);
1992         cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1993         if (cookieObj == NULL)
1994             goto fail;
1995     }
1996     else if (whence == 2) {
1997         /* seek relative to end of file */
1998 
1999         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2000         if (cmp < 0)
2001             goto fail;
2002 
2003         if (cmp == 0) {
2004             PyErr_SetString(PyExc_IOError,
2005                             "can't do nonzero end-relative seeks");
2006             goto fail;
2007         }
2008 
2009         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2010         if (res == NULL)
2011             goto fail;
2012         Py_DECREF(res);
2013 
2014         textiowrapper_set_decoded_chars(self, NULL);
2015         Py_CLEAR(self->snapshot);
2016         if (self->decoder) {
2017             res = PyObject_CallMethod(self->decoder, "reset", NULL);
2018             if (res == NULL)
2019                 goto fail;
2020             Py_DECREF(res);
2021         }
2022 
2023         res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2024         Py_XDECREF(cookieObj);
2025         return res;
2026     }
2027     else if (whence != 0) {
2028         PyErr_Format(PyExc_ValueError,
2029                      "invalid whence (%d, should be 0, 1 or 2)", whence);
2030         goto fail;
2031     }
2032 
2033     cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2034     if (cmp < 0)
2035         goto fail;
2036 
2037     if (cmp == 1) {
2038         PyObject *repr = PyObject_Repr(cookieObj);
2039         if (repr != NULL) {
2040             PyErr_Format(PyExc_ValueError,
2041                          "negative seek position %s",
2042                          PyString_AS_STRING(repr));
2043             Py_DECREF(repr);
2044         }
2045         goto fail;
2046     }
2047 
2048     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2049     if (res == NULL)
2050         goto fail;
2051     Py_DECREF(res);
2052 
2053     /* The strategy of seek() is to go back to the safe start point
2054      * and replay the effect of read(chars_to_skip) from there.
2055      */
2056     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2057         goto fail;
2058 
2059     /* Seek back to the safe start point. */
2060     posobj = PyLong_FromOff_t(cookie.start_pos);
2061     if (posobj == NULL)
2062         goto fail;
2063     res = PyObject_CallMethodObjArgs(self->buffer,
2064                                      _PyIO_str_seek, posobj, NULL);
2065     Py_DECREF(posobj);
2066     if (res == NULL)
2067         goto fail;
2068     Py_DECREF(res);
2069 
2070     textiowrapper_set_decoded_chars(self, NULL);
2071     Py_CLEAR(self->snapshot);
2072 
2073     /* Restore the decoder to its state from the safe start point. */
2074     if (self->decoder) {
2075         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2076             goto fail;
2077     }
2078 
2079     if (cookie.chars_to_skip) {
2080         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2081         PyObject *input_chunk = PyObject_CallMethod(
2082             self->buffer, "read", "i", cookie.bytes_to_feed);
2083         PyObject *decoded;
2084 
2085         if (input_chunk == NULL)
2086             goto fail;
2087 
2088         assert (PyBytes_Check(input_chunk));
2089 
2090         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2091         if (self->snapshot == NULL) {
2092             Py_DECREF(input_chunk);
2093             goto fail;
2094         }
2095 
2096         decoded = PyObject_CallMethod(self->decoder, "decode",
2097                                       "Oi", input_chunk, (int)cookie.need_eof);
2098 
2099         if (decoded == NULL)
2100             goto fail;
2101 
2102         textiowrapper_set_decoded_chars(self, decoded);
2103 
2104         /* Skip chars_to_skip of the decoded characters. */
2105         if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2106             PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2107             goto fail;
2108         }
2109         self->decoded_chars_used = cookie.chars_to_skip;
2110     }
2111     else {
2112         self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2113         if (self->snapshot == NULL)
2114             goto fail;
2115     }
2116 
2117     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2118     if (self->encoder) {
2119         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2120             goto fail;
2121     }
2122     return cookieObj;
2123   fail:
2124     Py_XDECREF(cookieObj);
2125     return NULL;
2126 
2127 }
2128 
2129 static PyObject *
2130 textiowrapper_tell(textio *self, PyObject *args)
2131 {
2132     PyObject *res;
2133     PyObject *posobj = NULL;
2134     cookie_type cookie = {0,0,0,0,0};
2135     PyObject *next_input;
2136     Py_ssize_t chars_to_skip, chars_decoded;
2137     PyObject *saved_state = NULL;
2138     char *input, *input_end;
2139 
2140     CHECK_INITIALIZED(self);
2141     CHECK_CLOSED(self);
2142 
2143     if (!self->seekable) {
2144         PyErr_SetString(PyExc_IOError,
2145                         "underlying stream is not seekable");
2146         goto fail;
2147     }
2148     if (!self->telling) {
2149         PyErr_SetString(PyExc_IOError,
2150                         "telling position disabled by next() call");
2151         goto fail;
2152     }
2153 
2154     if (_textiowrapper_writeflush(self) < 0)
2155         return NULL;
2156     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2157     if (res == NULL)
2158         goto fail;
2159     Py_DECREF(res);
2160 
2161     posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2162     if (posobj == NULL)
2163         goto fail;
2164 
2165     if (self->decoder == NULL || self->snapshot == NULL) {
2166         assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2167         return posobj;
2168     }
2169 
2170 #if defined(HAVE_LARGEFILE_SUPPORT)
2171     cookie.start_pos = PyLong_AsLongLong(posobj);
2172 #else
2173     cookie.start_pos = PyLong_AsLong(posobj);
2174 #endif
2175     if (PyErr_Occurred())
2176         goto fail;
2177 
2178     /* Skip backward to the snapshot point (see _read_chunk). */
2179     if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2180         goto fail;
2181 
2182     assert (PyBytes_Check(next_input));
2183 
2184     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2185 
2186     /* How many decoded characters have been used up since the snapshot? */
2187     if (self->decoded_chars_used == 0)  {
2188         /* We haven't moved from the snapshot point. */
2189         Py_DECREF(posobj);
2190         return textiowrapper_build_cookie(&cookie);
2191     }
2192 
2193     chars_to_skip = self->decoded_chars_used;
2194 
2195     /* Starting from the snapshot position, we will walk the decoder
2196      * forward until it gives us enough decoded characters.
2197      */
2198     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2199                                              _PyIO_str_getstate, NULL);
2200     if (saved_state == NULL)
2201         goto fail;
2202 
2203     /* Note our initial start point. */
2204     if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2205         goto fail;
2206 
2207     /* Feed the decoder one byte at a time.  As we go, note the
2208      * nearest "safe start point" before the current location
2209      * (a point where the decoder has nothing buffered, so seek()
2210      * can safely start from there and advance to this location).
2211      */
2212     chars_decoded = 0;
2213     input = PyBytes_AS_STRING(next_input);
2214     input_end = input + PyBytes_GET_SIZE(next_input);
2215     while (input < input_end) {
2216         PyObject *state;
2217         char *dec_buffer;
2218         Py_ssize_t dec_buffer_len;
2219         int dec_flags;
2220 
2221         PyObject *decoded = PyObject_CallMethod(
2222             self->decoder, "decode", "s#", input, 1);
2223         if (decoded == NULL)
2224             goto fail;
2225         assert (PyUnicode_Check(decoded));
2226         chars_decoded += PyUnicode_GET_SIZE(decoded);
2227         Py_DECREF(decoded);
2228 
2229         cookie.bytes_to_feed += 1;
2230 
2231         state = PyObject_CallMethodObjArgs(self->decoder,
2232                                            _PyIO_str_getstate, NULL);
2233         if (state == NULL)
2234             goto fail;
2235         if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2236             Py_DECREF(state);
2237             goto fail;
2238         }
2239         Py_DECREF(state);
2240 
2241         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2242             /* Decoder buffer is empty, so this is a safe start point. */
2243             cookie.start_pos += cookie.bytes_to_feed;
2244             chars_to_skip -= chars_decoded;
2245             cookie.dec_flags = dec_flags;
2246             cookie.bytes_to_feed = 0;
2247             chars_decoded = 0;
2248         }
2249         if (chars_decoded >= chars_to_skip)
2250             break;
2251         input++;
2252     }
2253     if (input == input_end) {
2254         /* We didn't get enough decoded data; signal EOF to get more. */
2255         PyObject *decoded = PyObject_CallMethod(
2256             self->decoder, "decode", "si", "", /* final = */ 1);
2257         if (decoded == NULL)
2258             goto fail;
2259         assert (PyUnicode_Check(decoded));
2260         chars_decoded += PyUnicode_GET_SIZE(decoded);
2261         Py_DECREF(decoded);
2262         cookie.need_eof = 1;
2263 
2264         if (chars_decoded < chars_to_skip) {
2265             PyErr_SetString(PyExc_IOError,
2266                             "can't reconstruct logical file position");
2267             goto fail;
2268         }
2269     }
2270 
2271     /* finally */
2272     Py_XDECREF(posobj);
2273     res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2274     Py_DECREF(saved_state);
2275     if (res == NULL)
2276         return NULL;
2277     Py_DECREF(res);
2278 
2279     /* The returned cookie corresponds to the last safe start point. */
2280     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2281     return textiowrapper_build_cookie(&cookie);
2282 
2283   fail:
2284     Py_XDECREF(posobj);
2285     if (saved_state) {
2286         PyObject *type, *value, *traceback;
2287         PyErr_Fetch(&type, &value, &traceback);
2288 
2289         res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2290         Py_DECREF(saved_state);
2291         if (res == NULL)
2292             return NULL;
2293         Py_DECREF(res);
2294 
2295         PyErr_Restore(type, value, traceback);
2296     }
2297     return NULL;
2298 }
2299 
2300 static PyObject *
2301 textiowrapper_truncate(textio *self, PyObject *args)
2302 {
2303     PyObject *pos = Py_None;
2304     PyObject *res;
2305 
2306     CHECK_INITIALIZED(self)
2307     if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2308         return NULL;
2309     }
2310 
2311     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2312     if (res == NULL)
2313         return NULL;
2314     Py_DECREF(res);
2315 
2316     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2317 }
2318 
2319 static PyObject *
2320 textiowrapper_repr(textio *self)
2321 {
2322     PyObject *nameobj, *res;
2323     PyObject *namerepr = NULL, *encrepr = NULL;
2324 
2325     CHECK_INITIALIZED(self);
2326 
2327     nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2328     if (nameobj == NULL) {
2329         if (PyErr_ExceptionMatches(PyExc_AttributeError))
2330             PyErr_Clear();
2331         else
2332             goto error;
2333         encrepr = PyObject_Repr(self->encoding);
2334         res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2335                                    PyString_AS_STRING(encrepr));
2336     }
2337     else {
2338         encrepr = PyObject_Repr(self->encoding);
2339         namerepr = PyObject_Repr(nameobj);
2340         res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2341                                    PyString_AS_STRING(namerepr),
2342                                    PyString_AS_STRING(encrepr));
2343         Py_DECREF(nameobj);
2344     }
2345     Py_XDECREF(namerepr);
2346     Py_XDECREF(encrepr);
2347     return res;
2348 
2349 error:
2350     Py_XDECREF(namerepr);
2351     Py_XDECREF(encrepr);
2352     return NULL;
2353 }
2354 
2355 
2356 /* Inquiries */
2357 
2358 static PyObject *
2359 textiowrapper_fileno(textio *self, PyObject *args)
2360 {
2361     CHECK_INITIALIZED(self);
2362     return PyObject_CallMethod(self->buffer, "fileno", NULL);
2363 }
2364 
2365 static PyObject *
2366 textiowrapper_seekable(textio *self, PyObject *args)
2367 {
2368     CHECK_INITIALIZED(self);
2369     return PyObject_CallMethod(self->buffer, "seekable", NULL);
2370 }
2371 
2372 static PyObject *
2373 textiowrapper_readable(textio *self, PyObject *args)
2374 {
2375     CHECK_INITIALIZED(self);
2376     return PyObject_CallMethod(self->buffer, "readable", NULL);
2377 }
2378 
2379 static PyObject *
2380 textiowrapper_writable(textio *self, PyObject *args)
2381 {
2382     CHECK_INITIALIZED(self);
2383     return PyObject_CallMethod(self->buffer, "writable", NULL);
2384 }
2385 
2386 static PyObject *
2387 textiowrapper_isatty(textio *self, PyObject *args)
2388 {
2389     CHECK_INITIALIZED(self);
2390     return PyObject_CallMethod(self->buffer, "isatty", NULL);
2391 }
2392 
2393 static PyObject *
2394 textiowrapper_flush(textio *self, PyObject *args)
2395 {
2396     CHECK_INITIALIZED(self);
2397     CHECK_CLOSED(self);
2398     self->telling = self->seekable;
2399     if (_textiowrapper_writeflush(self) < 0)
2400         return NULL;
2401     return PyObject_CallMethod(self->buffer, "flush", NULL);
2402 }
2403 
2404 static PyObject *
2405 textiowrapper_close(textio *self, PyObject *args)
2406 {
2407     PyObject *res;
2408     int r;
2409     CHECK_INITIALIZED(self);
2410 
2411     res = textiowrapper_closed_get(self, NULL);
2412     if (res == NULL)
2413         return NULL;
2414     r = PyObject_IsTrue(res);
2415     Py_DECREF(res);
2416     if (r < 0)
2417         return NULL;
2418     
2419     if (r > 0) {
2420         Py_RETURN_NONE; /* stream already closed */
2421     }
2422     else {
2423         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2424         if (res == NULL) {
2425             return NULL;
2426         }
2427         else
2428             Py_DECREF(res);
2429 
2430         return PyObject_CallMethod(self->buffer, "close", NULL);
2431     }
2432 }
2433 
2434 static PyObject *
2435 textiowrapper_iternext(textio *self)
2436 {
2437     PyObject *line;
2438 
2439     CHECK_INITIALIZED(self);
2440 
2441     self->telling = 0;
2442     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2443         /* Skip method call overhead for speed */
2444         line = _textiowrapper_readline(self, -1);
2445     }
2446     else {
2447         line = PyObject_CallMethodObjArgs((PyObject *)self,
2448                                            _PyIO_str_readline, NULL);
2449         if (line && !PyUnicode_Check(line)) {
2450             PyErr_Format(PyExc_IOError,
2451                          "readline() should have returned an str object, "
2452                          "not '%.200s'", Py_TYPE(line)->tp_name);
2453             Py_DECREF(line);
2454             return NULL;
2455         }
2456     }
2457 
2458     if (line == NULL)
2459         return NULL;
2460 
2461     if (PyUnicode_GET_SIZE(line) == 0) {
2462         /* Reached EOF or would have blocked */
2463         Py_DECREF(line);
2464         Py_CLEAR(self->snapshot);
2465         self->telling = self->seekable;
2466         return NULL;
2467     }
2468 
2469     return line;
2470 }
2471 
2472 static PyObject *
2473 textiowrapper_name_get(textio *self, void *context)
2474 {
2475     CHECK_INITIALIZED(self);
2476     return PyObject_GetAttrString(self->buffer, "name");
2477 }
2478 
2479 static PyObject *
2480 textiowrapper_closed_get(textio *self, void *context)
2481 {
2482     CHECK_INITIALIZED(self);
2483     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2484 }
2485 
2486 static PyObject *
2487 textiowrapper_newlines_get(textio *self, void *context)
2488 {
2489     PyObject *res;
2490     CHECK_INITIALIZED(self);
2491     if (self->decoder == NULL)
2492         Py_RETURN_NONE;
2493     res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2494     if (res == NULL) {
2495         if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2496             PyErr_Clear();
2497             Py_RETURN_NONE;
2498         }
2499         else {
2500             return NULL;
2501         }
2502     }
2503     return res;
2504 }
2505 
2506 static PyObject *
2507 textiowrapper_errors_get(textio *self, void *context)
2508 {
2509     CHECK_INITIALIZED(self);
2510     Py_INCREF(self->errors);
2511     return self->errors;
2512 }
2513 
2514 static PyObject *
2515 textiowrapper_chunk_size_get(textio *self, void *context)
2516 {
2517     CHECK_INITIALIZED(self);
2518     return PyLong_FromSsize_t(self->chunk_size);
2519 }
2520 
2521 static int
2522 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2523 {
2524     Py_ssize_t n;
2525     CHECK_INITIALIZED_INT(self);
2526     n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2527     if (n == -1 && PyErr_Occurred())
2528         return -1;
2529     if (n <= 0) {
2530         PyErr_SetString(PyExc_ValueError,
2531                         "a strictly positive integer is required");
2532         return -1;
2533     }
2534     self->chunk_size = n;
2535     return 0;
2536 }
2537 
/* Method table for _io.TextIOWrapper (installed as tp_methods). */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries, each forwarded to the underlying buffer */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}    /* sentinel */
};
2557 
/* Read-only attributes exposed directly from fields of the textio struct
   (installed as tp_members). */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}  /* sentinel */
};
2564 
/* Computed attributes (installed as tp_getset).  All are read-only except
   _CHUNK_SIZE, which also has a setter. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
2576 
/* Type object for _io.TextIOWrapper.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE), takes part in cyclic garbage collection, and
   supports weak references and a per-instance __dict__ through the
   offsets given below.  tp_base is left 0 in this initializer. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};