1 /* File object implementation */
   2 
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include "structmember.h"
   6 
   7 #ifdef HAVE_SYS_TYPES_H
   8 #include <sys/types.h>
   9 #endif /* HAVE_SYS_TYPES_H */
  10 
  11 #ifdef MS_WINDOWS
  12 #define fileno _fileno
  13 /* can simulate truncate with Win32 API functions; see file_truncate */
  14 #define HAVE_FTRUNCATE
  15 #define WIN32_LEAN_AND_MEAN
  16 #include <windows.h>
  17 #endif
  18 
  19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  20 #include <io.h>
  21 #endif
  22 
  23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  24 
  25 #ifdef HAVE_ERRNO_H
  26 #include <errno.h>
  27 #endif
  28 
  29 #ifdef HAVE_GETC_UNLOCKED
  30 #define GETC(f) getc_unlocked(f)
  31 #define FLOCKFILE(f) flockfile(f)
  32 #define FUNLOCKFILE(f) funlockfile(f)
  33 #else
  34 #define GETC(f) getc(f)
  35 #define FLOCKFILE(f)
  36 #define FUNLOCKFILE(f)
  37 #endif
  38 
/* Bits in f_newlinetypes.  Every newline style has its own bit, so
   more than one style can be recorded in the field by OR-ing the
   values together; NEWLINE_UNKNOWN is the empty set. */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
  44 
  45 /*
  46  * These macros release the GIL while preventing the f_close() function being
  47  * called in the interval between them.  For that purpose, a running total of
  48  * the number of currently running unlocked code sections is kept in
  49  * the unlocked_count field of the PyFileObject. The close() method raises
  50  * an IOError if that field is non-zero.  See issue #815646, #595601.
  51  */
  52 
  53 #define FILE_BEGIN_ALLOW_THREADS(fobj) \
  54 { \
  55     fobj->unlocked_count++; \
  56     Py_BEGIN_ALLOW_THREADS
  57 
  58 #define FILE_END_ALLOW_THREADS(fobj) \
  59     Py_END_ALLOW_THREADS \
  60     fobj->unlocked_count--; \
  61     assert(fobj->unlocked_count >= 0); \
  62 }
  63 
  64 #define FILE_ABORT_ALLOW_THREADS(fobj) \
  65     Py_BLOCK_THREADS \
  66     fobj->unlocked_count--; \
  67     assert(fobj->unlocked_count >= 0);
  68 
  69 #ifdef __cplusplus
  70 extern "C" {
  71 #endif
  72 
  73 FILE *
  74 PyFile_AsFile(PyObject *f)
  75 {
  76     if (f == NULL || !PyFile_Check(f))
  77         return NULL;
  78     else
  79         return ((PyFileObject *)f)->f_fp;
  80 }
  81 
  82 void PyFile_IncUseCount(PyFileObject *fobj)
  83 {
  84     fobj->unlocked_count++;
  85 }
  86 
  87 void PyFile_DecUseCount(PyFileObject *fobj)
  88 {
  89     fobj->unlocked_count--;
  90     assert(fobj->unlocked_count >= 0);
  91 }
  92 
  93 PyObject *
  94 PyFile_Name(PyObject *f)
  95 {
  96     if (f == NULL || !PyFile_Check(f))
  97         return NULL;
  98     else
  99         return ((PyFileObject *)f)->f_name;
 100 }
 101 
 102 /* This is a safe wrapper around PyObject_Print to print to the FILE
 103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
 104    about PyFileObject. */
 105 static int
 106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
 107 {
 108     int result;
 109     PyFile_IncUseCount(f);
 110     result = PyObject_Print(op, f->f_fp, flags);
 111     PyFile_DecUseCount(f);
 112     return result;
 113 }
 114 
 115 /* On Unix, fopen will succeed for directories.
 116    In Python, there should be no file objects referring to
 117    directories, so we need a check.  */
 118 
 119 static PyFileObject*
 120 dircheck(PyFileObject* f)
 121 {
 122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
 123     struct stat buf;
 124     if (f->f_fp == NULL)
 125         return f;
 126     if (fstat(fileno(f->f_fp), &buf) == 0 &&
 127         S_ISDIR(buf.st_mode)) {
 128         char *msg = strerror(EISDIR);
 129         PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
 130                                               EISDIR, msg, f->f_name);
 131         PyErr_SetObject(PyExc_IOError, exc);
 132         Py_XDECREF(exc);
 133         return NULL;
 134     }
 135 #endif
 136     return f;
 137 }
 138 
 139 
 140 static PyObject *
 141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
 142                  int (*close)(FILE *))
 143 {
 144     assert(name != NULL);
 145     assert(f != NULL);
 146     assert(PyFile_Check(f));
 147     assert(f->f_fp == NULL);
 148 
 149     Py_DECREF(f->f_name);
 150     Py_DECREF(f->f_mode);
 151     Py_DECREF(f->f_encoding);
 152     Py_DECREF(f->f_errors);
 153 
 154     Py_INCREF(name);
 155     f->f_name = name;
 156 
 157     f->f_mode = PyString_FromString(mode);
 158 
 159     f->f_close = close;
 160     f->f_softspace = 0;
 161     f->f_binary = strchr(mode,'b') != NULL;
 162     f->f_buf = NULL;
 163     f->f_univ_newline = (strchr(mode, 'U') != NULL);
 164     f->f_newlinetypes = NEWLINE_UNKNOWN;
 165     f->f_skipnextlf = 0;
 166     Py_INCREF(Py_None);
 167     f->f_encoding = Py_None;
 168     Py_INCREF(Py_None);
 169     f->f_errors = Py_None;
 170     f->readable = f->writable = 0;
 171     if (strchr(mode, 'r') != NULL || f->f_univ_newline)
 172         f->readable = 1;
 173     if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
 174         f->writable = 1;
 175     if (strchr(mode, '+') != NULL)
 176         f->readable = f->writable = 1;
 177 
 178     if (f->f_mode == NULL)
 179         return NULL;
 180     f->f_fp = fp;
 181     f = dircheck(f);
 182     return (PyObject *) f;
 183 }
 184 
 185 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
 186 #define Py_VERIFY_WINNT
/* The CRT on windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C where the mode string is typically hard
 * coded.  But in Python, where we pass in the mode string from the user,
 * we need to verify it first manually.
 */
 193 static int _PyVerify_Mode_WINNT(const char *mode)
 194 {
 195     /* See if mode string is valid on Windows to avoid hard assertions */
 196     /* remove leading spacese */
 197     int singles = 0;
 198     int pairs = 0;
 199     int encoding = 0;
 200     const char *s, *c;
 201 
 202     while(*mode == ' ') /* strip initial spaces */
 203         ++mode;
 204     if (!strchr("rwa", *mode)) /* must start with one of these */
 205         return 0;
 206     while (*++mode) {
 207         if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
 208             continue;
 209         s = "+TD"; /* each of this can appear only once */
 210         c = strchr(s, *mode);
 211         if (c) {
 212             ptrdiff_t idx = s-c;
 213             if (singles & (1<<idx))
 214                 return 0;
 215             singles |= (1<<idx);
 216             continue;
 217         }
 218         s = "btcnSR"; /* only one of each letter in the pairs allowed */
 219         c = strchr(s, *mode);
 220         if (c) {
 221             ptrdiff_t idx = (s-c)/2;
 222             if (pairs & (1<<idx))
 223                 return 0;
 224             pairs |= (1<<idx);
 225             continue;
 226         }
 227         if (*mode == ',') {
 228             encoding = 1;
 229             break;
 230         }
 231         return 0; /* found an invalid char */
 232     }
 233 
 234     if (encoding) {
 235         char *e[] = {"UTF-8", "UTF-16LE", "UNICODE"};
 236         while (*mode == ' ')
 237             ++mode;
 238         /* find 'ccs =' */
 239         if (strncmp(mode, "ccs", 3))
 240             return 0;
 241         mode += 3;
 242         while (*mode == ' ')
 243             ++mode;
 244         if (*mode != '=')
 245             return 0;
 246         while (*mode == ' ')
 247             ++mode;
 248         for(encoding = 0; encoding<_countof(e); ++encoding) {
 249             size_t l = strlen(e[encoding]);
 250             if (!strncmp(mode, e[encoding], l)) {
 251                 mode += l; /* found a valid encoding */
 252                 break;
 253             }
 254         }
 255         if (encoding == _countof(e))
 256             return 0;
 257     }
 258     /* skip trailing spaces */
 259     while (*mode == ' ')
 260         ++mode;
 261 
 262     return *mode == '\0'; /* must be at the end of the string */
 263 }
 264 #endif
 265 
 266 /* check for known incorrect mode strings - problem is, platforms are
 267    free to accept any mode characters they like and are supposed to
 268    ignore stuff they don't understand... write or append mode with
 269    universal newline support is expressly forbidden by PEP 278.
 270    Additionally, remove the 'U' from the mode string as platforms
 271    won't know what it is. Non-zero return signals an exception */
 272 int
 273 _PyFile_SanitizeMode(char *mode)
 274 {
 275     char *upos;
 276     size_t len = strlen(mode);
 277 
 278     if (!len) {
 279         PyErr_SetString(PyExc_ValueError, "empty mode string");
 280         return -1;
 281     }
 282 
 283     upos = strchr(mode, 'U');
 284     if (upos) {
 285         memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
 286 
 287         if (mode[0] == 'w' || mode[0] == 'a') {
 288             PyErr_Format(PyExc_ValueError, "universal newline "
 289                          "mode can only be used with modes "
 290                          "starting with 'r'");
 291             return -1;
 292         }
 293 
 294         if (mode[0] != 'r') {
 295             memmove(mode+1, mode, strlen(mode)+1);
 296             mode[0] = 'r';
 297         }
 298 
 299         if (!strchr(mode, 'b')) {
 300             memmove(mode+2, mode+1, strlen(mode));
 301             mode[1] = 'b';
 302         }
 303     } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
 304         PyErr_Format(PyExc_ValueError, "mode string must begin with "
 305                     "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
 306         return -1;
 307     }
 308 #ifdef Py_VERIFY_WINNT
 309     /* additional checks on NT with visual studio 2005 and higher */
 310     if (!_PyVerify_Mode_WINNT(mode)) {
 311         PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
 312         return -1;
 313     }
 314 #endif
 315     return 0;
 316 }
 317 
 318 static PyObject *
 319 open_the_file(PyFileObject *f, char *name, char *mode)
 320 {
 321     char *newmode;
 322     assert(f != NULL);
 323     assert(PyFile_Check(f));
 324 #ifdef MS_WINDOWS
 325     /* windows ignores the passed name in order to support Unicode */
 326     assert(f->f_name != NULL);
 327 #else
 328     assert(name != NULL);
 329 #endif
 330     assert(mode != NULL);
 331     assert(f->f_fp == NULL);
 332 
 333     /* probably need to replace 'U' by 'rb' */
 334     newmode = PyMem_MALLOC(strlen(mode) + 3);
 335     if (!newmode) {
 336         PyErr_NoMemory();
 337         return NULL;
 338     }
 339     strcpy(newmode, mode);
 340 
 341     if (_PyFile_SanitizeMode(newmode)) {
 342         f = NULL;
 343         goto cleanup;
 344     }
 345 
 346     /* rexec.py can't stop a user from getting the file() constructor --
 347        all they have to do is get *any* file object f, and then do
 348        type(f).  Here we prevent them from doing damage with it. */
 349     if (PyEval_GetRestricted()) {
 350         PyErr_SetString(PyExc_IOError,
 351         "file() constructor not accessible in restricted mode");
 352         f = NULL;
 353         goto cleanup;
 354     }
 355     errno = 0;
 356 
 357 #ifdef MS_WINDOWS
 358     if (PyUnicode_Check(f->f_name)) {
 359         PyObject *wmode;
 360         wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
 361         if (f->f_name && wmode) {
 362             FILE_BEGIN_ALLOW_THREADS(f)
 363             /* PyUnicode_AS_UNICODE OK without thread
 364                lock as it is a simple dereference. */
 365             f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 366                               PyUnicode_AS_UNICODE(wmode));
 367             FILE_END_ALLOW_THREADS(f)
 368         }
 369         Py_XDECREF(wmode);
 370     }
 371 #endif
 372     if (NULL == f->f_fp && NULL != name) {
 373         FILE_BEGIN_ALLOW_THREADS(f)
 374         f->f_fp = fopen(name, newmode);
 375         FILE_END_ALLOW_THREADS(f)
 376     }
 377 
 378     if (f->f_fp == NULL) {
 379 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
 380         /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 381          * across all Windows flavors.  When it sets EINVAL varies
 382          * across Windows flavors, the exact conditions aren't
 383          * documented, and the answer lies in the OS's implementation
 384          * of Win32's CreateFile function (whose source is secret).
 385          * Seems the best we can do is map EINVAL to ENOENT.
 386          * Starting with Visual Studio .NET 2005, EINVAL is correctly
 387          * set by our CRT error handler (set in exceptions.c.)
 388          */
 389         if (errno == 0)         /* bad mode string */
 390             errno = EINVAL;
 391         else if (errno == EINVAL) /* unknown, but not a mode string */
 392             errno = ENOENT;
 393 #endif
 394         /* EINVAL is returned when an invalid filename or
 395          * an invalid mode is supplied. */
 396         if (errno == EINVAL) {
 397             PyObject *v;
 398             char message[100];
 399             PyOS_snprintf(message, 100,
 400                 "invalid mode ('%.50s') or filename", mode);
 401             v = Py_BuildValue("(isO)", errno, message, f->f_name);
 402             if (v != NULL) {
 403                 PyErr_SetObject(PyExc_IOError, v);
 404                 Py_DECREF(v);
 405             }
 406         }
 407         else
 408             PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 409         f = NULL;
 410     }
 411     if (f != NULL)
 412         f = dircheck(f);
 413 
 414 cleanup:
 415     PyMem_FREE(newmode);
 416 
 417     return (PyObject *)f;
 418 }
 419 
 420 static PyObject *
 421 close_the_file(PyFileObject *f)
 422 {
 423     int sts = 0;
 424     int (*local_close)(FILE *);
 425     FILE *local_fp = f->f_fp;
 426     char *local_setbuf = f->f_setbuf;
 427     if (local_fp != NULL) {
 428         local_close = f->f_close;
 429         if (local_close != NULL && f->unlocked_count > 0) {
 430             if (f->ob_refcnt > 0) {
 431                 PyErr_SetString(PyExc_IOError,
 432                     "close() called during concurrent "
 433                     "operation on the same file object.");
 434             } else {
 435                 /* This should not happen unless someone is
 436                  * carelessly playing with the PyFileObject
 437                  * struct fields and/or its associated FILE
 438                  * pointer. */
 439                 PyErr_SetString(PyExc_SystemError,
 440                     "PyFileObject locking error in "
 441                     "destructor (refcnt <= 0 at close).");
 442             }
 443             return NULL;
 444         }
 445         /* NULL out the FILE pointer before releasing the GIL, because
 446          * it will not be valid anymore after the close() function is
 447          * called. */
 448         f->f_fp = NULL;
 449         if (local_close != NULL) {
 450             /* Issue #9295: must temporarily reset f_setbuf so that another
 451                thread doesn't free it when running file_close() concurrently.
 452                Otherwise this close() will crash when flushing the buffer. */
 453             f->f_setbuf = NULL;
 454             Py_BEGIN_ALLOW_THREADS
 455             errno = 0;
 456             sts = (*local_close)(local_fp);
 457             Py_END_ALLOW_THREADS
 458             f->f_setbuf = local_setbuf;
 459             if (sts == EOF)
 460                 return PyErr_SetFromErrno(PyExc_IOError);
 461             if (sts != 0)
 462                 return PyInt_FromLong((long)sts);
 463         }
 464     }
 465     Py_RETURN_NONE;
 466 }
 467 
 468 PyObject *
 469 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 470 {
 471     PyFileObject *f;
 472     PyObject *o_name;
 473 
 474     f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type, NULL, NULL);
 475     if (f == NULL)
 476         return NULL;
 477     o_name = PyString_FromString(name);
 478     if (o_name == NULL) {
 479         if (close != NULL && fp != NULL)
 480             close(fp);
 481         Py_DECREF(f);
 482         return NULL;
 483     }
 484     if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 485         Py_DECREF(f);
 486         Py_DECREF(o_name);
 487         return NULL;
 488     }
 489     Py_DECREF(o_name);
 490     return (PyObject *)f;
 491 }
 492 
 493 PyObject *
 494 PyFile_FromString(char *name, char *mode)
 495 {
 496     PyFileObject *f;
 497 
 498     f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, NULL);
 499     if (f != NULL) {
 500         if (open_the_file(f, name, mode) == NULL) {
 501             Py_DECREF(f);
 502             f = NULL;
 503         }
 504     }
 505     return (PyObject *)f;
 506 }
 507 
 508 void
 509 PyFile_SetBufSize(PyObject *f, int bufsize)
 510 {
 511     PyFileObject *file = (PyFileObject *)f;
 512     if (bufsize >= 0) {
 513         int type;
 514         switch (bufsize) {
 515         case 0:
 516             type = _IONBF;
 517             break;
 518 #ifdef HAVE_SETVBUF
 519         case 1:
 520             type = _IOLBF;
 521             bufsize = BUFSIZ;
 522             break;
 523 #endif
 524         default:
 525             type = _IOFBF;
 526 #ifndef HAVE_SETVBUF
 527             bufsize = BUFSIZ;
 528 #endif
 529             break;
 530         }
 531         fflush(file->f_fp);
 532         if (type == _IONBF) {
 533             PyMem_Free(file->f_setbuf);
 534             file->f_setbuf = NULL;
 535         } else {
 536             file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
 537                                                     bufsize);
 538         }
 539 #ifdef HAVE_SETVBUF
 540         setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 541 #else /* !HAVE_SETVBUF */
 542         setbuf(file->f_fp, file->f_setbuf);
 543 #endif /* !HAVE_SETVBUF */
 544     }
 545 }
 546 
 547 /* Set the encoding used to output Unicode strings.
 548    Return 1 on success, 0 on failure. */
 549 
 550 int
 551 PyFile_SetEncoding(PyObject *f, const char *enc)
 552 {
 553     return PyFile_SetEncodingAndErrors(f, enc, NULL);
 554 }
 555 
 556 int
 557 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
 558 {
 559     PyFileObject *file = (PyFileObject*)f;
 560     PyObject *str, *oerrors;
 561 
 562     assert(PyFile_Check(f));
 563     str = PyString_FromString(enc);
 564     if (!str)
 565         return 0;
 566     if (errors) {
 567         oerrors = PyString_FromString(errors);
 568         if (!oerrors) {
 569             Py_DECREF(str);
 570             return 0;
 571         }
 572     } else {
 573         oerrors = Py_None;
 574         Py_INCREF(Py_None);
 575     }
 576     Py_DECREF(file->f_encoding);
 577     file->f_encoding = str;
 578     Py_DECREF(file->f_errors);
 579     file->f_errors = oerrors;
 580     return 1;
 581 }
 582 
 583 static PyObject *
 584 err_closed(void)
 585 {
 586     PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 587     return NULL;
 588 }
 589 
 590 static PyObject *
 591 err_mode(char *action)
 592 {
 593     PyErr_Format(PyExc_IOError, "File not open for %s", action);
 594     return NULL;
 595 }
 596 
 597 /* Refuse regular file I/O if there's data in the iteration-buffer.
 598  * Mixing them would cause data to arrive out of order, as the read*
 599  * methods don't use the iteration buffer. */
 600 static PyObject *
 601 err_iterbuffered(void)
 602 {
 603     PyErr_SetString(PyExc_ValueError,
 604         "Mixing iteration and read methods would lose data");
 605     return NULL;
 606 }
 607 
 608 static void drop_readahead(PyFileObject *);
 609 
 610 /* Methods */
 611 
 612 static void
 613 file_dealloc(PyFileObject *f)
 614 {
 615     PyObject *ret;
 616     if (f->weakreflist != NULL)
 617         PyObject_ClearWeakRefs((PyObject *) f);
 618     ret = close_the_file(f);
 619     if (!ret) {
 620         PySys_WriteStderr("close failed in file object destructor:\n");
 621         PyErr_Print();
 622     }
 623     else {
 624         Py_DECREF(ret);
 625     }
 626     PyMem_Free(f->f_setbuf);
 627     Py_XDECREF(f->f_name);
 628     Py_XDECREF(f->f_mode);
 629     Py_XDECREF(f->f_encoding);
 630     Py_XDECREF(f->f_errors);
 631     drop_readahead(f);
 632     Py_TYPE(f)->tp_free((PyObject *)f);
 633 }
 634 
 635 static PyObject *
 636 file_repr(PyFileObject *f)
 637 {
 638     if (PyUnicode_Check(f->f_name)) {
 639 #ifdef Py_USING_UNICODE
 640         PyObject *ret = NULL;
 641         PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 642         const char *name_str = name ? PyString_AsString(name) : "?";
 643         ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 644                            f->f_fp == NULL ? "closed" : "open",
 645                            name_str,
 646                            PyString_AsString(f->f_mode),
 647                            f);
 648         Py_XDECREF(name);
 649         return ret;
 650 #endif
 651     } else {
 652         return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 653                            f->f_fp == NULL ? "closed" : "open",
 654                            PyString_AsString(f->f_name),
 655                            PyString_AsString(f->f_mode),
 656                            f);
 657     }
 658 }
 659 
 660 static PyObject *
 661 file_close(PyFileObject *f)
 662 {
 663     PyObject *sts = close_the_file(f);
 664     if (sts) {
 665         PyMem_Free(f->f_setbuf);
 666         f->f_setbuf = NULL;
 667     }
 668     return sts;
 669 }
 670 
 671 
 672 /* Our very own off_t-like type, 64-bit if possible */
 673 #if !defined(HAVE_LARGEFILE_SUPPORT)
 674 typedef off_t Py_off_t;
 675 #elif SIZEOF_OFF_T >= 8
 676 typedef off_t Py_off_t;
 677 #elif SIZEOF_FPOS_T >= 8
 678 typedef fpos_t Py_off_t;
 679 #else
 680 #error "Large file support, but neither off_t nor fpos_t is large enough."
 681 #endif
 682 
 683 
 684 /* a portable fseek() function
 685    return 0 on success, non-zero on failure (with errno set) */
 686 static int
 687 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 688 {
 689 #if !defined(HAVE_LARGEFILE_SUPPORT)
 690     return fseek(fp, offset, whence);
 691 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 692     return fseeko(fp, offset, whence);
 693 #elif defined(HAVE_FSEEK64)
 694     return fseek64(fp, offset, whence);
 695 #elif defined(__BEOS__)
 696     return _fseek(fp, offset, whence);
 697 #elif SIZEOF_FPOS_T >= 8
 698     /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 699        and fgetpos() to implement fseek()*/
 700     fpos_t pos;
 701     switch (whence) {
 702     case SEEK_END:
 703 #ifdef MS_WINDOWS
 704         fflush(fp);
 705         if (_lseeki64(fileno(fp), 0, 2) == -1)
 706             return -1;
 707 #else
 708         if (fseek(fp, 0, SEEK_END) != 0)
 709             return -1;
 710 #endif
 711         /* fall through */
 712     case SEEK_CUR:
 713         if (fgetpos(fp, &pos) != 0)
 714             return -1;
 715         offset += pos;
 716         break;
 717     /* case SEEK_SET: break; */
 718     }
 719     return fsetpos(fp, &offset);
 720 #else
 721 #error "Large file support, but no way to fseek."
 722 #endif
 723 }
 724 
 725 
/* a portable ftell() function
   Return -1 on failure with errno set appropriately, current file
   position on success

   The implementation is picked at compile time: plain ftell() without
   large file support, otherwise the first available of ftello(),
   ftell64(), or fgetpos() (whose result is returned directly, so that
   branch relies on fpos_t being an arithmetic type). */
static Py_off_t
_portable_ftell(FILE* fp)
{
#if !defined(HAVE_LARGEFILE_SUPPORT)
    return ftell(fp);
#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
    return ftello(fp);
#elif defined(HAVE_FTELL64)
    return ftell64(fp);
#elif SIZEOF_FPOS_T >= 8
    fpos_t pos;
    if (fgetpos(fp, &pos) != 0)
        return -1;
    return pos;
#else
#error "Large file support, but no way to ftell."
#endif
}
 747 
 748 
 749 static PyObject *
 750 file_seek(PyFileObject *f, PyObject *args)
 751 {
 752     int whence;
 753     int ret;
 754     Py_off_t offset;
 755     PyObject *offobj, *off_index;
 756 
 757     if (f->f_fp == NULL)
 758         return err_closed();
 759     drop_readahead(f);
 760     whence = 0;
 761     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 762         return NULL;
 763     off_index = PyNumber_Index(offobj);
 764     if (!off_index) {
 765         if (!PyFloat_Check(offobj))
 766             return NULL;
 767         /* Deprecated in 2.6 */
 768         PyErr_Clear();
 769         if (PyErr_WarnEx(PyExc_DeprecationWarning,
 770                          "integer argument expected, got float",
 771                          1) < 0)
 772             return NULL;
 773         off_index = offobj;
 774         Py_INCREF(offobj);
 775     }
 776 #if !defined(HAVE_LARGEFILE_SUPPORT)
 777     offset = PyInt_AsLong(off_index);
 778 #else
 779     offset = PyLong_Check(off_index) ?
 780         PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
 781 #endif
 782     Py_DECREF(off_index);
 783     if (PyErr_Occurred())
 784         return NULL;
 785 
 786     FILE_BEGIN_ALLOW_THREADS(f)
 787     errno = 0;
 788     ret = _portable_fseek(f->f_fp, offset, whence);
 789     FILE_END_ALLOW_THREADS(f)
 790 
 791     if (ret != 0) {
 792         PyErr_SetFromErrno(PyExc_IOError);
 793         clearerr(f->f_fp);
 794         return NULL;
 795     }
 796     f->f_skipnextlf = 0;
 797     Py_INCREF(Py_None);
 798     return Py_None;
 799 }
 800 
 801 
 802 #ifdef HAVE_FTRUNCATE
 803 static PyObject *
 804 file_truncate(PyFileObject *f, PyObject *args)
 805 {
 806     Py_off_t newsize;
 807     PyObject *newsizeobj = NULL;
 808     Py_off_t initialpos;
 809     int ret;
 810 
 811     if (f->f_fp == NULL)
 812         return err_closed();
 813     if (!f->writable)
 814         return err_mode("writing");
 815     if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 816         return NULL;
 817 
 818     /* Get current file position.  If the file happens to be open for
 819      * update and the last operation was an input operation, C doesn't
 820      * define what the later fflush() will do, but we promise truncate()
 821      * won't change the current position (and fflush() *does* change it
 822      * then at least on Windows).  The easiest thing is to capture
 823      * current pos now and seek back to it at the end.
 824      */
 825     FILE_BEGIN_ALLOW_THREADS(f)
 826     errno = 0;
 827     initialpos = _portable_ftell(f->f_fp);
 828     FILE_END_ALLOW_THREADS(f)
 829     if (initialpos == -1)
 830         goto onioerror;
 831 
 832     /* Set newsize to current postion if newsizeobj NULL, else to the
 833      * specified value.
 834      */
 835     if (newsizeobj != NULL) {
 836 #if !defined(HAVE_LARGEFILE_SUPPORT)
 837         newsize = PyInt_AsLong(newsizeobj);
 838 #else
 839         newsize = PyLong_Check(newsizeobj) ?
 840                         PyLong_AsLongLong(newsizeobj) :
 841                 PyInt_AsLong(newsizeobj);
 842 #endif
 843         if (PyErr_Occurred())
 844             return NULL;
 845     }
 846     else /* default to current position */
 847         newsize = initialpos;
 848 
 849     /* Flush the stream.  We're mixing stream-level I/O with lower-level
 850      * I/O, and a flush may be necessary to synch both platform views
 851      * of the current file state.
 852      */
 853     FILE_BEGIN_ALLOW_THREADS(f)
 854     errno = 0;
 855     ret = fflush(f->f_fp);
 856     FILE_END_ALLOW_THREADS(f)
 857     if (ret != 0)
 858         goto onioerror;
 859 
 860 #ifdef MS_WINDOWS
 861     /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 862        so don't even try using it. */
 863     {
 864         HANDLE hFile;
 865 
 866         /* Have to move current pos to desired endpoint on Windows. */
 867         FILE_BEGIN_ALLOW_THREADS(f)
 868         errno = 0;
 869         ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 870         FILE_END_ALLOW_THREADS(f)
 871         if (ret)
 872             goto onioerror;
 873 
 874         /* Truncate.  Note that this may grow the file! */
 875         FILE_BEGIN_ALLOW_THREADS(f)
 876         errno = 0;
 877         hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 878         ret = hFile == (HANDLE)-1;
 879         if (ret == 0) {
 880             ret = SetEndOfFile(hFile) == 0;
 881             if (ret)
 882                 errno = EACCES;
 883         }
 884         FILE_END_ALLOW_THREADS(f)
 885         if (ret)
 886             goto onioerror;
 887     }
 888 #else
 889     FILE_BEGIN_ALLOW_THREADS(f)
 890     errno = 0;
 891     ret = ftruncate(fileno(f->f_fp), newsize);
 892     FILE_END_ALLOW_THREADS(f)
 893     if (ret != 0)
 894         goto onioerror;
 895 #endif /* !MS_WINDOWS */
 896 
 897     /* Restore original file position. */
 898     FILE_BEGIN_ALLOW_THREADS(f)
 899     errno = 0;
 900     ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 901     FILE_END_ALLOW_THREADS(f)
 902     if (ret)
 903         goto onioerror;
 904 
 905     Py_INCREF(Py_None);
 906     return Py_None;
 907 
 908 onioerror:
 909     PyErr_SetFromErrno(PyExc_IOError);
 910     clearerr(f->f_fp);
 911     return NULL;
 912 }
 913 #endif /* HAVE_FTRUNCATE */
 914 
 915 static PyObject *
 916 file_tell(PyFileObject *f)
 917 {
 918     Py_off_t pos;
 919 
 920     if (f->f_fp == NULL)
 921         return err_closed();
 922     FILE_BEGIN_ALLOW_THREADS(f)
 923     errno = 0;
 924     pos = _portable_ftell(f->f_fp);
 925     FILE_END_ALLOW_THREADS(f)
 926 
 927     if (pos == -1) {
 928         PyErr_SetFromErrno(PyExc_IOError);
 929         clearerr(f->f_fp);
 930         return NULL;
 931     }
 932     if (f->f_skipnextlf) {
 933         int c;
 934         c = GETC(f->f_fp);
 935         if (c == '\n') {
 936             f->f_newlinetypes |= NEWLINE_CRLF;
 937             pos++;
 938             f->f_skipnextlf = 0;
 939         } else if (c != EOF) ungetc(c, f->f_fp);
 940     }
 941 #if !defined(HAVE_LARGEFILE_SUPPORT)
 942     return PyInt_FromLong(pos);
 943 #else
 944     return PyLong_FromLongLong(pos);
 945 #endif
 946 }
 947 
 948 static PyObject *
 949 file_fileno(PyFileObject *f)
 950 {
 951     if (f->f_fp == NULL)
 952         return err_closed();
 953     return PyInt_FromLong((long) fileno(f->f_fp));
 954 }
 955 
 956 static PyObject *
 957 file_flush(PyFileObject *f)
 958 {
 959     int res;
 960 
 961     if (f->f_fp == NULL)
 962         return err_closed();
 963     FILE_BEGIN_ALLOW_THREADS(f)
 964     errno = 0;
 965     res = fflush(f->f_fp);
 966     FILE_END_ALLOW_THREADS(f)
 967     if (res != 0) {
 968         PyErr_SetFromErrno(PyExc_IOError);
 969         clearerr(f->f_fp);
 970         return NULL;
 971     }
 972     Py_INCREF(Py_None);
 973     return Py_None;
 974 }
 975 
 976 static PyObject *
 977 file_isatty(PyFileObject *f)
 978 {
 979     long res;
 980     if (f->f_fp == NULL)
 981         return err_closed();
 982     FILE_BEGIN_ALLOW_THREADS(f)
 983     res = isatty((int)fileno(f->f_fp));
 984     FILE_END_ALLOW_THREADS(f)
 985     return PyBool_FromLong(res);
 986 }
 987 
 988 
 989 #if BUFSIZ < 8192
 990 #define SMALLCHUNK 8192
 991 #else
 992 #define SMALLCHUNK BUFSIZ
 993 #endif
 994 
 995 static size_t
 996 new_buffersize(PyFileObject *f, size_t currentsize)
 997 {
 998 #ifdef HAVE_FSTAT
 999     off_t pos, end;
1000     struct stat st;
1001     if (fstat(fileno(f->f_fp), &st) == 0) {
1002         end = st.st_size;
1003         /* The following is not a bug: we really need to call lseek()
1004            *and* ftell().  The reason is that some stdio libraries
1005            mistakenly flush their buffer when ftell() is called and
1006            the lseek() call it makes fails, thereby throwing away
1007            data that cannot be recovered in any way.  To avoid this,
1008            we first test lseek(), and only call ftell() if lseek()
1009            works.  We can't use the lseek() value either, because we
1010            need to take the amount of buffered data into account.
1011            (Yet another reason why stdio stinks. :-) */
1012         pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
1013         if (pos >= 0) {
1014             pos = ftell(f->f_fp);
1015         }
1016         if (pos < 0)
1017             clearerr(f->f_fp);
1018         if (end > pos && pos >= 0)
1019             return currentsize + end - pos + 1;
1020         /* Add 1 so if the file were to grow we'd notice. */
1021     }
1022 #endif
1023     /* Expand the buffer by an amount proportional to the current size,
1024        giving us amortized linear-time behavior. Use a less-than-double
1025        growth factor to avoid excessive allocation. */
1026     return currentsize + (currentsize >> 3) + 6;
1027 }
1028 
/* BLOCKED_ERRNO(x) is true when the errno value x is EWOULDBLOCK or
   EAGAIN.  Only the constants the platform actually defines are tested
   (and only once when both share a value); if neither is defined the
   macro is the constant 0. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#else
#ifdef EWOULDBLOCK
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#else
#ifdef EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
#endif
#endif
1042 
1043 static PyObject *
1044 file_read(PyFileObject *f, PyObject *args)
1045 {
1046     long bytesrequested = -1;
1047     size_t bytesread, buffersize, chunksize;
1048     PyObject *v;
1049 
1050     if (f->f_fp == NULL)
1051         return err_closed();
1052     if (!f->readable)
1053         return err_mode("reading");
1054     /* refuse to mix with f.next() */
1055     if (f->f_buf != NULL &&
1056         (f->f_bufend - f->f_bufptr) > 0 &&
1057         f->f_buf[0] != '\0')
1058         return err_iterbuffered();
1059     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1060         return NULL;
1061     if (bytesrequested < 0)
1062         buffersize = new_buffersize(f, (size_t)0);
1063     else
1064         buffersize = bytesrequested;
1065     if (buffersize > PY_SSIZE_T_MAX) {
1066         PyErr_SetString(PyExc_OverflowError,
1067     "requested number of bytes is more than a Python string can hold");
1068         return NULL;
1069     }
1070     v = PyString_FromStringAndSize((char *)NULL, buffersize);
1071     if (v == NULL)
1072         return NULL;
1073     bytesread = 0;
1074     for (;;) {
1075         FILE_BEGIN_ALLOW_THREADS(f)
1076         errno = 0;
1077         chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1078                   buffersize - bytesread, f->f_fp, (PyObject *)f);
1079         FILE_END_ALLOW_THREADS(f)
1080         if (chunksize == 0) {
1081             if (!ferror(f->f_fp))
1082                 break;
1083             clearerr(f->f_fp);
1084             /* When in non-blocking mode, data shouldn't
1085              * be discarded if a blocking signal was
1086              * received. That will also happen if
1087              * chunksize != 0, but bytesread < buffersize. */
1088             if (bytesread > 0 && BLOCKED_ERRNO(errno))
1089                 break;
1090             PyErr_SetFromErrno(PyExc_IOError);
1091             Py_DECREF(v);
1092             return NULL;
1093         }
1094         bytesread += chunksize;
1095         if (bytesread < buffersize) {
1096             clearerr(f->f_fp);
1097             break;
1098         }
1099         if (bytesrequested < 0) {
1100             buffersize = new_buffersize(f, buffersize);
1101             if (_PyString_Resize(&v, buffersize) < 0)
1102                 return NULL;
1103         } else {
1104             /* Got what was requested. */
1105             break;
1106         }
1107     }
1108     if (bytesread != buffersize && _PyString_Resize(&v, bytesread))
1109         return NULL;
1110     return v;
1111 }
1112 
1113 static PyObject *
1114 file_readinto(PyFileObject *f, PyObject *args)
1115 {
1116     char *ptr;
1117     Py_ssize_t ntodo;
1118     Py_ssize_t ndone, nnow;
1119     Py_buffer pbuf;
1120 
1121     if (f->f_fp == NULL)
1122         return err_closed();
1123     if (!f->readable)
1124         return err_mode("reading");
1125     /* refuse to mix with f.next() */
1126     if (f->f_buf != NULL &&
1127         (f->f_bufend - f->f_bufptr) > 0 &&
1128         f->f_buf[0] != '\0')
1129         return err_iterbuffered();
1130     if (!PyArg_ParseTuple(args, "w*", &pbuf))
1131         return NULL;
1132     ptr = pbuf.buf;
1133     ntodo = pbuf.len;
1134     ndone = 0;
1135     while (ntodo > 0) {
1136         FILE_BEGIN_ALLOW_THREADS(f)
1137         errno = 0;
1138         nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1139                                         (PyObject *)f);
1140         FILE_END_ALLOW_THREADS(f)
1141         if (nnow == 0) {
1142             if (!ferror(f->f_fp))
1143                 break;
1144             PyErr_SetFromErrno(PyExc_IOError);
1145             clearerr(f->f_fp);
1146             PyBuffer_Release(&pbuf);
1147             return NULL;
1148         }
1149         ndone += nnow;
1150         ntodo -= nnow;
1151     }
1152     PyBuffer_Release(&pbuf);
1153     return PyInt_FromSsize_t(ndone);
1154 }
1155 
1156 /**************************************************************************
1157 Routine to get next line using platform fgets().
1158 
1159 Under MSVC 6:
1160 
1161 + MS threadsafe getc is very slow (multiple layers of function calls before+
1162   after each character, to lock+unlock the stream).
1163 + The stream-locking functions are MS-internal -- can't access them from user
1164   code.
1165 + There's nothing Tim could find in the MS C or platform SDK libraries that
1166   can worm around this.
1167 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1168 
1169 So we use fgets for speed(!), despite that it's painful.
1170 
1171 MS realloc is also slow.
1172 
1173 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1174 have):
1175     Linux               a wash
1176     Solaris             a wash
1177     Tru64 Unix          getline_via_fgets significantly faster
1178 
1179 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1180 writes something into the buffer, can it write into any position beyond the
1181 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1182 known on which it does; and it would be a strange way to code fgets. Still,
1183 getline_via_fgets may not work correctly if it does.  The std test
1184 test_bufio.py should fail if platform fgets() routinely writes beyond the
1185 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1186 **************************************************************************/
1187 
/* Use this routine if told to, or by default on non-getc_unlocked()
 * platforms unless told not to.  Yikes!  Let's spell that out:
 * On a platform with getc_unlocked():
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 * On a platform without getc_unlocked():
 *     By default, use fgets().
 *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 */
#if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
#define USE_FGETS_IN_GETLINE
#endif

#if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#endif
1204 
1205 #ifdef USE_FGETS_IN_GETLINE
1206 static PyObject*
1207 getline_via_fgets(PyFileObject *f, FILE *fp)
1208 {
1209 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
1210  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
1211  * to fill this much of the buffer with a known value in order to figure out
1212  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
1213  * than "most" lines, we waste time filling unused buffer slots.  100 is
1214  * surely adequate for most peoples' email archives, chewing over source code,
1215  * etc -- "regular old text files".
1216  * MAXBUFSIZE is the maximum line length that lets us get away with the less
1217  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
1218  * cautions about boosting that.  300 was chosen because the worst real-life
1219  * text-crunching job reported on Python-Dev was a mail-log crawler where over
1220  * half the lines were 254 chars.
1221  */
1222 #define INITBUFSIZE 100
1223 #define MAXBUFSIZE 300
1224     char* p;            /* temp */
1225     char buf[MAXBUFSIZE];
1226     PyObject* v;        /* the string object result */
1227     char* pvfree;       /* address of next free slot */
1228     char* pvend;    /* address one beyond last free slot */
1229     size_t nfree;       /* # of free buffer slots; pvend-pvfree */
1230     size_t total_v_size;  /* total # of slots in buffer */
1231     size_t increment;           /* amount to increment the buffer */
1232     size_t prev_v_size;
1233 
1234     /* Optimize for normal case:  avoid _PyString_Resize if at all
1235      * possible via first reading into stack buffer "buf".
1236      */
1237     total_v_size = INITBUFSIZE;         /* start small and pray */
1238     pvfree = buf;
1239     for (;;) {
1240         FILE_BEGIN_ALLOW_THREADS(f)
1241         pvend = buf + total_v_size;
1242         nfree = pvend - pvfree;
1243         memset(pvfree, '\n', nfree);
1244         assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1245         p = fgets(pvfree, (int)nfree, fp);
1246         FILE_END_ALLOW_THREADS(f)
1247 
1248         if (p == NULL) {
1249             clearerr(fp);
1250             if (PyErr_CheckSignals())
1251                 return NULL;
1252             v = PyString_FromStringAndSize(buf, pvfree - buf);
1253             return v;
1254         }
1255         /* fgets read *something* */
1256         p = memchr(pvfree, '\n', nfree);
1257         if (p != NULL) {
1258             /* Did the \n come from fgets or from us?
1259              * Since fgets stops at the first \n, and then writes
1260              * \0, if it's from fgets a \0 must be next.  But if
1261              * that's so, it could not have come from us, since
1262              * the \n's we filled the buffer with have only more
1263              * \n's to the right.
1264              */
1265             if (p+1 < pvend && *(p+1) == '\0') {
1266                 /* It's from fgets:  we win!  In particular,
1267                  * we haven't done any mallocs yet, and can
1268                  * build the final result on the first try.
1269                  */
1270                 ++p;                    /* include \n from fgets */
1271             }
1272             else {
1273                 /* Must be from us:  fgets didn't fill the
1274                  * buffer and didn't find a newline, so it
1275                  * must be the last and newline-free line of
1276                  * the file.
1277                  */
1278                 assert(p > pvfree && *(p-1) == '\0');
1279                 --p;                    /* don't include \0 from fgets */
1280             }
1281             v = PyString_FromStringAndSize(buf, p - buf);
1282             return v;
1283         }
1284         /* yuck:  fgets overwrote all the newlines, i.e. the entire
1285          * buffer.  So this line isn't over yet, or maybe it is but
1286          * we're exactly at EOF.  If we haven't already, try using the
1287          * rest of the stack buffer.
1288          */
1289         assert(*(pvend-1) == '\0');
1290         if (pvfree == buf) {
1291             pvfree = pvend - 1;                 /* overwrite trailing null */
1292             total_v_size = MAXBUFSIZE;
1293         }
1294         else
1295             break;
1296     }
1297 
1298     /* The stack buffer isn't big enough; malloc a string object and read
1299      * into its buffer.
1300      */
1301     total_v_size = MAXBUFSIZE << 1;
1302     v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1303     if (v == NULL)
1304         return v;
1305     /* copy over everything except the last null byte */
1306     memcpy(BUF(v), buf, MAXBUFSIZE-1);
1307     pvfree = BUF(v) + MAXBUFSIZE - 1;
1308 
1309     /* Keep reading stuff into v; if it ever ends successfully, break
1310      * after setting p one beyond the end of the line.  The code here is
1311      * very much like the code above, except reads into v's buffer; see
1312      * the code above for detailed comments about the logic.
1313      */
1314     for (;;) {
1315         FILE_BEGIN_ALLOW_THREADS(f)
1316         pvend = BUF(v) + total_v_size;
1317         nfree = pvend - pvfree;
1318         memset(pvfree, '\n', nfree);
1319         assert(nfree < INT_MAX);
1320         p = fgets(pvfree, (int)nfree, fp);
1321         FILE_END_ALLOW_THREADS(f)
1322 
1323         if (p == NULL) {
1324             clearerr(fp);
1325             if (PyErr_CheckSignals()) {
1326                 Py_DECREF(v);
1327                 return NULL;
1328             }
1329             p = pvfree;
1330             break;
1331         }
1332         p = memchr(pvfree, '\n', nfree);
1333         if (p != NULL) {
1334             if (p+1 < pvend && *(p+1) == '\0') {
1335                 /* \n came from fgets */
1336                 ++p;
1337                 break;
1338             }
1339             /* \n came from us; last line of file, no newline */
1340             assert(p > pvfree && *(p-1) == '\0');
1341             --p;
1342             break;
1343         }
1344         /* expand buffer and try again */
1345         assert(*(pvend-1) == '\0');
1346         increment = total_v_size >> 2;          /* mild exponential growth */
1347         prev_v_size = total_v_size;
1348         total_v_size += increment;
1349         /* check for overflow */
1350         if (total_v_size <= prev_v_size ||
1351             total_v_size > PY_SSIZE_T_MAX) {
1352             PyErr_SetString(PyExc_OverflowError,
1353                 "line is longer than a Python string can hold");
1354             Py_DECREF(v);
1355             return NULL;
1356         }
1357         if (_PyString_Resize(&v, (int)total_v_size) < 0)
1358             return NULL;
1359         /* overwrite the trailing null byte */
1360         pvfree = BUF(v) + (prev_v_size - 1);
1361     }
1362     if (BUF(v) + total_v_size != p && _PyString_Resize(&v, p - BUF(v)))
1363         return NULL;
1364     return v;
1365 #undef INITBUFSIZE
1366 #undef MAXBUFSIZE
1367 }
1368 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1369 
/* Internal routine to get a line.
   Size argument interpretation:
   > 0: max length;
   <= 0: read arbitrary line

   Returns a new string object holding the line, including the '\n' that
   ended it if one was read, or NULL with an exception set.  Hitting EOF
   before any character was stored yields an empty string.  In universal
   newline mode "\r" and "\r\n" are stored as a single '\n' and the kinds
   of newline seen are recorded in f->f_newlinetypes.
*/

static PyObject *
get_line(PyFileObject *f, int n)
{
    FILE *fp = f->f_fp;
    int c;
    char *buf, *end;            /* next free slot / one past the last slot */
    size_t total_v_size;        /* total # of slots in buffer */
    size_t used_v_size;         /* # used slots in buffer */
    size_t increment;       /* amount to increment the buffer */
    PyObject *v;
    /* Work on local copies of the newline state while the GIL is
       released; they are written back to f after each read pass. */
    int newlinetypes = f->f_newlinetypes;
    int skipnextlf = f->f_skipnextlf;
    int univ_newline = f->f_univ_newline;

#if defined(USE_FGETS_IN_GETLINE)
    /* Unbounded read without newline translation: use the fgets() path. */
    if (n <= 0 && !univ_newline )
        return getline_via_fgets(f, fp);
#endif
    /* With a limit the buffer is exactly n bytes; otherwise start at 100
       and grow below as needed. */
    total_v_size = n > 0 ? n : 100;
    v = PyString_FromStringAndSize((char *)NULL, total_v_size);
    if (v == NULL)
        return NULL;
    buf = BUF(v);
    end = buf + total_v_size;

    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        FLOCKFILE(fp);
        if (univ_newline) {
            c = 'x'; /* Shut up gcc warning */
            while ( buf != end && (c = GETC(fp)) != EOF ) {
                if (skipnextlf ) {
                    skipnextlf = 0;
                    if (c == '\n') {
                        /* Seeing a \n here with
                         * skipnextlf true means we
                         * saw a \r before.
                         */
                        newlinetypes |= NEWLINE_CRLF;
                        c = GETC(fp);
                        if (c == EOF) break;
                    } else {
                        newlinetypes |= NEWLINE_CR;
                    }
                }
                if (c == '\r') {
                    /* Store '\n' now; a directly following '\n'
                       is swallowed on the next character read. */
                    skipnextlf = 1;
                    c = '\n';
                } else if ( c == '\n')
                    newlinetypes |= NEWLINE_LF;
                *buf++ = c;
                if (c == '\n') break;
            }
            /* A '\r' that was the very last character of the file
               counts as a lone CR newline. */
            if ( c == EOF && skipnextlf )
                newlinetypes |= NEWLINE_CR;
        } else /* If not universal newlines use the normal loop */
        while ((c = GETC(fp)) != EOF &&
               (*buf++ = c) != '\n' &&
            buf != end)
            ;
        FUNLOCKFILE(fp);
        FILE_END_ALLOW_THREADS(f)
        f->f_newlinetypes = newlinetypes;
        f->f_skipnextlf = skipnextlf;
        if (c == '\n')
            break;
        if (c == EOF) {
            if (ferror(fp)) {
                PyErr_SetFromErrno(PyExc_IOError);
                clearerr(fp);
                Py_DECREF(v);
                return NULL;
            }
            /* Plain EOF: reset the indicator and return what we have,
               unless a signal handler raised an exception. */
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            break;
        }
        /* Must be because buf == end */
        if (n > 0)
            break;
        used_v_size = total_v_size;
        increment = total_v_size >> 2; /* mild exponential growth */
        total_v_size += increment;
        if (total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        if (_PyString_Resize(&v, total_v_size) < 0)
            return NULL;
        /* The resize may have moved the string; recompute both pointers. */
        buf = BUF(v) + used_v_size;
        end = BUF(v) + total_v_size;
    }

    /* Trim the result to the number of bytes actually stored. */
    used_v_size = buf - BUF(v);
    if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
        return NULL;
    return v;
}
1479 
1480 /* External C interface */
1481 
1482 PyObject *
1483 PyFile_GetLine(PyObject *f, int n)
1484 {
1485     PyObject *result;
1486 
1487     if (f == NULL) {
1488         PyErr_BadInternalCall();
1489         return NULL;
1490     }
1491 
1492     if (PyFile_Check(f)) {
1493         PyFileObject *fo = (PyFileObject *)f;
1494         if (fo->f_fp == NULL)
1495             return err_closed();
1496         if (!fo->readable)
1497             return err_mode("reading");
1498         /* refuse to mix with f.next() */
1499         if (fo->f_buf != NULL &&
1500             (fo->f_bufend - fo->f_bufptr) > 0 &&
1501             fo->f_buf[0] != '\0')
1502             return err_iterbuffered();
1503         result = get_line(fo, n);
1504     }
1505     else {
1506         PyObject *reader;
1507         PyObject *args;
1508 
1509         reader = PyObject_GetAttrString(f, "readline");
1510         if (reader == NULL)
1511             return NULL;
1512         if (n <= 0)
1513             args = PyTuple_New(0);
1514         else
1515             args = Py_BuildValue("(i)", n);
1516         if (args == NULL) {
1517             Py_DECREF(reader);
1518             return NULL;
1519         }
1520         result = PyEval_CallObject(reader, args);
1521         Py_DECREF(reader);
1522         Py_DECREF(args);
1523         if (result != NULL && !PyString_Check(result) &&
1524             !PyUnicode_Check(result)) {
1525             Py_DECREF(result);
1526             result = NULL;
1527             PyErr_SetString(PyExc_TypeError,
1528                        "object.readline() returned non-string");
1529         }
1530     }
1531 
1532     if (n < 0 && result != NULL && PyString_Check(result)) {
1533         char *s = PyString_AS_STRING(result);
1534         Py_ssize_t len = PyString_GET_SIZE(result);
1535         if (len == 0) {
1536             Py_DECREF(result);
1537             result = NULL;
1538             PyErr_SetString(PyExc_EOFError,
1539                             "EOF when reading a line");
1540         }
1541         else if (s[len-1] == '\n') {
1542             if (result->ob_refcnt == 1) {
1543                 if (_PyString_Resize(&result, len-1))
1544                     return NULL;
1545             }
1546             else {
1547                 PyObject *v;
1548                 v = PyString_FromStringAndSize(s, len-1);
1549                 Py_DECREF(result);
1550                 result = v;
1551             }
1552         }
1553     }
1554 #ifdef Py_USING_UNICODE
1555     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1556         Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1557         Py_ssize_t len = PyUnicode_GET_SIZE(result);
1558         if (len == 0) {
1559             Py_DECREF(result);
1560             result = NULL;
1561             PyErr_SetString(PyExc_EOFError,
1562                             "EOF when reading a line");
1563         }
1564         else if (s[len-1] == '\n') {
1565             if (result->ob_refcnt == 1)
1566                 PyUnicode_Resize(&result, len-1);
1567             else {
1568                 PyObject *v;
1569                 v = PyUnicode_FromUnicode(s, len-1);
1570                 Py_DECREF(result);
1571                 result = v;
1572             }
1573         }
1574     }
1575 #endif
1576     return result;
1577 }
1578 
1579 /* Python method */
1580 
1581 static PyObject *
1582 file_readline(PyFileObject *f, PyObject *args)
1583 {
1584     int n = -1;
1585 
1586     if (f->f_fp == NULL)
1587         return err_closed();
1588     if (!f->readable)
1589         return err_mode("reading");
1590     /* refuse to mix with f.next() */
1591     if (f->f_buf != NULL &&
1592         (f->f_bufend - f->f_bufptr) > 0 &&
1593         f->f_buf[0] != '\0')
1594         return err_iterbuffered();
1595     if (!PyArg_ParseTuple(args, "|i:readline", &n))
1596         return NULL;
1597     if (n == 0)
1598         return PyString_FromString("");
1599     if (n < 0)
1600         n = 0;
1601     return get_line(f, n);
1602 }
1603 
/* file.readlines([sizehint]): return a list of the lines read from the file.
 *
 * Data is read in chunks into a buffer (first the on-stack small_buffer,
 * later a string object that is doubled whenever a chunk holds no complete
 * line) and split at '\n'.  With sizehint <= 0 the file is read until EOF.
 * With sizehint > 0 reading stops once at least that many bytes have been
 * read and complete lines were extracted; a trailing partial line is then
 * finished with get_line().
 *
 * Returns a new list, or NULL with an exception set.
 */
static PyObject *
file_readlines(PyFileObject *f, PyObject *args)
{
    long sizehint = 0;
    PyObject *list = NULL;
    PyObject *line;
    char small_buffer[SMALLCHUNK];
    char *buffer = small_buffer;        /* buffer currently in use */
    size_t buffersize = SMALLCHUNK;
    PyObject *big_buffer = NULL;        /* heap buffer, once one is needed */
    size_t nfilled = 0;                 /* bytes of a partial line kept at the start of buffer */
    size_t nread;
    size_t totalread = 0;
    char *p, *q, *end;
    int err;
    int shortread = 0;                  /* set once a read returned fewer bytes than asked for */

    if (f->f_fp == NULL)
        return err_closed();
    if (!f->readable)
        return err_mode("reading");
    /* refuse to mix with f.next() */
    if (f->f_buf != NULL &&
        (f->f_bufend - f->f_bufptr) > 0 &&
        f->f_buf[0] != '\0')
        return err_iterbuffered();
    if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
        return NULL;
    if ((list = PyList_New(0)) == NULL)
        return NULL;
    for (;;) {
        /* After a short read do not read again; treat this pass as
           "no more data". */
        if (shortread)
            nread = 0;
        else {
            FILE_BEGIN_ALLOW_THREADS(f)
            errno = 0;
            nread = Py_UniversalNewlineFread(buffer+nfilled,
                buffersize-nfilled, f->f_fp, (PyObject *)f);
            FILE_END_ALLOW_THREADS(f)
            shortread = (nread < buffersize-nfilled);
        }
        if (nread == 0) {
            /* Clearing sizehint keeps the partial-line code below from
               calling get_line() once the input is exhausted. */
            sizehint = 0;
            if (!ferror(f->f_fp))
                break;
            PyErr_SetFromErrno(PyExc_IOError);
            clearerr(f->f_fp);
            goto error;
        }
        totalread += nread;
        /* Only the newly read bytes need scanning; the kept prefix is a
           partial line without a newline. */
        p = (char *)memchr(buffer+nfilled, '\n', nread);
        if (p == NULL) {
            /* Need a larger buffer to fit this line */
            nfilled += nread;
            buffersize *= 2;
            if (buffersize > PY_SSIZE_T_MAX) {
                PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
                goto error;
            }
            if (big_buffer == NULL) {
                /* Create the big buffer */
                big_buffer = PyString_FromStringAndSize(
                    NULL, buffersize);
                if (big_buffer == NULL)
                    goto error;
                buffer = PyString_AS_STRING(big_buffer);
                memcpy(buffer, small_buffer, nfilled);
            }
            else {
                /* Grow the big buffer */
                if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
                    goto error;
                buffer = PyString_AS_STRING(big_buffer);
            }
            continue;
        }
        end = buffer+nfilled+nread;
        q = buffer;
        do {
            /* Process complete lines */
            p++;
            line = PyString_FromStringAndSize(q, p-q);
            if (line == NULL)
                goto error;
            err = PyList_Append(list, line);
            Py_DECREF(line);
            if (err != 0)
                goto error;
            q = p;
            p = (char *)memchr(q, '\n', end-q);
        } while (p != NULL);
        /* Move the remaining incomplete line to the start */
        nfilled = end-q;
        memmove(buffer, q, nfilled);
        if (sizehint > 0)
            if (totalread >= (size_t)sizehint)
                break;
    }
    if (nfilled != 0) {
        /* Partial last line */
        line = PyString_FromStringAndSize(buffer, nfilled);
        if (line == NULL)
            goto error;
        if (sizehint > 0) {
            /* Need to complete the last line */
            PyObject *rest = get_line(f, 0);
            if (rest == NULL) {
                Py_DECREF(line);
                goto error;
            }
            PyString_Concat(&line, rest);
            Py_DECREF(rest);
            if (line == NULL)
                goto error;
        }
        err = PyList_Append(list, line);
        Py_DECREF(line);
        if (err != 0)
            goto error;
    }

    /* Shared exit: release the heap buffer and return the list, which the
       error path below has already cleared to NULL. */
cleanup:
    Py_XDECREF(big_buffer);
    return list;

error:
    Py_CLEAR(list);
    goto cleanup;
}
1734 
1735 static PyObject *
1736 file_write(PyFileObject *f, PyObject *args)
1737 {
1738     Py_buffer pbuf;
1739     const char *s;
1740     Py_ssize_t n, n2;
1741     PyObject *encoded = NULL;
1742 
1743     if (f->f_fp == NULL)
1744         return err_closed();
1745     if (!f->writable)
1746         return err_mode("writing");
1747     if (f->f_binary) {
1748         if (!PyArg_ParseTuple(args, "s*", &pbuf))
1749             return NULL;
1750         s = pbuf.buf;
1751         n = pbuf.len;
1752     }
1753     else {
1754         const char *encoding, *errors;
1755         PyObject *text;
1756         if (!PyArg_ParseTuple(args, "O", &text))
1757             return NULL;
1758 
1759         if (PyString_Check(text)) {
1760             s = PyString_AS_STRING(text);
1761             n = PyString_GET_SIZE(text);
1762         } else if (PyUnicode_Check(text)) {
1763             if (f->f_encoding != Py_None)
1764                 encoding = PyString_AS_STRING(f->f_encoding);
1765             else
1766                 encoding = PyUnicode_GetDefaultEncoding();
1767             if (f->f_errors != Py_None)
1768                 errors = PyString_AS_STRING(f->f_errors);
1769             else
1770                 errors = "strict";
1771             encoded = PyUnicode_AsEncodedString(text, encoding, errors);
1772             if (encoded == NULL)
1773                 return NULL;
1774             s = PyString_AS_STRING(encoded);
1775             n = PyString_GET_SIZE(encoded);
1776         } else {
1777             if (PyObject_AsCharBuffer(text, &s, &n))
1778                 return NULL;
1779         }
1780     }
1781     f->f_softspace = 0;
1782     FILE_BEGIN_ALLOW_THREADS(f)
1783     errno = 0;
1784     n2 = fwrite(s, 1, n, f->f_fp);
1785     FILE_END_ALLOW_THREADS(f)
1786     Py_XDECREF(encoded);
1787     if (f->f_binary)
1788         PyBuffer_Release(&pbuf);
1789     if (n2 != n) {
1790         PyErr_SetFromErrno(PyExc_IOError);
1791         clearerr(f->f_fp);
1792         return NULL;
1793     }
1794     Py_INCREF(Py_None);
1795     return Py_None;
1796 }
1797 
1798 static PyObject *
1799 file_writelines(PyFileObject *f, PyObject *seq)
1800 {
1801 #define CHUNKSIZE 1000
1802     PyObject *list, *line;
1803     PyObject *it;       /* iter(seq) */
1804     PyObject *result;
1805     int index, islist;
1806     Py_ssize_t i, j, nwritten, len;
1807 
1808     assert(seq != NULL);
1809     if (f->f_fp == NULL)
1810         return err_closed();
1811     if (!f->writable)
1812         return err_mode("writing");
1813 
1814     result = NULL;
1815     list = NULL;
1816     islist = PyList_Check(seq);
1817     if  (islist)
1818         it = NULL;
1819     else {
1820         it = PyObject_GetIter(seq);
1821         if (it == NULL) {
1822             PyErr_SetString(PyExc_TypeError,
1823                 "writelines() requires an iterable argument");
1824             return NULL;
1825         }
1826         /* From here on, fail by going to error, to reclaim "it". */
1827         list = PyList_New(CHUNKSIZE);
1828         if (list == NULL)
1829             goto error;
1830     }
1831 
1832     /* Strategy: slurp CHUNKSIZE lines into a private list,
1833        checking that they are all strings, then write that list
1834        without holding the interpreter lock, then come back for more. */
1835     for (index = 0; ; index += CHUNKSIZE) {
1836         if (islist) {
1837             Py_XDECREF(list);
1838             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1839             if (list == NULL)
1840                 goto error;
1841             j = PyList_GET_SIZE(list);
1842         }
1843         else {
1844             for (j = 0; j < CHUNKSIZE; j++) {
1845                 line = PyIter_Next(it);
1846                 if (line == NULL) {
1847                     if (PyErr_Occurred())
1848                         goto error;
1849                     break;
1850                 }
1851                 PyList_SetItem(list, j, line);
1852             }
1853             /* The iterator might have closed the file on us. */
1854             if (f->f_fp == NULL) {
1855                 err_closed();
1856                 goto error;
1857             }
1858         }
1859         if (j == 0)
1860             break;
1861 
1862         /* Check that all entries are indeed strings. If not,
1863            apply the same rules as for file.write() and
1864            convert the results to strings. This is slow, but
1865            seems to be the only way since all conversion APIs
1866            could potentially execute Python code. */
1867         for (i = 0; i < j; i++) {
1868             PyObject *v = PyList_GET_ITEM(list, i);
1869             if (!PyString_Check(v)) {
1870                 const char *buffer;
1871                 if (((f->f_binary &&
1872                       PyObject_AsReadBuffer(v,
1873                           (const void**)&buffer,
1874                                         &len)) ||
1875                      PyObject_AsCharBuffer(v,
1876                                            &buffer,
1877                                            &len))) {
1878                     PyErr_SetString(PyExc_TypeError,
1879             "writelines() argument must be a sequence of strings");
1880                             goto error;
1881                 }
1882                 line = PyString_FromStringAndSize(buffer,
1883                                                   len);
1884                 if (line == NULL)
1885                     goto error;
1886                 Py_DECREF(v);
1887                 PyList_SET_ITEM(list, i, line);
1888             }
1889         }
1890 
1891         /* Since we are releasing the global lock, the
1892            following code may *not* execute Python code. */
1893         f->f_softspace = 0;
1894         FILE_BEGIN_ALLOW_THREADS(f)
1895         errno = 0;
1896         for (i = 0; i < j; i++) {
1897             line = PyList_GET_ITEM(list, i);
1898             len = PyString_GET_SIZE(line);
1899             nwritten = fwrite(PyString_AS_STRING(line),
1900                               1, len, f->f_fp);
1901             if (nwritten != len) {
1902                 FILE_ABORT_ALLOW_THREADS(f)
1903                 PyErr_SetFromErrno(PyExc_IOError);
1904                 clearerr(f->f_fp);
1905                 goto error;
1906             }
1907         }
1908         FILE_END_ALLOW_THREADS(f)
1909 
1910         if (j < CHUNKSIZE)
1911             break;
1912     }
1913 
1914     Py_INCREF(Py_None);
1915     result = Py_None;
1916   error:
1917     Py_XDECREF(list);
1918     Py_XDECREF(it);
1919     return result;
1920 #undef CHUNKSIZE
1921 }
1922 
1923 static PyObject *
1924 file_self(PyFileObject *f)
1925 {
1926     if (f->f_fp == NULL)
1927         return err_closed();
1928     Py_INCREF(f);
1929     return (PyObject *)f;
1930 }
1931 
1932 static PyObject *
1933 file_xreadlines(PyFileObject *f)
1934 {
1935     if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1936                        "try 'for line in f' instead", 1) < 0)
1937            return NULL;
1938     return file_self(f);
1939 }
1940 
1941 static PyObject *
1942 file_exit(PyObject *f, PyObject *args)
1943 {
1944     PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1945     if (!ret)
1946         /* If error occurred, pass through */
1947         return NULL;
1948     Py_DECREF(ret);
1949     /* We cannot return the result of close since a true
1950      * value will be interpreted as "yes, swallow the
1951      * exception if one was raised inside the with block". */
1952     Py_RETURN_NONE;
1953 }
1954 
/* Docstrings for the readline(), read() and write() methods.  Each one is
   attached to its method through the file_methods table. */

PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline.  A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.");

PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.\n"
"Notice that when in non-blocking mode, less data than what was requested\n"
"may be returned, even if no size parameter was given.");

PyDoc_STRVAR(write_doc,
"write(str) -> None.  Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.");
1974 
1975 PyDoc_STRVAR(fileno_doc,
1976 "fileno() -> integer \"file descriptor\".\n"
1977 "\n"
1978 "This is needed for lower-level file interfaces, such os.read().");
1979 
1980 PyDoc_STRVAR(seek_doc,
1981 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1982 "\n"
1983 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1984 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1985 "(move relative to current position, positive or negative), and 2 (move\n"
1986 "relative to end of file, usually negative, although many platforms allow\n"
1987 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1988 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
1989 "undefined behavior."
1990 "\n"
1991 "Note that not all file objects are seekable.");
1992 
/* Docstrings for the remaining file methods; each is referenced from the
   file_methods table.  truncate_doc is compiled only when HAVE_FTRUNCATE
   is defined, matching the conditional "truncate" entry in that table. */
#ifdef HAVE_FTRUNCATE
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().");
#endif

PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer (may be a long integer).");

PyDoc_STRVAR(readinto_doc,
"readinto() -> Undocumented.  Don't use this; it may go away.");

PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.");

PyDoc_STRVAR(xreadlines_doc,
"xreadlines() -> returns self.\n"
"\n"
"For backward compatibility. File objects now include the performance\n"
"optimizations previously implemented in the xreadlines module.");

PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
"\n"
"Note that newlines are not added.  The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

PyDoc_STRVAR(flush_doc,
"flush() -> None.  Flush the internal I/O buffer.");

PyDoc_STRVAR(close_doc,
"close() -> None or (perhaps) an integer.  Close the file.\n"
"\n"
"Sets data attribute .closed to True.  A closed file cannot be used for\n"
"further I/O operations.  close() may be called more than once without\n"
"error.  Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.");

PyDoc_STRVAR(isatty_doc,
"isatty() -> true or false.  True if the file is connected to a tty device.");

/* Context-manager protocol: __enter__ is served by file_self and
   __exit__ by file_exit (see file_methods). */
PyDoc_STRVAR(enter_doc,
             "__enter__() -> self.");

PyDoc_STRVAR(exit_doc,
             "__exit__(*excinfo) -> None.  Closes the file.");
2044 
/* Method table for the file type (installed as tp_methods of PyFile_Type).
   Each entry gives the Python-visible name, the C implementation, the
   calling convention flag and the docstring.  "truncate" is present only
   when HAVE_FTRUNCATE is defined.  __enter__ reuses file_self, so entering
   a with block simply yields the file object. */
static PyMethodDef file_methods[] = {
    {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
    {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
    {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
    {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
    {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
    {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
    {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
    {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
    {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
    {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
    {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
    {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
    {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
    {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
    {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
    {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
    {NULL,            NULL}             /* sentinel */
};
2066 
/* Byte offset of field x inside PyFileObject, for the member table below. */
#define OFF(x) offsetof(PyFileObject, x)

/* Attributes exposed directly from PyFileObject fields (installed as
   tp_members of PyFile_Type).  Every entry is an object-valued field
   (T_OBJECT) flagged RO. */
static PyMemberDef file_memberlist[] = {
    {"mode",            T_OBJECT,       OFF(f_mode),    RO,
     "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
    {"name",            T_OBJECT,       OFF(f_name),    RO,
     "file name"},
    {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
     "file encoding"},
    {"errors",          T_OBJECT,       OFF(f_errors),  RO,
     "Unicode error handler"},
    /* getattr(f, "closed") is implemented without this table */
    {NULL}      /* Sentinel */
};
2081 
2082 static PyObject *
2083 get_closed(PyFileObject *f, void *closure)
2084 {
2085     return PyBool_FromLong((long)(f->f_fp == 0));
2086 }
2087 static PyObject *
2088 get_newlines(PyFileObject *f, void *closure)
2089 {
2090     switch (f->f_newlinetypes) {
2091     case NEWLINE_UNKNOWN:
2092         Py_INCREF(Py_None);
2093         return Py_None;
2094     case NEWLINE_CR:
2095         return PyString_FromString("\r");
2096     case NEWLINE_LF:
2097         return PyString_FromString("\n");
2098     case NEWLINE_CR|NEWLINE_LF:
2099         return Py_BuildValue("(ss)", "\r", "\n");
2100     case NEWLINE_CRLF:
2101         return PyString_FromString("\r\n");
2102     case NEWLINE_CR|NEWLINE_CRLF:
2103         return Py_BuildValue("(ss)", "\r", "\r\n");
2104     case NEWLINE_LF|NEWLINE_CRLF:
2105         return Py_BuildValue("(ss)", "\n", "\r\n");
2106     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2107         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2108     default:
2109         PyErr_Format(PyExc_SystemError,
2110                      "Unknown newlines value 0x%x\n",
2111                      f->f_newlinetypes);
2112         return NULL;
2113     }
2114 }
2115 
2116 static PyObject *
2117 get_softspace(PyFileObject *f, void *closure)
2118 {
2119     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2120         return NULL;
2121     return PyInt_FromLong(f->f_softspace);
2122 }
2123 
2124 static int
2125 set_softspace(PyFileObject *f, PyObject *value)
2126 {
2127     int new;
2128     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2129         return -1;
2130 
2131     if (value == NULL) {
2132         PyErr_SetString(PyExc_TypeError,
2133                         "can't delete softspace attribute");
2134         return -1;
2135     }
2136 
2137     new = PyInt_AsLong(value);
2138     if (new == -1 && PyErr_Occurred())
2139         return -1;
2140     f->f_softspace = new;
2141     return 0;
2142 }
2143 
/* Computed attributes of the file type (installed as tp_getset of
   PyFile_Type).  "closed" and "newlines" have a getter only; "softspace"
   has both a getter and a setter. */
static PyGetSetDef file_getsetlist[] = {
    {"closed", (getter)get_closed, NULL, "True if the file is closed"},
    {"newlines", (getter)get_newlines, NULL,
     "end-of-line convention used in this file"},
    {"softspace", (getter)get_softspace, (setter)set_softspace,
     "flag indicating that a space needs to be printed; used by print"},
    {0},
};
2152 
2153 static void
2154 drop_readahead(PyFileObject *f)
2155 {
2156     if (f->f_buf != NULL) {
2157         PyMem_Free(f->f_buf);
2158         f->f_buf = NULL;
2159     }
2160 }
2161 
2162 /* Make sure that file has a readahead buffer with at least one byte
2163    (unless at EOF) and no more than bufsize.  Returns negative value on
2164    error, will set MemoryError if bufsize bytes cannot be allocated. */
2165 static int
2166 readahead(PyFileObject *f, int bufsize)
2167 {
2168     Py_ssize_t chunksize;
2169 
2170     if (f->f_buf != NULL) {
2171         if( (f->f_bufend - f->f_bufptr) >= 1)
2172             return 0;
2173         else
2174             drop_readahead(f);
2175     }
2176     if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2177         PyErr_NoMemory();
2178         return -1;
2179     }
2180     FILE_BEGIN_ALLOW_THREADS(f)
2181     errno = 0;
2182     chunksize = Py_UniversalNewlineFread(
2183         f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2184     FILE_END_ALLOW_THREADS(f)
2185     if (chunksize == 0) {
2186         if (ferror(f->f_fp)) {
2187             PyErr_SetFromErrno(PyExc_IOError);
2188             clearerr(f->f_fp);
2189             drop_readahead(f);
2190             return -1;
2191         }
2192     }
2193     f->f_bufptr = f->f_buf;
2194     f->f_bufend = f->f_buf + chunksize;
2195     return 0;
2196 }
2197 
2198 /* Used by file_iternext.  The returned string will start with 'skip'
2199    uninitialized bytes followed by the remainder of the line. Don't be
2200    horrified by the recursive call: maximum recursion depth is limited by
2201    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2202 
2203 static PyStringObject *
2204 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2205 {
2206     PyStringObject* s;
2207     char *bufptr;
2208     char *buf;
2209     Py_ssize_t len;
2210 
2211     if (f->f_buf == NULL)
2212         if (readahead(f, bufsize) < 0)
2213             return NULL;
2214 
2215     len = f->f_bufend - f->f_bufptr;
2216     if (len == 0)
2217         return (PyStringObject *)
2218             PyString_FromStringAndSize(NULL, skip);
2219     bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2220     if (bufptr != NULL) {
2221         bufptr++;                               /* Count the '\n' */
2222         len = bufptr - f->f_bufptr;
2223         s = (PyStringObject *)
2224             PyString_FromStringAndSize(NULL, skip+len);
2225         if (s == NULL)
2226             return NULL;
2227         memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2228         f->f_bufptr = bufptr;
2229         if (bufptr == f->f_bufend)
2230             drop_readahead(f);
2231     } else {
2232         bufptr = f->f_bufptr;
2233         buf = f->f_buf;
2234         f->f_buf = NULL;                /* Force new readahead buffer */
2235         assert(skip+len < INT_MAX);
2236         s = readahead_get_line_skip(
2237             f, (int)(skip+len), bufsize + (bufsize>>2) );
2238         if (s == NULL) {
2239             PyMem_Free(buf);
2240             return NULL;
2241         }
2242         memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2243         PyMem_Free(buf);
2244     }
2245     return s;
2246 }
2247 
2248 /* A larger buffer size may actually decrease performance. */
2249 #define READAHEAD_BUFSIZE 8192
2250 
2251 static PyObject *
2252 file_iternext(PyFileObject *f)
2253 {
2254     PyStringObject* l;
2255 
2256     if (f->f_fp == NULL)
2257         return err_closed();
2258     if (!f->readable)
2259         return err_mode("reading");
2260 
2261     l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2262     if (l == NULL || PyString_GET_SIZE(l) == 0) {
2263         Py_XDECREF(l);
2264         return NULL;
2265     }
2266     return (PyObject *)l;
2267 }
2268 
2269 
2270 static PyObject *
2271 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2272 {
2273     PyObject *self;
2274     static PyObject *not_yet_string;
2275 
2276     assert(type != NULL && type->tp_alloc != NULL);
2277 
2278     if (not_yet_string == NULL) {
2279         not_yet_string = PyString_InternFromString("<uninitialized file>");
2280         if (not_yet_string == NULL)
2281             return NULL;
2282     }
2283 
2284     self = type->tp_alloc(type, 0);
2285     if (self != NULL) {
2286         /* Always fill in the name and mode, so that nobody else
2287            needs to special-case NULLs there. */
2288         Py_INCREF(not_yet_string);
2289         ((PyFileObject *)self)->f_name = not_yet_string;
2290         Py_INCREF(not_yet_string);
2291         ((PyFileObject *)self)->f_mode = not_yet_string;
2292         Py_INCREF(Py_None);
2293         ((PyFileObject *)self)->f_encoding = Py_None;
2294         Py_INCREF(Py_None);
2295         ((PyFileObject *)self)->f_errors = Py_None;
2296         ((PyFileObject *)self)->weakreflist = NULL;
2297         ((PyFileObject *)self)->unlocked_count = 0;
2298     }
2299     return self;
2300 }
2301 
2302 static int
2303 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2304 {
2305     PyFileObject *foself = (PyFileObject *)self;
2306     int ret = 0;
2307     static char *kwlist[] = {"name", "mode", "buffering", 0};
2308     char *name = NULL;
2309     char *mode = "r";
2310     int bufsize = -1;
2311     int wideargument = 0;
2312 #ifdef MS_WINDOWS
2313     PyObject *po;
2314 #endif
2315 
2316     assert(PyFile_Check(self));
2317     if (foself->f_fp != NULL) {
2318         /* Have to close the existing file first. */
2319         PyObject *closeresult = file_close(foself);
2320         if (closeresult == NULL)
2321             return -1;
2322         Py_DECREF(closeresult);
2323     }
2324 
2325 #ifdef MS_WINDOWS
2326     if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2327                                     kwlist, &po, &mode, &bufsize)) {
2328         wideargument = 1;
2329         if (fill_file_fields(foself, NULL, po, mode,
2330                              fclose) == NULL)
2331             goto Error;
2332     } else {
2333         /* Drop the argument parsing error as narrow
2334            strings are also valid. */
2335         PyErr_Clear();
2336     }
2337 #endif
2338 
2339     if (!wideargument) {
2340         PyObject *o_name;
2341 
2342         if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2343                                          Py_FileSystemDefaultEncoding,
2344                                          &name,
2345                                          &mode, &bufsize))
2346             return -1;
2347 
2348         /* We parse again to get the name as a PyObject */
2349         if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2350                                          kwlist, &o_name, &mode,
2351                                          &bufsize))
2352             goto Error;
2353 
2354         if (fill_file_fields(foself, NULL, o_name, mode,
2355                              fclose) == NULL)
2356             goto Error;
2357     }
2358     if (open_the_file(foself, name, mode) == NULL)
2359         goto Error;
2360     foself->f_setbuf = NULL;
2361     PyFile_SetBufSize(self, bufsize);
2362     goto Done;
2363 
2364 Error:
2365     ret = -1;
2366     /* fall through */
2367 Done:
2368     PyMem_Free(name); /* free the encoded string */
2369     return ret;
2370 }
2371 
/* Docstring of the file type itself (tp_doc of PyFile_Type), assembled
   from two adjacent PyDoc_STR pieces: the general description, then the
   notes on universal newline mode. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.  The preferred way\n"
"to open a file is with the builtin open() function.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
);
2394 
/* Type object for "file".  Slots are filled positionally, so the order of
   the entries below must follow the PyTypeObject layout.  The type can be
   subclassed (Py_TPFLAGS_BASETYPE) and supports weak references through
   the weakreflist field.  file_self serves as tp_iter, i.e. a file is its
   own iterator. */
PyTypeObject PyFile_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "file",                                     /* tp_name */
    sizeof(PyFileObject),                       /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)file_dealloc,                   /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)file_repr,                        /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    /* softspace is writable:  we must supply tp_setattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
    file_doc,                                   /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
    (getiterfunc)file_self,                     /* tp_iter */
    (iternextfunc)file_iternext,                /* tp_iternext */
    file_methods,                               /* tp_methods */
    file_memberlist,                            /* tp_members */
    file_getsetlist,                            /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    file_init,                                  /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    file_new,                                   /* tp_new */
    PyObject_Del,                           /* tp_free */
};
2437 
2438 /* Interface for the 'soft space' between print items. */
2439 
2440 int
2441 PyFile_SoftSpace(PyObject *f, int newflag)
2442 {
2443     long oldflag = 0;
2444     if (f == NULL) {
2445         /* Do nothing */
2446     }
2447     else if (PyFile_Check(f)) {
2448         oldflag = ((PyFileObject *)f)->f_softspace;
2449         ((PyFileObject *)f)->f_softspace = newflag;
2450     }
2451     else {
2452         PyObject *v;
2453         v = PyObject_GetAttrString(f, "softspace");
2454         if (v == NULL)
2455             PyErr_Clear();
2456         else {
2457             if (PyInt_Check(v))
2458                 oldflag = PyInt_AsLong(v);
2459             assert(oldflag < INT_MAX);
2460             Py_DECREF(v);
2461         }
2462         v = PyInt_FromLong((long)newflag);
2463         if (v == NULL)
2464             PyErr_Clear();
2465         else {
2466             if (PyObject_SetAttrString(f, "softspace", v) != 0)
2467                 PyErr_Clear();
2468             Py_DECREF(v);
2469         }
2470     }
2471     return (int)oldflag;
2472 }
2473 
2474 /* Interfaces to write objects/strings to file-like objects */
2475 
2476 int
2477 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2478 {
2479     PyObject *writer, *value, *args, *result;
2480     if (f == NULL) {
2481         PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2482         return -1;
2483     }
2484     else if (PyFile_Check(f)) {
2485         PyFileObject *fobj = (PyFileObject *) f;
2486 #ifdef Py_USING_UNICODE
2487         PyObject *enc = fobj->f_encoding;
2488         int result;
2489 #endif
2490         if (fobj->f_fp == NULL) {
2491             err_closed();
2492             return -1;
2493         }
2494 #ifdef Py_USING_UNICODE
2495         if ((flags & Py_PRINT_RAW) &&
2496             PyUnicode_Check(v) && enc != Py_None) {
2497             char *cenc = PyString_AS_STRING(enc);
2498             char *errors = fobj->f_errors == Py_None ?
2499               "strict" : PyString_AS_STRING(fobj->f_errors);
2500             value = PyUnicode_AsEncodedString(v, cenc, errors);
2501             if (value == NULL)
2502                 return -1;
2503         } else {
2504             value = v;
2505             Py_INCREF(value);
2506         }
2507         result = file_PyObject_Print(value, fobj, flags);
2508         Py_DECREF(value);
2509         return result;
2510 #else
2511         return file_PyObject_Print(v, fobj, flags);
2512 #endif
2513     }
2514     writer = PyObject_GetAttrString(f, "write");
2515     if (writer == NULL)
2516         return -1;
2517     if (flags & Py_PRINT_RAW) {
2518         if (PyUnicode_Check(v)) {
2519             value = v;
2520             Py_INCREF(value);
2521         } else
2522             value = PyObject_Str(v);
2523     }
2524     else
2525         value = PyObject_Repr(v);
2526     if (value == NULL) {
2527         Py_DECREF(writer);
2528         return -1;
2529     }
2530     args = PyTuple_Pack(1, value);
2531     if (args == NULL) {
2532         Py_DECREF(value);
2533         Py_DECREF(writer);
2534         return -1;
2535     }
2536     result = PyEval_CallObject(writer, args);
2537     Py_DECREF(args);
2538     Py_DECREF(value);
2539     Py_DECREF(writer);
2540     if (result == NULL)
2541         return -1;
2542     Py_DECREF(result);
2543     return 0;
2544 }
2545 
2546 int
2547 PyFile_WriteString(const char *s, PyObject *f)
2548 {
2549 
2550     if (f == NULL) {
2551         /* Should be caused by a pre-existing error */
2552         if (!PyErr_Occurred())
2553             PyErr_SetString(PyExc_SystemError,
2554                             "null file for PyFile_WriteString");
2555         return -1;
2556     }
2557     else if (PyFile_Check(f)) {
2558         PyFileObject *fobj = (PyFileObject *) f;
2559         FILE *fp = PyFile_AsFile(f);
2560         if (fp == NULL) {
2561             err_closed();
2562             return -1;
2563         }
2564         FILE_BEGIN_ALLOW_THREADS(fobj)
2565         fputs(s, fp);
2566         FILE_END_ALLOW_THREADS(fobj)
2567         return 0;
2568     }
2569     else if (!PyErr_Occurred()) {
2570         PyObject *v = PyString_FromString(s);
2571         int err;
2572         if (v == NULL)
2573             return -1;
2574         err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2575         Py_DECREF(v);
2576         return err;
2577     }
2578     else
2579         return -1;
2580 }
2581 
2582 /* Try to get a file-descriptor from a Python object.  If the object
2583    is an integer or long integer, its value is returned.  If not, the
2584    object's fileno() method is called if it exists; the method must return
2585    an integer or long integer, which is returned as the file descriptor value.
2586    -1 is returned on failure.
2587 */
2588 
2589 int PyObject_AsFileDescriptor(PyObject *o)
2590 {
2591     int fd;
2592     PyObject *meth;
2593 
2594     if (PyInt_Check(o)) {
2595         fd = PyInt_AsLong(o);
2596     }
2597     else if (PyLong_Check(o)) {
2598         fd = PyLong_AsLong(o);
2599     }
2600     else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2601     {
2602         PyObject *fno = PyEval_CallObject(meth, NULL);
2603         Py_DECREF(meth);
2604         if (fno == NULL)
2605             return -1;
2606 
2607         if (PyInt_Check(fno)) {
2608             fd = PyInt_AsLong(fno);
2609             Py_DECREF(fno);
2610         }
2611         else if (PyLong_Check(fno)) {
2612             fd = PyLong_AsLong(fno);
2613             Py_DECREF(fno);
2614         }
2615         else {
2616             PyErr_SetString(PyExc_TypeError,
2617                             "fileno() returned a non-integer");
2618             Py_DECREF(fno);
2619             return -1;
2620         }
2621     }
2622     else {
2623         PyErr_SetString(PyExc_TypeError,
2624                         "argument must be an int, or have a fileno() method.");
2625         return -1;
2626     }
2627 
2628     if (fd < 0) {
2629         PyErr_Format(PyExc_ValueError,
2630                      "file descriptor cannot be a negative integer (%i)",
2631                      fd);
2632         return -1;
2633     }
2634     return fd;
2635 }
2636 
2637 /* From here on we need access to the real fgets and fread */
2638 #undef fgets
2639 #undef fread
2640 
/*
** Py_UniversalNewlineFgets is an fgets variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** If fobj is NULL the routine always does newline conversion, and
** it may peek one char ahead to gobble the second char in \r\n.
** If fobj is non-NULL it must be a PyFileObject. In this case there
** is no readahead but instead a flag is used to skip a following
** \n on the next read. Also, if the file is open in binary mode
** the whole conversion is skipped. Finally, the routine keeps track of
** the different types of newlines seen.
** Note that we need no error handling: fgets() treats error and eof
** identically.
**
** At most n-1 characters are stored in buf, followed by a terminating
** '\0'.  Returns buf, or NULL if no character was stored.  If fobj is
** non-NULL but not a file object, errno is set to ENXIO and NULL is
** returned.
*/
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
    char *p = buf;              /* next position to store into */
    int c;
    int newlinetypes = 0;       /* NEWLINE_* bits seen so far */
    int skipnextlf = 0;         /* previous char was \r: swallow a following \n */
    int univ_newline = 1;

    if (fobj) {
        if (!PyFile_Check(fobj)) {
            errno = ENXIO;              /* What can you do... */
            return NULL;
        }
        univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
        /* Universal newlines not enabled for this file: plain fgets. */
        if ( !univ_newline )
            return fgets(buf, n, stream);
        /* Resume from the state saved in the file object. */
        newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
        skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
    }
    FLOCKFILE(stream);
    c = 'x'; /* Shut up gcc warning */
    /* --n reserves room for the terminating '\0'. */
    while (--n > 0 && (c = GETC(stream)) != EOF ) {
        if (skipnextlf ) {
            skipnextlf = 0;
            if (c == '\n') {
                /* Seeing a \n here with skipnextlf true
                ** means we saw a \r before.
                */
                newlinetypes |= NEWLINE_CRLF;
                c = GETC(stream);
                if (c == EOF) break;
            } else {
                /*
                ** Note that c == EOF also brings us here,
                ** so we're okay if the last char in the file
                ** is a CR.
                */
                newlinetypes |= NEWLINE_CR;
            }
        }
        if (c == '\r') {
            /* A \r is translated into a \n, and we skip
            ** an adjacent \n, if any. We don't set the
            ** newlinetypes flag until we've seen the next char.
            */
            skipnextlf = 1;
            c = '\n';
        } else if ( c == '\n') {
            newlinetypes |= NEWLINE_LF;
        }
        *p++ = c;
        /* Like fgets, stop after the (translated) end of line. */
        if (c == '\n') break;
    }
    /* Input ended while a \r was still pending: it was a lone CR. */
    if ( c == EOF && skipnextlf )
        newlinetypes |= NEWLINE_CR;
    FUNLOCKFILE(stream);
    *p = '\0';
    if (fobj) {
        /* Save the state for the next call on this file. */
        ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
        ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
    } else if ( skipnextlf ) {
        /* If we have no file object we cannot save the
        ** skipnextlf flag. We have to readahead, which
        ** will cause a pause if we're reading from an
        ** interactive stream, but that is very unlikely
        ** unless we're doing something silly like
        ** execfile("/dev/tty").
        */
        c = GETC(stream);
        if ( c != '\n' )
            ungetc(c, stream);
    }
    /* Nothing stored at all: report NULL. */
    if (p == buf)
        return NULL;
    return buf;
}
2732 
2733 /*
2734 ** Py_UniversalNewlineFread is an fread variation that understands
2735 ** all of \r, \n and \r\n conventions.
2736 ** The stream should be opened in binary mode.
2737 ** fobj must be a PyFileObject. In this case there
2738 ** is no readahead but in stead a flag is used to skip a following
2739 ** \n on the next read. Also, if the file is open in binary mode
2740 ** the whole conversion is skipped. Finally, the routine keeps track of
2741 ** the different types of newlines seen.
2742 */
2743 size_t
2744 Py_UniversalNewlineFread(char *buf, size_t n,
2745                          FILE *stream, PyObject *fobj)
2746 {
2747     char *dst = buf;
2748     PyFileObject *f = (PyFileObject *)fobj;
2749     int newlinetypes, skipnextlf;
2750 
2751     assert(buf != NULL);
2752     assert(stream != NULL);
2753 
2754     if (!fobj || !PyFile_Check(fobj)) {
2755         errno = ENXIO;          /* What can you do... */
2756         return 0;
2757     }
2758     if (!f->f_univ_newline)
2759         return fread(buf, 1, n, stream);
2760     newlinetypes = f->f_newlinetypes;
2761     skipnextlf = f->f_skipnextlf;
2762     /* Invariant:  n is the number of bytes remaining to be filled
2763      * in the buffer.
2764      */
2765     while (n) {
2766         size_t nread;
2767         int shortread;
2768         char *src = dst;
2769 
2770         nread = fread(dst, 1, n, stream);
2771         assert(nread <= n);
2772         if (nread == 0)
2773             break;
2774 
2775         n -= nread; /* assuming 1 byte out for each in; will adjust */
2776         shortread = n != 0;             /* true iff EOF or error */
2777         while (nread--) {
2778             char c = *src++;
2779             if (c == '\r') {
2780                 /* Save as LF and set flag to skip next LF. */
2781                 *dst++ = '\n';
2782                 skipnextlf = 1;
2783             }
2784             else if (skipnextlf && c == '\n') {
2785                 /* Skip LF, and remember we saw CR LF. */
2786                 skipnextlf = 0;
2787                 newlinetypes |= NEWLINE_CRLF;
2788                 ++n;
2789             }
2790             else {
2791                 /* Normal char to be stored in buffer.  Also
2792                  * update the newlinetypes flag if either this
2793                  * is an LF or the previous char was a CR.
2794                  */
2795                 if (c == '\n')
2796                     newlinetypes |= NEWLINE_LF;
2797                 else if (skipnextlf)
2798                     newlinetypes |= NEWLINE_CR;
2799                 *dst++ = c;
2800                 skipnextlf = 0;
2801             }
2802         }
2803         if (shortread) {
2804             /* If this is EOF, update type flags. */
2805             if (skipnextlf && feof(stream))
2806                 newlinetypes |= NEWLINE_CR;
2807             break;
2808         }
2809     }
2810     f->f_newlinetypes = newlinetypes;
2811     f->f_skipnextlf = skipnextlf;
2812     return dst - buf;
2813 }
2814 
2815 #ifdef __cplusplus
2816 }
2817 #endif
Python-2.7.3/Objects/fileobject.c