Python-2.7.3/Modules/bz2module.c

Location Tool Test ID Function Issue
/builddir/build/BUILD/Python-2.7.3/Modules/bz2module.c:173:0 cppcheck syntaxError syntax error
/builddir/build/BUILD/Python-2.7.3/Modules/bz2module.c:173:0 cppcheck syntaxError syntax error
   1 /*
   2 
   3 python-bz2 - python bz2 library interface
   4 
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7 
   8 */
   9 
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14 
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18 
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24 
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36 
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38 
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43 
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45 
  46 
  47 #ifdef BZ_CONFIG_ERROR
  48 
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51     (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54     (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57     bzs->total_out_lo32
  58 #endif
  59 
  60 #else /* ! BZ_CONFIG_ERROR */
  61 
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74 
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76 
  77 #endif /* ! BZ_CONFIG_ERROR */
  78 
  79 
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) do { \
  82     if (!PyThread_acquire_lock(obj->lock, 0)) { \
  83         Py_BEGIN_ALLOW_THREADS \
  84         PyThread_acquire_lock(obj->lock, 1); \
  85         Py_END_ALLOW_THREADS \
  86     } } while(0)
  87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  88 #else
  89 #define ACQUIRE_LOCK(obj)
  90 #define RELEASE_LOCK(obj)
  91 #endif
  92 
  93 /* Bits in f_newlinetypes */
  94 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  95 #define NEWLINE_CR 1            /* \r newline seen */
  96 #define NEWLINE_LF 2            /* \n newline seen */
  97 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  98 
  99 /* ===================================================================== */
 100 /* Structure definitions. */
 101 
 102 typedef struct {
 103     PyObject_HEAD
 104     PyObject *file;
 105 
 106     char* f_buf;                /* Allocated readahead buffer */
 107     char* f_bufend;             /* Points after last occupied position */
 108     char* f_bufptr;             /* Current buffer position */
 109 
 110     int f_softspace;            /* Flag used by 'print' command */
 111 
 112     int f_univ_newline;         /* Handle any newline convention */
 113     int f_newlinetypes;         /* Types of newlines seen */
 114     int f_skipnextlf;           /* Skip next \n */
 115 
 116     BZFILE *fp;
 117     int mode;
 118     Py_off_t pos;
 119     Py_off_t size;
 120 #ifdef WITH_THREAD
 121     PyThread_type_lock lock;
 122 #endif
 123 } BZ2FileObject;
 124 
 125 typedef struct {
 126     PyObject_HEAD
 127     bz_stream bzs;
 128     int running;
 129 #ifdef WITH_THREAD
 130     PyThread_type_lock lock;
 131 #endif
 132 } BZ2CompObject;
 133 
 134 typedef struct {
 135     PyObject_HEAD
 136     bz_stream bzs;
 137     int running;
 138     PyObject *unused_data;
 139 #ifdef WITH_THREAD
 140     PyThread_type_lock lock;
 141 #endif
 142 } BZ2DecompObject;
 143 
 144 /* ===================================================================== */
 145 /* Utility functions. */
 146 
 147 /* Refuse regular I/O if there's data in the iteration-buffer.
 148  * Mixing them would cause data to arrive out of order, as the read*
 149  * methods don't use the iteration buffer. */
 150 static int
 151 check_iterbuffered(BZ2FileObject *f)
 152 {
 153     if (f->f_buf != NULL &&
 154         (f->f_bufend - f->f_bufptr) > 0 &&
 155         f->f_buf[0] != '\0') {
 156         PyErr_SetString(PyExc_ValueError,
 157             "Mixing iteration and read methods would lose data");
 158         return -1;
 159     }
 160     return 0;
 161 }
 162 
 163 static int
 164 Util_CatchBZ2Error(int bzerror)
 165 {
 166     int ret = 0;
 167     switch(bzerror) {
 168         case BZ_OK:
 169         case BZ_STREAM_END:
 170             break;
 171 
 172 #ifdef BZ_CONFIG_ERROR
 173         case BZ_CONFIG_ERROR:
syntax error
(emitted by cppcheck)
syntax error
(emitted by cppcheck)
174 PyErr_SetString(PyExc_SystemError, 175 "the bz2 library was not compiled " 176 "correctly"); 177 ret = 1; 178 break; 179 #endif 180 181 case BZ_PARAM_ERROR: 182 PyErr_SetString(PyExc_ValueError, 183 "the bz2 library has received wrong " 184 "parameters"); 185 ret = 1; 186 break; 187 188 case BZ_MEM_ERROR: 189 PyErr_NoMemory(); 190 ret = 1; 191 break; 192 193 case BZ_DATA_ERROR: 194 case BZ_DATA_ERROR_MAGIC: 195 PyErr_SetString(PyExc_IOError, "invalid data stream"); 196 ret = 1; 197 break; 198 199 case BZ_IO_ERROR: 200 PyErr_SetString(PyExc_IOError, "unknown IO error"); 201 ret = 1; 202 break; 203 204 case BZ_UNEXPECTED_EOF: 205 PyErr_SetString(PyExc_EOFError, 206 "compressed file ended before the " 207 "logical end-of-stream was detected"); 208 ret = 1; 209 break; 210 211 case BZ_SEQUENCE_ERROR: 212 PyErr_SetString(PyExc_RuntimeError, 213 "wrong sequence of bz2 library " 214 "commands used"); 215 ret = 1; 216 break; 217 } 218 return ret; 219 } 220 221 #if BUFSIZ < 8192 222 #define SMALLCHUNK 8192 223 #else 224 #define SMALLCHUNK BUFSIZ 225 #endif 226 227 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */ 228 static size_t 229 Util_NewBufferSize(size_t currentsize) 230 { 231 /* Expand the buffer by an amount proportional to the current size, 232 giving us amortized linear-time behavior. Use a less-than-double 233 growth factor to avoid excessive allocation. */ 234 return currentsize + (currentsize >> 3) + 6; 235 } 236 237 /* This is a hacked version of Python's fileobject.c:get_line(). */ 238 static PyObject * 239 Util_GetLine(BZ2FileObject *f, int n) 240 { 241 char c; 242 char *buf, *end; 243 size_t total_v_size; /* total # of slots in buffer */ 244 size_t used_v_size; /* # used slots in buffer */ 245 size_t increment; /* amount to increment the buffer */ 246 PyObject *v; 247 int bzerror; 248 int bytes_read; 249 int newlinetypes = f->f_newlinetypes; 250 int skipnextlf = f->f_skipnextlf; 251 int univ_newline = f->f_univ_newline; 252 253 total_v_size = n > 0 ? n : 100; 254 v = PyString_FromStringAndSize((char *)NULL, total_v_size); 255 if (v == NULL) 256 return NULL; 257 258 buf = BUF(v); 259 end = buf + total_v_size; 260 261 for (;;) { 262 Py_BEGIN_ALLOW_THREADS 263 while (buf != end) { 264 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); 265 f->pos++; 266 if (bytes_read == 0) break; 267 if (univ_newline) { 268 if (skipnextlf) { 269 skipnextlf = 0; 270 if (c == '\n') { 271 /* Seeing a \n here with skipnextlf true means we 272 * saw a \r before. 273 */ 274 newlinetypes |= NEWLINE_CRLF; 275 if (bzerror != BZ_OK) break; 276 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1); 277 f->pos++; 278 if (bytes_read == 0) break; 279 } else { 280 newlinetypes |= NEWLINE_CR; 281 } 282 } 283 if (c == '\r') { 284 skipnextlf = 1; 285 c = '\n'; 286 } else if (c == '\n') 287 newlinetypes |= NEWLINE_LF; 288 } 289 *buf++ = c; 290 if (bzerror != BZ_OK || c == '\n') break; 291 } 292 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf) 293 newlinetypes |= NEWLINE_CR; 294 Py_END_ALLOW_THREADS 295 f->f_newlinetypes = newlinetypes; 296 f->f_skipnextlf = skipnextlf; 297 if (bzerror == BZ_STREAM_END) { 298 f->size = f->pos; 299 f->mode = MODE_READ_EOF; 300 break; 301 } else if (bzerror != BZ_OK) { 302 Util_CatchBZ2Error(bzerror); 303 Py_DECREF(v); 304 return NULL; 305 } 306 if (c == '\n') 307 break; 308 /* Must be because buf == end */ 309 if (n > 0) 310 break; 311 used_v_size = total_v_size; 312 increment = total_v_size >> 2; /* mild exponential growth */ 313 total_v_size += increment; 314 if (total_v_size > INT_MAX) { 315 PyErr_SetString(PyExc_OverflowError, 316 "line is longer than a Python string can hold"); 317 Py_DECREF(v); 318 return NULL; 319 } 320 if (_PyString_Resize(&v, total_v_size) < 0) 321 return NULL; 322 buf = BUF(v) + used_v_size; 323 end = BUF(v) + total_v_size; 324 } 325 326 used_v_size = buf - BUF(v); 327 if (used_v_size != total_v_size) 328 _PyString_Resize(&v, used_v_size); 329 return v; 330 } 331 332 /* This is a hacked version of Python's 333 * fileobject.c:Py_UniversalNewlineFread(). */ 334 size_t 335 Util_UnivNewlineRead(int *bzerror, BZFILE *stream, 336 char* buf, size_t n, BZ2FileObject *f) 337 { 338 char *dst = buf; 339 int newlinetypes, skipnextlf; 340 341 assert(buf != NULL); 342 assert(stream != NULL); 343 344 if (!f->f_univ_newline) 345 return BZ2_bzRead(bzerror, stream, buf, n); 346 347 newlinetypes = f->f_newlinetypes; 348 skipnextlf = f->f_skipnextlf; 349 350 /* Invariant: n is the number of bytes remaining to be filled 351 * in the buffer. 352 */ 353 while (n) { 354 size_t nread; 355 int shortread; 356 char *src = dst; 357 358 nread = BZ2_bzRead(bzerror, stream, dst, n); 359 assert(nread <= n); 360 n -= nread; /* assuming 1 byte out for each in; will adjust */ 361 shortread = n != 0; /* true iff EOF or error */ 362 while (nread--) { 363 char c = *src++; 364 if (c == '\r') { 365 /* Save as LF and set flag to skip next LF. */ 366 *dst++ = '\n'; 367 skipnextlf = 1; 368 } 369 else if (skipnextlf && c == '\n') { 370 /* Skip LF, and remember we saw CR LF. */ 371 skipnextlf = 0; 372 newlinetypes |= NEWLINE_CRLF; 373 ++n; 374 } 375 else { 376 /* Normal char to be stored in buffer. Also 377 * update the newlinetypes flag if either this 378 * is an LF or the previous char was a CR. 379 */ 380 if (c == '\n') 381 newlinetypes |= NEWLINE_LF; 382 else if (skipnextlf) 383 newlinetypes |= NEWLINE_CR; 384 *dst++ = c; 385 skipnextlf = 0; 386 } 387 } 388 if (shortread) { 389 /* If this is EOF, update type flags. */ 390 if (skipnextlf && *bzerror == BZ_STREAM_END) 391 newlinetypes |= NEWLINE_CR; 392 break; 393 } 394 } 395 f->f_newlinetypes = newlinetypes; 396 f->f_skipnextlf = skipnextlf; 397 return dst - buf; 398 } 399 400 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */ 401 static void 402 Util_DropReadAhead(BZ2FileObject *f) 403 { 404 if (f->f_buf != NULL) { 405 PyMem_Free(f->f_buf); 406 f->f_buf = NULL; 407 } 408 } 409 410 /* This is a hacked version of Python's fileobject.c:readahead(). */ 411 static int 412 Util_ReadAhead(BZ2FileObject *f, int bufsize) 413 { 414 int chunksize; 415 int bzerror; 416 417 if (f->f_buf != NULL) { 418 if((f->f_bufend - f->f_bufptr) >= 1) 419 return 0; 420 else 421 Util_DropReadAhead(f); 422 } 423 if (f->mode == MODE_READ_EOF) { 424 f->f_bufptr = f->f_buf; 425 f->f_bufend = f->f_buf; 426 return 0; 427 } 428 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { 429 PyErr_NoMemory(); 430 return -1; 431 } 432 Py_BEGIN_ALLOW_THREADS 433 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf, 434 bufsize, f); 435 Py_END_ALLOW_THREADS 436 f->pos += chunksize; 437 if (bzerror == BZ_STREAM_END) { 438 f->size = f->pos; 439 f->mode = MODE_READ_EOF; 440 } else if (bzerror != BZ_OK) { 441 Util_CatchBZ2Error(bzerror); 442 Util_DropReadAhead(f); 443 return -1; 444 } 445 f->f_bufptr = f->f_buf; 446 f->f_bufend = f->f_buf + chunksize; 447 return 0; 448 } 449 450 /* This is a hacked version of Python's 451 * fileobject.c:readahead_get_line_skip(). */ 452 static PyStringObject * 453 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize) 454 { 455 PyStringObject* s; 456 char *bufptr; 457 char *buf; 458 int len; 459 460 if (f->f_buf == NULL) 461 if (Util_ReadAhead(f, bufsize) < 0) 462 return NULL; 463 464 len = f->f_bufend - f->f_bufptr; 465 if (len == 0) 466 return (PyStringObject *) 467 PyString_FromStringAndSize(NULL, skip); 468 bufptr = memchr(f->f_bufptr, '\n', len); 469 if (bufptr != NULL) { 470 bufptr++; /* Count the '\n' */ 471 len = bufptr - f->f_bufptr; 472 s = (PyStringObject *) 473 PyString_FromStringAndSize(NULL, skip+len); 474 if (s == NULL) 475 return NULL; 476 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); 477 f->f_bufptr = bufptr; 478 if (bufptr == f->f_bufend) 479 Util_DropReadAhead(f); 480 } else { 481 bufptr = f->f_bufptr; 482 buf = f->f_buf; 483 f->f_buf = NULL; /* Force new readahead buffer */ 484 s = Util_ReadAheadGetLineSkip(f, skip+len, 485 bufsize + (bufsize>>2)); 486 if (s == NULL) { 487 PyMem_Free(buf); 488 return NULL; 489 } 490 memcpy(PyString_AS_STRING(s)+skip, bufptr, len); 491 PyMem_Free(buf); 492 } 493 return s; 494 } 495 496 /* ===================================================================== */ 497 /* Methods of BZ2File. */ 498 499 PyDoc_STRVAR(BZ2File_read__doc__, 500 "read([size]) -> string\n\ 501 \n\ 502 Read at most size uncompressed bytes, returned as a string. If the size\n\ 503 argument is negative or omitted, read until EOF is reached.\n\ 504 "); 505 506 /* This is a hacked version of Python's fileobject.c:file_read(). */ 507 static PyObject * 508 BZ2File_read(BZ2FileObject *self, PyObject *args) 509 { 510 long bytesrequested = -1; 511 size_t bytesread, buffersize, chunksize; 512 int bzerror; 513 PyObject *ret = NULL; 514 515 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested)) 516 return NULL; 517 518 ACQUIRE_LOCK(self); 519 switch (self->mode) { 520 case MODE_READ: 521 break; 522 case MODE_READ_EOF: 523 ret = PyString_FromString(""); 524 goto cleanup; 525 case MODE_CLOSED: 526 PyErr_SetString(PyExc_ValueError, 527 "I/O operation on closed file"); 528 goto cleanup; 529 default: 530 PyErr_SetString(PyExc_IOError, 531 "file is not ready for reading"); 532 goto cleanup; 533 } 534 535 /* refuse to mix with f.next() */ 536 if (check_iterbuffered(self)) 537 goto cleanup; 538 539 if (bytesrequested < 0) 540 buffersize = Util_NewBufferSize((size_t)0); 541 else 542 buffersize = bytesrequested; 543 if (buffersize > INT_MAX) { 544 PyErr_SetString(PyExc_OverflowError, 545 "requested number of bytes is " 546 "more than a Python string can hold"); 547 goto cleanup; 548 } 549 ret = PyString_FromStringAndSize((char *)NULL, buffersize); 550 if (ret == NULL) 551 goto cleanup; 552 bytesread = 0; 553 554 for (;;) { 555 Py_BEGIN_ALLOW_THREADS 556 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, 557 BUF(ret)+bytesread, 558 buffersize-bytesread, 559 self); 560 self->pos += chunksize; 561 Py_END_ALLOW_THREADS 562 bytesread += chunksize; 563 if (bzerror == BZ_STREAM_END) { 564 self->size = self->pos; 565 self->mode = MODE_READ_EOF; 566 break; 567 } else if (bzerror != BZ_OK) { 568 Util_CatchBZ2Error(bzerror); 569 Py_DECREF(ret); 570 ret = NULL; 571 goto cleanup; 572 } 573 if (bytesrequested < 0) { 574 buffersize = Util_NewBufferSize(buffersize); 575 if (_PyString_Resize(&ret, buffersize) < 0) 576 goto cleanup; 577 } else { 578 break; 579 } 580 } 581 if (bytesread != buffersize) 582 _PyString_Resize(&ret, bytesread); 583 584 cleanup: 585 RELEASE_LOCK(self); 586 return ret; 587 } 588 589 PyDoc_STRVAR(BZ2File_readline__doc__, 590 "readline([size]) -> string\n\ 591 \n\ 592 Return the next line from the file, as a string, retaining newline.\n\ 593 A non-negative size argument will limit the maximum number of bytes to\n\ 594 return (an incomplete line may be returned then). Return an empty\n\ 595 string at EOF.\n\ 596 "); 597 598 static PyObject * 599 BZ2File_readline(BZ2FileObject *self, PyObject *args) 600 { 601 PyObject *ret = NULL; 602 int sizehint = -1; 603 604 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint)) 605 return NULL; 606 607 ACQUIRE_LOCK(self); 608 switch (self->mode) { 609 case MODE_READ: 610 break; 611 case MODE_READ_EOF: 612 ret = PyString_FromString(""); 613 goto cleanup; 614 case MODE_CLOSED: 615 PyErr_SetString(PyExc_ValueError, 616 "I/O operation on closed file"); 617 goto cleanup; 618 default: 619 PyErr_SetString(PyExc_IOError, 620 "file is not ready for reading"); 621 goto cleanup; 622 } 623 624 /* refuse to mix with f.next() */ 625 if (check_iterbuffered(self)) 626 goto cleanup; 627 628 if (sizehint == 0) 629 ret = PyString_FromString(""); 630 else 631 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint); 632 633 cleanup: 634 RELEASE_LOCK(self); 635 return ret; 636 } 637 638 PyDoc_STRVAR(BZ2File_readlines__doc__, 639 "readlines([size]) -> list\n\ 640 \n\ 641 Call readline() repeatedly and return a list of lines read.\n\ 642 The optional size argument, if given, is an approximate bound on the\n\ 643 total number of bytes in the lines returned.\n\ 644 "); 645 646 /* This is a hacked version of Python's fileobject.c:file_readlines(). */ 647 static PyObject * 648 BZ2File_readlines(BZ2FileObject *self, PyObject *args) 649 { 650 long sizehint = 0; 651 PyObject *list = NULL; 652 PyObject *line; 653 char small_buffer[SMALLCHUNK]; 654 char *buffer = small_buffer; 655 size_t buffersize = SMALLCHUNK; 656 PyObject *big_buffer = NULL; 657 size_t nfilled = 0; 658 size_t nread; 659 size_t totalread = 0; 660 char *p, *q, *end; 661 int err; 662 int shortread = 0; 663 int bzerror; 664 665 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint)) 666 return NULL; 667 668 ACQUIRE_LOCK(self); 669 switch (self->mode) { 670 case MODE_READ: 671 break; 672 case MODE_READ_EOF: 673 list = PyList_New(0); 674 goto cleanup; 675 case MODE_CLOSED: 676 PyErr_SetString(PyExc_ValueError, 677 "I/O operation on closed file"); 678 goto cleanup; 679 default: 680 PyErr_SetString(PyExc_IOError, 681 "file is not ready for reading"); 682 goto cleanup; 683 } 684 685 /* refuse to mix with f.next() */ 686 if (check_iterbuffered(self)) 687 goto cleanup; 688 689 if ((list = PyList_New(0)) == NULL) 690 goto cleanup; 691 692 for (;;) { 693 Py_BEGIN_ALLOW_THREADS 694 nread = Util_UnivNewlineRead(&bzerror, self->fp, 695 buffer+nfilled, 696 buffersize-nfilled, self); 697 self->pos += nread; 698 Py_END_ALLOW_THREADS 699 if (bzerror == BZ_STREAM_END) { 700 self->size = self->pos; 701 self->mode = MODE_READ_EOF; 702 if (nread == 0) { 703 sizehint = 0; 704 break; 705 } 706 shortread = 1; 707 } else if (bzerror != BZ_OK) { 708 Util_CatchBZ2Error(bzerror); 709 error: 710 Py_DECREF(list); 711 list = NULL; 712 goto cleanup; 713 } 714 totalread += nread; 715 p = memchr(buffer+nfilled, '\n', nread); 716 if (!shortread && p == NULL) { 717 /* Need a larger buffer to fit this line */ 718 nfilled += nread; 719 buffersize *= 2; 720 if (buffersize > INT_MAX) { 721 PyErr_SetString(PyExc_OverflowError, 722 "line is longer than a Python string can hold"); 723 goto error; 724 } 725 if (big_buffer == NULL) { 726 /* Create the big buffer */ 727 big_buffer = PyString_FromStringAndSize( 728 NULL, buffersize); 729 if (big_buffer == NULL) 730 goto error; 731 buffer = PyString_AS_STRING(big_buffer); 732 memcpy(buffer, small_buffer, nfilled); 733 } 734 else { 735 /* Grow the big buffer */ 736 _PyString_Resize(&big_buffer, buffersize); 737 buffer = PyString_AS_STRING(big_buffer); 738 } 739 continue; 740 } 741 end = buffer+nfilled+nread; 742 q = buffer; 743 while (p != NULL) { 744 /* Process complete lines */ 745 p++; 746 line = PyString_FromStringAndSize(q, p-q); 747 if (line == NULL) 748 goto error; 749 err = PyList_Append(list, line); 750 Py_DECREF(line); 751 if (err != 0) 752 goto error; 753 q = p; 754 p = memchr(q, '\n', end-q); 755 } 756 /* Move the remaining incomplete line to the start */ 757 nfilled = end-q; 758 memmove(buffer, q, nfilled); 759 if (sizehint > 0) 760 if (totalread >= (size_t)sizehint) 761 break; 762 if (shortread) { 763 sizehint = 0; 764 break; 765 } 766 } 767 if (nfilled != 0) { 768 /* Partial last line */ 769 line = PyString_FromStringAndSize(buffer, nfilled); 770 if (line == NULL) 771 goto error; 772 if (sizehint > 0) { 773 /* Need to complete the last line */ 774 PyObject *rest = Util_GetLine(self, 0); 775 if (rest == NULL) { 776 Py_DECREF(line); 777 goto error; 778 } 779 PyString_Concat(&line, rest); 780 Py_DECREF(rest); 781 if (line == NULL) 782 goto error; 783 } 784 err = PyList_Append(list, line); 785 Py_DECREF(line); 786 if (err != 0) 787 goto error; 788 } 789 790 cleanup: 791 RELEASE_LOCK(self); 792 if (big_buffer) { 793 Py_DECREF(big_buffer); 794 } 795 return list; 796 } 797 798 PyDoc_STRVAR(BZ2File_xreadlines__doc__, 799 "xreadlines() -> self\n\ 800 \n\ 801 For backward compatibility. BZ2File objects now include the performance\n\ 802 optimizations previously implemented in the xreadlines module.\n\ 803 "); 804 805 PyDoc_STRVAR(BZ2File_write__doc__, 806 "write(data) -> None\n\ 807 \n\ 808 Write the 'data' string to file. Note that due to buffering, close() may\n\ 809 be needed before the file on disk reflects the data written.\n\ 810 "); 811 812 /* This is a hacked version of Python's fileobject.c:file_write(). */ 813 static PyObject * 814 BZ2File_write(BZ2FileObject *self, PyObject *args) 815 { 816 PyObject *ret = NULL; 817 Py_buffer pbuf; 818 char *buf; 819 int len; 820 int bzerror; 821 822 if (!PyArg_ParseTuple(args, "s*:write", &pbuf)) 823 return NULL; 824 buf = pbuf.buf; 825 len = pbuf.len; 826 827 ACQUIRE_LOCK(self); 828 switch (self->mode) { 829 case MODE_WRITE: 830 break; 831 832 case MODE_CLOSED: 833 PyErr_SetString(PyExc_ValueError, 834 "I/O operation on closed file"); 835 goto cleanup; 836 837 default: 838 PyErr_SetString(PyExc_IOError, 839 "file is not ready for writing"); 840 goto cleanup; 841 } 842 843 self->f_softspace = 0; 844 845 Py_BEGIN_ALLOW_THREADS 846 BZ2_bzWrite (&bzerror, self->fp, buf, len); 847 self->pos += len; 848 Py_END_ALLOW_THREADS 849 850 if (bzerror != BZ_OK) { 851 Util_CatchBZ2Error(bzerror); 852 goto cleanup; 853 } 854 855 Py_INCREF(Py_None); 856 ret = Py_None; 857 858 cleanup: 859 PyBuffer_Release(&pbuf); 860 RELEASE_LOCK(self); 861 return ret; 862 } 863 864 PyDoc_STRVAR(BZ2File_writelines__doc__, 865 "writelines(sequence_of_strings) -> None\n\ 866 \n\ 867 Write the sequence of strings to the file. Note that newlines are not\n\ 868 added. The sequence can be any iterable object producing strings. This is\n\ 869 equivalent to calling write() for each string.\n\ 870 "); 871 872 /* This is a hacked version of Python's fileobject.c:file_writelines(). */ 873 static PyObject * 874 BZ2File_writelines(BZ2FileObject *self, PyObject *seq) 875 { 876 #define CHUNKSIZE 1000 877 PyObject *list = NULL; 878 PyObject *iter = NULL; 879 PyObject *ret = NULL; 880 PyObject *line; 881 int i, j, index, len, islist; 882 int bzerror; 883 884 ACQUIRE_LOCK(self); 885 switch (self->mode) { 886 case MODE_WRITE: 887 break; 888 889 case MODE_CLOSED: 890 PyErr_SetString(PyExc_ValueError, 891 "I/O operation on closed file"); 892 goto error; 893 894 default: 895 PyErr_SetString(PyExc_IOError, 896 "file is not ready for writing"); 897 goto error; 898 } 899 900 islist = PyList_Check(seq); 901 if (!islist) { 902 iter = PyObject_GetIter(seq); 903 if (iter == NULL) { 904 PyErr_SetString(PyExc_TypeError, 905 "writelines() requires an iterable argument"); 906 goto error; 907 } 908 list = PyList_New(CHUNKSIZE); 909 if (list == NULL) 910 goto error; 911 } 912 913 /* Strategy: slurp CHUNKSIZE lines into a private list, 914 checking that they are all strings, then write that list 915 without holding the interpreter lock, then come back for more. */ 916 for (index = 0; ; index += CHUNKSIZE) { 917 if (islist) { 918 Py_XDECREF(list); 919 list = PyList_GetSlice(seq, index, index+CHUNKSIZE); 920 if (list == NULL) 921 goto error; 922 j = PyList_GET_SIZE(list); 923 } 924 else { 925 for (j = 0; j < CHUNKSIZE; j++) { 926 line = PyIter_Next(iter); 927 if (line == NULL) { 928 if (PyErr_Occurred()) 929 goto error; 930 break; 931 } 932 PyList_SetItem(list, j, line); 933 } 934 } 935 if (j == 0) 936 break; 937 938 /* Check that all entries are indeed strings. If not, 939 apply the same rules as for file.write() and 940 convert the rets to strings. This is slow, but 941 seems to be the only way since all conversion APIs 942 could potentially execute Python code. */ 943 for (i = 0; i < j; i++) { 944 PyObject *v = PyList_GET_ITEM(list, i); 945 if (!PyString_Check(v)) { 946 const char *buffer; 947 Py_ssize_t len; 948 if (PyObject_AsCharBuffer(v, &buffer, &len)) { 949 PyErr_SetString(PyExc_TypeError, 950 "writelines() " 951 "argument must be " 952 "a sequence of " 953 "strings"); 954 goto error; 955 } 956 line = PyString_FromStringAndSize(buffer, 957 len); 958 if (line == NULL) 959 goto error; 960 Py_DECREF(v); 961 PyList_SET_ITEM(list, i, line); 962 } 963 } 964 965 self->f_softspace = 0; 966 967 /* Since we are releasing the global lock, the 968 following code may *not* execute Python code. */ 969 Py_BEGIN_ALLOW_THREADS 970 for (i = 0; i < j; i++) { 971 line = PyList_GET_ITEM(list, i); 972 len = PyString_GET_SIZE(line); 973 BZ2_bzWrite (&bzerror, self->fp, 974 PyString_AS_STRING(line), len); 975 if (bzerror != BZ_OK) { 976 Py_BLOCK_THREADS 977 Util_CatchBZ2Error(bzerror); 978 goto error; 979 } 980 } 981 Py_END_ALLOW_THREADS 982 983 if (j < CHUNKSIZE) 984 break; 985 } 986 987 Py_INCREF(Py_None); 988 ret = Py_None; 989 990 error: 991 RELEASE_LOCK(self); 992 Py_XDECREF(list); 993 Py_XDECREF(iter); 994 return ret; 995 #undef CHUNKSIZE 996 } 997 998 PyDoc_STRVAR(BZ2File_seek__doc__, 999 "seek(offset [, whence]) -> None\n\ 1000 \n\ 1001 Move to new file position. Argument offset is a byte count. Optional\n\ 1002 argument whence defaults to 0 (offset from start of file, offset\n\ 1003 should be >= 0); other values are 1 (move relative to current position,\n\ 1004 positive or negative), and 2 (move relative to end of file, usually\n\ 1005 negative, although many platforms allow seeking beyond the end of a file).\n\ 1006 \n\ 1007 Note that seeking of bz2 files is emulated, and depending on the parameters\n\ 1008 the operation may be extremely slow.\n\ 1009 "); 1010 1011 static PyObject * 1012 BZ2File_seek(BZ2FileObject *self, PyObject *args) 1013 { 1014 int where = 0; 1015 PyObject *offobj; 1016 Py_off_t offset; 1017 char small_buffer[SMALLCHUNK]; 1018 char *buffer = small_buffer; 1019 size_t buffersize = SMALLCHUNK; 1020 Py_off_t bytesread = 0; 1021 size_t readsize; 1022 int chunksize; 1023 int bzerror; 1024 PyObject *ret = NULL; 1025 1026 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where)) 1027 return NULL; 1028 #if !defined(HAVE_LARGEFILE_SUPPORT) 1029 offset = PyInt_AsLong(offobj); 1030 #else 1031 offset = PyLong_Check(offobj) ? 1032 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj); 1033 #endif 1034 if (PyErr_Occurred()) 1035 return NULL; 1036 1037 ACQUIRE_LOCK(self); 1038 Util_DropReadAhead(self); 1039 switch (self->mode) { 1040 case MODE_READ: 1041 case MODE_READ_EOF: 1042 break; 1043 1044 case MODE_CLOSED: 1045 PyErr_SetString(PyExc_ValueError, 1046 "I/O operation on closed file"); 1047 goto cleanup; 1048 1049 default: 1050 PyErr_SetString(PyExc_IOError, 1051 "seek works only while reading"); 1052 goto cleanup; 1053 } 1054 1055 if (where == 2) { 1056 if (self->size == -1) { 1057 assert(self->mode != MODE_READ_EOF); 1058 for (;;) { 1059 Py_BEGIN_ALLOW_THREADS 1060 chunksize = Util_UnivNewlineRead( 1061 &bzerror, self->fp, 1062 buffer, buffersize, 1063 self); 1064 self->pos += chunksize; 1065 Py_END_ALLOW_THREADS 1066 1067 bytesread += chunksize; 1068 if (bzerror == BZ_STREAM_END) { 1069 break; 1070 } else if (bzerror != BZ_OK) { 1071 Util_CatchBZ2Error(bzerror); 1072 goto cleanup; 1073 } 1074 } 1075 self->mode = MODE_READ_EOF; 1076 self->size = self->pos; 1077 bytesread = 0; 1078 } 1079 offset = self->size + offset; 1080 } else if (where == 1) { 1081 offset = self->pos + offset; 1082 } 1083 1084 /* Before getting here, offset must be the absolute position the file 1085 * pointer should be set to. */ 1086 1087 if (offset >= self->pos) { 1088 /* we can move forward */ 1089 offset -= self->pos; 1090 } else { 1091 /* we cannot move back, so rewind the stream */ 1092 BZ2_bzReadClose(&bzerror, self->fp); 1093 if (self->fp) { 1094 PyFile_DecUseCount((PyFileObject *)self->file); 1095 self->fp = NULL; 1096 } 1097 if (bzerror != BZ_OK) { 1098 Util_CatchBZ2Error(bzerror); 1099 goto cleanup; 1100 } 1101 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0); 1102 if (!ret) 1103 goto cleanup; 1104 Py_DECREF(ret); 1105 ret = NULL; 1106 self->pos = 0; 1107 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file), 1108 0, 0, NULL, 0); 1109 if (self->fp) 1110 PyFile_IncUseCount((PyFileObject *)self->file); 1111 if (bzerror != BZ_OK) { 1112 Util_CatchBZ2Error(bzerror); 1113 goto cleanup; 1114 } 1115 self->mode = MODE_READ; 1116 } 1117 1118 if (offset <= 0 || self->mode == MODE_READ_EOF) 1119 goto exit; 1120 1121 /* Before getting here, offset must be set to the number of bytes 1122 * to walk forward. */ 1123 for (;;) { 1124 if (offset-bytesread > buffersize) 1125 readsize = buffersize; 1126 else 1127 /* offset might be wider that readsize, but the result 1128 * of the subtraction is bound by buffersize (see the 1129 * condition above). buffersize is 8192. */ 1130 readsize = (size_t)(offset-bytesread); 1131 Py_BEGIN_ALLOW_THREADS 1132 chunksize = Util_UnivNewlineRead(&bzerror, self->fp, 1133 buffer, readsize, self); 1134 self->pos += chunksize; 1135 Py_END_ALLOW_THREADS 1136 bytesread += chunksize; 1137 if (bzerror == BZ_STREAM_END) { 1138 self->size = self->pos; 1139 self->mode = MODE_READ_EOF; 1140 break; 1141 } else if (bzerror != BZ_OK) { 1142 Util_CatchBZ2Error(bzerror); 1143 goto cleanup; 1144 } 1145 if (bytesread == offset) 1146 break; 1147 } 1148 1149 exit: 1150 Py_INCREF(Py_None); 1151 ret = Py_None; 1152 1153 cleanup: 1154 RELEASE_LOCK(self); 1155 return ret; 1156 } 1157 1158 PyDoc_STRVAR(BZ2File_tell__doc__, 1159 "tell() -> int\n\ 1160 \n\ 1161 Return the current file position, an integer (may be a long integer).\n\ 1162 "); 1163 1164 static PyObject * 1165 BZ2File_tell(BZ2FileObject *self, PyObject *args) 1166 { 1167 PyObject *ret = NULL; 1168 1169 if (self->mode == MODE_CLOSED) { 1170 PyErr_SetString(PyExc_ValueError, 1171 "I/O operation on closed file"); 1172 goto cleanup; 1173 } 1174 1175 #if !defined(HAVE_LARGEFILE_SUPPORT) 1176 ret = PyInt_FromLong(self->pos); 1177 #else 1178 ret = PyLong_FromLongLong(self->pos); 1179 #endif 1180 1181 cleanup: 1182 return ret; 1183 } 1184 1185 PyDoc_STRVAR(BZ2File_close__doc__, 1186 "close() -> None or (perhaps) an integer\n\ 1187 \n\ 1188 Close the file. Sets data attribute .closed to true. A closed file\n\ 1189 cannot be used for further I/O operations. close() may be called more\n\ 1190 than once without error.\n\ 1191 "); 1192 1193 static PyObject * 1194 BZ2File_close(BZ2FileObject *self) 1195 { 1196 PyObject *ret = NULL; 1197 int bzerror = BZ_OK; 1198 1199 ACQUIRE_LOCK(self); 1200 switch (self->mode) { 1201 case MODE_READ: 1202 case MODE_READ_EOF: 1203 BZ2_bzReadClose(&bzerror, self->fp); 1204 break; 1205 case MODE_WRITE: 1206 BZ2_bzWriteClose(&bzerror, self->fp, 1207 0, NULL, NULL); 1208 break; 1209 } 1210 if (self->fp) { 1211 PyFile_DecUseCount((PyFileObject *)self->file); 1212 self->fp = NULL; 1213 } 1214 self->mode = MODE_CLOSED; 1215 ret = PyObject_CallMethod(self->file, "close", NULL); 1216 if (bzerror != BZ_OK) { 1217 Util_CatchBZ2Error(bzerror); 1218 Py_XDECREF(ret); 1219 ret = NULL; 1220 } 1221 1222 RELEASE_LOCK(self); 1223 return ret; 1224 } 1225 1226 PyDoc_STRVAR(BZ2File_enter_doc, 1227 "__enter__() -> self."); 1228 1229 static PyObject * 1230 BZ2File_enter(BZ2FileObject *self) 1231 { 1232 if (self->mode == MODE_CLOSED) { 1233 PyErr_SetString(PyExc_ValueError, 1234 "I/O operation on closed file"); 1235 return NULL; 1236 } 1237 Py_INCREF(self); 1238 return (PyObject *) self; 1239 } 1240 1241 PyDoc_STRVAR(BZ2File_exit_doc, 1242 "__exit__(*excinfo) -> None. Closes the file."); 1243 1244 static PyObject * 1245 BZ2File_exit(BZ2FileObject *self, PyObject *args) 1246 { 1247 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL); 1248 if (!ret) 1249 /* If error occurred, pass through */ 1250 return NULL; 1251 Py_DECREF(ret); 1252 Py_RETURN_NONE; 1253 } 1254 1255 1256 static PyObject *BZ2File_getiter(BZ2FileObject *self); 1257 1258 static PyMethodDef BZ2File_methods[] = { 1259 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__}, 1260 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__}, 1261 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__}, 1262 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__}, 1263 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__}, 1264 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__}, 1265 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__}, 1266 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__}, 1267 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__}, 1268 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc}, 1269 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc}, 1270 {NULL, NULL} /* sentinel */ 1271 }; 1272 1273 1274 /* ===================================================================== */ 1275 /* Getters and setters of BZ2File. */ 1276 1277 /* This is a hacked version of Python's fileobject.c:get_newlines(). */ 1278 static PyObject * 1279 BZ2File_get_newlines(BZ2FileObject *self, void *closure) 1280 { 1281 switch (self->f_newlinetypes) { 1282 case NEWLINE_UNKNOWN: 1283 Py_INCREF(Py_None); 1284 return Py_None; 1285 case NEWLINE_CR: 1286 return PyString_FromString("\r"); 1287 case NEWLINE_LF: 1288 return PyString_FromString("\n"); 1289 case NEWLINE_CR|NEWLINE_LF: 1290 return Py_BuildValue("(ss)", "\r", "\n"); 1291 case NEWLINE_CRLF: 1292 return PyString_FromString("\r\n"); 1293 case NEWLINE_CR|NEWLINE_CRLF: 1294 return Py_BuildValue("(ss)", "\r", "\r\n"); 1295 case NEWLINE_LF|NEWLINE_CRLF: 1296 return Py_BuildValue("(ss)", "\n", "\r\n"); 1297 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: 1298 return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); 1299 default: 1300 PyErr_Format(PyExc_SystemError, 1301 "Unknown newlines value 0x%x\n", 1302 self->f_newlinetypes); 1303 return NULL; 1304 } 1305 } 1306 1307 static PyObject * 1308 BZ2File_get_closed(BZ2FileObject *self, void *closure) 1309 { 1310 return PyInt_FromLong(self->mode == MODE_CLOSED); 1311 } 1312 1313 static PyObject * 1314 BZ2File_get_mode(BZ2FileObject *self, void *closure) 1315 { 1316 return PyObject_GetAttrString(self->file, "mode"); 1317 } 1318 1319 static PyObject * 1320 BZ2File_get_name(BZ2FileObject *self, void *closure) 1321 { 1322 return PyObject_GetAttrString(self->file, "name"); 1323 } 1324 1325 static PyGetSetDef BZ2File_getset[] = { 1326 {"closed", (getter)BZ2File_get_closed, NULL, 1327 "True if the file is closed"}, 1328 {"newlines", (getter)BZ2File_get_newlines, NULL, 1329 "end-of-line convention used in this file"}, 1330 {"mode", (getter)BZ2File_get_mode, NULL, 1331 "file mode ('r', 'w', or 'U')"}, 1332 {"name", (getter)BZ2File_get_name, NULL, 1333 "file name"}, 1334 {NULL} /* Sentinel */ 1335 }; 1336 1337 1338 /* ===================================================================== */ 1339 /* Members of BZ2File_Type. */ 1340 1341 #undef OFF 1342 #define OFF(x) offsetof(BZ2FileObject, x) 1343 1344 static PyMemberDef BZ2File_members[] = { 1345 {"softspace", T_INT, OFF(f_softspace), 0, 1346 "flag indicating that a space needs to be printed; used by print"}, 1347 {NULL} /* Sentinel */ 1348 }; 1349 1350 /* ===================================================================== */ 1351 /* Slot definitions for BZ2File_Type. */ 1352 1353 static int 1354 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) 1355 { 1356 static char *kwlist[] = {"filename", "mode", "buffering", 1357 "compresslevel", 0}; 1358 PyObject *name; 1359 char *mode = "r"; 1360 int buffering = -1; 1361 int compresslevel = 9; 1362 int bzerror; 1363 int mode_char = 0; 1364 1365 self->size = -1; 1366 1367 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File", 1368 kwlist, &name, &mode, &buffering, 1369 &compresslevel)) 1370 return -1; 1371 1372 if (compresslevel < 1 || compresslevel > 9) { 1373 PyErr_SetString(PyExc_ValueError, 1374 "compresslevel must be between 1 and 9"); 1375 return -1; 1376 } 1377 1378 for (;;) { 1379 int error = 0; 1380 switch (*mode) { 1381 case 'r': 1382 case 'w': 1383 if (mode_char) 1384 error = 1; 1385 mode_char = *mode; 1386 break; 1387 1388 case 'b': 1389 break; 1390 1391 case 'U': 1392 #ifdef __VMS 1393 self->f_univ_newline = 0; 1394 #else 1395 self->f_univ_newline = 1; 1396 #endif 1397 break; 1398 1399 default: 1400 error = 1; 1401 break; 1402 } 1403 if (error) { 1404 PyErr_Format(PyExc_ValueError, 1405 "invalid mode char %c", *mode); 1406 return -1; 1407 } 1408 mode++; 1409 if (*mode == '\0') 1410 break; 1411 } 1412 1413 if (mode_char == 0) { 1414 mode_char = 'r'; 1415 } 1416 1417 mode = (mode_char == 'r') ? "rb" : "wb"; 1418 1419 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)", 1420 name, mode, buffering); 1421 if (self->file == NULL) 1422 return -1; 1423 1424 /* From now on, we have stuff to dealloc, so jump to error label 1425 * instead of returning */ 1426 1427 #ifdef WITH_THREAD 1428 self->lock = PyThread_allocate_lock(); 1429 if (!self->lock) { 1430 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); 1431 goto error; 1432 } 1433 #endif 1434 1435 if (mode_char == 'r') 1436 self->fp = BZ2_bzReadOpen(&bzerror, 1437 PyFile_AsFile(self->file), 1438 0, 0, NULL, 0); 1439 else 1440 self->fp = BZ2_bzWriteOpen(&bzerror, 1441 PyFile_AsFile(self->file), 1442 compresslevel, 0, 0); 1443 1444 if (bzerror != BZ_OK) { 1445 Util_CatchBZ2Error(bzerror); 1446 goto error; 1447 } 1448 PyFile_IncUseCount((PyFileObject *)self->file); 1449 1450 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE; 1451 1452 return 0; 1453 1454 error: 1455 Py_CLEAR(self->file); 1456 #ifdef WITH_THREAD 1457 if (self->lock) { 1458 PyThread_free_lock(self->lock); 1459 self->lock = NULL; 1460 } 1461 #endif 1462 return -1; 1463 } 1464 1465 static void 1466 BZ2File_dealloc(BZ2FileObject *self) 1467 { 1468 int bzerror; 1469 #ifdef WITH_THREAD 1470 if (self->lock) 1471 PyThread_free_lock(self->lock); 1472 #endif 1473 switch (self->mode) { 1474 case MODE_READ: 1475 case MODE_READ_EOF: 1476 BZ2_bzReadClose(&bzerror, self->fp); 1477 break; 1478 case MODE_WRITE: 1479 BZ2_bzWriteClose(&bzerror, self->fp, 1480 0, NULL, NULL); 1481 break; 1482 } 1483 if (self->fp) { 1484 PyFile_DecUseCount((PyFileObject *)self->file); 1485 self->fp = NULL; 1486 } 1487 Util_DropReadAhead(self); 1488 Py_XDECREF(self->file); 1489 Py_TYPE(self)->tp_free((PyObject *)self); 1490 } 1491 1492 /* This is a hacked version of Python's fileobject.c:file_getiter(). */ 1493 static PyObject * 1494 BZ2File_getiter(BZ2FileObject *self) 1495 { 1496 if (self->mode == MODE_CLOSED) { 1497 PyErr_SetString(PyExc_ValueError, 1498 "I/O operation on closed file"); 1499 return NULL; 1500 } 1501 Py_INCREF((PyObject*)self); 1502 return (PyObject *)self; 1503 } 1504 1505 /* This is a hacked version of Python's fileobject.c:file_iternext(). */ 1506 #define READAHEAD_BUFSIZE 8192 1507 static PyObject * 1508 BZ2File_iternext(BZ2FileObject *self) 1509 { 1510 PyStringObject* ret; 1511 ACQUIRE_LOCK(self); 1512 if (self->mode == MODE_CLOSED) { 1513 RELEASE_LOCK(self); 1514 PyErr_SetString(PyExc_ValueError, 1515 "I/O operation on closed file"); 1516 return NULL; 1517 } 1518 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE); 1519 RELEASE_LOCK(self); 1520 if (ret == NULL || PyString_GET_SIZE(ret) == 0) { 1521 Py_XDECREF(ret); 1522 return NULL; 1523 } 1524 return (PyObject *)ret; 1525 } 1526 1527 /* ===================================================================== */ 1528 /* BZ2File_Type definition. */ 1529 1530 PyDoc_VAR(BZ2File__doc__) = 1531 PyDoc_STR( 1532 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\ 1533 \n\ 1534 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\ 1535 writing. When opened for writing, the file will be created if it doesn't\n\ 1536 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\ 1537 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\ 1538 is given, must be a number between 1 and 9.\n\ 1539 ") 1540 PyDoc_STR( 1541 "\n\ 1542 Add a 'U' to mode to open the file for input with universal newline\n\ 1543 support. Any line ending in the input file will be seen as a '\\n' in\n\ 1544 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\ 1545 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\ 1546 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\ 1547 newlines are available only when reading.\n\ 1548 ") 1549 ; 1550 1551 static PyTypeObject BZ2File_Type = { 1552 PyVarObject_HEAD_INIT(NULL, 0) 1553 "bz2.BZ2File", /*tp_name*/ 1554 sizeof(BZ2FileObject), /*tp_basicsize*/ 1555 0, /*tp_itemsize*/ 1556 (destructor)BZ2File_dealloc, /*tp_dealloc*/ 1557 0, /*tp_print*/ 1558 0, /*tp_getattr*/ 1559 0, /*tp_setattr*/ 1560 0, /*tp_compare*/ 1561 0, /*tp_repr*/ 1562 0, /*tp_as_number*/ 1563 0, /*tp_as_sequence*/ 1564 0, /*tp_as_mapping*/ 1565 0, /*tp_hash*/ 1566 0, /*tp_call*/ 1567 0, /*tp_str*/ 1568 PyObject_GenericGetAttr,/*tp_getattro*/ 1569 PyObject_GenericSetAttr,/*tp_setattro*/ 1570 0, /*tp_as_buffer*/ 1571 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ 1572 BZ2File__doc__, /*tp_doc*/ 1573 0, /*tp_traverse*/ 1574 0, /*tp_clear*/ 1575 0, /*tp_richcompare*/ 1576 0, /*tp_weaklistoffset*/ 1577 (getiterfunc)BZ2File_getiter, /*tp_iter*/ 1578 (iternextfunc)BZ2File_iternext, /*tp_iternext*/ 1579 BZ2File_methods, /*tp_methods*/ 1580 BZ2File_members, /*tp_members*/ 1581 BZ2File_getset, /*tp_getset*/ 1582 0, /*tp_base*/ 1583 0, /*tp_dict*/ 1584 0, /*tp_descr_get*/ 1585 0, /*tp_descr_set*/ 1586 0, /*tp_dictoffset*/ 1587 (initproc)BZ2File_init, /*tp_init*/ 1588 PyType_GenericAlloc, /*tp_alloc*/ 1589 PyType_GenericNew, /*tp_new*/ 1590 _PyObject_Del, /*tp_free*/ 1591 0, /*tp_is_gc*/ 1592 }; 1593 1594 1595 /* ===================================================================== */ 1596 /* Methods of BZ2Comp. */ 1597 1598 PyDoc_STRVAR(BZ2Comp_compress__doc__, 1599 "compress(data) -> string\n\ 1600 \n\ 1601 Provide more data to the compressor object. It will return chunks of\n\ 1602 compressed data whenever possible. When you've finished providing data\n\ 1603 to compress, call the flush() method to finish the compression process,\n\ 1604 and return what is left in the internal buffers.\n\ 1605 "); 1606 1607 static PyObject * 1608 BZ2Comp_compress(BZ2CompObject *self, PyObject *args) 1609 { 1610 Py_buffer pdata; 1611 char *data; 1612 int datasize; 1613 int bufsize = SMALLCHUNK; 1614 PY_LONG_LONG totalout; 1615 PyObject *ret = NULL; 1616 bz_stream *bzs = &self->bzs; 1617 int bzerror; 1618 1619 if (!PyArg_ParseTuple(args, "s*:compress", &pdata)) 1620 return NULL; 1621 data = pdata.buf; 1622 datasize = pdata.len; 1623 1624 if (datasize == 0) { 1625 PyBuffer_Release(&pdata); 1626 return PyString_FromString(""); 1627 } 1628 1629 ACQUIRE_LOCK(self); 1630 if (!self->running) { 1631 PyErr_SetString(PyExc_ValueError, 1632 "this object was already flushed"); 1633 goto error; 1634 } 1635 1636 ret = PyString_FromStringAndSize(NULL, bufsize); 1637 if (!ret) 1638 goto error; 1639 1640 bzs->next_in = data; 1641 bzs->avail_in = datasize; 1642 bzs->next_out = BUF(ret); 1643 bzs->avail_out = bufsize; 1644 1645 totalout = BZS_TOTAL_OUT(bzs); 1646 1647 for (;;) { 1648 Py_BEGIN_ALLOW_THREADS 1649 bzerror = BZ2_bzCompress(bzs, BZ_RUN); 1650 Py_END_ALLOW_THREADS 1651 if (bzerror != BZ_RUN_OK) { 1652 Util_CatchBZ2Error(bzerror); 1653 goto error; 1654 } 1655 if (bzs->avail_in == 0) 1656 break; /* no more input data */ 1657 if (bzs->avail_out == 0) { 1658 bufsize = Util_NewBufferSize(bufsize); 1659 if (_PyString_Resize(&ret, bufsize) < 0) { 1660 BZ2_bzCompressEnd(bzs); 1661 goto error; 1662 } 1663 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) 1664 - totalout); 1665 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 1666 } 1667 } 1668 1669 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); 1670 1671 RELEASE_LOCK(self); 1672 PyBuffer_Release(&pdata); 1673 return ret; 1674 1675 error: 1676 RELEASE_LOCK(self); 1677 PyBuffer_Release(&pdata); 1678 Py_XDECREF(ret); 1679 return NULL; 1680 } 1681 1682 PyDoc_STRVAR(BZ2Comp_flush__doc__, 1683 "flush() -> string\n\ 1684 \n\ 1685 Finish the compression process and return what is left in internal buffers.\n\ 1686 You must not use the compressor object after calling this method.\n\ 1687 "); 1688 1689 static PyObject * 1690 BZ2Comp_flush(BZ2CompObject *self) 1691 { 1692 int bufsize = SMALLCHUNK; 1693 PyObject *ret = NULL; 1694 bz_stream *bzs = &self->bzs; 1695 PY_LONG_LONG totalout; 1696 int bzerror; 1697 1698 ACQUIRE_LOCK(self); 1699 if (!self->running) { 1700 PyErr_SetString(PyExc_ValueError, "object was already " 1701 "flushed"); 1702 goto error; 1703 } 1704 self->running = 0; 1705 1706 ret = PyString_FromStringAndSize(NULL, bufsize); 1707 if (!ret) 1708 goto error; 1709 1710 bzs->next_out = BUF(ret); 1711 bzs->avail_out = bufsize; 1712 1713 totalout = BZS_TOTAL_OUT(bzs); 1714 1715 for (;;) { 1716 Py_BEGIN_ALLOW_THREADS 1717 bzerror = BZ2_bzCompress(bzs, BZ_FINISH); 1718 Py_END_ALLOW_THREADS 1719 if (bzerror == BZ_STREAM_END) { 1720 break; 1721 } else if (bzerror != BZ_FINISH_OK) { 1722 Util_CatchBZ2Error(bzerror); 1723 goto error; 1724 } 1725 if (bzs->avail_out == 0) { 1726 bufsize = Util_NewBufferSize(bufsize); 1727 if (_PyString_Resize(&ret, bufsize) < 0) 1728 goto error; 1729 bzs->next_out = BUF(ret); 1730 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) 1731 - totalout); 1732 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 1733 } 1734 } 1735 1736 if (bzs->avail_out != 0) 1737 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); 1738 1739 RELEASE_LOCK(self); 1740 return ret; 1741 1742 error: 1743 RELEASE_LOCK(self); 1744 Py_XDECREF(ret); 1745 return NULL; 1746 } 1747 1748 static PyMethodDef BZ2Comp_methods[] = { 1749 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS, 1750 BZ2Comp_compress__doc__}, 1751 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS, 1752 BZ2Comp_flush__doc__}, 1753 {NULL, NULL} /* sentinel */ 1754 }; 1755 1756 1757 /* ===================================================================== */ 1758 /* Slot definitions for BZ2Comp_Type. */ 1759 1760 static int 1761 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs) 1762 { 1763 int compresslevel = 9; 1764 int bzerror; 1765 static char *kwlist[] = {"compresslevel", 0}; 1766 1767 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor", 1768 kwlist, &compresslevel)) 1769 return -1; 1770 1771 if (compresslevel < 1 || compresslevel > 9) { 1772 PyErr_SetString(PyExc_ValueError, 1773 "compresslevel must be between 1 and 9"); 1774 goto error; 1775 } 1776 1777 #ifdef WITH_THREAD 1778 self->lock = PyThread_allocate_lock(); 1779 if (!self->lock) { 1780 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); 1781 goto error; 1782 } 1783 #endif 1784 1785 memset(&self->bzs, 0, sizeof(bz_stream)); 1786 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); 1787 if (bzerror != BZ_OK) { 1788 Util_CatchBZ2Error(bzerror); 1789 goto error; 1790 } 1791 1792 self->running = 1; 1793 1794 return 0; 1795 error: 1796 #ifdef WITH_THREAD 1797 if (self->lock) { 1798 PyThread_free_lock(self->lock); 1799 self->lock = NULL; 1800 } 1801 #endif 1802 return -1; 1803 } 1804 1805 static void 1806 BZ2Comp_dealloc(BZ2CompObject *self) 1807 { 1808 #ifdef WITH_THREAD 1809 if (self->lock) 1810 PyThread_free_lock(self->lock); 1811 #endif 1812 BZ2_bzCompressEnd(&self->bzs); 1813 Py_TYPE(self)->tp_free((PyObject *)self); 1814 } 1815 1816 1817 /* ===================================================================== */ 1818 /* BZ2Comp_Type definition. */ 1819 1820 PyDoc_STRVAR(BZ2Comp__doc__, 1821 "BZ2Compressor([compresslevel=9]) -> compressor object\n\ 1822 \n\ 1823 Create a new compressor object. This object may be used to compress\n\ 1824 data sequentially. If you want to compress data in one shot, use the\n\ 1825 compress() function instead. The compresslevel parameter, if given,\n\ 1826 must be a number between 1 and 9.\n\ 1827 "); 1828 1829 static PyTypeObject BZ2Comp_Type = { 1830 PyVarObject_HEAD_INIT(NULL, 0) 1831 "bz2.BZ2Compressor", /*tp_name*/ 1832 sizeof(BZ2CompObject), /*tp_basicsize*/ 1833 0, /*tp_itemsize*/ 1834 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/ 1835 0, /*tp_print*/ 1836 0, /*tp_getattr*/ 1837 0, /*tp_setattr*/ 1838 0, /*tp_compare*/ 1839 0, /*tp_repr*/ 1840 0, /*tp_as_number*/ 1841 0, /*tp_as_sequence*/ 1842 0, /*tp_as_mapping*/ 1843 0, /*tp_hash*/ 1844 0, /*tp_call*/ 1845 0, /*tp_str*/ 1846 PyObject_GenericGetAttr,/*tp_getattro*/ 1847 PyObject_GenericSetAttr,/*tp_setattro*/ 1848 0, /*tp_as_buffer*/ 1849 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ 1850 BZ2Comp__doc__, /*tp_doc*/ 1851 0, /*tp_traverse*/ 1852 0, /*tp_clear*/ 1853 0, /*tp_richcompare*/ 1854 0, /*tp_weaklistoffset*/ 1855 0, /*tp_iter*/ 1856 0, /*tp_iternext*/ 1857 BZ2Comp_methods, /*tp_methods*/ 1858 0, /*tp_members*/ 1859 0, /*tp_getset*/ 1860 0, /*tp_base*/ 1861 0, /*tp_dict*/ 1862 0, /*tp_descr_get*/ 1863 0, /*tp_descr_set*/ 1864 0, /*tp_dictoffset*/ 1865 (initproc)BZ2Comp_init, /*tp_init*/ 1866 PyType_GenericAlloc, /*tp_alloc*/ 1867 PyType_GenericNew, /*tp_new*/ 1868 _PyObject_Del, /*tp_free*/ 1869 0, /*tp_is_gc*/ 1870 }; 1871 1872 1873 /* ===================================================================== */ 1874 /* Members of BZ2Decomp. */ 1875 1876 #undef OFF 1877 #define OFF(x) offsetof(BZ2DecompObject, x) 1878 1879 static PyMemberDef BZ2Decomp_members[] = { 1880 {"unused_data", T_OBJECT, OFF(unused_data), RO}, 1881 {NULL} /* Sentinel */ 1882 }; 1883 1884 1885 /* ===================================================================== */ 1886 /* Methods of BZ2Decomp. */ 1887 1888 PyDoc_STRVAR(BZ2Decomp_decompress__doc__, 1889 "decompress(data) -> string\n\ 1890 \n\ 1891 Provide more data to the decompressor object. It will return chunks\n\ 1892 of decompressed data whenever possible. If you try to decompress data\n\ 1893 after the end of stream is found, EOFError will be raised. If any data\n\ 1894 was found after the end of stream, it'll be ignored and saved in\n\ 1895 unused_data attribute.\n\ 1896 "); 1897 1898 static PyObject * 1899 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args) 1900 { 1901 Py_buffer pdata; 1902 char *data; 1903 int datasize; 1904 int bufsize = SMALLCHUNK; 1905 PY_LONG_LONG totalout; 1906 PyObject *ret = NULL; 1907 bz_stream *bzs = &self->bzs; 1908 int bzerror; 1909 1910 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) 1911 return NULL; 1912 data = pdata.buf; 1913 datasize = pdata.len; 1914 1915 ACQUIRE_LOCK(self); 1916 if (!self->running) { 1917 PyErr_SetString(PyExc_EOFError, "end of stream was " 1918 "already found"); 1919 goto error; 1920 } 1921 1922 ret = PyString_FromStringAndSize(NULL, bufsize); 1923 if (!ret) 1924 goto error; 1925 1926 bzs->next_in = data; 1927 bzs->avail_in = datasize; 1928 bzs->next_out = BUF(ret); 1929 bzs->avail_out = bufsize; 1930 1931 totalout = BZS_TOTAL_OUT(bzs); 1932 1933 for (;;) { 1934 Py_BEGIN_ALLOW_THREADS 1935 bzerror = BZ2_bzDecompress(bzs); 1936 Py_END_ALLOW_THREADS 1937 if (bzerror == BZ_STREAM_END) { 1938 if (bzs->avail_in != 0) { 1939 Py_DECREF(self->unused_data); 1940 self->unused_data = 1941 PyString_FromStringAndSize(bzs->next_in, 1942 bzs->avail_in); 1943 } 1944 self->running = 0; 1945 break; 1946 } 1947 if (bzerror != BZ_OK) { 1948 Util_CatchBZ2Error(bzerror); 1949 goto error; 1950 } 1951 if (bzs->avail_in == 0) 1952 break; /* no more input data */ 1953 if (bzs->avail_out == 0) { 1954 bufsize = Util_NewBufferSize(bufsize); 1955 if (_PyString_Resize(&ret, bufsize) < 0) { 1956 BZ2_bzDecompressEnd(bzs); 1957 goto error; 1958 } 1959 bzs->next_out = BUF(ret); 1960 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs) 1961 - totalout); 1962 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 1963 } 1964 } 1965 1966 if (bzs->avail_out != 0) 1967 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)); 1968 1969 RELEASE_LOCK(self); 1970 PyBuffer_Release(&pdata); 1971 return ret; 1972 1973 error: 1974 RELEASE_LOCK(self); 1975 PyBuffer_Release(&pdata); 1976 Py_XDECREF(ret); 1977 return NULL; 1978 } 1979 1980 static PyMethodDef BZ2Decomp_methods[] = { 1981 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__}, 1982 {NULL, NULL} /* sentinel */ 1983 }; 1984 1985 1986 /* ===================================================================== */ 1987 /* Slot definitions for BZ2Decomp_Type. */ 1988 1989 static int 1990 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs) 1991 { 1992 int bzerror; 1993 1994 if (!PyArg_ParseTuple(args, ":BZ2Decompressor")) 1995 return -1; 1996 1997 #ifdef WITH_THREAD 1998 self->lock = PyThread_allocate_lock(); 1999 if (!self->lock) { 2000 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); 2001 goto error; 2002 } 2003 #endif 2004 2005 self->unused_data = PyString_FromString(""); 2006 if (!self->unused_data) 2007 goto error; 2008 2009 memset(&self->bzs, 0, sizeof(bz_stream)); 2010 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); 2011 if (bzerror != BZ_OK) { 2012 Util_CatchBZ2Error(bzerror); 2013 goto error; 2014 } 2015 2016 self->running = 1; 2017 2018 return 0; 2019 2020 error: 2021 #ifdef WITH_THREAD 2022 if (self->lock) { 2023 PyThread_free_lock(self->lock); 2024 self->lock = NULL; 2025 } 2026 #endif 2027 Py_CLEAR(self->unused_data); 2028 return -1; 2029 } 2030 2031 static void 2032 BZ2Decomp_dealloc(BZ2DecompObject *self) 2033 { 2034 #ifdef WITH_THREAD 2035 if (self->lock) 2036 PyThread_free_lock(self->lock); 2037 #endif 2038 Py_XDECREF(self->unused_data); 2039 BZ2_bzDecompressEnd(&self->bzs); 2040 Py_TYPE(self)->tp_free((PyObject *)self); 2041 } 2042 2043 2044 /* ===================================================================== */ 2045 /* BZ2Decomp_Type definition. */ 2046 2047 PyDoc_STRVAR(BZ2Decomp__doc__, 2048 "BZ2Decompressor() -> decompressor object\n\ 2049 \n\ 2050 Create a new decompressor object. This object may be used to decompress\n\ 2051 data sequentially. If you want to decompress data in one shot, use the\n\ 2052 decompress() function instead.\n\ 2053 "); 2054 2055 static PyTypeObject BZ2Decomp_Type = { 2056 PyVarObject_HEAD_INIT(NULL, 0) 2057 "bz2.BZ2Decompressor", /*tp_name*/ 2058 sizeof(BZ2DecompObject), /*tp_basicsize*/ 2059 0, /*tp_itemsize*/ 2060 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/ 2061 0, /*tp_print*/ 2062 0, /*tp_getattr*/ 2063 0, /*tp_setattr*/ 2064 0, /*tp_compare*/ 2065 0, /*tp_repr*/ 2066 0, /*tp_as_number*/ 2067 0, /*tp_as_sequence*/ 2068 0, /*tp_as_mapping*/ 2069 0, /*tp_hash*/ 2070 0, /*tp_call*/ 2071 0, /*tp_str*/ 2072 PyObject_GenericGetAttr,/*tp_getattro*/ 2073 PyObject_GenericSetAttr,/*tp_setattro*/ 2074 0, /*tp_as_buffer*/ 2075 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ 2076 BZ2Decomp__doc__, /*tp_doc*/ 2077 0, /*tp_traverse*/ 2078 0, /*tp_clear*/ 2079 0, /*tp_richcompare*/ 2080 0, /*tp_weaklistoffset*/ 2081 0, /*tp_iter*/ 2082 0, /*tp_iternext*/ 2083 BZ2Decomp_methods, /*tp_methods*/ 2084 BZ2Decomp_members, /*tp_members*/ 2085 0, /*tp_getset*/ 2086 0, /*tp_base*/ 2087 0, /*tp_dict*/ 2088 0, /*tp_descr_get*/ 2089 0, /*tp_descr_set*/ 2090 0, /*tp_dictoffset*/ 2091 (initproc)BZ2Decomp_init, /*tp_init*/ 2092 PyType_GenericAlloc, /*tp_alloc*/ 2093 PyType_GenericNew, /*tp_new*/ 2094 _PyObject_Del, /*tp_free*/ 2095 0, /*tp_is_gc*/ 2096 }; 2097 2098 2099 /* ===================================================================== */ 2100 /* Module functions. */ 2101 2102 PyDoc_STRVAR(bz2_compress__doc__, 2103 "compress(data [, compresslevel=9]) -> string\n\ 2104 \n\ 2105 Compress data in one shot. If you want to compress data sequentially,\n\ 2106 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\ 2107 given, must be a number between 1 and 9.\n\ 2108 "); 2109 2110 static PyObject * 2111 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs) 2112 { 2113 int compresslevel=9; 2114 Py_buffer pdata; 2115 char *data; 2116 int datasize; 2117 int bufsize; 2118 PyObject *ret = NULL; 2119 bz_stream _bzs; 2120 bz_stream *bzs = &_bzs; 2121 int bzerror; 2122 static char *kwlist[] = {"data", "compresslevel", 0}; 2123 2124 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", 2125 kwlist, &pdata, 2126 &compresslevel)) 2127 return NULL; 2128 data = pdata.buf; 2129 datasize = pdata.len; 2130 2131 if (compresslevel < 1 || compresslevel > 9) { 2132 PyErr_SetString(PyExc_ValueError, 2133 "compresslevel must be between 1 and 9"); 2134 PyBuffer_Release(&pdata); 2135 return NULL; 2136 } 2137 2138 /* Conforming to bz2 manual, this is large enough to fit compressed 2139 * data in one shot. We will check it later anyway. */ 2140 bufsize = datasize + (datasize/100+1) + 600; 2141 2142 ret = PyString_FromStringAndSize(NULL, bufsize); 2143 if (!ret) { 2144 PyBuffer_Release(&pdata); 2145 return NULL; 2146 } 2147 2148 memset(bzs, 0, sizeof(bz_stream)); 2149 2150 bzs->next_in = data; 2151 bzs->avail_in = datasize; 2152 bzs->next_out = BUF(ret); 2153 bzs->avail_out = bufsize; 2154 2155 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0); 2156 if (bzerror != BZ_OK) { 2157 Util_CatchBZ2Error(bzerror); 2158 PyBuffer_Release(&pdata); 2159 Py_DECREF(ret); 2160 return NULL; 2161 } 2162 2163 for (;;) { 2164 Py_BEGIN_ALLOW_THREADS 2165 bzerror = BZ2_bzCompress(bzs, BZ_FINISH); 2166 Py_END_ALLOW_THREADS 2167 if (bzerror == BZ_STREAM_END) { 2168 break; 2169 } else if (bzerror != BZ_FINISH_OK) { 2170 BZ2_bzCompressEnd(bzs); 2171 Util_CatchBZ2Error(bzerror); 2172 PyBuffer_Release(&pdata); 2173 Py_DECREF(ret); 2174 return NULL; 2175 } 2176 if (bzs->avail_out == 0) { 2177 bufsize = Util_NewBufferSize(bufsize); 2178 if (_PyString_Resize(&ret, bufsize) < 0) { 2179 BZ2_bzCompressEnd(bzs); 2180 PyBuffer_Release(&pdata); 2181 Py_DECREF(ret); 2182 return NULL; 2183 } 2184 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); 2185 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 2186 } 2187 } 2188 2189 if (bzs->avail_out != 0) 2190 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)); 2191 BZ2_bzCompressEnd(bzs); 2192 2193 PyBuffer_Release(&pdata); 2194 return ret; 2195 } 2196 2197 PyDoc_STRVAR(bz2_decompress__doc__, 2198 "decompress(data) -> decompressed data\n\ 2199 \n\ 2200 Decompress data in one shot. If you want to decompress data sequentially,\n\ 2201 use an instance of BZ2Decompressor instead.\n\ 2202 "); 2203 2204 static PyObject * 2205 bz2_decompress(PyObject *self, PyObject *args) 2206 { 2207 Py_buffer pdata; 2208 char *data; 2209 int datasize; 2210 int bufsize = SMALLCHUNK; 2211 PyObject *ret; 2212 bz_stream _bzs; 2213 bz_stream *bzs = &_bzs; 2214 int bzerror; 2215 2216 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata)) 2217 return NULL; 2218 data = pdata.buf; 2219 datasize = pdata.len; 2220 2221 if (datasize == 0) { 2222 PyBuffer_Release(&pdata); 2223 return PyString_FromString(""); 2224 } 2225 2226 ret = PyString_FromStringAndSize(NULL, bufsize); 2227 if (!ret) { 2228 PyBuffer_Release(&pdata); 2229 return NULL; 2230 } 2231 2232 memset(bzs, 0, sizeof(bz_stream)); 2233 2234 bzs->next_in = data; 2235 bzs->avail_in = datasize; 2236 bzs->next_out = BUF(ret); 2237 bzs->avail_out = bufsize; 2238 2239 bzerror = BZ2_bzDecompressInit(bzs, 0, 0); 2240 if (bzerror != BZ_OK) { 2241 Util_CatchBZ2Error(bzerror); 2242 Py_DECREF(ret); 2243 PyBuffer_Release(&pdata); 2244 return NULL; 2245 } 2246 2247 for (;;) { 2248 Py_BEGIN_ALLOW_THREADS 2249 bzerror = BZ2_bzDecompress(bzs); 2250 Py_END_ALLOW_THREADS 2251 if (bzerror == BZ_STREAM_END) { 2252 break; 2253 } else if (bzerror != BZ_OK) { 2254 BZ2_bzDecompressEnd(bzs); 2255 Util_CatchBZ2Error(bzerror); 2256 PyBuffer_Release(&pdata); 2257 Py_DECREF(ret); 2258 return NULL; 2259 } 2260 if (bzs->avail_in == 0) { 2261 BZ2_bzDecompressEnd(bzs); 2262 PyErr_SetString(PyExc_ValueError, 2263 "couldn't find end of stream"); 2264 PyBuffer_Release(&pdata); 2265 Py_DECREF(ret); 2266 return NULL; 2267 } 2268 if (bzs->avail_out == 0) { 2269 bufsize = Util_NewBufferSize(bufsize); 2270 if (_PyString_Resize(&ret, bufsize) < 0) { 2271 BZ2_bzDecompressEnd(bzs); 2272 PyBuffer_Release(&pdata); 2273 Py_DECREF(ret); 2274 return NULL; 2275 } 2276 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); 2277 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); 2278 } 2279 } 2280 2281 if (bzs->avail_out != 0) 2282 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)); 2283 BZ2_bzDecompressEnd(bzs); 2284 PyBuffer_Release(&pdata); 2285 2286 return ret; 2287 } 2288 2289 static PyMethodDef bz2_methods[] = { 2290 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS, 2291 bz2_compress__doc__}, 2292 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS, 2293 bz2_decompress__doc__}, 2294 {NULL, NULL} /* sentinel */ 2295 }; 2296 2297 /* ===================================================================== */ 2298 /* Initialization function. */ 2299 2300 PyDoc_STRVAR(bz2__doc__, 2301 "The python bz2 module provides a comprehensive interface for\n\ 2302 the bz2 compression library. It implements a complete file\n\ 2303 interface, one shot (de)compression functions, and types for\n\ 2304 sequential (de)compression.\n\ 2305 "); 2306 2307 PyMODINIT_FUNC 2308 initbz2(void) 2309 { 2310 PyObject *m; 2311 2312 if (PyType_Ready(&BZ2File_Type) < 0) 2313 return; 2314 if (PyType_Ready(&BZ2Comp_Type) < 0) 2315 return; 2316 if (PyType_Ready(&BZ2Decomp_Type) < 0) 2317 return; 2318 2319 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__); 2320 if (m == NULL) 2321 return; 2322 2323 PyModule_AddObject(m, "__author__", PyString_FromString(__author__)); 2324 2325 Py_INCREF(&BZ2File_Type); 2326 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type); 2327 2328 Py_INCREF(&BZ2Comp_Type); 2329 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type); 2330 2331 Py_INCREF(&BZ2Decomp_Type); 2332 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type); 2333 }