1 /*
2
3 python-bz2 - python bz2 library interface
4
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8 */
9
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
14
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
18
/* Author credit string for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    "Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* File-offset type used for stream positions; 64-bit wide when the
 * platform offers large file support (mirrors Objects/fileobject.c). */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
/* Either no large-file support is configured, or off_t is already
 * wide enough: use off_t directly. */
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character buffer of a Python string object.  The argument is
 * parenthesised so that an expression argument is cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* States of a BZ2File object (stored in BZ2FileObject.mode). */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check against BZ2File_Type. */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
45
46
/* BZ_CONFIG_ERROR is used here as the marker for the bzlib API that has
 * BZ2_-prefixed names and split 32-bit total_out counters; without it
 * the unprefixed names and a single total_out field are used.
 *
 * BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded
 * in the bz_stream pointed to by bzs.  The argument and the expansion
 * are fully parenthesised so the macro is safe with any pointer
 * expression (e.g. BZS_TOTAL_OUT(&stream)). */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* Map the BZ2_-prefixed names used in this file onto the old API. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.
 *
 * With thread support, ACQUIRE_LOCK first tries a non-blocking acquire
 * and, only if that fails, performs a blocking acquire inside a
 * Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS section.
 * Without thread support both macros expand to nothing.
 * The object argument is parenthesised so any pointer expression can
 * be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags accumulated in f_newlinetypes. */
#define NEWLINE_UNKNOWN 0           /* no newline seen yet */
#define NEWLINE_CR      (1 << 0)    /* "\r" seen */
#define NEWLINE_LF      (1 << 1)    /* "\n" seen */
#define NEWLINE_CRLF    (1 << 2)    /* "\r\n" seen */
98
99 /* ===================================================================== */
100 /* Structure definitions. */
101
102 typedef struct {
103 PyObject_HEAD
104 PyObject *file;
105
106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
109
110 int f_softspace; /* Flag used by 'print' command */
111
112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
115
116 BZFILE *fp;
117 int mode;
118 Py_off_t pos;
119 Py_off_t size;
120 #ifdef WITH_THREAD
121 PyThread_type_lock lock;
122 #endif
123 } BZ2FileObject;
124
125 typedef struct {
126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
129 #ifdef WITH_THREAD
130 PyThread_type_lock lock;
131 #endif
132 } BZ2CompObject;
133
134 typedef struct {
135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
139 #ifdef WITH_THREAD
140 PyThread_type_lock lock;
141 #endif
142 } BZ2DecompObject;
143
144 /* ===================================================================== */
145 /* Utility functions. */
146
147 /* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
150 static int
151 check_iterbuffered(BZ2FileObject *f)
152 {
153 if (f->f_buf != NULL &&
154 (f->f_bufend - f->f_bufptr) > 0 &&
155 f->f_buf[0] != '\0') {
156 PyErr_SetString(PyExc_ValueError,
157 "Mixing iteration and read methods would lose data");
158 return -1;
159 }
160 return 0;
161 }
162
163 static int
164 Util_CatchBZ2Error(int bzerror)
165 {
166 int ret = 0;
167 switch(bzerror) {
168 case BZ_OK:
169 case BZ_STREAM_END:
170 break;
171
172 #ifdef BZ_CONFIG_ERROR
173 case BZ_CONFIG_ERROR:
syntax error
(emitted by cppcheck)
syntax error
(emitted by cppcheck)
174 PyErr_SetString(PyExc_SystemError,
175 "the bz2 library was not compiled "
176 "correctly");
177 ret = 1;
178 break;
179 #endif
180
181 case BZ_PARAM_ERROR:
182 PyErr_SetString(PyExc_ValueError,
183 "the bz2 library has received wrong "
184 "parameters");
185 ret = 1;
186 break;
187
188 case BZ_MEM_ERROR:
189 PyErr_NoMemory();
190 ret = 1;
191 break;
192
193 case BZ_DATA_ERROR:
194 case BZ_DATA_ERROR_MAGIC:
195 PyErr_SetString(PyExc_IOError, "invalid data stream");
196 ret = 1;
197 break;
198
199 case BZ_IO_ERROR:
200 PyErr_SetString(PyExc_IOError, "unknown IO error");
201 ret = 1;
202 break;
203
204 case BZ_UNEXPECTED_EOF:
205 PyErr_SetString(PyExc_EOFError,
206 "compressed file ended before the "
207 "logical end-of-stream was detected");
208 ret = 1;
209 break;
210
211 case BZ_SEQUENCE_ERROR:
212 PyErr_SetString(PyExc_RuntimeError,
213 "wrong sequence of bz2 library "
214 "commands used");
215 ret = 1;
216 break;
217 }
218 return ret;
219 }
220
/* Size of the on-stack scratch buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
226
/* Adapted from Python's fileobject.c:new_buffersize().
 *
 * Returns the next buffer size to use after `currentsize`: the current
 * size plus one eighth of it plus 6.  Growing proportionally gives
 * amortized linear-time behaviour, and staying below doubling avoids
 * excessive allocation. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth = (currentsize >> 3) + 6;

    return currentsize + growth;
}
236
237 /* This is a hacked version of Python's fileobject.c:get_line(). */
238 static PyObject *
239 Util_GetLine(BZ2FileObject *f, int n)
240 {
241 char c;
242 char *buf, *end;
243 size_t total_v_size; /* total # of slots in buffer */
244 size_t used_v_size; /* # used slots in buffer */
245 size_t increment; /* amount to increment the buffer */
246 PyObject *v;
247 int bzerror;
248 int bytes_read;
249 int newlinetypes = f->f_newlinetypes;
250 int skipnextlf = f->f_skipnextlf;
251 int univ_newline = f->f_univ_newline;
252
253 total_v_size = n > 0 ? n : 100;
254 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
255 if (v == NULL)
256 return NULL;
257
258 buf = BUF(v);
259 end = buf + total_v_size;
260
261 for (;;) {
262 Py_BEGIN_ALLOW_THREADS
263 while (buf != end) {
264 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
265 f->pos++;
266 if (bytes_read == 0) break;
267 if (univ_newline) {
268 if (skipnextlf) {
269 skipnextlf = 0;
270 if (c == '\n') {
271 /* Seeing a \n here with skipnextlf true means we
272 * saw a \r before.
273 */
274 newlinetypes |= NEWLINE_CRLF;
275 if (bzerror != BZ_OK) break;
276 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
277 f->pos++;
278 if (bytes_read == 0) break;
279 } else {
280 newlinetypes |= NEWLINE_CR;
281 }
282 }
283 if (c == '\r') {
284 skipnextlf = 1;
285 c = '\n';
286 } else if (c == '\n')
287 newlinetypes |= NEWLINE_LF;
288 }
289 *buf++ = c;
290 if (bzerror != BZ_OK || c == '\n') break;
291 }
292 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
293 newlinetypes |= NEWLINE_CR;
294 Py_END_ALLOW_THREADS
295 f->f_newlinetypes = newlinetypes;
296 f->f_skipnextlf = skipnextlf;
297 if (bzerror == BZ_STREAM_END) {
298 f->size = f->pos;
299 f->mode = MODE_READ_EOF;
300 break;
301 } else if (bzerror != BZ_OK) {
302 Util_CatchBZ2Error(bzerror);
303 Py_DECREF(v);
304 return NULL;
305 }
306 if (c == '\n')
307 break;
308 /* Must be because buf == end */
309 if (n > 0)
310 break;
311 used_v_size = total_v_size;
312 increment = total_v_size >> 2; /* mild exponential growth */
313 total_v_size += increment;
314 if (total_v_size > INT_MAX) {
315 PyErr_SetString(PyExc_OverflowError,
316 "line is longer than a Python string can hold");
317 Py_DECREF(v);
318 return NULL;
319 }
320 if (_PyString_Resize(&v, total_v_size) < 0)
321 return NULL;
322 buf = BUF(v) + used_v_size;
323 end = BUF(v) + total_v_size;
324 }
325
326 used_v_size = buf - BUF(v);
327 if (used_v_size != total_v_size)
328 _PyString_Resize(&v, used_v_size);
329 return v;
330 }
331
332 /* This is a hacked version of Python's
333 * fileobject.c:Py_UniversalNewlineFread(). */
334 size_t
335 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
336 char* buf, size_t n, BZ2FileObject *f)
337 {
338 char *dst = buf;
339 int newlinetypes, skipnextlf;
340
341 assert(buf != NULL);
342 assert(stream != NULL);
343
344 if (!f->f_univ_newline)
345 return BZ2_bzRead(bzerror, stream, buf, n);
346
347 newlinetypes = f->f_newlinetypes;
348 skipnextlf = f->f_skipnextlf;
349
350 /* Invariant: n is the number of bytes remaining to be filled
351 * in the buffer.
352 */
353 while (n) {
354 size_t nread;
355 int shortread;
356 char *src = dst;
357
358 nread = BZ2_bzRead(bzerror, stream, dst, n);
359 assert(nread <= n);
360 n -= nread; /* assuming 1 byte out for each in; will adjust */
361 shortread = n != 0; /* true iff EOF or error */
362 while (nread--) {
363 char c = *src++;
364 if (c == '\r') {
365 /* Save as LF and set flag to skip next LF. */
366 *dst++ = '\n';
367 skipnextlf = 1;
368 }
369 else if (skipnextlf && c == '\n') {
370 /* Skip LF, and remember we saw CR LF. */
371 skipnextlf = 0;
372 newlinetypes |= NEWLINE_CRLF;
373 ++n;
374 }
375 else {
376 /* Normal char to be stored in buffer. Also
377 * update the newlinetypes flag if either this
378 * is an LF or the previous char was a CR.
379 */
380 if (c == '\n')
381 newlinetypes |= NEWLINE_LF;
382 else if (skipnextlf)
383 newlinetypes |= NEWLINE_CR;
384 *dst++ = c;
385 skipnextlf = 0;
386 }
387 }
388 if (shortread) {
389 /* If this is EOF, update type flags. */
390 if (skipnextlf && *bzerror == BZ_STREAM_END)
391 newlinetypes |= NEWLINE_CR;
392 break;
393 }
394 }
395 f->f_newlinetypes = newlinetypes;
396 f->f_skipnextlf = skipnextlf;
397 return dst - buf;
398 }
399
400 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
401 static void
402 Util_DropReadAhead(BZ2FileObject *f)
403 {
404 if (f->f_buf != NULL) {
405 PyMem_Free(f->f_buf);
406 f->f_buf = NULL;
407 }
408 }
409
410 /* This is a hacked version of Python's fileobject.c:readahead(). */
411 static int
412 Util_ReadAhead(BZ2FileObject *f, int bufsize)
413 {
414 int chunksize;
415 int bzerror;
416
417 if (f->f_buf != NULL) {
418 if((f->f_bufend - f->f_bufptr) >= 1)
419 return 0;
420 else
421 Util_DropReadAhead(f);
422 }
423 if (f->mode == MODE_READ_EOF) {
424 f->f_bufptr = f->f_buf;
425 f->f_bufend = f->f_buf;
426 return 0;
427 }
428 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
429 PyErr_NoMemory();
430 return -1;
431 }
432 Py_BEGIN_ALLOW_THREADS
433 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
434 bufsize, f);
435 Py_END_ALLOW_THREADS
436 f->pos += chunksize;
437 if (bzerror == BZ_STREAM_END) {
438 f->size = f->pos;
439 f->mode = MODE_READ_EOF;
440 } else if (bzerror != BZ_OK) {
441 Util_CatchBZ2Error(bzerror);
442 Util_DropReadAhead(f);
443 return -1;
444 }
445 f->f_bufptr = f->f_buf;
446 f->f_bufend = f->f_buf + chunksize;
447 return 0;
448 }
449
450 /* This is a hacked version of Python's
451 * fileobject.c:readahead_get_line_skip(). */
452 static PyStringObject *
453 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
454 {
455 PyStringObject* s;
456 char *bufptr;
457 char *buf;
458 int len;
459
460 if (f->f_buf == NULL)
461 if (Util_ReadAhead(f, bufsize) < 0)
462 return NULL;
463
464 len = f->f_bufend - f->f_bufptr;
465 if (len == 0)
466 return (PyStringObject *)
467 PyString_FromStringAndSize(NULL, skip);
468 bufptr = memchr(f->f_bufptr, '\n', len);
469 if (bufptr != NULL) {
470 bufptr++; /* Count the '\n' */
471 len = bufptr - f->f_bufptr;
472 s = (PyStringObject *)
473 PyString_FromStringAndSize(NULL, skip+len);
474 if (s == NULL)
475 return NULL;
476 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
477 f->f_bufptr = bufptr;
478 if (bufptr == f->f_bufend)
479 Util_DropReadAhead(f);
480 } else {
481 bufptr = f->f_bufptr;
482 buf = f->f_buf;
483 f->f_buf = NULL; /* Force new readahead buffer */
484 s = Util_ReadAheadGetLineSkip(f, skip+len,
485 bufsize + (bufsize>>2));
486 if (s == NULL) {
487 PyMem_Free(buf);
488 return NULL;
489 }
490 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
491 PyMem_Free(buf);
492 }
493 return s;
494 }
495
496 /* ===================================================================== */
497 /* Methods of BZ2File. */
498
499 PyDoc_STRVAR(BZ2File_read__doc__,
500 "read([size]) -> string\n\
501 \n\
502 Read at most size uncompressed bytes, returned as a string. If the size\n\
503 argument is negative or omitted, read until EOF is reached.\n\
504 ");
505
506 /* This is a hacked version of Python's fileobject.c:file_read(). */
507 static PyObject *
508 BZ2File_read(BZ2FileObject *self, PyObject *args)
509 {
510 long bytesrequested = -1;
511 size_t bytesread, buffersize, chunksize;
512 int bzerror;
513 PyObject *ret = NULL;
514
515 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
516 return NULL;
517
518 ACQUIRE_LOCK(self);
519 switch (self->mode) {
520 case MODE_READ:
521 break;
522 case MODE_READ_EOF:
523 ret = PyString_FromString("");
524 goto cleanup;
525 case MODE_CLOSED:
526 PyErr_SetString(PyExc_ValueError,
527 "I/O operation on closed file");
528 goto cleanup;
529 default:
530 PyErr_SetString(PyExc_IOError,
531 "file is not ready for reading");
532 goto cleanup;
533 }
534
535 /* refuse to mix with f.next() */
536 if (check_iterbuffered(self))
537 goto cleanup;
538
539 if (bytesrequested < 0)
540 buffersize = Util_NewBufferSize((size_t)0);
541 else
542 buffersize = bytesrequested;
543 if (buffersize > INT_MAX) {
544 PyErr_SetString(PyExc_OverflowError,
545 "requested number of bytes is "
546 "more than a Python string can hold");
547 goto cleanup;
548 }
549 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
550 if (ret == NULL)
551 goto cleanup;
552 bytesread = 0;
553
554 for (;;) {
555 Py_BEGIN_ALLOW_THREADS
556 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
557 BUF(ret)+bytesread,
558 buffersize-bytesread,
559 self);
560 self->pos += chunksize;
561 Py_END_ALLOW_THREADS
562 bytesread += chunksize;
563 if (bzerror == BZ_STREAM_END) {
564 self->size = self->pos;
565 self->mode = MODE_READ_EOF;
566 break;
567 } else if (bzerror != BZ_OK) {
568 Util_CatchBZ2Error(bzerror);
569 Py_DECREF(ret);
570 ret = NULL;
571 goto cleanup;
572 }
573 if (bytesrequested < 0) {
574 buffersize = Util_NewBufferSize(buffersize);
575 if (_PyString_Resize(&ret, buffersize) < 0)
576 goto cleanup;
577 } else {
578 break;
579 }
580 }
581 if (bytesread != buffersize)
582 _PyString_Resize(&ret, bytesread);
583
584 cleanup:
585 RELEASE_LOCK(self);
586 return ret;
587 }
588
589 PyDoc_STRVAR(BZ2File_readline__doc__,
590 "readline([size]) -> string\n\
591 \n\
592 Return the next line from the file, as a string, retaining newline.\n\
593 A non-negative size argument will limit the maximum number of bytes to\n\
594 return (an incomplete line may be returned then). Return an empty\n\
595 string at EOF.\n\
596 ");
597
598 static PyObject *
599 BZ2File_readline(BZ2FileObject *self, PyObject *args)
600 {
601 PyObject *ret = NULL;
602 int sizehint = -1;
603
604 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
605 return NULL;
606
607 ACQUIRE_LOCK(self);
608 switch (self->mode) {
609 case MODE_READ:
610 break;
611 case MODE_READ_EOF:
612 ret = PyString_FromString("");
613 goto cleanup;
614 case MODE_CLOSED:
615 PyErr_SetString(PyExc_ValueError,
616 "I/O operation on closed file");
617 goto cleanup;
618 default:
619 PyErr_SetString(PyExc_IOError,
620 "file is not ready for reading");
621 goto cleanup;
622 }
623
624 /* refuse to mix with f.next() */
625 if (check_iterbuffered(self))
626 goto cleanup;
627
628 if (sizehint == 0)
629 ret = PyString_FromString("");
630 else
631 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
632
633 cleanup:
634 RELEASE_LOCK(self);
635 return ret;
636 }
637
638 PyDoc_STRVAR(BZ2File_readlines__doc__,
639 "readlines([size]) -> list\n\
640 \n\
641 Call readline() repeatedly and return a list of lines read.\n\
642 The optional size argument, if given, is an approximate bound on the\n\
643 total number of bytes in the lines returned.\n\
644 ");
645
646 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
647 static PyObject *
648 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
649 {
650 long sizehint = 0;
651 PyObject *list = NULL;
652 PyObject *line;
653 char small_buffer[SMALLCHUNK];
654 char *buffer = small_buffer;
655 size_t buffersize = SMALLCHUNK;
656 PyObject *big_buffer = NULL;
657 size_t nfilled = 0;
658 size_t nread;
659 size_t totalread = 0;
660 char *p, *q, *end;
661 int err;
662 int shortread = 0;
663 int bzerror;
664
665 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
666 return NULL;
667
668 ACQUIRE_LOCK(self);
669 switch (self->mode) {
670 case MODE_READ:
671 break;
672 case MODE_READ_EOF:
673 list = PyList_New(0);
674 goto cleanup;
675 case MODE_CLOSED:
676 PyErr_SetString(PyExc_ValueError,
677 "I/O operation on closed file");
678 goto cleanup;
679 default:
680 PyErr_SetString(PyExc_IOError,
681 "file is not ready for reading");
682 goto cleanup;
683 }
684
685 /* refuse to mix with f.next() */
686 if (check_iterbuffered(self))
687 goto cleanup;
688
689 if ((list = PyList_New(0)) == NULL)
690 goto cleanup;
691
692 for (;;) {
693 Py_BEGIN_ALLOW_THREADS
694 nread = Util_UnivNewlineRead(&bzerror, self->fp,
695 buffer+nfilled,
696 buffersize-nfilled, self);
697 self->pos += nread;
698 Py_END_ALLOW_THREADS
699 if (bzerror == BZ_STREAM_END) {
700 self->size = self->pos;
701 self->mode = MODE_READ_EOF;
702 if (nread == 0) {
703 sizehint = 0;
704 break;
705 }
706 shortread = 1;
707 } else if (bzerror != BZ_OK) {
708 Util_CatchBZ2Error(bzerror);
709 error:
710 Py_DECREF(list);
711 list = NULL;
712 goto cleanup;
713 }
714 totalread += nread;
715 p = memchr(buffer+nfilled, '\n', nread);
716 if (!shortread && p == NULL) {
717 /* Need a larger buffer to fit this line */
718 nfilled += nread;
719 buffersize *= 2;
720 if (buffersize > INT_MAX) {
721 PyErr_SetString(PyExc_OverflowError,
722 "line is longer than a Python string can hold");
723 goto error;
724 }
725 if (big_buffer == NULL) {
726 /* Create the big buffer */
727 big_buffer = PyString_FromStringAndSize(
728 NULL, buffersize);
729 if (big_buffer == NULL)
730 goto error;
731 buffer = PyString_AS_STRING(big_buffer);
732 memcpy(buffer, small_buffer, nfilled);
733 }
734 else {
735 /* Grow the big buffer */
736 _PyString_Resize(&big_buffer, buffersize);
737 buffer = PyString_AS_STRING(big_buffer);
738 }
739 continue;
740 }
741 end = buffer+nfilled+nread;
742 q = buffer;
743 while (p != NULL) {
744 /* Process complete lines */
745 p++;
746 line = PyString_FromStringAndSize(q, p-q);
747 if (line == NULL)
748 goto error;
749 err = PyList_Append(list, line);
750 Py_DECREF(line);
751 if (err != 0)
752 goto error;
753 q = p;
754 p = memchr(q, '\n', end-q);
755 }
756 /* Move the remaining incomplete line to the start */
757 nfilled = end-q;
758 memmove(buffer, q, nfilled);
759 if (sizehint > 0)
760 if (totalread >= (size_t)sizehint)
761 break;
762 if (shortread) {
763 sizehint = 0;
764 break;
765 }
766 }
767 if (nfilled != 0) {
768 /* Partial last line */
769 line = PyString_FromStringAndSize(buffer, nfilled);
770 if (line == NULL)
771 goto error;
772 if (sizehint > 0) {
773 /* Need to complete the last line */
774 PyObject *rest = Util_GetLine(self, 0);
775 if (rest == NULL) {
776 Py_DECREF(line);
777 goto error;
778 }
779 PyString_Concat(&line, rest);
780 Py_DECREF(rest);
781 if (line == NULL)
782 goto error;
783 }
784 err = PyList_Append(list, line);
785 Py_DECREF(line);
786 if (err != 0)
787 goto error;
788 }
789
790 cleanup:
791 RELEASE_LOCK(self);
792 if (big_buffer) {
793 Py_DECREF(big_buffer);
794 }
795 return list;
796 }
797
798 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
799 "xreadlines() -> self\n\
800 \n\
801 For backward compatibility. BZ2File objects now include the performance\n\
802 optimizations previously implemented in the xreadlines module.\n\
803 ");
804
805 PyDoc_STRVAR(BZ2File_write__doc__,
806 "write(data) -> None\n\
807 \n\
808 Write the 'data' string to file. Note that due to buffering, close() may\n\
809 be needed before the file on disk reflects the data written.\n\
810 ");
811
812 /* This is a hacked version of Python's fileobject.c:file_write(). */
813 static PyObject *
814 BZ2File_write(BZ2FileObject *self, PyObject *args)
815 {
816 PyObject *ret = NULL;
817 Py_buffer pbuf;
818 char *buf;
819 int len;
820 int bzerror;
821
822 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
823 return NULL;
824 buf = pbuf.buf;
825 len = pbuf.len;
826
827 ACQUIRE_LOCK(self);
828 switch (self->mode) {
829 case MODE_WRITE:
830 break;
831
832 case MODE_CLOSED:
833 PyErr_SetString(PyExc_ValueError,
834 "I/O operation on closed file");
835 goto cleanup;
836
837 default:
838 PyErr_SetString(PyExc_IOError,
839 "file is not ready for writing");
840 goto cleanup;
841 }
842
843 self->f_softspace = 0;
844
845 Py_BEGIN_ALLOW_THREADS
846 BZ2_bzWrite (&bzerror, self->fp, buf, len);
847 self->pos += len;
848 Py_END_ALLOW_THREADS
849
850 if (bzerror != BZ_OK) {
851 Util_CatchBZ2Error(bzerror);
852 goto cleanup;
853 }
854
855 Py_INCREF(Py_None);
856 ret = Py_None;
857
858 cleanup:
859 PyBuffer_Release(&pbuf);
860 RELEASE_LOCK(self);
861 return ret;
862 }
863
864 PyDoc_STRVAR(BZ2File_writelines__doc__,
865 "writelines(sequence_of_strings) -> None\n\
866 \n\
867 Write the sequence of strings to the file. Note that newlines are not\n\
868 added. The sequence can be any iterable object producing strings. This is\n\
869 equivalent to calling write() for each string.\n\
870 ");
871
872 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
873 static PyObject *
874 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
875 {
876 #define CHUNKSIZE 1000
877 PyObject *list = NULL;
878 PyObject *iter = NULL;
879 PyObject *ret = NULL;
880 PyObject *line;
881 int i, j, index, len, islist;
882 int bzerror;
883
884 ACQUIRE_LOCK(self);
885 switch (self->mode) {
886 case MODE_WRITE:
887 break;
888
889 case MODE_CLOSED:
890 PyErr_SetString(PyExc_ValueError,
891 "I/O operation on closed file");
892 goto error;
893
894 default:
895 PyErr_SetString(PyExc_IOError,
896 "file is not ready for writing");
897 goto error;
898 }
899
900 islist = PyList_Check(seq);
901 if (!islist) {
902 iter = PyObject_GetIter(seq);
903 if (iter == NULL) {
904 PyErr_SetString(PyExc_TypeError,
905 "writelines() requires an iterable argument");
906 goto error;
907 }
908 list = PyList_New(CHUNKSIZE);
909 if (list == NULL)
910 goto error;
911 }
912
913 /* Strategy: slurp CHUNKSIZE lines into a private list,
914 checking that they are all strings, then write that list
915 without holding the interpreter lock, then come back for more. */
916 for (index = 0; ; index += CHUNKSIZE) {
917 if (islist) {
918 Py_XDECREF(list);
919 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
920 if (list == NULL)
921 goto error;
922 j = PyList_GET_SIZE(list);
923 }
924 else {
925 for (j = 0; j < CHUNKSIZE; j++) {
926 line = PyIter_Next(iter);
927 if (line == NULL) {
928 if (PyErr_Occurred())
929 goto error;
930 break;
931 }
932 PyList_SetItem(list, j, line);
933 }
934 }
935 if (j == 0)
936 break;
937
938 /* Check that all entries are indeed strings. If not,
939 apply the same rules as for file.write() and
940 convert the rets to strings. This is slow, but
941 seems to be the only way since all conversion APIs
942 could potentially execute Python code. */
943 for (i = 0; i < j; i++) {
944 PyObject *v = PyList_GET_ITEM(list, i);
945 if (!PyString_Check(v)) {
946 const char *buffer;
947 Py_ssize_t len;
948 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
949 PyErr_SetString(PyExc_TypeError,
950 "writelines() "
951 "argument must be "
952 "a sequence of "
953 "strings");
954 goto error;
955 }
956 line = PyString_FromStringAndSize(buffer,
957 len);
958 if (line == NULL)
959 goto error;
960 Py_DECREF(v);
961 PyList_SET_ITEM(list, i, line);
962 }
963 }
964
965 self->f_softspace = 0;
966
967 /* Since we are releasing the global lock, the
968 following code may *not* execute Python code. */
969 Py_BEGIN_ALLOW_THREADS
970 for (i = 0; i < j; i++) {
971 line = PyList_GET_ITEM(list, i);
972 len = PyString_GET_SIZE(line);
973 BZ2_bzWrite (&bzerror, self->fp,
974 PyString_AS_STRING(line), len);
975 if (bzerror != BZ_OK) {
976 Py_BLOCK_THREADS
977 Util_CatchBZ2Error(bzerror);
978 goto error;
979 }
980 }
981 Py_END_ALLOW_THREADS
982
983 if (j < CHUNKSIZE)
984 break;
985 }
986
987 Py_INCREF(Py_None);
988 ret = Py_None;
989
990 error:
991 RELEASE_LOCK(self);
992 Py_XDECREF(list);
993 Py_XDECREF(iter);
994 return ret;
995 #undef CHUNKSIZE
996 }
997
998 PyDoc_STRVAR(BZ2File_seek__doc__,
999 "seek(offset [, whence]) -> None\n\
1000 \n\
1001 Move to new file position. Argument offset is a byte count. Optional\n\
1002 argument whence defaults to 0 (offset from start of file, offset\n\
1003 should be >= 0); other values are 1 (move relative to current position,\n\
1004 positive or negative), and 2 (move relative to end of file, usually\n\
1005 negative, although many platforms allow seeking beyond the end of a file).\n\
1006 \n\
1007 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1008 the operation may be extremely slow.\n\
1009 ");
1010
1011 static PyObject *
1012 BZ2File_seek(BZ2FileObject *self, PyObject *args)
1013 {
1014 int where = 0;
1015 PyObject *offobj;
1016 Py_off_t offset;
1017 char small_buffer[SMALLCHUNK];
1018 char *buffer = small_buffer;
1019 size_t buffersize = SMALLCHUNK;
1020 Py_off_t bytesread = 0;
1021 size_t readsize;
1022 int chunksize;
1023 int bzerror;
1024 PyObject *ret = NULL;
1025
1026 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1027 return NULL;
1028 #if !defined(HAVE_LARGEFILE_SUPPORT)
1029 offset = PyInt_AsLong(offobj);
1030 #else
1031 offset = PyLong_Check(offobj) ?
1032 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1033 #endif
1034 if (PyErr_Occurred())
1035 return NULL;
1036
1037 ACQUIRE_LOCK(self);
1038 Util_DropReadAhead(self);
1039 switch (self->mode) {
1040 case MODE_READ:
1041 case MODE_READ_EOF:
1042 break;
1043
1044 case MODE_CLOSED:
1045 PyErr_SetString(PyExc_ValueError,
1046 "I/O operation on closed file");
1047 goto cleanup;
1048
1049 default:
1050 PyErr_SetString(PyExc_IOError,
1051 "seek works only while reading");
1052 goto cleanup;
1053 }
1054
1055 if (where == 2) {
1056 if (self->size == -1) {
1057 assert(self->mode != MODE_READ_EOF);
1058 for (;;) {
1059 Py_BEGIN_ALLOW_THREADS
1060 chunksize = Util_UnivNewlineRead(
1061 &bzerror, self->fp,
1062 buffer, buffersize,
1063 self);
1064 self->pos += chunksize;
1065 Py_END_ALLOW_THREADS
1066
1067 bytesread += chunksize;
1068 if (bzerror == BZ_STREAM_END) {
1069 break;
1070 } else if (bzerror != BZ_OK) {
1071 Util_CatchBZ2Error(bzerror);
1072 goto cleanup;
1073 }
1074 }
1075 self->mode = MODE_READ_EOF;
1076 self->size = self->pos;
1077 bytesread = 0;
1078 }
1079 offset = self->size + offset;
1080 } else if (where == 1) {
1081 offset = self->pos + offset;
1082 }
1083
1084 /* Before getting here, offset must be the absolute position the file
1085 * pointer should be set to. */
1086
1087 if (offset >= self->pos) {
1088 /* we can move forward */
1089 offset -= self->pos;
1090 } else {
1091 /* we cannot move back, so rewind the stream */
1092 BZ2_bzReadClose(&bzerror, self->fp);
1093 if (self->fp) {
1094 PyFile_DecUseCount((PyFileObject *)self->file);
1095 self->fp = NULL;
1096 }
1097 if (bzerror != BZ_OK) {
1098 Util_CatchBZ2Error(bzerror);
1099 goto cleanup;
1100 }
1101 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1102 if (!ret)
1103 goto cleanup;
1104 Py_DECREF(ret);
1105 ret = NULL;
1106 self->pos = 0;
1107 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1108 0, 0, NULL, 0);
1109 if (self->fp)
1110 PyFile_IncUseCount((PyFileObject *)self->file);
1111 if (bzerror != BZ_OK) {
1112 Util_CatchBZ2Error(bzerror);
1113 goto cleanup;
1114 }
1115 self->mode = MODE_READ;
1116 }
1117
1118 if (offset <= 0 || self->mode == MODE_READ_EOF)
1119 goto exit;
1120
1121 /* Before getting here, offset must be set to the number of bytes
1122 * to walk forward. */
1123 for (;;) {
1124 if (offset-bytesread > buffersize)
1125 readsize = buffersize;
1126 else
1127 /* offset might be wider that readsize, but the result
1128 * of the subtraction is bound by buffersize (see the
1129 * condition above). buffersize is 8192. */
1130 readsize = (size_t)(offset-bytesread);
1131 Py_BEGIN_ALLOW_THREADS
1132 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1133 buffer, readsize, self);
1134 self->pos += chunksize;
1135 Py_END_ALLOW_THREADS
1136 bytesread += chunksize;
1137 if (bzerror == BZ_STREAM_END) {
1138 self->size = self->pos;
1139 self->mode = MODE_READ_EOF;
1140 break;
1141 } else if (bzerror != BZ_OK) {
1142 Util_CatchBZ2Error(bzerror);
1143 goto cleanup;
1144 }
1145 if (bytesread == offset)
1146 break;
1147 }
1148
1149 exit:
1150 Py_INCREF(Py_None);
1151 ret = Py_None;
1152
1153 cleanup:
1154 RELEASE_LOCK(self);
1155 return ret;
1156 }
1157
1158 PyDoc_STRVAR(BZ2File_tell__doc__,
1159 "tell() -> int\n\
1160 \n\
1161 Return the current file position, an integer (may be a long integer).\n\
1162 ");
1163
1164 static PyObject *
1165 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1166 {
1167 PyObject *ret = NULL;
1168
1169 if (self->mode == MODE_CLOSED) {
1170 PyErr_SetString(PyExc_ValueError,
1171 "I/O operation on closed file");
1172 goto cleanup;
1173 }
1174
1175 #if !defined(HAVE_LARGEFILE_SUPPORT)
1176 ret = PyInt_FromLong(self->pos);
1177 #else
1178 ret = PyLong_FromLongLong(self->pos);
1179 #endif
1180
1181 cleanup:
1182 return ret;
1183 }
1184
1185 PyDoc_STRVAR(BZ2File_close__doc__,
1186 "close() -> None or (perhaps) an integer\n\
1187 \n\
1188 Close the file. Sets data attribute .closed to true. A closed file\n\
1189 cannot be used for further I/O operations. close() may be called more\n\
1190 than once without error.\n\
1191 ");
1192
1193 static PyObject *
1194 BZ2File_close(BZ2FileObject *self)
1195 {
1196 PyObject *ret = NULL;
1197 int bzerror = BZ_OK;
1198
1199 ACQUIRE_LOCK(self);
1200 switch (self->mode) {
1201 case MODE_READ:
1202 case MODE_READ_EOF:
1203 BZ2_bzReadClose(&bzerror, self->fp);
1204 break;
1205 case MODE_WRITE:
1206 BZ2_bzWriteClose(&bzerror, self->fp,
1207 0, NULL, NULL);
1208 break;
1209 }
1210 if (self->fp) {
1211 PyFile_DecUseCount((PyFileObject *)self->file);
1212 self->fp = NULL;
1213 }
1214 self->mode = MODE_CLOSED;
1215 ret = PyObject_CallMethod(self->file, "close", NULL);
1216 if (bzerror != BZ_OK) {
1217 Util_CatchBZ2Error(bzerror);
1218 Py_XDECREF(ret);
1219 ret = NULL;
1220 }
1221
1222 RELEASE_LOCK(self);
1223 return ret;
1224 }
1225
1226 PyDoc_STRVAR(BZ2File_enter_doc,
1227 "__enter__() -> self.");
1228
1229 static PyObject *
1230 BZ2File_enter(BZ2FileObject *self)
1231 {
1232 if (self->mode == MODE_CLOSED) {
1233 PyErr_SetString(PyExc_ValueError,
1234 "I/O operation on closed file");
1235 return NULL;
1236 }
1237 Py_INCREF(self);
1238 return (PyObject *) self;
1239 }
1240
1241 PyDoc_STRVAR(BZ2File_exit_doc,
1242 "__exit__(*excinfo) -> None. Closes the file.");
1243
1244 static PyObject *
1245 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1246 {
1247 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1248 if (!ret)
1249 /* If error occurred, pass through */
1250 return NULL;
1251 Py_DECREF(ret);
1252 Py_RETURN_NONE;
1253 }
1254
1255
1256 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1257
/* Method table for BZ2File objects (installed as tp_methods of
 * BZ2File_Type).  Each entry is {python name, C function, calling
 * convention flags, docstring}.  Note that "xreadlines" is served by
 * BZ2File_getiter, the same function used for the tp_iter slot. */
static PyMethodDef BZ2File_methods[] = {
    {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
    {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
    {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
    {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
    {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
    {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
    {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
    {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
    {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
    /* Context-manager protocol. */
    {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
    {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
    {NULL, NULL} /* sentinel */
};
1272
1273
1274 /* ===================================================================== */
1275 /* Getters and setters of BZ2File. */
1276
1277 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1278 static PyObject *
1279 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1280 {
1281 switch (self->f_newlinetypes) {
1282 case NEWLINE_UNKNOWN:
1283 Py_INCREF(Py_None);
1284 return Py_None;
1285 case NEWLINE_CR:
1286 return PyString_FromString("\r");
1287 case NEWLINE_LF:
1288 return PyString_FromString("\n");
1289 case NEWLINE_CR|NEWLINE_LF:
1290 return Py_BuildValue("(ss)", "\r", "\n");
1291 case NEWLINE_CRLF:
1292 return PyString_FromString("\r\n");
1293 case NEWLINE_CR|NEWLINE_CRLF:
1294 return Py_BuildValue("(ss)", "\r", "\r\n");
1295 case NEWLINE_LF|NEWLINE_CRLF:
1296 return Py_BuildValue("(ss)", "\n", "\r\n");
1297 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1298 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1299 default:
1300 PyErr_Format(PyExc_SystemError,
1301 "Unknown newlines value 0x%x\n",
1302 self->f_newlinetypes);
1303 return NULL;
1304 }
1305 }
1306
1307 static PyObject *
1308 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1309 {
1310 return PyInt_FromLong(self->mode == MODE_CLOSED);
1311 }
1312
1313 static PyObject *
1314 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1315 {
1316 return PyObject_GetAttrString(self->file, "mode");
1317 }
1318
1319 static PyObject *
1320 BZ2File_get_name(BZ2FileObject *self, void *closure)
1321 {
1322 return PyObject_GetAttrString(self->file, "name");
1323 }
1324
/* Computed attributes of BZ2File objects (installed as tp_getset of
 * BZ2File_Type).  Each entry is {name, getter, setter, docstring}; every
 * setter is NULL, so no setter is registered for any of them. */
static PyGetSetDef BZ2File_getset[] = {
    {"closed", (getter)BZ2File_get_closed, NULL,
                "True if the file is closed"},
    {"newlines", (getter)BZ2File_get_newlines, NULL,
                "end-of-line convention used in this file"},
    {"mode", (getter)BZ2File_get_mode, NULL,
                "file mode ('r', 'w', or 'U')"},
    {"name", (getter)BZ2File_get_name, NULL,
                "file name"},
    {NULL}      /* Sentinel */
};
1336
1337
1338 /* ===================================================================== */
1339 /* Members of BZ2File_Type. */
1340
/* OFF(x) is the byte offset of field x inside BZ2FileObject; it is
 * redefined further down for other object types, hence the #undef. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as Python attributes (installed as
 * tp_members of BZ2File_Type).  "softspace" maps onto the int field
 * f_softspace with flags 0. */
static PyMemberDef BZ2File_members[] = {
    {"softspace",       T_INT,          OFF(f_softspace), 0,
     "flag indicating that a space needs to be printed; used by print"},
    {NULL}      /* Sentinel */
};
1349
1350 /* ===================================================================== */
1351 /* Slot definitions for BZ2File_Type. */
1352
1353 static int
1354 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1355 {
1356 static char *kwlist[] = {"filename", "mode", "buffering",
1357 "compresslevel", 0};
1358 PyObject *name;
1359 char *mode = "r";
1360 int buffering = -1;
1361 int compresslevel = 9;
1362 int bzerror;
1363 int mode_char = 0;
1364
1365 self->size = -1;
1366
1367 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1368 kwlist, &name, &mode, &buffering,
1369 &compresslevel))
1370 return -1;
1371
1372 if (compresslevel < 1 || compresslevel > 9) {
1373 PyErr_SetString(PyExc_ValueError,
1374 "compresslevel must be between 1 and 9");
1375 return -1;
1376 }
1377
1378 for (;;) {
1379 int error = 0;
1380 switch (*mode) {
1381 case 'r':
1382 case 'w':
1383 if (mode_char)
1384 error = 1;
1385 mode_char = *mode;
1386 break;
1387
1388 case 'b':
1389 break;
1390
1391 case 'U':
1392 #ifdef __VMS
1393 self->f_univ_newline = 0;
1394 #else
1395 self->f_univ_newline = 1;
1396 #endif
1397 break;
1398
1399 default:
1400 error = 1;
1401 break;
1402 }
1403 if (error) {
1404 PyErr_Format(PyExc_ValueError,
1405 "invalid mode char %c", *mode);
1406 return -1;
1407 }
1408 mode++;
1409 if (*mode == '\0')
1410 break;
1411 }
1412
1413 if (mode_char == 0) {
1414 mode_char = 'r';
1415 }
1416
1417 mode = (mode_char == 'r') ? "rb" : "wb";
1418
1419 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1420 name, mode, buffering);
1421 if (self->file == NULL)
1422 return -1;
1423
1424 /* From now on, we have stuff to dealloc, so jump to error label
1425 * instead of returning */
1426
1427 #ifdef WITH_THREAD
1428 self->lock = PyThread_allocate_lock();
1429 if (!self->lock) {
1430 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1431 goto error;
1432 }
1433 #endif
1434
1435 if (mode_char == 'r')
1436 self->fp = BZ2_bzReadOpen(&bzerror,
1437 PyFile_AsFile(self->file),
1438 0, 0, NULL, 0);
1439 else
1440 self->fp = BZ2_bzWriteOpen(&bzerror,
1441 PyFile_AsFile(self->file),
1442 compresslevel, 0, 0);
1443
1444 if (bzerror != BZ_OK) {
1445 Util_CatchBZ2Error(bzerror);
1446 goto error;
1447 }
1448 PyFile_IncUseCount((PyFileObject *)self->file);
1449
1450 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1451
1452 return 0;
1453
1454 error:
1455 Py_CLEAR(self->file);
1456 #ifdef WITH_THREAD
1457 if (self->lock) {
1458 PyThread_free_lock(self->lock);
1459 self->lock = NULL;
1460 }
1461 #endif
1462 return -1;
1463 }
1464
1465 static void
1466 BZ2File_dealloc(BZ2FileObject *self)
1467 {
1468 int bzerror;
1469 #ifdef WITH_THREAD
1470 if (self->lock)
1471 PyThread_free_lock(self->lock);
1472 #endif
1473 switch (self->mode) {
1474 case MODE_READ:
1475 case MODE_READ_EOF:
1476 BZ2_bzReadClose(&bzerror, self->fp);
1477 break;
1478 case MODE_WRITE:
1479 BZ2_bzWriteClose(&bzerror, self->fp,
1480 0, NULL, NULL);
1481 break;
1482 }
1483 if (self->fp) {
1484 PyFile_DecUseCount((PyFileObject *)self->file);
1485 self->fp = NULL;
1486 }
1487 Util_DropReadAhead(self);
1488 Py_XDECREF(self->file);
1489 Py_TYPE(self)->tp_free((PyObject *)self);
1490 }
1491
1492 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1493 static PyObject *
1494 BZ2File_getiter(BZ2FileObject *self)
1495 {
1496 if (self->mode == MODE_CLOSED) {
1497 PyErr_SetString(PyExc_ValueError,
1498 "I/O operation on closed file");
1499 return NULL;
1500 }
1501 Py_INCREF((PyObject*)self);
1502 return (PyObject *)self;
1503 }
1504
1505 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1506 #define READAHEAD_BUFSIZE 8192
1507 static PyObject *
1508 BZ2File_iternext(BZ2FileObject *self)
1509 {
1510 PyStringObject* ret;
1511 ACQUIRE_LOCK(self);
1512 if (self->mode == MODE_CLOSED) {
1513 RELEASE_LOCK(self);
1514 PyErr_SetString(PyExc_ValueError,
1515 "I/O operation on closed file");
1516 return NULL;
1517 }
1518 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1519 RELEASE_LOCK(self);
1520 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1521 Py_XDECREF(ret);
1522 return NULL;
1523 }
1524 return (PyObject *)ret;
1525 }
1526
1527 /* ===================================================================== */
1528 /* BZ2File_Type definition. */
1529
1530 PyDoc_VAR(BZ2File__doc__) =
1531 PyDoc_STR(
1532 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1533 \n\
1534 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1535 writing. When opened for writing, the file will be created if it doesn't\n\
1536 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1537 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1538 is given, must be a number between 1 and 9.\n\
1539 ")
1540 PyDoc_STR(
1541 "\n\
1542 Add a 'U' to mode to open the file for input with universal newline\n\
1543 support. Any line ending in the input file will be seen as a '\\n' in\n\
1544 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1545 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1546 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1547 newlines are available only when reading.\n\
1548 ")
1549 ;
1550
/* Type object for bz2.BZ2File.  The initialisers are positional, so their
 * order must follow the PyTypeObject slot layout; each line is tagged
 * with the slot it fills.  The type plugs in the BZ2File method, member
 * and getset tables, acts as its own iterator (tp_iter/tp_iternext) and
 * sets Py_TPFLAGS_BASETYPE in its flags. */
static PyTypeObject BZ2File_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2File",              /*tp_name*/
    sizeof(BZ2FileObject),      /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2File_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2File__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    (getiterfunc)BZ2File_getiter, /*tp_iter*/
    (iternextfunc)BZ2File_iternext, /*tp_iternext*/
    BZ2File_methods,        /*tp_methods*/
    BZ2File_members,        /*tp_members*/
    BZ2File_getset,         /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2File_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1593
1594
1595 /* ===================================================================== */
1596 /* Methods of BZ2Comp. */
1597
1598 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1599 "compress(data) -> string\n\
1600 \n\
1601 Provide more data to the compressor object. It will return chunks of\n\
1602 compressed data whenever possible. When you've finished providing data\n\
1603 to compress, call the flush() method to finish the compression process,\n\
1604 and return what is left in the internal buffers.\n\
1605 ");
1606
1607 static PyObject *
1608 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1609 {
1610 Py_buffer pdata;
1611 char *data;
1612 int datasize;
1613 int bufsize = SMALLCHUNK;
1614 PY_LONG_LONG totalout;
1615 PyObject *ret = NULL;
1616 bz_stream *bzs = &self->bzs;
1617 int bzerror;
1618
1619 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1620 return NULL;
1621 data = pdata.buf;
1622 datasize = pdata.len;
1623
1624 if (datasize == 0) {
1625 PyBuffer_Release(&pdata);
1626 return PyString_FromString("");
1627 }
1628
1629 ACQUIRE_LOCK(self);
1630 if (!self->running) {
1631 PyErr_SetString(PyExc_ValueError,
1632 "this object was already flushed");
1633 goto error;
1634 }
1635
1636 ret = PyString_FromStringAndSize(NULL, bufsize);
1637 if (!ret)
1638 goto error;
1639
1640 bzs->next_in = data;
1641 bzs->avail_in = datasize;
1642 bzs->next_out = BUF(ret);
1643 bzs->avail_out = bufsize;
1644
1645 totalout = BZS_TOTAL_OUT(bzs);
1646
1647 for (;;) {
1648 Py_BEGIN_ALLOW_THREADS
1649 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1650 Py_END_ALLOW_THREADS
1651 if (bzerror != BZ_RUN_OK) {
1652 Util_CatchBZ2Error(bzerror);
1653 goto error;
1654 }
1655 if (bzs->avail_in == 0)
1656 break; /* no more input data */
1657 if (bzs->avail_out == 0) {
1658 bufsize = Util_NewBufferSize(bufsize);
1659 if (_PyString_Resize(&ret, bufsize) < 0) {
1660 BZ2_bzCompressEnd(bzs);
1661 goto error;
1662 }
1663 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1664 - totalout);
1665 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1666 }
1667 }
1668
1669 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1670
1671 RELEASE_LOCK(self);
1672 PyBuffer_Release(&pdata);
1673 return ret;
1674
1675 error:
1676 RELEASE_LOCK(self);
1677 PyBuffer_Release(&pdata);
1678 Py_XDECREF(ret);
1679 return NULL;
1680 }
1681
1682 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1683 "flush() -> string\n\
1684 \n\
1685 Finish the compression process and return what is left in internal buffers.\n\
1686 You must not use the compressor object after calling this method.\n\
1687 ");
1688
1689 static PyObject *
1690 BZ2Comp_flush(BZ2CompObject *self)
1691 {
1692 int bufsize = SMALLCHUNK;
1693 PyObject *ret = NULL;
1694 bz_stream *bzs = &self->bzs;
1695 PY_LONG_LONG totalout;
1696 int bzerror;
1697
1698 ACQUIRE_LOCK(self);
1699 if (!self->running) {
1700 PyErr_SetString(PyExc_ValueError, "object was already "
1701 "flushed");
1702 goto error;
1703 }
1704 self->running = 0;
1705
1706 ret = PyString_FromStringAndSize(NULL, bufsize);
1707 if (!ret)
1708 goto error;
1709
1710 bzs->next_out = BUF(ret);
1711 bzs->avail_out = bufsize;
1712
1713 totalout = BZS_TOTAL_OUT(bzs);
1714
1715 for (;;) {
1716 Py_BEGIN_ALLOW_THREADS
1717 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1718 Py_END_ALLOW_THREADS
1719 if (bzerror == BZ_STREAM_END) {
1720 break;
1721 } else if (bzerror != BZ_FINISH_OK) {
1722 Util_CatchBZ2Error(bzerror);
1723 goto error;
1724 }
1725 if (bzs->avail_out == 0) {
1726 bufsize = Util_NewBufferSize(bufsize);
1727 if (_PyString_Resize(&ret, bufsize) < 0)
1728 goto error;
1729 bzs->next_out = BUF(ret);
1730 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1731 - totalout);
1732 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1733 }
1734 }
1735
1736 if (bzs->avail_out != 0)
1737 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1738
1739 RELEASE_LOCK(self);
1740 return ret;
1741
1742 error:
1743 RELEASE_LOCK(self);
1744 Py_XDECREF(ret);
1745 return NULL;
1746 }
1747
/* Method table for BZ2Compressor objects (installed as tp_methods of
 * BZ2Comp_Type): compress(data) takes positional arguments, flush()
 * takes none. */
static PyMethodDef BZ2Comp_methods[] = {
    {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
     BZ2Comp_compress__doc__},
    {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
     BZ2Comp_flush__doc__},
    {NULL,              NULL}           /* sentinel */
};
1755
1756
1757 /* ===================================================================== */
1758 /* Slot definitions for BZ2Comp_Type. */
1759
1760 static int
1761 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1762 {
1763 int compresslevel = 9;
1764 int bzerror;
1765 static char *kwlist[] = {"compresslevel", 0};
1766
1767 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1768 kwlist, &compresslevel))
1769 return -1;
1770
1771 if (compresslevel < 1 || compresslevel > 9) {
1772 PyErr_SetString(PyExc_ValueError,
1773 "compresslevel must be between 1 and 9");
1774 goto error;
1775 }
1776
1777 #ifdef WITH_THREAD
1778 self->lock = PyThread_allocate_lock();
1779 if (!self->lock) {
1780 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1781 goto error;
1782 }
1783 #endif
1784
1785 memset(&self->bzs, 0, sizeof(bz_stream));
1786 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1787 if (bzerror != BZ_OK) {
1788 Util_CatchBZ2Error(bzerror);
1789 goto error;
1790 }
1791
1792 self->running = 1;
1793
1794 return 0;
1795 error:
1796 #ifdef WITH_THREAD
1797 if (self->lock) {
1798 PyThread_free_lock(self->lock);
1799 self->lock = NULL;
1800 }
1801 #endif
1802 return -1;
1803 }
1804
1805 static void
1806 BZ2Comp_dealloc(BZ2CompObject *self)
1807 {
1808 #ifdef WITH_THREAD
1809 if (self->lock)
1810 PyThread_free_lock(self->lock);
1811 #endif
1812 BZ2_bzCompressEnd(&self->bzs);
1813 Py_TYPE(self)->tp_free((PyObject *)self);
1814 }
1815
1816
1817 /* ===================================================================== */
1818 /* BZ2Comp_Type definition. */
1819
1820 PyDoc_STRVAR(BZ2Comp__doc__,
1821 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1822 \n\
1823 Create a new compressor object. This object may be used to compress\n\
1824 data sequentially. If you want to compress data in one shot, use the\n\
1825 compress() function instead. The compresslevel parameter, if given,\n\
1826 must be a number between 1 and 9.\n\
1827 ");
1828
/* Type object for bz2.BZ2Compressor.  The initialisers are positional, so
 * their order must follow the PyTypeObject slot layout; each line is
 * tagged with the slot it fills.  Only a method table is installed (no
 * members, no getset, no iterator slots). */
static PyTypeObject BZ2Comp_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2Compressor",        /*tp_name*/
    sizeof(BZ2CompObject),      /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Comp__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Comp_methods,        /*tp_methods*/
    0,                      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Comp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1871
1872
1873 /* ===================================================================== */
1874 /* Members of BZ2Decomp. */
1875
/* Re-point OFF(x) at BZ2DecompObject; it was previously defined for a
 * different object type. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Struct fields exposed directly as Python attributes (installed as
 * tp_members of BZ2Decomp_Type).  "unused_data" maps onto the object
 * field of the same name and carries the RO flag. */
static PyMemberDef BZ2Decomp_members[] = {
    {"unused_data", T_OBJECT, OFF(unused_data), RO},
    {NULL}      /* Sentinel */
};
1883
1884
1885 /* ===================================================================== */
1886 /* Methods of BZ2Decomp. */
1887
1888 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1889 "decompress(data) -> string\n\
1890 \n\
1891 Provide more data to the decompressor object. It will return chunks\n\
1892 of decompressed data whenever possible. If you try to decompress data\n\
1893 after the end of stream is found, EOFError will be raised. If any data\n\
1894 was found after the end of stream, it'll be ignored and saved in\n\
1895 unused_data attribute.\n\
1896 ");
1897
1898 static PyObject *
1899 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1900 {
1901 Py_buffer pdata;
1902 char *data;
1903 int datasize;
1904 int bufsize = SMALLCHUNK;
1905 PY_LONG_LONG totalout;
1906 PyObject *ret = NULL;
1907 bz_stream *bzs = &self->bzs;
1908 int bzerror;
1909
1910 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1911 return NULL;
1912 data = pdata.buf;
1913 datasize = pdata.len;
1914
1915 ACQUIRE_LOCK(self);
1916 if (!self->running) {
1917 PyErr_SetString(PyExc_EOFError, "end of stream was "
1918 "already found");
1919 goto error;
1920 }
1921
1922 ret = PyString_FromStringAndSize(NULL, bufsize);
1923 if (!ret)
1924 goto error;
1925
1926 bzs->next_in = data;
1927 bzs->avail_in = datasize;
1928 bzs->next_out = BUF(ret);
1929 bzs->avail_out = bufsize;
1930
1931 totalout = BZS_TOTAL_OUT(bzs);
1932
1933 for (;;) {
1934 Py_BEGIN_ALLOW_THREADS
1935 bzerror = BZ2_bzDecompress(bzs);
1936 Py_END_ALLOW_THREADS
1937 if (bzerror == BZ_STREAM_END) {
1938 if (bzs->avail_in != 0) {
1939 Py_DECREF(self->unused_data);
1940 self->unused_data =
1941 PyString_FromStringAndSize(bzs->next_in,
1942 bzs->avail_in);
1943 }
1944 self->running = 0;
1945 break;
1946 }
1947 if (bzerror != BZ_OK) {
1948 Util_CatchBZ2Error(bzerror);
1949 goto error;
1950 }
1951 if (bzs->avail_in == 0)
1952 break; /* no more input data */
1953 if (bzs->avail_out == 0) {
1954 bufsize = Util_NewBufferSize(bufsize);
1955 if (_PyString_Resize(&ret, bufsize) < 0) {
1956 BZ2_bzDecompressEnd(bzs);
1957 goto error;
1958 }
1959 bzs->next_out = BUF(ret);
1960 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1961 - totalout);
1962 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1963 }
1964 }
1965
1966 if (bzs->avail_out != 0)
1967 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1968
1969 RELEASE_LOCK(self);
1970 PyBuffer_Release(&pdata);
1971 return ret;
1972
1973 error:
1974 RELEASE_LOCK(self);
1975 PyBuffer_Release(&pdata);
1976 Py_XDECREF(ret);
1977 return NULL;
1978 }
1979
/* Method table for BZ2Decompressor objects (installed as tp_methods of
 * BZ2Decomp_Type); decompress(data) is the only method. */
static PyMethodDef BZ2Decomp_methods[] = {
    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
    {NULL,              NULL}           /* sentinel */
};
1984
1985
1986 /* ===================================================================== */
1987 /* Slot definitions for BZ2Decomp_Type. */
1988
1989 static int
1990 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1991 {
1992 int bzerror;
1993
1994 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1995 return -1;
1996
1997 #ifdef WITH_THREAD
1998 self->lock = PyThread_allocate_lock();
1999 if (!self->lock) {
2000 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2001 goto error;
2002 }
2003 #endif
2004
2005 self->unused_data = PyString_FromString("");
2006 if (!self->unused_data)
2007 goto error;
2008
2009 memset(&self->bzs, 0, sizeof(bz_stream));
2010 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2011 if (bzerror != BZ_OK) {
2012 Util_CatchBZ2Error(bzerror);
2013 goto error;
2014 }
2015
2016 self->running = 1;
2017
2018 return 0;
2019
2020 error:
2021 #ifdef WITH_THREAD
2022 if (self->lock) {
2023 PyThread_free_lock(self->lock);
2024 self->lock = NULL;
2025 }
2026 #endif
2027 Py_CLEAR(self->unused_data);
2028 return -1;
2029 }
2030
2031 static void
2032 BZ2Decomp_dealloc(BZ2DecompObject *self)
2033 {
2034 #ifdef WITH_THREAD
2035 if (self->lock)
2036 PyThread_free_lock(self->lock);
2037 #endif
2038 Py_XDECREF(self->unused_data);
2039 BZ2_bzDecompressEnd(&self->bzs);
2040 Py_TYPE(self)->tp_free((PyObject *)self);
2041 }
2042
2043
2044 /* ===================================================================== */
2045 /* BZ2Decomp_Type definition. */
2046
2047 PyDoc_STRVAR(BZ2Decomp__doc__,
2048 "BZ2Decompressor() -> decompressor object\n\
2049 \n\
2050 Create a new decompressor object. This object may be used to decompress\n\
2051 data sequentially. If you want to decompress data in one shot, use the\n\
2052 decompress() function instead.\n\
2053 ");
2054
/* Type object for bz2.BZ2Decompressor.  The initialisers are positional,
 * so their order must follow the PyTypeObject slot layout; each line is
 * tagged with the slot it fills.  The type installs a method table and a
 * member table (no getset, no iterator slots). */
static PyTypeObject BZ2Decomp_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2Decompressor",      /*tp_name*/
    sizeof(BZ2DecompObject), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Decomp__doc__,       /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Decomp_methods,      /*tp_methods*/
    BZ2Decomp_members,      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Decomp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
2097
2098
2099 /* ===================================================================== */
2100 /* Module functions. */
2101
2102 PyDoc_STRVAR(bz2_compress__doc__,
2103 "compress(data [, compresslevel=9]) -> string\n\
2104 \n\
2105 Compress data in one shot. If you want to compress data sequentially,\n\
2106 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2107 given, must be a number between 1 and 9.\n\
2108 ");
2109
2110 static PyObject *
2111 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2112 {
2113 int compresslevel=9;
2114 Py_buffer pdata;
2115 char *data;
2116 int datasize;
2117 int bufsize;
2118 PyObject *ret = NULL;
2119 bz_stream _bzs;
2120 bz_stream *bzs = &_bzs;
2121 int bzerror;
2122 static char *kwlist[] = {"data", "compresslevel", 0};
2123
2124 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2125 kwlist, &pdata,
2126 &compresslevel))
2127 return NULL;
2128 data = pdata.buf;
2129 datasize = pdata.len;
2130
2131 if (compresslevel < 1 || compresslevel > 9) {
2132 PyErr_SetString(PyExc_ValueError,
2133 "compresslevel must be between 1 and 9");
2134 PyBuffer_Release(&pdata);
2135 return NULL;
2136 }
2137
2138 /* Conforming to bz2 manual, this is large enough to fit compressed
2139 * data in one shot. We will check it later anyway. */
2140 bufsize = datasize + (datasize/100+1) + 600;
2141
2142 ret = PyString_FromStringAndSize(NULL, bufsize);
2143 if (!ret) {
2144 PyBuffer_Release(&pdata);
2145 return NULL;
2146 }
2147
2148 memset(bzs, 0, sizeof(bz_stream));
2149
2150 bzs->next_in = data;
2151 bzs->avail_in = datasize;
2152 bzs->next_out = BUF(ret);
2153 bzs->avail_out = bufsize;
2154
2155 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2156 if (bzerror != BZ_OK) {
2157 Util_CatchBZ2Error(bzerror);
2158 PyBuffer_Release(&pdata);
2159 Py_DECREF(ret);
2160 return NULL;
2161 }
2162
2163 for (;;) {
2164 Py_BEGIN_ALLOW_THREADS
2165 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2166 Py_END_ALLOW_THREADS
2167 if (bzerror == BZ_STREAM_END) {
2168 break;
2169 } else if (bzerror != BZ_FINISH_OK) {
2170 BZ2_bzCompressEnd(bzs);
2171 Util_CatchBZ2Error(bzerror);
2172 PyBuffer_Release(&pdata);
2173 Py_DECREF(ret);
2174 return NULL;
2175 }
2176 if (bzs->avail_out == 0) {
2177 bufsize = Util_NewBufferSize(bufsize);
2178 if (_PyString_Resize(&ret, bufsize) < 0) {
2179 BZ2_bzCompressEnd(bzs);
2180 PyBuffer_Release(&pdata);
2181 Py_DECREF(ret);
2182 return NULL;
2183 }
2184 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2185 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2186 }
2187 }
2188
2189 if (bzs->avail_out != 0)
2190 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2191 BZ2_bzCompressEnd(bzs);
2192
2193 PyBuffer_Release(&pdata);
2194 return ret;
2195 }
2196
2197 PyDoc_STRVAR(bz2_decompress__doc__,
2198 "decompress(data) -> decompressed data\n\
2199 \n\
2200 Decompress data in one shot. If you want to decompress data sequentially,\n\
2201 use an instance of BZ2Decompressor instead.\n\
2202 ");
2203
2204 static PyObject *
2205 bz2_decompress(PyObject *self, PyObject *args)
2206 {
2207 Py_buffer pdata;
2208 char *data;
2209 int datasize;
2210 int bufsize = SMALLCHUNK;
2211 PyObject *ret;
2212 bz_stream _bzs;
2213 bz_stream *bzs = &_bzs;
2214 int bzerror;
2215
2216 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2217 return NULL;
2218 data = pdata.buf;
2219 datasize = pdata.len;
2220
2221 if (datasize == 0) {
2222 PyBuffer_Release(&pdata);
2223 return PyString_FromString("");
2224 }
2225
2226 ret = PyString_FromStringAndSize(NULL, bufsize);
2227 if (!ret) {
2228 PyBuffer_Release(&pdata);
2229 return NULL;
2230 }
2231
2232 memset(bzs, 0, sizeof(bz_stream));
2233
2234 bzs->next_in = data;
2235 bzs->avail_in = datasize;
2236 bzs->next_out = BUF(ret);
2237 bzs->avail_out = bufsize;
2238
2239 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2240 if (bzerror != BZ_OK) {
2241 Util_CatchBZ2Error(bzerror);
2242 Py_DECREF(ret);
2243 PyBuffer_Release(&pdata);
2244 return NULL;
2245 }
2246
2247 for (;;) {
2248 Py_BEGIN_ALLOW_THREADS
2249 bzerror = BZ2_bzDecompress(bzs);
2250 Py_END_ALLOW_THREADS
2251 if (bzerror == BZ_STREAM_END) {
2252 break;
2253 } else if (bzerror != BZ_OK) {
2254 BZ2_bzDecompressEnd(bzs);
2255 Util_CatchBZ2Error(bzerror);
2256 PyBuffer_Release(&pdata);
2257 Py_DECREF(ret);
2258 return NULL;
2259 }
2260 if (bzs->avail_in == 0) {
2261 BZ2_bzDecompressEnd(bzs);
2262 PyErr_SetString(PyExc_ValueError,
2263 "couldn't find end of stream");
2264 PyBuffer_Release(&pdata);
2265 Py_DECREF(ret);
2266 return NULL;
2267 }
2268 if (bzs->avail_out == 0) {
2269 bufsize = Util_NewBufferSize(bufsize);
2270 if (_PyString_Resize(&ret, bufsize) < 0) {
2271 BZ2_bzDecompressEnd(bzs);
2272 PyBuffer_Release(&pdata);
2273 Py_DECREF(ret);
2274 return NULL;
2275 }
2276 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2277 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2278 }
2279 }
2280
2281 if (bzs->avail_out != 0)
2282 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2283 BZ2_bzDecompressEnd(bzs);
2284 PyBuffer_Release(&pdata);
2285
2286 return ret;
2287 }
2288
/* Module-level function table passed to Py_InitModule3() in initbz2().
 * compress() accepts keyword arguments; decompress() is positional
 * only. */
static PyMethodDef bz2_methods[] = {
    {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
        bz2_compress__doc__},
    {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
        bz2_decompress__doc__},
    {NULL,              NULL}           /* sentinel */
};
2296
2297 /* ===================================================================== */
2298 /* Initialization function. */
2299
2300 PyDoc_STRVAR(bz2__doc__,
2301 "The python bz2 module provides a comprehensive interface for\n\
2302 the bz2 compression library. It implements a complete file\n\
2303 interface, one shot (de)compression functions, and types for\n\
2304 sequential (de)compression.\n\
2305 ");
2306
2307 PyMODINIT_FUNC
2308 initbz2(void)
2309 {
2310 PyObject *m;
2311
2312 if (PyType_Ready(&BZ2File_Type) < 0)
2313 return;
2314 if (PyType_Ready(&BZ2Comp_Type) < 0)
2315 return;
2316 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2317 return;
2318
2319 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2320 if (m == NULL)
2321 return;
2322
2323 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2324
2325 Py_INCREF(&BZ2File_Type);
2326 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2327
2328 Py_INCREF(&BZ2Comp_Type);
2329 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2330
2331 Py_INCREF(&BZ2Decomp_Type);
2332 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2333 }