Python-2.7.3/Modules/cjkcodecs/multibytecodec.c

Location Tool Test ID Function Issue
/builddir/build/BUILD/Python-2.7.3/Modules/cjkcodecs/multibytecodec.c:536:5 clang-analyzer The left operand of '==' is a garbage value
/builddir/build/BUILD/Python-2.7.3/Modules/cjkcodecs/multibytecodec.c:536:5 clang-analyzer The left operand of '==' is a garbage value
   1 /*
   2  * multibytecodec.c: Common Multibyte Codec Implementation
   3  *
   4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
   5  */
   6 
   7 #define PY_SSIZE_T_CLEAN
   8 #include "Python.h"
   9 #include "structmember.h"
  10 #include "multibytecodec.h"
  11 
  12 typedef struct {
  13     const Py_UNICODE    *inbuf, *inbuf_top, *inbuf_end;
  14     unsigned char       *outbuf, *outbuf_end;
  15     PyObject            *excobj, *outobj;
  16 } MultibyteEncodeBuffer;
  17 
  18 typedef struct {
  19     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
  20     Py_UNICODE          *outbuf, *outbuf_end;
  21     PyObject            *excobj, *outobj;
  22 } MultibyteDecodeBuffer;
  23 
  24 PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
  25 "I.encode(unicode[, errors]) -> (string, length consumed)\n\
  26 \n\
  27 Return an encoded string version of `unicode'. errors may be given to\n\
  28 set a different error handling scheme. Default is 'strict' meaning that\n\
  29 encoding errors raise a UnicodeEncodeError. Other possible values are\n\
  30 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
  31 registered with codecs.register_error that can handle UnicodeEncodeErrors.");
  32 
  33 PyDoc_STRVAR(MultibyteCodec_Decode__doc__,
  34 "I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
  35 \n\
  36 Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
  37 to set a different error handling scheme. Default is 'strict' meaning\n\
  38 that encoding errors raise a UnicodeDecodeError. Other possible values\n\
  39 are 'ignore' and 'replace' as well as any other name registered with\n\
  40 codecs.register_error that is able to handle UnicodeDecodeErrors.");
  41 
  42 static char *codeckwarglist[] = {"input", "errors", NULL};
  43 static char *incnewkwarglist[] = {"errors", NULL};
  44 static char *incrementalkwarglist[] = {"input", "final", NULL};
  45 static char *streamkwarglist[] = {"stream", "errors", NULL};
  46 
  47 static PyObject *multibytecodec_encode(MultibyteCodec *,
  48                 MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
  49                 PyObject *, int);
  50 
  51 #define MBENC_RESET     MBENC_MAX<<1 /* reset after an encoding session */
  52 
  53 static PyObject *
  54 make_tuple(PyObject *object, Py_ssize_t len)
  55 {
  56     PyObject *v, *w;
  57 
  58     if (object == NULL)
  59         return NULL;
  60 
  61     v = PyTuple_New(2);
  62     if (v == NULL) {
  63         Py_DECREF(object);
  64         return NULL;
  65     }
  66     PyTuple_SET_ITEM(v, 0, object);
  67 
  68     w = PyInt_FromSsize_t(len);
  69     if (w == NULL) {
  70         Py_DECREF(v);
  71         return NULL;
  72     }
  73     PyTuple_SET_ITEM(v, 1, w);
  74 
  75     return v;
  76 }
  77 
  78 static PyObject *
  79 internal_error_callback(const char *errors)
  80 {
  81     if (errors == NULL || strcmp(errors, "strict") == 0)
  82         return ERROR_STRICT;
  83     else if (strcmp(errors, "ignore") == 0)
  84         return ERROR_IGNORE;
  85     else if (strcmp(errors, "replace") == 0)
  86         return ERROR_REPLACE;
  87     else
  88         return PyString_FromString(errors);
  89 }
  90 
  91 static PyObject *
  92 call_error_callback(PyObject *errors, PyObject *exc)
  93 {
  94     PyObject *args, *cb, *r;
  95 
  96     assert(PyString_Check(errors));
  97     cb = PyCodec_LookupError(PyString_AS_STRING(errors));
  98     if (cb == NULL)
  99         return NULL;
 100 
 101     args = PyTuple_New(1);
 102     if (args == NULL) {
 103         Py_DECREF(cb);
 104         return NULL;
 105     }
 106 
 107     PyTuple_SET_ITEM(args, 0, exc);
 108     Py_INCREF(exc);
 109 
 110     r = PyObject_CallObject(cb, args);
 111     Py_DECREF(args);
 112     Py_DECREF(cb);
 113     return r;
 114 }
 115 
 116 static PyObject *
 117 codecctx_errors_get(MultibyteStatefulCodecContext *self)
 118 {
 119     const char *errors;
 120 
 121     if (self->errors == ERROR_STRICT)
 122         errors = "strict";
 123     else if (self->errors == ERROR_IGNORE)
 124         errors = "ignore";
 125     else if (self->errors == ERROR_REPLACE)
 126         errors = "replace";
 127     else {
 128         Py_INCREF(self->errors);
 129         return self->errors;
 130     }
 131 
 132     return PyString_FromString(errors);
 133 }
 134 
 135 static int
 136 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
 137                     void *closure)
 138 {
 139     PyObject *cb;
 140 
 141     if (!PyString_Check(value)) {
 142         PyErr_SetString(PyExc_TypeError, "errors must be a string");
 143         return -1;
 144     }
 145 
 146     cb = internal_error_callback(PyString_AS_STRING(value));
 147     if (cb == NULL)
 148         return -1;
 149 
 150     ERROR_DECREF(self->errors);
 151     self->errors = cb;
 152     return 0;
 153 }
 154 
 155 /* This getset handlers list is used by all the stateful codec objects */
 156 static PyGetSetDef codecctx_getsets[] = {
 157     {"errors",          (getter)codecctx_errors_get,
 158                     (setter)codecctx_errors_set,
 159                     PyDoc_STR("how to treat errors")},
 160     {NULL,}
 161 };
 162 
 163 static int
 164 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
 165 {
 166     Py_ssize_t orgpos, orgsize, incsize;
 167 
 168     orgpos = (Py_ssize_t)((char *)buf->outbuf -
 169                             PyString_AS_STRING(buf->outobj));
 170     orgsize = PyString_GET_SIZE(buf->outobj);
 171     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
 172 
 173     if (orgsize > PY_SSIZE_T_MAX - incsize)
 174         return -1;
 175 
 176     if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1)
 177         return -1;
 178 
 179     buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
 180     buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
 181         + PyString_GET_SIZE(buf->outobj);
 182 
 183     return 0;
 184 }
 185 #define REQUIRE_ENCODEBUFFER(buf, s) {                                  \
 186     if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)             \
 187         if (expand_encodebuffer(buf, s) == -1)                          \
 188             goto errorexit;                                             \
 189 }
 190 
 191 static int
 192 expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
 193 {
 194     Py_ssize_t orgpos, orgsize;
 195 
 196     orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
 197     orgsize = PyUnicode_GET_SIZE(buf->outobj);
 198     if (PyUnicode_Resize(&buf->outobj, orgsize + (
 199         esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
 200         return -1;
 201 
 202     buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
 203     buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
 204                       + PyUnicode_GET_SIZE(buf->outobj);
 205 
 206     return 0;
 207 }
 208 #define REQUIRE_DECODEBUFFER(buf, s) {                                  \
 209     if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)             \
 210         if (expand_decodebuffer(buf, s) == -1)                          \
 211             goto errorexit;                                             \
 212 }
 213 
 214 
 215 /**
 216  * MultibyteCodec object
 217  */
 218 
 219 static int
 220 multibytecodec_encerror(MultibyteCodec *codec,
 221                         MultibyteCodec_State *state,
 222                         MultibyteEncodeBuffer *buf,
 223                         PyObject *errors, Py_ssize_t e)
 224 {
 225     PyObject *retobj = NULL, *retstr = NULL, *tobj;
 226     Py_ssize_t retstrsize, newpos;
 227     Py_ssize_t esize, start, end;
 228     const char *reason;
 229 
 230     if (e > 0) {
 231         reason = "illegal multibyte sequence";
 232         esize = e;
 233     }
 234     else {
 235         switch (e) {
 236         case MBERR_TOOSMALL:
 237             REQUIRE_ENCODEBUFFER(buf, -1);
 238             return 0; /* retry it */
 239         case MBERR_TOOFEW:
 240             reason = "incomplete multibyte sequence";
 241             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
 242             break;
 243         case MBERR_INTERNAL:
 244             PyErr_SetString(PyExc_RuntimeError,
 245                             "internal codec error");
 246             return -1;
 247         default:
 248             PyErr_SetString(PyExc_RuntimeError,
 249                             "unknown runtime error");
 250             return -1;
 251         }
 252     }
 253 
 254     if (errors == ERROR_REPLACE) {
 255         const Py_UNICODE replchar = '?', *inbuf = &replchar;
 256         Py_ssize_t r;
 257 
 258         for (;;) {
 259             Py_ssize_t outleft;
 260 
 261             outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
 262             r = codec->encode(state, codec->config, &inbuf, 1,
 263                               &buf->outbuf, outleft, 0);
 264             if (r == MBERR_TOOSMALL) {
 265                 REQUIRE_ENCODEBUFFER(buf, -1);
 266                 continue;
 267             }
 268             else
 269                 break;
 270         }
 271 
 272         if (r != 0) {
 273             REQUIRE_ENCODEBUFFER(buf, 1);
 274             *buf->outbuf++ = '?';
 275         }
 276     }
 277     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
 278         buf->inbuf += esize;
 279         return 0;
 280     }
 281 
 282     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
 283     end = start + esize;
 284 
 285     /* use cached exception object if available */
 286     if (buf->excobj == NULL) {
 287         buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
 288                         buf->inbuf_top,
 289                         buf->inbuf_end - buf->inbuf_top,
 290                         start, end, reason);
 291         if (buf->excobj == NULL)
 292             goto errorexit;
 293     }
 294     else
 295         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
 296             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
 297             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
 298             goto errorexit;
 299 
 300     if (errors == ERROR_STRICT) {
 301         PyCodec_StrictErrors(buf->excobj);
 302         goto errorexit;
 303     }
 304 
 305     retobj = call_error_callback(errors, buf->excobj);
 306     if (retobj == NULL)
 307         goto errorexit;
 308 
 309     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
 310         !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
 311         !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) ||
 312           PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) {
 313         PyErr_SetString(PyExc_TypeError,
 314                         "encoding error handler must return "
 315                         "(unicode, int) tuple");
 316         goto errorexit;
 317     }
 318 
 319     {
 320         const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
 321 
 322         retstr = multibytecodec_encode(codec, state, &uraw,
 323                         PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
 324                         MBENC_FLUSH);
 325         if (retstr == NULL)
 326             goto errorexit;
 327     }
 328 
 329     retstrsize = PyString_GET_SIZE(retstr);
 330     REQUIRE_ENCODEBUFFER(buf, retstrsize);
 331 
 332     memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
 333     buf->outbuf += retstrsize;
 334 
 335     newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
 336     if (newpos < 0 && !PyErr_Occurred())
 337         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
 338     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
 339         PyErr_Clear();
 340         PyErr_Format(PyExc_IndexError,
 341                      "position %zd from error handler out of bounds",
 342                      newpos);
 343         goto errorexit;
 344     }
 345     buf->inbuf = buf->inbuf_top + newpos;
 346 
 347     Py_DECREF(retobj);
 348     Py_DECREF(retstr);
 349     return 0;
 350 
 351 errorexit:
 352     Py_XDECREF(retobj);
 353     Py_XDECREF(retstr);
 354     return -1;
 355 }
 356 
 357 static int
 358 multibytecodec_decerror(MultibyteCodec *codec,
 359                         MultibyteCodec_State *state,
 360                         MultibyteDecodeBuffer *buf,
 361                         PyObject *errors, Py_ssize_t e)
 362 {
 363     PyObject *retobj = NULL, *retuni = NULL;
 364     Py_ssize_t retunisize, newpos;
 365     const char *reason;
 366     Py_ssize_t esize, start, end;
 367 
 368     if (e > 0) {
 369         reason = "illegal multibyte sequence";
 370         esize = e;
 371     }
 372     else {
 373         switch (e) {
 374         case MBERR_TOOSMALL:
 375             REQUIRE_DECODEBUFFER(buf, -1);
 376             return 0; /* retry it */
 377         case MBERR_TOOFEW:
 378             reason = "incomplete multibyte sequence";
 379             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
 380             break;
 381         case MBERR_INTERNAL:
 382             PyErr_SetString(PyExc_RuntimeError,
 383                             "internal codec error");
 384             return -1;
 385         default:
 386             PyErr_SetString(PyExc_RuntimeError,
 387                             "unknown runtime error");
 388             return -1;
 389         }
 390     }
 391 
 392     if (errors == ERROR_REPLACE) {
 393         REQUIRE_DECODEBUFFER(buf, 1);
 394         *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
 395     }
 396     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
 397         buf->inbuf += esize;
 398         return 0;
 399     }
 400 
 401     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
 402     end = start + esize;
 403 
 404     /* use cached exception object if available */
 405     if (buf->excobj == NULL) {
 406         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
 407                         (const char *)buf->inbuf_top,
 408                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
 409                         start, end, reason);
 410         if (buf->excobj == NULL)
 411             goto errorexit;
 412     }
 413     else
 414         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
 415             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
 416             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
 417             goto errorexit;
 418 
 419     if (errors == ERROR_STRICT) {
 420         PyCodec_StrictErrors(buf->excobj);
 421         goto errorexit;
 422     }
 423 
 424     retobj = call_error_callback(errors, buf->excobj);
 425     if (retobj == NULL)
 426         goto errorexit;
 427 
 428     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
 429         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
 430         !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) ||
 431           PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) {
 432         PyErr_SetString(PyExc_TypeError,
 433                         "decoding error handler must return "
 434                         "(unicode, int) tuple");
 435         goto errorexit;
 436     }
 437 
 438     retunisize = PyUnicode_GET_SIZE(retuni);
 439     if (retunisize > 0) {
 440         REQUIRE_DECODEBUFFER(buf, retunisize);
 441         memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
 442                         retunisize * Py_UNICODE_SIZE);
 443         buf->outbuf += retunisize;
 444     }
 445 
 446     newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
 447     if (newpos < 0 && !PyErr_Occurred())
 448         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
 449     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
 450         PyErr_Clear();
 451         PyErr_Format(PyExc_IndexError,
 452                      "position %zd from error handler out of bounds",
 453                      newpos);
 454         goto errorexit;
 455     }
 456     buf->inbuf = buf->inbuf_top + newpos;
 457     Py_DECREF(retobj);
 458     return 0;
 459 
 460 errorexit:
 461     Py_XDECREF(retobj);
 462     return -1;
 463 }
 464 
 465 static PyObject *
 466 multibytecodec_encode(MultibyteCodec *codec,
 467                       MultibyteCodec_State *state,
 468                       const Py_UNICODE **data, Py_ssize_t datalen,
 469                       PyObject *errors, int flags)
 470 {
 471     MultibyteEncodeBuffer buf;
 472     Py_ssize_t finalsize, r = 0;
 473 
 474     if (datalen == 0 && !(flags & MBENC_RESET))
 475         return PyString_FromString("");
 476 
 477     buf.excobj = NULL;
 478     buf.inbuf = buf.inbuf_top = *data;
 479     buf.inbuf_end = buf.inbuf_top + datalen;
 480 
 481     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
 482         PyErr_NoMemory();
 483         goto errorexit;
 484     }
 485 
 486     buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
 487     if (buf.outobj == NULL)
 488         goto errorexit;
 489     buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
 490     buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
 491 
 492     while (buf.inbuf < buf.inbuf_end) {
 493         Py_ssize_t inleft, outleft;
 494 
 495         /* we don't reuse inleft and outleft here.
 496          * error callbacks can relocate the cursor anywhere on buffer*/
 497         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
 498         outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
 499         r = codec->encode(state, codec->config, &buf.inbuf, inleft,
 500                           &buf.outbuf, outleft, flags);
 501         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
 502             break;
 503         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
 504             goto errorexit;
 505         else if (r == MBERR_TOOFEW)
 506             break;
 507     }
 508 
 509     if (codec->encreset != NULL && (flags & MBENC_RESET))
 510         for (;;) {
 511             Py_ssize_t outleft;
 512 
 513             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
 514             r = codec->encreset(state, codec->config, &buf.outbuf,
 515                                 outleft);
 516             if (r == 0)
 517                 break;
 518             else if (multibytecodec_encerror(codec, state,
 519                                              &buf, errors, r))
 520                 goto errorexit;
 521         }
 522 
 523     finalsize = (Py_ssize_t)((char *)buf.outbuf -
 524                              PyString_AS_STRING(buf.outobj));
 525 
 526     if (finalsize != PyString_GET_SIZE(buf.outobj))
 527         if (_PyString_Resize(&buf.outobj, finalsize) == -1)
 528             goto errorexit;
 529 
 530 	*data = buf.inbuf;
 531     Py_XDECREF(buf.excobj);
 532     return buf.outobj;
 533 
 534 errorexit:
 535     Py_XDECREF(buf.excobj);
 536     Py_XDECREF(buf.outobj);
The left operand of '==' is a garbage value
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

The left operand of '==' is a garbage value
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

537 return NULL; 538 } 539 540 static PyObject * 541 MultibyteCodec_Encode(MultibyteCodecObject *self, 542 PyObject *args, PyObject *kwargs) 543 { 544 MultibyteCodec_State state; 545 Py_UNICODE *data; 546 PyObject *errorcb, *r, *arg, *ucvt; 547 const char *errors = NULL; 548 Py_ssize_t datalen; 549 550 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", 551 codeckwarglist, &arg, &errors)) 552 return NULL; 553 554 if (PyUnicode_Check(arg)) 555 ucvt = NULL; 556 else { 557 arg = ucvt = PyObject_Unicode(arg); 558 if (arg == NULL) 559 return NULL; 560 else if (!PyUnicode_Check(arg)) { 561 PyErr_SetString(PyExc_TypeError, 562 "couldn't convert the object to unicode."); 563 Py_DECREF(ucvt); 564 return NULL; 565 } 566 } 567 568 data = PyUnicode_AS_UNICODE(arg); 569 datalen = PyUnicode_GET_SIZE(arg); 570 571 errorcb = internal_error_callback(errors); 572 if (errorcb == NULL) { 573 Py_XDECREF(ucvt); 574 return NULL; 575 } 576 577 if (self->codec->encinit != NULL && 578 self->codec->encinit(&state, self->codec->config) != 0) 579 goto errorexit; 580 r = multibytecodec_encode(self->codec, &state, 581 (const Py_UNICODE **)&data, datalen, errorcb, 582 MBENC_FLUSH | MBENC_RESET); 583 if (r == NULL) 584 goto errorexit; 585 586 ERROR_DECREF(errorcb); 587 Py_XDECREF(ucvt); 588 return make_tuple(r, datalen); 589 590 errorexit: 591 ERROR_DECREF(errorcb); 592 Py_XDECREF(ucvt); 593 return NULL; 594 } 595 596 static PyObject * 597 MultibyteCodec_Decode(MultibyteCodecObject *self, 598 PyObject *args, PyObject *kwargs) 599 { 600 MultibyteCodec_State state; 601 MultibyteDecodeBuffer buf; 602 PyObject *errorcb; 603 Py_buffer pdata; 604 const char *data, *errors = NULL; 605 Py_ssize_t datalen, finalsize; 606 607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode", 608 codeckwarglist, &pdata, &errors)) 609 return NULL; 610 data = pdata.buf; 611 datalen = pdata.len; 612 613 errorcb = internal_error_callback(errors); 614 if (errorcb == NULL) { 615 PyBuffer_Release(&pdata); 616 return NULL; 617 } 618 619 if (datalen == 0) { 620 PyBuffer_Release(&pdata); 621 ERROR_DECREF(errorcb); 622 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); 623 } 624 625 buf.excobj = NULL; 626 buf.inbuf = buf.inbuf_top = (unsigned char *)data; 627 buf.inbuf_end = buf.inbuf_top + datalen; 628 buf.outobj = PyUnicode_FromUnicode(NULL, datalen); 629 if (buf.outobj == NULL) 630 goto errorexit; 631 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); 632 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); 633 634 if (self->codec->decinit != NULL && 635 self->codec->decinit(&state, self->codec->config) != 0) 636 goto errorexit; 637 638 while (buf.inbuf < buf.inbuf_end) { 639 Py_ssize_t inleft, outleft, r; 640 641 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); 642 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 643 644 r = self->codec->decode(&state, self->codec->config, 645 &buf.inbuf, inleft, &buf.outbuf, outleft); 646 if (r == 0) 647 break; 648 else if (multibytecodec_decerror(self->codec, &state, 649 &buf, errorcb, r)) 650 goto errorexit; 651 } 652 653 finalsize = (Py_ssize_t)(buf.outbuf - 654 PyUnicode_AS_UNICODE(buf.outobj)); 655 656 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 657 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 658 goto errorexit; 659 660 PyBuffer_Release(&pdata); 661 Py_XDECREF(buf.excobj); 662 ERROR_DECREF(errorcb); 663 return make_tuple(buf.outobj, datalen); 664 665 errorexit: 666 PyBuffer_Release(&pdata); 667 ERROR_DECREF(errorcb); 668 Py_XDECREF(buf.excobj); 669 Py_XDECREF(buf.outobj); 670 671 return NULL; 672 } 673 674 static struct PyMethodDef multibytecodec_methods[] = { 675 {"encode", (PyCFunction)MultibyteCodec_Encode, 676 METH_VARARGS | METH_KEYWORDS, 677 MultibyteCodec_Encode__doc__}, 678 {"decode", (PyCFunction)MultibyteCodec_Decode, 679 METH_VARARGS | METH_KEYWORDS, 680 MultibyteCodec_Decode__doc__}, 681 {NULL, NULL}, 682 }; 683 684 static void 685 multibytecodec_dealloc(MultibyteCodecObject *self) 686 { 687 PyObject_Del(self); 688 } 689 690 static PyTypeObject MultibyteCodec_Type = { 691 PyVarObject_HEAD_INIT(NULL, 0) 692 "MultibyteCodec", /* tp_name */ 693 sizeof(MultibyteCodecObject), /* tp_basicsize */ 694 0, /* tp_itemsize */ 695 /* methods */ 696 (destructor)multibytecodec_dealloc, /* tp_dealloc */ 697 0, /* tp_print */ 698 0, /* tp_getattr */ 699 0, /* tp_setattr */ 700 0, /* tp_compare */ 701 0, /* tp_repr */ 702 0, /* tp_as_number */ 703 0, /* tp_as_sequence */ 704 0, /* tp_as_mapping */ 705 0, /* tp_hash */ 706 0, /* tp_call */ 707 0, /* tp_str */ 708 PyObject_GenericGetAttr, /* tp_getattro */ 709 0, /* tp_setattro */ 710 0, /* tp_as_buffer */ 711 Py_TPFLAGS_DEFAULT, /* tp_flags */ 712 0, /* tp_doc */ 713 0, /* tp_traverse */ 714 0, /* tp_clear */ 715 0, /* tp_richcompare */ 716 0, /* tp_weaklistoffset */ 717 0, /* tp_iter */ 718 0, /* tp_iterext */ 719 multibytecodec_methods, /* tp_methods */ 720 }; 721 722 723 /** 724 * Utility functions for stateful codec mechanism 725 */ 726 727 #define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) 728 #define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) 729 730 static PyObject * 731 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, 732 PyObject *unistr, int final) 733 { 734 PyObject *ucvt, *r = NULL; 735 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; 736 Py_ssize_t datalen, origpending; 737 738 if (PyUnicode_Check(unistr)) 739 ucvt = NULL; 740 else { 741 unistr = ucvt = PyObject_Unicode(unistr); 742 if (unistr == NULL) 743 return NULL; 744 else if (!PyUnicode_Check(unistr)) { 745 PyErr_SetString(PyExc_TypeError, 746 "couldn't convert the object to unicode."); 747 Py_DECREF(ucvt); 748 return NULL; 749 } 750 } 751 752 datalen = PyUnicode_GET_SIZE(unistr); 753 origpending = ctx->pendingsize; 754 755 if (origpending > 0) { 756 if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { 757 PyErr_NoMemory(); 758 /* inbuf_tmp == NULL */ 759 goto errorexit; 760 } 761 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); 762 if (inbuf_tmp == NULL) 763 goto errorexit; 764 memcpy(inbuf_tmp, ctx->pending, 765 Py_UNICODE_SIZE * ctx->pendingsize); 766 memcpy(inbuf_tmp + ctx->pendingsize, 767 PyUnicode_AS_UNICODE(unistr), 768 Py_UNICODE_SIZE * datalen); 769 datalen += ctx->pendingsize; 770 ctx->pendingsize = 0; 771 inbuf = inbuf_tmp; 772 } 773 else 774 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); 775 776 inbuf_end = inbuf + datalen; 777 778 r = multibytecodec_encode(ctx->codec, &ctx->state, 779 (const Py_UNICODE **)&inbuf, datalen, 780 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); 781 if (r == NULL) { 782 /* recover the original pending buffer */ 783 if (origpending > 0) 784 memcpy(ctx->pending, inbuf_tmp, 785 Py_UNICODE_SIZE * origpending); 786 ctx->pendingsize = origpending; 787 goto errorexit; 788 } 789 790 if (inbuf < inbuf_end) { 791 ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); 792 if (ctx->pendingsize > MAXENCPENDING) { 793 /* normal codecs can't reach here */ 794 ctx->pendingsize = 0; 795 PyErr_SetString(PyExc_UnicodeError, 796 "pending buffer overflow"); 797 goto errorexit; 798 } 799 memcpy(ctx->pending, inbuf, 800 ctx->pendingsize * Py_UNICODE_SIZE); 801 } 802 803 if (inbuf_tmp != NULL) 804 PyMem_Del(inbuf_tmp); 805 Py_XDECREF(ucvt); 806 return r; 807 808 errorexit: 809 if (inbuf_tmp != NULL) 810 PyMem_Del(inbuf_tmp); 811 Py_XDECREF(r); 812 Py_XDECREF(ucvt); 813 return NULL; 814 } 815 816 static int 817 decoder_append_pending(MultibyteStatefulDecoderContext *ctx, 818 MultibyteDecodeBuffer *buf) 819 { 820 Py_ssize_t npendings; 821 822 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 823 if (npendings + ctx->pendingsize > MAXDECPENDING || 824 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { 825 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); 826 return -1; 827 } 828 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); 829 ctx->pendingsize += npendings; 830 return 0; 831 } 832 833 static int 834 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, 835 Py_ssize_t size) 836 { 837 buf->inbuf = buf->inbuf_top = (const unsigned char *)data; 838 buf->inbuf_end = buf->inbuf_top + size; 839 if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ 840 buf->outobj = PyUnicode_FromUnicode(NULL, size); 841 if (buf->outobj == NULL) 842 return -1; 843 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); 844 buf->outbuf_end = buf->outbuf + 845 PyUnicode_GET_SIZE(buf->outobj); 846 } 847 848 return 0; 849 } 850 851 static int 852 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, 853 MultibyteDecodeBuffer *buf) 854 { 855 while (buf->inbuf < buf->inbuf_end) { 856 Py_ssize_t inleft, outleft; 857 Py_ssize_t r; 858 859 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 860 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); 861 862 r = ctx->codec->decode(&ctx->state, ctx->codec->config, 863 &buf->inbuf, inleft, &buf->outbuf, outleft); 864 if (r == 0 || r == MBERR_TOOFEW) 865 break; 866 else if (multibytecodec_decerror(ctx->codec, &ctx->state, 867 buf, ctx->errors, r)) 868 return -1; 869 } 870 return 0; 871 } 872 873 874 /** 875 * MultibyteIncrementalEncoder object 876 */ 877 878 static PyObject * 879 mbiencoder_encode(MultibyteIncrementalEncoderObject *self, 880 PyObject *args, PyObject *kwargs) 881 { 882 PyObject *data; 883 int final = 0; 884 885 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", 886 incrementalkwarglist, &data, &final)) 887 return NULL; 888 889 return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); 890 } 891 892 static PyObject * 893 mbiencoder_reset(MultibyteIncrementalEncoderObject *self) 894 { 895 if (self->codec->decreset != NULL && 896 self->codec->decreset(&self->state, self->codec->config) != 0) 897 return NULL; 898 self->pendingsize = 0; 899 900 Py_RETURN_NONE; 901 } 902 903 static struct PyMethodDef mbiencoder_methods[] = { 904 {"encode", (PyCFunction)mbiencoder_encode, 905 METH_VARARGS | METH_KEYWORDS, NULL}, 906 {"reset", (PyCFunction)mbiencoder_reset, 907 METH_NOARGS, NULL}, 908 {NULL, NULL}, 909 }; 910 911 static PyObject * 912 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 913 { 914 MultibyteIncrementalEncoderObject *self; 915 PyObject *codec = NULL; 916 char *errors = NULL; 917 918 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", 919 incnewkwarglist, &errors)) 920 return NULL; 921 922 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); 923 if (self == NULL) 924 return NULL; 925 926 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 927 if (codec == NULL) 928 goto errorexit; 929 if (!MultibyteCodec_Check(codec)) { 930 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 931 goto errorexit; 932 } 933 934 self->codec = ((MultibyteCodecObject *)codec)->codec; 935 self->pendingsize = 0; 936 self->errors = internal_error_callback(errors); 937 if (self->errors == NULL) 938 goto errorexit; 939 if (self->codec->encinit != NULL && 940 self->codec->encinit(&self->state, self->codec->config) != 0) 941 goto errorexit; 942 943 Py_DECREF(codec); 944 return (PyObject *)self; 945 946 errorexit: 947 Py_XDECREF(self); 948 Py_XDECREF(codec); 949 return NULL; 950 } 951 952 static int 953 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) 954 { 955 return 0; 956 } 957 958 static int 959 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, 960 visitproc visit, void *arg) 961 { 962 if (ERROR_ISCUSTOM(self->errors)) 963 Py_VISIT(self->errors); 964 return 0; 965 } 966 967 static void 968 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) 969 { 970 PyObject_GC_UnTrack(self); 971 ERROR_DECREF(self->errors); 972 Py_TYPE(self)->tp_free(self); 973 } 974 975 static PyTypeObject MultibyteIncrementalEncoder_Type = { 976 PyVarObject_HEAD_INIT(NULL, 0) 977 "MultibyteIncrementalEncoder", /* tp_name */ 978 sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ 979 0, /* tp_itemsize */ 980 /* methods */ 981 (destructor)mbiencoder_dealloc, /* tp_dealloc */ 982 0, /* tp_print */ 983 0, /* tp_getattr */ 984 0, /* tp_setattr */ 985 0, /* tp_compare */ 986 0, /* tp_repr */ 987 0, /* tp_as_number */ 988 0, /* tp_as_sequence */ 989 0, /* tp_as_mapping */ 990 0, /* tp_hash */ 991 0, /* tp_call */ 992 0, /* tp_str */ 993 PyObject_GenericGetAttr, /* tp_getattro */ 994 0, /* tp_setattro */ 995 0, /* tp_as_buffer */ 996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 997 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 998 0, /* tp_doc */ 999 (traverseproc)mbiencoder_traverse, /* tp_traverse */ 1000 0, /* tp_clear */ 1001 0, /* tp_richcompare */ 1002 0, /* tp_weaklistoffset */ 1003 0, /* tp_iter */ 1004 0, /* tp_iterext */ 1005 mbiencoder_methods, /* tp_methods */ 1006 0, /* tp_members */ 1007 codecctx_getsets, /* tp_getset */ 1008 0, /* tp_base */ 1009 0, /* tp_dict */ 1010 0, /* tp_descr_get */ 1011 0, /* tp_descr_set */ 1012 0, /* tp_dictoffset */ 1013 mbiencoder_init, /* tp_init */ 1014 0, /* tp_alloc */ 1015 mbiencoder_new, /* tp_new */ 1016 }; 1017 1018 1019 /** 1020 * MultibyteIncrementalDecoder object 1021 */ 1022 1023 static PyObject * 1024 mbidecoder_decode(MultibyteIncrementalDecoderObject *self, 1025 PyObject *args, PyObject *kwargs) 1026 { 1027 MultibyteDecodeBuffer buf; 1028 char *data, *wdata = NULL; 1029 Py_buffer pdata; 1030 Py_ssize_t wsize, finalsize = 0, size, origpending; 1031 int final = 0; 1032 1033 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode", 1034 incrementalkwarglist, &pdata, &final)) 1035 return NULL; 1036 data = pdata.buf; 1037 size = pdata.len; 1038 1039 buf.outobj = buf.excobj = NULL; 1040 origpending = self->pendingsize; 1041 1042 if (self->pendingsize == 0) { 1043 wsize = size; 1044 wdata = data; 1045 } 1046 else { 1047 if (size > PY_SSIZE_T_MAX - self->pendingsize) { 1048 PyErr_NoMemory(); 1049 goto errorexit; 1050 } 1051 wsize = size + self->pendingsize; 1052 wdata = PyMem_Malloc(wsize); 1053 if (wdata == NULL) 1054 goto errorexit; 1055 memcpy(wdata, self->pending, self->pendingsize); 1056 memcpy(wdata + self->pendingsize, data, size); 1057 self->pendingsize = 0; 1058 } 1059 1060 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) 1061 goto errorexit; 1062 1063 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) 1064 goto errorexit; 1065 1066 if (final && buf.inbuf < buf.inbuf_end) { 1067 if (multibytecodec_decerror(self->codec, &self->state, 1068 &buf, self->errors, MBERR_TOOFEW)) { 1069 /* recover the original pending buffer */ 1070 memcpy(self->pending, wdata, origpending); 1071 self->pendingsize = origpending; 1072 goto errorexit; 1073 } 1074 } 1075 1076 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ 1077 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) 1078 goto errorexit; 1079 } 1080 1081 finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); 1082 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 1083 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 1084 goto errorexit; 1085 1086 PyBuffer_Release(&pdata); 1087 if (wdata != data) 1088 PyMem_Del(wdata); 1089 Py_XDECREF(buf.excobj); 1090 return buf.outobj; 1091 1092 errorexit: 1093 PyBuffer_Release(&pdata); 1094 if (wdata != NULL && wdata != data) 1095 PyMem_Del(wdata); 1096 Py_XDECREF(buf.excobj); 1097 Py_XDECREF(buf.outobj); 1098 return NULL; 1099 } 1100 1101 static PyObject * 1102 mbidecoder_reset(MultibyteIncrementalDecoderObject *self) 1103 { 1104 if (self->codec->decreset != NULL && 1105 self->codec->decreset(&self->state, self->codec->config) != 0) 1106 return NULL; 1107 self->pendingsize = 0; 1108 1109 Py_RETURN_NONE; 1110 } 1111 1112 static struct PyMethodDef mbidecoder_methods[] = { 1113 {"decode", (PyCFunction)mbidecoder_decode, 1114 METH_VARARGS | METH_KEYWORDS, NULL}, 1115 {"reset", (PyCFunction)mbidecoder_reset, 1116 METH_NOARGS, NULL}, 1117 {NULL, NULL}, 1118 }; 1119 1120 static PyObject * 1121 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1122 { 1123 MultibyteIncrementalDecoderObject *self; 1124 PyObject *codec = NULL; 1125 char *errors = NULL; 1126 1127 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", 1128 incnewkwarglist, &errors)) 1129 return NULL; 1130 1131 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); 1132 if (self == NULL) 1133 return NULL; 1134 1135 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1136 if (codec == NULL) 1137 goto errorexit; 1138 if (!MultibyteCodec_Check(codec)) { 1139 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1140 goto errorexit; 1141 } 1142 1143 self->codec = ((MultibyteCodecObject *)codec)->codec; 1144 self->pendingsize = 0; 1145 self->errors = internal_error_callback(errors); 1146 if (self->errors == NULL) 1147 goto errorexit; 1148 if (self->codec->decinit != NULL && 1149 self->codec->decinit(&self->state, self->codec->config) != 0) 1150 goto errorexit; 1151 1152 Py_DECREF(codec); 1153 return (PyObject *)self; 1154 1155 errorexit: 1156 Py_XDECREF(self); 1157 Py_XDECREF(codec); 1158 return NULL; 1159 } 1160 1161 static int 1162 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) 1163 { 1164 return 0; 1165 } 1166 1167 static int 1168 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, 1169 visitproc visit, void *arg) 1170 { 1171 if (ERROR_ISCUSTOM(self->errors)) 1172 Py_VISIT(self->errors); 1173 return 0; 1174 } 1175 1176 static void 1177 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) 1178 { 1179 PyObject_GC_UnTrack(self); 1180 ERROR_DECREF(self->errors); 1181 Py_TYPE(self)->tp_free(self); 1182 } 1183 1184 static PyTypeObject MultibyteIncrementalDecoder_Type = { 1185 PyVarObject_HEAD_INIT(NULL, 0) 1186 "MultibyteIncrementalDecoder", /* tp_name */ 1187 sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ 1188 0, /* tp_itemsize */ 1189 /* methods */ 1190 (destructor)mbidecoder_dealloc, /* tp_dealloc */ 1191 0, /* tp_print */ 1192 0, /* tp_getattr */ 1193 0, /* tp_setattr */ 1194 0, /* tp_compare */ 1195 0, /* tp_repr */ 1196 0, /* tp_as_number */ 1197 0, /* tp_as_sequence */ 1198 0, /* tp_as_mapping */ 1199 0, /* tp_hash */ 1200 0, /* tp_call */ 1201 0, /* tp_str */ 1202 PyObject_GenericGetAttr, /* tp_getattro */ 1203 0, /* tp_setattro */ 1204 0, /* tp_as_buffer */ 1205 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1206 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1207 0, /* tp_doc */ 1208 (traverseproc)mbidecoder_traverse, /* tp_traverse */ 1209 0, /* tp_clear */ 1210 0, /* tp_richcompare */ 1211 0, /* tp_weaklistoffset */ 1212 0, /* tp_iter */ 1213 0, /* tp_iterext */ 1214 mbidecoder_methods, /* tp_methods */ 1215 0, /* tp_members */ 1216 codecctx_getsets, /* tp_getset */ 1217 0, /* tp_base */ 1218 0, /* tp_dict */ 1219 0, /* tp_descr_get */ 1220 0, /* tp_descr_set */ 1221 0, /* tp_dictoffset */ 1222 mbidecoder_init, /* tp_init */ 1223 0, /* tp_alloc */ 1224 mbidecoder_new, /* tp_new */ 1225 }; 1226 1227 1228 /** 1229 * MultibyteStreamReader object 1230 */ 1231 1232 static PyObject * 1233 mbstreamreader_iread(MultibyteStreamReaderObject *self, 1234 const char *method, Py_ssize_t sizehint) 1235 { 1236 MultibyteDecodeBuffer buf; 1237 PyObject *cres; 1238 Py_ssize_t rsize, finalsize = 0; 1239 1240 if (sizehint == 0) 1241 return PyUnicode_FromUnicode(NULL, 0); 1242 1243 buf.outobj = buf.excobj = NULL; 1244 cres = NULL; 1245 1246 for (;;) { 1247 int endoffile; 1248 1249 if (sizehint < 0) 1250 cres = PyObject_CallMethod(self->stream, 1251 (char *)method, NULL); 1252 else 1253 cres = PyObject_CallMethod(self->stream, 1254 (char *)method, "i", sizehint); 1255 if (cres == NULL) 1256 goto errorexit; 1257 1258 if (!PyString_Check(cres)) { 1259 PyErr_SetString(PyExc_TypeError, 1260 "stream function returned a " 1261 "non-string object"); 1262 goto errorexit; 1263 } 1264 1265 endoffile = (PyString_GET_SIZE(cres) == 0); 1266 1267 if (self->pendingsize > 0) { 1268 PyObject *ctr; 1269 char *ctrdata; 1270 1271 if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { 1272 PyErr_NoMemory(); 1273 goto errorexit; 1274 } 1275 rsize = PyString_GET_SIZE(cres) + self->pendingsize; 1276 ctr = PyString_FromStringAndSize(NULL, rsize); 1277 if (ctr == NULL) 1278 goto errorexit; 1279 ctrdata = PyString_AS_STRING(ctr); 1280 memcpy(ctrdata, self->pending, self->pendingsize); 1281 memcpy(ctrdata + self->pendingsize, 1282 PyString_AS_STRING(cres), 1283 PyString_GET_SIZE(cres)); 1284 Py_DECREF(cres); 1285 cres = ctr; 1286 self->pendingsize = 0; 1287 } 1288 1289 rsize = PyString_GET_SIZE(cres); 1290 if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), 1291 rsize) != 0) 1292 goto errorexit; 1293 1294 if (rsize > 0 && decoder_feed_buffer( 1295 (MultibyteStatefulDecoderContext *)self, &buf)) 1296 goto errorexit; 1297 1298 if (endoffile || sizehint < 0) { 1299 if (buf.inbuf < buf.inbuf_end && 1300 multibytecodec_decerror(self->codec, &self->state, 1301 &buf, self->errors, MBERR_TOOFEW)) 1302 goto errorexit; 1303 } 1304 1305 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ 1306 if (decoder_append_pending(STATEFUL_DCTX(self), 1307 &buf) != 0) 1308 goto errorexit; 1309 } 1310 1311 finalsize = (Py_ssize_t)(buf.outbuf - 1312 PyUnicode_AS_UNICODE(buf.outobj)); 1313 Py_DECREF(cres); 1314 cres = NULL; 1315 1316 if (sizehint < 0 || finalsize != 0 || rsize == 0) 1317 break; 1318 1319 sizehint = 1; /* read 1 more byte and retry */ 1320 } 1321 1322 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 1323 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 1324 goto errorexit; 1325 1326 Py_XDECREF(cres); 1327 Py_XDECREF(buf.excobj); 1328 return buf.outobj; 1329 1330 errorexit: 1331 Py_XDECREF(cres); 1332 Py_XDECREF(buf.excobj); 1333 Py_XDECREF(buf.outobj); 1334 return NULL; 1335 } 1336 1337 static PyObject * 1338 mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) 1339 { 1340 PyObject *sizeobj = NULL; 1341 Py_ssize_t size; 1342 1343 if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) 1344 return NULL; 1345 1346 if (sizeobj == Py_None || sizeobj == NULL) 1347 size = -1; 1348 else if (PyInt_Check(sizeobj)) 1349 size = PyInt_AsSsize_t(sizeobj); 1350 else { 1351 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1352 return NULL; 1353 } 1354 1355 return mbstreamreader_iread(self, "read", size); 1356 } 1357 1358 static PyObject * 1359 mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) 1360 { 1361 PyObject *sizeobj = NULL; 1362 Py_ssize_t size; 1363 1364 if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) 1365 return NULL; 1366 1367 if (sizeobj == Py_None || sizeobj == NULL) 1368 size = -1; 1369 else if (PyInt_Check(sizeobj)) 1370 size = PyInt_AsSsize_t(sizeobj); 1371 else { 1372 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1373 return NULL; 1374 } 1375 1376 return mbstreamreader_iread(self, "readline", size); 1377 } 1378 1379 static PyObject * 1380 mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) 1381 { 1382 PyObject *sizehintobj = NULL, *r, *sr; 1383 Py_ssize_t sizehint; 1384 1385 if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) 1386 return NULL; 1387 1388 if (sizehintobj == Py_None || sizehintobj == NULL) 1389 sizehint = -1; 1390 else if (PyInt_Check(sizehintobj)) 1391 sizehint = PyInt_AsSsize_t(sizehintobj); 1392 else { 1393 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1394 return NULL; 1395 } 1396 1397 r = mbstreamreader_iread(self, "read", sizehint); 1398 if (r == NULL) 1399 return NULL; 1400 1401 sr = PyUnicode_Splitlines(r, 1); 1402 Py_DECREF(r); 1403 return sr; 1404 } 1405 1406 static PyObject * 1407 mbstreamreader_reset(MultibyteStreamReaderObject *self) 1408 { 1409 if (self->codec->decreset != NULL && 1410 self->codec->decreset(&self->state, self->codec->config) != 0) 1411 return NULL; 1412 self->pendingsize = 0; 1413 1414 Py_RETURN_NONE; 1415 } 1416 1417 static struct PyMethodDef mbstreamreader_methods[] = { 1418 {"read", (PyCFunction)mbstreamreader_read, 1419 METH_VARARGS, NULL}, 1420 {"readline", (PyCFunction)mbstreamreader_readline, 1421 METH_VARARGS, NULL}, 1422 {"readlines", (PyCFunction)mbstreamreader_readlines, 1423 METH_VARARGS, NULL}, 1424 {"reset", (PyCFunction)mbstreamreader_reset, 1425 METH_NOARGS, NULL}, 1426 {NULL, NULL}, 1427 }; 1428 1429 static PyMemberDef mbstreamreader_members[] = { 1430 {"stream", T_OBJECT, 1431 offsetof(MultibyteStreamReaderObject, stream), 1432 READONLY, NULL}, 1433 {NULL,} 1434 }; 1435 1436 static PyObject * 1437 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1438 { 1439 MultibyteStreamReaderObject *self; 1440 PyObject *stream, *codec = NULL; 1441 char *errors = NULL; 1442 1443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", 1444 streamkwarglist, &stream, &errors)) 1445 return NULL; 1446 1447 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); 1448 if (self == NULL) 1449 return NULL; 1450 1451 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1452 if (codec == NULL) 1453 goto errorexit; 1454 if (!MultibyteCodec_Check(codec)) { 1455 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1456 goto errorexit; 1457 } 1458 1459 self->codec = ((MultibyteCodecObject *)codec)->codec; 1460 self->stream = stream; 1461 Py_INCREF(stream); 1462 self->pendingsize = 0; 1463 self->errors = internal_error_callback(errors); 1464 if (self->errors == NULL) 1465 goto errorexit; 1466 if (self->codec->decinit != NULL && 1467 self->codec->decinit(&self->state, self->codec->config) != 0) 1468 goto errorexit; 1469 1470 Py_DECREF(codec); 1471 return (PyObject *)self; 1472 1473 errorexit: 1474 Py_XDECREF(self); 1475 Py_XDECREF(codec); 1476 return NULL; 1477 } 1478 1479 static int 1480 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) 1481 { 1482 return 0; 1483 } 1484 1485 static int 1486 mbstreamreader_traverse(MultibyteStreamReaderObject *self, 1487 visitproc visit, void *arg) 1488 { 1489 if (ERROR_ISCUSTOM(self->errors)) 1490 Py_VISIT(self->errors); 1491 Py_VISIT(self->stream); 1492 return 0; 1493 } 1494 1495 static void 1496 mbstreamreader_dealloc(MultibyteStreamReaderObject *self) 1497 { 1498 PyObject_GC_UnTrack(self); 1499 ERROR_DECREF(self->errors); 1500 Py_XDECREF(self->stream); 1501 Py_TYPE(self)->tp_free(self); 1502 } 1503 1504 static PyTypeObject MultibyteStreamReader_Type = { 1505 PyVarObject_HEAD_INIT(NULL, 0) 1506 "MultibyteStreamReader", /* tp_name */ 1507 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ 1508 0, /* tp_itemsize */ 1509 /* methods */ 1510 (destructor)mbstreamreader_dealloc, /* tp_dealloc */ 1511 0, /* tp_print */ 1512 0, /* tp_getattr */ 1513 0, /* tp_setattr */ 1514 0, /* tp_compare */ 1515 0, /* tp_repr */ 1516 0, /* tp_as_number */ 1517 0, /* tp_as_sequence */ 1518 0, /* tp_as_mapping */ 1519 0, /* tp_hash */ 1520 0, /* tp_call */ 1521 0, /* tp_str */ 1522 PyObject_GenericGetAttr, /* tp_getattro */ 1523 0, /* tp_setattro */ 1524 0, /* tp_as_buffer */ 1525 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1526 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1527 0, /* tp_doc */ 1528 (traverseproc)mbstreamreader_traverse, /* tp_traverse */ 1529 0, /* tp_clear */ 1530 0, /* tp_richcompare */ 1531 0, /* tp_weaklistoffset */ 1532 0, /* tp_iter */ 1533 0, /* tp_iterext */ 1534 mbstreamreader_methods, /* tp_methods */ 1535 mbstreamreader_members, /* tp_members */ 1536 codecctx_getsets, /* tp_getset */ 1537 0, /* tp_base */ 1538 0, /* tp_dict */ 1539 0, /* tp_descr_get */ 1540 0, /* tp_descr_set */ 1541 0, /* tp_dictoffset */ 1542 mbstreamreader_init, /* tp_init */ 1543 0, /* tp_alloc */ 1544 mbstreamreader_new, /* tp_new */ 1545 }; 1546 1547 1548 /** 1549 * MultibyteStreamWriter object 1550 */ 1551 1552 static int 1553 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, 1554 PyObject *unistr) 1555 { 1556 PyObject *str, *wr; 1557 1558 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); 1559 if (str == NULL) 1560 return -1; 1561 1562 wr = PyObject_CallMethod(self->stream, "write", "O", str); 1563 Py_DECREF(str); 1564 if (wr == NULL) 1565 return -1; 1566 1567 Py_DECREF(wr); 1568 return 0; 1569 } 1570 1571 static PyObject * 1572 mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj) 1573 { 1574 if (mbstreamwriter_iwrite(self, strobj)) 1575 return NULL; 1576 else 1577 Py_RETURN_NONE; 1578 } 1579 1580 static PyObject * 1581 mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) 1582 { 1583 PyObject *strobj; 1584 int i, r; 1585 1586 if (!PySequence_Check(lines)) { 1587 PyErr_SetString(PyExc_TypeError, 1588 "arg must be a sequence object"); 1589 return NULL; 1590 } 1591 1592 for (i = 0; i < PySequence_Length(lines); i++) { 1593 /* length can be changed even within this loop */ 1594 strobj = PySequence_GetItem(lines, i); 1595 if (strobj == NULL) 1596 return NULL; 1597 1598 r = mbstreamwriter_iwrite(self, strobj); 1599 Py_DECREF(strobj); 1600 if (r == -1) 1601 return NULL; 1602 } 1603 1604 Py_RETURN_NONE; 1605 } 1606 1607 static PyObject * 1608 mbstreamwriter_reset(MultibyteStreamWriterObject *self) 1609 { 1610 const Py_UNICODE *pending; 1611 PyObject *pwrt; 1612 1613 pending = self->pending; 1614 pwrt = multibytecodec_encode(self->codec, &self->state, 1615 &pending, self->pendingsize, self->errors, 1616 MBENC_FLUSH | MBENC_RESET); 1617 /* some pending buffer can be truncated when UnicodeEncodeError is 1618 * raised on 'strict' mode. but, 'reset' method is designed to 1619 * reset the pending buffer or states so failed string sequence 1620 * ought to be missed */ 1621 self->pendingsize = 0; 1622 if (pwrt == NULL) 1623 return NULL; 1624 1625 if (PyString_Size(pwrt) > 0) { 1626 PyObject *wr; 1627 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); 1628 if (wr == NULL) { 1629 Py_DECREF(pwrt); 1630 return NULL; 1631 } 1632 } 1633 Py_DECREF(pwrt); 1634 1635 Py_RETURN_NONE; 1636 } 1637 1638 static PyObject * 1639 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1640 { 1641 MultibyteStreamWriterObject *self; 1642 PyObject *stream, *codec = NULL; 1643 char *errors = NULL; 1644 1645 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", 1646 streamkwarglist, &stream, &errors)) 1647 return NULL; 1648 1649 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); 1650 if (self == NULL) 1651 return NULL; 1652 1653 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1654 if (codec == NULL) 1655 goto errorexit; 1656 if (!MultibyteCodec_Check(codec)) { 1657 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1658 goto errorexit; 1659 } 1660 1661 self->codec = ((MultibyteCodecObject *)codec)->codec; 1662 self->stream = stream; 1663 Py_INCREF(stream); 1664 self->pendingsize = 0; 1665 self->errors = internal_error_callback(errors); 1666 if (self->errors == NULL) 1667 goto errorexit; 1668 if (self->codec->encinit != NULL && 1669 self->codec->encinit(&self->state, self->codec->config) != 0) 1670 goto errorexit; 1671 1672 Py_DECREF(codec); 1673 return (PyObject *)self; 1674 1675 errorexit: 1676 Py_XDECREF(self); 1677 Py_XDECREF(codec); 1678 return NULL; 1679 } 1680 1681 static int 1682 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) 1683 { 1684 return 0; 1685 } 1686 1687 static int 1688 mbstreamwriter_traverse(MultibyteStreamWriterObject *self, 1689 visitproc visit, void *arg) 1690 { 1691 if (ERROR_ISCUSTOM(self->errors)) 1692 Py_VISIT(self->errors); 1693 Py_VISIT(self->stream); 1694 return 0; 1695 } 1696 1697 static void 1698 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) 1699 { 1700 PyObject_GC_UnTrack(self); 1701 ERROR_DECREF(self->errors); 1702 Py_XDECREF(self->stream); 1703 Py_TYPE(self)->tp_free(self); 1704 } 1705 1706 static struct PyMethodDef mbstreamwriter_methods[] = { 1707 {"write", (PyCFunction)mbstreamwriter_write, 1708 METH_O, NULL}, 1709 {"writelines", (PyCFunction)mbstreamwriter_writelines, 1710 METH_O, NULL}, 1711 {"reset", (PyCFunction)mbstreamwriter_reset, 1712 METH_NOARGS, NULL}, 1713 {NULL, NULL}, 1714 }; 1715 1716 static PyMemberDef mbstreamwriter_members[] = { 1717 {"stream", T_OBJECT, 1718 offsetof(MultibyteStreamWriterObject, stream), 1719 READONLY, NULL}, 1720 {NULL,} 1721 }; 1722 1723 static PyTypeObject MultibyteStreamWriter_Type = { 1724 PyVarObject_HEAD_INIT(NULL, 0) 1725 "MultibyteStreamWriter", /* tp_name */ 1726 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ 1727 0, /* tp_itemsize */ 1728 /* methods */ 1729 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ 1730 0, /* tp_print */ 1731 0, /* tp_getattr */ 1732 0, /* tp_setattr */ 1733 0, /* tp_compare */ 1734 0, /* tp_repr */ 1735 0, /* tp_as_number */ 1736 0, /* tp_as_sequence */ 1737 0, /* tp_as_mapping */ 1738 0, /* tp_hash */ 1739 0, /* tp_call */ 1740 0, /* tp_str */ 1741 PyObject_GenericGetAttr, /* tp_getattro */ 1742 0, /* tp_setattro */ 1743 0, /* tp_as_buffer */ 1744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1745 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1746 0, /* tp_doc */ 1747 (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ 1748 0, /* tp_clear */ 1749 0, /* tp_richcompare */ 1750 0, /* tp_weaklistoffset */ 1751 0, /* tp_iter */ 1752 0, /* tp_iterext */ 1753 mbstreamwriter_methods, /* tp_methods */ 1754 mbstreamwriter_members, /* tp_members */ 1755 codecctx_getsets, /* tp_getset */ 1756 0, /* tp_base */ 1757 0, /* tp_dict */ 1758 0, /* tp_descr_get */ 1759 0, /* tp_descr_set */ 1760 0, /* tp_dictoffset */ 1761 mbstreamwriter_init, /* tp_init */ 1762 0, /* tp_alloc */ 1763 mbstreamwriter_new, /* tp_new */ 1764 }; 1765 1766 1767 /** 1768 * Exposed factory function 1769 */ 1770 1771 static PyObject * 1772 __create_codec(PyObject *ignore, PyObject *arg) 1773 { 1774 MultibyteCodecObject *self; 1775 MultibyteCodec *codec; 1776 1777 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { 1778 PyErr_SetString(PyExc_ValueError, "argument type invalid"); 1779 return NULL; 1780 } 1781 1782 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); 1783 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) 1784 return NULL; 1785 1786 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); 1787 if (self == NULL) 1788 return NULL; 1789 self->codec = codec; 1790 1791 return (PyObject *)self; 1792 } 1793 1794 static struct PyMethodDef __methods[] = { 1795 {"__create_codec", (PyCFunction)__create_codec, METH_O}, 1796 {NULL, NULL}, 1797 }; 1798 1799 PyMODINIT_FUNC 1800 init_multibytecodec(void) 1801 { 1802 int i; 1803 PyObject *m; 1804 PyTypeObject *typelist[] = { 1805 &MultibyteIncrementalEncoder_Type, 1806 &MultibyteIncrementalDecoder_Type, 1807 &MultibyteStreamReader_Type, 1808 &MultibyteStreamWriter_Type, 1809 NULL 1810 }; 1811 1812 if (PyType_Ready(&MultibyteCodec_Type) < 0) 1813 return; 1814 1815 m = Py_InitModule("_multibytecodec", __methods); 1816 if (m == NULL) 1817 return; 1818 1819 for (i = 0; typelist[i] != NULL; i++) { 1820 if (PyType_Ready(typelist[i]) < 0) 1821 return; 1822 Py_INCREF(typelist[i]); 1823 PyModule_AddObject(m, typelist[i]->tp_name, 1824 (PyObject *)typelist[i]); 1825 } 1826 1827 if (PyErr_Occurred()) 1828 Py_FatalError("can't initialize the _multibytecodec module"); 1829 }