Python-2.7.3/Modules/_codecsmodule.c

No issues found

   1 /* ------------------------------------------------------------------------
   2 
   3    _codecs -- Provides access to the codec registry and the builtin
   4               codecs.
   5 
   6    This module should never be imported directly. The standard library
   7    module "codecs" wraps this builtin module for use within Python.
   8 
   9    The codec registry is accessible via:
  10 
  11      register(search_function) -> None
  12 
  13      lookup(encoding) -> CodecInfo object
  14 
  15    The builtin Unicode codecs use the following interface:
  16 
  17      <encoding>_encode(Unicode_object[,errors='strict']) ->
  18         (string object, bytes consumed)
  19 
  20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
  21         (Unicode object, bytes consumed)
  22 
  23    <encoding>_encode() interfaces also accept non-Unicode object as
  24    input. The objects are then converted to Unicode using
  25    PyUnicode_FromObject() prior to applying the conversion.
  26 
  27    These <encoding>s are available: utf_8, unicode_escape,
  28    raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
  29    mbcs (on win32).
  30 
  31 
  32 Written by Marc-Andre Lemburg (mal@lemburg.com).
  33 
  34 Copyright (c) Corporation for National Research Initiatives.
  35 
  36    ------------------------------------------------------------------------ */
  37 
  38 #define PY_SSIZE_T_CLEAN
  39 #include "Python.h"
  40 
  41 /* --- Registry ----------------------------------------------------------- */
  42 
  43 PyDoc_STRVAR(register__doc__,
  44 "register(search_function)\n\
  45 \n\
  46 Register a codec search function. Search functions are expected to take\n\
  47 one argument, the encoding name in all lower case letters, and return\n\
  48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
  49 (or a CodecInfo object).");
  50 
  51 static
  52 PyObject *codec_register(PyObject *self, PyObject *search_function)
  53 {
  54     if (PyCodec_Register(search_function))
  55         return NULL;
  56 
  57     Py_RETURN_NONE;
  58 }
  59 
  60 PyDoc_STRVAR(lookup__doc__,
  61 "lookup(encoding) -> CodecInfo\n\
  62 \n\
  63 Looks up a codec tuple in the Python codec registry and returns\n\
  64 a CodecInfo object.");
  65 
  66 static
  67 PyObject *codec_lookup(PyObject *self, PyObject *args)
  68 {
  69     char *encoding;
  70 
  71     if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
  72         return NULL;
  73 
  74     return _PyCodec_Lookup(encoding);
  75 }
  76 
  77 PyDoc_STRVAR(encode__doc__,
  78 "encode(obj, [encoding[,errors]]) -> object\n\
  79 \n\
  80 Encodes obj using the codec registered for encoding. encoding defaults\n\
  81 to the default encoding. errors may be given to set a different error\n\
  82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
  84 'xmlcharrefreplace' as well as any other name registered with\n\
  85 codecs.register_error that can handle ValueErrors.");
  86 
  87 static PyObject *
  88 codec_encode(PyObject *self, PyObject *args)
  89 {
  90     const char *encoding = NULL;
  91     const char *errors = NULL;
  92     PyObject *v;
  93 
  94     if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
  95         return NULL;
  96 
  97 #ifdef Py_USING_UNICODE
  98     if (encoding == NULL)
  99         encoding = PyUnicode_GetDefaultEncoding();
 100 #else
 101     if (encoding == NULL) {
 102         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 103         return NULL;
 104     }
 105 #endif
 106 
 107     /* Encode via the codec registry */
 108     return PyCodec_Encode(v, encoding, errors);
 109 }
 110 
 111 PyDoc_STRVAR(decode__doc__,
 112 "decode(obj, [encoding[,errors]]) -> object\n\
 113 \n\
 114 Decodes obj using the codec registered for encoding. encoding defaults\n\
 115 to the default encoding. errors may be given to set a different error\n\
 116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
 117 a ValueError. Other possible values are 'ignore' and 'replace'\n\
 118 as well as any other name registered with codecs.register_error that is\n\
 119 able to handle ValueErrors.");
 120 
 121 static PyObject *
 122 codec_decode(PyObject *self, PyObject *args)
 123 {
 124     const char *encoding = NULL;
 125     const char *errors = NULL;
 126     PyObject *v;
 127 
 128     if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
 129         return NULL;
 130 
 131 #ifdef Py_USING_UNICODE
 132     if (encoding == NULL)
 133         encoding = PyUnicode_GetDefaultEncoding();
 134 #else
 135     if (encoding == NULL) {
 136         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 137         return NULL;
 138     }
 139 #endif
 140 
 141     /* Decode via the codec registry */
 142     return PyCodec_Decode(v, encoding, errors);
 143 }
 144 
 145 /* --- Helpers ------------------------------------------------------------ */
 146 
 147 static
 148 PyObject *codec_tuple(PyObject *unicode,
 149                       Py_ssize_t len)
 150 {
 151     PyObject *v;
 152     if (unicode == NULL)
 153         return NULL;
 154     v = Py_BuildValue("On", unicode, len);
 155     Py_DECREF(unicode);
 156     return v;
 157 }
 158 
 159 /* --- String codecs ------------------------------------------------------ */
 160 static PyObject *
 161 escape_decode(PyObject *self,
 162               PyObject *args)
 163 {
 164     const char *errors = NULL;
 165     const char *data;
 166     Py_ssize_t size;
 167 
 168     if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
 169                           &data, &size, &errors))
 170         return NULL;
 171     return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
 172                        size);
 173 }
 174 
 175 static PyObject *
 176 escape_encode(PyObject *self,
 177               PyObject *args)
 178 {
 179     PyObject *str;
 180     const char *errors = NULL;
 181     char *buf;
 182     Py_ssize_t consumed, len;
 183 
 184     if (!PyArg_ParseTuple(args, "S|z:escape_encode",
 185                           &str, &errors))
 186         return NULL;
 187 
 188     consumed = PyString_GET_SIZE(str);
 189     str = PyString_Repr(str, 0);
 190     if (!str)
 191         return NULL;
 192 
 193     /* The string will be quoted. Unquote, similar to unicode-escape. */
 194     buf = PyString_AS_STRING (str);
 195     len = PyString_GET_SIZE (str);
 196     memmove(buf, buf+1, len-2);
 197     if (_PyString_Resize(&str, len-2) < 0)
 198         return NULL;
 199 
 200     return codec_tuple(str, consumed);
 201 }
 202 
 203 #ifdef Py_USING_UNICODE
 204 /* --- Decoder ------------------------------------------------------------ */
 205 
 206 static PyObject *
 207 unicode_internal_decode(PyObject *self,
 208                         PyObject *args)
 209 {
 210     PyObject *obj;
 211     const char *errors = NULL;
 212     const char *data;
 213     Py_ssize_t size;
 214 
 215     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
 216                           &obj, &errors))
 217         return NULL;
 218 
 219     if (PyUnicode_Check(obj)) {
 220         Py_INCREF(obj);
 221         return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
 222     }
 223     else {
 224         if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 225             return NULL;
 226 
 227         return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
 228                            size);
 229     }
 230 }
 231 
 232 static PyObject *
 233 utf_7_decode(PyObject *self,
 234              PyObject *args)
 235 {
 236     Py_buffer pbuf;
 237     const char *errors = NULL;
 238     int final = 0;
 239     Py_ssize_t consumed;
 240     PyObject *decoded = NULL;
 241 
 242     if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
 243                           &pbuf, &errors, &final))
 244         return NULL;
 245     consumed = pbuf.len;
 246 
 247     decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
 248                                            final ? NULL : &consumed);
 249     PyBuffer_Release(&pbuf);
 250     if (decoded == NULL)
 251         return NULL;
 252     return codec_tuple(decoded, consumed);
 253 }
 254 
 255 static PyObject *
 256 utf_8_decode(PyObject *self,
 257             PyObject *args)
 258 {
 259     Py_buffer pbuf;
 260     const char *errors = NULL;
 261     int final = 0;
 262     Py_ssize_t consumed;
 263     PyObject *decoded = NULL;
 264 
 265     if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
 266                           &pbuf, &errors, &final))
 267         return NULL;
 268     consumed = pbuf.len;
 269 
 270     decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
 271                                            final ? NULL : &consumed);
 272     PyBuffer_Release(&pbuf);
 273     if (decoded == NULL)
 274         return NULL;
 275     return codec_tuple(decoded, consumed);
 276 }
 277 
 278 static PyObject *
 279 utf_16_decode(PyObject *self,
 280             PyObject *args)
 281 {
 282     Py_buffer pbuf;
 283     const char *errors = NULL;
 284     int byteorder = 0;
 285     int final = 0;
 286     Py_ssize_t consumed;
 287     PyObject *decoded;
 288 
 289     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
 290                           &pbuf, &errors, &final))
 291         return NULL;
 292     consumed = pbuf.len; /* This is overwritten unless final is true. */
 293     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 294                                         &byteorder, final ? NULL : &consumed);
 295     PyBuffer_Release(&pbuf);
 296     if (decoded == NULL)
 297         return NULL;
 298     return codec_tuple(decoded, consumed);
 299 }
 300 
 301 static PyObject *
 302 utf_16_le_decode(PyObject *self,
 303                  PyObject *args)
 304 {
 305     Py_buffer pbuf;
 306     const char *errors = NULL;
 307     int byteorder = -1;
 308     int final = 0;
 309     Py_ssize_t consumed;
 310     PyObject *decoded = NULL;
 311 
 312     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
 313                           &pbuf, &errors, &final))
 314         return NULL;
 315 
 316     consumed = pbuf.len; /* This is overwritten unless final is true. */
 317     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 318         &byteorder, final ? NULL : &consumed);
 319     PyBuffer_Release(&pbuf);
 320     if (decoded == NULL)
 321         return NULL;
 322     return codec_tuple(decoded, consumed);
 323 }
 324 
 325 static PyObject *
 326 utf_16_be_decode(PyObject *self,
 327                  PyObject *args)
 328 {
 329     Py_buffer pbuf;
 330     const char *errors = NULL;
 331     int byteorder = 1;
 332     int final = 0;
 333     Py_ssize_t consumed;
 334     PyObject *decoded = NULL;
 335 
 336     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
 337                           &pbuf, &errors, &final))
 338         return NULL;
 339 
 340     consumed = pbuf.len; /* This is overwritten unless final is true. */
 341     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 342         &byteorder, final ? NULL : &consumed);
 343     PyBuffer_Release(&pbuf);
 344     if (decoded == NULL)
 345         return NULL;
 346     return codec_tuple(decoded, consumed);
 347 }
 348 
 349 /* This non-standard version also provides access to the byteorder
 350    parameter of the builtin UTF-16 codec.
 351 
 352    It returns a tuple (unicode, bytesread, byteorder) with byteorder
 353    being the value in effect at the end of data.
 354 
 355 */
 356 
 357 static PyObject *
 358 utf_16_ex_decode(PyObject *self,
 359                  PyObject *args)
 360 {
 361     Py_buffer pbuf;
 362     const char *errors = NULL;
 363     int byteorder = 0;
 364     PyObject *unicode, *tuple;
 365     int final = 0;
 366     Py_ssize_t consumed;
 367 
 368     if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
 369                           &pbuf, &errors, &byteorder, &final))
 370         return NULL;
 371     consumed = pbuf.len; /* This is overwritten unless final is true. */
 372     unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 373                                         &byteorder, final ? NULL : &consumed);
 374     PyBuffer_Release(&pbuf);
 375     if (unicode == NULL)
 376         return NULL;
 377     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
 378     Py_DECREF(unicode);
 379     return tuple;
 380 }
 381 
 382 static PyObject *
 383 utf_32_decode(PyObject *self,
 384             PyObject *args)
 385 {
 386     Py_buffer pbuf;
 387     const char *errors = NULL;
 388     int byteorder = 0;
 389     int final = 0;
 390     Py_ssize_t consumed;
 391     PyObject *decoded;
 392 
 393     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
 394                           &pbuf, &errors, &final))
 395         return NULL;
 396     consumed = pbuf.len; /* This is overwritten unless final is true. */
 397     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 398                                         &byteorder, final ? NULL : &consumed);
 399     PyBuffer_Release(&pbuf);
 400     if (decoded == NULL)
 401         return NULL;
 402     return codec_tuple(decoded, consumed);
 403 }
 404 
 405 static PyObject *
 406 utf_32_le_decode(PyObject *self,
 407                  PyObject *args)
 408 {
 409     Py_buffer pbuf;
 410     const char *errors = NULL;
 411     int byteorder = -1;
 412     int final = 0;
 413     Py_ssize_t consumed;
 414     PyObject *decoded;
 415 
 416     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
 417                           &pbuf, &errors, &final))
 418         return NULL;
 419     consumed = pbuf.len; /* This is overwritten unless final is true. */
 420     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 421                                         &byteorder, final ? NULL : &consumed);
 422     PyBuffer_Release(&pbuf);
 423     if (decoded == NULL)
 424         return NULL;
 425     return codec_tuple(decoded, consumed);
 426 }
 427 
 428 static PyObject *
 429 utf_32_be_decode(PyObject *self,
 430                  PyObject *args)
 431 {
 432     Py_buffer pbuf;
 433     const char *errors = NULL;
 434     int byteorder = 1;
 435     int final = 0;
 436     Py_ssize_t consumed;
 437     PyObject *decoded;
 438 
 439     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
 440                           &pbuf, &errors, &final))
 441         return NULL;
 442     consumed = pbuf.len; /* This is overwritten unless final is true. */
 443     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 444                                         &byteorder, final ? NULL : &consumed);
 445     PyBuffer_Release(&pbuf);
 446     if (decoded == NULL)
 447         return NULL;
 448     return codec_tuple(decoded, consumed);
 449 }
 450 
 451 /* This non-standard version also provides access to the byteorder
 452    parameter of the builtin UTF-32 codec.
 453 
 454    It returns a tuple (unicode, bytesread, byteorder) with byteorder
 455    being the value in effect at the end of data.
 456 
 457 */
 458 
 459 static PyObject *
 460 utf_32_ex_decode(PyObject *self,
 461                  PyObject *args)
 462 {
 463     Py_buffer pbuf;
 464     const char *errors = NULL;
 465     int byteorder = 0;
 466     PyObject *unicode, *tuple;
 467     int final = 0;
 468     Py_ssize_t consumed;
 469 
 470     if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
 471                           &pbuf, &errors, &byteorder, &final))
 472         return NULL;
 473     consumed = pbuf.len; /* This is overwritten unless final is true. */
 474     unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 475                                         &byteorder, final ? NULL : &consumed);
 476     PyBuffer_Release(&pbuf);
 477     if (unicode == NULL)
 478         return NULL;
 479     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
 480     Py_DECREF(unicode);
 481     return tuple;
 482 }
 483 
 484 static PyObject *
 485 unicode_escape_decode(PyObject *self,
 486                      PyObject *args)
 487 {
 488     Py_buffer pbuf;
 489     const char *errors = NULL;
 490         PyObject *unicode;
 491 
 492     if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
 493                           &pbuf, &errors))
 494         return NULL;
 495 
 496     unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
 497     PyBuffer_Release(&pbuf);
 498     return codec_tuple(unicode, pbuf.len);
 499 }
 500 
 501 static PyObject *
 502 raw_unicode_escape_decode(PyObject *self,
 503                         PyObject *args)
 504 {
 505     Py_buffer pbuf;
 506     const char *errors = NULL;
 507     PyObject *unicode;
 508 
 509     if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
 510                           &pbuf, &errors))
 511         return NULL;
 512 
 513     unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
 514     PyBuffer_Release(&pbuf);
 515     return codec_tuple(unicode, pbuf.len);
 516 }
 517 
 518 static PyObject *
 519 latin_1_decode(PyObject *self,
 520                PyObject *args)
 521 {
 522     Py_buffer pbuf;
 523     PyObject *unicode;
 524     const char *errors = NULL;
 525 
 526     if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
 527                           &pbuf, &errors))
 528         return NULL;
 529 
 530     unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
 531     PyBuffer_Release(&pbuf);
 532     return codec_tuple(unicode, pbuf.len);
 533 }
 534 
 535 static PyObject *
 536 ascii_decode(PyObject *self,
 537              PyObject *args)
 538 {
 539     Py_buffer pbuf;
 540     PyObject *unicode;
 541     const char *errors = NULL;
 542 
 543     if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
 544                           &pbuf, &errors))
 545         return NULL;
 546 
 547     unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
 548     PyBuffer_Release(&pbuf);
 549     return codec_tuple(unicode, pbuf.len);
 550 }
 551 
 552 static PyObject *
 553 charmap_decode(PyObject *self,
 554                PyObject *args)
 555 {
 556     Py_buffer pbuf;
 557     PyObject *unicode;
 558     const char *errors = NULL;
 559     PyObject *mapping = NULL;
 560 
 561     if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
 562                           &pbuf, &errors, &mapping))
 563         return NULL;
 564     if (mapping == Py_None)
 565         mapping = NULL;
 566 
 567     unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
 568     PyBuffer_Release(&pbuf);
 569     return codec_tuple(unicode, pbuf.len);
 570 }
 571 
 572 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 573 
 574 static PyObject *
 575 mbcs_decode(PyObject *self,
 576             PyObject *args)
 577 {
 578     Py_buffer pbuf;
 579     const char *errors = NULL;
 580     int final = 0;
 581     Py_ssize_t consumed;
 582     PyObject *decoded = NULL;
 583 
 584     if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
 585                           &pbuf, &errors, &final))
 586         return NULL;
 587     consumed = pbuf.len;
 588 
 589     decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
 590                                            final ? NULL : &consumed);
 591     PyBuffer_Release(&pbuf);
 592     if (decoded == NULL)
 593         return NULL;
 594     return codec_tuple(decoded, consumed);
 595 }
 596 
 597 #endif /* MS_WINDOWS */
 598 
 599 /* --- Encoder ------------------------------------------------------------ */
 600 
 601 static PyObject *
 602 readbuffer_encode(PyObject *self,
 603                   PyObject *args)
 604 {
 605     const char *data;
 606     Py_ssize_t size;
 607     const char *errors = NULL;
 608 
 609     if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
 610                           &data, &size, &errors))
 611         return NULL;
 612 
 613     return codec_tuple(PyString_FromStringAndSize(data, size),
 614                        size);
 615 }
 616 
 617 static PyObject *
 618 charbuffer_encode(PyObject *self,
 619                   PyObject *args)
 620 {
 621     const char *data;
 622     Py_ssize_t size;
 623     const char *errors = NULL;
 624 
 625     if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
 626                           &data, &size, &errors))
 627         return NULL;
 628 
 629     return codec_tuple(PyString_FromStringAndSize(data, size),
 630                        size);
 631 }
 632 
 633 static PyObject *
 634 unicode_internal_encode(PyObject *self,
 635                         PyObject *args)
 636 {
 637     PyObject *obj;
 638     const char *errors = NULL;
 639     const char *data;
 640     Py_ssize_t size;
 641 
 642     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
 643                           &obj, &errors))
 644         return NULL;
 645 
 646     if (PyUnicode_Check(obj)) {
 647         data = PyUnicode_AS_DATA(obj);
 648         size = PyUnicode_GET_DATA_SIZE(obj);
 649         return codec_tuple(PyString_FromStringAndSize(data, size),
 650                            PyUnicode_GET_SIZE(obj));
 651     }
 652     else {
 653         if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 654             return NULL;
 655         return codec_tuple(PyString_FromStringAndSize(data, size),
 656                            size);
 657     }
 658 }
 659 
 660 static PyObject *
 661 utf_7_encode(PyObject *self,
 662             PyObject *args)
 663 {
 664     PyObject *str, *v;
 665     const char *errors = NULL;
 666 
 667     if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
 668                           &str, &errors))
 669         return NULL;
 670 
 671     str = PyUnicode_FromObject(str);
 672     if (str == NULL)
 673         return NULL;
 674     v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
 675                                          PyUnicode_GET_SIZE(str),
 676                                          0,
 677                                          0,
 678                                          errors),
 679                     PyUnicode_GET_SIZE(str));
 680     Py_DECREF(str);
 681     return v;
 682 }
 683 
 684 static PyObject *
 685 utf_8_encode(PyObject *self,
 686             PyObject *args)
 687 {
 688     PyObject *str, *v;
 689     const char *errors = NULL;
 690 
 691     if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
 692                           &str, &errors))
 693         return NULL;
 694 
 695     str = PyUnicode_FromObject(str);
 696     if (str == NULL)
 697         return NULL;
 698     v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
 699                                          PyUnicode_GET_SIZE(str),
 700                                          errors),
 701                     PyUnicode_GET_SIZE(str));
 702     Py_DECREF(str);
 703     return v;
 704 }
 705 
 706 /* This version provides access to the byteorder parameter of the
 707    builtin UTF-16 codecs as optional third argument. It defaults to 0
 708    which means: use the native byte order and prepend the data with a
 709    BOM mark.
 710 
 711 */
 712 
 713 static PyObject *
 714 utf_16_encode(PyObject *self,
 715             PyObject *args)
 716 {
 717     PyObject *str, *v;
 718     const char *errors = NULL;
 719     int byteorder = 0;
 720 
 721     if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
 722                           &str, &errors, &byteorder))
 723         return NULL;
 724 
 725     str = PyUnicode_FromObject(str);
 726     if (str == NULL)
 727         return NULL;
 728     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 729                                           PyUnicode_GET_SIZE(str),
 730                                           errors,
 731                                           byteorder),
 732                     PyUnicode_GET_SIZE(str));
 733     Py_DECREF(str);
 734     return v;
 735 }
 736 
 737 static PyObject *
 738 utf_16_le_encode(PyObject *self,
 739                  PyObject *args)
 740 {
 741     PyObject *str, *v;
 742     const char *errors = NULL;
 743 
 744     if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
 745                           &str, &errors))
 746         return NULL;
 747 
 748     str = PyUnicode_FromObject(str);
 749     if (str == NULL)
 750         return NULL;
 751     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 752                                              PyUnicode_GET_SIZE(str),
 753                                              errors,
 754                                              -1),
 755                        PyUnicode_GET_SIZE(str));
 756     Py_DECREF(str);
 757     return v;
 758 }
 759 
 760 static PyObject *
 761 utf_16_be_encode(PyObject *self,
 762                  PyObject *args)
 763 {
 764     PyObject *str, *v;
 765     const char *errors = NULL;
 766 
 767     if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
 768                           &str, &errors))
 769         return NULL;
 770 
 771     str = PyUnicode_FromObject(str);
 772     if (str == NULL)
 773         return NULL;
 774     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 775                                           PyUnicode_GET_SIZE(str),
 776                                           errors,
 777                                           +1),
 778                     PyUnicode_GET_SIZE(str));
 779     Py_DECREF(str);
 780     return v;
 781 }
 782 
 783 /* This version provides access to the byteorder parameter of the
 784    builtin UTF-32 codecs as optional third argument. It defaults to 0
 785    which means: use the native byte order and prepend the data with a
 786    BOM mark.
 787 
 788 */
 789 
 790 static PyObject *
 791 utf_32_encode(PyObject *self,
 792             PyObject *args)
 793 {
 794     PyObject *str, *v;
 795     const char *errors = NULL;
 796     int byteorder = 0;
 797 
 798     if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
 799                           &str, &errors, &byteorder))
 800         return NULL;
 801 
 802     str = PyUnicode_FromObject(str);
 803     if (str == NULL)
 804         return NULL;
 805     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
 806                                           PyUnicode_GET_SIZE(str),
 807                                           errors,
 808                                           byteorder),
 809                     PyUnicode_GET_SIZE(str));
 810     Py_DECREF(str);
 811     return v;
 812 }
 813 
 814 static PyObject *
 815 utf_32_le_encode(PyObject *self,
 816                  PyObject *args)
 817 {
 818     PyObject *str, *v;
 819     const char *errors = NULL;
 820 
 821     if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
 822                           &str, &errors))
 823         return NULL;
 824 
 825     str = PyUnicode_FromObject(str);
 826     if (str == NULL)
 827         return NULL;
 828     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
 829                                              PyUnicode_GET_SIZE(str),
 830                                              errors,
 831                                              -1),
 832                        PyUnicode_GET_SIZE(str));
 833     Py_DECREF(str);
 834     return v;
 835 }
 836 
 837 static PyObject *
 838 utf_32_be_encode(PyObject *self,
 839                  PyObject *args)
 840 {
 841     PyObject *str, *v;
 842     const char *errors = NULL;
 843 
 844     if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
 845                           &str, &errors))
 846         return NULL;
 847 
 848     str = PyUnicode_FromObject(str);
 849     if (str == NULL)
 850         return NULL;
 851     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
 852                                           PyUnicode_GET_SIZE(str),
 853                                           errors,
 854                                           +1),
 855                     PyUnicode_GET_SIZE(str));
 856     Py_DECREF(str);
 857     return v;
 858 }
 859 
 860 static PyObject *
 861 unicode_escape_encode(PyObject *self,
 862                      PyObject *args)
 863 {
 864     PyObject *str, *v;
 865     const char *errors = NULL;
 866 
 867     if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
 868                           &str, &errors))
 869         return NULL;
 870 
 871     str = PyUnicode_FromObject(str);
 872     if (str == NULL)
 873         return NULL;
 874     v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
 875                                                   PyUnicode_GET_SIZE(str)),
 876                     PyUnicode_GET_SIZE(str));
 877     Py_DECREF(str);
 878     return v;
 879 }
 880 
 881 static PyObject *
 882 raw_unicode_escape_encode(PyObject *self,
 883                         PyObject *args)
 884 {
 885     PyObject *str, *v;
 886     const char *errors = NULL;
 887 
 888     if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
 889                           &str, &errors))
 890         return NULL;
 891 
 892     str = PyUnicode_FromObject(str);
 893     if (str == NULL)
 894         return NULL;
 895     v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
 896                                PyUnicode_AS_UNICODE(str),
 897                                PyUnicode_GET_SIZE(str)),
 898                     PyUnicode_GET_SIZE(str));
 899     Py_DECREF(str);
 900     return v;
 901 }
 902 
 903 static PyObject *
 904 latin_1_encode(PyObject *self,
 905                PyObject *args)
 906 {
 907     PyObject *str, *v;
 908     const char *errors = NULL;
 909 
 910     if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
 911                           &str, &errors))
 912         return NULL;
 913 
 914     str = PyUnicode_FromObject(str);
 915     if (str == NULL)
 916         return NULL;
 917     v = codec_tuple(PyUnicode_EncodeLatin1(
 918                                PyUnicode_AS_UNICODE(str),
 919                                PyUnicode_GET_SIZE(str),
 920                                errors),
 921                     PyUnicode_GET_SIZE(str));
 922     Py_DECREF(str);
 923     return v;
 924 }
 925 
 926 static PyObject *
 927 ascii_encode(PyObject *self,
 928              PyObject *args)
 929 {
 930     PyObject *str, *v;
 931     const char *errors = NULL;
 932 
 933     if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
 934                           &str, &errors))
 935         return NULL;
 936 
 937     str = PyUnicode_FromObject(str);
 938     if (str == NULL)
 939         return NULL;
 940     v = codec_tuple(PyUnicode_EncodeASCII(
 941                                PyUnicode_AS_UNICODE(str),
 942                                PyUnicode_GET_SIZE(str),
 943                                errors),
 944                     PyUnicode_GET_SIZE(str));
 945     Py_DECREF(str);
 946     return v;
 947 }
 948 
 949 static PyObject *
 950 charmap_encode(PyObject *self,
 951              PyObject *args)
 952 {
 953     PyObject *str, *v;
 954     const char *errors = NULL;
 955     PyObject *mapping = NULL;
 956 
 957     if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
 958                           &str, &errors, &mapping))
 959         return NULL;
 960     if (mapping == Py_None)
 961         mapping = NULL;
 962 
 963     str = PyUnicode_FromObject(str);
 964     if (str == NULL)
 965         return NULL;
 966     v = codec_tuple(PyUnicode_EncodeCharmap(
 967                                PyUnicode_AS_UNICODE(str),
 968                                PyUnicode_GET_SIZE(str),
 969                                mapping,
 970                                errors),
 971                     PyUnicode_GET_SIZE(str));
 972     Py_DECREF(str);
 973     return v;
 974 }
 975 
 976 static PyObject*
 977 charmap_build(PyObject *self, PyObject *args)
 978 {
 979     PyObject *map;
 980     if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
 981         return NULL;
 982     return PyUnicode_BuildEncodingMap(map);
 983 }
 984 
 985 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 986 
 987 static PyObject *
 988 mbcs_encode(PyObject *self,
 989             PyObject *args)
 990 {
 991     PyObject *str, *v;
 992     const char *errors = NULL;
 993 
 994     if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
 995                           &str, &errors))
 996         return NULL;
 997 
 998     str = PyUnicode_FromObject(str);
 999     if (str == NULL)
1000         return NULL;
1001     v = codec_tuple(PyUnicode_EncodeMBCS(
1002                                PyUnicode_AS_UNICODE(str),
1003                                PyUnicode_GET_SIZE(str),
1004                                errors),
1005                     PyUnicode_GET_SIZE(str));
1006     Py_DECREF(str);
1007     return v;
1008 }
1009 
1010 #endif /* MS_WINDOWS */
1011 #endif /* Py_USING_UNICODE */
1012 
1013 /* --- Error handler registry --------------------------------------------- */
1014 
1015 PyDoc_STRVAR(register_error__doc__,
1016 "register_error(errors, handler)\n\
1017 \n\
1018 Register the specified error handler under the name\n\
1019 errors. handler must be a callable object, that\n\
1020 will be called with an exception instance containing\n\
1021 information about the location of the encoding/decoding\n\
1022 error and must return a (replacement, new position) tuple.");
1023 
1024 static PyObject *register_error(PyObject *self, PyObject *args)
1025 {
1026     const char *name;
1027     PyObject *handler;
1028 
1029     if (!PyArg_ParseTuple(args, "sO:register_error",
1030                           &name, &handler))
1031         return NULL;
1032     if (PyCodec_RegisterError(name, handler))
1033         return NULL;
1034     Py_RETURN_NONE;
1035 }
1036 
1037 PyDoc_STRVAR(lookup_error__doc__,
1038 "lookup_error(errors) -> handler\n\
1039 \n\
1040 Return the error handler for the specified error handling name\n\
1041 or raise a LookupError, if no handler exists under this name.");
1042 
1043 static PyObject *lookup_error(PyObject *self, PyObject *args)
1044 {
1045     const char *name;
1046 
1047     if (!PyArg_ParseTuple(args, "s:lookup_error",
1048                           &name))
1049         return NULL;
1050     return PyCodec_LookupError(name);
1051 }
1052 
1053 /* --- Module API --------------------------------------------------------- */
1054 
1055 static PyMethodDef _codecs_functions[] = {
1056     {"register",                codec_register,                 METH_O,
1057         register__doc__},
1058     {"lookup",                  codec_lookup,                   METH_VARARGS,
1059         lookup__doc__},
1060     {"encode",                  codec_encode,                   METH_VARARGS,
1061         encode__doc__},
1062     {"decode",                  codec_decode,                   METH_VARARGS,
1063         decode__doc__},
1064     {"escape_encode",           escape_encode,                  METH_VARARGS},
1065     {"escape_decode",           escape_decode,                  METH_VARARGS},
1066 #ifdef Py_USING_UNICODE
1067     {"utf_8_encode",            utf_8_encode,                   METH_VARARGS},
1068     {"utf_8_decode",            utf_8_decode,                   METH_VARARGS},
1069     {"utf_7_encode",            utf_7_encode,                   METH_VARARGS},
1070     {"utf_7_decode",            utf_7_decode,                   METH_VARARGS},
1071     {"utf_16_encode",           utf_16_encode,                  METH_VARARGS},
1072     {"utf_16_le_encode",        utf_16_le_encode,               METH_VARARGS},
1073     {"utf_16_be_encode",        utf_16_be_encode,               METH_VARARGS},
1074     {"utf_16_decode",           utf_16_decode,                  METH_VARARGS},
1075     {"utf_16_le_decode",        utf_16_le_decode,               METH_VARARGS},
1076     {"utf_16_be_decode",        utf_16_be_decode,               METH_VARARGS},
1077     {"utf_16_ex_decode",        utf_16_ex_decode,               METH_VARARGS},
1078     {"utf_32_encode",           utf_32_encode,                  METH_VARARGS},
1079     {"utf_32_le_encode",        utf_32_le_encode,               METH_VARARGS},
1080     {"utf_32_be_encode",        utf_32_be_encode,               METH_VARARGS},
1081     {"utf_32_decode",           utf_32_decode,                  METH_VARARGS},
1082     {"utf_32_le_decode",        utf_32_le_decode,               METH_VARARGS},
1083     {"utf_32_be_decode",        utf_32_be_decode,               METH_VARARGS},
1084     {"utf_32_ex_decode",        utf_32_ex_decode,               METH_VARARGS},
1085     {"unicode_escape_encode",   unicode_escape_encode,          METH_VARARGS},
1086     {"unicode_escape_decode",   unicode_escape_decode,          METH_VARARGS},
1087     {"unicode_internal_encode", unicode_internal_encode,        METH_VARARGS},
1088     {"unicode_internal_decode", unicode_internal_decode,        METH_VARARGS},
1089     {"raw_unicode_escape_encode", raw_unicode_escape_encode,    METH_VARARGS},
1090     {"raw_unicode_escape_decode", raw_unicode_escape_decode,    METH_VARARGS},
1091     {"latin_1_encode",          latin_1_encode,                 METH_VARARGS},
1092     {"latin_1_decode",          latin_1_decode,                 METH_VARARGS},
1093     {"ascii_encode",            ascii_encode,                   METH_VARARGS},
1094     {"ascii_decode",            ascii_decode,                   METH_VARARGS},
1095     {"charmap_encode",          charmap_encode,                 METH_VARARGS},
1096     {"charmap_decode",          charmap_decode,                 METH_VARARGS},
1097     {"charmap_build",           charmap_build,                  METH_VARARGS},
1098     {"readbuffer_encode",       readbuffer_encode,              METH_VARARGS},
1099     {"charbuffer_encode",       charbuffer_encode,              METH_VARARGS},
1100 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1101     {"mbcs_encode",             mbcs_encode,                    METH_VARARGS},
1102     {"mbcs_decode",             mbcs_decode,                    METH_VARARGS},
1103 #endif
1104 #endif /* Py_USING_UNICODE */
1105     {"register_error",          register_error,                 METH_VARARGS,
1106         register_error__doc__},
1107     {"lookup_error",            lookup_error,                   METH_VARARGS,
1108         lookup_error__doc__},
1109     {NULL, NULL}                /* sentinel */
1110 };
1111 
1112 PyMODINIT_FUNC
1113 init_codecs(void)
1114 {
1115     Py_InitModule("_codecs", _codecs_functions);
1116 }