Python-2.7.3/Modules/_json.c

No issues found

   1 #include "Python.h"
   2 #include "structmember.h"
   3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
   4 #define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
   5 #endif
   6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
   7 typedef int Py_ssize_t;
   8 #define PY_SSIZE_T_MAX INT_MAX
   9 #define PY_SSIZE_T_MIN INT_MIN
  10 #define PyInt_FromSsize_t PyInt_FromLong
  11 #define PyInt_AsSsize_t PyInt_AsLong
  12 #endif
  13 #ifndef Py_IS_FINITE
  14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
  15 #endif
  16 
  17 #ifdef __GNUC__
  18 #define UNUSED __attribute__((__unused__))
  19 #else
  20 #define UNUSED
  21 #endif
  22 
  23 #define DEFAULT_ENCODING "utf-8"
  24 
  25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
  26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
  27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
  28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
  29 
  30 static PyTypeObject PyScannerType;
  31 static PyTypeObject PyEncoderType;
  32 
  33 typedef struct _PyScannerObject {
  34     PyObject_HEAD
  35     PyObject *encoding;
  36     PyObject *strict;
  37     PyObject *object_hook;
  38     PyObject *pairs_hook;
  39     PyObject *parse_float;
  40     PyObject *parse_int;
  41     PyObject *parse_constant;
  42 } PyScannerObject;
  43 
  44 static PyMemberDef scanner_members[] = {
  45     {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
  46     {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
  47     {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
  48     {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
  49     {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
  50     {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
  51     {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
  52     {NULL}
  53 };
  54 
  55 typedef struct _PyEncoderObject {
  56     PyObject_HEAD
  57     PyObject *markers;
  58     PyObject *defaultfn;
  59     PyObject *encoder;
  60     PyObject *indent;
  61     PyObject *key_separator;
  62     PyObject *item_separator;
  63     PyObject *sort_keys;
  64     PyObject *skipkeys;
  65     int fast_encode;
  66     int allow_nan;
  67 } PyEncoderObject;
  68 
  69 static PyMemberDef encoder_members[] = {
  70     {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
  71     {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
  72     {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
  73     {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
  74     {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
  75     {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
  76     {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
  77     {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
  78     {NULL}
  79 };
  80 
  81 static Py_ssize_t
  82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
  83 static PyObject *
  84 ascii_escape_unicode(PyObject *pystr);
  85 static PyObject *
  86 ascii_escape_str(PyObject *pystr);
  87 static PyObject *
  88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
  89 void init_json(void);
  90 static PyObject *
  91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
  92 static PyObject *
  93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
  94 static PyObject *
  95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
  96 static PyObject *
  97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
  98 static int
  99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
 100 static void
 101 scanner_dealloc(PyObject *self);
 102 static int
 103 scanner_clear(PyObject *self);
 104 static PyObject *
 105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 106 static int
 107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
 108 static void
 109 encoder_dealloc(PyObject *self);
 110 static int
 111 encoder_clear(PyObject *self);
 112 static int
 113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
 114 static int
 115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
 116 static int
 117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
 118 static PyObject *
 119 _encoded_const(PyObject *obj);
 120 static void
 121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
 122 static PyObject *
 123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
 124 static int
 125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
 126 static PyObject *
 127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
 128 static PyObject *
 129 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
 130 
 131 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
 132 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
 133 
 134 #define MIN_EXPANSION 6
 135 #ifdef Py_UNICODE_WIDE
 136 #define MAX_EXPANSION (2 * MIN_EXPANSION)
 137 #else
 138 #define MAX_EXPANSION MIN_EXPANSION
 139 #endif
 140 
 141 static int
 142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
 143 {
 144     /* PyObject to Py_ssize_t converter */
 145     *size_ptr = PyInt_AsSsize_t(o);
 146     if (*size_ptr == -1 && PyErr_Occurred())
 147         return 0;
 148     return 1;
 149 }
 150 
 151 static PyObject *
 152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
 153 {
 154     /* Py_ssize_t to PyObject converter */
 155     return PyInt_FromSsize_t(*size_ptr);
 156 }
 157 
 158 static Py_ssize_t
 159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
 160 {
 161     /* Escape unicode code point c to ASCII escape sequences
 162     in char *output. output must have at least 12 bytes unused to
 163     accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
 164     output[chars++] = '\\';
 165     switch (c) {
 166         case '\\': output[chars++] = (char)c; break;
 167         case '"': output[chars++] = (char)c; break;
 168         case '\b': output[chars++] = 'b'; break;
 169         case '\f': output[chars++] = 'f'; break;
 170         case '\n': output[chars++] = 'n'; break;
 171         case '\r': output[chars++] = 'r'; break;
 172         case '\t': output[chars++] = 't'; break;
 173         default:
 174 #ifdef Py_UNICODE_WIDE
 175             if (c >= 0x10000) {
 176                 /* UTF-16 surrogate pair */
 177                 Py_UNICODE v = c - 0x10000;
 178                 c = 0xd800 | ((v >> 10) & 0x3ff);
 179                 output[chars++] = 'u';
 180                 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
 181                 output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
 182                 output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
 183                 output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
 184                 c = 0xdc00 | (v & 0x3ff);
 185                 output[chars++] = '\\';
 186             }
 187 #endif
 188             output[chars++] = 'u';
 189             output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
 190             output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
 191             output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
 192             output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
 193     }
 194     return chars;
 195 }
 196 
 197 static PyObject *
 198 ascii_escape_unicode(PyObject *pystr)
 199 {
 200     /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
 201     Py_ssize_t i;
 202     Py_ssize_t input_chars;
 203     Py_ssize_t output_size;
 204     Py_ssize_t max_output_size;
 205     Py_ssize_t chars;
 206     PyObject *rval;
 207     char *output;
 208     Py_UNICODE *input_unicode;
 209 
 210     input_chars = PyUnicode_GET_SIZE(pystr);
 211     input_unicode = PyUnicode_AS_UNICODE(pystr);
 212 
 213     /* One char input can be up to 6 chars output, estimate 4 of these */
 214     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
 215     max_output_size = 2 + (input_chars * MAX_EXPANSION);
 216     rval = PyString_FromStringAndSize(NULL, output_size);
 217     if (rval == NULL) {
 218         return NULL;
 219     }
 220     output = PyString_AS_STRING(rval);
 221     chars = 0;
 222     output[chars++] = '"';
 223     for (i = 0; i < input_chars; i++) {
 224         Py_UNICODE c = input_unicode[i];
 225         if (S_CHAR(c)) {
 226             output[chars++] = (char)c;
 227         }
 228         else {
 229             chars = ascii_escape_char(c, output, chars);
 230         }
 231         if (output_size - chars < (1 + MAX_EXPANSION)) {
 232             /* There's more than four, so let's resize by a lot */
 233             Py_ssize_t new_output_size = output_size * 2;
 234             /* This is an upper bound */
 235             if (new_output_size > max_output_size) {
 236                 new_output_size = max_output_size;
 237             }
 238             /* Make sure that the output size changed before resizing */
 239             if (new_output_size != output_size) {
 240                 output_size = new_output_size;
 241                 if (_PyString_Resize(&rval, output_size) == -1) {
 242                     return NULL;
 243                 }
 244                 output = PyString_AS_STRING(rval);
 245             }
 246         }
 247     }
 248     output[chars++] = '"';
 249     if (_PyString_Resize(&rval, chars) == -1) {
 250         return NULL;
 251     }
 252     return rval;
 253 }
 254 
 255 static PyObject *
 256 ascii_escape_str(PyObject *pystr)
 257 {
 258     /* Take a PyString pystr and return a new ASCII-only escaped PyString */
 259     Py_ssize_t i;
 260     Py_ssize_t input_chars;
 261     Py_ssize_t output_size;
 262     Py_ssize_t chars;
 263     PyObject *rval;
 264     char *output;
 265     char *input_str;
 266 
 267     input_chars = PyString_GET_SIZE(pystr);
 268     input_str = PyString_AS_STRING(pystr);
 269 
 270     /* Fast path for a string that's already ASCII */
 271     for (i = 0; i < input_chars; i++) {
 272         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
 273         if (!S_CHAR(c)) {
 274             /* If we have to escape something, scan the string for unicode */
 275             Py_ssize_t j;
 276             for (j = i; j < input_chars; j++) {
 277                 c = (Py_UNICODE)(unsigned char)input_str[j];
 278                 if (c > 0x7f) {
 279                     /* We hit a non-ASCII character, bail to unicode mode */
 280                     PyObject *uni;
 281                     uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
 282                     if (uni == NULL) {
 283                         return NULL;
 284                     }
 285                     rval = ascii_escape_unicode(uni);
 286                     Py_DECREF(uni);
 287                     return rval;
 288                 }
 289             }
 290             break;
 291         }
 292     }
 293 
 294     if (i == input_chars) {
 295         /* Input is already ASCII */
 296         output_size = 2 + input_chars;
 297     }
 298     else {
 299         /* One char input can be up to 6 chars output, estimate 4 of these */
 300         output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
 301     }
 302     rval = PyString_FromStringAndSize(NULL, output_size);
 303     if (rval == NULL) {
 304         return NULL;
 305     }
 306     output = PyString_AS_STRING(rval);
 307     output[0] = '"';
 308 
 309     /* We know that everything up to i is ASCII already */
 310     chars = i + 1;
 311     memcpy(&output[1], input_str, i);
 312 
 313     for (; i < input_chars; i++) {
 314         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
 315         if (S_CHAR(c)) {
 316             output[chars++] = (char)c;
 317         }
 318         else {
 319             chars = ascii_escape_char(c, output, chars);
 320         }
 321         /* An ASCII char can't possibly expand to a surrogate! */
 322         if (output_size - chars < (1 + MIN_EXPANSION)) {
 323             /* There's more than four, so let's resize by a lot */
 324             output_size *= 2;
 325             if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
 326                 output_size = 2 + (input_chars * MIN_EXPANSION);
 327             }
 328             if (_PyString_Resize(&rval, output_size) == -1) {
 329                 return NULL;
 330             }
 331             output = PyString_AS_STRING(rval);
 332         }
 333     }
 334     output[chars++] = '"';
 335     if (_PyString_Resize(&rval, chars) == -1) {
 336         return NULL;
 337     }
 338     return rval;
 339 }
 340 
 341 static void
 342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
 343 {
 344     /* Use the Python function json.decoder.errmsg to raise a nice
 345     looking ValueError exception */
 346     static PyObject *errmsg_fn = NULL;
 347     PyObject *pymsg;
 348     if (errmsg_fn == NULL) {
 349         PyObject *decoder = PyImport_ImportModule("json.decoder");
 350         if (decoder == NULL)
 351             return;
 352         errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
 353         Py_DECREF(decoder);
 354         if (errmsg_fn == NULL)
 355             return;
 356     }
 357     pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
 358     if (pymsg) {
 359         PyErr_SetObject(PyExc_ValueError, pymsg);
 360         Py_DECREF(pymsg);
 361     }
 362 }
 363 
 364 static PyObject *
 365 join_list_unicode(PyObject *lst)
 366 {
 367     /* return u''.join(lst) */
 368     static PyObject *joinfn = NULL;
 369     if (joinfn == NULL) {
 370         PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
 371         if (ustr == NULL)
 372             return NULL;
 373 
 374         joinfn = PyObject_GetAttrString(ustr, "join");
 375         Py_DECREF(ustr);
 376         if (joinfn == NULL)
 377             return NULL;
 378     }
 379     return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
 380 }
 381 
 382 static PyObject *
 383 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
 384     /* return (rval, idx) tuple, stealing reference to rval */
 385     PyObject *tpl;
 386     PyObject *pyidx;
 387     /*
 388     steal a reference to rval, returns (rval, idx)
 389     */
 390     if (rval == NULL) {
 391         return NULL;
 392     }
 393     pyidx = PyInt_FromSsize_t(idx);
 394     if (pyidx == NULL) {
 395         Py_DECREF(rval);
 396         return NULL;
 397     }
 398     tpl = PyTuple_New(2);
 399     if (tpl == NULL) {
 400         Py_DECREF(pyidx);
 401         Py_DECREF(rval);
 402         return NULL;
 403     }
 404     PyTuple_SET_ITEM(tpl, 0, rval);
 405     PyTuple_SET_ITEM(tpl, 1, pyidx);
 406     return tpl;
 407 }
 408 
 409 static PyObject *
 410 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
 411 {
 412     /* Read the JSON string from PyString pystr.
 413     end is the index of the first character after the quote.
 414     encoding is the encoding of pystr (must be an ASCII superset)
 415     if strict is zero then literal control characters are allowed
 416     *next_end_ptr is a return-by-reference index of the character
 417         after the end quote
 418 
 419     Return value is a new PyString (if ASCII-only) or PyUnicode
 420     */
 421     PyObject *rval;
 422     Py_ssize_t len = PyString_GET_SIZE(pystr);
 423     Py_ssize_t begin = end - 1;
 424     Py_ssize_t next;
 425     char *buf = PyString_AS_STRING(pystr);
 426     PyObject *chunks = PyList_New(0);
 427     if (chunks == NULL) {
 428         goto bail;
 429     }
 430     if (end < 0 || len <= end) {
 431         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
 432         goto bail;
 433     }
 434     while (1) {
 435         /* Find the end of the string or the next escape */
 436         Py_UNICODE c = 0;
 437         PyObject *chunk = NULL;
 438         for (next = end; next < len; next++) {
 439             c = (unsigned char)buf[next];
 440             if (c == '"' || c == '\\') {
 441                 break;
 442             }
 443             else if (strict && c <= 0x1f) {
 444                 raise_errmsg("Invalid control character at", pystr, next);
 445                 goto bail;
 446             }
 447         }
 448         if (!(c == '"' || c == '\\')) {
 449             raise_errmsg("Unterminated string starting at", pystr, begin);
 450             goto bail;
 451         }
 452         /* Pick up this chunk if it's not zero length */
 453         if (next != end) {
 454             PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
 455             if (strchunk == NULL) {
 456                 goto bail;
 457             }
 458             chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
 459             Py_DECREF(strchunk);
 460             if (chunk == NULL) {
 461                 goto bail;
 462             }
 463             if (PyList_Append(chunks, chunk)) {
 464                 Py_DECREF(chunk);
 465                 goto bail;
 466             }
 467             Py_DECREF(chunk);
 468         }
 469         next++;
 470         if (c == '"') {
 471             end = next;
 472             break;
 473         }
 474         if (next == len) {
 475             raise_errmsg("Unterminated string starting at", pystr, begin);
 476             goto bail;
 477         }
 478         c = buf[next];
 479         if (c != 'u') {
 480             /* Non-unicode backslash escapes */
 481             end = next + 1;
 482             switch (c) {
 483                 case '"': break;
 484                 case '\\': break;
 485                 case '/': break;
 486                 case 'b': c = '\b'; break;
 487                 case 'f': c = '\f'; break;
 488                 case 'n': c = '\n'; break;
 489                 case 'r': c = '\r'; break;
 490                 case 't': c = '\t'; break;
 491                 default: c = 0;
 492             }
 493             if (c == 0) {
 494                 raise_errmsg("Invalid \\escape", pystr, end - 2);
 495                 goto bail;
 496             }
 497         }
 498         else {
 499             c = 0;
 500             next++;
 501             end = next + 4;
 502             if (end >= len) {
 503                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
 504                 goto bail;
 505             }
 506             /* Decode 4 hex digits */
 507             for (; next < end; next++) {
 508                 Py_UNICODE digit = buf[next];
 509                 c <<= 4;
 510                 switch (digit) {
 511                     case '0': case '1': case '2': case '3': case '4':
 512                     case '5': case '6': case '7': case '8': case '9':
 513                         c |= (digit - '0'); break;
 514                     case 'a': case 'b': case 'c': case 'd': case 'e':
 515                     case 'f':
 516                         c |= (digit - 'a' + 10); break;
 517                     case 'A': case 'B': case 'C': case 'D': case 'E':
 518                     case 'F':
 519                         c |= (digit - 'A' + 10); break;
 520                     default:
 521                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
 522                         goto bail;
 523                 }
 524             }
 525 #ifdef Py_UNICODE_WIDE
 526             /* Surrogate pair */
 527             if ((c & 0xfc00) == 0xd800) {
 528                 Py_UNICODE c2 = 0;
 529                 if (end + 6 >= len) {
 530                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
 531                     goto bail;
 532                 }
 533                 if (buf[next++] != '\\' || buf[next++] != 'u') {
 534                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
 535                     goto bail;
 536                 }
 537                 end += 6;
 538                 /* Decode 4 hex digits */
 539                 for (; next < end; next++) {
 540                     Py_UNICODE digit = buf[next];
 541                     c2 <<= 4;
 542                     switch (digit) {
 543                         case '0': case '1': case '2': case '3': case '4':
 544                         case '5': case '6': case '7': case '8': case '9':
 545                             c2 |= (digit - '0'); break;
 546                         case 'a': case 'b': case 'c': case 'd': case 'e':
 547                         case 'f':
 548                             c2 |= (digit - 'a' + 10); break;
 549                         case 'A': case 'B': case 'C': case 'D': case 'E':
 550                         case 'F':
 551                             c2 |= (digit - 'A' + 10); break;
 552                         default:
 553                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
 554                             goto bail;
 555                     }
 556                 }
 557                 if ((c2 & 0xfc00) != 0xdc00) {
 558                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
 559                     goto bail;
 560                 }
 561                 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
 562             }
 563             else if ((c & 0xfc00) == 0xdc00) {
 564                 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
 565                 goto bail;
 566             }
 567 #endif
 568         }
 569         chunk = PyUnicode_FromUnicode(&c, 1);
 570         if (chunk == NULL) {
 571             goto bail;
 572         }
 573         if (PyList_Append(chunks, chunk)) {
 574             Py_DECREF(chunk);
 575             goto bail;
 576         }
 577         Py_DECREF(chunk);
 578     }
 579 
 580     rval = join_list_unicode(chunks);
 581     if (rval == NULL) {
 582         goto bail;
 583     }
 584     Py_CLEAR(chunks);
 585     *next_end_ptr = end;
 586     return rval;
 587 bail:
 588     *next_end_ptr = -1;
 589     Py_XDECREF(chunks);
 590     return NULL;
 591 }
 592 
 593 
 594 static PyObject *
 595 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
 596 {
 597     /* Read the JSON string from PyUnicode pystr.
 598     end is the index of the first character after the quote.
 599     if strict is zero then literal control characters are allowed
 600     *next_end_ptr is a return-by-reference index of the character
 601         after the end quote
 602 
 603     Return value is a new PyUnicode
 604     */
 605     PyObject *rval;
 606     Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
 607     Py_ssize_t begin = end - 1;
 608     Py_ssize_t next;
 609     const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
 610     PyObject *chunks = PyList_New(0);
 611     if (chunks == NULL) {
 612         goto bail;
 613     }
 614     if (end < 0 || len <= end) {
 615         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
 616         goto bail;
 617     }
 618     while (1) {
 619         /* Find the end of the string or the next escape */
 620         Py_UNICODE c = 0;
 621         PyObject *chunk = NULL;
 622         for (next = end; next < len; next++) {
 623             c = buf[next];
 624             if (c == '"' || c == '\\') {
 625                 break;
 626             }
 627             else if (strict && c <= 0x1f) {
 628                 raise_errmsg("Invalid control character at", pystr, next);
 629                 goto bail;
 630             }
 631         }
 632         if (!(c == '"' || c == '\\')) {
 633             raise_errmsg("Unterminated string starting at", pystr, begin);
 634             goto bail;
 635         }
 636         /* Pick up this chunk if it's not zero length */
 637         if (next != end) {
 638             chunk = PyUnicode_FromUnicode(&buf[end], next - end);
 639             if (chunk == NULL) {
 640                 goto bail;
 641             }
 642             if (PyList_Append(chunks, chunk)) {
 643                 Py_DECREF(chunk);
 644                 goto bail;
 645             }
 646             Py_DECREF(chunk);
 647         }
 648         next++;
 649         if (c == '"') {
 650             end = next;
 651             break;
 652         }
 653         if (next == len) {
 654             raise_errmsg("Unterminated string starting at", pystr, begin);
 655             goto bail;
 656         }
 657         c = buf[next];
 658         if (c != 'u') {
 659             /* Non-unicode backslash escapes */
 660             end = next + 1;
 661             switch (c) {
 662                 case '"': break;
 663                 case '\\': break;
 664                 case '/': break;
 665                 case 'b': c = '\b'; break;
 666                 case 'f': c = '\f'; break;
 667                 case 'n': c = '\n'; break;
 668                 case 'r': c = '\r'; break;
 669                 case 't': c = '\t'; break;
 670                 default: c = 0;
 671             }
 672             if (c == 0) {
 673                 raise_errmsg("Invalid \\escape", pystr, end - 2);
 674                 goto bail;
 675             }
 676         }
 677         else {
 678             c = 0;
 679             next++;
 680             end = next + 4;
 681             if (end >= len) {
 682                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
 683                 goto bail;
 684             }
 685             /* Decode 4 hex digits */
 686             for (; next < end; next++) {
 687                 Py_UNICODE digit = buf[next];
 688                 c <<= 4;
 689                 switch (digit) {
 690                     case '0': case '1': case '2': case '3': case '4':
 691                     case '5': case '6': case '7': case '8': case '9':
 692                         c |= (digit - '0'); break;
 693                     case 'a': case 'b': case 'c': case 'd': case 'e':
 694                     case 'f':
 695                         c |= (digit - 'a' + 10); break;
 696                     case 'A': case 'B': case 'C': case 'D': case 'E':
 697                     case 'F':
 698                         c |= (digit - 'A' + 10); break;
 699                     default:
 700                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
 701                         goto bail;
 702                 }
 703             }
 704 #ifdef Py_UNICODE_WIDE
 705             /* Surrogate pair */
 706             if ((c & 0xfc00) == 0xd800) {
 707                 Py_UNICODE c2 = 0;
 708                 if (end + 6 >= len) {
 709                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
 710                     goto bail;
 711                 }
 712                 if (buf[next++] != '\\' || buf[next++] != 'u') {
 713                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
 714                     goto bail;
 715                 }
 716                 end += 6;
 717                 /* Decode 4 hex digits */
 718                 for (; next < end; next++) {
 719                     Py_UNICODE digit = buf[next];
 720                     c2 <<= 4;
 721                     switch (digit) {
 722                         case '0': case '1': case '2': case '3': case '4':
 723                         case '5': case '6': case '7': case '8': case '9':
 724                             c2 |= (digit - '0'); break;
 725                         case 'a': case 'b': case 'c': case 'd': case 'e':
 726                         case 'f':
 727                             c2 |= (digit - 'a' + 10); break;
 728                         case 'A': case 'B': case 'C': case 'D': case 'E':
 729                         case 'F':
 730                             c2 |= (digit - 'A' + 10); break;
 731                         default:
 732                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
 733                             goto bail;
 734                     }
 735                 }
 736                 if ((c2 & 0xfc00) != 0xdc00) {
 737                     raise_errmsg("Unpaired high surrogate", pystr, end - 5);
 738                     goto bail;
 739                 }
 740                 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
 741             }
 742             else if ((c & 0xfc00) == 0xdc00) {
 743                 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
 744                 goto bail;
 745             }
 746 #endif
 747         }
 748         chunk = PyUnicode_FromUnicode(&c, 1);
 749         if (chunk == NULL) {
 750             goto bail;
 751         }
 752         if (PyList_Append(chunks, chunk)) {
 753             Py_DECREF(chunk);
 754             goto bail;
 755         }
 756         Py_DECREF(chunk);
 757     }
 758 
 759     rval = join_list_unicode(chunks);
 760     if (rval == NULL) {
 761         goto bail;
 762     }
 763     Py_DECREF(chunks);
 764     *next_end_ptr = end;
 765     return rval;
 766 bail:
 767     *next_end_ptr = -1;
 768     Py_XDECREF(chunks);
 769     return NULL;
 770 }
 771 
 772 PyDoc_STRVAR(pydoc_scanstring,
 773     "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
 774     "\n"
 775     "Scan the string s for a JSON string. End is the index of the\n"
 776     "character in s after the quote that started the JSON string.\n"
 777     "Unescapes all valid JSON string escape sequences and raises ValueError\n"
 778     "on attempt to decode an invalid string. If strict is False then literal\n"
 779     "control characters are allowed in the string.\n"
 780     "\n"
 781     "Returns a tuple of the decoded string and the index of the character in s\n"
 782     "after the end quote."
 783 );
 784 
 785 static PyObject *
 786 py_scanstring(PyObject* self UNUSED, PyObject *args)
 787 {
 788     PyObject *pystr;
 789     PyObject *rval;
 790     Py_ssize_t end;
 791     Py_ssize_t next_end = -1;
 792     char *encoding = NULL;
 793     int strict = 1;
 794     if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
 795         return NULL;
 796     }
 797     if (encoding == NULL) {
 798         encoding = DEFAULT_ENCODING;
 799     }
 800     if (PyString_Check(pystr)) {
 801         rval = scanstring_str(pystr, end, encoding, strict, &next_end);
 802     }
 803     else if (PyUnicode_Check(pystr)) {
 804         rval = scanstring_unicode(pystr, end, strict, &next_end);
 805     }
 806     else {
 807         PyErr_Format(PyExc_TypeError,
 808                      "first argument must be a string, not %.80s",
 809                      Py_TYPE(pystr)->tp_name);
 810         return NULL;
 811     }
 812     return _build_rval_index_tuple(rval, next_end);
 813 }
 814 
 815 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
 816     "encode_basestring_ascii(basestring) -> str\n"
 817     "\n"
 818     "Return an ASCII-only JSON representation of a Python string"
 819 );
 820 
 821 static PyObject *
 822 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
 823 {
 824     /* Return an ASCII-only JSON representation of a Python string */
 825     /* METH_O */
 826     if (PyString_Check(pystr)) {
 827         return ascii_escape_str(pystr);
 828     }
 829     else if (PyUnicode_Check(pystr)) {
 830         return ascii_escape_unicode(pystr);
 831     }
 832     else {
 833         PyErr_Format(PyExc_TypeError,
 834                      "first argument must be a string, not %.80s",
 835                      Py_TYPE(pystr)->tp_name);
 836         return NULL;
 837     }
 838 }
 839 
 840 static void
 841 scanner_dealloc(PyObject *self)
 842 {
 843     /* Deallocate scanner object */
 844     scanner_clear(self);
 845     Py_TYPE(self)->tp_free(self);
 846 }
 847 
 848 static int
 849 scanner_traverse(PyObject *self, visitproc visit, void *arg)
 850 {
 851     PyScannerObject *s;
 852     assert(PyScanner_Check(self));
 853     s = (PyScannerObject *)self;
 854     Py_VISIT(s->encoding);
 855     Py_VISIT(s->strict);
 856     Py_VISIT(s->object_hook);
 857     Py_VISIT(s->pairs_hook);
 858     Py_VISIT(s->parse_float);
 859     Py_VISIT(s->parse_int);
 860     Py_VISIT(s->parse_constant);
 861     return 0;
 862 }
 863 
 864 static int
 865 scanner_clear(PyObject *self)
 866 {
 867     PyScannerObject *s;
 868     assert(PyScanner_Check(self));
 869     s = (PyScannerObject *)self;
 870     Py_CLEAR(s->encoding);
 871     Py_CLEAR(s->strict);
 872     Py_CLEAR(s->object_hook);
 873     Py_CLEAR(s->pairs_hook);
 874     Py_CLEAR(s->parse_float);
 875     Py_CLEAR(s->parse_int);
 876     Py_CLEAR(s->parse_constant);
 877     return 0;
 878 }
 879 
 880 static PyObject *
 881 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
 882     /* Read a JSON object from PyString pystr.
 883     idx is the index of the first character after the opening curly brace.
 884     *next_idx_ptr is a return-by-reference index to the first character after
 885         the closing curly brace.
 886 
 887     Returns a new PyObject (usually a dict, but object_hook can change that)
 888     */
 889     char *str = PyString_AS_STRING(pystr);
 890     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
 891     PyObject *rval;
 892     PyObject *pairs;
 893     PyObject *item;
 894     PyObject *key = NULL;
 895     PyObject *val = NULL;
 896     char *encoding = PyString_AS_STRING(s->encoding);
 897     int strict = PyObject_IsTrue(s->strict);
 898     Py_ssize_t next_idx;
 899 
 900     pairs = PyList_New(0);
 901     if (pairs == NULL)
 902         return NULL;
 903 
 904     /* skip whitespace after { */
 905     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
 906 
 907     /* only loop if the object is non-empty */
 908     if (idx <= end_idx && str[idx] != '}') {
 909         while (idx <= end_idx) {
 910             /* read key */
 911             if (str[idx] != '"') {
 912                 raise_errmsg("Expecting property name", pystr, idx);
 913                 goto bail;
 914             }
 915             key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
 916             if (key == NULL)
 917                 goto bail;
 918             idx = next_idx;
 919 
 920             /* skip whitespace between key and : delimiter, read :, skip whitespace */
 921             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
 922             if (idx > end_idx || str[idx] != ':') {
 923                 raise_errmsg("Expecting : delimiter", pystr, idx);
 924                 goto bail;
 925             }
 926             idx++;
 927             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
 928 
 929             /* read any JSON data type */
 930             val = scan_once_str(s, pystr, idx, &next_idx);
 931             if (val == NULL)
 932                 goto bail;
 933 
 934             item = PyTuple_Pack(2, key, val);
 935             if (item == NULL)
 936                 goto bail;
 937             Py_CLEAR(key);
 938             Py_CLEAR(val);
 939             if (PyList_Append(pairs, item) == -1) {
 940                 Py_DECREF(item);
 941                 goto bail;
 942             }
 943             Py_DECREF(item);
 944             idx = next_idx;
 945 
 946             /* skip whitespace before } or , */
 947             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
 948 
 949             /* bail if the object is closed or we didn't get the , delimiter */
 950             if (idx > end_idx) break;
 951             if (str[idx] == '}') {
 952                 break;
 953             }
 954             else if (str[idx] != ',') {
 955                 raise_errmsg("Expecting , delimiter", pystr, idx);
 956                 goto bail;
 957             }
 958             idx++;
 959 
 960             /* skip whitespace after , delimiter */
 961             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
 962         }
 963     }
 964     /* verify that idx < end_idx, str[idx] should be '}' */
 965     if (idx > end_idx || str[idx] != '}') {
 966         raise_errmsg("Expecting object", pystr, end_idx);
 967         goto bail;
 968     }
 969 
 970     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
 971     if (s->pairs_hook != Py_None) {
 972         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
 973         if (val == NULL)
 974             goto bail;
 975         Py_DECREF(pairs);
 976         *next_idx_ptr = idx + 1;
 977         return val;
 978     }
 979 
 980     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), 
 981                                          pairs, NULL);
 982     if (rval == NULL)
 983         goto bail;
 984     Py_CLEAR(pairs);
 985 
 986     /* if object_hook is not None: rval = object_hook(rval) */
 987     if (s->object_hook != Py_None) {
 988         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
 989         if (val == NULL)
 990             goto bail;
 991         Py_DECREF(rval);
 992         rval = val;
 993         val = NULL;
 994     }
 995     *next_idx_ptr = idx + 1;
 996     return rval;
 997 bail:
 998     Py_XDECREF(key);
 999     Py_XDECREF(val);
1000     Py_XDECREF(pairs);
1001     return NULL;
1002 }
1003 
1004 static PyObject *
1005 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1006     /* Read a JSON object from PyUnicode pystr.
1007     idx is the index of the first character after the opening curly brace.
1008     *next_idx_ptr is a return-by-reference index to the first character after
1009         the closing curly brace.
1010 
1011     Returns a new PyObject (usually a dict, but object_hook can change that)
1012     */
1013     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1014     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1015     PyObject *rval;
1016     PyObject *pairs;
1017     PyObject *item;
1018     PyObject *key = NULL;
1019     PyObject *val = NULL;
1020     int strict = PyObject_IsTrue(s->strict);
1021     Py_ssize_t next_idx;
1022 
1023     pairs = PyList_New(0);
1024     if (pairs == NULL)
1025         return NULL;
1026 
1027     /* skip whitespace after { */
1028     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1029 
1030     /* only loop if the object is non-empty */
1031     if (idx <= end_idx && str[idx] != '}') {
1032         while (idx <= end_idx) {
1033             /* read key */
1034             if (str[idx] != '"') {
1035                 raise_errmsg("Expecting property name", pystr, idx);
1036                 goto bail;
1037             }
1038             key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1039             if (key == NULL)
1040                 goto bail;
1041             idx = next_idx;
1042 
1043             /* skip whitespace between key and : delimiter, read :, skip whitespace */
1044             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1045             if (idx > end_idx || str[idx] != ':') {
1046                 raise_errmsg("Expecting : delimiter", pystr, idx);
1047                 goto bail;
1048             }
1049             idx++;
1050             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1051 
1052             /* read any JSON term */
1053             val = scan_once_unicode(s, pystr, idx, &next_idx);
1054             if (val == NULL)
1055                 goto bail;
1056 
1057             item = PyTuple_Pack(2, key, val);
1058             if (item == NULL)
1059                 goto bail;
1060             Py_CLEAR(key);
1061             Py_CLEAR(val);
1062             if (PyList_Append(pairs, item) == -1) {
1063                 Py_DECREF(item);
1064                 goto bail;
1065             }
1066             Py_DECREF(item);
1067             idx = next_idx;
1068 
1069             /* skip whitespace before } or , */
1070             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1071 
1072             /* bail if the object is closed or we didn't get the , delimiter */
1073             if (idx > end_idx) break;
1074             if (str[idx] == '}') {
1075                 break;
1076             }
1077             else if (str[idx] != ',') {
1078                 raise_errmsg("Expecting , delimiter", pystr, idx);
1079                 goto bail;
1080             }
1081             idx++;
1082 
1083             /* skip whitespace after , delimiter */
1084             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1085         }
1086     }
1087 
1088     /* verify that idx < end_idx, str[idx] should be '}' */
1089     if (idx > end_idx || str[idx] != '}') {
1090         raise_errmsg("Expecting object", pystr, end_idx);
1091         goto bail;
1092     }
1093 
1094     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1095     if (s->pairs_hook != Py_None) {
1096         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1097         if (val == NULL)
1098             goto bail;
1099         Py_DECREF(pairs);
1100         *next_idx_ptr = idx + 1;
1101         return val;
1102     }
1103 
1104     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), 
1105                                          pairs, NULL);
1106     if (rval == NULL)
1107         goto bail;
1108     Py_CLEAR(pairs);
1109 
1110     /* if object_hook is not None: rval = object_hook(rval) */
1111     if (s->object_hook != Py_None) {
1112         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1113         if (val == NULL)
1114             goto bail;
1115         Py_DECREF(rval);
1116         rval = val;
1117         val = NULL;
1118     }
1119     *next_idx_ptr = idx + 1;
1120     return rval;
1121 bail:
1122     Py_XDECREF(key);
1123     Py_XDECREF(val);
1124     Py_XDECREF(pairs);
1125     return NULL;
1126 }
1127 
1128 static PyObject *
1129 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1130     /* Read a JSON array from PyString pystr.
1131     idx is the index of the first character after the opening brace.
1132     *next_idx_ptr is a return-by-reference index to the first character after
1133         the closing brace.
1134 
1135     Returns a new PyList
1136     */
1137     char *str = PyString_AS_STRING(pystr);
1138     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1139     PyObject *val = NULL;
1140     PyObject *rval = PyList_New(0);
1141     Py_ssize_t next_idx;
1142     if (rval == NULL)
1143         return NULL;
1144 
1145     /* skip whitespace after [ */
1146     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1147 
1148     /* only loop if the array is non-empty */
1149     if (idx <= end_idx && str[idx] != ']') {
1150         while (idx <= end_idx) {
1151 
1152             /* read any JSON term and de-tuplefy the (rval, idx) */
1153             val = scan_once_str(s, pystr, idx, &next_idx);
1154             if (val == NULL)
1155                 goto bail;
1156 
1157             if (PyList_Append(rval, val) == -1)
1158                 goto bail;
1159 
1160             Py_CLEAR(val);
1161             idx = next_idx;
1162 
1163             /* skip whitespace between term and , */
1164             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1165 
1166             /* bail if the array is closed or we didn't get the , delimiter */
1167             if (idx > end_idx) break;
1168             if (str[idx] == ']') {
1169                 break;
1170             }
1171             else if (str[idx] != ',') {
1172                 raise_errmsg("Expecting , delimiter", pystr, idx);
1173                 goto bail;
1174             }
1175             idx++;
1176 
1177             /* skip whitespace after , */
1178             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1179         }
1180     }
1181 
1182     /* verify that idx < end_idx, str[idx] should be ']' */
1183     if (idx > end_idx || str[idx] != ']') {
1184         raise_errmsg("Expecting object", pystr, end_idx);
1185         goto bail;
1186     }
1187     *next_idx_ptr = idx + 1;
1188     return rval;
1189 bail:
1190     Py_XDECREF(val);
1191     Py_DECREF(rval);
1192     return NULL;
1193 }
1194 
1195 static PyObject *
1196 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1197     /* Read a JSON array from PyString pystr.
1198     idx is the index of the first character after the opening brace.
1199     *next_idx_ptr is a return-by-reference index to the first character after
1200         the closing brace.
1201 
1202     Returns a new PyList
1203     */
1204     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1205     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1206     PyObject *val = NULL;
1207     PyObject *rval = PyList_New(0);
1208     Py_ssize_t next_idx;
1209     if (rval == NULL)
1210         return NULL;
1211 
1212     /* skip whitespace after [ */
1213     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1214 
1215     /* only loop if the array is non-empty */
1216     if (idx <= end_idx && str[idx] != ']') {
1217         while (idx <= end_idx) {
1218 
1219             /* read any JSON term  */
1220             val = scan_once_unicode(s, pystr, idx, &next_idx);
1221             if (val == NULL)
1222                 goto bail;
1223 
1224             if (PyList_Append(rval, val) == -1)
1225                 goto bail;
1226 
1227             Py_CLEAR(val);
1228             idx = next_idx;
1229 
1230             /* skip whitespace between term and , */
1231             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232 
1233             /* bail if the array is closed or we didn't get the , delimiter */
1234             if (idx > end_idx) break;
1235             if (str[idx] == ']') {
1236                 break;
1237             }
1238             else if (str[idx] != ',') {
1239                 raise_errmsg("Expecting , delimiter", pystr, idx);
1240                 goto bail;
1241             }
1242             idx++;
1243 
1244             /* skip whitespace after , */
1245             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1246         }
1247     }
1248 
1249     /* verify that idx < end_idx, str[idx] should be ']' */
1250     if (idx > end_idx || str[idx] != ']') {
1251         raise_errmsg("Expecting object", pystr, end_idx);
1252         goto bail;
1253     }
1254     *next_idx_ptr = idx + 1;
1255     return rval;
1256 bail:
1257     Py_XDECREF(val);
1258     Py_DECREF(rval);
1259     return NULL;
1260 }
1261 
1262 static PyObject *
1263 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1264     /* Read a JSON constant from PyString pystr.
1265     constant is the constant string that was found
1266         ("NaN", "Infinity", "-Infinity").
1267     idx is the index of the first character of the constant
1268     *next_idx_ptr is a return-by-reference index to the first character after
1269         the constant.
1270 
1271     Returns the result of parse_constant
1272     */
1273     PyObject *cstr;
1274     PyObject *rval;
1275     /* constant is "NaN", "Infinity", or "-Infinity" */
1276     cstr = PyString_InternFromString(constant);
1277     if (cstr == NULL)
1278         return NULL;
1279 
1280     /* rval = parse_constant(constant) */
1281     rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1282     idx += PyString_GET_SIZE(cstr);
1283     Py_DECREF(cstr);
1284     *next_idx_ptr = idx;
1285     return rval;
1286 }
1287 
1288 static PyObject *
1289 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1290     /* Read a JSON number from PyString pystr.
1291     idx is the index of the first character of the number
1292     *next_idx_ptr is a return-by-reference index to the first character after
1293         the number.
1294 
1295     Returns a new PyObject representation of that number:
1296         PyInt, PyLong, or PyFloat.
1297         May return other types if parse_int or parse_float are set
1298     */
1299     char *str = PyString_AS_STRING(pystr);
1300     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1301     Py_ssize_t idx = start;
1302     int is_float = 0;
1303     PyObject *rval;
1304     PyObject *numstr;
1305 
1306     /* read a sign if it's there, make sure it's not the end of the string */
1307     if (str[idx] == '-') {
1308         idx++;
1309         if (idx > end_idx) {
1310             PyErr_SetNone(PyExc_StopIteration);
1311             return NULL;
1312         }
1313     }
1314 
1315     /* read as many integer digits as we find as long as it doesn't start with 0 */
1316     if (str[idx] >= '1' && str[idx] <= '9') {
1317         idx++;
1318         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1319     }
1320     /* if it starts with 0 we only expect one integer digit */
1321     else if (str[idx] == '0') {
1322         idx++;
1323     }
1324     /* no integer digits, error */
1325     else {
1326         PyErr_SetNone(PyExc_StopIteration);
1327         return NULL;
1328     }
1329 
1330     /* if the next char is '.' followed by a digit then read all float digits */
1331     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1332         is_float = 1;
1333         idx += 2;
1334         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1335     }
1336 
1337     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1338     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1339 
1340         /* save the index of the 'e' or 'E' just in case we need to backtrack */
1341         Py_ssize_t e_start = idx;
1342         idx++;
1343 
1344         /* read an exponent sign if present */
1345         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1346 
1347         /* read all digits */
1348         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1349 
1350         /* if we got a digit, then parse as float. if not, backtrack */
1351         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1352             is_float = 1;
1353         }
1354         else {
1355             idx = e_start;
1356         }
1357     }
1358 
1359     /* copy the section we determined to be a number */
1360     numstr = PyString_FromStringAndSize(&str[start], idx - start);
1361     if (numstr == NULL)
1362         return NULL;
1363     if (is_float) {
1364         /* parse as a float using a fast path if available, otherwise call user defined method */
1365         if (s->parse_float != (PyObject *)&PyFloat_Type) {
1366             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1367         }
1368         else {
1369             double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1370                                              NULL, NULL);
1371             if (d == -1.0 && PyErr_Occurred())
1372                 return NULL;
1373             rval = PyFloat_FromDouble(d);
1374         }
1375     }
1376     else {
1377         /* parse as an int using a fast path if available, otherwise call user defined method */
1378         if (s->parse_int != (PyObject *)&PyInt_Type) {
1379             rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1380         }
1381         else {
1382             rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1383         }
1384     }
1385     Py_DECREF(numstr);
1386     *next_idx_ptr = idx;
1387     return rval;
1388 }
1389 
1390 static PyObject *
1391 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1392     /* Read a JSON number from PyUnicode pystr.
1393     idx is the index of the first character of the number
1394     *next_idx_ptr is a return-by-reference index to the first character after
1395         the number.
1396 
1397     Returns a new PyObject representation of that number:
1398         PyInt, PyLong, or PyFloat.
1399         May return other types if parse_int or parse_float are set
1400     */
1401     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1402     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1403     Py_ssize_t idx = start;
1404     int is_float = 0;
1405     PyObject *rval;
1406     PyObject *numstr;
1407 
1408     /* read a sign if it's there, make sure it's not the end of the string */
1409     if (str[idx] == '-') {
1410         idx++;
1411         if (idx > end_idx) {
1412             PyErr_SetNone(PyExc_StopIteration);
1413             return NULL;
1414         }
1415     }
1416 
1417     /* read as many integer digits as we find as long as it doesn't start with 0 */
1418     if (str[idx] >= '1' && str[idx] <= '9') {
1419         idx++;
1420         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1421     }
1422     /* if it starts with 0 we only expect one integer digit */
1423     else if (str[idx] == '0') {
1424         idx++;
1425     }
1426     /* no integer digits, error */
1427     else {
1428         PyErr_SetNone(PyExc_StopIteration);
1429         return NULL;
1430     }
1431 
1432     /* if the next char is '.' followed by a digit then read all float digits */
1433     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1434         is_float = 1;
1435         idx += 2;
1436         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1437     }
1438 
1439     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1440     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1441         Py_ssize_t e_start = idx;
1442         idx++;
1443 
1444         /* read an exponent sign if present */
1445         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1446 
1447         /* read all digits */
1448         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1449 
1450         /* if we got a digit, then parse as float. if not, backtrack */
1451         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1452             is_float = 1;
1453         }
1454         else {
1455             idx = e_start;
1456         }
1457     }
1458 
1459     /* copy the section we determined to be a number */
1460     numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1461     if (numstr == NULL)
1462         return NULL;
1463     if (is_float) {
1464         /* parse as a float using a fast path if available, otherwise call user defined method */
1465         if (s->parse_float != (PyObject *)&PyFloat_Type) {
1466             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1467         }
1468         else {
1469             rval = PyFloat_FromString(numstr, NULL);
1470         }
1471     }
1472     else {
1473         /* no fast path for unicode -> int, just call */
1474         rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1475     }
1476     Py_DECREF(numstr);
1477     *next_idx_ptr = idx;
1478     return rval;
1479 }
1480 
1481 static PyObject *
1482 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1483 {
1484     /* Read one JSON term (of any kind) from PyString pystr.
1485     idx is the index of the first character of the term
1486     *next_idx_ptr is a return-by-reference index to the first character after
1487         the number.
1488 
1489     Returns a new PyObject representation of the term.
1490     */
1491     PyObject *res;
1492     char *str = PyString_AS_STRING(pystr);
1493     Py_ssize_t length = PyString_GET_SIZE(pystr);
1494     if (idx >= length) {
1495         PyErr_SetNone(PyExc_StopIteration);
1496         return NULL;
1497     }
1498     switch (str[idx]) {
1499         case '"':
1500             /* string */
1501             return scanstring_str(pystr, idx + 1,
1502                 PyString_AS_STRING(s->encoding),
1503                 PyObject_IsTrue(s->strict),
1504                 next_idx_ptr);
1505         case '{':
1506             /* object */
1507             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1508                                       "from a byte string"))
1509                 return NULL;
1510             res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1511             Py_LeaveRecursiveCall();
1512             return res;
1513         case '[':
1514             /* array */
1515             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1516                                       "from a byte string"))
1517                 return NULL;
1518             res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1519             Py_LeaveRecursiveCall();
1520             return res;
1521         case 'n':
1522             /* null */
1523             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1524                 Py_INCREF(Py_None);
1525                 *next_idx_ptr = idx + 4;
1526                 return Py_None;
1527             }
1528             break;
1529         case 't':
1530             /* true */
1531             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1532                 Py_INCREF(Py_True);
1533                 *next_idx_ptr = idx + 4;
1534                 return Py_True;
1535             }
1536             break;
1537         case 'f':
1538             /* false */
1539             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1540                 Py_INCREF(Py_False);
1541                 *next_idx_ptr = idx + 5;
1542                 return Py_False;
1543             }
1544             break;
1545         case 'N':
1546             /* NaN */
1547             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1548                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1549             }
1550             break;
1551         case 'I':
1552             /* Infinity */
1553             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1554                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1555             }
1556             break;
1557         case '-':
1558             /* -Infinity */
1559             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1560                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1561             }
1562             break;
1563     }
1564     /* Didn't find a string, object, array, or named constant. Look for a number. */
1565     return _match_number_str(s, pystr, idx, next_idx_ptr);
1566 }
1567 
1568 static PyObject *
1569 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1570 {
1571     /* Read one JSON term (of any kind) from PyUnicode pystr.
1572     idx is the index of the first character of the term
1573     *next_idx_ptr is a return-by-reference index to the first character after
1574         the number.
1575 
1576     Returns a new PyObject representation of the term.
1577     */
1578     PyObject *res;
1579     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1580     Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1581     if (idx >= length) {
1582         PyErr_SetNone(PyExc_StopIteration);
1583         return NULL;
1584     }
1585     switch (str[idx]) {
1586         case '"':
1587             /* string */
1588             return scanstring_unicode(pystr, idx + 1,
1589                 PyObject_IsTrue(s->strict),
1590                 next_idx_ptr);
1591         case '{':
1592             /* object */
1593             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1594                                       "from a unicode string"))
1595                 return NULL;
1596             res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1597             Py_LeaveRecursiveCall();
1598             return res;
1599         case '[':
1600             /* array */
1601             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1602                                       "from a unicode string"))
1603                 return NULL;
1604             res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1605             Py_LeaveRecursiveCall();
1606             return res;
1607         case 'n':
1608             /* null */
1609             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1610                 Py_INCREF(Py_None);
1611                 *next_idx_ptr = idx + 4;
1612                 return Py_None;
1613             }
1614             break;
1615         case 't':
1616             /* true */
1617             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1618                 Py_INCREF(Py_True);
1619                 *next_idx_ptr = idx + 4;
1620                 return Py_True;
1621             }
1622             break;
1623         case 'f':
1624             /* false */
1625             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1626                 Py_INCREF(Py_False);
1627                 *next_idx_ptr = idx + 5;
1628                 return Py_False;
1629             }
1630             break;
1631         case 'N':
1632             /* NaN */
1633             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1634                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1635             }
1636             break;
1637         case 'I':
1638             /* Infinity */
1639             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1640                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1641             }
1642             break;
1643         case '-':
1644             /* -Infinity */
1645             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1646                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1647             }
1648             break;
1649     }
1650     /* Didn't find a string, object, array, or named constant. Look for a number. */
1651     return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1652 }
1653 
1654 static PyObject *
1655 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1656 {
1657     /* Python callable interface to scan_once_{str,unicode} */
1658     PyObject *pystr;
1659     PyObject *rval;
1660     Py_ssize_t idx;
1661     Py_ssize_t next_idx = -1;
1662     static char *kwlist[] = {"string", "idx", NULL};
1663     PyScannerObject *s;
1664     assert(PyScanner_Check(self));
1665     s = (PyScannerObject *)self;
1666     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1667         return NULL;
1668 
1669     if (PyString_Check(pystr)) {
1670         rval = scan_once_str(s, pystr, idx, &next_idx);
1671     }
1672     else if (PyUnicode_Check(pystr)) {
1673         rval = scan_once_unicode(s, pystr, idx, &next_idx);
1674     }
1675     else {
1676         PyErr_Format(PyExc_TypeError,
1677                  "first argument must be a string, not %.80s",
1678                  Py_TYPE(pystr)->tp_name);
1679         return NULL;
1680     }
1681     return _build_rval_index_tuple(rval, next_idx);
1682 }
1683 
1684 static PyObject *
1685 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1686 {
1687     PyScannerObject *s;
1688     s = (PyScannerObject *)type->tp_alloc(type, 0);
1689     if (s != NULL) {
1690         s->encoding = NULL;
1691         s->strict = NULL;
1692         s->object_hook = NULL;
1693         s->pairs_hook = NULL;
1694         s->parse_float = NULL;
1695         s->parse_int = NULL;
1696         s->parse_constant = NULL;
1697     }
1698     return (PyObject *)s;
1699 }
1700 
1701 static int
1702 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1703 {
1704     /* Initialize Scanner object */
1705     PyObject *ctx;
1706     static char *kwlist[] = {"context", NULL};
1707     PyScannerObject *s;
1708 
1709     assert(PyScanner_Check(self));
1710     s = (PyScannerObject *)self;
1711 
1712     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1713         return -1;
1714 
1715     /* PyString_AS_STRING is used on encoding */
1716     s->encoding = PyObject_GetAttrString(ctx, "encoding");
1717     if (s->encoding == NULL)
1718         goto bail;
1719     if (s->encoding == Py_None) {
1720         Py_DECREF(Py_None);
1721         s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1722     }
1723     else if (PyUnicode_Check(s->encoding)) {
1724         PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1725         Py_DECREF(s->encoding);
1726         s->encoding = tmp;
1727     }
1728     if (s->encoding == NULL)
1729         goto bail;
1730     if (!PyString_Check(s->encoding)) {
1731 	PyErr_Format(PyExc_TypeError,
1732 		     "encoding must be a string, not %.80s",
1733 		     Py_TYPE(s->encoding)->tp_name);
1734 	goto bail;
1735     }
1736        
1737 
1738     /* All of these will fail "gracefully" so we don't need to verify them */
1739     s->strict = PyObject_GetAttrString(ctx, "strict");
1740     if (s->strict == NULL)
1741         goto bail;
1742     s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1743     if (s->object_hook == NULL)
1744         goto bail;
1745     s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1746     if (s->pairs_hook == NULL)
1747         goto bail;
1748     s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1749     if (s->parse_float == NULL)
1750         goto bail;
1751     s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1752     if (s->parse_int == NULL)
1753         goto bail;
1754     s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1755     if (s->parse_constant == NULL)
1756         goto bail;
1757 
1758     return 0;
1759 
1760 bail:
1761     Py_CLEAR(s->encoding);
1762     Py_CLEAR(s->strict);
1763     Py_CLEAR(s->object_hook);
1764     Py_CLEAR(s->pairs_hook);
1765     Py_CLEAR(s->parse_float);
1766     Py_CLEAR(s->parse_int);
1767     Py_CLEAR(s->parse_constant);
1768     return -1;
1769 }
1770 
1771 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1772 
1773 static
1774 PyTypeObject PyScannerType = {
1775     PyObject_HEAD_INIT(NULL)
1776     0,                    /* tp_internal */
1777     "_json.Scanner",       /* tp_name */
1778     sizeof(PyScannerObject), /* tp_basicsize */
1779     0,                    /* tp_itemsize */
1780     scanner_dealloc, /* tp_dealloc */
1781     0,                    /* tp_print */
1782     0,                    /* tp_getattr */
1783     0,                    /* tp_setattr */
1784     0,                    /* tp_compare */
1785     0,                    /* tp_repr */
1786     0,                    /* tp_as_number */
1787     0,                    /* tp_as_sequence */
1788     0,                    /* tp_as_mapping */
1789     0,                    /* tp_hash */
1790     scanner_call,         /* tp_call */
1791     0,                    /* tp_str */
1792     0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
1793     0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
1794     0,                    /* tp_as_buffer */
1795     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1796     scanner_doc,          /* tp_doc */
1797     scanner_traverse,                    /* tp_traverse */
1798     scanner_clear,                    /* tp_clear */
1799     0,                    /* tp_richcompare */
1800     0,                    /* tp_weaklistoffset */
1801     0,                    /* tp_iter */
1802     0,                    /* tp_iternext */
1803     0,                    /* tp_methods */
1804     scanner_members,                    /* tp_members */
1805     0,                    /* tp_getset */
1806     0,                    /* tp_base */
1807     0,                    /* tp_dict */
1808     0,                    /* tp_descr_get */
1809     0,                    /* tp_descr_set */
1810     0,                    /* tp_dictoffset */
1811     scanner_init,                    /* tp_init */
1812     0,/* PyType_GenericAlloc, */        /* tp_alloc */
1813     scanner_new,          /* tp_new */
1814     0,/* PyObject_GC_Del, */              /* tp_free */
1815 };
1816 
1817 static PyObject *
1818 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1819 {
1820     PyEncoderObject *s;
1821     s = (PyEncoderObject *)type->tp_alloc(type, 0);
1822     if (s != NULL) {
1823         s->markers = NULL;
1824         s->defaultfn = NULL;
1825         s->encoder = NULL;
1826         s->indent = NULL;
1827         s->key_separator = NULL;
1828         s->item_separator = NULL;
1829         s->sort_keys = NULL;
1830         s->skipkeys = NULL;
1831     }
1832     return (PyObject *)s;
1833 }
1834 
1835 static int
1836 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1837 {
1838     /* initialize Encoder object */
1839     static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1840 
1841     PyEncoderObject *s;
1842     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1843     PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1844 
1845     assert(PyEncoder_Check(self));
1846     s = (PyEncoderObject *)self;
1847 
1848     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1849         &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1850         &sort_keys, &skipkeys, &allow_nan))
1851         return -1;
1852 
1853     s->markers = markers;
1854     s->defaultfn = defaultfn;
1855     s->encoder = encoder;
1856     s->indent = indent;
1857     s->key_separator = key_separator;
1858     s->item_separator = item_separator;
1859     s->sort_keys = sort_keys;
1860     s->skipkeys = skipkeys;
1861     s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1862     s->allow_nan = PyObject_IsTrue(allow_nan);
1863 
1864     Py_INCREF(s->markers);
1865     Py_INCREF(s->defaultfn);
1866     Py_INCREF(s->encoder);
1867     Py_INCREF(s->indent);
1868     Py_INCREF(s->key_separator);
1869     Py_INCREF(s->item_separator);
1870     Py_INCREF(s->sort_keys);
1871     Py_INCREF(s->skipkeys);
1872     return 0;
1873 }
1874 
1875 static PyObject *
1876 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1877 {
1878     /* Python callable interface to encode_listencode_obj */
1879     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1880     PyObject *obj;
1881     PyObject *rval;
1882     Py_ssize_t indent_level;
1883     PyEncoderObject *s;
1884     assert(PyEncoder_Check(self));
1885     s = (PyEncoderObject *)self;
1886     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1887         &obj, _convertPyInt_AsSsize_t, &indent_level))
1888         return NULL;
1889     rval = PyList_New(0);
1890     if (rval == NULL)
1891         return NULL;
1892     if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1893         Py_DECREF(rval);
1894         return NULL;
1895     }
1896     return rval;
1897 }
1898 
1899 static PyObject *
1900 _encoded_const(PyObject *obj)
1901 {
1902     /* Return the JSON string representation of None, True, False */
1903     if (obj == Py_None) {
1904         static PyObject *s_null = NULL;
1905         if (s_null == NULL) {
1906             s_null = PyString_InternFromString("null");
1907         }
1908         Py_INCREF(s_null);
1909         return s_null;
1910     }
1911     else if (obj == Py_True) {
1912         static PyObject *s_true = NULL;
1913         if (s_true == NULL) {
1914             s_true = PyString_InternFromString("true");
1915         }
1916         Py_INCREF(s_true);
1917         return s_true;
1918     }
1919     else if (obj == Py_False) {
1920         static PyObject *s_false = NULL;
1921         if (s_false == NULL) {
1922             s_false = PyString_InternFromString("false");
1923         }
1924         Py_INCREF(s_false);
1925         return s_false;
1926     }
1927     else {
1928         PyErr_SetString(PyExc_ValueError, "not a const");
1929         return NULL;
1930     }
1931 }
1932 
1933 static PyObject *
1934 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1935 {
1936     /* Return the JSON representation of a PyFloat */
1937     double i = PyFloat_AS_DOUBLE(obj);
1938     if (!Py_IS_FINITE(i)) {
1939         if (!s->allow_nan) {
1940             PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1941             return NULL;
1942         }
1943         if (i > 0) {
1944             return PyString_FromString("Infinity");
1945         }
1946         else if (i < 0) {
1947             return PyString_FromString("-Infinity");
1948         }
1949         else {
1950             return PyString_FromString("NaN");
1951         }
1952     }
1953     /* Use a better float format here? */
1954     return PyObject_Repr(obj);
1955 }
1956 
1957 static PyObject *
1958 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1959 {
1960     /* Return the JSON representation of a string */
1961     if (s->fast_encode)
1962         return py_encode_basestring_ascii(NULL, obj);
1963     else
1964         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1965 }
1966 
1967 static int
1968 _steal_list_append(PyObject *lst, PyObject *stolen)
1969 {
1970     /* Append stolen and then decrement its reference count */
1971     int rval = PyList_Append(lst, stolen);
1972     Py_DECREF(stolen);
1973     return rval;
1974 }
1975 
1976 static int
1977 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1978 {
1979     /* Encode Python object obj to a JSON term, rval is a PyList */
1980     PyObject *newobj;
1981     int rv;
1982 
1983     if (obj == Py_None || obj == Py_True || obj == Py_False) {
1984         PyObject *cstr = _encoded_const(obj);
1985         if (cstr == NULL)
1986             return -1;
1987         return _steal_list_append(rval, cstr);
1988     }
1989     else if (PyString_Check(obj) || PyUnicode_Check(obj))
1990     {
1991         PyObject *encoded = encoder_encode_string(s, obj);
1992         if (encoded == NULL)
1993             return -1;
1994         return _steal_list_append(rval, encoded);
1995     }
1996     else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1997         PyObject *encoded = PyObject_Str(obj);
1998         if (encoded == NULL)
1999             return -1;
2000         return _steal_list_append(rval, encoded);
2001     }
2002     else if (PyFloat_Check(obj)) {
2003         PyObject *encoded = encoder_encode_float(s, obj);
2004         if (encoded == NULL)
2005             return -1;
2006         return _steal_list_append(rval, encoded);
2007     }
2008     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2009         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2010             return -1;
2011         rv = encoder_listencode_list(s, rval, obj, indent_level);
2012         Py_LeaveRecursiveCall();
2013         return rv;
2014     }
2015     else if (PyDict_Check(obj)) {
2016         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2017             return -1;
2018         rv = encoder_listencode_dict(s, rval, obj, indent_level);
2019         Py_LeaveRecursiveCall();
2020         return rv;
2021     }
2022     else {
2023         PyObject *ident = NULL;
2024         if (s->markers != Py_None) {
2025             int has_key;
2026             ident = PyLong_FromVoidPtr(obj);
2027             if (ident == NULL)
2028                 return -1;
2029             has_key = PyDict_Contains(s->markers, ident);
2030             if (has_key) {
2031                 if (has_key != -1)
2032                     PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2033                 Py_DECREF(ident);
2034                 return -1;
2035             }
2036             if (PyDict_SetItem(s->markers, ident, obj)) {
2037                 Py_DECREF(ident);
2038                 return -1;
2039             }
2040         }
2041         newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2042         if (newobj == NULL) {
2043             Py_XDECREF(ident);
2044             return -1;
2045         }
2046 
2047         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2048             return -1;
2049         rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2050         Py_LeaveRecursiveCall();
2051 
2052         Py_DECREF(newobj);
2053         if (rv) {
2054             Py_XDECREF(ident);
2055             return -1;
2056         }
2057         if (ident != NULL) {
2058             if (PyDict_DelItem(s->markers, ident)) {
2059                 Py_XDECREF(ident);
2060                 return -1;
2061             }
2062             Py_XDECREF(ident);
2063         }
2064         return rv;
2065     }
2066 }
2067 
2068 static int
2069 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2070 {
2071     /* Encode Python dict dct a JSON term, rval is a PyList */
2072     static PyObject *open_dict = NULL;
2073     static PyObject *close_dict = NULL;
2074     static PyObject *empty_dict = NULL;
2075     PyObject *kstr = NULL;
2076     PyObject *ident = NULL;
2077     PyObject *key = NULL;
2078     PyObject *value = NULL;
2079     PyObject *it = NULL;
2080     int skipkeys;
2081     Py_ssize_t idx;
2082 
2083     if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2084         open_dict = PyString_InternFromString("{");
2085         close_dict = PyString_InternFromString("}");
2086         empty_dict = PyString_InternFromString("{}");
2087         if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2088             return -1;
2089     }
2090     if (Py_SIZE(dct) == 0)
2091         return PyList_Append(rval, empty_dict);
2092 
2093     if (s->markers != Py_None) {
2094         int has_key;
2095         ident = PyLong_FromVoidPtr(dct);
2096         if (ident == NULL)
2097             goto bail;
2098         has_key = PyDict_Contains(s->markers, ident);
2099         if (has_key) {
2100             if (has_key != -1)
2101                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2102             goto bail;
2103         }
2104         if (PyDict_SetItem(s->markers, ident, dct)) {
2105             goto bail;
2106         }
2107     }
2108 
2109     if (PyList_Append(rval, open_dict))
2110         goto bail;
2111 
2112     if (s->indent != Py_None) {
2113         /* TODO: DOES NOT RUN */
2114         indent_level += 1;
2115         /*
2116             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2117             separator = _item_separator + newline_indent
2118             buf += newline_indent
2119         */
2120     }
2121 
2122     /* TODO: C speedup not implemented for sort_keys */
2123 
2124     it = PyObject_GetIter(dct);
2125     if (it == NULL)
2126         goto bail;
2127     skipkeys = PyObject_IsTrue(s->skipkeys);
2128     idx = 0;
2129     while ((key = PyIter_Next(it)) != NULL) {
2130         PyObject *encoded;
2131 
2132         if (PyString_Check(key) || PyUnicode_Check(key)) {
2133             Py_INCREF(key);
2134             kstr = key;
2135         }
2136         else if (PyFloat_Check(key)) {
2137             kstr = encoder_encode_float(s, key);
2138             if (kstr == NULL)
2139                 goto bail;
2140         }
2141         else if (PyInt_Check(key) || PyLong_Check(key)) {
2142             kstr = PyObject_Str(key);
2143             if (kstr == NULL)
2144                 goto bail;
2145         }
2146         else if (key == Py_True || key == Py_False || key == Py_None) {
2147             kstr = _encoded_const(key);
2148             if (kstr == NULL)
2149                 goto bail;
2150         }
2151         else if (skipkeys) {
2152             Py_DECREF(key);
2153             continue;
2154         }
2155         else {
2156             /* TODO: include repr of key */
2157             PyErr_SetString(PyExc_TypeError, "keys must be a string");
2158             goto bail;
2159         }
2160 
2161         if (idx) {
2162             if (PyList_Append(rval, s->item_separator))
2163                 goto bail;
2164         }
2165 
2166         value = PyObject_GetItem(dct, key);
2167         if (value == NULL)
2168             goto bail;
2169 
2170         encoded = encoder_encode_string(s, kstr);
2171         Py_CLEAR(kstr);
2172         if (encoded == NULL)
2173             goto bail;
2174         if (PyList_Append(rval, encoded)) {
2175             Py_DECREF(encoded);
2176             goto bail;
2177         }
2178         Py_DECREF(encoded);
2179         if (PyList_Append(rval, s->key_separator))
2180             goto bail;
2181         if (encoder_listencode_obj(s, rval, value, indent_level))
2182             goto bail;
2183         idx += 1;
2184         Py_CLEAR(value);
2185         Py_DECREF(key);
2186     }
2187     if (PyErr_Occurred())
2188         goto bail;
2189     Py_CLEAR(it);
2190 
2191     if (ident != NULL) {
2192         if (PyDict_DelItem(s->markers, ident))
2193             goto bail;
2194         Py_CLEAR(ident);
2195     }
2196     if (s->indent != Py_None) {
2197         /* TODO: DOES NOT RUN */
2198         /*
2199             indent_level -= 1;
2200 
2201             yield '\n' + (' ' * (_indent * _current_indent_level))
2202         */
2203     }
2204     if (PyList_Append(rval, close_dict))
2205         goto bail;
2206     return 0;
2207 
2208 bail:
2209     Py_XDECREF(it);
2210     Py_XDECREF(key);
2211     Py_XDECREF(value);
2212     Py_XDECREF(kstr);
2213     Py_XDECREF(ident);
2214     return -1;
2215 }
2216 
2217 
2218 static int
2219 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2220 {
2221     /* Encode Python list seq to a JSON term, rval is a PyList */
2222     static PyObject *open_array = NULL;
2223     static PyObject *close_array = NULL;
2224     static PyObject *empty_array = NULL;
2225     PyObject *ident = NULL;
2226     PyObject *s_fast = NULL;
2227     Py_ssize_t num_items;
2228     PyObject **seq_items;
2229     Py_ssize_t i;
2230 
2231     if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2232         open_array = PyString_InternFromString("[");
2233         close_array = PyString_InternFromString("]");
2234         empty_array = PyString_InternFromString("[]");
2235         if (open_array == NULL || close_array == NULL || empty_array == NULL)
2236             return -1;
2237     }
2238     ident = NULL;
2239     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2240     if (s_fast == NULL)
2241         return -1;
2242     num_items = PySequence_Fast_GET_SIZE(s_fast);
2243     if (num_items == 0) {
2244         Py_DECREF(s_fast);
2245         return PyList_Append(rval, empty_array);
2246     }
2247 
2248     if (s->markers != Py_None) {
2249         int has_key;
2250         ident = PyLong_FromVoidPtr(seq);
2251         if (ident == NULL)
2252             goto bail;
2253         has_key = PyDict_Contains(s->markers, ident);
2254         if (has_key) {
2255             if (has_key != -1)
2256                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2257             goto bail;
2258         }
2259         if (PyDict_SetItem(s->markers, ident, seq)) {
2260             goto bail;
2261         }
2262     }
2263 
2264     seq_items = PySequence_Fast_ITEMS(s_fast);
2265     if (PyList_Append(rval, open_array))
2266         goto bail;
2267     if (s->indent != Py_None) {
2268         /* TODO: DOES NOT RUN */
2269         indent_level += 1;
2270         /*
2271             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2272             separator = _item_separator + newline_indent
2273             buf += newline_indent
2274         */
2275     }
2276     for (i = 0; i < num_items; i++) {
2277         PyObject *obj = seq_items[i];
2278         if (i) {
2279             if (PyList_Append(rval, s->item_separator))
2280                 goto bail;
2281         }
2282         if (encoder_listencode_obj(s, rval, obj, indent_level))
2283             goto bail;
2284     }
2285     if (ident != NULL) {
2286         if (PyDict_DelItem(s->markers, ident))
2287             goto bail;
2288         Py_CLEAR(ident);
2289     }
2290     if (s->indent != Py_None) {
2291         /* TODO: DOES NOT RUN */
2292         /*
2293             indent_level -= 1;
2294 
2295             yield '\n' + (' ' * (_indent * _current_indent_level))
2296         */
2297     }
2298     if (PyList_Append(rval, close_array))
2299         goto bail;
2300     Py_DECREF(s_fast);
2301     return 0;
2302 
2303 bail:
2304     Py_XDECREF(ident);
2305     Py_DECREF(s_fast);
2306     return -1;
2307 }
2308 
2309 static void
2310 encoder_dealloc(PyObject *self)
2311 {
2312     /* Deallocate Encoder */
2313     encoder_clear(self);
2314     Py_TYPE(self)->tp_free(self);
2315 }
2316 
2317 static int
2318 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2319 {
2320     PyEncoderObject *s;
2321     assert(PyEncoder_Check(self));
2322     s = (PyEncoderObject *)self;
2323     Py_VISIT(s->markers);
2324     Py_VISIT(s->defaultfn);
2325     Py_VISIT(s->encoder);
2326     Py_VISIT(s->indent);
2327     Py_VISIT(s->key_separator);
2328     Py_VISIT(s->item_separator);
2329     Py_VISIT(s->sort_keys);
2330     Py_VISIT(s->skipkeys);
2331     return 0;
2332 }
2333 
2334 static int
2335 encoder_clear(PyObject *self)
2336 {
2337     /* Deallocate Encoder */
2338     PyEncoderObject *s;
2339     assert(PyEncoder_Check(self));
2340     s = (PyEncoderObject *)self;
2341     Py_CLEAR(s->markers);
2342     Py_CLEAR(s->defaultfn);
2343     Py_CLEAR(s->encoder);
2344     Py_CLEAR(s->indent);
2345     Py_CLEAR(s->key_separator);
2346     Py_CLEAR(s->item_separator);
2347     Py_CLEAR(s->sort_keys);
2348     Py_CLEAR(s->skipkeys);
2349     return 0;
2350 }
2351 
2352 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2353 
2354 static
2355 PyTypeObject PyEncoderType = {
2356     PyObject_HEAD_INIT(NULL)
2357     0,                    /* tp_internal */
2358     "_json.Encoder",       /* tp_name */
2359     sizeof(PyEncoderObject), /* tp_basicsize */
2360     0,                    /* tp_itemsize */
2361     encoder_dealloc, /* tp_dealloc */
2362     0,                    /* tp_print */
2363     0,                    /* tp_getattr */
2364     0,                    /* tp_setattr */
2365     0,                    /* tp_compare */
2366     0,                    /* tp_repr */
2367     0,                    /* tp_as_number */
2368     0,                    /* tp_as_sequence */
2369     0,                    /* tp_as_mapping */
2370     0,                    /* tp_hash */
2371     encoder_call,         /* tp_call */
2372     0,                    /* tp_str */
2373     0,                    /* tp_getattro */
2374     0,                    /* tp_setattro */
2375     0,                    /* tp_as_buffer */
2376     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
2377     encoder_doc,          /* tp_doc */
2378     encoder_traverse,     /* tp_traverse */
2379     encoder_clear,        /* tp_clear */
2380     0,                    /* tp_richcompare */
2381     0,                    /* tp_weaklistoffset */
2382     0,                    /* tp_iter */
2383     0,                    /* tp_iternext */
2384     0,                    /* tp_methods */
2385     encoder_members,      /* tp_members */
2386     0,                    /* tp_getset */
2387     0,                    /* tp_base */
2388     0,                    /* tp_dict */
2389     0,                    /* tp_descr_get */
2390     0,                    /* tp_descr_set */
2391     0,                    /* tp_dictoffset */
2392     encoder_init,         /* tp_init */
2393     0,                    /* tp_alloc */
2394     encoder_new,          /* tp_new */
2395     0,                    /* tp_free */
2396 };
2397 
2398 static PyMethodDef speedups_methods[] = {
2399     {"encode_basestring_ascii",
2400         (PyCFunction)py_encode_basestring_ascii,
2401         METH_O,
2402         pydoc_encode_basestring_ascii},
2403     {"scanstring",
2404         (PyCFunction)py_scanstring,
2405         METH_VARARGS,
2406         pydoc_scanstring},
2407     {NULL, NULL, 0, NULL}
2408 };
2409 
2410 PyDoc_STRVAR(module_doc,
2411 "json speedups\n");
2412 
2413 void
2414 init_json(void)
2415 {
2416     PyObject *m;
2417     PyScannerType.tp_new = PyType_GenericNew;
2418     if (PyType_Ready(&PyScannerType) < 0)
2419         return;
2420     PyEncoderType.tp_new = PyType_GenericNew;
2421     if (PyType_Ready(&PyEncoderType) < 0)
2422         return;
2423     m = Py_InitModule3("_json", speedups_methods, module_doc);
2424     Py_INCREF((PyObject*)&PyScannerType);
2425     PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2426     Py_INCREF((PyObject*)&PyEncoderType);
2427     PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2428 }