Python-2.7.3/Objects/stringobject.c

Location Tool Test ID Function Issue
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:390:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:390:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:651:22 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:651:22 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:667:25 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:667:25 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:668:25 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:668:25 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:669:25 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:669:25 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:670:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:670:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:671:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:671:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:672:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:672:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:673:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:673:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:674:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:674:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:675:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:675:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:676:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:676:24 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:685:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:685:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:710:22 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:710:22 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:719:22 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:719:22 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:741:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:741:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2066:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2066:18 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2271:17 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2271:17 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4622:28 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4622:28 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4627:28 clang-analyzer Dereference of null pointer
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4712:5 clang-analyzer Access to field 'ob_refcnt' results in a dereference of a null pointer (loaded from variable 'result')
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4712:5 clang-analyzer Access to field 'ob_refcnt' results in a dereference of a null pointer (loaded from variable 'result')
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4728:10 clang-analyzer Access to field 'ob_type' results in a dereference of a null pointer (loaded from variable 's')
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4728:10 clang-analyzer Access to field 'ob_type' results in a dereference of a null pointer (loaded from variable 's')
   1 /* String (str/bytes) object implementation */
   2 
   3 #define PY_SSIZE_T_CLEAN
   4 
   5 #include "Python.h"
   6 #include <ctype.h>
   7 #include <stddef.h>
   8 
   9 #ifdef COUNT_ALLOCS
  10 Py_ssize_t null_strings, one_strings;
  11 #endif
  12 
  13 static PyStringObject *characters[UCHAR_MAX + 1];
  14 static PyStringObject *nullstring;
  15 
  16 /* This dictionary holds all interned strings.  Note that references to
  17    strings in this dictionary are *not* counted in the string's ob_refcnt.
  18    When the interned string reaches a refcnt of 0 the string deallocation
  19    function will delete the reference from this dictionary.
  20 
  21    Another way to look at this is to say that the actual reference
  22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  23 */
  24 static PyObject *interned;
  25 
  26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
  27    for a string of length n should request PyStringObject_SIZE + n bytes.
  28 
  29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
  30    3 bytes per string allocation on a typical system.
  31 */
  32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
  33 
  34 /*
  35    For PyString_FromString(), the parameter `str' points to a null-terminated
  36    string containing exactly `size' bytes.
  37 
  38    For PyString_FromStringAndSize(), the parameter `str' is
  39    either NULL or else points to a string containing at least `size' bytes.
  40    For PyString_FromStringAndSize(), the string in the `str' parameter does
  41    not have to be null-terminated.  (Therefore it is safe to construct a
  42    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  43    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  44    bytes (setting the last byte to the null terminating character) and you can
  45    fill in the data yourself.  If `str' is non-NULL then the resulting
  46    PyString object must be treated as immutable and you must not fill in nor
  47    alter the data yourself, since the strings may be shared.
  48 
  49    The PyObject member `op->ob_size', which denotes the number of "extra
  50    items" in a variable-size object, will contain the number of bytes
  51    allocated for string data, not counting the null terminating character.
  52    It is therefore equal to the `size' parameter (for
  53    PyString_FromStringAndSize()) or the length of the string in the `str'
  54    parameter (for PyString_FromString()).
  55 */
  /* PyString_FromStringAndSize: build a string object of `size' bytes.
   *
   *   str   source bytes, or NULL to leave the payload uninitialised
   *         (only the terminating '\0' is written in that case).
   *   size  number of payload bytes; must not be negative.
   *
   * Returns the string object cast to PyObject *.  On failure an
   * exception is set and NULL is returned for a negative size
   * (SystemError) or an oversized request (OverflowError); on allocation
   * failure the result of PyErr_NoMemory() is returned.
   *
   * The empty string and single-byte strings are cached in the file-level
   * `nullstring' / `characters[]' statics and handed out again with
   * Py_INCREF on later calls.
   */
  56 PyObject *
  57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  58 {
  59     register PyStringObject *op;
  60     if (size < 0) {
  61         PyErr_SetString(PyExc_SystemError,
  62             "Negative size passed to PyString_FromStringAndSize");
  63         return NULL;
  64     }
         /* Fast path: reuse the cached empty string if one exists yet. */
  65     if (size == 0 && (op = nullstring) != NULL) {
  66 #ifdef COUNT_ALLOCS
  67         null_strings++;
  68 #endif
  69         Py_INCREF(op);
  70         return (PyObject *)op;
  71     }
         /* Fast path: reuse the cached one-byte string.  Skipped when str is
            NULL because there is no byte to look up.  `& UCHAR_MAX' keeps the
            index inside characters[UCHAR_MAX + 1] even if *str is negative. */
  72     if (size == 1 && str != NULL &&
  73         (op = characters[*str & UCHAR_MAX]) != NULL)
  74     {
  75 #ifdef COUNT_ALLOCS
  76         one_strings++;
  77 #endif
  78         Py_INCREF(op);
  79         return (PyObject *)op;
  80     }
  81 
         /* Reject sizes for which PyStringObject_SIZE + size would exceed
            PY_SSIZE_T_MAX, before that sum is computed below. */
  82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
  83         PyErr_SetString(PyExc_OverflowError, "string is too large");
  84         return NULL;
  85     }
  86 
  87     /* Inline PyObject_NewVar */
  88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
  89     if (op == NULL)
  90         return PyErr_NoMemory();
  91     PyObject_INIT_VAR(op, &PyString_Type, size);
         /* -1 in ob_shash: presumably "hash not computed yet" -- confirm
            against the hash function elsewhere in this file. */
  92     op->ob_shash = -1;
  93     op->ob_sstate = SSTATE_NOT_INTERNED;
         /* With str == NULL only the terminator is written; the first `size'
            bytes are left for the caller to fill in. */
  94     if (str != NULL)
  95         Py_MEMCPY(op->ob_sval, str, size);
  96     op->ob_sval[size] = '\0';
  97     /* share short strings */
         /* First time an empty / one-byte string is created: intern it,
            remember it in the cache, and take one more reference
            (presumably the one owned by the cache slot). */
  98     if (size == 0) {
  99         PyObject *t = (PyObject *)op;
 100         PyString_InternInPlace(&t);
             /* InternInPlace takes &t, so t may differ from the object
                allocated above; re-read op from it. */
 101         op = (PyStringObject *)t;
 102         nullstring = op;
 103         Py_INCREF(op);
 104     } else if (size == 1 && str != NULL) {
 105         PyObject *t = (PyObject *)op;
 106         PyString_InternInPlace(&t);
 107         op = (PyStringObject *)t;
 108         characters[*str & UCHAR_MAX] = op;
 109         Py_INCREF(op);
 110     }
 111     return (PyObject *) op;
 112 }
 113 
 /* PyString_FromString: build a string object from a NUL-terminated C
  * string.
  *
  *   str  source string; must not be NULL (checked only by assert()).
  *
  * The length is taken from strlen(str).  Returns the string object cast
  * to PyObject *.  If the length is too large, OverflowError is set and
  * NULL is returned; on allocation failure the result of PyErr_NoMemory()
  * is returned.
  *
  * Uses the same `nullstring' / `characters[]' caches as
  * PyString_FromStringAndSize() for empty and one-byte strings.
  */
 114 PyObject *
 115 PyString_FromString(const char *str)
 116 {
 117     register size_t size;
 118     register PyStringObject *op;
 119 
 120     assert(str != NULL);
 121     size = strlen(str);
         /* Reject lengths for which PyStringObject_SIZE + size would exceed
            PY_SSIZE_T_MAX, before that sum is computed below. */
 122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
 123         PyErr_SetString(PyExc_OverflowError,
 124             "string is too long for a Python string");
 125         return NULL;
 126     }
         /* Fast path: reuse the cached empty string if one exists yet. */
 127     if (size == 0 && (op = nullstring) != NULL) {
 128 #ifdef COUNT_ALLOCS
 129         null_strings++;
 130 #endif
 131         Py_INCREF(op);
 132         return (PyObject *)op;
 133     }
         /* Fast path: reuse the cached one-byte string.  `& UCHAR_MAX' keeps
            the index inside characters[UCHAR_MAX + 1] even if *str is
            negative. */
 134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 135 #ifdef COUNT_ALLOCS
 136         one_strings++;
 137 #endif
 138         Py_INCREF(op);
 139         return (PyObject *)op;
 140     }
 141 
 142     /* Inline PyObject_NewVar */
 143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
 144     if (op == NULL)
 145         return PyErr_NoMemory();
 146     PyObject_INIT_VAR(op, &PyString_Type, size);
         /* -1 in ob_shash: presumably "hash not computed yet" -- confirm
            against the hash function elsewhere in this file. */
 147     op->ob_shash = -1;
 148     op->ob_sstate = SSTATE_NOT_INTERNED;
         /* size+1 so the source's terminating '\0' is copied as well. */
 149     Py_MEMCPY(op->ob_sval, str, size+1);
 150     /* share short strings */
         /* First time an empty / one-byte string is created: intern it,
            remember it in the cache, and take one more reference
            (presumably the one owned by the cache slot). */
 151     if (size == 0) {
 152         PyObject *t = (PyObject *)op;
 153         PyString_InternInPlace(&t);
             /* InternInPlace takes &t, so t may differ from the object
                allocated above; re-read op from it. */
 154         op = (PyStringObject *)t;
 155         nullstring = op;
 156         Py_INCREF(op);
 157     } else if (size == 1) {
 158         PyObject *t = (PyObject *)op;
 159         PyString_InternInPlace(&t);
 160         op = (PyStringObject *)t;
 161         characters[*str & UCHAR_MAX] = op;
 162         Py_INCREF(op);
 163     }
 164     return (PyObject *) op;
 165 }
 166 
 167 PyObject *
 168 PyString_FromFormatV(const char *format, va_list vargs)
 169 {
 170     va_list count;
 171     Py_ssize_t n = 0;
 172     const char* f;
 173     char *s;
 174     PyObject* string;
 175 
 176 #ifdef VA_LIST_IS_ARRAY
 177     Py_MEMCPY(count, vargs, sizeof(va_list));
 178 #else
 179 #ifdef  __va_copy
 180     __va_copy(count, vargs);
 181 #else
 182     count = vargs;
 183 #endif
 184 #endif
 185     /* step 1: figure out how large a buffer we need */
 186     for (f = format; *f; f++) {
 187         if (*f == '%') {
 188 #ifdef HAVE_LONG_LONG
 189             int longlongflag = 0;
 190 #endif
 191             const char* p = f;
 192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 193                 ;
 194 
 195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 196              * they don't affect the amount of space we reserve.
 197              */
 198             if (*f == 'l') {
 199                 if (f[1] == 'd' || f[1] == 'u') {
 200                     ++f;
 201                 }
 202 #ifdef HAVE_LONG_LONG
 203                 else if (f[1] == 'l' &&
 204                          (f[2] == 'd' || f[2] == 'u')) {
 205                     longlongflag = 1;
 206                     f += 2;
 207                 }
 208 #endif
 209             }
 210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 211                 ++f;
 212             }
 213 
 214             switch (*f) {
 215             case 'c':
 216                 (void)va_arg(count, int);
 217                 /* fall through... */
 218             case '%':
 219                 n++;
 220                 break;
 221             case 'd': case 'u': case 'i': case 'x':
 222                 (void) va_arg(count, int);
 223 #ifdef HAVE_LONG_LONG
 224                 /* Need at most
 225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
 226                    plus 1 for the sign.  53/22 is an upper
 227                    bound for log10(256). */
 228                 if (longlongflag)
 229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
 230                 else
 231 #endif
 232                     /* 20 bytes is enough to hold a 64-bit
 233                        integer.  Decimal takes the most
 234                        space.  This isn't enough for
 235                        octal. */
 236                     n += 20;
 237 
 238                 break;
 239             case 's':
 240                 s = va_arg(count, char*);
 241                 n += strlen(s);
 242                 break;
 243             case 'p':
 244                 (void) va_arg(count, int);
 245                 /* maximum 64-bit pointer representation:
 246                  * 0xffffffffffffffff
 247                  * so 19 characters is enough.
 248                  * XXX I count 18 -- what's the extra for?
 249                  */
 250                 n += 19;
 251                 break;
 252             default:
 253                 /* if we stumble upon an unknown
 254                    formatting code, copy the rest of
 255                    the format string to the output
 256                    string. (we cannot just skip the
 257                    code, since there's no way to know
 258                    what's in the argument list) */
 259                 n += strlen(p);
 260                 goto expand;
 261             }
 262         } else
 263             n++;
 264     }
 265  expand:
 266     /* step 2: fill the buffer */
 267     /* Since we've analyzed how much space we need for the worst case,
 268        use sprintf directly instead of the slower PyOS_snprintf. */
 269     string = PyString_FromStringAndSize(NULL, n);
 270     if (!string)
 271         return NULL;
 272 
 273     s = PyString_AsString(string);
 274 
 275     for (f = format; *f; f++) {
 276         if (*f == '%') {
 277             const char* p = f++;
 278             Py_ssize_t i;
 279             int longflag = 0;
 280 #ifdef HAVE_LONG_LONG
 281             int longlongflag = 0;
 282 #endif
 283             int size_tflag = 0;
 284             /* parse the width.precision part (we're only
 285                interested in the precision value, if any) */
 286             n = 0;
 287             while (isdigit(Py_CHARMASK(*f)))
 288                 n = (n*10) + *f++ - '0';
 289             if (*f == '.') {
 290                 f++;
 291                 n = 0;
 292                 while (isdigit(Py_CHARMASK(*f)))
 293                     n = (n*10) + *f++ - '0';
 294             }
 295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 296                 f++;
 297             /* Handle %ld, %lu, %lld and %llu. */
 298             if (*f == 'l') {
 299                 if (f[1] == 'd' || f[1] == 'u') {
 300                     longflag = 1;
 301                     ++f;
 302                 }
 303 #ifdef HAVE_LONG_LONG
 304                 else if (f[1] == 'l' &&
 305                          (f[2] == 'd' || f[2] == 'u')) {
 306                     longlongflag = 1;
 307                     f += 2;
 308                 }
 309 #endif
 310             }
 311             /* handle the size_t flag. */
 312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 313                 size_tflag = 1;
 314                 ++f;
 315             }
 316 
 317             switch (*f) {
 318             case 'c':
 319                 *s++ = va_arg(vargs, int);
 320                 break;
 321             case 'd':
 322                 if (longflag)
 323                     sprintf(s, "%ld", va_arg(vargs, long));
 324 #ifdef HAVE_LONG_LONG
 325                 else if (longlongflag)
 326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
 327                         va_arg(vargs, PY_LONG_LONG));
 328 #endif
 329                 else if (size_tflag)
 330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 331                         va_arg(vargs, Py_ssize_t));
 332                 else
 333                     sprintf(s, "%d", va_arg(vargs, int));
 334                 s += strlen(s);
 335                 break;
 336             case 'u':
 337                 if (longflag)
 338                     sprintf(s, "%lu",
 339                         va_arg(vargs, unsigned long));
 340 #ifdef HAVE_LONG_LONG
 341                 else if (longlongflag)
 342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
 343                         va_arg(vargs, PY_LONG_LONG));
 344 #endif
 345                 else if (size_tflag)
 346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 347                         va_arg(vargs, size_t));
 348                 else
 349                     sprintf(s, "%u",
 350                         va_arg(vargs, unsigned int));
 351                 s += strlen(s);
 352                 break;
 353             case 'i':
 354                 sprintf(s, "%i", va_arg(vargs, int));
 355                 s += strlen(s);
 356                 break;
 357             case 'x':
 358                 sprintf(s, "%x", va_arg(vargs, int));
 359                 s += strlen(s);
 360                 break;
 361             case 's':
 362                 p = va_arg(vargs, char*);
 363                 i = strlen(p);
 364                 if (n > 0 && i > n)
 365                     i = n;
 366                 Py_MEMCPY(s, p, i);
 367                 s += i;
 368                 break;
 369             case 'p':
 370                 sprintf(s, "%p", va_arg(vargs, void*));
 371                 /* %p is ill-defined:  ensure leading 0x. */
 372                 if (s[1] == 'X')
 373                     s[1] = 'x';
 374                 else if (s[1] != 'x') {
 375                     memmove(s+2, s, strlen(s)+1);
 376                     s[0] = '0';
 377                     s[1] = 'x';
 378                 }
 379                 s += strlen(s);
 380                 break;
 381             case '%':
 382                 *s++ = '%';
 383                 break;
 384             default:
 385                 strcpy(s, p);
 386                 s += strlen(s);
 387                 goto end;
 388             }
 389         } else
 390             *s++ = *f;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

391 } 392 393 end: 394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string))) 395 return NULL; 396 return string; 397 } 398 399 PyObject * 400 PyString_FromFormat(const char *format, ...) 401 { 402 PyObject* ret; 403 va_list vargs; 404 405 #ifdef HAVE_STDARG_PROTOTYPES 406 va_start(vargs, format); 407 #else 408 va_start(vargs); 409 #endif 410 ret = PyString_FromFormatV(format, vargs); 411 va_end(vargs); 412 return ret; 413 } 414 415 416 PyObject *PyString_Decode(const char *s, 417 Py_ssize_t size, 418 const char *encoding, 419 const char *errors) 420 { 421 PyObject *v, *str; 422 423 str = PyString_FromStringAndSize(s, size); 424 if (str == NULL) 425 return NULL; 426 v = PyString_AsDecodedString(str, encoding, errors); 427 Py_DECREF(str); 428 return v; 429 } 430 431 PyObject *PyString_AsDecodedObject(PyObject *str, 432 const char *encoding, 433 const char *errors) 434 { 435 PyObject *v; 436 437 if (!PyString_Check(str)) { 438 PyErr_BadArgument(); 439 goto onError; 440 } 441 442 if (encoding == NULL) { 443 #ifdef Py_USING_UNICODE 444 encoding = PyUnicode_GetDefaultEncoding(); 445 #else 446 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 447 goto onError; 448 #endif 449 } 450 451 /* Decode via the codec registry */ 452 v = PyCodec_Decode(str, encoding, errors); 453 if (v == NULL) 454 goto onError; 455 456 return v; 457 458 onError: 459 return NULL; 460 } 461 462 PyObject *PyString_AsDecodedString(PyObject *str, 463 const char *encoding, 464 const char *errors) 465 { 466 PyObject *v; 467 468 v = PyString_AsDecodedObject(str, encoding, errors); 469 if (v == NULL) 470 goto onError; 471 472 #ifdef Py_USING_UNICODE 473 /* Convert Unicode to a string using the default encoding */ 474 if (PyUnicode_Check(v)) { 475 PyObject *temp = v; 476 v = PyUnicode_AsEncodedString(v, NULL, NULL); 477 Py_DECREF(temp); 478 if (v == NULL) 479 goto onError; 480 } 481 #endif 482 if (!PyString_Check(v)) { 483 PyErr_Format(PyExc_TypeError, 484 "decoder did not return a string 
object (type=%.400s)", 485 Py_TYPE(v)->tp_name); 486 Py_DECREF(v); 487 goto onError; 488 } 489 490 return v; 491 492 onError: 493 return NULL; 494 } 495 496 PyObject *PyString_Encode(const char *s, 497 Py_ssize_t size, 498 const char *encoding, 499 const char *errors) 500 { 501 PyObject *v, *str; 502 503 str = PyString_FromStringAndSize(s, size); 504 if (str == NULL) 505 return NULL; 506 v = PyString_AsEncodedString(str, encoding, errors); 507 Py_DECREF(str); 508 return v; 509 } 510 511 PyObject *PyString_AsEncodedObject(PyObject *str, 512 const char *encoding, 513 const char *errors) 514 { 515 PyObject *v; 516 517 if (!PyString_Check(str)) { 518 PyErr_BadArgument(); 519 goto onError; 520 } 521 522 if (encoding == NULL) { 523 #ifdef Py_USING_UNICODE 524 encoding = PyUnicode_GetDefaultEncoding(); 525 #else 526 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 527 goto onError; 528 #endif 529 } 530 531 /* Encode via the codec registry */ 532 v = PyCodec_Encode(str, encoding, errors); 533 if (v == NULL) 534 goto onError; 535 536 return v; 537 538 onError: 539 return NULL; 540 } 541 542 PyObject *PyString_AsEncodedString(PyObject *str, 543 const char *encoding, 544 const char *errors) 545 { 546 PyObject *v; 547 548 v = PyString_AsEncodedObject(str, encoding, errors); 549 if (v == NULL) 550 goto onError; 551 552 #ifdef Py_USING_UNICODE 553 /* Convert Unicode to a string using the default encoding */ 554 if (PyUnicode_Check(v)) { 555 PyObject *temp = v; 556 v = PyUnicode_AsEncodedString(v, NULL, NULL); 557 Py_DECREF(temp); 558 if (v == NULL) 559 goto onError; 560 } 561 #endif 562 if (!PyString_Check(v)) { 563 PyErr_Format(PyExc_TypeError, 564 "encoder did not return a string object (type=%.400s)", 565 Py_TYPE(v)->tp_name); 566 Py_DECREF(v); 567 goto onError; 568 } 569 570 return v; 571 572 onError: 573 return NULL; 574 } 575 576 static void 577 string_dealloc(PyObject *op) 578 { 579 switch (PyString_CHECK_INTERNED(op)) { 580 case SSTATE_NOT_INTERNED: 581 break; 
582 583 case SSTATE_INTERNED_MORTAL: 584 /* revive dead object temporarily for DelItem */ 585 Py_REFCNT(op) = 3; 586 if (PyDict_DelItem(interned, op) != 0) 587 Py_FatalError( 588 "deletion of interned string failed"); 589 break; 590 591 case SSTATE_INTERNED_IMMORTAL: 592 Py_FatalError("Immortal interned string died."); 593 594 default: 595 Py_FatalError("Inconsistent interned string state."); 596 } 597 Py_TYPE(op)->tp_free(op); 598 } 599 600 /* Unescape a backslash-escaped string. If unicode is non-zero, 601 the string is a u-literal. If recode_encoding is non-zero, 602 the string is UTF-8 encoded and should be re-encoded in the 603 specified encoding. */ 604 605 PyObject *PyString_DecodeEscape(const char *s, 606 Py_ssize_t len, 607 const char *errors, 608 Py_ssize_t unicode, 609 const char *recode_encoding) 610 { 611 int c; 612 char *p, *buf; 613 const char *end; 614 PyObject *v; 615 Py_ssize_t newlen = recode_encoding ? 4*len:len; 616 v = PyString_FromStringAndSize((char *)NULL, newlen); 617 if (v == NULL) 618 return NULL; 619 p = buf = PyString_AsString(v); 620 end = s + len; 621 while (s < end) { 622 if (*s != '\\') { 623 non_esc: 624 #ifdef Py_USING_UNICODE 625 if (recode_encoding && (*s & 0x80)) { 626 PyObject *u, *w; 627 char *r; 628 const char* t; 629 Py_ssize_t rn; 630 t = s; 631 /* Decode non-ASCII bytes as UTF-8. */ 632 while (t < end && (*t & 0x80)) t++; 633 u = PyUnicode_DecodeUTF8(s, t - s, errors); 634 if(!u) goto failed; 635 636 /* Recode them in target encoding. */ 637 w = PyUnicode_AsEncodedString( 638 u, recode_encoding, errors); 639 Py_DECREF(u); 640 if (!w) goto failed; 641 642 /* Append bytes to output buffer. */ 643 assert(PyString_Check(w)); 644 r = PyString_AS_STRING(w); 645 rn = PyString_GET_SIZE(w); 646 Py_MEMCPY(p, r, rn); 647 p += rn; 648 Py_DECREF(w); 649 s = t; 650 } else { 651 *p++ = *s++;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

652 } 653 #else 654 *p++ = *s++; 655 #endif 656 continue; 657 } 658 s++; 659 if (s==end) { 660 PyErr_SetString(PyExc_ValueError, 661 "Trailing \\ in string"); 662 goto failed; 663 } 664 switch (*s++) { 665 /* XXX This assumes ASCII! */ 666 case '\n': break; 667 case '\\': *p++ = '\\'; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

668 case '\'': *p++ = '\''; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

669 case '\"': *p++ = '\"'; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

670 case 'b': *p++ = '\b'; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

671 case 'f': *p++ = '\014'; break; /* FF */
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

672 case 't': *p++ = '\t'; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

673 case 'n': *p++ = '\n'; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

674 case 'r': *p++ = '\r'; break;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

675 case 'v': *p++ = '\013'; break; /* VT */
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

677 case '0': case '1': case '2': case '3': 678 case '4': case '5': case '6': case '7': 679 c = s[-1] - '0'; 680 if (s < end && '0' <= *s && *s <= '7') { 681 c = (c<<3) + *s++ - '0'; 682 if (s < end && '0' <= *s && *s <= '7') 683 c = (c<<3) + *s++ - '0'; 684 } 685 *p++ = c;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

686 break; 687 case 'x': 688 if (s+1 < end && 689 isxdigit(Py_CHARMASK(s[0])) && 690 isxdigit(Py_CHARMASK(s[1]))) 691 { 692 unsigned int x = 0; 693 c = Py_CHARMASK(*s); 694 s++; 695 if (isdigit(c)) 696 x = c - '0'; 697 else if (islower(c)) 698 x = 10 + c - 'a'; 699 else 700 x = 10 + c - 'A'; 701 x = x << 4; 702 c = Py_CHARMASK(*s); 703 s++; 704 if (isdigit(c)) 705 x += c - '0'; 706 else if (islower(c)) 707 x += 10 + c - 'a'; 708 else 709 x += 10 + c - 'A'; 710 *p++ = x;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

711 break; 712 } 713 if (!errors || strcmp(errors, "strict") == 0) { 714 PyErr_SetString(PyExc_ValueError, 715 "invalid \\x escape"); 716 goto failed; 717 } 718 if (strcmp(errors, "replace") == 0) { 719 *p++ = '?';
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

720 } else if (strcmp(errors, "ignore") == 0) 721 /* do nothing */; 722 else { 723 PyErr_Format(PyExc_ValueError, 724 "decoding error; " 725 "unknown error handling code: %.400s", 726 errors); 727 goto failed; 728 } 729 #ifndef Py_USING_UNICODE 730 case 'u': 731 case 'U': 732 case 'N': 733 if (unicode) { 734 PyErr_SetString(PyExc_ValueError, 735 "Unicode escapes not legal " 736 "when Unicode disabled"); 737 goto failed; 738 } 739 #endif 740 default: 741 *p++ = '\\';
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

742 s--; 743 goto non_esc; /* an arbitrary number of unescaped 744 UTF-8 bytes may follow. */ 745 } 746 } 747 if (p-buf < newlen && _PyString_Resize(&v, p - buf)) 748 goto failed; 749 return v; 750 failed: 751 Py_DECREF(v); 752 return NULL; 753 } 754 755 /* -------------------------------------------------------------------- */ 756 /* object api */ 757 758 static Py_ssize_t 759 string_getsize(register PyObject *op) 760 { 761 char *s; 762 Py_ssize_t len; 763 if (PyString_AsStringAndSize(op, &s, &len)) 764 return -1; 765 return len; 766 } 767 768 static /*const*/ char * 769 string_getbuffer(register PyObject *op) 770 { 771 char *s; 772 Py_ssize_t len; 773 if (PyString_AsStringAndSize(op, &s, &len)) 774 return NULL; 775 return s; 776 } 777 778 Py_ssize_t 779 PyString_Size(register PyObject *op) 780 { 781 if (!PyString_Check(op)) 782 return string_getsize(op); 783 return Py_SIZE(op); 784 } 785 786 /*const*/ char * 787 PyString_AsString(register PyObject *op) 788 { 789 if (!PyString_Check(op)) 790 return string_getbuffer(op); 791 return ((PyStringObject *)op) -> ob_sval; 792 } 793 794 int 795 PyString_AsStringAndSize(register PyObject *obj, 796 register char **s, 797 register Py_ssize_t *len) 798 { 799 if (s == NULL) { 800 PyErr_BadInternalCall(); 801 return -1; 802 } 803 804 if (!PyString_Check(obj)) { 805 #ifdef Py_USING_UNICODE 806 if (PyUnicode_Check(obj)) { 807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); 808 if (obj == NULL) 809 return -1; 810 } 811 else 812 #endif 813 { 814 PyErr_Format(PyExc_TypeError, 815 "expected string or Unicode object, " 816 "%.200s found", Py_TYPE(obj)->tp_name); 817 return -1; 818 } 819 } 820 821 *s = PyString_AS_STRING(obj); 822 if (len != NULL) 823 *len = PyString_GET_SIZE(obj); 824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) { 825 PyErr_SetString(PyExc_TypeError, 826 "expected string without null bytes"); 827 return -1; 828 } 829 return 0; 830 } 831 832 /* 
-------------------------------------------------------------------- */ 833 /* Methods */ 834 835 #include "stringlib/stringdefs.h" 836 #include "stringlib/fastsearch.h" 837 838 #include "stringlib/count.h" 839 #include "stringlib/find.h" 840 #include "stringlib/partition.h" 841 #include "stringlib/split.h" 842 843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping 844 #include "stringlib/localeutil.h" 845 846 847 848 static int 849 string_print(PyStringObject *op, FILE *fp, int flags) 850 { 851 Py_ssize_t i, str_len; 852 char c; 853 int quote; 854 855 /* XXX Ought to check for interrupts when writing long strings */ 856 if (! PyString_CheckExact(op)) { 857 int ret; 858 /* A str subclass may have its own __str__ method. */ 859 op = (PyStringObject *) PyObject_Str((PyObject *)op); 860 if (op == NULL) 861 return -1; 862 ret = string_print(op, fp, flags); 863 Py_DECREF(op); 864 return ret; 865 } 866 if (flags & Py_PRINT_RAW) { 867 char *data = op->ob_sval; 868 Py_ssize_t size = Py_SIZE(op); 869 Py_BEGIN_ALLOW_THREADS 870 while (size > INT_MAX) { 871 /* Very long strings cannot be written atomically. 872 * But don't write exactly INT_MAX bytes at a time 873 * to avoid memory aligment issues. 
874 */ 875 const int chunk_size = INT_MAX & ~0x3FFF; 876 fwrite(data, 1, chunk_size, fp); 877 data += chunk_size; 878 size -= chunk_size; 879 } 880 #ifdef __VMS 881 if (size) fwrite(data, (int)size, 1, fp); 882 #else 883 fwrite(data, 1, (int)size, fp); 884 #endif 885 Py_END_ALLOW_THREADS 886 return 0; 887 } 888 889 /* figure out which quote to use; single is preferred */ 890 quote = '\''; 891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) && 892 !memchr(op->ob_sval, '"', Py_SIZE(op))) 893 quote = '"'; 894 895 str_len = Py_SIZE(op); 896 Py_BEGIN_ALLOW_THREADS 897 fputc(quote, fp); 898 for (i = 0; i < str_len; i++) { 899 /* Since strings are immutable and the caller should have a 900 reference, accessing the interal buffer should not be an issue 901 with the GIL released. */ 902 c = op->ob_sval[i]; 903 if (c == quote || c == '\\') 904 fprintf(fp, "\\%c", c); 905 else if (c == '\t') 906 fprintf(fp, "\\t"); 907 else if (c == '\n') 908 fprintf(fp, "\\n"); 909 else if (c == '\r') 910 fprintf(fp, "\\r"); 911 else if (c < ' ' || c >= 0x7f) 912 fprintf(fp, "\\x%02x", c & 0xff); 913 else 914 fputc(c, fp); 915 } 916 fputc(quote, fp); 917 Py_END_ALLOW_THREADS 918 return 0; 919 } 920 921 PyObject * 922 PyString_Repr(PyObject *obj, int smartquotes) 923 { 924 register PyStringObject* op = (PyStringObject*) obj; 925 size_t newsize = 2 + 4 * Py_SIZE(op); 926 PyObject *v; 927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) { 928 PyErr_SetString(PyExc_OverflowError, 929 "string is too large to make repr"); 930 return NULL; 931 } 932 v = PyString_FromStringAndSize((char *)NULL, newsize); 933 if (v == NULL) { 934 return NULL; 935 } 936 else { 937 register Py_ssize_t i; 938 register char c; 939 register char *p; 940 int quote; 941 942 /* figure out which quote to use; single is preferred */ 943 quote = '\''; 944 if (smartquotes && 945 memchr(op->ob_sval, '\'', Py_SIZE(op)) && 946 !memchr(op->ob_sval, '"', Py_SIZE(op))) 947 quote = '"'; 948 949 p = PyString_AS_STRING(v); 950 *p++ 
= quote; 951 for (i = 0; i < Py_SIZE(op); i++) { 952 /* There's at least enough room for a hex escape 953 and a closing quote. */ 954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5); 955 c = op->ob_sval[i]; 956 if (c == quote || c == '\\') 957 *p++ = '\\', *p++ = c; 958 else if (c == '\t') 959 *p++ = '\\', *p++ = 't'; 960 else if (c == '\n') 961 *p++ = '\\', *p++ = 'n'; 962 else if (c == '\r') 963 *p++ = '\\', *p++ = 'r'; 964 else if (c < ' ' || c >= 0x7f) { 965 /* For performance, we don't want to call 966 PyOS_snprintf here (extra layers of 967 function call). */ 968 sprintf(p, "\\x%02x", c & 0xff); 969 p += 4; 970 } 971 else 972 *p++ = c; 973 } 974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1); 975 *p++ = quote; 976 *p = '\0'; 977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) 978 return NULL; 979 return v; 980 } 981 } 982 983 static PyObject * 984 string_repr(PyObject *op) 985 { 986 return PyString_Repr(op, 1); 987 } 988 989 static PyObject * 990 string_str(PyObject *s) 991 { 992 assert(PyString_Check(s)); 993 if (PyString_CheckExact(s)) { 994 Py_INCREF(s); 995 return s; 996 } 997 else { 998 /* Subtype -- return genuine string with the same value. 
*/ 999 PyStringObject *t = (PyStringObject *) s; 1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t)); 1001 } 1002 } 1003 1004 static Py_ssize_t 1005 string_length(PyStringObject *a) 1006 { 1007 return Py_SIZE(a); 1008 } 1009 1010 static PyObject * 1011 string_concat(register PyStringObject *a, register PyObject *bb) 1012 { 1013 register Py_ssize_t size; 1014 register PyStringObject *op; 1015 if (!PyString_Check(bb)) { 1016 #ifdef Py_USING_UNICODE 1017 if (PyUnicode_Check(bb)) 1018 return PyUnicode_Concat((PyObject *)a, bb); 1019 #endif 1020 if (PyByteArray_Check(bb)) 1021 return PyByteArray_Concat((PyObject *)a, bb); 1022 PyErr_Format(PyExc_TypeError, 1023 "cannot concatenate 'str' and '%.200s' objects", 1024 Py_TYPE(bb)->tp_name); 1025 return NULL; 1026 } 1027 #define b ((PyStringObject *)bb) 1028 /* Optimize cases with empty left or right operand */ 1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) && 1030 PyString_CheckExact(a) && PyString_CheckExact(b)) { 1031 if (Py_SIZE(a) == 0) { 1032 Py_INCREF(bb); 1033 return bb; 1034 } 1035 Py_INCREF(a); 1036 return (PyObject *)a; 1037 } 1038 size = Py_SIZE(a) + Py_SIZE(b); 1039 /* Check that string sizes are not negative, to prevent an 1040 overflow in cases where we are passed incorrectly-created 1041 strings with negative lengths (due to a bug in other code). 
1042 */ 1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 || 1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { 1045 PyErr_SetString(PyExc_OverflowError, 1046 "strings are too large to concat"); 1047 return NULL; 1048 } 1049 1050 /* Inline PyObject_NewVar */ 1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 1052 PyErr_SetString(PyExc_OverflowError, 1053 "strings are too large to concat"); 1054 return NULL; 1055 } 1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 1057 if (op == NULL) 1058 return PyErr_NoMemory(); 1059 PyObject_INIT_VAR(op, &PyString_Type, size); 1060 op->ob_shash = -1; 1061 op->ob_sstate = SSTATE_NOT_INTERNED; 1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b)); 1064 op->ob_sval[size] = '\0'; 1065 return (PyObject *) op; 1066 #undef b 1067 } 1068 1069 static PyObject * 1070 string_repeat(register PyStringObject *a, register Py_ssize_t n) 1071 { 1072 register Py_ssize_t i; 1073 register Py_ssize_t j; 1074 register Py_ssize_t size; 1075 register PyStringObject *op; 1076 size_t nbytes; 1077 if (n < 0) 1078 n = 0; 1079 /* watch out for overflows: the size can overflow int, 1080 * and the # of bytes needed can overflow size_t 1081 */ 1082 size = Py_SIZE(a) * n; 1083 if (n && size / n != Py_SIZE(a)) { 1084 PyErr_SetString(PyExc_OverflowError, 1085 "repeated string is too long"); 1086 return NULL; 1087 } 1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) { 1089 Py_INCREF(a); 1090 return (PyObject *)a; 1091 } 1092 nbytes = (size_t)size; 1093 if (nbytes + PyStringObject_SIZE <= nbytes) { 1094 PyErr_SetString(PyExc_OverflowError, 1095 "repeated string is too long"); 1096 return NULL; 1097 } 1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes); 1099 if (op == NULL) 1100 return PyErr_NoMemory(); 1101 PyObject_INIT_VAR(op, &PyString_Type, size); 1102 op->ob_shash = -1; 1103 op->ob_sstate = SSTATE_NOT_INTERNED; 1104 op->ob_sval[size] = '\0'; 1105 if 
(Py_SIZE(a) == 1 && n > 0) { 1106 memset(op->ob_sval, a->ob_sval[0] , n); 1107 return (PyObject *) op; 1108 } 1109 i = 0; 1110 if (i < size) { 1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1112 i = Py_SIZE(a); 1113 } 1114 while (i < size) { 1115 j = (i <= size-i) ? i : size-i; 1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); 1117 i += j; 1118 } 1119 return (PyObject *) op; 1120 } 1121 1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */ 1123 1124 static PyObject * 1125 string_slice(register PyStringObject *a, register Py_ssize_t i, 1126 register Py_ssize_t j) 1127 /* j -- may be negative! */ 1128 { 1129 if (i < 0) 1130 i = 0; 1131 if (j < 0) 1132 j = 0; /* Avoid signed/unsigned bug in next line */ 1133 if (j > Py_SIZE(a)) 1134 j = Py_SIZE(a); 1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) { 1136 /* It's the same as a */ 1137 Py_INCREF(a); 1138 return (PyObject *)a; 1139 } 1140 if (j < i) 1141 j = i; 1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i); 1143 } 1144 1145 static int 1146 string_contains(PyObject *str_obj, PyObject *sub_obj) 1147 { 1148 if (!PyString_CheckExact(sub_obj)) { 1149 #ifdef Py_USING_UNICODE 1150 if (PyUnicode_Check(sub_obj)) 1151 return PyUnicode_Contains(str_obj, sub_obj); 1152 #endif 1153 if (!PyString_Check(sub_obj)) { 1154 PyErr_Format(PyExc_TypeError, 1155 "'in <string>' requires string as left operand, " 1156 "not %.200s", Py_TYPE(sub_obj)->tp_name); 1157 return -1; 1158 } 1159 } 1160 1161 return stringlib_contains_obj(str_obj, sub_obj); 1162 } 1163 1164 static PyObject * 1165 string_item(PyStringObject *a, register Py_ssize_t i) 1166 { 1167 char pchar; 1168 PyObject *v; 1169 if (i < 0 || i >= Py_SIZE(a)) { 1170 PyErr_SetString(PyExc_IndexError, "string index out of range"); 1171 return NULL; 1172 } 1173 pchar = a->ob_sval[i]; 1174 v = (PyObject *)characters[pchar & UCHAR_MAX]; 1175 if (v == NULL) 1176 v = PyString_FromStringAndSize(&pchar, 1); 1177 else { 1178 #ifdef COUNT_ALLOCS 1179 
one_strings++; 1180 #endif 1181 Py_INCREF(v); 1182 } 1183 return v; 1184 } 1185 1186 static PyObject* 1187 string_richcompare(PyStringObject *a, PyStringObject *b, int op) 1188 { 1189 int c; 1190 Py_ssize_t len_a, len_b; 1191 Py_ssize_t min_len; 1192 PyObject *result; 1193 1194 /* Make sure both arguments are strings. */ 1195 if (!(PyString_Check(a) && PyString_Check(b))) { 1196 result = Py_NotImplemented; 1197 goto out; 1198 } 1199 if (a == b) { 1200 switch (op) { 1201 case Py_EQ:case Py_LE:case Py_GE: 1202 result = Py_True; 1203 goto out; 1204 case Py_NE:case Py_LT:case Py_GT: 1205 result = Py_False; 1206 goto out; 1207 } 1208 } 1209 if (op == Py_EQ) { 1210 /* Supporting Py_NE here as well does not save 1211 much time, since Py_NE is rarely used. */ 1212 if (Py_SIZE(a) == Py_SIZE(b) 1213 && (a->ob_sval[0] == b->ob_sval[0] 1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) { 1215 result = Py_True; 1216 } else { 1217 result = Py_False; 1218 } 1219 goto out; 1220 } 1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b); 1222 min_len = (len_a < len_b) ? len_a : len_b; 1223 if (min_len > 0) { 1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); 1225 if (c==0) 1226 c = memcmp(a->ob_sval, b->ob_sval, min_len); 1227 } else 1228 c = 0; 1229 if (c == 0) 1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; 1231 switch (op) { 1232 case Py_LT: c = c < 0; break; 1233 case Py_LE: c = c <= 0; break; 1234 case Py_EQ: assert(0); break; /* unreachable */ 1235 case Py_NE: c = c != 0; break; 1236 case Py_GT: c = c > 0; break; 1237 case Py_GE: c = c >= 0; break; 1238 default: 1239 result = Py_NotImplemented; 1240 goto out; 1241 } 1242 result = c ? 
Py_True : Py_False; 1243 out: 1244 Py_INCREF(result); 1245 return result; 1246 } 1247 1248 int 1249 _PyString_Eq(PyObject *o1, PyObject *o2) 1250 { 1251 PyStringObject *a = (PyStringObject*) o1; 1252 PyStringObject *b = (PyStringObject*) o2; 1253 return Py_SIZE(a) == Py_SIZE(b) 1254 && *a->ob_sval == *b->ob_sval 1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0; 1256 } 1257 1258 static long 1259 string_hash(PyStringObject *a) 1260 { 1261 register Py_ssize_t len; 1262 register unsigned char *p; 1263 register long x; 1264 1265 #ifdef Py_DEBUG 1266 assert(_Py_HashSecret_Initialized); 1267 #endif 1268 if (a->ob_shash != -1) 1269 return a->ob_shash; 1270 len = Py_SIZE(a); 1271 /* 1272 We make the hash of the empty string be 0, rather than using 1273 (prefix ^ suffix), since this slightly obfuscates the hash secret 1274 */ 1275 if (len == 0) { 1276 a->ob_shash = 0; 1277 return 0; 1278 } 1279 p = (unsigned char *) a->ob_sval; 1280 x = _Py_HashSecret.prefix; 1281 x ^= *p << 7; 1282 while (--len >= 0) 1283 x = (1000003*x) ^ *p++; 1284 x ^= Py_SIZE(a); 1285 x ^= _Py_HashSecret.suffix; 1286 if (x == -1) 1287 x = -2; 1288 a->ob_shash = x; 1289 return x; 1290 } 1291 1292 static PyObject* 1293 string_subscript(PyStringObject* self, PyObject* item) 1294 { 1295 if (PyIndex_Check(item)) { 1296 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1297 if (i == -1 && PyErr_Occurred()) 1298 return NULL; 1299 if (i < 0) 1300 i += PyString_GET_SIZE(self); 1301 return string_item(self, i); 1302 } 1303 else if (PySlice_Check(item)) { 1304 Py_ssize_t start, stop, step, slicelength, cur, i; 1305 char* source_buf; 1306 char* result_buf; 1307 PyObject* result; 1308 1309 if (PySlice_GetIndicesEx((PySliceObject*)item, 1310 PyString_GET_SIZE(self), 1311 &start, &stop, &step, &slicelength) < 0) { 1312 return NULL; 1313 } 1314 1315 if (slicelength <= 0) { 1316 return PyString_FromStringAndSize("", 0); 1317 } 1318 else if (start == 0 && step == 1 && 1319 slicelength == 
PyString_GET_SIZE(self) && 1320 PyString_CheckExact(self)) { 1321 Py_INCREF(self); 1322 return (PyObject *)self; 1323 } 1324 else if (step == 1) { 1325 return PyString_FromStringAndSize( 1326 PyString_AS_STRING(self) + start, 1327 slicelength); 1328 } 1329 else { 1330 source_buf = PyString_AsString((PyObject*)self); 1331 result_buf = (char *)PyMem_Malloc(slicelength); 1332 if (result_buf == NULL) 1333 return PyErr_NoMemory(); 1334 1335 for (cur = start, i = 0; i < slicelength; 1336 cur += step, i++) { 1337 result_buf[i] = source_buf[cur]; 1338 } 1339 1340 result = PyString_FromStringAndSize(result_buf, 1341 slicelength); 1342 PyMem_Free(result_buf); 1343 return result; 1344 } 1345 } 1346 else { 1347 PyErr_Format(PyExc_TypeError, 1348 "string indices must be integers, not %.200s", 1349 Py_TYPE(item)->tp_name); 1350 return NULL; 1351 } 1352 } 1353 1354 static Py_ssize_t 1355 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr) 1356 { 1357 if ( index != 0 ) { 1358 PyErr_SetString(PyExc_SystemError, 1359 "accessing non-existent string segment"); 1360 return -1; 1361 } 1362 *ptr = (void *)self->ob_sval; 1363 return Py_SIZE(self); 1364 } 1365 1366 static Py_ssize_t 1367 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr) 1368 { 1369 PyErr_SetString(PyExc_TypeError, 1370 "Cannot use string as modifiable buffer"); 1371 return -1; 1372 } 1373 1374 static Py_ssize_t 1375 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp) 1376 { 1377 if ( lenp ) 1378 *lenp = Py_SIZE(self); 1379 return 1; 1380 } 1381 1382 static Py_ssize_t 1383 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr) 1384 { 1385 if ( index != 0 ) { 1386 PyErr_SetString(PyExc_SystemError, 1387 "accessing non-existent string segment"); 1388 return -1; 1389 } 1390 *ptr = self->ob_sval; 1391 return Py_SIZE(self); 1392 } 1393 1394 static int 1395 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int 
flags) 1396 { 1397 return PyBuffer_FillInfo(view, (PyObject*)self, 1398 (void *)self->ob_sval, Py_SIZE(self), 1399 1, flags); 1400 } 1401 1402 static PySequenceMethods string_as_sequence = { 1403 (lenfunc)string_length, /*sq_length*/ 1404 (binaryfunc)string_concat, /*sq_concat*/ 1405 (ssizeargfunc)string_repeat, /*sq_repeat*/ 1406 (ssizeargfunc)string_item, /*sq_item*/ 1407 (ssizessizeargfunc)string_slice, /*sq_slice*/ 1408 0, /*sq_ass_item*/ 1409 0, /*sq_ass_slice*/ 1410 (objobjproc)string_contains /*sq_contains*/ 1411 }; 1412 1413 static PyMappingMethods string_as_mapping = { 1414 (lenfunc)string_length, 1415 (binaryfunc)string_subscript, 1416 0, 1417 }; 1418 1419 static PyBufferProcs string_as_buffer = { 1420 (readbufferproc)string_buffer_getreadbuf, 1421 (writebufferproc)string_buffer_getwritebuf, 1422 (segcountproc)string_buffer_getsegcount, 1423 (charbufferproc)string_buffer_getcharbuf, 1424 (getbufferproc)string_buffer_getbuffer, 1425 0, /* XXX */ 1426 }; 1427 1428 1429 1430 #define LEFTSTRIP 0 1431 #define RIGHTSTRIP 1 1432 #define BOTHSTRIP 2 1433 1434 /* Arrays indexed by above */ 1435 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; 1436 1437 #define STRIPNAME(i) (stripformat[i]+3) 1438 1439 PyDoc_STRVAR(split__doc__, 1440 "S.split([sep [,maxsplit]]) -> list of strings\n\ 1441 \n\ 1442 Return a list of the words in the string S, using sep as the\n\ 1443 delimiter string. If maxsplit is given, at most maxsplit\n\ 1444 splits are done. 
If sep is not specified or is None, any\n\ 1445 whitespace string is a separator and empty strings are removed\n\ 1446 from the result."); 1447 1448 static PyObject * 1449 string_split(PyStringObject *self, PyObject *args) 1450 { 1451 Py_ssize_t len = PyString_GET_SIZE(self), n; 1452 Py_ssize_t maxsplit = -1; 1453 const char *s = PyString_AS_STRING(self), *sub; 1454 PyObject *subobj = Py_None; 1455 1456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) 1457 return NULL; 1458 if (maxsplit < 0) 1459 maxsplit = PY_SSIZE_T_MAX; 1460 if (subobj == Py_None) 1461 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); 1462 if (PyString_Check(subobj)) { 1463 sub = PyString_AS_STRING(subobj); 1464 n = PyString_GET_SIZE(subobj); 1465 } 1466 #ifdef Py_USING_UNICODE 1467 else if (PyUnicode_Check(subobj)) 1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit); 1469 #endif 1470 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1471 return NULL; 1472 1473 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit); 1474 } 1475 1476 PyDoc_STRVAR(partition__doc__, 1477 "S.partition(sep) -> (head, sep, tail)\n\ 1478 \n\ 1479 Search for the separator sep in S, and return the part before it,\n\ 1480 the separator itself, and the part after it. 
If the separator is not\n\ 1481 found, return S and two empty strings."); 1482 1483 static PyObject * 1484 string_partition(PyStringObject *self, PyObject *sep_obj) 1485 { 1486 const char *sep; 1487 Py_ssize_t sep_len; 1488 1489 if (PyString_Check(sep_obj)) { 1490 sep = PyString_AS_STRING(sep_obj); 1491 sep_len = PyString_GET_SIZE(sep_obj); 1492 } 1493 #ifdef Py_USING_UNICODE 1494 else if (PyUnicode_Check(sep_obj)) 1495 return PyUnicode_Partition((PyObject *) self, sep_obj); 1496 #endif 1497 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1498 return NULL; 1499 1500 return stringlib_partition( 1501 (PyObject*) self, 1502 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1503 sep_obj, sep, sep_len 1504 ); 1505 } 1506 1507 PyDoc_STRVAR(rpartition__doc__, 1508 "S.rpartition(sep) -> (head, sep, tail)\n\ 1509 \n\ 1510 Search for the separator sep in S, starting at the end of S, and return\n\ 1511 the part before it, the separator itself, and the part after it. If the\n\ 1512 separator is not found, return two empty strings and S."); 1513 1514 static PyObject * 1515 string_rpartition(PyStringObject *self, PyObject *sep_obj) 1516 { 1517 const char *sep; 1518 Py_ssize_t sep_len; 1519 1520 if (PyString_Check(sep_obj)) { 1521 sep = PyString_AS_STRING(sep_obj); 1522 sep_len = PyString_GET_SIZE(sep_obj); 1523 } 1524 #ifdef Py_USING_UNICODE 1525 else if (PyUnicode_Check(sep_obj)) 1526 return PyUnicode_RPartition((PyObject *) self, sep_obj); 1527 #endif 1528 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1529 return NULL; 1530 1531 return stringlib_rpartition( 1532 (PyObject*) self, 1533 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1534 sep_obj, sep, sep_len 1535 ); 1536 } 1537 1538 PyDoc_STRVAR(rsplit__doc__, 1539 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\ 1540 \n\ 1541 Return a list of the words in the string S, using sep as the\n\ 1542 delimiter string, starting at the end of the string and working\n\ 1543 to the front. 
If maxsplit is given, at most maxsplit splits are\n\ 1544 done. If sep is not specified or is None, any whitespace string\n\ 1545 is a separator."); 1546 1547 static PyObject * 1548 string_rsplit(PyStringObject *self, PyObject *args) 1549 { 1550 Py_ssize_t len = PyString_GET_SIZE(self), n; 1551 Py_ssize_t maxsplit = -1; 1552 const char *s = PyString_AS_STRING(self), *sub; 1553 PyObject *subobj = Py_None; 1554 1555 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) 1556 return NULL; 1557 if (maxsplit < 0) 1558 maxsplit = PY_SSIZE_T_MAX; 1559 if (subobj == Py_None) 1560 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); 1561 if (PyString_Check(subobj)) { 1562 sub = PyString_AS_STRING(subobj); 1563 n = PyString_GET_SIZE(subobj); 1564 } 1565 #ifdef Py_USING_UNICODE 1566 else if (PyUnicode_Check(subobj)) 1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); 1568 #endif 1569 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1570 return NULL; 1571 1572 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit); 1573 } 1574 1575 1576 PyDoc_STRVAR(join__doc__, 1577 "S.join(iterable) -> string\n\ 1578 \n\ 1579 Return a string which is the concatenation of the strings in the\n\ 1580 iterable. 
The separator between elements is S."); 1581 1582 static PyObject * 1583 string_join(PyStringObject *self, PyObject *orig) 1584 { 1585 char *sep = PyString_AS_STRING(self); 1586 const Py_ssize_t seplen = PyString_GET_SIZE(self); 1587 PyObject *res = NULL; 1588 char *p; 1589 Py_ssize_t seqlen = 0; 1590 size_t sz = 0; 1591 Py_ssize_t i; 1592 PyObject *seq, *item; 1593 1594 seq = PySequence_Fast(orig, ""); 1595 if (seq == NULL) { 1596 return NULL; 1597 } 1598 1599 seqlen = PySequence_Size(seq); 1600 if (seqlen == 0) { 1601 Py_DECREF(seq); 1602 return PyString_FromString(""); 1603 } 1604 if (seqlen == 1) { 1605 item = PySequence_Fast_GET_ITEM(seq, 0); 1606 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) { 1607 Py_INCREF(item); 1608 Py_DECREF(seq); 1609 return item; 1610 } 1611 } 1612 1613 /* There are at least two things to join, or else we have a subclass 1614 * of the builtin types in the sequence. 1615 * Do a pre-pass to figure out the total amount of space we'll 1616 * need (sz), see whether any argument is absurd, and defer to 1617 * the Unicode join if appropriate. 1618 */ 1619 for (i = 0; i < seqlen; i++) { 1620 const size_t old_sz = sz; 1621 item = PySequence_Fast_GET_ITEM(seq, i); 1622 if (!PyString_Check(item)){ 1623 #ifdef Py_USING_UNICODE 1624 if (PyUnicode_Check(item)) { 1625 /* Defer to Unicode join. 1626 * CAUTION: There's no gurantee that the 1627 * original sequence can be iterated over 1628 * again, so we must pass seq here. 
1629 */ 1630 PyObject *result; 1631 result = PyUnicode_Join((PyObject *)self, seq); 1632 Py_DECREF(seq); 1633 return result; 1634 } 1635 #endif 1636 PyErr_Format(PyExc_TypeError, 1637 "sequence item %zd: expected string," 1638 " %.80s found", 1639 i, Py_TYPE(item)->tp_name); 1640 Py_DECREF(seq); 1641 return NULL; 1642 } 1643 sz += PyString_GET_SIZE(item); 1644 if (i != 0) 1645 sz += seplen; 1646 if (sz < old_sz || sz > PY_SSIZE_T_MAX) { 1647 PyErr_SetString(PyExc_OverflowError, 1648 "join() result is too long for a Python string"); 1649 Py_DECREF(seq); 1650 return NULL; 1651 } 1652 } 1653 1654 /* Allocate result space. */ 1655 res = PyString_FromStringAndSize((char*)NULL, sz); 1656 if (res == NULL) { 1657 Py_DECREF(seq); 1658 return NULL; 1659 } 1660 1661 /* Catenate everything. */ 1662 p = PyString_AS_STRING(res); 1663 for (i = 0; i < seqlen; ++i) { 1664 size_t n; 1665 item = PySequence_Fast_GET_ITEM(seq, i); 1666 n = PyString_GET_SIZE(item); 1667 Py_MEMCPY(p, PyString_AS_STRING(item), n); 1668 p += n; 1669 if (i < seqlen - 1) { 1670 Py_MEMCPY(p, sep, seplen); 1671 p += seplen; 1672 } 1673 } 1674 1675 Py_DECREF(seq); 1676 return res; 1677 } 1678 1679 PyObject * 1680 _PyString_Join(PyObject *sep, PyObject *x) 1681 { 1682 assert(sep != NULL && PyString_Check(sep)); 1683 assert(x != NULL); 1684 return string_join((PyStringObject *)sep, x); 1685 } 1686 1687 /* helper macro to fixup start/end slice values */ 1688 #define ADJUST_INDICES(start, end, len) \ 1689 if (end > len) \ 1690 end = len; \ 1691 else if (end < 0) { \ 1692 end += len; \ 1693 if (end < 0) \ 1694 end = 0; \ 1695 } \ 1696 if (start < 0) { \ 1697 start += len; \ 1698 if (start < 0) \ 1699 start = 0; \ 1700 } 1701 1702 Py_LOCAL_INLINE(Py_ssize_t) 1703 string_find_internal(PyStringObject *self, PyObject *args, int dir) 1704 { 1705 PyObject *subobj; 1706 const char *sub; 1707 Py_ssize_t sub_len; 1708 Py_ssize_t start=0, end=PY_SSIZE_T_MAX; 1709 1710 if (!stringlib_parse_args_finds("find/rfind/index/rindex", 
1711 args, &subobj, &start, &end)) 1712 return -2; 1713 1714 if (PyString_Check(subobj)) { 1715 sub = PyString_AS_STRING(subobj); 1716 sub_len = PyString_GET_SIZE(subobj); 1717 } 1718 #ifdef Py_USING_UNICODE 1719 else if (PyUnicode_Check(subobj)) 1720 return PyUnicode_Find( 1721 (PyObject *)self, subobj, start, end, dir); 1722 #endif 1723 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) 1724 /* XXX - the "expected a character buffer object" is pretty 1725 confusing for a non-expert. remap to something else ? */ 1726 return -2; 1727 1728 if (dir > 0) 1729 return stringlib_find_slice( 1730 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1731 sub, sub_len, start, end); 1732 else 1733 return stringlib_rfind_slice( 1734 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1735 sub, sub_len, start, end); 1736 } 1737 1738 1739 PyDoc_STRVAR(find__doc__, 1740 "S.find(sub [,start [,end]]) -> int\n\ 1741 \n\ 1742 Return the lowest index in S where substring sub is found,\n\ 1743 such that sub is contained within S[start:end]. 
Optional\n\ 1744 arguments start and end are interpreted as in slice notation.\n\ 1745 \n\ 1746 Return -1 on failure."); 1747 1748 static PyObject * 1749 string_find(PyStringObject *self, PyObject *args) 1750 { 1751 Py_ssize_t result = string_find_internal(self, args, +1); 1752 if (result == -2) 1753 return NULL; 1754 return PyInt_FromSsize_t(result); 1755 } 1756 1757 1758 PyDoc_STRVAR(index__doc__, 1759 "S.index(sub [,start [,end]]) -> int\n\ 1760 \n\ 1761 Like S.find() but raise ValueError when the substring is not found."); 1762 1763 static PyObject * 1764 string_index(PyStringObject *self, PyObject *args) 1765 { 1766 Py_ssize_t result = string_find_internal(self, args, +1); 1767 if (result == -2) 1768 return NULL; 1769 if (result == -1) { 1770 PyErr_SetString(PyExc_ValueError, 1771 "substring not found"); 1772 return NULL; 1773 } 1774 return PyInt_FromSsize_t(result); 1775 } 1776 1777 1778 PyDoc_STRVAR(rfind__doc__, 1779 "S.rfind(sub [,start [,end]]) -> int\n\ 1780 \n\ 1781 Return the highest index in S where substring sub is found,\n\ 1782 such that sub is contained within S[start:end]. 
Optional\n\ 1783 arguments start and end are interpreted as in slice notation.\n\ 1784 \n\ 1785 Return -1 on failure."); 1786 1787 static PyObject * 1788 string_rfind(PyStringObject *self, PyObject *args) 1789 { 1790 Py_ssize_t result = string_find_internal(self, args, -1); 1791 if (result == -2) 1792 return NULL; 1793 return PyInt_FromSsize_t(result); 1794 } 1795 1796 1797 PyDoc_STRVAR(rindex__doc__, 1798 "S.rindex(sub [,start [,end]]) -> int\n\ 1799 \n\ 1800 Like S.rfind() but raise ValueError when the substring is not found."); 1801 1802 static PyObject * 1803 string_rindex(PyStringObject *self, PyObject *args) 1804 { 1805 Py_ssize_t result = string_find_internal(self, args, -1); 1806 if (result == -2) 1807 return NULL; 1808 if (result == -1) { 1809 PyErr_SetString(PyExc_ValueError, 1810 "substring not found"); 1811 return NULL; 1812 } 1813 return PyInt_FromSsize_t(result); 1814 } 1815 1816 1817 Py_LOCAL_INLINE(PyObject *) 1818 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) 1819 { 1820 char *s = PyString_AS_STRING(self); 1821 Py_ssize_t len = PyString_GET_SIZE(self); 1822 char *sep = PyString_AS_STRING(sepobj); 1823 Py_ssize_t seplen = PyString_GET_SIZE(sepobj); 1824 Py_ssize_t i, j; 1825 1826 i = 0; 1827 if (striptype != RIGHTSTRIP) { 1828 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { 1829 i++; 1830 } 1831 } 1832 1833 j = len; 1834 if (striptype != LEFTSTRIP) { 1835 do { 1836 j--; 1837 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); 1838 j++; 1839 } 1840 1841 if (i == 0 && j == len && PyString_CheckExact(self)) { 1842 Py_INCREF(self); 1843 return (PyObject*)self; 1844 } 1845 else 1846 return PyString_FromStringAndSize(s+i, j-i); 1847 } 1848 1849 1850 Py_LOCAL_INLINE(PyObject *) 1851 do_strip(PyStringObject *self, int striptype) 1852 { 1853 char *s = PyString_AS_STRING(self); 1854 Py_ssize_t len = PyString_GET_SIZE(self), i, j; 1855 1856 i = 0; 1857 if (striptype != RIGHTSTRIP) { 1858 while (i < len && 
isspace(Py_CHARMASK(s[i]))) { 1859 i++; 1860 } 1861 } 1862 1863 j = len; 1864 if (striptype != LEFTSTRIP) { 1865 do { 1866 j--; 1867 } while (j >= i && isspace(Py_CHARMASK(s[j]))); 1868 j++; 1869 } 1870 1871 if (i == 0 && j == len && PyString_CheckExact(self)) { 1872 Py_INCREF(self); 1873 return (PyObject*)self; 1874 } 1875 else 1876 return PyString_FromStringAndSize(s+i, j-i); 1877 } 1878 1879 1880 Py_LOCAL_INLINE(PyObject *) 1881 do_argstrip(PyStringObject *self, int striptype, PyObject *args) 1882 { 1883 PyObject *sep = NULL; 1884 1885 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) 1886 return NULL; 1887 1888 if (sep != NULL && sep != Py_None) { 1889 if (PyString_Check(sep)) 1890 return do_xstrip(self, striptype, sep); 1891 #ifdef Py_USING_UNICODE 1892 else if (PyUnicode_Check(sep)) { 1893 PyObject *uniself = PyUnicode_FromObject((PyObject *)self); 1894 PyObject *res; 1895 if (uniself==NULL) 1896 return NULL; 1897 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself, 1898 striptype, sep); 1899 Py_DECREF(uniself); 1900 return res; 1901 } 1902 #endif 1903 PyErr_Format(PyExc_TypeError, 1904 #ifdef Py_USING_UNICODE 1905 "%s arg must be None, str or unicode", 1906 #else 1907 "%s arg must be None or str", 1908 #endif 1909 STRIPNAME(striptype)); 1910 return NULL; 1911 } 1912 1913 return do_strip(self, striptype); 1914 } 1915 1916 1917 PyDoc_STRVAR(strip__doc__, 1918 "S.strip([chars]) -> string or unicode\n\ 1919 \n\ 1920 Return a copy of the string S with leading and trailing\n\ 1921 whitespace removed.\n\ 1922 If chars is given and not None, remove characters in chars instead.\n\ 1923 If chars is unicode, S will be converted to unicode before stripping"); 1924 1925 static PyObject * 1926 string_strip(PyStringObject *self, PyObject *args) 1927 { 1928 if (PyTuple_GET_SIZE(args) == 0) 1929 return do_strip(self, BOTHSTRIP); /* Common case */ 1930 else 1931 return do_argstrip(self, BOTHSTRIP, args); 1932 } 1933 1934 1935 PyDoc_STRVAR(lstrip__doc__, 1936 
"S.lstrip([chars]) -> string or unicode\n\ 1937 \n\ 1938 Return a copy of the string S with leading whitespace removed.\n\ 1939 If chars is given and not None, remove characters in chars instead.\n\ 1940 If chars is unicode, S will be converted to unicode before stripping"); 1941 1942 static PyObject * 1943 string_lstrip(PyStringObject *self, PyObject *args) 1944 { 1945 if (PyTuple_GET_SIZE(args) == 0) 1946 return do_strip(self, LEFTSTRIP); /* Common case */ 1947 else 1948 return do_argstrip(self, LEFTSTRIP, args); 1949 } 1950 1951 1952 PyDoc_STRVAR(rstrip__doc__, 1953 "S.rstrip([chars]) -> string or unicode\n\ 1954 \n\ 1955 Return a copy of the string S with trailing whitespace removed.\n\ 1956 If chars is given and not None, remove characters in chars instead.\n\ 1957 If chars is unicode, S will be converted to unicode before stripping"); 1958 1959 static PyObject * 1960 string_rstrip(PyStringObject *self, PyObject *args) 1961 { 1962 if (PyTuple_GET_SIZE(args) == 0) 1963 return do_strip(self, RIGHTSTRIP); /* Common case */ 1964 else 1965 return do_argstrip(self, RIGHTSTRIP, args); 1966 } 1967 1968 1969 PyDoc_STRVAR(lower__doc__, 1970 "S.lower() -> string\n\ 1971 \n\ 1972 Return a copy of the string S converted to lowercase."); 1973 1974 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */ 1975 #ifndef _tolower 1976 #define _tolower tolower 1977 #endif 1978 1979 static PyObject * 1980 string_lower(PyStringObject *self) 1981 { 1982 char *s; 1983 Py_ssize_t i, n = PyString_GET_SIZE(self); 1984 PyObject *newobj; 1985 1986 newobj = PyString_FromStringAndSize(NULL, n); 1987 if (!newobj) 1988 return NULL; 1989 1990 s = PyString_AS_STRING(newobj); 1991 1992 Py_MEMCPY(s, PyString_AS_STRING(self), n); 1993 1994 for (i = 0; i < n; i++) { 1995 int c = Py_CHARMASK(s[i]); 1996 if (isupper(c)) 1997 s[i] = _tolower(c); 1998 } 1999 2000 return newobj; 2001 } 2002 2003 PyDoc_STRVAR(upper__doc__, 2004 "S.upper() -> string\n\ 2005 \n\ 2006 Return a copy of the 
string S converted to uppercase."); 2007 2008 #ifndef _toupper 2009 #define _toupper toupper 2010 #endif 2011 2012 static PyObject * 2013 string_upper(PyStringObject *self) 2014 { 2015 char *s; 2016 Py_ssize_t i, n = PyString_GET_SIZE(self); 2017 PyObject *newobj; 2018 2019 newobj = PyString_FromStringAndSize(NULL, n); 2020 if (!newobj) 2021 return NULL; 2022 2023 s = PyString_AS_STRING(newobj); 2024 2025 Py_MEMCPY(s, PyString_AS_STRING(self), n); 2026 2027 for (i = 0; i < n; i++) { 2028 int c = Py_CHARMASK(s[i]); 2029 if (islower(c)) 2030 s[i] = _toupper(c); 2031 } 2032 2033 return newobj; 2034 } 2035 2036 PyDoc_STRVAR(title__doc__, 2037 "S.title() -> string\n\ 2038 \n\ 2039 Return a titlecased version of S, i.e. words start with uppercase\n\ 2040 characters, all remaining cased characters have lowercase."); 2041 2042 static PyObject* 2043 string_title(PyStringObject *self) 2044 { 2045 char *s = PyString_AS_STRING(self), *s_new; 2046 Py_ssize_t i, n = PyString_GET_SIZE(self); 2047 int previous_is_cased = 0; 2048 PyObject *newobj; 2049 2050 newobj = PyString_FromStringAndSize(NULL, n); 2051 if (newobj == NULL) 2052 return NULL; 2053 s_new = PyString_AsString(newobj); 2054 for (i = 0; i < n; i++) { 2055 int c = Py_CHARMASK(*s++); 2056 if (islower(c)) { 2057 if (!previous_is_cased) 2058 c = toupper(c); 2059 previous_is_cased = 1; 2060 } else if (isupper(c)) { 2061 if (previous_is_cased) 2062 c = tolower(c); 2063 previous_is_cased = 1; 2064 } else 2065 previous_is_cased = 0; 2066 *s_new++ = c;
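Dereference of null pointer: the store `*s_new++ = c` (line 2066) writes through `s_new`, which was assigned from PyString_AsString(newobj) at line 2053 and is never checked for NULL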
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

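Dereference of null pointer: the store `*s_new++ = c` (line 2066) writes through `s_new`, which was assigned from PyString_AsString(newobj) at line 2053 and is never checked for NULL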
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

2067 } 2068 return newobj; 2069 } 2070 2071 PyDoc_STRVAR(capitalize__doc__, 2072 "S.capitalize() -> string\n\ 2073 \n\ 2074 Return a copy of the string S with only its first character\n\ 2075 capitalized."); 2076 2077 static PyObject * 2078 string_capitalize(PyStringObject *self) 2079 { 2080 char *s = PyString_AS_STRING(self), *s_new; 2081 Py_ssize_t i, n = PyString_GET_SIZE(self); 2082 PyObject *newobj; 2083 2084 newobj = PyString_FromStringAndSize(NULL, n); 2085 if (newobj == NULL) 2086 return NULL; 2087 s_new = PyString_AsString(newobj); 2088 if (0 < n) { 2089 int c = Py_CHARMASK(*s++); 2090 if (islower(c)) 2091 *s_new = toupper(c); 2092 else 2093 *s_new = c; 2094 s_new++; 2095 } 2096 for (i = 1; i < n; i++) { 2097 int c = Py_CHARMASK(*s++); 2098 if (isupper(c)) 2099 *s_new = tolower(c); 2100 else 2101 *s_new = c; 2102 s_new++; 2103 } 2104 return newobj; 2105 } 2106 2107 2108 PyDoc_STRVAR(count__doc__, 2109 "S.count(sub[, start[, end]]) -> int\n\ 2110 \n\ 2111 Return the number of non-overlapping occurrences of substring sub in\n\ 2112 string S[start:end]. 
Optional arguments start and end are interpreted\n\ 2113 as in slice notation."); 2114 2115 static PyObject * 2116 string_count(PyStringObject *self, PyObject *args) 2117 { 2118 PyObject *sub_obj; 2119 const char *str = PyString_AS_STRING(self), *sub; 2120 Py_ssize_t sub_len; 2121 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; 2122 2123 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end)) 2124 return NULL; 2125 2126 if (PyString_Check(sub_obj)) { 2127 sub = PyString_AS_STRING(sub_obj); 2128 sub_len = PyString_GET_SIZE(sub_obj); 2129 } 2130 #ifdef Py_USING_UNICODE 2131 else if (PyUnicode_Check(sub_obj)) { 2132 Py_ssize_t count; 2133 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); 2134 if (count == -1) 2135 return NULL; 2136 else 2137 return PyInt_FromSsize_t(count); 2138 } 2139 #endif 2140 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) 2141 return NULL; 2142 2143 ADJUST_INDICES(start, end, PyString_GET_SIZE(self)); 2144 2145 return PyInt_FromSsize_t( 2146 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) 2147 ); 2148 } 2149 2150 PyDoc_STRVAR(swapcase__doc__, 2151 "S.swapcase() -> string\n\ 2152 \n\ 2153 Return a copy of the string S with uppercase characters\n\ 2154 converted to lowercase and vice versa."); 2155 2156 static PyObject * 2157 string_swapcase(PyStringObject *self) 2158 { 2159 char *s = PyString_AS_STRING(self), *s_new; 2160 Py_ssize_t i, n = PyString_GET_SIZE(self); 2161 PyObject *newobj; 2162 2163 newobj = PyString_FromStringAndSize(NULL, n); 2164 if (newobj == NULL) 2165 return NULL; 2166 s_new = PyString_AsString(newobj); 2167 for (i = 0; i < n; i++) { 2168 int c = Py_CHARMASK(*s++); 2169 if (islower(c)) { 2170 *s_new = toupper(c); 2171 } 2172 else if (isupper(c)) { 2173 *s_new = tolower(c); 2174 } 2175 else 2176 *s_new = c; 2177 s_new++; 2178 } 2179 return newobj; 2180 } 2181 2182 2183 PyDoc_STRVAR(translate__doc__, 2184 "S.translate(table [,deletechars]) -> string\n\ 2185 \n\ 2186 
Return a copy of the string S, where all characters occurring\n\ 2187 in the optional argument deletechars are removed, and the\n\ 2188 remaining characters have been mapped through the given\n\ 2189 translation table, which must be a string of length 256 or None.\n\ 2190 If the table argument is None, no translation is applied and\n\ 2191 the operation simply removes the characters in deletechars."); 2192 2193 static PyObject * 2194 string_translate(PyStringObject *self, PyObject *args) 2195 { 2196 register char *input, *output; 2197 const char *table; 2198 register Py_ssize_t i, c, changed = 0; 2199 PyObject *input_obj = (PyObject*)self; 2200 const char *output_start, *del_table=NULL; 2201 Py_ssize_t inlen, tablen, dellen = 0; 2202 PyObject *result; 2203 int trans_table[256]; 2204 PyObject *tableobj, *delobj = NULL; 2205 2206 if (!PyArg_UnpackTuple(args, "translate", 1, 2, 2207 &tableobj, &delobj)) 2208 return NULL; 2209 2210 if (PyString_Check(tableobj)) { 2211 table = PyString_AS_STRING(tableobj); 2212 tablen = PyString_GET_SIZE(tableobj); 2213 } 2214 else if (tableobj == Py_None) { 2215 table = NULL; 2216 tablen = 256; 2217 } 2218 #ifdef Py_USING_UNICODE 2219 else if (PyUnicode_Check(tableobj)) { 2220 /* Unicode .translate() does not support the deletechars 2221 parameter; instead a mapping to None will cause characters 2222 to be deleted. 
*/ 2223 if (delobj != NULL) { 2224 PyErr_SetString(PyExc_TypeError, 2225 "deletions are implemented differently for unicode"); 2226 return NULL; 2227 } 2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL); 2229 } 2230 #endif 2231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen)) 2232 return NULL; 2233 2234 if (tablen != 256) { 2235 PyErr_SetString(PyExc_ValueError, 2236 "translation table must be 256 characters long"); 2237 return NULL; 2238 } 2239 2240 if (delobj != NULL) { 2241 if (PyString_Check(delobj)) { 2242 del_table = PyString_AS_STRING(delobj); 2243 dellen = PyString_GET_SIZE(delobj); 2244 } 2245 #ifdef Py_USING_UNICODE 2246 else if (PyUnicode_Check(delobj)) { 2247 PyErr_SetString(PyExc_TypeError, 2248 "deletions are implemented differently for unicode"); 2249 return NULL; 2250 } 2251 #endif 2252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) 2253 return NULL; 2254 } 2255 else { 2256 del_table = NULL; 2257 dellen = 0; 2258 } 2259 2260 inlen = PyString_GET_SIZE(input_obj); 2261 result = PyString_FromStringAndSize((char *)NULL, inlen); 2262 if (result == NULL) 2263 return NULL; 2264 output_start = output = PyString_AsString(result); 2265 input = PyString_AS_STRING(input_obj); 2266 2267 if (dellen == 0 && table != NULL) { 2268 /* If no deletions are required, use faster code */ 2269 for (i = inlen; --i >= 0; ) { 2270 c = Py_CHARMASK(*input++); 2271 if (Py_CHARMASK((*output++ = table[c])) != c)
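Dereference of null pointer: the store `*output++ = table[c]` (line 2271) writes through `output`, which was assigned from PyString_AsString(result) at line 2264 and is never checked for NULL (`table` is already guarded by `table != NULL` at line 2267)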
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

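Dereference of null pointer: the store `*output++ = table[c]` (line 2271) writes through `output`, which was assigned from PyString_AsString(result) at line 2264 and is never checked for NULL (`table` is already guarded by `table != NULL` at line 2267)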
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

2272 changed = 1; 2273 } 2274 if (changed || !PyString_CheckExact(input_obj)) 2275 return result; 2276 Py_DECREF(result); 2277 Py_INCREF(input_obj); 2278 return input_obj; 2279 } 2280 2281 if (table == NULL) { 2282 for (i = 0; i < 256; i++) 2283 trans_table[i] = Py_CHARMASK(i); 2284 } else { 2285 for (i = 0; i < 256; i++) 2286 trans_table[i] = Py_CHARMASK(table[i]); 2287 } 2288 2289 for (i = 0; i < dellen; i++) 2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1; 2291 2292 for (i = inlen; --i >= 0; ) { 2293 c = Py_CHARMASK(*input++); 2294 if (trans_table[c] != -1) 2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) 2296 continue; 2297 changed = 1; 2298 } 2299 if (!changed && PyString_CheckExact(input_obj)) { 2300 Py_DECREF(result); 2301 Py_INCREF(input_obj); 2302 return input_obj; 2303 } 2304 /* Fix the size of the resulting string */ 2305 if (inlen > 0 && _PyString_Resize(&result, output - output_start)) 2306 return NULL; 2307 return result; 2308 } 2309 2310 2311 /* find and count characters and substrings */ 2312 2313 #define findchar(target, target_len, c) \ 2314 ((char *)memchr((const void *)(target), c, target_len)) 2315 2316 /* String ops must return a string. 
*/ 2317 /* If the object is subclass of string, create a copy */ 2318 Py_LOCAL(PyStringObject *) 2319 return_self(PyStringObject *self) 2320 { 2321 if (PyString_CheckExact(self)) { 2322 Py_INCREF(self); 2323 return self; 2324 } 2325 return (PyStringObject *)PyString_FromStringAndSize( 2326 PyString_AS_STRING(self), 2327 PyString_GET_SIZE(self)); 2328 } 2329 2330 Py_LOCAL_INLINE(Py_ssize_t) 2331 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount) 2332 { 2333 Py_ssize_t count=0; 2334 const char *start=target; 2335 const char *end=target+target_len; 2336 2337 while ( (start=findchar(start, end-start, c)) != NULL ) { 2338 count++; 2339 if (count >= maxcount) 2340 break; 2341 start += 1; 2342 } 2343 return count; 2344 } 2345 2346 2347 /* Algorithms for different cases of string replacement */ 2348 2349 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ 2350 Py_LOCAL(PyStringObject *) 2351 replace_interleave(PyStringObject *self, 2352 const char *to_s, Py_ssize_t to_len, 2353 Py_ssize_t maxcount) 2354 { 2355 char *self_s, *result_s; 2356 Py_ssize_t self_len, result_len; 2357 Py_ssize_t count, i, product; 2358 PyStringObject *result; 2359 2360 self_len = PyString_GET_SIZE(self); 2361 2362 /* 1 at the end plus 1 after every character */ 2363 count = self_len+1; 2364 if (maxcount < count) 2365 count = maxcount; 2366 2367 /* Check for overflow */ 2368 /* result_len = count * to_len + self_len; */ 2369 product = count * to_len; 2370 if (product / to_len != count) { 2371 PyErr_SetString(PyExc_OverflowError, 2372 "replace string is too long"); 2373 return NULL; 2374 } 2375 result_len = product + self_len; 2376 if (result_len < 0) { 2377 PyErr_SetString(PyExc_OverflowError, 2378 "replace string is too long"); 2379 return NULL; 2380 } 2381 2382 if (! 
(result = (PyStringObject *) 2383 PyString_FromStringAndSize(NULL, result_len)) ) 2384 return NULL; 2385 2386 self_s = PyString_AS_STRING(self); 2387 result_s = PyString_AS_STRING(result); 2388 2389 /* TODO: special case single character, which doesn't need memcpy */ 2390 2391 /* Lay the first one down (guaranteed this will occur) */ 2392 Py_MEMCPY(result_s, to_s, to_len); 2393 result_s += to_len; 2394 count -= 1; 2395 2396 for (i=0; i<count; i++) { 2397 *result_s++ = *self_s++; 2398 Py_MEMCPY(result_s, to_s, to_len); 2399 result_s += to_len; 2400 } 2401 2402 /* Copy the rest of the original string */ 2403 Py_MEMCPY(result_s, self_s, self_len-i); 2404 2405 return result; 2406 } 2407 2408 /* Special case for deleting a single character */ 2409 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ 2410 Py_LOCAL(PyStringObject *) 2411 replace_delete_single_character(PyStringObject *self, 2412 char from_c, Py_ssize_t maxcount) 2413 { 2414 char *self_s, *result_s; 2415 char *start, *next, *end; 2416 Py_ssize_t self_len, result_len; 2417 Py_ssize_t count; 2418 PyStringObject *result; 2419 2420 self_len = PyString_GET_SIZE(self); 2421 self_s = PyString_AS_STRING(self); 2422 2423 count = countchar(self_s, self_len, from_c, maxcount); 2424 if (count == 0) { 2425 return return_self(self); 2426 } 2427 2428 result_len = self_len - count; /* from_len == 1 */ 2429 assert(result_len>=0); 2430 2431 if ( (result = (PyStringObject *) 2432 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2433 return NULL; 2434 result_s = PyString_AS_STRING(result); 2435 2436 start = self_s; 2437 end = self_s + self_len; 2438 while (count-- > 0) { 2439 next = findchar(start, end-start, from_c); 2440 if (next == NULL) 2441 break; 2442 Py_MEMCPY(result_s, start, next-start); 2443 result_s += (next-start); 2444 start = next+1; 2445 } 2446 Py_MEMCPY(result_s, start, end-start); 2447 2448 return result; 2449 } 2450 2451 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ 2452 2453 
/* Remove up to maxcount non-overlapping occurrences of the substring
 * from_s[0:from_len] from self.
 *
 * Returns whatever return_self() yields when nothing matches, otherwise a
 * freshly allocated string; NULL if PyString_FromStringAndSize() fails.
 */
Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject *self,
                         const char *from_s, Py_ssize_t from_len,
                         Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    char *start, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count, offset;
    PyStringObject *result;

    self_len = PyString_GET_SIZE(self);
    self_s = PyString_AS_STRING(self);

    count = stringlib_count(self_s, self_len,
                            from_s, from_len,
                            maxcount);
    if (count == 0) {
        /* no matches */
        return return_self(self);
    }

    /* Each match removes from_len bytes from the result. */
    result_len = self_len - (count * from_len);
    assert(result_len >= 0);

    result = (PyStringObject *)PyString_FromStringAndSize(NULL, result_len);
    if (result == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        offset = stringlib_find(start, end - start,
                                from_s, from_len,
                                0);
        if (offset == -1)
            break;

        /* Keep the text in front of the match, then skip the match. */
        Py_MEMCPY(result_s, start, offset);
        result_s += offset;
        start += offset + from_len;
    }
    /* Copy whatever follows the last deleted match. */
    Py_MEMCPY(result_s, start, end - start);
    return result;
}

/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
/* Replace up to maxcount occurrences of the byte from_c with to_c.
 * The result has the same length as self, so the input is copied once
 * and then patched in place.
 */
Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject *self,
                                  char from_c, char to_c,
                                  Py_ssize_t maxcount)
{
    char *self_s, *result_s, *start, *end, *next;
    Py_ssize_t self_len;
    PyStringObject *result;

    /* The result string will be the same size */
    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    next = findchar(self_s, self_len, from_c);
    if (next == NULL) {
        /* No matches; return the original string */
        return return_self(self);
    }

    /* Need to make a new string */
    result = (PyStringObject *)PyString_FromStringAndSize(NULL, self_len);
    if (result == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);
    Py_MEMCPY(result_s, self_s, self_len);

    /* change everything in-place, starting with the match found above */
    start = result_s + (next - self_s);
    *start = to_c;
    start++;
    end = result_s + self_len;

    /* The first replacement already consumed one unit of maxcount. */
    while (--maxcount > 0) {
        next = findchar(start, end - start, from_c);
        if (next == NULL)
            break;
        *next = to_c;
        start = next + 1;
    }

    return result;
}

/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
/* Replace up to maxcount occurrences of from_s with the equally long
 * to_s.  Only from_len is used for the copies; to_len is not read here.
 */
Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject *self,
                           const char *from_s, Py_ssize_t from_len,
                           const char *to_s, Py_ssize_t to_len,
                           Py_ssize_t maxcount)
{
    char *result_s, *start, *end;
    char *self_s;
    Py_ssize_t self_len, offset;
    PyStringObject *result;

    /* The result string will be the same size */
    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    offset = stringlib_find(self_s, self_len,
                            from_s, from_len,
                            0);
    if (offset == -1) {
        /* No matches; return the original string */
        return return_self(self);
    }

    /* Need to make a new string */
    result = (PyStringObject *)PyString_FromStringAndSize(NULL, self_len);
    if (result == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);
    Py_MEMCPY(result_s, self_s, self_len);

    /* change everything in-place, starting with the match found above */
    start = result_s + offset;
    Py_MEMCPY(start, to_s, from_len);
    start += from_len;
    end = result_s + self_len;

    /* The first replacement already consumed one unit of maxcount. */
    while (--maxcount > 0) {
        offset = stringlib_find(start, end - start,
                                from_s, from_len,
                                0);
        if (offset == -1)
            break;
        Py_MEMCPY(start + offset, to_s, from_len);
        start += offset + from_len;
    }

    return result;
}

/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
/* Replace up to maxcount occurrences of the byte from_c with the longer
 * string to_s[0:to_len].
 *
 * Returns NULL with OverflowError set when the result length would exceed
 * PY_SSIZE_T_MAX, and NULL if PyString_FromStringAndSize() fails.
 */
Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject *self,
                         char from_c,
                         const char *to_s, Py_ssize_t to_len,
                         Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    char *start, *next, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count;
    PyStringObject *result;

    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    count = countchar(self_s, self_len, from_c, maxcount);
    if (count == 0) {
        /* no matches, return unchanged */
        return return_self(self);
    }

    /* use the difference between current and new, hence the "-1" */
    /*   result_len = self_len + count * (to_len-1)  */
    /* Test the bound before multiplying: signed overflow is undefined
       behaviour, so it cannot be detected reliably after the fact. */
    assert(count > 0);
    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
        return NULL;
    }
    result_len = self_len + count * (to_len - 1);

    result = (PyStringObject *)PyString_FromStringAndSize(NULL, result_len);
    if (result == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        next = findchar(start, end - start, from_c);
        if (next == NULL)
            break;

        /* copy the unchanged text in front of the match (possibly
           empty), then the 'to' string */
        Py_MEMCPY(result_s, start, next - start);
        result_s += (next - start);
        Py_MEMCPY(result_s, to_s, to_len);
        result_s += to_len;
        start = next + 1;
    }
    /* Copy the remainder of the remaining string */
    Py_MEMCPY(result_s, start, end - start);

    return result;
}

/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
/* Replace up to maxcount non-overlapping occurrences of the substring
 * from_s[0:from_len] with to_s[0:to_len], where the two lengths differ.
 *
 * Returns whatever return_self() yields when nothing matches, otherwise a
 * freshly allocated string.  Returns NULL with OverflowError set when the
 * result length would exceed PY_SSIZE_T_MAX, and NULL if
 * PyString_FromStringAndSize() fails.
 */
Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject *self,
                  const char *from_s, Py_ssize_t from_len,
                  const char *to_s, Py_ssize_t to_len,
                  Py_ssize_t maxcount)
{
    char *self_s, *result_s;
    char *start, *end;
    Py_ssize_t self_len, result_len;
    Py_ssize_t count, offset;
    PyStringObject *result;

    self_s = PyString_AS_STRING(self);
    self_len = PyString_GET_SIZE(self);

    count = stringlib_count(self_s, self_len,
                            from_s, from_len,
                            maxcount);
    if (count == 0) {
        /* no matches, return unchanged */
        return return_self(self);
    }

    /* Check for overflow */
    /*    result_len = self_len + count * (to_len-from_len) */
    /* Test the bound before multiplying: signed overflow is undefined
       behaviour, so it cannot be detected reliably after the fact.  A
       negative difference (to shorter than from) can only shrink the
       result and always passes. */
    assert(count > 0);
    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
        return NULL;
    }
    result_len = self_len + count * (to_len - from_len);

    result = (PyStringObject *)PyString_FromStringAndSize(NULL, result_len);
    if (result == NULL)
        return NULL;
    result_s = PyString_AS_STRING(result);

    start = self_s;
    end = self_s + self_len;
    while (count-- > 0) {
        offset = stringlib_find(start, end - start,
                                from_s, from_len,
                                0);
        if (offset == -1)
            break;

        /* copy the unchanged text in front of the match (possibly
           empty), then the 'to' string */
        Py_MEMCPY(result_s, start, offset);
        result_s += offset;
        Py_MEMCPY(result_s, to_s, to_len);
        result_s += to_len;
        start += offset + from_len;
    }
    /* Copy the remainder of the remaining string */
    Py_MEMCPY(result_s, start, end - start);

    return result;
}
2736 2737 2738 Py_LOCAL(PyStringObject *) 2739 replace(PyStringObject *self, 2740 const char *from_s, Py_ssize_t from_len, 2741 const char *to_s, Py_ssize_t to_len, 2742 Py_ssize_t maxcount) 2743 { 2744 if (maxcount < 0) { 2745 maxcount = PY_SSIZE_T_MAX; 2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { 2747 /* nothing to do; return the original string */ 2748 return return_self(self); 2749 } 2750 2751 if (maxcount == 0 || 2752 (from_len == 0 && to_len == 0)) { 2753 /* nothing to do; return the original string */ 2754 return return_self(self); 2755 } 2756 2757 /* Handle zero-length special cases */ 2758 2759 if (from_len == 0) { 2760 /* insert the 'to' string everywhere. */ 2761 /* >>> "Python".replace("", ".") */ 2762 /* '.P.y.t.h.o.n.' */ 2763 return replace_interleave(self, to_s, to_len, maxcount); 2764 } 2765 2766 /* Except for "".replace("", "A") == "A" there is no way beyond this */ 2767 /* point for an empty self string to generate a non-empty string */ 2768 /* Special case so the remaining code always gets a non-empty string */ 2769 if (PyString_GET_SIZE(self) == 0) { 2770 return return_self(self); 2771 } 2772 2773 if (to_len == 0) { 2774 /* delete all occurances of 'from' string */ 2775 if (from_len == 1) { 2776 return replace_delete_single_character( 2777 self, from_s[0], maxcount); 2778 } else { 2779 return replace_delete_substring(self, from_s, from_len, maxcount); 2780 } 2781 } 2782 2783 /* Handle special case where both strings have the same length */ 2784 2785 if (from_len == to_len) { 2786 if (from_len == 1) { 2787 return replace_single_character_in_place( 2788 self, 2789 from_s[0], 2790 to_s[0], 2791 maxcount); 2792 } else { 2793 return replace_substring_in_place( 2794 self, from_s, from_len, to_s, to_len, maxcount); 2795 } 2796 } 2797 2798 /* Otherwise use the more generic algorithms */ 2799 if (from_len == 1) { 2800 return replace_single_character(self, from_s[0], 2801 to_s, to_len, maxcount); 2802 } else { 2803 /* len('from')>=2, 
len('to')>=1 */ 2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); 2805 } 2806 } 2807 2808 PyDoc_STRVAR(replace__doc__, 2809 "S.replace(old, new[, count]) -> string\n\ 2810 \n\ 2811 Return a copy of string S with all occurrences of substring\n\ 2812 old replaced by new. If the optional argument count is\n\ 2813 given, only the first count occurrences are replaced."); 2814 2815 static PyObject * 2816 string_replace(PyStringObject *self, PyObject *args) 2817 { 2818 Py_ssize_t count = -1; 2819 PyObject *from, *to; 2820 const char *from_s, *to_s; 2821 Py_ssize_t from_len, to_len; 2822 2823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) 2824 return NULL; 2825 2826 if (PyString_Check(from)) { 2827 from_s = PyString_AS_STRING(from); 2828 from_len = PyString_GET_SIZE(from); 2829 } 2830 #ifdef Py_USING_UNICODE 2831 if (PyUnicode_Check(from)) 2832 return PyUnicode_Replace((PyObject *)self, 2833 from, to, count); 2834 #endif 2835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len)) 2836 return NULL; 2837 2838 if (PyString_Check(to)) { 2839 to_s = PyString_AS_STRING(to); 2840 to_len = PyString_GET_SIZE(to); 2841 } 2842 #ifdef Py_USING_UNICODE 2843 else if (PyUnicode_Check(to)) 2844 return PyUnicode_Replace((PyObject *)self, 2845 from, to, count); 2846 #endif 2847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len)) 2848 return NULL; 2849 2850 return (PyObject *)replace((PyStringObject *) self, 2851 from_s, from_len, 2852 to_s, to_len, count); 2853 } 2854 2855 /** End DALKE **/ 2856 2857 /* Matches the end (direction >= 0) or start (direction < 0) of self 2858 * against substr, using the start and end arguments. Returns 2859 * -1 on error, 0 if not found and 1 if found. 
2860 */ 2861 Py_LOCAL(int) 2862 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, 2863 Py_ssize_t end, int direction) 2864 { 2865 Py_ssize_t len = PyString_GET_SIZE(self); 2866 Py_ssize_t slen; 2867 const char* sub; 2868 const char* str; 2869 2870 if (PyString_Check(substr)) { 2871 sub = PyString_AS_STRING(substr); 2872 slen = PyString_GET_SIZE(substr); 2873 } 2874 #ifdef Py_USING_UNICODE 2875 else if (PyUnicode_Check(substr)) 2876 return PyUnicode_Tailmatch((PyObject *)self, 2877 substr, start, end, direction); 2878 #endif 2879 else if (PyObject_AsCharBuffer(substr, &sub, &slen)) 2880 return -1; 2881 str = PyString_AS_STRING(self); 2882 2883 ADJUST_INDICES(start, end, len); 2884 2885 if (direction < 0) { 2886 /* startswith */ 2887 if (start+slen > len) 2888 return 0; 2889 } else { 2890 /* endswith */ 2891 if (end-start < slen || start > len) 2892 return 0; 2893 2894 if (end-slen > start) 2895 start = end - slen; 2896 } 2897 if (end-start >= slen) 2898 return ! 
memcmp(str+start, sub, slen); 2899 return 0; 2900 } 2901 2902 2903 PyDoc_STRVAR(startswith__doc__, 2904 "S.startswith(prefix[, start[, end]]) -> bool\n\ 2905 \n\ 2906 Return True if S starts with the specified prefix, False otherwise.\n\ 2907 With optional start, test S beginning at that position.\n\ 2908 With optional end, stop comparing S at that position.\n\ 2909 prefix can also be a tuple of strings to try."); 2910 2911 static PyObject * 2912 string_startswith(PyStringObject *self, PyObject *args) 2913 { 2914 Py_ssize_t start = 0; 2915 Py_ssize_t end = PY_SSIZE_T_MAX; 2916 PyObject *subobj; 2917 int result; 2918 2919 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) 2920 return NULL; 2921 if (PyTuple_Check(subobj)) { 2922 Py_ssize_t i; 2923 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 2924 result = _string_tailmatch(self, 2925 PyTuple_GET_ITEM(subobj, i), 2926 start, end, -1); 2927 if (result == -1) 2928 return NULL; 2929 else if (result) { 2930 Py_RETURN_TRUE; 2931 } 2932 } 2933 Py_RETURN_FALSE; 2934 } 2935 result = _string_tailmatch(self, subobj, start, end, -1); 2936 if (result == -1) { 2937 if (PyErr_ExceptionMatches(PyExc_TypeError)) 2938 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, " 2939 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); 2940 return NULL; 2941 } 2942 else 2943 return PyBool_FromLong(result); 2944 } 2945 2946 2947 PyDoc_STRVAR(endswith__doc__, 2948 "S.endswith(suffix[, start[, end]]) -> bool\n\ 2949 \n\ 2950 Return True if S ends with the specified suffix, False otherwise.\n\ 2951 With optional start, test S beginning at that position.\n\ 2952 With optional end, stop comparing S at that position.\n\ 2953 suffix can also be a tuple of strings to try."); 2954 2955 static PyObject * 2956 string_endswith(PyStringObject *self, PyObject *args) 2957 { 2958 Py_ssize_t start = 0; 2959 Py_ssize_t end = PY_SSIZE_T_MAX; 2960 PyObject *subobj; 2961 int result; 2962 2963 if 
(!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) 2964 return NULL; 2965 if (PyTuple_Check(subobj)) { 2966 Py_ssize_t i; 2967 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 2968 result = _string_tailmatch(self, 2969 PyTuple_GET_ITEM(subobj, i), 2970 start, end, +1); 2971 if (result == -1) 2972 return NULL; 2973 else if (result) { 2974 Py_RETURN_TRUE; 2975 } 2976 } 2977 Py_RETURN_FALSE; 2978 } 2979 result = _string_tailmatch(self, subobj, start, end, +1); 2980 if (result == -1) { 2981 if (PyErr_ExceptionMatches(PyExc_TypeError)) 2982 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, " 2983 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); 2984 return NULL; 2985 } 2986 else 2987 return PyBool_FromLong(result); 2988 } 2989 2990 2991 PyDoc_STRVAR(encode__doc__, 2992 "S.encode([encoding[,errors]]) -> object\n\ 2993 \n\ 2994 Encodes S using the codec registered for encoding. encoding defaults\n\ 2995 to the default encoding. errors may be given to set a different error\n\ 2996 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 2997 a UnicodeEncodeError. 
Other possible values are 'ignore', 'replace' and\n\ 2998 'xmlcharrefreplace' as well as any other name registered with\n\ 2999 codecs.register_error that is able to handle UnicodeEncodeErrors."); 3000 3001 static PyObject * 3002 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs) 3003 { 3004 static char *kwlist[] = {"encoding", "errors", 0}; 3005 char *encoding = NULL; 3006 char *errors = NULL; 3007 PyObject *v; 3008 3009 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", 3010 kwlist, &encoding, &errors)) 3011 return NULL; 3012 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); 3013 if (v == NULL) 3014 goto onError; 3015 if (!PyString_Check(v) && !PyUnicode_Check(v)) { 3016 PyErr_Format(PyExc_TypeError, 3017 "encoder did not return a string/unicode object " 3018 "(type=%.400s)", 3019 Py_TYPE(v)->tp_name); 3020 Py_DECREF(v); 3021 return NULL; 3022 } 3023 return v; 3024 3025 onError: 3026 return NULL; 3027 } 3028 3029 3030 PyDoc_STRVAR(decode__doc__, 3031 "S.decode([encoding[,errors]]) -> object\n\ 3032 \n\ 3033 Decodes S using the codec registered for encoding. encoding defaults\n\ 3034 to the default encoding. errors may be given to set a different error\n\ 3035 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 3036 a UnicodeDecodeError. 
Other possible values are 'ignore' and 'replace'\n\ 3037 as well as any other name registered with codecs.register_error that is\n\ 3038 able to handle UnicodeDecodeErrors."); 3039 3040 static PyObject * 3041 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs) 3042 { 3043 static char *kwlist[] = {"encoding", "errors", 0}; 3044 char *encoding = NULL; 3045 char *errors = NULL; 3046 PyObject *v; 3047 3048 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", 3049 kwlist, &encoding, &errors)) 3050 return NULL; 3051 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); 3052 if (v == NULL) 3053 goto onError; 3054 if (!PyString_Check(v) && !PyUnicode_Check(v)) { 3055 PyErr_Format(PyExc_TypeError, 3056 "decoder did not return a string/unicode object " 3057 "(type=%.400s)", 3058 Py_TYPE(v)->tp_name); 3059 Py_DECREF(v); 3060 return NULL; 3061 } 3062 return v; 3063 3064 onError: 3065 return NULL; 3066 } 3067 3068 3069 PyDoc_STRVAR(expandtabs__doc__, 3070 "S.expandtabs([tabsize]) -> string\n\ 3071 \n\ 3072 Return a copy of S where all tab characters are expanded using spaces.\n\ 3073 If tabsize is not given, a tab size of 8 characters is assumed."); 3074 3075 static PyObject* 3076 string_expandtabs(PyStringObject *self, PyObject *args) 3077 { 3078 const char *e, *p, *qe; 3079 char *q; 3080 Py_ssize_t i, j, incr; 3081 PyObject *u; 3082 int tabsize = 8; 3083 3084 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) 3085 return NULL; 3086 3087 /* First pass: determine size of output string */ 3088 i = 0; /* chars up to and including most recent \n or \r */ 3089 j = 0; /* chars since most recent \n or \r (use in tab calculations) */ 3090 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ 3091 for (p = PyString_AS_STRING(self); p < e; p++) 3092 if (*p == '\t') { 3093 if (tabsize > 0) { 3094 incr = tabsize - (j % tabsize); 3095 if (j > PY_SSIZE_T_MAX - incr) 3096 goto overflow1; 3097 j += incr; 3098 } 3099 } 3100 else { 
3101 if (j > PY_SSIZE_T_MAX - 1) 3102 goto overflow1; 3103 j++; 3104 if (*p == '\n' || *p == '\r') { 3105 if (i > PY_SSIZE_T_MAX - j) 3106 goto overflow1; 3107 i += j; 3108 j = 0; 3109 } 3110 } 3111 3112 if (i > PY_SSIZE_T_MAX - j) 3113 goto overflow1; 3114 3115 /* Second pass: create output string and fill it */ 3116 u = PyString_FromStringAndSize(NULL, i + j); 3117 if (!u) 3118 return NULL; 3119 3120 j = 0; /* same as in first pass */ 3121 q = PyString_AS_STRING(u); /* next output char */ 3122 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */ 3123 3124 for (p = PyString_AS_STRING(self); p < e; p++) 3125 if (*p == '\t') { 3126 if (tabsize > 0) { 3127 i = tabsize - (j % tabsize); 3128 j += i; 3129 while (i--) { 3130 if (q >= qe) 3131 goto overflow2; 3132 *q++ = ' '; 3133 } 3134 } 3135 } 3136 else { 3137 if (q >= qe) 3138 goto overflow2; 3139 *q++ = *p; 3140 j++; 3141 if (*p == '\n' || *p == '\r') 3142 j = 0; 3143 } 3144 3145 return u; 3146 3147 overflow2: 3148 Py_DECREF(u); 3149 overflow1: 3150 PyErr_SetString(PyExc_OverflowError, "new string is too long"); 3151 return NULL; 3152 } 3153 3154 Py_LOCAL_INLINE(PyObject *) 3155 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill) 3156 { 3157 PyObject *u; 3158 3159 if (left < 0) 3160 left = 0; 3161 if (right < 0) 3162 right = 0; 3163 3164 if (left == 0 && right == 0 && PyString_CheckExact(self)) { 3165 Py_INCREF(self); 3166 return (PyObject *)self; 3167 } 3168 3169 u = PyString_FromStringAndSize(NULL, 3170 left + PyString_GET_SIZE(self) + right); 3171 if (u) { 3172 if (left) 3173 memset(PyString_AS_STRING(u), fill, left); 3174 Py_MEMCPY(PyString_AS_STRING(u) + left, 3175 PyString_AS_STRING(self), 3176 PyString_GET_SIZE(self)); 3177 if (right) 3178 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), 3179 fill, right); 3180 } 3181 3182 return u; 3183 } 3184 3185 PyDoc_STRVAR(ljust__doc__, 3186 "S.ljust(width[, fillchar]) -> string\n" 3187 "\n" 3188 "Return S 
left-justified in a string of length width. Padding is\n" 3189 "done using the specified fill character (default is a space)."); 3190 3191 static PyObject * 3192 string_ljust(PyStringObject *self, PyObject *args) 3193 { 3194 Py_ssize_t width; 3195 char fillchar = ' '; 3196 3197 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) 3198 return NULL; 3199 3200 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { 3201 Py_INCREF(self); 3202 return (PyObject*) self; 3203 } 3204 3205 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar); 3206 } 3207 3208 3209 PyDoc_STRVAR(rjust__doc__, 3210 "S.rjust(width[, fillchar]) -> string\n" 3211 "\n" 3212 "Return S right-justified in a string of length width. Padding is\n" 3213 "done using the specified fill character (default is a space)"); 3214 3215 static PyObject * 3216 string_rjust(PyStringObject *self, PyObject *args) 3217 { 3218 Py_ssize_t width; 3219 char fillchar = ' '; 3220 3221 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) 3222 return NULL; 3223 3224 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { 3225 Py_INCREF(self); 3226 return (PyObject*) self; 3227 } 3228 3229 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar); 3230 } 3231 3232 3233 PyDoc_STRVAR(center__doc__, 3234 "S.center(width[, fillchar]) -> string\n" 3235 "\n" 3236 "Return S centered in a string of length width. 
Padding is\n" 3237 "done using the specified fill character (default is a space)"); 3238 3239 static PyObject * 3240 string_center(PyStringObject *self, PyObject *args) 3241 { 3242 Py_ssize_t marg, left; 3243 Py_ssize_t width; 3244 char fillchar = ' '; 3245 3246 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) 3247 return NULL; 3248 3249 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { 3250 Py_INCREF(self); 3251 return (PyObject*) self; 3252 } 3253 3254 marg = width - PyString_GET_SIZE(self); 3255 left = marg / 2 + (marg & width & 1); 3256 3257 return pad(self, left, marg - left, fillchar); 3258 } 3259 3260 PyDoc_STRVAR(zfill__doc__, 3261 "S.zfill(width) -> string\n" 3262 "\n" 3263 "Pad a numeric string S with zeros on the left, to fill a field\n" 3264 "of the specified width. The string S is never truncated."); 3265 3266 static PyObject * 3267 string_zfill(PyStringObject *self, PyObject *args) 3268 { 3269 Py_ssize_t fill; 3270 PyObject *s; 3271 char *p; 3272 Py_ssize_t width; 3273 3274 if (!PyArg_ParseTuple(args, "n:zfill", &width)) 3275 return NULL; 3276 3277 if (PyString_GET_SIZE(self) >= width) { 3278 if (PyString_CheckExact(self)) { 3279 Py_INCREF(self); 3280 return (PyObject*) self; 3281 } 3282 else 3283 return PyString_FromStringAndSize( 3284 PyString_AS_STRING(self), 3285 PyString_GET_SIZE(self) 3286 ); 3287 } 3288 3289 fill = width - PyString_GET_SIZE(self); 3290 3291 s = pad(self, fill, 0, '0'); 3292 3293 if (s == NULL) 3294 return NULL; 3295 3296 p = PyString_AS_STRING(s); 3297 if (p[fill] == '+' || p[fill] == '-') { 3298 /* move sign to beginning of string */ 3299 p[0] = p[fill]; 3300 p[fill] = '0'; 3301 } 3302 3303 return (PyObject*) s; 3304 } 3305 3306 PyDoc_STRVAR(isspace__doc__, 3307 "S.isspace() -> bool\n\ 3308 \n\ 3309 Return True if all characters in S are whitespace\n\ 3310 and there is at least one character in S, False otherwise."); 3311 3312 static PyObject* 3313 string_isspace(PyStringObject *self) 3314 { 
3315 register const unsigned char *p 3316 = (unsigned char *) PyString_AS_STRING(self); 3317 register const unsigned char *e; 3318 3319 /* Shortcut for single character strings */ 3320 if (PyString_GET_SIZE(self) == 1 && 3321 isspace(*p)) 3322 return PyBool_FromLong(1); 3323 3324 /* Special case for empty strings */ 3325 if (PyString_GET_SIZE(self) == 0) 3326 return PyBool_FromLong(0); 3327 3328 e = p + PyString_GET_SIZE(self); 3329 for (; p < e; p++) { 3330 if (!isspace(*p)) 3331 return PyBool_FromLong(0); 3332 } 3333 return PyBool_FromLong(1); 3334 } 3335 3336 3337 PyDoc_STRVAR(isalpha__doc__, 3338 "S.isalpha() -> bool\n\ 3339 \n\ 3340 Return True if all characters in S are alphabetic\n\ 3341 and there is at least one character in S, False otherwise."); 3342 3343 static PyObject* 3344 string_isalpha(PyStringObject *self) 3345 { 3346 register const unsigned char *p 3347 = (unsigned char *) PyString_AS_STRING(self); 3348 register const unsigned char *e; 3349 3350 /* Shortcut for single character strings */ 3351 if (PyString_GET_SIZE(self) == 1 && 3352 isalpha(*p)) 3353 return PyBool_FromLong(1); 3354 3355 /* Special case for empty strings */ 3356 if (PyString_GET_SIZE(self) == 0) 3357 return PyBool_FromLong(0); 3358 3359 e = p + PyString_GET_SIZE(self); 3360 for (; p < e; p++) { 3361 if (!isalpha(*p)) 3362 return PyBool_FromLong(0); 3363 } 3364 return PyBool_FromLong(1); 3365 } 3366 3367 3368 PyDoc_STRVAR(isalnum__doc__, 3369 "S.isalnum() -> bool\n\ 3370 \n\ 3371 Return True if all characters in S are alphanumeric\n\ 3372 and there is at least one character in S, False otherwise."); 3373 3374 static PyObject* 3375 string_isalnum(PyStringObject *self) 3376 { 3377 register const unsigned char *p 3378 = (unsigned char *) PyString_AS_STRING(self); 3379 register const unsigned char *e; 3380 3381 /* Shortcut for single character strings */ 3382 if (PyString_GET_SIZE(self) == 1 && 3383 isalnum(*p)) 3384 return PyBool_FromLong(1); 3385 3386 /* Special case for empty 
strings */ 3387 if (PyString_GET_SIZE(self) == 0) 3388 return PyBool_FromLong(0); 3389 3390 e = p + PyString_GET_SIZE(self); 3391 for (; p < e; p++) { 3392 if (!isalnum(*p)) 3393 return PyBool_FromLong(0); 3394 } 3395 return PyBool_FromLong(1); 3396 } 3397 3398 3399 PyDoc_STRVAR(isdigit__doc__, 3400 "S.isdigit() -> bool\n\ 3401 \n\ 3402 Return True if all characters in S are digits\n\ 3403 and there is at least one character in S, False otherwise."); 3404 3405 static PyObject* 3406 string_isdigit(PyStringObject *self) 3407 { 3408 register const unsigned char *p 3409 = (unsigned char *) PyString_AS_STRING(self); 3410 register const unsigned char *e; 3411 3412 /* Shortcut for single character strings */ 3413 if (PyString_GET_SIZE(self) == 1 && 3414 isdigit(*p)) 3415 return PyBool_FromLong(1); 3416 3417 /* Special case for empty strings */ 3418 if (PyString_GET_SIZE(self) == 0) 3419 return PyBool_FromLong(0); 3420 3421 e = p + PyString_GET_SIZE(self); 3422 for (; p < e; p++) { 3423 if (!isdigit(*p)) 3424 return PyBool_FromLong(0); 3425 } 3426 return PyBool_FromLong(1); 3427 } 3428 3429 3430 PyDoc_STRVAR(islower__doc__, 3431 "S.islower() -> bool\n\ 3432 \n\ 3433 Return True if all cased characters in S are lowercase and there is\n\ 3434 at least one cased character in S, False otherwise."); 3435 3436 static PyObject* 3437 string_islower(PyStringObject *self) 3438 { 3439 register const unsigned char *p 3440 = (unsigned char *) PyString_AS_STRING(self); 3441 register const unsigned char *e; 3442 int cased; 3443 3444 /* Shortcut for single character strings */ 3445 if (PyString_GET_SIZE(self) == 1) 3446 return PyBool_FromLong(islower(*p) != 0); 3447 3448 /* Special case for empty strings */ 3449 if (PyString_GET_SIZE(self) == 0) 3450 return PyBool_FromLong(0); 3451 3452 e = p + PyString_GET_SIZE(self); 3453 cased = 0; 3454 for (; p < e; p++) { 3455 if (isupper(*p)) 3456 return PyBool_FromLong(0); 3457 else if (!cased && islower(*p)) 3458 cased = 1; 3459 } 3460 return 
PyBool_FromLong(cased); 3461 } 3462 3463 3464 PyDoc_STRVAR(isupper__doc__, 3465 "S.isupper() -> bool\n\ 3466 \n\ 3467 Return True if all cased characters in S are uppercase and there is\n\ 3468 at least one cased character in S, False otherwise."); 3469 3470 static PyObject* 3471 string_isupper(PyStringObject *self) 3472 { 3473 register const unsigned char *p 3474 = (unsigned char *) PyString_AS_STRING(self); 3475 register const unsigned char *e; 3476 int cased; 3477 3478 /* Shortcut for single character strings */ 3479 if (PyString_GET_SIZE(self) == 1) 3480 return PyBool_FromLong(isupper(*p) != 0); 3481 3482 /* Special case for empty strings */ 3483 if (PyString_GET_SIZE(self) == 0) 3484 return PyBool_FromLong(0); 3485 3486 e = p + PyString_GET_SIZE(self); 3487 cased = 0; 3488 for (; p < e; p++) { 3489 if (islower(*p)) 3490 return PyBool_FromLong(0); 3491 else if (!cased && isupper(*p)) 3492 cased = 1; 3493 } 3494 return PyBool_FromLong(cased); 3495 } 3496 3497 3498 PyDoc_STRVAR(istitle__doc__, 3499 "S.istitle() -> bool\n\ 3500 \n\ 3501 Return True if S is a titlecased string and there is at least one\n\ 3502 character in S, i.e. uppercase characters may only follow uncased\n\ 3503 characters and lowercase characters only cased ones. 
Return False\n\ 3504 otherwise."); 3505 3506 static PyObject* 3507 string_istitle(PyStringObject *self, PyObject *uncased) 3508 { 3509 register const unsigned char *p 3510 = (unsigned char *) PyString_AS_STRING(self); 3511 register const unsigned char *e; 3512 int cased, previous_is_cased; 3513 3514 /* Shortcut for single character strings */ 3515 if (PyString_GET_SIZE(self) == 1) 3516 return PyBool_FromLong(isupper(*p) != 0); 3517 3518 /* Special case for empty strings */ 3519 if (PyString_GET_SIZE(self) == 0) 3520 return PyBool_FromLong(0); 3521 3522 e = p + PyString_GET_SIZE(self); 3523 cased = 0; 3524 previous_is_cased = 0; 3525 for (; p < e; p++) { 3526 register const unsigned char ch = *p; 3527 3528 if (isupper(ch)) { 3529 if (previous_is_cased) 3530 return PyBool_FromLong(0); 3531 previous_is_cased = 1; 3532 cased = 1; 3533 } 3534 else if (islower(ch)) { 3535 if (!previous_is_cased) 3536 return PyBool_FromLong(0); 3537 previous_is_cased = 1; 3538 cased = 1; 3539 } 3540 else 3541 previous_is_cased = 0; 3542 } 3543 return PyBool_FromLong(cased); 3544 } 3545 3546 3547 PyDoc_STRVAR(splitlines__doc__, 3548 "S.splitlines([keepends]) -> list of strings\n\ 3549 \n\ 3550 Return a list of the lines in S, breaking at line boundaries.\n\ 3551 Line breaks are not included in the resulting list unless keepends\n\ 3552 is given and true."); 3553 3554 static PyObject* 3555 string_splitlines(PyStringObject *self, PyObject *args) 3556 { 3557 int keepends = 0; 3558 3559 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) 3560 return NULL; 3561 3562 return stringlib_splitlines( 3563 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self), 3564 keepends 3565 ); 3566 } 3567 3568 PyDoc_STRVAR(sizeof__doc__, 3569 "S.__sizeof__() -> size of S in memory, in bytes"); 3570 3571 static PyObject * 3572 string_sizeof(PyStringObject *v) 3573 { 3574 Py_ssize_t res; 3575 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize; 3576 return 
PyInt_FromSsize_t(res); 3577 } 3578 3579 static PyObject * 3580 string_getnewargs(PyStringObject *v) 3581 { 3582 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v)); 3583 } 3584 3585 3586 #include "stringlib/string_format.h" 3587 3588 PyDoc_STRVAR(format__doc__, 3589 "S.format(*args, **kwargs) -> string\n\ 3590 \n\ 3591 Return a formatted version of S, using substitutions from args and kwargs.\n\ 3592 The substitutions are identified by braces ('{' and '}')."); 3593 3594 static PyObject * 3595 string__format__(PyObject* self, PyObject* args) 3596 { 3597 PyObject *format_spec; 3598 PyObject *result = NULL; 3599 PyObject *tmp = NULL; 3600 3601 /* If 2.x, convert format_spec to the same type as value */ 3602 /* This is to allow things like u''.format('') */ 3603 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) 3604 goto done; 3605 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) { 3606 PyErr_Format(PyExc_TypeError, "__format__ arg must be str " 3607 "or unicode, not %s", Py_TYPE(format_spec)->tp_name); 3608 goto done; 3609 } 3610 tmp = PyObject_Str(format_spec); 3611 if (tmp == NULL) 3612 goto done; 3613 format_spec = tmp; 3614 3615 result = _PyBytes_FormatAdvanced(self, 3616 PyString_AS_STRING(format_spec), 3617 PyString_GET_SIZE(format_spec)); 3618 done: 3619 Py_XDECREF(tmp); 3620 return result; 3621 } 3622 3623 PyDoc_STRVAR(p_format__doc__, 3624 "S.__format__(format_spec) -> string\n\ 3625 \n\ 3626 Return a formatted version of S as described by format_spec."); 3627 3628 3629 static PyMethodDef 3630 string_methods[] = { 3631 /* Counterparts of the obsolete stropmodule functions; except 3632 string.maketrans(). 
*/ 3633 {"join", (PyCFunction)string_join, METH_O, join__doc__}, 3634 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, 3635 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, 3636 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, 3637 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, 3638 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, 3639 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__}, 3640 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__}, 3641 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__}, 3642 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__}, 3643 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__}, 3644 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__}, 3645 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, 3646 capitalize__doc__}, 3647 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, 3648 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, 3649 endswith__doc__}, 3650 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, 3651 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, 3652 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, 3653 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, 3654 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__}, 3655 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, 3656 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__}, 3657 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__}, 3658 {"rpartition", (PyCFunction)string_rpartition, METH_O, 3659 rpartition__doc__}, 3660 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, 3661 startswith__doc__}, 3662 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, 3663 {"swapcase", 
(PyCFunction)string_swapcase, METH_NOARGS, 3664 swapcase__doc__}, 3665 {"translate", (PyCFunction)string_translate, METH_VARARGS, 3666 translate__doc__}, 3667 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__}, 3668 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__}, 3669 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, 3670 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, 3671 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, 3672 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, 3673 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, 3674 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, 3675 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, 3676 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, 3677 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, 3678 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, 3679 expandtabs__doc__}, 3680 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, 3681 splitlines__doc__}, 3682 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS, 3683 sizeof__doc__}, 3684 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, 3685 {NULL, NULL} /* sentinel */ 3686 }; 3687 3688 static PyObject * 3689 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 3690 3691 static PyObject * 3692 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3693 { 3694 PyObject *x = NULL; 3695 static char *kwlist[] = {"object", 0}; 3696 3697 if (type != &PyString_Type) 3698 return str_subtype_new(type, args, kwds); 3699 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x)) 3700 return NULL; 3701 if (x == NULL) 3702 return PyString_FromString(""); 3703 return PyObject_Str(x); 3704 } 3705 3706 static PyObject * 3707 
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3708 { 3709 PyObject *tmp, *pnew; 3710 Py_ssize_t n; 3711 3712 assert(PyType_IsSubtype(type, &PyString_Type)); 3713 tmp = string_new(&PyString_Type, args, kwds); 3714 if (tmp == NULL) 3715 return NULL; 3716 assert(PyString_CheckExact(tmp)); 3717 n = PyString_GET_SIZE(tmp); 3718 pnew = type->tp_alloc(type, n); 3719 if (pnew != NULL) { 3720 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); 3721 ((PyStringObject *)pnew)->ob_shash = 3722 ((PyStringObject *)tmp)->ob_shash; 3723 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; 3724 } 3725 Py_DECREF(tmp); 3726 return pnew; 3727 } 3728 3729 static PyObject * 3730 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3731 { 3732 PyErr_SetString(PyExc_TypeError, 3733 "The basestring type cannot be instantiated"); 3734 return NULL; 3735 } 3736 3737 static PyObject * 3738 string_mod(PyObject *v, PyObject *w) 3739 { 3740 if (!PyString_Check(v)) { 3741 Py_INCREF(Py_NotImplemented); 3742 return Py_NotImplemented; 3743 } 3744 return PyString_Format(v, w); 3745 } 3746 3747 PyDoc_STRVAR(basestring_doc, 3748 "Type basestring cannot be instantiated; it is the base for str and unicode."); 3749 3750 static PyNumberMethods string_as_number = { 3751 0, /*nb_add*/ 3752 0, /*nb_subtract*/ 3753 0, /*nb_multiply*/ 3754 0, /*nb_divide*/ 3755 string_mod, /*nb_remainder*/ 3756 }; 3757 3758 3759 PyTypeObject PyBaseString_Type = { 3760 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3761 "basestring", 3762 0, 3763 0, 3764 0, /* tp_dealloc */ 3765 0, /* tp_print */ 3766 0, /* tp_getattr */ 3767 0, /* tp_setattr */ 3768 0, /* tp_compare */ 3769 0, /* tp_repr */ 3770 0, /* tp_as_number */ 3771 0, /* tp_as_sequence */ 3772 0, /* tp_as_mapping */ 3773 0, /* tp_hash */ 3774 0, /* tp_call */ 3775 0, /* tp_str */ 3776 0, /* tp_getattro */ 3777 0, /* tp_setattro */ 3778 0, /* tp_as_buffer */ 3779 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ 3780 
basestring_doc, /* tp_doc */ 3781 0, /* tp_traverse */ 3782 0, /* tp_clear */ 3783 0, /* tp_richcompare */ 3784 0, /* tp_weaklistoffset */ 3785 0, /* tp_iter */ 3786 0, /* tp_iternext */ 3787 0, /* tp_methods */ 3788 0, /* tp_members */ 3789 0, /* tp_getset */ 3790 &PyBaseObject_Type, /* tp_base */ 3791 0, /* tp_dict */ 3792 0, /* tp_descr_get */ 3793 0, /* tp_descr_set */ 3794 0, /* tp_dictoffset */ 3795 0, /* tp_init */ 3796 0, /* tp_alloc */ 3797 basestring_new, /* tp_new */ 3798 0, /* tp_free */ 3799 }; 3800 3801 PyDoc_STRVAR(string_doc, 3802 "str(object) -> string\n\ 3803 \n\ 3804 Return a nice string representation of the object.\n\ 3805 If the argument is a string, the return value is the same object."); 3806 3807 PyTypeObject PyString_Type = { 3808 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3809 "str", 3810 PyStringObject_SIZE, 3811 sizeof(char), 3812 string_dealloc, /* tp_dealloc */ 3813 (printfunc)string_print, /* tp_print */ 3814 0, /* tp_getattr */ 3815 0, /* tp_setattr */ 3816 0, /* tp_compare */ 3817 string_repr, /* tp_repr */ 3818 &string_as_number, /* tp_as_number */ 3819 &string_as_sequence, /* tp_as_sequence */ 3820 &string_as_mapping, /* tp_as_mapping */ 3821 (hashfunc)string_hash, /* tp_hash */ 3822 0, /* tp_call */ 3823 string_str, /* tp_str */ 3824 PyObject_GenericGetAttr, /* tp_getattro */ 3825 0, /* tp_setattro */ 3826 &string_as_buffer, /* tp_as_buffer */ 3827 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | 3828 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS | 3829 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */ 3830 string_doc, /* tp_doc */ 3831 0, /* tp_traverse */ 3832 0, /* tp_clear */ 3833 (richcmpfunc)string_richcompare, /* tp_richcompare */ 3834 0, /* tp_weaklistoffset */ 3835 0, /* tp_iter */ 3836 0, /* tp_iternext */ 3837 string_methods, /* tp_methods */ 3838 0, /* tp_members */ 3839 0, /* tp_getset */ 3840 &PyBaseString_Type, /* tp_base */ 3841 0, /* tp_dict */ 3842 0, /* tp_descr_get */ 3843 0, /* tp_descr_set */ 3844 0, /* tp_dictoffset */ 
3845 0, /* tp_init */ 3846 0, /* tp_alloc */ 3847 string_new, /* tp_new */ 3848 PyObject_Del, /* tp_free */ 3849 }; 3850 3851 void 3852 PyString_Concat(register PyObject **pv, register PyObject *w) 3853 { 3854 register PyObject *v; 3855 if (*pv == NULL) 3856 return; 3857 if (w == NULL || !PyString_Check(*pv)) { 3858 Py_DECREF(*pv); 3859 *pv = NULL; 3860 return; 3861 } 3862 v = string_concat((PyStringObject *) *pv, w); 3863 Py_DECREF(*pv); 3864 *pv = v; 3865 } 3866 3867 void 3868 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w) 3869 { 3870 PyString_Concat(pv, w); 3871 Py_XDECREF(w); 3872 } 3873 3874 3875 /* The following function breaks the notion that strings are immutable: 3876 it changes the size of a string. We get away with this only if there 3877 is only one module referencing the object. You can also think of it 3878 as creating a new string object and destroying the old one, only 3879 more efficiently. In any case, don't use this if the string may 3880 already be known to some other part of the code... 3881 Note that if there's not enough memory to resize the string, the original 3882 string object at *pv is deallocated, *pv is set to NULL, an "out of 3883 memory" exception is set, and -1 is returned. Else (on success) 0 is 3884 returned, and the value in *pv may or may not be the same as on input. 3885 As always, an extra byte is allocated for a trailing \0 byte (newsize 3886 does *not* include that), and a trailing \0 byte is stored. 
3887 */ 3888 3889 int 3890 _PyString_Resize(PyObject **pv, Py_ssize_t newsize) 3891 { 3892 register PyObject *v; 3893 register PyStringObject *sv; 3894 v = *pv; 3895 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 || 3896 PyString_CHECK_INTERNED(v)) { 3897 *pv = 0; 3898 Py_DECREF(v); 3899 PyErr_BadInternalCall(); 3900 return -1; 3901 } 3902 /* XXX UNREF/NEWREF interface should be more symmetrical */ 3903 _Py_DEC_REFTOTAL; 3904 _Py_ForgetReference(v); 3905 *pv = (PyObject *) 3906 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize); 3907 if (*pv == NULL) { 3908 PyObject_Del(v); 3909 PyErr_NoMemory(); 3910 return -1; 3911 } 3912 _Py_NewReference(*pv); 3913 sv = (PyStringObject *) *pv; 3914 Py_SIZE(sv) = newsize; 3915 sv->ob_sval[newsize] = '\0'; 3916 sv->ob_shash = -1; /* invalidate cached hash value */ 3917 return 0; 3918 } 3919 3920 /* Helpers for formatstring */ 3921 3922 Py_LOCAL_INLINE(PyObject *) 3923 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) 3924 { 3925 Py_ssize_t argidx = *p_argidx; 3926 if (argidx < arglen) { 3927 (*p_argidx)++; 3928 if (arglen < 0) 3929 return args; 3930 else 3931 return PyTuple_GetItem(args, argidx); 3932 } 3933 PyErr_SetString(PyExc_TypeError, 3934 "not enough arguments for format string"); 3935 return NULL; 3936 } 3937 3938 /* Format codes 3939 * F_LJUST '-' 3940 * F_SIGN '+' 3941 * F_BLANK ' ' 3942 * F_ALT '#' 3943 * F_ZERO '0' 3944 */ 3945 #define F_LJUST (1<<0) 3946 #define F_SIGN (1<<1) 3947 #define F_BLANK (1<<2) 3948 #define F_ALT (1<<3) 3949 #define F_ZERO (1<<4) 3950 3951 /* Returns a new reference to a PyString object, or NULL on failure. 
*/

static PyObject *
formatfloat(PyObject *v, int flags, int prec, int type)
{
    double value;
    char *text;
    PyObject *formatted;
    int dtsf_flags = 0;

    value = PyFloat_AsDouble(v);
    if (value == -1.0 && PyErr_Occurred()) {
        /* Whatever the conversion raised is replaced by a TypeError
           naming the offending type. */
        PyErr_Format(PyExc_TypeError, "float argument required, "
                     "not %.200s", Py_TYPE(v)->tp_name);
        return NULL;
    }

    /* A negative precision is replaced by 6. */
    if (prec < 0)
        prec = 6;

    if (flags & F_ALT)
        dtsf_flags = Py_DTSF_ALT;

    text = PyOS_double_to_string(value, type, prec, dtsf_flags, NULL);
    if (text == NULL)
        return NULL;

    /* Copy the formatted text into a string object, then release the
       temporary buffer regardless of whether the copy succeeded. */
    formatted = PyString_FromStringAndSize(text, strlen(text));
    PyMem_Free(text);
    return formatted;
}

/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyString*, or NULL if error.
 *     *pbuf is set to point into it,
 *     *plen set to the # of chars following that.
 *     Caller must decref it when done using pbuf.
 *     The string starting at *pbuf is of the form
 *         "-"? ("0x" | "0X")? digit+
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
 *         set in flags.  The case of hex digits will be correct.
 *     There will be at least prec digits, zero-filled on the left if
 *         necessary to get that many.
 * val          object to be converted
 * flags        bitmask of format flags; only F_ALT is looked at
 * prec         minimum number of digits; 0-fill on left if needed
 * type         a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
4000 */ 4001 PyObject* 4002 _PyString_FormatLong(PyObject *val, int flags, int prec, int type, 4003 char **pbuf, int *plen) 4004 { 4005 PyObject *result = NULL; 4006 char *buf; 4007 Py_ssize_t i; 4008 int sign; /* 1 if '-', else 0 */ 4009 int len; /* number of characters */ 4010 Py_ssize_t llen; 4011 int numdigits; /* len == numnondigits + numdigits */ 4012 int numnondigits = 0; 4013 4014 switch (type) { 4015 case 'd': 4016 case 'u': 4017 result = Py_TYPE(val)->tp_str(val); 4018 break; 4019 case 'o': 4020 result = Py_TYPE(val)->tp_as_number->nb_oct(val); 4021 break; 4022 case 'x': 4023 case 'X': 4024 numnondigits = 2; 4025 result = Py_TYPE(val)->tp_as_number->nb_hex(val); 4026 break; 4027 default: 4028 assert(!"'type' not in [duoxX]"); 4029 } 4030 if (!result) 4031 return NULL; 4032 4033 buf = PyString_AsString(result); 4034 if (!buf) { 4035 Py_DECREF(result); 4036 return NULL; 4037 } 4038 4039 /* To modify the string in-place, there can only be one reference. */ 4040 if (Py_REFCNT(result) != 1) { 4041 PyErr_BadInternalCall(); 4042 return NULL; 4043 } 4044 llen = PyString_Size(result); 4045 if (llen > INT_MAX) { 4046 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); 4047 return NULL; 4048 } 4049 len = (int)llen; 4050 if (buf[len-1] == 'L') { 4051 --len; 4052 buf[len] = '\0'; 4053 } 4054 sign = buf[0] == '-'; 4055 numnondigits += sign; 4056 numdigits = len - numnondigits; 4057 assert(numdigits > 0); 4058 4059 /* Get rid of base marker unless F_ALT */ 4060 if ((flags & F_ALT) == 0) { 4061 /* Need to skip 0x, 0X or 0. */ 4062 int skipped = 0; 4063 switch (type) { 4064 case 'o': 4065 assert(buf[sign] == '0'); 4066 /* If 0 is only digit, leave it alone. 
*/ 4067 if (numdigits > 1) { 4068 skipped = 1; 4069 --numdigits; 4070 } 4071 break; 4072 case 'x': 4073 case 'X': 4074 assert(buf[sign] == '0'); 4075 assert(buf[sign + 1] == 'x'); 4076 skipped = 2; 4077 numnondigits -= 2; 4078 break; 4079 } 4080 if (skipped) { 4081 buf += skipped; 4082 len -= skipped; 4083 if (sign) 4084 buf[0] = '-'; 4085 } 4086 assert(len == numnondigits + numdigits); 4087 assert(numdigits > 0); 4088 } 4089 4090 /* Fill with leading zeroes to meet minimum width. */ 4091 if (prec > numdigits) { 4092 PyObject *r1 = PyString_FromStringAndSize(NULL, 4093 numnondigits + prec); 4094 char *b1; 4095 if (!r1) { 4096 Py_DECREF(result); 4097 return NULL; 4098 } 4099 b1 = PyString_AS_STRING(r1); 4100 for (i = 0; i < numnondigits; ++i) 4101 *b1++ = *buf++; 4102 for (i = 0; i < prec - numdigits; i++) 4103 *b1++ = '0'; 4104 for (i = 0; i < numdigits; i++) 4105 *b1++ = *buf++; 4106 *b1 = '\0'; 4107 Py_DECREF(result); 4108 result = r1; 4109 buf = PyString_AS_STRING(result); 4110 len = numnondigits + prec; 4111 } 4112 4113 /* Fix up case for hex conversions. */ 4114 if (type == 'X') { 4115 /* Need to convert all lower case letters to upper case. 4116 and need to convert 0x to 0X (and -0x to -0X). */ 4117 for (i = 0; i < len; i++) 4118 if (buf[i] >= 'a' && buf[i] <= 'x') 4119 buf[i] -= 'a'-'A'; 4120 } 4121 *pbuf = buf; 4122 *plen = len; 4123 return result; 4124 } 4125 4126 Py_LOCAL_INLINE(int) 4127 formatint(char *buf, size_t buflen, int flags, 4128 int prec, int type, PyObject *v) 4129 { 4130 /* fmt = '%#.' + `prec` + 'l' + `type` 4131 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) 4132 + 1 + 1 = 24 */ 4133 char fmt[64]; /* plenty big enough! 
*/ 4134 char *sign; 4135 long x; 4136 4137 x = PyInt_AsLong(v); 4138 if (x == -1 && PyErr_Occurred()) { 4139 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", 4140 Py_TYPE(v)->tp_name); 4141 return -1; 4142 } 4143 if (x < 0 && type == 'u') { 4144 type = 'd'; 4145 } 4146 if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) 4147 sign = "-"; 4148 else 4149 sign = ""; 4150 if (prec < 0) 4151 prec = 1; 4152 4153 if ((flags & F_ALT) && 4154 (type == 'x' || type == 'X')) { 4155 /* When converting under %#x or %#X, there are a number 4156 * of issues that cause pain: 4157 * - when 0 is being converted, the C standard leaves off 4158 * the '0x' or '0X', which is inconsistent with other 4159 * %#x/%#X conversions and inconsistent with Python's 4160 * hex() function 4161 * - there are platforms that violate the standard and 4162 * convert 0 with the '0x' or '0X' 4163 * (Metrowerks, Compaq Tru64) 4164 * - there are platforms that give '0x' when converting 4165 * under %#X, but convert 0 in accordance with the 4166 * standard (OS/2 EMX) 4167 * 4168 * We can achieve the desired consistency by inserting our 4169 * own '0x' or '0X' prefix, and substituting %x/%X in place 4170 * of %#x/%#X. 4171 * 4172 * Note that this is the same approach as used in 4173 * formatint() in unicodeobject.c 4174 */ 4175 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", 4176 sign, type, prec, type); 4177 } 4178 else { 4179 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", 4180 sign, (flags&F_ALT) ? 
"#" : "", 4181 prec, type); 4182 } 4183 4184 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) 4185 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 4186 */ 4187 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { 4188 PyErr_SetString(PyExc_OverflowError, 4189 "formatted integer is too long (precision too large?)"); 4190 return -1; 4191 } 4192 if (sign[0]) 4193 PyOS_snprintf(buf, buflen, fmt, -x); 4194 else 4195 PyOS_snprintf(buf, buflen, fmt, x); 4196 return (int)strlen(buf); 4197 } 4198 4199 Py_LOCAL_INLINE(int) 4200 formatchar(char *buf, size_t buflen, PyObject *v) 4201 { 4202 /* presume that the buffer is at least 2 characters long */ 4203 if (PyString_Check(v)) { 4204 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) 4205 return -1; 4206 } 4207 else { 4208 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) 4209 return -1; 4210 } 4211 buf[1] = '\0'; 4212 return 1; 4213 } 4214 4215 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) 4216 4217 FORMATBUFLEN is the length of the buffer in which the ints & 4218 chars are formatted. XXX This is a magic number. Each formatting 4219 routine does bounds checking to ensure no overflow, but a better 4220 solution may be to malloc a buffer of appropriate size for each 4221 format. For now, the current solution is sufficient. 
4222 */ 4223 #define FORMATBUFLEN (size_t)120 4224 4225 PyObject * 4226 PyString_Format(PyObject *format, PyObject *args) 4227 { 4228 char *fmt, *res; 4229 Py_ssize_t arglen, argidx; 4230 Py_ssize_t reslen, rescnt, fmtcnt; 4231 int args_owned = 0; 4232 PyObject *result, *orig_args; 4233 #ifdef Py_USING_UNICODE 4234 PyObject *v, *w; 4235 #endif 4236 PyObject *dict = NULL; 4237 if (format == NULL || !PyString_Check(format) || args == NULL) { 4238 PyErr_BadInternalCall(); 4239 return NULL; 4240 } 4241 orig_args = args; 4242 fmt = PyString_AS_STRING(format); 4243 fmtcnt = PyString_GET_SIZE(format); 4244 reslen = rescnt = fmtcnt + 100; 4245 result = PyString_FromStringAndSize((char *)NULL, reslen); 4246 if (result == NULL) 4247 return NULL; 4248 res = PyString_AsString(result); 4249 if (PyTuple_Check(args)) { 4250 arglen = PyTuple_GET_SIZE(args); 4251 argidx = 0; 4252 } 4253 else { 4254 arglen = -1; 4255 argidx = -2; 4256 } 4257 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) && 4258 !PyObject_TypeCheck(args, &PyBaseString_Type)) 4259 dict = args; 4260 while (--fmtcnt >= 0) { 4261 if (*fmt != '%') { 4262 if (--rescnt < 0) { 4263 rescnt = fmtcnt + 100; 4264 reslen += rescnt; 4265 if (_PyString_Resize(&result, reslen)) 4266 return NULL; 4267 res = PyString_AS_STRING(result) 4268 + reslen - rescnt; 4269 --rescnt; 4270 } 4271 *res++ = *fmt++; 4272 } 4273 else { 4274 /* Got a format specifier */ 4275 int flags = 0; 4276 Py_ssize_t width = -1; 4277 int prec = -1; 4278 int c = '\0'; 4279 int fill; 4280 int isnumok; 4281 PyObject *v = NULL; 4282 PyObject *temp = NULL; 4283 char *pbuf; 4284 int sign; 4285 Py_ssize_t len; 4286 char formatbuf[FORMATBUFLEN]; 4287 /* For format{int,char}() */ 4288 #ifdef Py_USING_UNICODE 4289 char *fmt_start = fmt; 4290 Py_ssize_t argidx_start = argidx; 4291 #endif 4292 4293 fmt++; 4294 if (*fmt == '(') { 4295 char *keystart; 4296 Py_ssize_t keylen; 4297 PyObject *key; 4298 int pcount = 1; 4299 4300 if (dict == NULL) { 4301 
PyErr_SetString(PyExc_TypeError, 4302 "format requires a mapping"); 4303 goto error; 4304 } 4305 ++fmt; 4306 --fmtcnt; 4307 keystart = fmt; 4308 /* Skip over balanced parentheses */ 4309 while (pcount > 0 && --fmtcnt >= 0) { 4310 if (*fmt == ')') 4311 --pcount; 4312 else if (*fmt == '(') 4313 ++pcount; 4314 fmt++; 4315 } 4316 keylen = fmt - keystart - 1; 4317 if (fmtcnt < 0 || pcount > 0) { 4318 PyErr_SetString(PyExc_ValueError, 4319 "incomplete format key"); 4320 goto error; 4321 } 4322 key = PyString_FromStringAndSize(keystart, 4323 keylen); 4324 if (key == NULL) 4325 goto error; 4326 if (args_owned) { 4327 Py_DECREF(args); 4328 args_owned = 0; 4329 } 4330 args = PyObject_GetItem(dict, key); 4331 Py_DECREF(key); 4332 if (args == NULL) { 4333 goto error; 4334 } 4335 args_owned = 1; 4336 arglen = -1; 4337 argidx = -2; 4338 } 4339 while (--fmtcnt >= 0) { 4340 switch (c = *fmt++) { 4341 case '-': flags |= F_LJUST; continue; 4342 case '+': flags |= F_SIGN; continue; 4343 case ' ': flags |= F_BLANK; continue; 4344 case '#': flags |= F_ALT; continue; 4345 case '0': flags |= F_ZERO; continue; 4346 } 4347 break; 4348 } 4349 if (c == '*') { 4350 v = getnextarg(args, arglen, &argidx); 4351 if (v == NULL) 4352 goto error; 4353 if (!PyInt_Check(v)) { 4354 PyErr_SetString(PyExc_TypeError, 4355 "* wants int"); 4356 goto error; 4357 } 4358 width = PyInt_AsLong(v); 4359 if (width < 0) { 4360 flags |= F_LJUST; 4361 width = -width; 4362 } 4363 if (--fmtcnt >= 0) 4364 c = *fmt++; 4365 } 4366 else if (c >= 0 && isdigit(c)) { 4367 width = c - '0'; 4368 while (--fmtcnt >= 0) { 4369 c = Py_CHARMASK(*fmt++); 4370 if (!isdigit(c)) 4371 break; 4372 if ((width*10) / 10 != width) { 4373 PyErr_SetString( 4374 PyExc_ValueError, 4375 "width too big"); 4376 goto error; 4377 } 4378 width = width*10 + (c - '0'); 4379 } 4380 } 4381 if (c == '.') { 4382 prec = 0; 4383 if (--fmtcnt >= 0) 4384 c = *fmt++; 4385 if (c == '*') { 4386 v = getnextarg(args, arglen, &argidx); 4387 if (v == NULL) 4388 goto 
error; 4389 if (!PyInt_Check(v)) { 4390 PyErr_SetString( 4391 PyExc_TypeError, 4392 "* wants int"); 4393 goto error; 4394 } 4395 prec = PyInt_AsLong(v); 4396 if (prec < 0) 4397 prec = 0; 4398 if (--fmtcnt >= 0) 4399 c = *fmt++; 4400 } 4401 else if (c >= 0 && isdigit(c)) { 4402 prec = c - '0'; 4403 while (--fmtcnt >= 0) { 4404 c = Py_CHARMASK(*fmt++); 4405 if (!isdigit(c)) 4406 break; 4407 if ((prec*10) / 10 != prec) { 4408 PyErr_SetString( 4409 PyExc_ValueError, 4410 "prec too big"); 4411 goto error; 4412 } 4413 prec = prec*10 + (c - '0'); 4414 } 4415 } 4416 } /* prec */ 4417 if (fmtcnt >= 0) { 4418 if (c == 'h' || c == 'l' || c == 'L') { 4419 if (--fmtcnt >= 0) 4420 c = *fmt++; 4421 } 4422 } 4423 if (fmtcnt < 0) { 4424 PyErr_SetString(PyExc_ValueError, 4425 "incomplete format"); 4426 goto error; 4427 } 4428 if (c != '%') { 4429 v = getnextarg(args, arglen, &argidx); 4430 if (v == NULL) 4431 goto error; 4432 } 4433 sign = 0; 4434 fill = ' '; 4435 switch (c) { 4436 case '%': 4437 pbuf = "%"; 4438 len = 1; 4439 break; 4440 case 's': 4441 #ifdef Py_USING_UNICODE 4442 if (PyUnicode_Check(v)) { 4443 fmt = fmt_start; 4444 argidx = argidx_start; 4445 goto unicode; 4446 } 4447 #endif 4448 temp = _PyObject_Str(v); 4449 #ifdef Py_USING_UNICODE 4450 if (temp != NULL && PyUnicode_Check(temp)) { 4451 Py_DECREF(temp); 4452 fmt = fmt_start; 4453 argidx = argidx_start; 4454 goto unicode; 4455 } 4456 #endif 4457 /* Fall through */ 4458 case 'r': 4459 if (c == 'r') 4460 temp = PyObject_Repr(v); 4461 if (temp == NULL) 4462 goto error; 4463 if (!PyString_Check(temp)) { 4464 PyErr_SetString(PyExc_TypeError, 4465 "%s argument has non-string str()"); 4466 Py_DECREF(temp); 4467 goto error; 4468 } 4469 pbuf = PyString_AS_STRING(temp); 4470 len = PyString_GET_SIZE(temp); 4471 if (prec >= 0 && len > prec) 4472 len = prec; 4473 break; 4474 case 'i': 4475 case 'd': 4476 case 'u': 4477 case 'o': 4478 case 'x': 4479 case 'X': 4480 if (c == 'i') 4481 c = 'd'; 4482 isnumok = 0; 4483 if 
(PyNumber_Check(v)) { 4484 PyObject *iobj=NULL; 4485 4486 if (PyInt_Check(v) || (PyLong_Check(v))) { 4487 iobj = v; 4488 Py_INCREF(iobj); 4489 } 4490 else { 4491 iobj = PyNumber_Int(v); 4492 if (iobj==NULL) iobj = PyNumber_Long(v); 4493 } 4494 if (iobj!=NULL) { 4495 if (PyInt_Check(iobj)) { 4496 isnumok = 1; 4497 pbuf = formatbuf; 4498 len = formatint(pbuf, 4499 sizeof(formatbuf), 4500 flags, prec, c, iobj); 4501 Py_DECREF(iobj); 4502 if (len < 0) 4503 goto error; 4504 sign = 1; 4505 } 4506 else if (PyLong_Check(iobj)) { 4507 int ilen; 4508 4509 isnumok = 1; 4510 temp = _PyString_FormatLong(iobj, flags, 4511 prec, c, &pbuf, &ilen); 4512 Py_DECREF(iobj); 4513 len = ilen; 4514 if (!temp) 4515 goto error; 4516 sign = 1; 4517 } 4518 else { 4519 Py_DECREF(iobj); 4520 } 4521 } 4522 } 4523 if (!isnumok) { 4524 PyErr_Format(PyExc_TypeError, 4525 "%%%c format: a number is required, " 4526 "not %.200s", c, Py_TYPE(v)->tp_name); 4527 goto error; 4528 } 4529 if (flags & F_ZERO) 4530 fill = '0'; 4531 break; 4532 case 'e': 4533 case 'E': 4534 case 'f': 4535 case 'F': 4536 case 'g': 4537 case 'G': 4538 temp = formatfloat(v, flags, prec, c); 4539 if (temp == NULL) 4540 goto error; 4541 pbuf = PyString_AS_STRING(temp); 4542 len = PyString_GET_SIZE(temp); 4543 sign = 1; 4544 if (flags & F_ZERO) 4545 fill = '0'; 4546 break; 4547 case 'c': 4548 #ifdef Py_USING_UNICODE 4549 if (PyUnicode_Check(v)) { 4550 fmt = fmt_start; 4551 argidx = argidx_start; 4552 goto unicode; 4553 } 4554 #endif 4555 pbuf = formatbuf; 4556 len = formatchar(pbuf, sizeof(formatbuf), v); 4557 if (len < 0) 4558 goto error; 4559 break; 4560 default: 4561 PyErr_Format(PyExc_ValueError, 4562 "unsupported format character '%c' (0x%x) " 4563 "at index %zd", 4564 c, c, 4565 (Py_ssize_t)(fmt - 1 - 4566 PyString_AsString(format))); 4567 goto error; 4568 } 4569 if (sign) { 4570 if (*pbuf == '-' || *pbuf == '+') { 4571 sign = *pbuf++; 4572 len--; 4573 } 4574 else if (flags & F_SIGN) 4575 sign = '+'; 4576 else if (flags & 
F_BLANK) 4577 sign = ' '; 4578 else 4579 sign = 0; 4580 } 4581 if (width < len) 4582 width = len; 4583 if (rescnt - (sign != 0) < width) { 4584 reslen -= rescnt; 4585 rescnt = width + fmtcnt + 100; 4586 reslen += rescnt; 4587 if (reslen < 0) { 4588 Py_DECREF(result); 4589 Py_XDECREF(temp); 4590 return PyErr_NoMemory(); 4591 } 4592 if (_PyString_Resize(&result, reslen)) { 4593 Py_XDECREF(temp); 4594 return NULL; 4595 } 4596 res = PyString_AS_STRING(result) 4597 + reslen - rescnt; 4598 } 4599 if (sign) { 4600 if (fill != ' ') 4601 *res++ = sign; 4602 rescnt--; 4603 if (width > len) 4604 width--; 4605 } 4606 if ((flags & F_ALT) && (c == 'x' || c == 'X')) { 4607 assert(pbuf[0] == '0'); 4608 assert(pbuf[1] == c); 4609 if (fill != ' ') { 4610 *res++ = *pbuf++; 4611 *res++ = *pbuf++; 4612 } 4613 rescnt -= 2; 4614 width -= 2; 4615 if (width < 0) 4616 width = 0; 4617 len -= 2; 4618 } 4619 if (width > len && !(flags & F_LJUST)) { 4620 do { 4621 --rescnt; 4622 *res++ = fill;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

4623 } while (--width > len); 4624 } 4625 if (fill == ' ') { 4626 if (sign) 4627 *res++ = sign;
Dereference of null pointer
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

4628 if ((flags & F_ALT) && 4629 (c == 'x' || c == 'X')) { 4630 assert(pbuf[0] == '0'); 4631 assert(pbuf[1] == c); 4632 *res++ = *pbuf++; 4633 *res++ = *pbuf++; 4634 } 4635 } 4636 Py_MEMCPY(res, pbuf, len); 4637 res += len; 4638 rescnt -= len; 4639 while (--width >= len) { 4640 --rescnt; 4641 *res++ = ' '; 4642 } 4643 if (dict && (argidx < arglen) && c != '%') { 4644 PyErr_SetString(PyExc_TypeError, 4645 "not all arguments converted during string formatting"); 4646 Py_XDECREF(temp); 4647 goto error; 4648 } 4649 Py_XDECREF(temp); 4650 } /* '%' */ 4651 } /* until end */ 4652 if (argidx < arglen && !dict) { 4653 PyErr_SetString(PyExc_TypeError, 4654 "not all arguments converted during string formatting"); 4655 goto error; 4656 } 4657 if (args_owned) { 4658 Py_DECREF(args); 4659 } 4660 if (_PyString_Resize(&result, reslen - rescnt)) 4661 return NULL; 4662 return result; 4663 4664 #ifdef Py_USING_UNICODE 4665 unicode: 4666 if (args_owned) { 4667 Py_DECREF(args); 4668 args_owned = 0; 4669 } 4670 /* Fiddle args right (remove the first argidx arguments) */ 4671 if (PyTuple_Check(orig_args) && argidx > 0) { 4672 PyObject *v; 4673 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx; 4674 v = PyTuple_New(n); 4675 if (v == NULL) 4676 goto error; 4677 while (--n >= 0) { 4678 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); 4679 Py_INCREF(w); 4680 PyTuple_SET_ITEM(v, n, w); 4681 } 4682 args = v; 4683 } else { 4684 Py_INCREF(orig_args); 4685 args = orig_args; 4686 } 4687 args_owned = 1; 4688 /* Take what we have of the result and let the Unicode formatting 4689 function format the rest of the input. 
*/ 4690 rescnt = res - PyString_AS_STRING(result); 4691 if (_PyString_Resize(&result, rescnt)) 4692 goto error; 4693 fmtcnt = PyString_GET_SIZE(format) - \ 4694 (fmt - PyString_AS_STRING(format)); 4695 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); 4696 if (format == NULL) 4697 goto error; 4698 v = PyUnicode_Format(format, args); 4699 Py_DECREF(format); 4700 if (v == NULL) 4701 goto error; 4702 /* Paste what we have (result) to what the Unicode formatting 4703 function returned (v) and return the result (or error) */ 4704 w = PyUnicode_Concat(result, v); 4705 Py_DECREF(result); 4706 Py_DECREF(v); 4707 Py_DECREF(args); 4708 return w; 4709 #endif /* Py_USING_UNICODE */ 4710 4711 error: 4712 Py_DECREF(result);
Access to field 'ob_refcnt' results in a dereference of a null pointer (loaded from variable 'result')
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Access to field 'ob_refcnt' results in a dereference of a null pointer (loaded from variable 'result')
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

4713 if (args_owned) { 4714 Py_DECREF(args); 4715 } 4716 return NULL; 4717 } 4718 4719 void 4720 PyString_InternInPlace(PyObject **p) 4721 { 4722 register PyStringObject *s = (PyStringObject *)(*p); 4723 PyObject *t; 4724 if (s == NULL || !PyString_Check(s)) 4725 Py_FatalError("PyString_InternInPlace: strings only please!"); 4726 /* If it's a string subclass, we don't really know what putting 4727 it in the interned dict might do. */ 4728 if (!PyString_CheckExact(s))
Access to field 'ob_type' results in a dereference of a null pointer (loaded from variable 's')
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Access to field 'ob_type' results in a dereference of a null pointer (loaded from variable 's')
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

4729 return; 4730 if (PyString_CHECK_INTERNED(s)) 4731 return; 4732 if (interned == NULL) { 4733 interned = PyDict_New(); 4734 if (interned == NULL) { 4735 PyErr_Clear(); /* Don't leave an exception */ 4736 return; 4737 } 4738 } 4739 t = PyDict_GetItem(interned, (PyObject *)s); 4740 if (t) { 4741 Py_INCREF(t); 4742 Py_DECREF(*p); 4743 *p = t; 4744 return; 4745 } 4746 4747 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { 4748 PyErr_Clear(); 4749 return; 4750 } 4751 /* The two references in interned are not counted by refcnt. 4752 The string deallocator will take care of this */ 4753 Py_REFCNT(s) -= 2; 4754 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; 4755 } 4756 4757 void 4758 PyString_InternImmortal(PyObject **p) 4759 { 4760 PyString_InternInPlace(p); 4761 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { 4762 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; 4763 Py_INCREF(*p); 4764 } 4765 } 4766 4767 4768 PyObject * 4769 PyString_InternFromString(const char *cp) 4770 { 4771 PyObject *s = PyString_FromString(cp); 4772 if (s == NULL) 4773 return NULL; 4774 PyString_InternInPlace(&s); 4775 return s; 4776 } 4777 4778 void 4779 PyString_Fini(void) 4780 { 4781 int i; 4782 for (i = 0; i < UCHAR_MAX + 1; i++) { 4783 Py_XDECREF(characters[i]); 4784 characters[i] = NULL; 4785 } 4786 Py_XDECREF(nullstring); 4787 nullstring = NULL; 4788 } 4789 4790 void _Py_ReleaseInternedStrings(void) 4791 { 4792 PyObject *keys; 4793 PyStringObject *s; 4794 Py_ssize_t i, n; 4795 Py_ssize_t immortal_size = 0, mortal_size = 0; 4796 4797 if (interned == NULL || !PyDict_Check(interned)) 4798 return; 4799 keys = PyDict_Keys(interned); 4800 if (keys == NULL || !PyList_Check(keys)) { 4801 PyErr_Clear(); 4802 return; 4803 } 4804 4805 /* Since _Py_ReleaseInternedStrings() is intended to help a leak 4806 detector, interned strings are not forcibly deallocated; rather, we 4807 give them their stolen references back, and then clear and DECREF 4808 the 
interned dict. */ 4809 4810 n = PyList_GET_SIZE(keys); 4811 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", 4812 n); 4813 for (i = 0; i < n; i++) { 4814 s = (PyStringObject *) PyList_GET_ITEM(keys, i); 4815 switch (s->ob_sstate) { 4816 case SSTATE_NOT_INTERNED: 4817 /* XXX Shouldn't happen */ 4818 break; 4819 case SSTATE_INTERNED_IMMORTAL: 4820 Py_REFCNT(s) += 1; 4821 immortal_size += Py_SIZE(s); 4822 break; 4823 case SSTATE_INTERNED_MORTAL: 4824 Py_REFCNT(s) += 2; 4825 mortal_size += Py_SIZE(s); 4826 break; 4827 default: 4828 Py_FatalError("Inconsistent interned string state."); 4829 } 4830 s->ob_sstate = SSTATE_NOT_INTERNED; 4831 } 4832 fprintf(stderr, "total size of all interned strings: " 4833 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " 4834 "mortal/immortal\n", mortal_size, immortal_size); 4835 Py_DECREF(keys); 4836 PyDict_Clear(interned); 4837 Py_DECREF(interned); 4838 interned = NULL; 4839 } 4840 4841 void _PyString_DebugMallocStats(FILE *out) 4842 { 4843 ssize_t i; 4844 int num_immortal = 0, num_mortal = 0; 4845 ssize_t immortal_size = 0, mortal_size = 0; 4846 4847 if (interned == NULL || !PyDict_Check(interned)) 4848 return; 4849 4850 for (i = 0; i <= ((PyDictObject*)interned)->ma_mask; i++) { 4851 PyDictEntry *ep = ((PyDictObject*)interned)->ma_table + i; 4852 PyObject *pvalue = ep->me_value; 4853 if (pvalue != NULL) { 4854 PyStringObject *s = (PyStringObject *)ep->me_key; 4855 4856 switch (s->ob_sstate) { 4857 case SSTATE_NOT_INTERNED: 4858 /* XXX Shouldn't happen */ 4859 break; 4860 case SSTATE_INTERNED_IMMORTAL: 4861 num_immortal ++; 4862 immortal_size += s->ob_size; 4863 break; 4864 case SSTATE_INTERNED_MORTAL: 4865 num_mortal ++; 4866 mortal_size += s->ob_size; 4867 break; 4868 default: 4869 Py_FatalError("Inconsistent interned string state."); 4870 } 4871 } 4872 } 4873 4874 fprintf(out, "%d mortal interned strings\n", num_mortal); 4875 fprintf(out, "%d immortal interned strings\n", num_immortal); 4876 fprintf(out, "total 
size of all interned strings: " 4877 "%zi/%zi " 4878 "mortal/immortal\n", mortal_size, immortal_size); 4879 }