Location | Tool | Test ID | Function | Issue |
---|---|---|---|---|
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:390:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:390:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:651:22 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:651:22 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:667:25 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:667:25 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:668:25 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:668:25 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:669:25 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:669:25 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:670:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:670:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:671:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:671:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:672:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:672:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:673:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:673:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:674:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:674:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:675:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:675:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:676:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:676:24 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:685:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:685:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:710:22 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:710:22 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:719:22 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:719:22 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:741:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:741:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2066:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2066:18 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2271:17 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:2271:17 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4622:28 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4622:28 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4627:28 | clang-analyzer | Dereference of null pointer | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4712:5 | clang-analyzer | Access to field 'ob_refcnt' results in a dereference of a null pointer (loaded from variable 'result') | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4712:5 | clang-analyzer | Access to field 'ob_refcnt' results in a dereference of a null pointer (loaded from variable 'result') | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4728:10 | clang-analyzer | Access to field 'ob_type' results in a dereference of a null pointer (loaded from variable 's') | ||
/builddir/build/BUILD/Python-2.7.3/Objects/stringobject.c:4728:10 | clang-analyzer | Access to field 'ob_type' results in a dereference of a null pointer (loaded from variable 's') |
1 /* String (str/bytes) object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
12
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
15
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23 */
24 static PyObject *interned;
25
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31 */
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
34 /*
35 For PyString_FromString(), the parameter `str' points to a null-terminated
36 string containing exactly `size' bytes.
37
38 For PyString_FromStringAndSize(), the parameter `str' is
39 either NULL or else points to a string containing at least `size' bytes.
40 For PyString_FromStringAndSize(), the string in the `str' parameter does
41 not have to be null-terminated. (Therefore it is safe to construct a
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
51 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
53 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
55 */
56 PyObject *
57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67 null_strings++;
68 #endif
69 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
75 #ifdef COUNT_ALLOCS
76 one_strings++;
77 #endif
78 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81
82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
86
87 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
112 }
113
114 PyObject *
115 PyString_FromString(const char *str)
116 {
117 register size_t size;
118 register PyStringObject *op;
119
120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129 null_strings++;
130 #endif
131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136 one_strings++;
137 #endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141
142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165 }
166
167 PyObject *
168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
175
176 #ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef __va_copy
180 __va_copy(count, vargs);
181 #else
182 count = vargs;
183 #endif
184 #endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189 int longlongflag = 0;
190 #endif
191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
194
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
202 #ifdef HAVE_LONG_LONG
203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
208 #endif
209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
213
214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
231 #endif
232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
237
238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
265 expand:
266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
272
273 s = PyString_AsString(string);
274
275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281 int longlongflag = 0;
282 #endif
283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
303 #ifdef HAVE_LONG_LONG
304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
309 #endif
310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
316
317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
328 #endif
329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
344 #endif
345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
391 }
392
393 end:
394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
397 }
398
399 PyObject *
400 PyString_FromFormat(const char *format, ...)
401 {
402 PyObject* ret;
403 va_list vargs;
404
405 #ifdef HAVE_STDARG_PROTOTYPES
406 va_start(vargs, format);
407 #else
408 va_start(vargs);
409 #endif
410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
413 }
414
415
416 PyObject *PyString_Decode(const char *s,
417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
420 {
421 PyObject *v, *str;
422
423 str = PyString_FromStringAndSize(s, size);
424 if (str == NULL)
425 return NULL;
426 v = PyString_AsDecodedString(str, encoding, errors);
427 Py_DECREF(str);
428 return v;
429 }
430
431 PyObject *PyString_AsDecodedObject(PyObject *str,
432 const char *encoding,
433 const char *errors)
434 {
435 PyObject *v;
436
437 if (!PyString_Check(str)) {
438 PyErr_BadArgument();
439 goto onError;
440 }
441
442 if (encoding == NULL) {
443 #ifdef Py_USING_UNICODE
444 encoding = PyUnicode_GetDefaultEncoding();
445 #else
446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
448 #endif
449 }
450
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
453 if (v == NULL)
454 goto onError;
455
456 return v;
457
458 onError:
459 return NULL;
460 }
461
462 PyObject *PyString_AsDecodedString(PyObject *str,
463 const char *encoding,
464 const char *errors)
465 {
466 PyObject *v;
467
468 v = PyString_AsDecodedObject(str, encoding, errors);
469 if (v == NULL)
470 goto onError;
471
472 #ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
480 }
481 #endif
482 if (!PyString_Check(v)) {
483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
488 }
489
490 return v;
491
492 onError:
493 return NULL;
494 }
495
496 PyObject *PyString_Encode(const char *s,
497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
500 {
501 PyObject *v, *str;
502
503 str = PyString_FromStringAndSize(s, size);
504 if (str == NULL)
505 return NULL;
506 v = PyString_AsEncodedString(str, encoding, errors);
507 Py_DECREF(str);
508 return v;
509 }
510
511 PyObject *PyString_AsEncodedObject(PyObject *str,
512 const char *encoding,
513 const char *errors)
514 {
515 PyObject *v;
516
517 if (!PyString_Check(str)) {
518 PyErr_BadArgument();
519 goto onError;
520 }
521
522 if (encoding == NULL) {
523 #ifdef Py_USING_UNICODE
524 encoding = PyUnicode_GetDefaultEncoding();
525 #else
526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
528 #endif
529 }
530
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
533 if (v == NULL)
534 goto onError;
535
536 return v;
537
538 onError:
539 return NULL;
540 }
541
542 PyObject *PyString_AsEncodedString(PyObject *str,
543 const char *encoding,
544 const char *errors)
545 {
546 PyObject *v;
547
548 v = PyString_AsEncodedObject(str, encoding, errors);
549 if (v == NULL)
550 goto onError;
551
552 #ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
560 }
561 #endif
562 if (!PyString_Check(v)) {
563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
568 }
569
570 return v;
571
572 onError:
573 return NULL;
574 }
575
576 static void
577 string_dealloc(PyObject *op)
578 {
579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
582
583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
590
591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
593
594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
598 }
599
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
605 PyObject *PyString_DecodeEscape(const char *s,
606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
610 {
611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
624 #ifdef Py_USING_UNICODE
625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
635
636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
641
642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
652 }
653 #else
654 *p++ = *s++;
655 #endif
656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
668 case '\'': *p++ = '\''; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
669 case '\"': *p++ = '\"'; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
670 case 'b': *p++ = '\b'; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
671 case 'f': *p++ = '\014'; break; /* FF */
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
672 case 't': *p++ = '\t'; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
673 case 'n': *p++ = '\n'; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
674 case 'r': *p++ = '\r'; break;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
675 case 'v': *p++ = '\013'; break; /* VT */
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
729 #ifndef Py_USING_UNICODE
730 case 'u':
731 case 'U':
732 case 'N':
733 if (unicode) {
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
737 goto failed;
738 }
739 #endif
740 default:
741 *p++ = '\\';
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
742 s--;
743 goto non_esc; /* an arbitrary number of unescaped
744 UTF-8 bytes may follow. */
745 }
746 }
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748 goto failed;
749 return v;
750 failed:
751 Py_DECREF(v);
752 return NULL;
753 }
754
755 /* -------------------------------------------------------------------- */
756 /* object api */
757
758 static Py_ssize_t
759 string_getsize(register PyObject *op)
760 {
761 char *s;
762 Py_ssize_t len;
763 if (PyString_AsStringAndSize(op, &s, &len))
764 return -1;
765 return len;
766 }
767
768 static /*const*/ char *
769 string_getbuffer(register PyObject *op)
770 {
771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return NULL;
775 return s;
776 }
777
778 Py_ssize_t
779 PyString_Size(register PyObject *op)
780 {
781 if (!PyString_Check(op))
782 return string_getsize(op);
783 return Py_SIZE(op);
784 }
785
786 /*const*/ char *
787 PyString_AsString(register PyObject *op)
788 {
789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
792 }
793
794 int
795 PyString_AsStringAndSize(register PyObject *obj,
796 register char **s,
797 register Py_ssize_t *len)
798 {
799 if (s == NULL) {
800 PyErr_BadInternalCall();
801 return -1;
802 }
803
804 if (!PyString_Check(obj)) {
805 #ifdef Py_USING_UNICODE
806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808 if (obj == NULL)
809 return -1;
810 }
811 else
812 #endif
813 {
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
817 return -1;
818 }
819 }
820
821 *s = PyString_AS_STRING(obj);
822 if (len != NULL)
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
827 return -1;
828 }
829 return 0;
830 }
831
832 /* -------------------------------------------------------------------- */
833 /* Methods */
834
835 #include "stringlib/stringdefs.h"
836 #include "stringlib/fastsearch.h"
837
838 #include "stringlib/count.h"
839 #include "stringlib/find.h"
840 #include "stringlib/partition.h"
841 #include "stringlib/split.h"
842
843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
844 #include "stringlib/localeutil.h"
845
846
847
848 static int
849 string_print(PyStringObject *op, FILE *fp, int flags)
850 {
851 Py_ssize_t i, str_len;
852 char c;
853 int quote;
854
855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
857 int ret;
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
860 if (op == NULL)
861 return -1;
862 ret = string_print(op, fp, flags);
863 Py_DECREF(op);
864 return ret;
865 }
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory aligment issues.
874 */
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
877 data += chunk_size;
878 size -= chunk_size;
879 }
880 #ifdef __VMS
881 if (size) fwrite(data, (int)size, 1, fp);
882 #else
883 fwrite(data, 1, (int)size, fp);
884 #endif
885 Py_END_ALLOW_THREADS
886 return 0;
887 }
888
889 /* figure out which quote to use; single is preferred */
890 quote = '\'';
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
894
895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
897 fputc(quote, fp);
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the interal buffer should not be an issue
901 with the GIL released. */
902 c = op->ob_sval[i];
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
905 else if (c == '\t')
906 fprintf(fp, "\\t");
907 else if (c == '\n')
908 fprintf(fp, "\\n");
909 else if (c == '\r')
910 fprintf(fp, "\\r");
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
913 else
914 fputc(c, fp);
915 }
916 fputc(quote, fp);
917 Py_END_ALLOW_THREADS
918 return 0;
919 }
920
921 PyObject *
922 PyString_Repr(PyObject *obj, int smartquotes)
923 {
924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
926 PyObject *v;
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
930 return NULL;
931 }
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register char c;
939 register char *p;
940 int quote;
941
942 /* figure out which quote to use; single is preferred */
943 quote = '\'';
944 if (smartquotes &&
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
947 quote = '"';
948
949 p = PyString_AS_STRING(v);
950 *p++ = quote;
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955 c = op->ob_sval[i];
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
958 else if (c == '\t')
959 *p++ = '\\', *p++ = 't';
960 else if (c == '\n')
961 *p++ = '\\', *p++ = 'n';
962 else if (c == '\r')
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
967 function call). */
968 sprintf(p, "\\x%02x", c & 0xff);
969 p += 4;
970 }
971 else
972 *p++ = c;
973 }
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975 *p++ = quote;
976 *p = '\0';
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978 return NULL;
979 return v;
980 }
981 }
982
983 static PyObject *
984 string_repr(PyObject *op)
985 {
986 return PyString_Repr(op, 1);
987 }
988
989 static PyObject *
990 string_str(PyObject *s)
991 {
992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
994 Py_INCREF(s);
995 return s;
996 }
997 else {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001 }
1002 }
1003
1004 static Py_ssize_t
1005 string_length(PyStringObject *a)
1006 {
1007 return Py_SIZE(a);
1008 }
1009
1010 static PyObject *
1011 string_concat(register PyStringObject *a, register PyObject *bb)
1012 {
1013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
1016 #ifdef Py_USING_UNICODE
1017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
1019 #endif
1020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1025 return NULL;
1026 }
1027 #define b ((PyStringObject *)bb)
1028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1032 Py_INCREF(bb);
1033 return bb;
1034 }
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1042 */
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1047 return NULL;
1048 }
1049
1050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1054 return NULL;
1055 }
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057 if (op == NULL)
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1060 op->ob_shash = -1;
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
1066 #undef b
1067 }
1068
1069 static PyObject *
1070 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1071 {
1072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1076 size_t nbytes;
1077 if (n < 0)
1078 n = 0;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1081 */
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1086 return NULL;
1087 }
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089 Py_INCREF(a);
1090 return (PyObject *)a;
1091 }
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099 if (op == NULL)
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1102 op->ob_shash = -1;
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1108 }
1109 i = 0;
1110 if (i < size) {
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112 i = Py_SIZE(a);
1113 }
1114 while (i < size) {
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117 i += j;
1118 }
1119 return (PyObject *) op;
1120 }
1121
1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123
1124 static PyObject *
1125 string_slice(register PyStringObject *a, register Py_ssize_t i,
1126 register Py_ssize_t j)
1127 /* j -- may be negative! */
1128 {
1129 if (i < 0)
1130 i = 0;
1131 if (j < 0)
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1133 if (j > Py_SIZE(a))
1134 j = Py_SIZE(a);
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1137 Py_INCREF(a);
1138 return (PyObject *)a;
1139 }
1140 if (j < i)
1141 j = i;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1143 }
1144
1145 static int
1146 string_contains(PyObject *str_obj, PyObject *sub_obj)
1147 {
1148 if (!PyString_CheckExact(sub_obj)) {
1149 #ifdef Py_USING_UNICODE
1150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
1152 #endif
1153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157 return -1;
1158 }
1159 }
1160
1161 return stringlib_contains_obj(str_obj, sub_obj);
1162 }
1163
1164 static PyObject *
1165 string_item(PyStringObject *a, register Py_ssize_t i)
1166 {
1167 char pchar;
1168 PyObject *v;
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1171 return NULL;
1172 }
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1175 if (v == NULL)
1176 v = PyString_FromStringAndSize(&pchar, 1);
1177 else {
1178 #ifdef COUNT_ALLOCS
1179 one_strings++;
1180 #endif
1181 Py_INCREF(v);
1182 }
1183 return v;
1184 }
1185
1186 static PyObject*
1187 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1188 {
1189 int c;
1190 Py_ssize_t len_a, len_b;
1191 Py_ssize_t min_len;
1192 PyObject *result;
1193
1194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1197 goto out;
1198 }
1199 if (a == b) {
1200 switch (op) {
1201 case Py_EQ:case Py_LE:case Py_GE:
1202 result = Py_True;
1203 goto out;
1204 case Py_NE:case Py_LT:case Py_GT:
1205 result = Py_False;
1206 goto out;
1207 }
1208 }
1209 if (op == Py_EQ) {
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215 result = Py_True;
1216 } else {
1217 result = Py_False;
1218 }
1219 goto out;
1220 }
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1223 if (min_len > 0) {
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225 if (c==0)
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227 } else
1228 c = 0;
1229 if (c == 0)
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231 switch (op) {
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1238 default:
1239 result = Py_NotImplemented;
1240 goto out;
1241 }
1242 result = c ? Py_True : Py_False;
1243 out:
1244 Py_INCREF(result);
1245 return result;
1246 }
1247
1248 int
1249 _PyString_Eq(PyObject *o1, PyObject *o2)
1250 {
1251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1256 }
1257
1258 static long
1259 string_hash(PyStringObject *a)
1260 {
1261 register Py_ssize_t len;
1262 register unsigned char *p;
1263 register long x;
1264
1265 #ifdef Py_DEBUG
1266 assert(_Py_HashSecret_Initialized);
1267 #endif
1268 if (a->ob_shash != -1)
1269 return a->ob_shash;
1270 len = Py_SIZE(a);
1271 /*
1272 We make the hash of the empty string be 0, rather than using
1273 (prefix ^ suffix), since this slightly obfuscates the hash secret
1274 */
1275 if (len == 0) {
1276 a->ob_shash = 0;
1277 return 0;
1278 }
1279 p = (unsigned char *) a->ob_sval;
1280 x = _Py_HashSecret.prefix;
1281 x ^= *p << 7;
1282 while (--len >= 0)
1283 x = (1000003*x) ^ *p++;
1284 x ^= Py_SIZE(a);
1285 x ^= _Py_HashSecret.suffix;
1286 if (x == -1)
1287 x = -2;
1288 a->ob_shash = x;
1289 return x;
1290 }
1291
1292 static PyObject*
1293 string_subscript(PyStringObject* self, PyObject* item)
1294 {
1295 if (PyIndex_Check(item)) {
1296 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1297 if (i == -1 && PyErr_Occurred())
1298 return NULL;
1299 if (i < 0)
1300 i += PyString_GET_SIZE(self);
1301 return string_item(self, i);
1302 }
1303 else if (PySlice_Check(item)) {
1304 Py_ssize_t start, stop, step, slicelength, cur, i;
1305 char* source_buf;
1306 char* result_buf;
1307 PyObject* result;
1308
1309 if (PySlice_GetIndicesEx((PySliceObject*)item,
1310 PyString_GET_SIZE(self),
1311 &start, &stop, &step, &slicelength) < 0) {
1312 return NULL;
1313 }
1314
1315 if (slicelength <= 0) {
1316 return PyString_FromStringAndSize("", 0);
1317 }
1318 else if (start == 0 && step == 1 &&
1319 slicelength == PyString_GET_SIZE(self) &&
1320 PyString_CheckExact(self)) {
1321 Py_INCREF(self);
1322 return (PyObject *)self;
1323 }
1324 else if (step == 1) {
1325 return PyString_FromStringAndSize(
1326 PyString_AS_STRING(self) + start,
1327 slicelength);
1328 }
1329 else {
1330 source_buf = PyString_AsString((PyObject*)self);
1331 result_buf = (char *)PyMem_Malloc(slicelength);
1332 if (result_buf == NULL)
1333 return PyErr_NoMemory();
1334
1335 for (cur = start, i = 0; i < slicelength;
1336 cur += step, i++) {
1337 result_buf[i] = source_buf[cur];
1338 }
1339
1340 result = PyString_FromStringAndSize(result_buf,
1341 slicelength);
1342 PyMem_Free(result_buf);
1343 return result;
1344 }
1345 }
1346 else {
1347 PyErr_Format(PyExc_TypeError,
1348 "string indices must be integers, not %.200s",
1349 Py_TYPE(item)->tp_name);
1350 return NULL;
1351 }
1352 }
1353
1354 static Py_ssize_t
1355 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1356 {
1357 if ( index != 0 ) {
1358 PyErr_SetString(PyExc_SystemError,
1359 "accessing non-existent string segment");
1360 return -1;
1361 }
1362 *ptr = (void *)self->ob_sval;
1363 return Py_SIZE(self);
1364 }
1365
1366 static Py_ssize_t
1367 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1368 {
1369 PyErr_SetString(PyExc_TypeError,
1370 "Cannot use string as modifiable buffer");
1371 return -1;
1372 }
1373
1374 static Py_ssize_t
1375 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1376 {
1377 if ( lenp )
1378 *lenp = Py_SIZE(self);
1379 return 1;
1380 }
1381
1382 static Py_ssize_t
1383 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1384 {
1385 if ( index != 0 ) {
1386 PyErr_SetString(PyExc_SystemError,
1387 "accessing non-existent string segment");
1388 return -1;
1389 }
1390 *ptr = self->ob_sval;
1391 return Py_SIZE(self);
1392 }
1393
1394 static int
1395 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1396 {
1397 return PyBuffer_FillInfo(view, (PyObject*)self,
1398 (void *)self->ob_sval, Py_SIZE(self),
1399 1, flags);
1400 }
1401
/* Sequence protocol table for the string type.  The two assignment
   slots are 0: the table provides no item or slice assignment. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1412
/* Mapping protocol table for the string type: length and subscript
   handlers only.  The third slot is 0 (presumably mp_ass_subscript,
   i.e. no item assignment -- confirm against PyMappingMethods). */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1418
/* Buffer protocol table for the string type: read, write, segment
   count, char buffer and getbuffer handlers, in that order.  The final
   slot is left 0 (presumably the release-buffer hook -- confirm against
   PyBufferProcs). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */
};
1427
1428
1429
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is an argument-parsing format string: one optional object
   argument, followed by ':' and the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string with its first three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1438
1439 PyDoc_STRVAR(split__doc__,
1440 "S.split([sep [,maxsplit]]) -> list of strings\n\
1441 \n\
1442 Return a list of the words in the string S, using sep as the\n\
1443 delimiter string. If maxsplit is given, at most maxsplit\n\
1444 splits are done. If sep is not specified or is None, any\n\
1445 whitespace string is a separator and empty strings are removed\n\
1446 from the result.");
1447
1448 static PyObject *
1449 string_split(PyStringObject *self, PyObject *args)
1450 {
1451 Py_ssize_t len = PyString_GET_SIZE(self), n;
1452 Py_ssize_t maxsplit = -1;
1453 const char *s = PyString_AS_STRING(self), *sub;
1454 PyObject *subobj = Py_None;
1455
1456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1457 return NULL;
1458 if (maxsplit < 0)
1459 maxsplit = PY_SSIZE_T_MAX;
1460 if (subobj == Py_None)
1461 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1462 if (PyString_Check(subobj)) {
1463 sub = PyString_AS_STRING(subobj);
1464 n = PyString_GET_SIZE(subobj);
1465 }
1466 #ifdef Py_USING_UNICODE
1467 else if (PyUnicode_Check(subobj))
1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1469 #endif
1470 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1471 return NULL;
1472
1473 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1474 }
1475
1476 PyDoc_STRVAR(partition__doc__,
1477 "S.partition(sep) -> (head, sep, tail)\n\
1478 \n\
1479 Search for the separator sep in S, and return the part before it,\n\
1480 the separator itself, and the part after it. If the separator is not\n\
1481 found, return S and two empty strings.");
1482
1483 static PyObject *
1484 string_partition(PyStringObject *self, PyObject *sep_obj)
1485 {
1486 const char *sep;
1487 Py_ssize_t sep_len;
1488
1489 if (PyString_Check(sep_obj)) {
1490 sep = PyString_AS_STRING(sep_obj);
1491 sep_len = PyString_GET_SIZE(sep_obj);
1492 }
1493 #ifdef Py_USING_UNICODE
1494 else if (PyUnicode_Check(sep_obj))
1495 return PyUnicode_Partition((PyObject *) self, sep_obj);
1496 #endif
1497 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1498 return NULL;
1499
1500 return stringlib_partition(
1501 (PyObject*) self,
1502 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1503 sep_obj, sep, sep_len
1504 );
1505 }
1506
1507 PyDoc_STRVAR(rpartition__doc__,
1508 "S.rpartition(sep) -> (head, sep, tail)\n\
1509 \n\
1510 Search for the separator sep in S, starting at the end of S, and return\n\
1511 the part before it, the separator itself, and the part after it. If the\n\
1512 separator is not found, return two empty strings and S.");
1513
1514 static PyObject *
1515 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1516 {
1517 const char *sep;
1518 Py_ssize_t sep_len;
1519
1520 if (PyString_Check(sep_obj)) {
1521 sep = PyString_AS_STRING(sep_obj);
1522 sep_len = PyString_GET_SIZE(sep_obj);
1523 }
1524 #ifdef Py_USING_UNICODE
1525 else if (PyUnicode_Check(sep_obj))
1526 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1527 #endif
1528 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1529 return NULL;
1530
1531 return stringlib_rpartition(
1532 (PyObject*) self,
1533 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1534 sep_obj, sep, sep_len
1535 );
1536 }
1537
1538 PyDoc_STRVAR(rsplit__doc__,
1539 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1540 \n\
1541 Return a list of the words in the string S, using sep as the\n\
1542 delimiter string, starting at the end of the string and working\n\
1543 to the front. If maxsplit is given, at most maxsplit splits are\n\
1544 done. If sep is not specified or is None, any whitespace string\n\
1545 is a separator.");
1546
1547 static PyObject *
1548 string_rsplit(PyStringObject *self, PyObject *args)
1549 {
1550 Py_ssize_t len = PyString_GET_SIZE(self), n;
1551 Py_ssize_t maxsplit = -1;
1552 const char *s = PyString_AS_STRING(self), *sub;
1553 PyObject *subobj = Py_None;
1554
1555 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1556 return NULL;
1557 if (maxsplit < 0)
1558 maxsplit = PY_SSIZE_T_MAX;
1559 if (subobj == Py_None)
1560 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1561 if (PyString_Check(subobj)) {
1562 sub = PyString_AS_STRING(subobj);
1563 n = PyString_GET_SIZE(subobj);
1564 }
1565 #ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1568 #endif
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1570 return NULL;
1571
1572 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1573 }
1574
1575
1576 PyDoc_STRVAR(join__doc__,
1577 "S.join(iterable) -> string\n\
1578 \n\
1579 Return a string which is the concatenation of the strings in the\n\
1580 iterable. The separator between elements is S.");
1581
1582 static PyObject *
1583 string_join(PyStringObject *self, PyObject *orig)
1584 {
1585 char *sep = PyString_AS_STRING(self);
1586 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1587 PyObject *res = NULL;
1588 char *p;
1589 Py_ssize_t seqlen = 0;
1590 size_t sz = 0;
1591 Py_ssize_t i;
1592 PyObject *seq, *item;
1593
1594 seq = PySequence_Fast(orig, "");
1595 if (seq == NULL) {
1596 return NULL;
1597 }
1598
1599 seqlen = PySequence_Size(seq);
1600 if (seqlen == 0) {
1601 Py_DECREF(seq);
1602 return PyString_FromString("");
1603 }
1604 if (seqlen == 1) {
1605 item = PySequence_Fast_GET_ITEM(seq, 0);
1606 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1607 Py_INCREF(item);
1608 Py_DECREF(seq);
1609 return item;
1610 }
1611 }
1612
1613 /* There are at least two things to join, or else we have a subclass
1614 * of the builtin types in the sequence.
1615 * Do a pre-pass to figure out the total amount of space we'll
1616 * need (sz), see whether any argument is absurd, and defer to
1617 * the Unicode join if appropriate.
1618 */
1619 for (i = 0; i < seqlen; i++) {
1620 const size_t old_sz = sz;
1621 item = PySequence_Fast_GET_ITEM(seq, i);
1622 if (!PyString_Check(item)){
1623 #ifdef Py_USING_UNICODE
1624 if (PyUnicode_Check(item)) {
1625 /* Defer to Unicode join.
1626 * CAUTION: There's no gurantee that the
1627 * original sequence can be iterated over
1628 * again, so we must pass seq here.
1629 */
1630 PyObject *result;
1631 result = PyUnicode_Join((PyObject *)self, seq);
1632 Py_DECREF(seq);
1633 return result;
1634 }
1635 #endif
1636 PyErr_Format(PyExc_TypeError,
1637 "sequence item %zd: expected string,"
1638 " %.80s found",
1639 i, Py_TYPE(item)->tp_name);
1640 Py_DECREF(seq);
1641 return NULL;
1642 }
1643 sz += PyString_GET_SIZE(item);
1644 if (i != 0)
1645 sz += seplen;
1646 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1647 PyErr_SetString(PyExc_OverflowError,
1648 "join() result is too long for a Python string");
1649 Py_DECREF(seq);
1650 return NULL;
1651 }
1652 }
1653
1654 /* Allocate result space. */
1655 res = PyString_FromStringAndSize((char*)NULL, sz);
1656 if (res == NULL) {
1657 Py_DECREF(seq);
1658 return NULL;
1659 }
1660
1661 /* Catenate everything. */
1662 p = PyString_AS_STRING(res);
1663 for (i = 0; i < seqlen; ++i) {
1664 size_t n;
1665 item = PySequence_Fast_GET_ITEM(seq, i);
1666 n = PyString_GET_SIZE(item);
1667 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1668 p += n;
1669 if (i < seqlen - 1) {
1670 Py_MEMCPY(p, sep, seplen);
1671 p += seplen;
1672 }
1673 }
1674
1675 Py_DECREF(seq);
1676 return res;
1677 }
1678
1679 PyObject *
1680 _PyString_Join(PyObject *sep, PyObject *x)
1681 {
1682 assert(sep != NULL && PyString_Check(sep));
1683 assert(x != NULL);
1684 return string_join((PyStringObject *)sep, x);
1685 }
1686
/* helper macro to fixup start/end slice values
 *
 * end:   a value above len becomes len; a negative value has len added
 *        and, if still negative, becomes 0.
 * start: a negative value has len added and, if still negative,
 *        becomes 0.  A start above len is left unchanged.
 *
 * The expansion is a pair of bare if-statements (not wrapped in
 * do { } while (0)) and names start, end and len more than once, so
 * start and end must be plain lvalues and no argument may have side
 * effects.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1701
1702 Py_LOCAL_INLINE(Py_ssize_t)
1703 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1704 {
1705 PyObject *subobj;
1706 const char *sub;
1707 Py_ssize_t sub_len;
1708 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1709
1710 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1711 args, &subobj, &start, &end))
1712 return -2;
1713
1714 if (PyString_Check(subobj)) {
1715 sub = PyString_AS_STRING(subobj);
1716 sub_len = PyString_GET_SIZE(subobj);
1717 }
1718 #ifdef Py_USING_UNICODE
1719 else if (PyUnicode_Check(subobj))
1720 return PyUnicode_Find(
1721 (PyObject *)self, subobj, start, end, dir);
1722 #endif
1723 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1724 /* XXX - the "expected a character buffer object" is pretty
1725 confusing for a non-expert. remap to something else ? */
1726 return -2;
1727
1728 if (dir > 0)
1729 return stringlib_find_slice(
1730 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1731 sub, sub_len, start, end);
1732 else
1733 return stringlib_rfind_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
1736 }
1737
1738
1739 PyDoc_STRVAR(find__doc__,
1740 "S.find(sub [,start [,end]]) -> int\n\
1741 \n\
1742 Return the lowest index in S where substring sub is found,\n\
1743 such that sub is contained within S[start:end]. Optional\n\
1744 arguments start and end are interpreted as in slice notation.\n\
1745 \n\
1746 Return -1 on failure.");
1747
1748 static PyObject *
1749 string_find(PyStringObject *self, PyObject *args)
1750 {
1751 Py_ssize_t result = string_find_internal(self, args, +1);
1752 if (result == -2)
1753 return NULL;
1754 return PyInt_FromSsize_t(result);
1755 }
1756
1757
1758 PyDoc_STRVAR(index__doc__,
1759 "S.index(sub [,start [,end]]) -> int\n\
1760 \n\
1761 Like S.find() but raise ValueError when the substring is not found.");
1762
1763 static PyObject *
1764 string_index(PyStringObject *self, PyObject *args)
1765 {
1766 Py_ssize_t result = string_find_internal(self, args, +1);
1767 if (result == -2)
1768 return NULL;
1769 if (result == -1) {
1770 PyErr_SetString(PyExc_ValueError,
1771 "substring not found");
1772 return NULL;
1773 }
1774 return PyInt_FromSsize_t(result);
1775 }
1776
1777
1778 PyDoc_STRVAR(rfind__doc__,
1779 "S.rfind(sub [,start [,end]]) -> int\n\
1780 \n\
1781 Return the highest index in S where substring sub is found,\n\
1782 such that sub is contained within S[start:end]. Optional\n\
1783 arguments start and end are interpreted as in slice notation.\n\
1784 \n\
1785 Return -1 on failure.");
1786
1787 static PyObject *
1788 string_rfind(PyStringObject *self, PyObject *args)
1789 {
1790 Py_ssize_t result = string_find_internal(self, args, -1);
1791 if (result == -2)
1792 return NULL;
1793 return PyInt_FromSsize_t(result);
1794 }
1795
1796
1797 PyDoc_STRVAR(rindex__doc__,
1798 "S.rindex(sub [,start [,end]]) -> int\n\
1799 \n\
1800 Like S.rfind() but raise ValueError when the substring is not found.");
1801
1802 static PyObject *
1803 string_rindex(PyStringObject *self, PyObject *args)
1804 {
1805 Py_ssize_t result = string_find_internal(self, args, -1);
1806 if (result == -2)
1807 return NULL;
1808 if (result == -1) {
1809 PyErr_SetString(PyExc_ValueError,
1810 "substring not found");
1811 return NULL;
1812 }
1813 return PyInt_FromSsize_t(result);
1814 }
1815
1816
1817 Py_LOCAL_INLINE(PyObject *)
1818 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1819 {
1820 char *s = PyString_AS_STRING(self);
1821 Py_ssize_t len = PyString_GET_SIZE(self);
1822 char *sep = PyString_AS_STRING(sepobj);
1823 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1824 Py_ssize_t i, j;
1825
1826 i = 0;
1827 if (striptype != RIGHTSTRIP) {
1828 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1829 i++;
1830 }
1831 }
1832
1833 j = len;
1834 if (striptype != LEFTSTRIP) {
1835 do {
1836 j--;
1837 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1838 j++;
1839 }
1840
1841 if (i == 0 && j == len && PyString_CheckExact(self)) {
1842 Py_INCREF(self);
1843 return (PyObject*)self;
1844 }
1845 else
1846 return PyString_FromStringAndSize(s+i, j-i);
1847 }
1848
1849
1850 Py_LOCAL_INLINE(PyObject *)
1851 do_strip(PyStringObject *self, int striptype)
1852 {
1853 char *s = PyString_AS_STRING(self);
1854 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1855
1856 i = 0;
1857 if (striptype != RIGHTSTRIP) {
1858 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1859 i++;
1860 }
1861 }
1862
1863 j = len;
1864 if (striptype != LEFTSTRIP) {
1865 do {
1866 j--;
1867 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1868 j++;
1869 }
1870
1871 if (i == 0 && j == len && PyString_CheckExact(self)) {
1872 Py_INCREF(self);
1873 return (PyObject*)self;
1874 }
1875 else
1876 return PyString_FromStringAndSize(s+i, j-i);
1877 }
1878
1879
1880 Py_LOCAL_INLINE(PyObject *)
1881 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1882 {
1883 PyObject *sep = NULL;
1884
1885 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1886 return NULL;
1887
1888 if (sep != NULL && sep != Py_None) {
1889 if (PyString_Check(sep))
1890 return do_xstrip(self, striptype, sep);
1891 #ifdef Py_USING_UNICODE
1892 else if (PyUnicode_Check(sep)) {
1893 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1894 PyObject *res;
1895 if (uniself==NULL)
1896 return NULL;
1897 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1898 striptype, sep);
1899 Py_DECREF(uniself);
1900 return res;
1901 }
1902 #endif
1903 PyErr_Format(PyExc_TypeError,
1904 #ifdef Py_USING_UNICODE
1905 "%s arg must be None, str or unicode",
1906 #else
1907 "%s arg must be None or str",
1908 #endif
1909 STRIPNAME(striptype));
1910 return NULL;
1911 }
1912
1913 return do_strip(self, striptype);
1914 }
1915
1916
1917 PyDoc_STRVAR(strip__doc__,
1918 "S.strip([chars]) -> string or unicode\n\
1919 \n\
1920 Return a copy of the string S with leading and trailing\n\
1921 whitespace removed.\n\
1922 If chars is given and not None, remove characters in chars instead.\n\
1923 If chars is unicode, S will be converted to unicode before stripping");
1924
1925 static PyObject *
1926 string_strip(PyStringObject *self, PyObject *args)
1927 {
1928 if (PyTuple_GET_SIZE(args) == 0)
1929 return do_strip(self, BOTHSTRIP); /* Common case */
1930 else
1931 return do_argstrip(self, BOTHSTRIP, args);
1932 }
1933
1934
1935 PyDoc_STRVAR(lstrip__doc__,
1936 "S.lstrip([chars]) -> string or unicode\n\
1937 \n\
1938 Return a copy of the string S with leading whitespace removed.\n\
1939 If chars is given and not None, remove characters in chars instead.\n\
1940 If chars is unicode, S will be converted to unicode before stripping");
1941
1942 static PyObject *
1943 string_lstrip(PyStringObject *self, PyObject *args)
1944 {
1945 if (PyTuple_GET_SIZE(args) == 0)
1946 return do_strip(self, LEFTSTRIP); /* Common case */
1947 else
1948 return do_argstrip(self, LEFTSTRIP, args);
1949 }
1950
1951
1952 PyDoc_STRVAR(rstrip__doc__,
1953 "S.rstrip([chars]) -> string or unicode\n\
1954 \n\
1955 Return a copy of the string S with trailing whitespace removed.\n\
1956 If chars is given and not None, remove characters in chars instead.\n\
1957 If chars is unicode, S will be converted to unicode before stripping");
1958
1959 static PyObject *
1960 string_rstrip(PyStringObject *self, PyObject *args)
1961 {
1962 if (PyTuple_GET_SIZE(args) == 0)
1963 return do_strip(self, RIGHTSTRIP); /* Common case */
1964 else
1965 return do_argstrip(self, RIGHTSTRIP, args);
1966 }
1967
1968
1969 PyDoc_STRVAR(lower__doc__,
1970 "S.lower() -> string\n\
1971 \n\
1972 Return a copy of the string S converted to lowercase.");
1973
1974 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1975 #ifndef _tolower
1976 #define _tolower tolower
1977 #endif
1978
1979 static PyObject *
1980 string_lower(PyStringObject *self)
1981 {
1982 char *s;
1983 Py_ssize_t i, n = PyString_GET_SIZE(self);
1984 PyObject *newobj;
1985
1986 newobj = PyString_FromStringAndSize(NULL, n);
1987 if (!newobj)
1988 return NULL;
1989
1990 s = PyString_AS_STRING(newobj);
1991
1992 Py_MEMCPY(s, PyString_AS_STRING(self), n);
1993
1994 for (i = 0; i < n; i++) {
1995 int c = Py_CHARMASK(s[i]);
1996 if (isupper(c))
1997 s[i] = _tolower(c);
1998 }
1999
2000 return newobj;
2001 }
2002
2003 PyDoc_STRVAR(upper__doc__,
2004 "S.upper() -> string\n\
2005 \n\
2006 Return a copy of the string S converted to uppercase.");
2007
2008 #ifndef _toupper
2009 #define _toupper toupper
2010 #endif
2011
2012 static PyObject *
2013 string_upper(PyStringObject *self)
2014 {
2015 char *s;
2016 Py_ssize_t i, n = PyString_GET_SIZE(self);
2017 PyObject *newobj;
2018
2019 newobj = PyString_FromStringAndSize(NULL, n);
2020 if (!newobj)
2021 return NULL;
2022
2023 s = PyString_AS_STRING(newobj);
2024
2025 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2026
2027 for (i = 0; i < n; i++) {
2028 int c = Py_CHARMASK(s[i]);
2029 if (islower(c))
2030 s[i] = _toupper(c);
2031 }
2032
2033 return newobj;
2034 }
2035
2036 PyDoc_STRVAR(title__doc__,
2037 "S.title() -> string\n\
2038 \n\
2039 Return a titlecased version of S, i.e. words start with uppercase\n\
2040 characters, all remaining cased characters have lowercase.");
2041
2042 static PyObject*
2043 string_title(PyStringObject *self)
2044 {
2045 char *s = PyString_AS_STRING(self), *s_new;
2046 Py_ssize_t i, n = PyString_GET_SIZE(self);
2047 int previous_is_cased = 0;
2048 PyObject *newobj;
2049
2050 newobj = PyString_FromStringAndSize(NULL, n);
2051 if (newobj == NULL)
2052 return NULL;
2053 s_new = PyString_AsString(newobj);
2054 for (i = 0; i < n; i++) {
2055 int c = Py_CHARMASK(*s++);
2056 if (islower(c)) {
2057 if (!previous_is_cased)
2058 c = toupper(c);
2059 previous_is_cased = 1;
2060 } else if (isupper(c)) {
2061 if (previous_is_cased)
2062 c = tolower(c);
2063 previous_is_cased = 1;
2064 } else
2065 previous_is_cased = 0;
2066 *s_new++ = c;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
2067 }
2068 return newobj;
2069 }
2070
2071 PyDoc_STRVAR(capitalize__doc__,
2072 "S.capitalize() -> string\n\
2073 \n\
2074 Return a copy of the string S with only its first character\n\
2075 capitalized.");
2076
2077 static PyObject *
2078 string_capitalize(PyStringObject *self)
2079 {
2080 char *s = PyString_AS_STRING(self), *s_new;
2081 Py_ssize_t i, n = PyString_GET_SIZE(self);
2082 PyObject *newobj;
2083
2084 newobj = PyString_FromStringAndSize(NULL, n);
2085 if (newobj == NULL)
2086 return NULL;
2087 s_new = PyString_AsString(newobj);
2088 if (0 < n) {
2089 int c = Py_CHARMASK(*s++);
2090 if (islower(c))
2091 *s_new = toupper(c);
2092 else
2093 *s_new = c;
2094 s_new++;
2095 }
2096 for (i = 1; i < n; i++) {
2097 int c = Py_CHARMASK(*s++);
2098 if (isupper(c))
2099 *s_new = tolower(c);
2100 else
2101 *s_new = c;
2102 s_new++;
2103 }
2104 return newobj;
2105 }
2106
2107
2108 PyDoc_STRVAR(count__doc__,
2109 "S.count(sub[, start[, end]]) -> int\n\
2110 \n\
2111 Return the number of non-overlapping occurrences of substring sub in\n\
2112 string S[start:end]. Optional arguments start and end are interpreted\n\
2113 as in slice notation.");
2114
2115 static PyObject *
2116 string_count(PyStringObject *self, PyObject *args)
2117 {
2118 PyObject *sub_obj;
2119 const char *str = PyString_AS_STRING(self), *sub;
2120 Py_ssize_t sub_len;
2121 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2122
2123 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2124 return NULL;
2125
2126 if (PyString_Check(sub_obj)) {
2127 sub = PyString_AS_STRING(sub_obj);
2128 sub_len = PyString_GET_SIZE(sub_obj);
2129 }
2130 #ifdef Py_USING_UNICODE
2131 else if (PyUnicode_Check(sub_obj)) {
2132 Py_ssize_t count;
2133 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2134 if (count == -1)
2135 return NULL;
2136 else
2137 return PyInt_FromSsize_t(count);
2138 }
2139 #endif
2140 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2141 return NULL;
2142
2143 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2144
2145 return PyInt_FromSsize_t(
2146 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2147 );
2148 }
2149
2150 PyDoc_STRVAR(swapcase__doc__,
2151 "S.swapcase() -> string\n\
2152 \n\
2153 Return a copy of the string S with uppercase characters\n\
2154 converted to lowercase and vice versa.");
2155
2156 static PyObject *
2157 string_swapcase(PyStringObject *self)
2158 {
2159 char *s = PyString_AS_STRING(self), *s_new;
2160 Py_ssize_t i, n = PyString_GET_SIZE(self);
2161 PyObject *newobj;
2162
2163 newobj = PyString_FromStringAndSize(NULL, n);
2164 if (newobj == NULL)
2165 return NULL;
2166 s_new = PyString_AsString(newobj);
2167 for (i = 0; i < n; i++) {
2168 int c = Py_CHARMASK(*s++);
2169 if (islower(c)) {
2170 *s_new = toupper(c);
2171 }
2172 else if (isupper(c)) {
2173 *s_new = tolower(c);
2174 }
2175 else
2176 *s_new = c;
2177 s_new++;
2178 }
2179 return newobj;
2180 }
2181
2182
2183 PyDoc_STRVAR(translate__doc__,
2184 "S.translate(table [,deletechars]) -> string\n\
2185 \n\
2186 Return a copy of the string S, where all characters occurring\n\
2187 in the optional argument deletechars are removed, and the\n\
2188 remaining characters have been mapped through the given\n\
2189 translation table, which must be a string of length 256 or None.\n\
2190 If the table argument is None, no translation is applied and\n\
2191 the operation simply removes the characters in deletechars.");
2192
2193 static PyObject *
2194 string_translate(PyStringObject *self, PyObject *args)
2195 {
2196 register char *input, *output;
2197 const char *table;
2198 register Py_ssize_t i, c, changed = 0;
2199 PyObject *input_obj = (PyObject*)self;
2200 const char *output_start, *del_table=NULL;
2201 Py_ssize_t inlen, tablen, dellen = 0;
2202 PyObject *result;
2203 int trans_table[256];
2204 PyObject *tableobj, *delobj = NULL;
2205
2206 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207 &tableobj, &delobj))
2208 return NULL;
2209
2210 if (PyString_Check(tableobj)) {
2211 table = PyString_AS_STRING(tableobj);
2212 tablen = PyString_GET_SIZE(tableobj);
2213 }
2214 else if (tableobj == Py_None) {
2215 table = NULL;
2216 tablen = 256;
2217 }
2218 #ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(tableobj)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2222 to be deleted. */
2223 if (delobj != NULL) {
2224 PyErr_SetString(PyExc_TypeError,
2225 "deletions are implemented differently for unicode");
2226 return NULL;
2227 }
2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229 }
2230 #endif
2231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2232 return NULL;
2233
2234 if (tablen != 256) {
2235 PyErr_SetString(PyExc_ValueError,
2236 "translation table must be 256 characters long");
2237 return NULL;
2238 }
2239
2240 if (delobj != NULL) {
2241 if (PyString_Check(delobj)) {
2242 del_table = PyString_AS_STRING(delobj);
2243 dellen = PyString_GET_SIZE(delobj);
2244 }
2245 #ifdef Py_USING_UNICODE
2246 else if (PyUnicode_Check(delobj)) {
2247 PyErr_SetString(PyExc_TypeError,
2248 "deletions are implemented differently for unicode");
2249 return NULL;
2250 }
2251 #endif
2252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2253 return NULL;
2254 }
2255 else {
2256 del_table = NULL;
2257 dellen = 0;
2258 }
2259
2260 inlen = PyString_GET_SIZE(input_obj);
2261 result = PyString_FromStringAndSize((char *)NULL, inlen);
2262 if (result == NULL)
2263 return NULL;
2264 output_start = output = PyString_AsString(result);
2265 input = PyString_AS_STRING(input_obj);
2266
2267 if (dellen == 0 && table != NULL) {
2268 /* If no deletions are required, use faster code */
2269 for (i = inlen; --i >= 0; ) {
2270 c = Py_CHARMASK(*input++);
2271 if (Py_CHARMASK((*output++ = table[c])) != c)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
2272 changed = 1;
2273 }
2274 if (changed || !PyString_CheckExact(input_obj))
2275 return result;
2276 Py_DECREF(result);
2277 Py_INCREF(input_obj);
2278 return input_obj;
2279 }
2280
2281 if (table == NULL) {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2284 } else {
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(table[i]);
2287 }
2288
2289 for (i = 0; i < dellen; i++)
2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2291
2292 for (i = inlen; --i >= 0; ) {
2293 c = Py_CHARMASK(*input++);
2294 if (trans_table[c] != -1)
2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2296 continue;
2297 changed = 1;
2298 }
2299 if (!changed && PyString_CheckExact(input_obj)) {
2300 Py_DECREF(result);
2301 Py_INCREF(input_obj);
2302 return input_obj;
2303 }
2304 /* Fix the size of the resulting string */
2305 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2306 return NULL;
2307 return result;
2308 }
2309
2310
2311 /* find and count characters and substrings */
2312
/* Locate the first byte equal to c in the target_len bytes starting at
   target; evaluates to a char * to that byte, or NULL if memchr() finds
   none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2315
2316 /* String ops must return a string. */
2317 /* If the object is subclass of string, create a copy */
2318 Py_LOCAL(PyStringObject *)
2319 return_self(PyStringObject *self)
2320 {
2321 if (PyString_CheckExact(self)) {
2322 Py_INCREF(self);
2323 return self;
2324 }
2325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
2328 }
2329
2330 Py_LOCAL_INLINE(Py_ssize_t)
2331 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332 {
2333 Py_ssize_t count=0;
2334 const char *start=target;
2335 const char *end=target+target_len;
2336
2337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2338 count++;
2339 if (count >= maxcount)
2340 break;
2341 start += 1;
2342 }
2343 return count;
2344 }
2345
2346
2347 /* Algorithms for different cases of string replacement */
2348
2349 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2350 Py_LOCAL(PyStringObject *)
2351 replace_interleave(PyStringObject *self,
2352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
2354 {
2355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
2358 PyStringObject *result;
2359
2360 self_len = PyString_GET_SIZE(self);
2361
2362 /* 1 at the end plus 1 after every character */
2363 count = self_len+1;
2364 if (maxcount < count)
2365 count = maxcount;
2366
2367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2373 return NULL;
2374 }
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2379 return NULL;
2380 }
2381
2382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
2384 return NULL;
2385
2386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
2388
2389 /* TODO: special case single character, which doesn't need memcpy */
2390
2391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 count -= 1;
2395
2396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2400 }
2401
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2404
2405 return result;
2406 }
2407
2408 /* Special case for deleting a single character */
2409 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2410 Py_LOCAL(PyStringObject *)
2411 replace_delete_single_character(PyStringObject *self,
2412 char from_c, Py_ssize_t maxcount)
2413 {
2414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count;
2418 PyStringObject *result;
2419
2420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
2422
2423 count = countchar(self_s, self_len, from_c, maxcount);
2424 if (count == 0) {
2425 return return_self(self);
2426 }
2427
2428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
2430
2431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2433 return NULL;
2434 result_s = PyString_AS_STRING(result);
2435
2436 start = self_s;
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2440 if (next == NULL)
2441 break;
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2444 start = next+1;
2445 }
2446 Py_MEMCPY(result_s, start, end-start);
2447
2448 return result;
2449 }
2450
2451 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452
2453 Py_LOCAL(PyStringObject *)
2454 replace_delete_substring(PyStringObject *self,
2455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
2461 PyStringObject *result;
2462
2463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
2465
2466 count = stringlib_count(self_s, self_len,
2467 from_s, from_len,
2468 maxcount);
2469
2470 if (count == 0) {
2471 /* no matches */
2472 return return_self(self);
2473 }
2474
2475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
2477
2478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2480 return NULL;
2481
2482 result_s = PyString_AS_STRING(result);
2483
2484 start = self_s;
2485 end = self_s + self_len;
2486 while (count-- > 0) {
2487 offset = stringlib_find(start, end-start,
2488 from_s, from_len,
2489 0);
2490 if (offset == -1)
2491 break;
2492 next = start + offset;
2493
2494 Py_MEMCPY(result_s, start, next-start);
2495
2496 result_s += (next-start);
2497 start = next+from_len;
2498 }
2499 Py_MEMCPY(result_s, start, end-start);
2500 return result;
2501 }
2502
2503 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2504 Py_LOCAL(PyStringObject *)
2505 replace_single_character_in_place(PyStringObject *self,
2506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
2508 {
2509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
2511 PyStringObject *result;
2512
2513 /* The result string will be the same size */
2514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
2516
2517 next = findchar(self_s, self_len, from_c);
2518
2519 if (next == NULL) {
2520 /* No matches; return the original string */
2521 return return_self(self);
2522 }
2523
2524 /* Need to make a new string */
2525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2526 if (result == NULL)
2527 return NULL;
2528 result_s = PyString_AS_STRING(result);
2529 Py_MEMCPY(result_s, self_s, self_len);
2530
2531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2533 *start = to_c;
2534 start++;
2535 end = result_s + self_len;
2536
2537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2539 if (next == NULL)
2540 break;
2541 *next = to_c;
2542 start = next+1;
2543 }
2544
2545 return result;
2546 }
2547
2548 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2549 Py_LOCAL(PyStringObject *)
2550 replace_substring_in_place(PyStringObject *self,
2551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
2554 {
2555 char *result_s, *start, *end;
2556 char *self_s;
2557 Py_ssize_t self_len, offset;
2558 PyStringObject *result;
2559
2560 /* The result string will be the same size */
2561
2562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
2564
2565 offset = stringlib_find(self_s, self_len,
2566 from_s, from_len,
2567 0);
2568 if (offset == -1) {
2569 /* No matches; return the original string */
2570 return return_self(self);
2571 }
2572
2573 /* Need to make a new string */
2574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575 if (result == NULL)
2576 return NULL;
2577 result_s = PyString_AS_STRING(result);
2578 Py_MEMCPY(result_s, self_s, self_len);
2579
2580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2583 start += from_len;
2584 end = result_s + self_len;
2585
2586 while ( --maxcount > 0) {
2587 offset = stringlib_find(start, end-start,
2588 from_s, from_len,
2589 0);
2590 if (offset==-1)
2591 break;
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2594 }
2595
2596 return result;
2597 }
2598
2599 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2600 Py_LOCAL(PyStringObject *)
2601 replace_single_character(PyStringObject *self,
2602 char from_c,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
2605 {
2606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
2610 PyStringObject *result;
2611
2612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
2614
2615 count = countchar(self_s, self_len, from_c, maxcount);
2616 if (count == 0) {
2617 /* no matches, return unchanged */
2618 return return_self(self);
2619 }
2620
2621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626 return NULL;
2627 }
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631 return NULL;
2632 }
2633
2634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2636 return NULL;
2637 result_s = PyString_AS_STRING(result);
2638
2639 start = self_s;
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
2645
2646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2649 result_s += to_len;
2650 start += 1;
2651 } else {
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start = next+1;
2658 }
2659 }
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
2662
2663 return result;
2664 }
2665
2666 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2667 Py_LOCAL(PyStringObject *)
2668 replace_substring(PyStringObject *self,
2669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
2676 PyStringObject *result;
2677
2678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
2680
2681 count = stringlib_count(self_s, self_len,
2682 from_s, from_len,
2683 maxcount);
2684
2685 if (count == 0) {
2686 /* no matches, return unchanged */
2687 return return_self(self);
2688 }
2689
2690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695 return NULL;
2696 }
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
2702
2703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2705 return NULL;
2706 result_s = PyString_AS_STRING(result);
2707
2708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2713 0);
2714 if (offset == -1)
2715 break;
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2720 result_s += to_len;
2721 start += from_len;
2722 } else {
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start = next+from_len;
2729 }
2730 }
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
2733
2734 return result;
2735 }
2736
2737
2738 Py_LOCAL(PyStringObject *)
2739 replace(PyStringObject *self,
2740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
2743 {
2744 if (maxcount < 0) {
2745 maxcount = PY_SSIZE_T_MAX;
2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self);
2749 }
2750
2751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2755 }
2756
2757 /* Handle zero-length special cases */
2758
2759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2764 }
2765
2766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self) == 0) {
2770 return return_self(self);
2771 }
2772
2773 if (to_len == 0) {
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2778 } else {
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2780 }
2781 }
2782
2783 /* Handle special case where both strings have the same length */
2784
2785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2788 self,
2789 from_s[0],
2790 to_s[0],
2791 maxcount);
2792 } else {
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2795 }
2796 }
2797
2798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2802 } else {
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805 }
2806 }
2807
2808 PyDoc_STRVAR(replace__doc__,
2809 "S.replace(old, new[, count]) -> string\n\
2810 \n\
2811 Return a copy of string S with all occurrences of substring\n\
2812 old replaced by new. If the optional argument count is\n\
2813 given, only the first count occurrences are replaced.");
2814
2815 static PyObject *
2816 string_replace(PyStringObject *self, PyObject *args)
2817 {
2818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
2822
2823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824 return NULL;
2825
2826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2829 }
2830 #ifdef Py_USING_UNICODE
2831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2833 from, to, count);
2834 #endif
2835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836 return NULL;
2837
2838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
2841 }
2842 #ifdef Py_USING_UNICODE
2843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2845 from, to, count);
2846 #endif
2847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848 return NULL;
2849
2850 return (PyObject *)replace((PyStringObject *) self,
2851 from_s, from_len,
2852 to_s, to_len, count);
2853 }
2854
2855 /** End DALKE **/
2856
2857 /* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2860 */
2861 Py_LOCAL(int)
2862 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2863 Py_ssize_t end, int direction)
2864 {
2865 Py_ssize_t len = PyString_GET_SIZE(self);
2866 Py_ssize_t slen;
2867 const char* sub;
2868 const char* str;
2869
2870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
2873 }
2874 #ifdef Py_USING_UNICODE
2875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
2878 #endif
2879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880 return -1;
2881 str = PyString_AS_STRING(self);
2882
2883 ADJUST_INDICES(start, end, len);
2884
2885 if (direction < 0) {
2886 /* startswith */
2887 if (start+slen > len)
2888 return 0;
2889 } else {
2890 /* endswith */
2891 if (end-start < slen || start > len)
2892 return 0;
2893
2894 if (end-slen > start)
2895 start = end - slen;
2896 }
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2899 return 0;
2900 }
2901
2902
2903 PyDoc_STRVAR(startswith__doc__,
2904 "S.startswith(prefix[, start[, end]]) -> bool\n\
2905 \n\
2906 Return True if S starts with the specified prefix, False otherwise.\n\
2907 With optional start, test S beginning at that position.\n\
2908 With optional end, stop comparing S at that position.\n\
2909 prefix can also be a tuple of strings to try.");
2910
2911 static PyObject *
2912 string_startswith(PyStringObject *self, PyObject *args)
2913 {
2914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2916 PyObject *subobj;
2917 int result;
2918
2919 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2920 return NULL;
2921 if (PyTuple_Check(subobj)) {
2922 Py_ssize_t i;
2923 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2924 result = _string_tailmatch(self,
2925 PyTuple_GET_ITEM(subobj, i),
2926 start, end, -1);
2927 if (result == -1)
2928 return NULL;
2929 else if (result) {
2930 Py_RETURN_TRUE;
2931 }
2932 }
2933 Py_RETURN_FALSE;
2934 }
2935 result = _string_tailmatch(self, subobj, start, end, -1);
2936 if (result == -1) {
2937 if (PyErr_ExceptionMatches(PyExc_TypeError))
2938 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2939 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2940 return NULL;
2941 }
2942 else
2943 return PyBool_FromLong(result);
2944 }
2945
2946
2947 PyDoc_STRVAR(endswith__doc__,
2948 "S.endswith(suffix[, start[, end]]) -> bool\n\
2949 \n\
2950 Return True if S ends with the specified suffix, False otherwise.\n\
2951 With optional start, test S beginning at that position.\n\
2952 With optional end, stop comparing S at that position.\n\
2953 suffix can also be a tuple of strings to try.");
2954
2955 static PyObject *
2956 string_endswith(PyStringObject *self, PyObject *args)
2957 {
2958 Py_ssize_t start = 0;
2959 Py_ssize_t end = PY_SSIZE_T_MAX;
2960 PyObject *subobj;
2961 int result;
2962
2963 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2964 return NULL;
2965 if (PyTuple_Check(subobj)) {
2966 Py_ssize_t i;
2967 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2968 result = _string_tailmatch(self,
2969 PyTuple_GET_ITEM(subobj, i),
2970 start, end, +1);
2971 if (result == -1)
2972 return NULL;
2973 else if (result) {
2974 Py_RETURN_TRUE;
2975 }
2976 }
2977 Py_RETURN_FALSE;
2978 }
2979 result = _string_tailmatch(self, subobj, start, end, +1);
2980 if (result == -1) {
2981 if (PyErr_ExceptionMatches(PyExc_TypeError))
2982 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2983 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2984 return NULL;
2985 }
2986 else
2987 return PyBool_FromLong(result);
2988 }
2989
2990
2991 PyDoc_STRVAR(encode__doc__,
2992 "S.encode([encoding[,errors]]) -> object\n\
2993 \n\
2994 Encodes S using the codec registered for encoding. encoding defaults\n\
2995 to the default encoding. errors may be given to set a different error\n\
2996 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2997 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2998 'xmlcharrefreplace' as well as any other name registered with\n\
2999 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3000
3001 static PyObject *
3002 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3003 {
3004 static char *kwlist[] = {"encoding", "errors", 0};
3005 char *encoding = NULL;
3006 char *errors = NULL;
3007 PyObject *v;
3008
3009 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3010 kwlist, &encoding, &errors))
3011 return NULL;
3012 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3013 if (v == NULL)
3014 goto onError;
3015 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3016 PyErr_Format(PyExc_TypeError,
3017 "encoder did not return a string/unicode object "
3018 "(type=%.400s)",
3019 Py_TYPE(v)->tp_name);
3020 Py_DECREF(v);
3021 return NULL;
3022 }
3023 return v;
3024
3025 onError:
3026 return NULL;
3027 }
3028
3029
3030 PyDoc_STRVAR(decode__doc__,
3031 "S.decode([encoding[,errors]]) -> object\n\
3032 \n\
3033 Decodes S using the codec registered for encoding. encoding defaults\n\
3034 to the default encoding. errors may be given to set a different error\n\
3035 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3036 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3037 as well as any other name registered with codecs.register_error that is\n\
3038 able to handle UnicodeDecodeErrors.");
3039
3040 static PyObject *
3041 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3042 {
3043 static char *kwlist[] = {"encoding", "errors", 0};
3044 char *encoding = NULL;
3045 char *errors = NULL;
3046 PyObject *v;
3047
3048 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3049 kwlist, &encoding, &errors))
3050 return NULL;
3051 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3052 if (v == NULL)
3053 goto onError;
3054 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3055 PyErr_Format(PyExc_TypeError,
3056 "decoder did not return a string/unicode object "
3057 "(type=%.400s)",
3058 Py_TYPE(v)->tp_name);
3059 Py_DECREF(v);
3060 return NULL;
3061 }
3062 return v;
3063
3064 onError:
3065 return NULL;
3066 }
3067
3068
3069 PyDoc_STRVAR(expandtabs__doc__,
3070 "S.expandtabs([tabsize]) -> string\n\
3071 \n\
3072 Return a copy of S where all tab characters are expanded using spaces.\n\
3073 If tabsize is not given, a tab size of 8 characters is assumed.");
3074
3075 static PyObject*
3076 string_expandtabs(PyStringObject *self, PyObject *args)
3077 {
3078 const char *e, *p, *qe;
3079 char *q;
3080 Py_ssize_t i, j, incr;
3081 PyObject *u;
3082 int tabsize = 8;
3083
3084 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3085 return NULL;
3086
3087 /* First pass: determine size of output string */
3088 i = 0; /* chars up to and including most recent \n or \r */
3089 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3090 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3091 for (p = PyString_AS_STRING(self); p < e; p++)
3092 if (*p == '\t') {
3093 if (tabsize > 0) {
3094 incr = tabsize - (j % tabsize);
3095 if (j > PY_SSIZE_T_MAX - incr)
3096 goto overflow1;
3097 j += incr;
3098 }
3099 }
3100 else {
3101 if (j > PY_SSIZE_T_MAX - 1)
3102 goto overflow1;
3103 j++;
3104 if (*p == '\n' || *p == '\r') {
3105 if (i > PY_SSIZE_T_MAX - j)
3106 goto overflow1;
3107 i += j;
3108 j = 0;
3109 }
3110 }
3111
3112 if (i > PY_SSIZE_T_MAX - j)
3113 goto overflow1;
3114
3115 /* Second pass: create output string and fill it */
3116 u = PyString_FromStringAndSize(NULL, i + j);
3117 if (!u)
3118 return NULL;
3119
3120 j = 0; /* same as in first pass */
3121 q = PyString_AS_STRING(u); /* next output char */
3122 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3123
3124 for (p = PyString_AS_STRING(self); p < e; p++)
3125 if (*p == '\t') {
3126 if (tabsize > 0) {
3127 i = tabsize - (j % tabsize);
3128 j += i;
3129 while (i--) {
3130 if (q >= qe)
3131 goto overflow2;
3132 *q++ = ' ';
3133 }
3134 }
3135 }
3136 else {
3137 if (q >= qe)
3138 goto overflow2;
3139 *q++ = *p;
3140 j++;
3141 if (*p == '\n' || *p == '\r')
3142 j = 0;
3143 }
3144
3145 return u;
3146
3147 overflow2:
3148 Py_DECREF(u);
3149 overflow1:
3150 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3151 return NULL;
3152 }
3153
3154 Py_LOCAL_INLINE(PyObject *)
3155 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3156 {
3157 PyObject *u;
3158
3159 if (left < 0)
3160 left = 0;
3161 if (right < 0)
3162 right = 0;
3163
3164 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3165 Py_INCREF(self);
3166 return (PyObject *)self;
3167 }
3168
3169 u = PyString_FromStringAndSize(NULL,
3170 left + PyString_GET_SIZE(self) + right);
3171 if (u) {
3172 if (left)
3173 memset(PyString_AS_STRING(u), fill, left);
3174 Py_MEMCPY(PyString_AS_STRING(u) + left,
3175 PyString_AS_STRING(self),
3176 PyString_GET_SIZE(self));
3177 if (right)
3178 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3179 fill, right);
3180 }
3181
3182 return u;
3183 }
3184
3185 PyDoc_STRVAR(ljust__doc__,
3186 "S.ljust(width[, fillchar]) -> string\n"
3187 "\n"
3188 "Return S left-justified in a string of length width. Padding is\n"
3189 "done using the specified fill character (default is a space).");
3190
3191 static PyObject *
3192 string_ljust(PyStringObject *self, PyObject *args)
3193 {
3194 Py_ssize_t width;
3195 char fillchar = ' ';
3196
3197 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3198 return NULL;
3199
3200 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3201 Py_INCREF(self);
3202 return (PyObject*) self;
3203 }
3204
3205 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3206 }
3207
3208
3209 PyDoc_STRVAR(rjust__doc__,
3210 "S.rjust(width[, fillchar]) -> string\n"
3211 "\n"
3212 "Return S right-justified in a string of length width. Padding is\n"
3213 "done using the specified fill character (default is a space)");
3214
3215 static PyObject *
3216 string_rjust(PyStringObject *self, PyObject *args)
3217 {
3218 Py_ssize_t width;
3219 char fillchar = ' ';
3220
3221 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3222 return NULL;
3223
3224 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3225 Py_INCREF(self);
3226 return (PyObject*) self;
3227 }
3228
3229 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3230 }
3231
3232
3233 PyDoc_STRVAR(center__doc__,
3234 "S.center(width[, fillchar]) -> string\n"
3235 "\n"
3236 "Return S centered in a string of length width. Padding is\n"
3237 "done using the specified fill character (default is a space)");
3238
3239 static PyObject *
3240 string_center(PyStringObject *self, PyObject *args)
3241 {
3242 Py_ssize_t marg, left;
3243 Py_ssize_t width;
3244 char fillchar = ' ';
3245
3246 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3247 return NULL;
3248
3249 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3250 Py_INCREF(self);
3251 return (PyObject*) self;
3252 }
3253
3254 marg = width - PyString_GET_SIZE(self);
3255 left = marg / 2 + (marg & width & 1);
3256
3257 return pad(self, left, marg - left, fillchar);
3258 }
3259
3260 PyDoc_STRVAR(zfill__doc__,
3261 "S.zfill(width) -> string\n"
3262 "\n"
3263 "Pad a numeric string S with zeros on the left, to fill a field\n"
3264 "of the specified width. The string S is never truncated.");
3265
3266 static PyObject *
3267 string_zfill(PyStringObject *self, PyObject *args)
3268 {
3269 Py_ssize_t fill;
3270 PyObject *s;
3271 char *p;
3272 Py_ssize_t width;
3273
3274 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3275 return NULL;
3276
3277 if (PyString_GET_SIZE(self) >= width) {
3278 if (PyString_CheckExact(self)) {
3279 Py_INCREF(self);
3280 return (PyObject*) self;
3281 }
3282 else
3283 return PyString_FromStringAndSize(
3284 PyString_AS_STRING(self),
3285 PyString_GET_SIZE(self)
3286 );
3287 }
3288
3289 fill = width - PyString_GET_SIZE(self);
3290
3291 s = pad(self, fill, 0, '0');
3292
3293 if (s == NULL)
3294 return NULL;
3295
3296 p = PyString_AS_STRING(s);
3297 if (p[fill] == '+' || p[fill] == '-') {
3298 /* move sign to beginning of string */
3299 p[0] = p[fill];
3300 p[fill] = '0';
3301 }
3302
3303 return (PyObject*) s;
3304 }
3305
3306 PyDoc_STRVAR(isspace__doc__,
3307 "S.isspace() -> bool\n\
3308 \n\
3309 Return True if all characters in S are whitespace\n\
3310 and there is at least one character in S, False otherwise.");
3311
3312 static PyObject*
3313 string_isspace(PyStringObject *self)
3314 {
3315 register const unsigned char *p
3316 = (unsigned char *) PyString_AS_STRING(self);
3317 register const unsigned char *e;
3318
3319 /* Shortcut for single character strings */
3320 if (PyString_GET_SIZE(self) == 1 &&
3321 isspace(*p))
3322 return PyBool_FromLong(1);
3323
3324 /* Special case for empty strings */
3325 if (PyString_GET_SIZE(self) == 0)
3326 return PyBool_FromLong(0);
3327
3328 e = p + PyString_GET_SIZE(self);
3329 for (; p < e; p++) {
3330 if (!isspace(*p))
3331 return PyBool_FromLong(0);
3332 }
3333 return PyBool_FromLong(1);
3334 }
3335
3336
3337 PyDoc_STRVAR(isalpha__doc__,
3338 "S.isalpha() -> bool\n\
3339 \n\
3340 Return True if all characters in S are alphabetic\n\
3341 and there is at least one character in S, False otherwise.");
3342
3343 static PyObject*
3344 string_isalpha(PyStringObject *self)
3345 {
3346 register const unsigned char *p
3347 = (unsigned char *) PyString_AS_STRING(self);
3348 register const unsigned char *e;
3349
3350 /* Shortcut for single character strings */
3351 if (PyString_GET_SIZE(self) == 1 &&
3352 isalpha(*p))
3353 return PyBool_FromLong(1);
3354
3355 /* Special case for empty strings */
3356 if (PyString_GET_SIZE(self) == 0)
3357 return PyBool_FromLong(0);
3358
3359 e = p + PyString_GET_SIZE(self);
3360 for (; p < e; p++) {
3361 if (!isalpha(*p))
3362 return PyBool_FromLong(0);
3363 }
3364 return PyBool_FromLong(1);
3365 }
3366
3367
3368 PyDoc_STRVAR(isalnum__doc__,
3369 "S.isalnum() -> bool\n\
3370 \n\
3371 Return True if all characters in S are alphanumeric\n\
3372 and there is at least one character in S, False otherwise.");
3373
3374 static PyObject*
3375 string_isalnum(PyStringObject *self)
3376 {
3377 register const unsigned char *p
3378 = (unsigned char *) PyString_AS_STRING(self);
3379 register const unsigned char *e;
3380
3381 /* Shortcut for single character strings */
3382 if (PyString_GET_SIZE(self) == 1 &&
3383 isalnum(*p))
3384 return PyBool_FromLong(1);
3385
3386 /* Special case for empty strings */
3387 if (PyString_GET_SIZE(self) == 0)
3388 return PyBool_FromLong(0);
3389
3390 e = p + PyString_GET_SIZE(self);
3391 for (; p < e; p++) {
3392 if (!isalnum(*p))
3393 return PyBool_FromLong(0);
3394 }
3395 return PyBool_FromLong(1);
3396 }
3397
3398
3399 PyDoc_STRVAR(isdigit__doc__,
3400 "S.isdigit() -> bool\n\
3401 \n\
3402 Return True if all characters in S are digits\n\
3403 and there is at least one character in S, False otherwise.");
3404
3405 static PyObject*
3406 string_isdigit(PyStringObject *self)
3407 {
3408 register const unsigned char *p
3409 = (unsigned char *) PyString_AS_STRING(self);
3410 register const unsigned char *e;
3411
3412 /* Shortcut for single character strings */
3413 if (PyString_GET_SIZE(self) == 1 &&
3414 isdigit(*p))
3415 return PyBool_FromLong(1);
3416
3417 /* Special case for empty strings */
3418 if (PyString_GET_SIZE(self) == 0)
3419 return PyBool_FromLong(0);
3420
3421 e = p + PyString_GET_SIZE(self);
3422 for (; p < e; p++) {
3423 if (!isdigit(*p))
3424 return PyBool_FromLong(0);
3425 }
3426 return PyBool_FromLong(1);
3427 }
3428
3429
3430 PyDoc_STRVAR(islower__doc__,
3431 "S.islower() -> bool\n\
3432 \n\
3433 Return True if all cased characters in S are lowercase and there is\n\
3434 at least one cased character in S, False otherwise.");
3435
3436 static PyObject*
3437 string_islower(PyStringObject *self)
3438 {
3439 register const unsigned char *p
3440 = (unsigned char *) PyString_AS_STRING(self);
3441 register const unsigned char *e;
3442 int cased;
3443
3444 /* Shortcut for single character strings */
3445 if (PyString_GET_SIZE(self) == 1)
3446 return PyBool_FromLong(islower(*p) != 0);
3447
3448 /* Special case for empty strings */
3449 if (PyString_GET_SIZE(self) == 0)
3450 return PyBool_FromLong(0);
3451
3452 e = p + PyString_GET_SIZE(self);
3453 cased = 0;
3454 for (; p < e; p++) {
3455 if (isupper(*p))
3456 return PyBool_FromLong(0);
3457 else if (!cased && islower(*p))
3458 cased = 1;
3459 }
3460 return PyBool_FromLong(cased);
3461 }
3462
3463
3464 PyDoc_STRVAR(isupper__doc__,
3465 "S.isupper() -> bool\n\
3466 \n\
3467 Return True if all cased characters in S are uppercase and there is\n\
3468 at least one cased character in S, False otherwise.");
3469
3470 static PyObject*
3471 string_isupper(PyStringObject *self)
3472 {
3473 register const unsigned char *p
3474 = (unsigned char *) PyString_AS_STRING(self);
3475 register const unsigned char *e;
3476 int cased;
3477
3478 /* Shortcut for single character strings */
3479 if (PyString_GET_SIZE(self) == 1)
3480 return PyBool_FromLong(isupper(*p) != 0);
3481
3482 /* Special case for empty strings */
3483 if (PyString_GET_SIZE(self) == 0)
3484 return PyBool_FromLong(0);
3485
3486 e = p + PyString_GET_SIZE(self);
3487 cased = 0;
3488 for (; p < e; p++) {
3489 if (islower(*p))
3490 return PyBool_FromLong(0);
3491 else if (!cased && isupper(*p))
3492 cased = 1;
3493 }
3494 return PyBool_FromLong(cased);
3495 }
3496
3497
3498 PyDoc_STRVAR(istitle__doc__,
3499 "S.istitle() -> bool\n\
3500 \n\
3501 Return True if S is a titlecased string and there is at least one\n\
3502 character in S, i.e. uppercase characters may only follow uncased\n\
3503 characters and lowercase characters only cased ones. Return False\n\
3504 otherwise.");
3505
3506 static PyObject*
3507 string_istitle(PyStringObject *self, PyObject *uncased)
3508 {
3509 register const unsigned char *p
3510 = (unsigned char *) PyString_AS_STRING(self);
3511 register const unsigned char *e;
3512 int cased, previous_is_cased;
3513
3514 /* Shortcut for single character strings */
3515 if (PyString_GET_SIZE(self) == 1)
3516 return PyBool_FromLong(isupper(*p) != 0);
3517
3518 /* Special case for empty strings */
3519 if (PyString_GET_SIZE(self) == 0)
3520 return PyBool_FromLong(0);
3521
3522 e = p + PyString_GET_SIZE(self);
3523 cased = 0;
3524 previous_is_cased = 0;
3525 for (; p < e; p++) {
3526 register const unsigned char ch = *p;
3527
3528 if (isupper(ch)) {
3529 if (previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
3532 cased = 1;
3533 }
3534 else if (islower(ch)) {
3535 if (!previous_is_cased)
3536 return PyBool_FromLong(0);
3537 previous_is_cased = 1;
3538 cased = 1;
3539 }
3540 else
3541 previous_is_cased = 0;
3542 }
3543 return PyBool_FromLong(cased);
3544 }
3545
3546
3547 PyDoc_STRVAR(splitlines__doc__,
3548 "S.splitlines([keepends]) -> list of strings\n\
3549 \n\
3550 Return a list of the lines in S, breaking at line boundaries.\n\
3551 Line breaks are not included in the resulting list unless keepends\n\
3552 is given and true.");
3553
3554 static PyObject*
3555 string_splitlines(PyStringObject *self, PyObject *args)
3556 {
3557 int keepends = 0;
3558
3559 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3560 return NULL;
3561
3562 return stringlib_splitlines(
3563 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3564 keepends
3565 );
3566 }
3567
3568 PyDoc_STRVAR(sizeof__doc__,
3569 "S.__sizeof__() -> size of S in memory, in bytes");
3570
3571 static PyObject *
3572 string_sizeof(PyStringObject *v)
3573 {
3574 Py_ssize_t res;
3575 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3576 return PyInt_FromSsize_t(res);
3577 }
3578
3579 static PyObject *
3580 string_getnewargs(PyStringObject *v)
3581 {
3582 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3583 }
3584
3585
3586 #include "stringlib/string_format.h"
3587
3588 PyDoc_STRVAR(format__doc__,
3589 "S.format(*args, **kwargs) -> string\n\
3590 \n\
3591 Return a formatted version of S, using substitutions from args and kwargs.\n\
3592 The substitutions are identified by braces ('{' and '}').");
3593
3594 static PyObject *
3595 string__format__(PyObject* self, PyObject* args)
3596 {
3597 PyObject *format_spec;
3598 PyObject *result = NULL;
3599 PyObject *tmp = NULL;
3600
3601 /* If 2.x, convert format_spec to the same type as value */
3602 /* This is to allow things like u''.format('') */
3603 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3604 goto done;
3605 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3606 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3607 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3608 goto done;
3609 }
3610 tmp = PyObject_Str(format_spec);
3611 if (tmp == NULL)
3612 goto done;
3613 format_spec = tmp;
3614
3615 result = _PyBytes_FormatAdvanced(self,
3616 PyString_AS_STRING(format_spec),
3617 PyString_GET_SIZE(format_spec));
3618 done:
3619 Py_XDECREF(tmp);
3620 return result;
3621 }
3622
3623 PyDoc_STRVAR(p_format__doc__,
3624 "S.__format__(format_spec) -> string\n\
3625 \n\
3626 Return a formatted version of S as described by format_spec.");
3627
3628
/* Method table for str objects.  Each entry lists the Python-visible
   method name, the C function implementing it, the METH_* calling
   convention flags and (where given) the docstring.  The table is
   terminated by a {NULL, NULL} sentinel and is referenced from the
   tp_methods slot of PyString_Type. */
3629 static PyMethodDef
3630 string_methods[] = {
3631 /* Counterparts of the obsolete stropmodule functions; except
3632 string.maketrans(). */
3633 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3634 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3635 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3636 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3637 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3638 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3639 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3640 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3641 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3642 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3643 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3644 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3645 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3646 capitalize__doc__},
3647 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3648 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3649 endswith__doc__},
3650 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3651 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3652 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3653 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3654 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3655 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3656 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3657 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3658 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3659 rpartition__doc__},
3660 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3661 startswith__doc__},
3662 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3663 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3664 swapcase__doc__},
3665 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3666 translate__doc__},
3667 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3668 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3669 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3670 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3671 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
/* New-style formatting support; the two _formatter_* helpers carry no
   docstring (the omitted ml_doc member is zero-initialized). */
3672 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3673 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3674 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3675 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3676 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3677 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3678 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3679 expandtabs__doc__},
3680 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3681 splitlines__doc__},
3682 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3683 sizeof__doc__},
/* __getnewargs__ is registered without a docstring. */
3684 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3685 {NULL, NULL} /* sentinel */
3686 };
3687
3688 static PyObject *
3689 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3690
3691 static PyObject *
3692 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3693 {
3694 PyObject *x = NULL;
3695 static char *kwlist[] = {"object", 0};
3696
3697 if (type != &PyString_Type)
3698 return str_subtype_new(type, args, kwds);
3699 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3700 return NULL;
3701 if (x == NULL)
3702 return PyString_FromString("");
3703 return PyObject_Str(x);
3704 }
3705
3706 static PyObject *
3707 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3708 {
3709 PyObject *tmp, *pnew;
3710 Py_ssize_t n;
3711
3712 assert(PyType_IsSubtype(type, &PyString_Type));
3713 tmp = string_new(&PyString_Type, args, kwds);
3714 if (tmp == NULL)
3715 return NULL;
3716 assert(PyString_CheckExact(tmp));
3717 n = PyString_GET_SIZE(tmp);
3718 pnew = type->tp_alloc(type, n);
3719 if (pnew != NULL) {
3720 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3721 ((PyStringObject *)pnew)->ob_shash =
3722 ((PyStringObject *)tmp)->ob_shash;
3723 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3724 }
3725 Py_DECREF(tmp);
3726 return pnew;
3727 }
3728
3729 static PyObject *
3730 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3731 {
3732 PyErr_SetString(PyExc_TypeError,
3733 "The basestring type cannot be instantiated");
3734 return NULL;
3735 }
3736
3737 static PyObject *
3738 string_mod(PyObject *v, PyObject *w)
3739 {
3740 if (!PyString_Check(v)) {
3741 Py_INCREF(Py_NotImplemented);
3742 return Py_NotImplemented;
3743 }
3744 return PyString_Format(v, w);
3745 }
3746
3747 PyDoc_STRVAR(basestring_doc,
3748 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3749
/* Number-protocol slots for str.  Only nb_remainder is populated (with
   string_mod, which implements the % operator); the slots listed before
   it are 0, and the members omitted after it are zero-initialized. */
3750 static PyNumberMethods string_as_number = {
3751 0, /*nb_add*/
3752 0, /*nb_subtract*/
3753 0, /*nb_multiply*/
3754 0, /*nb_divide*/
3755 string_mod, /*nb_remainder*/
3756 };
3757
3758
/* Type object for basestring.  Almost every slot is 0: the type only
   supplies its name, flags (default + usable as a base type), doc string,
   base type (PyBaseObject_Type) and a tp_new (basestring_new). */
3759 PyTypeObject PyBaseString_Type = {
3760 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3761 "basestring", /* tp_name */
3762 0, /* tp_basicsize */
3763 0, /* tp_itemsize */
3764 0, /* tp_dealloc */
3765 0, /* tp_print */
3766 0, /* tp_getattr */
3767 0, /* tp_setattr */
3768 0, /* tp_compare */
3769 0, /* tp_repr */
3770 0, /* tp_as_number */
3771 0, /* tp_as_sequence */
3772 0, /* tp_as_mapping */
3773 0, /* tp_hash */
3774 0, /* tp_call */
3775 0, /* tp_str */
3776 0, /* tp_getattro */
3777 0, /* tp_setattro */
3778 0, /* tp_as_buffer */
3779 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3780 basestring_doc, /* tp_doc */
3781 0, /* tp_traverse */
3782 0, /* tp_clear */
3783 0, /* tp_richcompare */
3784 0, /* tp_weaklistoffset */
3785 0, /* tp_iter */
3786 0, /* tp_iternext */
3787 0, /* tp_methods */
3788 0, /* tp_members */
3789 0, /* tp_getset */
3790 &PyBaseObject_Type, /* tp_base */
3791 0, /* tp_dict */
3792 0, /* tp_descr_get */
3793 0, /* tp_descr_set */
3794 0, /* tp_dictoffset */
3795 0, /* tp_init */
3796 0, /* tp_alloc */
3797 basestring_new, /* tp_new */
3798 0, /* tp_free */
3799 };
3800
3801 PyDoc_STRVAR(string_doc,
3802 "str(object) -> string\n\
3803 \n\
3804 Return a nice string representation of the object.\n\
3805 If the argument is a string, the return value is the same object.");
3806
/* Type object for str.  Instances are variable-sized: the fixed part is
   PyStringObject_SIZE bytes and each item is one char.  The slots wire in
   the string_* implementations, the number/sequence/mapping/buffer
   protocol tables and the string_methods method table; the base type is
   PyBaseString_Type. */
3807 PyTypeObject PyString_Type = {
3808 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3809 "str", /* tp_name */
3810 PyStringObject_SIZE, /* tp_basicsize */
3811 sizeof(char), /* tp_itemsize */
3812 string_dealloc, /* tp_dealloc */
3813 (printfunc)string_print, /* tp_print */
3814 0, /* tp_getattr */
3815 0, /* tp_setattr */
3816 0, /* tp_compare */
3817 string_repr, /* tp_repr */
3818 &string_as_number, /* tp_as_number */
3819 &string_as_sequence, /* tp_as_sequence */
3820 &string_as_mapping, /* tp_as_mapping */
3821 (hashfunc)string_hash, /* tp_hash */
3822 0, /* tp_call */
3823 string_str, /* tp_str */
3824 PyObject_GenericGetAttr, /* tp_getattro */
3825 0, /* tp_setattro */
3826 &string_as_buffer, /* tp_as_buffer */
3827 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3828 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3829 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3830 string_doc, /* tp_doc */
3831 0, /* tp_traverse */
3832 0, /* tp_clear */
3833 (richcmpfunc)string_richcompare, /* tp_richcompare */
3834 0, /* tp_weaklistoffset */
3835 0, /* tp_iter */
3836 0, /* tp_iternext */
3837 string_methods, /* tp_methods */
3838 0, /* tp_members */
3839 0, /* tp_getset */
3840 &PyBaseString_Type, /* tp_base */
3841 0, /* tp_dict */
3842 0, /* tp_descr_get */
3843 0, /* tp_descr_set */
3844 0, /* tp_dictoffset */
3845 0, /* tp_init */
3846 0, /* tp_alloc */
3847 string_new, /* tp_new */
3848 PyObject_Del, /* tp_free */
3849 };
3850
3851 void
3852 PyString_Concat(register PyObject **pv, register PyObject *w)
3853 {
3854 register PyObject *v;
3855 if (*pv == NULL)
3856 return;
3857 if (w == NULL || !PyString_Check(*pv)) {
3858 Py_DECREF(*pv);
3859 *pv = NULL;
3860 return;
3861 }
3862 v = string_concat((PyStringObject *) *pv, w);
3863 Py_DECREF(*pv);
3864 *pv = v;
3865 }
3866
3867 void
3868 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3869 {
3870 PyString_Concat(pv, w);
3871 Py_XDECREF(w);
3872 }
3873
3874
3875 /* The following function breaks the notion that strings are immutable:
3876 it changes the size of a string. We get away with this only if there
3877 is only one module referencing the object. You can also think of it
3878 as creating a new string object and destroying the old one, only
3879 more efficiently. In any case, don't use this if the string may
3880 already be known to some other part of the code...
3881 Note that if there's not enough memory to resize the string, the original
3882 string object at *pv is deallocated, *pv is set to NULL, an "out of
3883 memory" exception is set, and -1 is returned. Else (on success) 0 is
3884 returned, and the value in *pv may or may not be the same as on input.
3885 As always, an extra byte is allocated for a trailing \0 byte (newsize
3886 does *not* include that), and a trailing \0 byte is stored.
3887 */
3888
/* Resize the string object at *pv to hold newsize bytes plus a trailing
   NUL.  Returns 0 on success (with *pv possibly pointing at a different
   address) and -1 with an exception set on failure; on both failure paths
   the original object is released and *pv is left NULL. */
3889 int
3890 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3891 {
3892 register PyObject *v;
3893 register PyStringObject *sv;
3894 v = *pv;
/* Reject non-strings, objects with more than one reference, negative
   sizes and interned strings. */
3895 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3896 PyString_CHECK_INTERNED(v)) {
3897 *pv = 0;
3898 Py_DECREF(v);
3899 PyErr_BadInternalCall();
3900 return -1;
3901 }
3902 /* XXX UNREF/NEWREF interface should be more symmetrical */
/* Undo the reference accounting for v before its memory is handed to the
   reallocator; _Py_NewReference() below re-registers the result. */
3903 _Py_DEC_REFTOTAL;
3904 _Py_ForgetReference(v);
3905 *pv = (PyObject *)
3906 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3907 if (*pv == NULL) {
/* Reallocation failed: the old block v is freed here and *pv stays NULL. */
3908 PyObject_Del(v);
3909 PyErr_NoMemory();
3910 return -1;
3911 }
3912 _Py_NewReference(*pv);
3913 sv = (PyStringObject *) *pv;
3914 Py_SIZE(sv) = newsize;
3915 sv->ob_sval[newsize] = '\0';
3916 sv->ob_shash = -1; /* invalidate cached hash value */
3917 return 0;
3918 }
3919
3920 /* Helpers for formatstring */
3921
3922 Py_LOCAL_INLINE(PyObject *)
3923 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3924 {
3925 Py_ssize_t argidx = *p_argidx;
3926 if (argidx < arglen) {
3927 (*p_argidx)++;
3928 if (arglen < 0)
3929 return args;
3930 else
3931 return PyTuple_GetItem(args, argidx);
3932 }
3933 PyErr_SetString(PyExc_TypeError,
3934 "not enough arguments for format string");
3935 return NULL;
3936 }
3937
3938 /* Format codes
3939 * F_LJUST '-'
3940 * F_SIGN '+'
3941 * F_BLANK ' '
3942 * F_ALT '#'
3943 * F_ZERO '0'
3944 */
3945 #define F_LJUST (1<<0)
3946 #define F_SIGN (1<<1)
3947 #define F_BLANK (1<<2)
3948 #define F_ALT (1<<3)
3949 #define F_ZERO (1<<4)
3950
3951 /* Returns a new reference to a PyString object, or NULL on failure. */
3952
3953 static PyObject *
3954 formatfloat(PyObject *v, int flags, int prec, int type)
3955 {
3956 char *p;
3957 PyObject *result;
3958 double x;
3959
3960 x = PyFloat_AsDouble(v);
3961 if (x == -1.0 && PyErr_Occurred()) {
3962 PyErr_Format(PyExc_TypeError, "float argument required, "
3963 "not %.200s", Py_TYPE(v)->tp_name);
3964 return NULL;
3965 }
3966
3967 if (prec < 0)
3968 prec = 6;
3969
3970 p = PyOS_double_to_string(x, type, prec,
3971 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3972
3973 if (p == NULL)
3974 return NULL;
3975 result = PyString_FromStringAndSize(p, strlen(p));
3976 PyMem_Free(p);
3977 return result;
3978 }
3979
3980 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3981 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3982 * Python's regular ints.
3983 * Return value: a new PyString*, or NULL if error.
3984 * . *pbuf is set to point into it,
3985 * *plen set to the # of chars following that.
3986 * Caller must decref it when done using pbuf.
3987 * The string starting at *pbuf is of the form
3988 * "-"? ("0x" | "0X")? digit+
3989 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3990 * set in flags. The case of hex digits will be correct,
3991 * There will be at least prec digits, zero-filled on the left if
3992 * necessary to get that many.
3993 * val object to be converted
3994 * flags bitmask of format flags; only F_ALT is looked at
3995 * prec minimum number of digits; 0-fill on left if needed
3996 * type a character in [duoxX]; u acts the same as d
3997 *
3998 * CAUTION: o, x and X conversions on regular ints can never
3999 * produce a '-' sign, but can for Python's unbounded ints.
4000 */
4001 PyObject*
4002 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4003 char **pbuf, int *plen)
4004 {
4005 PyObject *result = NULL;
4006 char *buf;
4007 Py_ssize_t i;
4008 int sign; /* 1 if '-', else 0 */
4009 int len; /* number of characters */
4010 Py_ssize_t llen;
4011 int numdigits; /* len == numnondigits + numdigits */
4012 int numnondigits = 0;
4013
4014 switch (type) {
4015 case 'd':
4016 case 'u':
4017 result = Py_TYPE(val)->tp_str(val);
4018 break;
4019 case 'o':
4020 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4021 break;
4022 case 'x':
4023 case 'X':
4024 numnondigits = 2;
4025 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4026 break;
4027 default:
4028 assert(!"'type' not in [duoxX]");
4029 }
4030 if (!result)
4031 return NULL;
4032
4033 buf = PyString_AsString(result);
4034 if (!buf) {
4035 Py_DECREF(result);
4036 return NULL;
4037 }
4038
4039 /* To modify the string in-place, there can only be one reference. */
4040 if (Py_REFCNT(result) != 1) {
4041 PyErr_BadInternalCall();
4042 return NULL;
4043 }
4044 llen = PyString_Size(result);
4045 if (llen > INT_MAX) {
4046 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4047 return NULL;
4048 }
4049 len = (int)llen;
4050 if (buf[len-1] == 'L') {
4051 --len;
4052 buf[len] = '\0';
4053 }
4054 sign = buf[0] == '-';
4055 numnondigits += sign;
4056 numdigits = len - numnondigits;
4057 assert(numdigits > 0);
4058
4059 /* Get rid of base marker unless F_ALT */
4060 if ((flags & F_ALT) == 0) {
4061 /* Need to skip 0x, 0X or 0. */
4062 int skipped = 0;
4063 switch (type) {
4064 case 'o':
4065 assert(buf[sign] == '0');
4066 /* If 0 is only digit, leave it alone. */
4067 if (numdigits > 1) {
4068 skipped = 1;
4069 --numdigits;
4070 }
4071 break;
4072 case 'x':
4073 case 'X':
4074 assert(buf[sign] == '0');
4075 assert(buf[sign + 1] == 'x');
4076 skipped = 2;
4077 numnondigits -= 2;
4078 break;
4079 }
4080 if (skipped) {
4081 buf += skipped;
4082 len -= skipped;
4083 if (sign)
4084 buf[0] = '-';
4085 }
4086 assert(len == numnondigits + numdigits);
4087 assert(numdigits > 0);
4088 }
4089
4090 /* Fill with leading zeroes to meet minimum width. */
4091 if (prec > numdigits) {
4092 PyObject *r1 = PyString_FromStringAndSize(NULL,
4093 numnondigits + prec);
4094 char *b1;
4095 if (!r1) {
4096 Py_DECREF(result);
4097 return NULL;
4098 }
4099 b1 = PyString_AS_STRING(r1);
4100 for (i = 0; i < numnondigits; ++i)
4101 *b1++ = *buf++;
4102 for (i = 0; i < prec - numdigits; i++)
4103 *b1++ = '0';
4104 for (i = 0; i < numdigits; i++)
4105 *b1++ = *buf++;
4106 *b1 = '\0';
4107 Py_DECREF(result);
4108 result = r1;
4109 buf = PyString_AS_STRING(result);
4110 len = numnondigits + prec;
4111 }
4112
4113 /* Fix up case for hex conversions. */
4114 if (type == 'X') {
4115 /* Need to convert all lower case letters to upper case.
4116 and need to convert 0x to 0X (and -0x to -0X). */
4117 for (i = 0; i < len; i++)
4118 if (buf[i] >= 'a' && buf[i] <= 'x')
4119 buf[i] -= 'a'-'A';
4120 }
4121 *pbuf = buf;
4122 *plen = len;
4123 return result;
4124 }
4125
4126 Py_LOCAL_INLINE(int)
4127 formatint(char *buf, size_t buflen, int flags,
4128 int prec, int type, PyObject *v)
4129 {
4130 /* fmt = '%#.' + `prec` + 'l' + `type`
4131 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4132 + 1 + 1 = 24 */
4133 char fmt[64]; /* plenty big enough! */
4134 char *sign;
4135 long x;
4136
4137 x = PyInt_AsLong(v);
4138 if (x == -1 && PyErr_Occurred()) {
4139 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4140 Py_TYPE(v)->tp_name);
4141 return -1;
4142 }
4143 if (x < 0 && type == 'u') {
4144 type = 'd';
4145 }
4146 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4147 sign = "-";
4148 else
4149 sign = "";
4150 if (prec < 0)
4151 prec = 1;
4152
4153 if ((flags & F_ALT) &&
4154 (type == 'x' || type == 'X')) {
4155 /* When converting under %#x or %#X, there are a number
4156 * of issues that cause pain:
4157 * - when 0 is being converted, the C standard leaves off
4158 * the '0x' or '0X', which is inconsistent with other
4159 * %#x/%#X conversions and inconsistent with Python's
4160 * hex() function
4161 * - there are platforms that violate the standard and
4162 * convert 0 with the '0x' or '0X'
4163 * (Metrowerks, Compaq Tru64)
4164 * - there are platforms that give '0x' when converting
4165 * under %#X, but convert 0 in accordance with the
4166 * standard (OS/2 EMX)
4167 *
4168 * We can achieve the desired consistency by inserting our
4169 * own '0x' or '0X' prefix, and substituting %x/%X in place
4170 * of %#x/%#X.
4171 *
4172 * Note that this is the same approach as used in
4173 * formatint() in unicodeobject.c
4174 */
4175 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4176 sign, type, prec, type);
4177 }
4178 else {
4179 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4180 sign, (flags&F_ALT) ? "#" : "",
4181 prec, type);
4182 }
4183
4184 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4185 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4186 */
4187 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4188 PyErr_SetString(PyExc_OverflowError,
4189 "formatted integer is too long (precision too large?)");
4190 return -1;
4191 }
4192 if (sign[0])
4193 PyOS_snprintf(buf, buflen, fmt, -x);
4194 else
4195 PyOS_snprintf(buf, buflen, fmt, x);
4196 return (int)strlen(buf);
4197 }
4198
4199 Py_LOCAL_INLINE(int)
4200 formatchar(char *buf, size_t buflen, PyObject *v)
4201 {
4202 /* presume that the buffer is at least 2 characters long */
4203 if (PyString_Check(v)) {
4204 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4205 return -1;
4206 }
4207 else {
4208 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4209 return -1;
4210 }
4211 buf[1] = '\0';
4212 return 1;
4213 }
4214
4215 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4216
4217 FORMATBUFLEN is the length of the buffer in which the ints &
4218 chars are formatted. XXX This is a magic number. Each formatting
4219 routine does bounds checking to ensure no overflow, but a better
4220 solution may be to malloc a buffer of appropriate size for each
4221 format. For now, the current solution is sufficient.
4222 */
4223 #define FORMATBUFLEN (size_t)120
4224
/* Build the result of the string formatting operation `format % args`.
 *
 * format - must be a non-NULL string object (checked with PyString_Check).
 * args   - must be non-NULL. A tuple supplies positional arguments; for
 *          any other object arglen/argidx are set to -1/-2 before being
 *          passed to getnextarg(). A non-tuple, non-basestring object whose
 *          type has tp_as_mapping is also used as the mapping for %(key)
 *          specifiers.
 *
 * Returns the result object, or NULL on failure. When built with
 * Py_USING_UNICODE and a %s or %c argument is (or stringifies to) a unicode
 * object, the output built so far is kept, the rest of the format is handed
 * to PyUnicode_Format(), and the concatenation of both is returned.
 *
 * Bookkeeping: reslen is the allocated size of result, rescnt the number of
 * unused bytes left in it, res the write cursor, and fmtcnt the number of
 * format characters not yet consumed.
 */
4225 PyObject *
4226 PyString_Format(PyObject *format, PyObject *args)
4227 {
4228 char *fmt, *res;
4229 Py_ssize_t arglen, argidx;
4230 Py_ssize_t reslen, rescnt, fmtcnt;
4231 int args_owned = 0;
4232 PyObject *result, *orig_args;
4233 #ifdef Py_USING_UNICODE
4234 PyObject *v, *w;
4235 #endif
4236 PyObject *dict = NULL;
4237 if (format == NULL || !PyString_Check(format) || args == NULL) {
4238 PyErr_BadInternalCall();
4239 return NULL;
4240 }
4241 orig_args = args;
4242 fmt = PyString_AS_STRING(format);
4243 fmtcnt = PyString_GET_SIZE(format);
/* initial size: the format length plus 100 bytes of slack */
4244 reslen = rescnt = fmtcnt + 100;
4245 result = PyString_FromStringAndSize((char *)NULL, reslen);
4246 if (result == NULL)
4247 return NULL;
4248 res = PyString_AsString(result);
4249 if (PyTuple_Check(args)) {
4250 arglen = PyTuple_GET_SIZE(args);
4251 argidx = 0;
4252 }
4253 else {
4254 arglen = -1;
4255 argidx = -2;
4256 }
/* remember args as the mapping used by %(key) specifiers */
4257 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4258 !PyObject_TypeCheck(args, &PyBaseString_Type))
4259 dict = args;
4260 while (--fmtcnt >= 0) {
4261 if (*fmt != '%') {
/* literal character: copy it through, growing result when full */
4262 if (--rescnt < 0) {
4263 rescnt = fmtcnt + 100;
4264 reslen += rescnt;
/* NOTE(review): this returns directly instead of `goto error`; if
 * args_owned was set by an earlier %(key) lookup, that reference is
 * not released on this path — confirm. */
4265 if (_PyString_Resize(&result, reslen))
4266 return NULL;
4267 res = PyString_AS_STRING(result)
4268 + reslen - rescnt;
4269 --rescnt;
4270 }
4271 *res++ = *fmt++;
4272 }
4273 else {
4274 /* Got a format specifier */
4275 int flags = 0;
4276 Py_ssize_t width = -1;
4277 int prec = -1;
4278 int c = '\0';
4279 int fill;
4280 int isnumok;
4281 PyObject *v = NULL;
4282 PyObject *temp = NULL;
4283 char *pbuf;
4284 int sign;
4285 Py_ssize_t len;
4286 char formatbuf[FORMATBUFLEN];
4287 /* For format{int,char}() */
4288 #ifdef Py_USING_UNICODE
/* saved so the unicode: path can restart at this specifier */
4289 char *fmt_start = fmt;
4290 Py_ssize_t argidx_start = argidx;
4291 #endif
4292
4293 fmt++;
/* %(key): look the key up in dict and use the value as the argument */
4294 if (*fmt == '(') {
4295 char *keystart;
4296 Py_ssize_t keylen;
4297 PyObject *key;
4298 int pcount = 1;
4299
4300 if (dict == NULL) {
4301 PyErr_SetString(PyExc_TypeError,
4302 "format requires a mapping");
4303 goto error;
4304 }
4305 ++fmt;
4306 --fmtcnt;
4307 keystart = fmt;
4308 /* Skip over balanced parentheses */
4309 while (pcount > 0 && --fmtcnt >= 0) {
4310 if (*fmt == ')')
4311 --pcount;
4312 else if (*fmt == '(')
4313 ++pcount;
4314 fmt++;
4315 }
4316 keylen = fmt - keystart - 1;
4317 if (fmtcnt < 0 || pcount > 0) {
4318 PyErr_SetString(PyExc_ValueError,
4319 "incomplete format key");
4320 goto error;
4321 }
4322 key = PyString_FromStringAndSize(keystart,
4323 keylen);
4324 if (key == NULL)
4325 goto error;
/* drop the value fetched for a previous %(key) before replacing it */
4326 if (args_owned) {
4327 Py_DECREF(args);
4328 args_owned = 0;
4329 }
4330 args = PyObject_GetItem(dict, key);
4331 Py_DECREF(key);
4332 if (args == NULL) {
4333 goto error;
4334 }
4335 args_owned = 1;
4336 arglen = -1;
4337 argidx = -2;
4338 }
/* conversion flags */
4339 while (--fmtcnt >= 0) {
4340 switch (c = *fmt++) {
4341 case '-': flags |= F_LJUST; continue;
4342 case '+': flags |= F_SIGN; continue;
4343 case ' ': flags |= F_BLANK; continue;
4344 case '#': flags |= F_ALT; continue;
4345 case '0': flags |= F_ZERO; continue;
4346 }
4347 break;
4348 }
/* field width: '*' takes it from the next argument, otherwise digits;
 * a negative '*' width means left-justify with the absolute value */
4349 if (c == '*') {
4350 v = getnextarg(args, arglen, &argidx);
4351 if (v == NULL)
4352 goto error;
4353 if (!PyInt_Check(v)) {
4354 PyErr_SetString(PyExc_TypeError,
4355 "* wants int");
4356 goto error;
4357 }
4358 width = PyInt_AsLong(v);
4359 if (width < 0) {
4360 flags |= F_LJUST;
4361 width = -width;
4362 }
4363 if (--fmtcnt >= 0)
4364 c = *fmt++;
4365 }
4366 else if (c >= 0 && isdigit(c)) {
4367 width = c - '0';
4368 while (--fmtcnt >= 0) {
4369 c = Py_CHARMASK(*fmt++);
4370 if (!isdigit(c))
4371 break;
4372 if ((width*10) / 10 != width) {
4373 PyErr_SetString(
4374 PyExc_ValueError,
4375 "width too big");
4376 goto error;
4377 }
4378 width = width*10 + (c - '0');
4379 }
4380 }
/* precision: same two forms as the width; a negative '*' value is
 * clamped to 0 */
4381 if (c == '.') {
4382 prec = 0;
4383 if (--fmtcnt >= 0)
4384 c = *fmt++;
4385 if (c == '*') {
4386 v = getnextarg(args, arglen, &argidx);
4387 if (v == NULL)
4388 goto error;
4389 if (!PyInt_Check(v)) {
4390 PyErr_SetString(
4391 PyExc_TypeError,
4392 "* wants int");
4393 goto error;
4394 }
4395 prec = PyInt_AsLong(v);
4396 if (prec < 0)
4397 prec = 0;
4398 if (--fmtcnt >= 0)
4399 c = *fmt++;
4400 }
4401 else if (c >= 0 && isdigit(c)) {
4402 prec = c - '0';
4403 while (--fmtcnt >= 0) {
4404 c = Py_CHARMASK(*fmt++);
4405 if (!isdigit(c))
4406 break;
4407 if ((prec*10) / 10 != prec) {
4408 PyErr_SetString(
4409 PyExc_ValueError,
4410 "prec too big");
4411 goto error;
4412 }
4413 prec = prec*10 + (c - '0');
4414 }
4415 }
4416 } /* prec */
/* a length modifier (h, l, L) is consumed and has no other effect */
4417 if (fmtcnt >= 0) {
4418 if (c == 'h' || c == 'l' || c == 'L') {
4419 if (--fmtcnt >= 0)
4420 c = *fmt++;
4421 }
4422 }
4423 if (fmtcnt < 0) {
4424 PyErr_SetString(PyExc_ValueError,
4425 "incomplete format");
4426 goto error;
4427 }
/* every conversion except %% consumes one argument */
4428 if (c != '%') {
4429 v = getnextarg(args, arglen, &argidx);
4430 if (v == NULL)
4431 goto error;
4432 }
/* sign is first a flag (set to 1 by the numeric conversions below) and
 * is later replaced by the sign character to emit, or 0 for none */
4433 sign = 0;
4434 fill = ' ';
4435 switch (c) {
4436 case '%':
4437 pbuf = "%";
4438 len = 1;
4439 break;
4440 case 's':
4441 #ifdef Py_USING_UNICODE
4442 if (PyUnicode_Check(v)) {
4443 fmt = fmt_start;
4444 argidx = argidx_start;
4445 goto unicode;
4446 }
4447 #endif
4448 temp = _PyObject_Str(v);
4449 #ifdef Py_USING_UNICODE
4450 if (temp != NULL && PyUnicode_Check(temp)) {
4451 Py_DECREF(temp);
4452 fmt = fmt_start;
4453 argidx = argidx_start;
4454 goto unicode;
4455 }
4456 #endif
4457 /* Fall through */
4458 case 'r':
4459 if (c == 'r')
4460 temp = PyObject_Repr(v);
4461 if (temp == NULL)
4462 goto error;
4463 if (!PyString_Check(temp)) {
4464 PyErr_SetString(PyExc_TypeError,
4465 "%s argument has non-string str()");
4466 Py_DECREF(temp);
4467 goto error;
4468 }
4469 pbuf = PyString_AS_STRING(temp);
4470 len = PyString_GET_SIZE(temp);
/* for %s and %r the precision truncates the text */
4471 if (prec >= 0 && len > prec)
4472 len = prec;
4473 break;
4474 case 'i':
4475 case 'd':
4476 case 'u':
4477 case 'o':
4478 case 'x':
4479 case 'X':
4480 if (c == 'i')
4481 c = 'd';
4482 isnumok = 0;
4483 if (PyNumber_Check(v)) {
4484 PyObject *iobj=NULL;
4485
4486 if (PyInt_Check(v) || (PyLong_Check(v))) {
4487 iobj = v;
4488 Py_INCREF(iobj);
4489 }
4490 else {
4491 iobj = PyNumber_Int(v);
4492 if (iobj==NULL) iobj = PyNumber_Long(v);
4493 }
4494 if (iobj!=NULL) {
4495 if (PyInt_Check(iobj)) {
4496 isnumok = 1;
4497 pbuf = formatbuf;
4498 len = formatint(pbuf,
4499 sizeof(formatbuf),
4500 flags, prec, c, iobj);
4501 Py_DECREF(iobj);
4502 if (len < 0)
4503 goto error;
4504 sign = 1;
4505 }
4506 else if (PyLong_Check(iobj)) {
4507 int ilen;
4508
4509 isnumok = 1;
4510 temp = _PyString_FormatLong(iobj, flags,
4511 prec, c, &pbuf, &ilen);
4512 Py_DECREF(iobj);
4513 len = ilen;
4514 if (!temp)
4515 goto error;
4516 sign = 1;
4517 }
4518 else {
4519 Py_DECREF(iobj);
4520 }
4521 }
4522 }
4523 if (!isnumok) {
4524 PyErr_Format(PyExc_TypeError,
4525 "%%%c format: a number is required, "
4526 "not %.200s", c, Py_TYPE(v)->tp_name);
4527 goto error;
4528 }
4529 if (flags & F_ZERO)
4530 fill = '0';
4531 break;
4532 case 'e':
4533 case 'E':
4534 case 'f':
4535 case 'F':
4536 case 'g':
4537 case 'G':
4538 temp = formatfloat(v, flags, prec, c);
4539 if (temp == NULL)
4540 goto error;
4541 pbuf = PyString_AS_STRING(temp);
4542 len = PyString_GET_SIZE(temp);
4543 sign = 1;
4544 if (flags & F_ZERO)
4545 fill = '0';
4546 break;
4547 case 'c':
4548 #ifdef Py_USING_UNICODE
4549 if (PyUnicode_Check(v)) {
4550 fmt = fmt_start;
4551 argidx = argidx_start;
4552 goto unicode;
4553 }
4554 #endif
4555 pbuf = formatbuf;
4556 len = formatchar(pbuf, sizeof(formatbuf), v);
4557 if (len < 0)
4558 goto error;
4559 break;
4560 default:
4561 PyErr_Format(PyExc_ValueError,
4562 "unsupported format character '%c' (0x%x) "
4563 "at index %zd",
4564 c, c,
4565 (Py_ssize_t)(fmt - 1 -
4566 PyString_AsString(format)));
4567 goto error;
4568 }
/* numeric conversion: strip a leading sign from the formatted text, or
 * pick one from the '+' / ' ' flags */
4569 if (sign) {
4570 if (*pbuf == '-' || *pbuf == '+') {
4571 sign = *pbuf++;
4572 len--;
4573 }
4574 else if (flags & F_SIGN)
4575 sign = '+';
4576 else if (flags & F_BLANK)
4577 sign = ' ';
4578 else
4579 sign = 0;
4580 }
4581 if (width < len)
4582 width = len;
/* grow result if the remaining room cannot hold width bytes plus the
 * optional sign */
4583 if (rescnt - (sign != 0) < width) {
4584 reslen -= rescnt;
4585 rescnt = width + fmtcnt + 100;
4586 reslen += rescnt;
/* NOTE(review): both returns below bypass `error:`; args is not
 * released on these paths when args_owned is set — confirm. */
4587 if (reslen < 0) {
4588 Py_DECREF(result);
4589 Py_XDECREF(temp);
4590 return PyErr_NoMemory();
4591 }
4592 if (_PyString_Resize(&result, reslen)) {
4593 Py_XDECREF(temp);
4594 return NULL;
4595 }
4596 res = PyString_AS_STRING(result)
4597 + reslen - rescnt;
4598 }
/* with a non-space fill the sign is written before the padding;
 * with space fill it is written after it (see below) */
4599 if (sign) {
4600 if (fill != ' ')
4601 *res++ = sign;
4602 rescnt--;
4603 if (width > len)
4604 width--;
4605 }
/* same ordering rule for the two-character prefix of %#x / %#X */
4606 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4607 assert(pbuf[0] == '0');
4608 assert(pbuf[1] == c);
4609 if (fill != ' ') {
4610 *res++ = *pbuf++;
4611 *res++ = *pbuf++;
4612 }
4613 rescnt -= 2;
4614 width -= 2;
4615 if (width < 0)
4616 width = 0;
4617 len -= 2;
4618 }
/* padding on the left, unless left-justified */
4619 if (width > len && !(flags & F_LJUST)) {
4620 do {
4621 --rescnt;
4622 *res++ = fill;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
4623 } while (--width > len);
4624 }
4625 if (fill == ' ') {
4626 if (sign)
4627 *res++ = sign;
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
4628 if ((flags & F_ALT) &&
4629 (c == 'x' || c == 'X')) {
4630 assert(pbuf[0] == '0');
4631 assert(pbuf[1] == c);
4632 *res++ = *pbuf++;
4633 *res++ = *pbuf++;
4634 }
4635 }
4636 Py_MEMCPY(res, pbuf, len);
4637 res += len;
4638 rescnt -= len;
/* any width still left over is padded with spaces on the right */
4639 while (--width >= len) {
4640 --rescnt;
4641 *res++ = ' ';
4642 }
4643 if (dict && (argidx < arglen) && c != '%') {
4644 PyErr_SetString(PyExc_TypeError,
4645 "not all arguments converted during string formatting");
4646 Py_XDECREF(temp);
4647 goto error;
4648 }
4649 Py_XDECREF(temp);
4650 } /* '%' */
4651 } /* until end */
/* leftover positional arguments are an error */
4652 if (argidx < arglen && !dict) {
4653 PyErr_SetString(PyExc_TypeError,
4654 "not all arguments converted during string formatting");
4655 goto error;
4656 }
4657 if (args_owned) {
4658 Py_DECREF(args);
4659 }
/* trim result to the bytes actually written */
4660 if (_PyString_Resize(&result, reslen - rescnt))
4661 return NULL;
4662 return result;
4663
4664 #ifdef Py_USING_UNICODE
4665 unicode:
4666 if (args_owned) {
4667 Py_DECREF(args);
4668 args_owned = 0;
4669 }
4670 /* Fiddle args right (remove the first argidx arguments) */
4671 if (PyTuple_Check(orig_args) && argidx > 0) {
4672 PyObject *v;
4673 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4674 v = PyTuple_New(n);
4675 if (v == NULL)
4676 goto error;
4677 while (--n >= 0) {
4678 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4679 Py_INCREF(w);
4680 PyTuple_SET_ITEM(v, n, w);
4681 }
4682 args = v;
4683 } else {
4684 Py_INCREF(orig_args);
4685 args = orig_args;
4686 }
4687 args_owned = 1;
4688 /* Take what we have of the result and let the Unicode formatting
4689 function format the rest of the input. */
4690 rescnt = res - PyString_AS_STRING(result);
/* NOTE(review): if _PyString_Resize() leaves result NULL on failure,
 * this `goto error` reaches Py_DECREF(result) with a NULL pointer —
 * presumably what the analyzer annotations at error: refer to; confirm. */
4691 if (_PyString_Resize(&result, rescnt))
4692 goto error;
4693 fmtcnt = PyString_GET_SIZE(format) - \
4694 (fmt - PyString_AS_STRING(format));
4695 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4696 if (format == NULL)
4697 goto error;
4698 v = PyUnicode_Format(format, args);
4699 Py_DECREF(format);
4700 if (v == NULL)
4701 goto error;
4702 /* Paste what we have (result) to what the Unicode formatting
4703 function returned (v) and return the result (or error) */
4704 w = PyUnicode_Concat(result, v);
4705 Py_DECREF(result);
4706 Py_DECREF(v);
4707 Py_DECREF(args);
4708 return w;
4709 #endif /* Py_USING_UNICODE */
4710
/* common failure exit: release result and, if owned, args */
4711 error:
4712 Py_DECREF(result);
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
4713 if (args_owned) {
4714 Py_DECREF(args);
4715 }
4716 return NULL;
4717 }
4718
/* Intern the string *p in place.
 *
 * Calls Py_FatalError() if *p is NULL or not a string. Does nothing for
 * string subclasses or for strings that are already interned. If the
 * `interned` dict already has an entry for the string, *p is replaced by a
 * new reference to that entry and the old reference is released. Otherwise
 * the string is inserted as both key and value, its refcount is reduced by
 * 2 and it is marked SSTATE_INTERNED_MORTAL.
 *
 * Failures to create the dict or to insert into it are cleared with
 * PyErr_Clear() and leave *p un-interned; nothing is reported to the caller.
 */
4719 void
4720 PyString_InternInPlace(PyObject **p)
4721 {
4722 register PyStringObject *s = (PyStringObject *)(*p);
4723 PyObject *t;
4724 if (s == NULL || !PyString_Check(s))
4725 Py_FatalError("PyString_InternInPlace: strings only please!");
4726 /* If it's a string subclass, we don't really know what putting
4727 it in the interned dict might do. */
4728 if (!PyString_CheckExact(s))
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
(emitted by clang-analyzer)TODO: a detailed trace is available in the data model (not yet rendered in this report)
4729 return;
4730 if (PyString_CHECK_INTERNED(s))
4731 return;
/* the interned dict is created on first use */
4732 if (interned == NULL) {
4733 interned = PyDict_New();
4734 if (interned == NULL) {
4735 PyErr_Clear(); /* Don't leave an exception */
4736 return;
4737 }
4738 }
4739 t = PyDict_GetItem(interned, (PyObject *)s);
4740 if (t) {
/* already present: hand the caller the existing object instead */
4741 Py_INCREF(t);
4742 Py_DECREF(*p);
4743 *p = t;
4744 return;
4745 }
4746
4747 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4748 PyErr_Clear();
4749 return;
4750 }
4751 /* The two references in interned are not counted by refcnt.
4752 The string deallocator will take care of this */
4753 Py_REFCNT(s) -= 2;
4754 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4755 }
4756
/* Intern *p via PyString_InternInPlace() and then mark it immortal: unless
 * its state is already SSTATE_INTERNED_IMMORTAL, that state is set and one
 * extra reference is taken.
 * NOTE(review): the state is written even when PyString_InternInPlace()
 * returned early without interning (string subclass, dict failure) —
 * confirm that this is intended.
 */
4757 void
4758 PyString_InternImmortal(PyObject **p)
4759 {
4760 PyString_InternInPlace(p);
4761 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4762 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4763 Py_INCREF(*p);
4764 }
4765 }
4766
4767
/* Create a string object from the C string cp and pass it through
 * PyString_InternInPlace().
 * Returns the resulting object (which PyString_InternInPlace() may have
 * swapped for an existing interned one), or NULL if PyString_FromString()
 * fails.
 */
4768 PyObject *
4769 PyString_InternFromString(const char *cp)
4770 {
4771 PyObject *s = PyString_FromString(cp);
4772 if (s == NULL)
4773 return NULL;
4774 PyString_InternInPlace(&s);
4775 return s;
4776 }
4777
/* Drop the references held in the file-level characters[] table
 * (UCHAR_MAX + 1 slots) and in nullstring, resetting each slot to NULL.
 * Py_XDECREF is used, so slots that are already NULL are skipped.
 */
4778 void
4779 PyString_Fini(void)
4780 {
4781 int i;
4782 for (i = 0; i < UCHAR_MAX + 1; i++) {
4783 Py_XDECREF(characters[i]);
4784 characters[i] = NULL;
4785 }
4786 Py_XDECREF(nullstring);
4787 nullstring = NULL;
4788 }
4789
/* Undo interning for every key of the `interned` dict, report the count
 * and total sizes on stderr, then clear and release the dict.
 *
 * For each key the references that interning left uncounted are given
 * back (+1 for immortal, +2 for mortal strings) and ob_sstate is reset to
 * SSTATE_NOT_INTERNED. Returns silently if `interned` is NULL or not a
 * dict, or if PyDict_Keys() does not return a list (the error is cleared).
 */
4790 void _Py_ReleaseInternedStrings(void)
4791 {
4792 PyObject *keys;
4793 PyStringObject *s;
4794 Py_ssize_t i, n;
4795 Py_ssize_t immortal_size = 0, mortal_size = 0;
4796
4797 if (interned == NULL || !PyDict_Check(interned))
4798 return;
4799 keys = PyDict_Keys(interned);
4800 if (keys == NULL || !PyList_Check(keys)) {
4801 PyErr_Clear();
4802 return;
4803 }
4804
4805 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4806 detector, interned strings are not forcibly deallocated; rather, we
4807 give them their stolen references back, and then clear and DECREF
4808 the interned dict. */
4809
4810 n = PyList_GET_SIZE(keys);
4811 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4812 n);
4813 for (i = 0; i < n; i++) {
4814 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4815 switch (s->ob_sstate) {
4816 case SSTATE_NOT_INTERNED:
4817 /* XXX Shouldn't happen */
4818 break;
4819 case SSTATE_INTERNED_IMMORTAL:
4820 Py_REFCNT(s) += 1;
4821 immortal_size += Py_SIZE(s);
4822 break;
4823 case SSTATE_INTERNED_MORTAL:
4824 Py_REFCNT(s) += 2;
4825 mortal_size += Py_SIZE(s);
4826 break;
4827 default:
4828 Py_FatalError("Inconsistent interned string state.");
4829 }
4830 s->ob_sstate = SSTATE_NOT_INTERNED;
4831 }
4832 fprintf(stderr, "total size of all interned strings: "
4833 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4834 "mortal/immortal\n", mortal_size, immortal_size);
4835 Py_DECREF(keys);
4836 PyDict_Clear(interned);
4837 Py_DECREF(interned);
4838 interned = NULL;
4839 }
4840
/* Write a summary of the interned strings to `out`: the number of mortal
 * and immortal entries and the sum of their ob_size values.
 *
 * Walks the dict's ma_table directly (slots 0 through ma_mask) and counts
 * every slot whose me_value is non-NULL. The dict and the strings are only
 * read. Does nothing if `interned` is NULL or not a dict.
 */
4841 void _PyString_DebugMallocStats(FILE *out)
4842 {
4843 ssize_t i;
4844 int num_immortal = 0, num_mortal = 0;
4845 ssize_t immortal_size = 0, mortal_size = 0;
4846
4847 if (interned == NULL || !PyDict_Check(interned))
4848 return;
4849
4850 for (i = 0; i <= ((PyDictObject*)interned)->ma_mask; i++) {
4851 PyDictEntry *ep = ((PyDictObject*)interned)->ma_table + i;
4852 PyObject *pvalue = ep->me_value;
/* only slots with a value are counted */
4853 if (pvalue != NULL) {
4854 PyStringObject *s = (PyStringObject *)ep->me_key;
4855
4856 switch (s->ob_sstate) {
4857 case SSTATE_NOT_INTERNED:
4858 /* XXX Shouldn't happen */
4859 break;
4860 case SSTATE_INTERNED_IMMORTAL:
4861 num_immortal ++;
4862 immortal_size += s->ob_size;
4863 break;
4864 case SSTATE_INTERNED_MORTAL:
4865 num_mortal ++;
4866 mortal_size += s->ob_size;
4867 break;
4868 default:
4869 Py_FatalError("Inconsistent interned string state.");
4870 }
4871 }
4872 }
4873
4874 fprintf(out, "%d mortal interned strings\n", num_mortal);
4875 fprintf(out, "%d immortal interned strings\n", num_immortal);
4876 fprintf(out, "total size of all interned strings: "
4877 "%zi/%zi "
4878 "mortal/immortal\n", mortal_size, immortal_size);
4879 }