Python-2.7.3/Modules/stropmodule.c

No issues found

   1 /* strop module */
   2 
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include <ctype.h>
   6 
   7 PyDoc_STRVAR(strop_module__doc__,
   8 "Common string manipulations, optimized for speed.\n"
   9 "\n"
  10 "Always use \"import string\" rather than referencing\n"
  11 "this module directly.");
  12 
  13 /* XXX This file assumes that the <ctype.h> is*() functions
  14    XXX are defined for all 8-bit characters! */
  15 
  16 #define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
  17                "strop functions are obsolete; use string methods")) \
  18          return NULL
  19 
  20 /* The lstrip(), rstrip() and strip() functions are implemented
  21    in do_strip(), which uses an additional parameter to indicate what
  22    type of strip should occur. */
  23 
  24 #define LEFTSTRIP 0
  25 #define RIGHTSTRIP 1
  26 #define BOTHSTRIP 2
  27 
  28 
  29 static PyObject *
  30 split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
  31 {
  32     Py_ssize_t i = 0, j;
  33     int err;
  34     Py_ssize_t countsplit = 0;
  35     PyObject* item;
  36     PyObject *list = PyList_New(0);
  37 
  38     if (list == NULL)
  39         return NULL;
  40 
  41     while (i < len) {
  42         while (i < len && isspace(Py_CHARMASK(s[i]))) {
  43             i = i+1;
  44         }
  45         j = i;
  46         while (i < len && !isspace(Py_CHARMASK(s[i]))) {
  47             i = i+1;
  48         }
  49         if (j < i) {
  50             item = PyString_FromStringAndSize(s+j, i-j);
  51             if (item == NULL)
  52                 goto finally;
  53 
  54             err = PyList_Append(list, item);
  55             Py_DECREF(item);
  56             if (err < 0)
  57                 goto finally;
  58 
  59             countsplit++;
  60             while (i < len && isspace(Py_CHARMASK(s[i]))) {
  61                 i = i+1;
  62             }
  63             if (maxsplit && (countsplit >= maxsplit) && i < len) {
  64                 item = PyString_FromStringAndSize(
  65                     s+i, len - i);
  66                 if (item == NULL)
  67                     goto finally;
  68 
  69                 err = PyList_Append(list, item);
  70                 Py_DECREF(item);
  71                 if (err < 0)
  72                     goto finally;
  73 
  74                 i = len;
  75             }
  76         }
  77     }
  78     return list;
  79   finally:
  80     Py_DECREF(list);
  81     return NULL;
  82 }
  83 
  84 
  85 PyDoc_STRVAR(splitfields__doc__,
  86 "split(s [,sep [,maxsplit]]) -> list of strings\n"
  87 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
  88 "\n"
  89 "Return a list of the words in the string s, using sep as the\n"
  90 "delimiter string.  If maxsplit is nonzero, splits into at most\n"
  91 "maxsplit words.  If sep is not specified, any whitespace string\n"
  92 "is a separator.  Maxsplit defaults to 0.\n"
  93 "\n"
  94 "(split and splitfields are synonymous)");
  95 
  96 static PyObject *
  97 strop_splitfields(PyObject *self, PyObject *args)
  98 {
  99     Py_ssize_t len, n, i, j, err;
 100     Py_ssize_t splitcount, maxsplit;
 101     char *s, *sub;
 102     PyObject *list, *item;
 103 
 104     WARN;
 105     sub = NULL;
 106     n = 0;
 107     splitcount = 0;
 108     maxsplit = 0;
 109     if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
 110         return NULL;
 111     if (sub == NULL)
 112         return split_whitespace(s, len, maxsplit);
 113     if (n == 0) {
 114         PyErr_SetString(PyExc_ValueError, "empty separator");
 115         return NULL;
 116     }
 117 
 118     list = PyList_New(0);
 119     if (list == NULL)
 120         return NULL;
 121 
 122     i = j = 0;
 123     while (i+n <= len) {
 124         if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
 125             item = PyString_FromStringAndSize(s+j, i-j);
 126             if (item == NULL)
 127                 goto fail;
 128             err = PyList_Append(list, item);
 129             Py_DECREF(item);
 130             if (err < 0)
 131                 goto fail;
 132             i = j = i + n;
 133             splitcount++;
 134             if (maxsplit && (splitcount >= maxsplit))
 135                 break;
 136         }
 137         else
 138             i++;
 139     }
 140     item = PyString_FromStringAndSize(s+j, len-j);
 141     if (item == NULL)
 142         goto fail;
 143     err = PyList_Append(list, item);
 144     Py_DECREF(item);
 145     if (err < 0)
 146         goto fail;
 147 
 148     return list;
 149 
 150  fail:
 151     Py_DECREF(list);
 152     return NULL;
 153 }
 154 
 155 
 156 PyDoc_STRVAR(joinfields__doc__,
 157 "join(list [,sep]) -> string\n"
 158 "joinfields(list [,sep]) -> string\n"
 159 "\n"
 160 "Return a string composed of the words in list, with\n"
 161 "intervening occurrences of sep.  Sep defaults to a single\n"
 162 "space.\n"
 163 "\n"
 164 "(join and joinfields are synonymous)");
 165 
 166 static PyObject *
 167 strop_joinfields(PyObject *self, PyObject *args)
 168 {
 169     PyObject *seq;
 170     char *sep = NULL;
 171     Py_ssize_t seqlen, seplen = 0;
 172     Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
 173     PyObject *res = NULL;
 174     char* p = NULL;
 175     ssizeargfunc getitemfunc;
 176 
 177     WARN;
 178     if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
 179         return NULL;
 180     if (sep == NULL) {
 181         sep = " ";
 182         seplen = 1;
 183     }
 184 
 185     seqlen = PySequence_Size(seq);
 186     if (seqlen < 0 && PyErr_Occurred())
 187         return NULL;
 188 
 189     if (seqlen == 1) {
 190         /* Optimization if there's only one item */
 191         PyObject *item = PySequence_GetItem(seq, 0);
 192         if (item && !PyString_Check(item)) {
 193             PyErr_SetString(PyExc_TypeError,
 194                      "first argument must be sequence of strings");
 195             Py_DECREF(item);
 196             return NULL;
 197         }
 198         return item;
 199     }
 200 
 201     if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 202         return NULL;
 203     p = PyString_AsString(res);
 204 
 205     /* optimize for lists, since it's the most common case.  all others
 206      * (tuples and arbitrary sequences) just use the sequence abstract
 207      * interface.
 208      */
 209     if (PyList_Check(seq)) {
 210         for (i = 0; i < seqlen; i++) {
 211             PyObject *item = PyList_GET_ITEM(seq, i);
 212             if (!PyString_Check(item)) {
 213                 PyErr_SetString(PyExc_TypeError,
 214                 "first argument must be sequence of strings");
 215                 Py_DECREF(res);
 216                 return NULL;
 217             }
 218             slen = PyString_GET_SIZE(item);
 219             if (slen > PY_SSIZE_T_MAX - reslen ||
 220                 seplen > PY_SSIZE_T_MAX - reslen - seplen) {
 221                 PyErr_SetString(PyExc_OverflowError,
 222                                 "input too long");
 223                 Py_DECREF(res);
 224                 return NULL;
 225             }
 226             while (reslen + slen + seplen >= sz) {
 227                 if (_PyString_Resize(&res, sz * 2) < 0)
 228                     return NULL;
 229                 sz *= 2;
 230                 p = PyString_AsString(res) + reslen;
 231             }
 232             if (i > 0) {
 233                 memcpy(p, sep, seplen);
 234                 p += seplen;
 235                 reslen += seplen;
 236             }
 237             memcpy(p, PyString_AS_STRING(item), slen);
 238             p += slen;
 239             reslen += slen;
 240         }
 241         _PyString_Resize(&res, reslen);
 242         return res;
 243     }
 244 
 245     if (seq->ob_type->tp_as_sequence == NULL ||
 246              (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
 247     {
 248         PyErr_SetString(PyExc_TypeError,
 249                         "first argument must be a sequence");
 250         return NULL;
 251     }
 252     /* This is now type safe */
 253     for (i = 0; i < seqlen; i++) {
 254         PyObject *item = getitemfunc(seq, i);
 255         if (!item || !PyString_Check(item)) {
 256             PyErr_SetString(PyExc_TypeError,
 257                      "first argument must be sequence of strings");
 258             Py_DECREF(res);
 259             Py_XDECREF(item);
 260             return NULL;
 261         }
 262         slen = PyString_GET_SIZE(item);
 263         if (slen > PY_SSIZE_T_MAX - reslen ||
 264             seplen > PY_SSIZE_T_MAX - reslen - seplen) {
 265             PyErr_SetString(PyExc_OverflowError,
 266                             "input too long");
 267             Py_DECREF(res);
 268             Py_XDECREF(item);
 269             return NULL;
 270         }
 271         while (reslen + slen + seplen >= sz) {
 272             if (_PyString_Resize(&res, sz * 2) < 0) {
 273                 Py_DECREF(item);
 274                 return NULL;
 275             }
 276             sz *= 2;
 277             p = PyString_AsString(res) + reslen;
 278         }
 279         if (i > 0) {
 280             memcpy(p, sep, seplen);
 281             p += seplen;
 282             reslen += seplen;
 283         }
 284         memcpy(p, PyString_AS_STRING(item), slen);
 285         p += slen;
 286         reslen += slen;
 287         Py_DECREF(item);
 288     }
 289     _PyString_Resize(&res, reslen);
 290     return res;
 291 }
 292 
 293 
 294 PyDoc_STRVAR(find__doc__,
 295 "find(s, sub [,start [,end]]) -> in\n"
 296 "\n"
 297 "Return the lowest index in s where substring sub is found,\n"
 298 "such that sub is contained within s[start,end].  Optional\n"
 299 "arguments start and end are interpreted as in slice notation.\n"
 300 "\n"
 301 "Return -1 on failure.");
 302 
 303 static PyObject *
 304 strop_find(PyObject *self, PyObject *args)
 305 {
 306     char *s, *sub;
 307     Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
 308 
 309     WARN;
 310     if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
 311         return NULL;
 312 
 313     if (last > len)
 314         last = len;
 315     if (last < 0)
 316         last += len;
 317     if (last < 0)
 318         last = 0;
 319     if (i < 0)
 320         i += len;
 321     if (i < 0)
 322         i = 0;
 323 
 324     if (n == 0 && i <= last)
 325         return PyInt_FromLong((long)i);
 326 
 327     last -= n;
 328     for (; i <= last; ++i)
 329         if (s[i] == sub[0] &&
 330             (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
 331             return PyInt_FromLong((long)i);
 332 
 333     return PyInt_FromLong(-1L);
 334 }
 335 
 336 
 337 PyDoc_STRVAR(rfind__doc__,
 338 "rfind(s, sub [,start [,end]]) -> int\n"
 339 "\n"
 340 "Return the highest index in s where substring sub is found,\n"
 341 "such that sub is contained within s[start,end].  Optional\n"
 342 "arguments start and end are interpreted as in slice notation.\n"
 343 "\n"
 344 "Return -1 on failure.");
 345 
 346 static PyObject *
 347 strop_rfind(PyObject *self, PyObject *args)
 348 {
 349     char *s, *sub;
 350     Py_ssize_t len, n, j;
 351     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
 352 
 353     WARN;
 354     if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
 355         return NULL;
 356 
 357     if (last > len)
 358         last = len;
 359     if (last < 0)
 360         last += len;
 361     if (last < 0)
 362         last = 0;
 363     if (i < 0)
 364         i += len;
 365     if (i < 0)
 366         i = 0;
 367 
 368     if (n == 0 && i <= last)
 369         return PyInt_FromLong((long)last);
 370 
 371     for (j = last-n; j >= i; --j)
 372         if (s[j] == sub[0] &&
 373             (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
 374             return PyInt_FromLong((long)j);
 375 
 376     return PyInt_FromLong(-1L);
 377 }
 378 
 379 
 380 static PyObject *
 381 do_strip(PyObject *args, int striptype)
 382 {
 383     char *s;
 384     Py_ssize_t len, i, j;
 385 
 386 
 387     if (PyString_AsStringAndSize(args, &s, &len))
 388         return NULL;
 389 
 390     i = 0;
 391     if (striptype != RIGHTSTRIP) {
 392         while (i < len && isspace(Py_CHARMASK(s[i]))) {
 393             i++;
 394         }
 395     }
 396 
 397     j = len;
 398     if (striptype != LEFTSTRIP) {
 399         do {
 400             j--;
 401         } while (j >= i && isspace(Py_CHARMASK(s[j])));
 402         j++;
 403     }
 404 
 405     if (i == 0 && j == len) {
 406         Py_INCREF(args);
 407         return args;
 408     }
 409     else
 410         return PyString_FromStringAndSize(s+i, j-i);
 411 }
 412 
 413 
 414 PyDoc_STRVAR(strip__doc__,
 415 "strip(s) -> string\n"
 416 "\n"
 417 "Return a copy of the string s with leading and trailing\n"
 418 "whitespace removed.");
 419 
 420 static PyObject *
 421 strop_strip(PyObject *self, PyObject *args)
 422 {
 423     WARN;
 424     return do_strip(args, BOTHSTRIP);
 425 }
 426 
 427 
 428 PyDoc_STRVAR(lstrip__doc__,
 429 "lstrip(s) -> string\n"
 430 "\n"
 431 "Return a copy of the string s with leading whitespace removed.");
 432 
 433 static PyObject *
 434 strop_lstrip(PyObject *self, PyObject *args)
 435 {
 436     WARN;
 437     return do_strip(args, LEFTSTRIP);
 438 }
 439 
 440 
 441 PyDoc_STRVAR(rstrip__doc__,
 442 "rstrip(s) -> string\n"
 443 "\n"
 444 "Return a copy of the string s with trailing whitespace removed.");
 445 
 446 static PyObject *
 447 strop_rstrip(PyObject *self, PyObject *args)
 448 {
 449     WARN;
 450     return do_strip(args, RIGHTSTRIP);
 451 }
 452 
 453 
 454 PyDoc_STRVAR(lower__doc__,
 455 "lower(s) -> string\n"
 456 "\n"
 457 "Return a copy of the string s converted to lowercase.");
 458 
 459 static PyObject *
 460 strop_lower(PyObject *self, PyObject *args)
 461 {
 462     char *s, *s_new;
 463     Py_ssize_t i, n;
 464     PyObject *newstr;
 465     int changed;
 466 
 467     WARN;
 468     if (PyString_AsStringAndSize(args, &s, &n))
 469         return NULL;
 470     newstr = PyString_FromStringAndSize(NULL, n);
 471     if (newstr == NULL)
 472         return NULL;
 473     s_new = PyString_AsString(newstr);
 474     changed = 0;
 475     for (i = 0; i < n; i++) {
 476         int c = Py_CHARMASK(*s++);
 477         if (isupper(c)) {
 478             changed = 1;
 479             *s_new = tolower(c);
 480         } else
 481             *s_new = c;
 482         s_new++;
 483     }
 484     if (!changed) {
 485         Py_DECREF(newstr);
 486         Py_INCREF(args);
 487         return args;
 488     }
 489     return newstr;
 490 }
 491 
 492 
 493 PyDoc_STRVAR(upper__doc__,
 494 "upper(s) -> string\n"
 495 "\n"
 496 "Return a copy of the string s converted to uppercase.");
 497 
 498 static PyObject *
 499 strop_upper(PyObject *self, PyObject *args)
 500 {
 501     char *s, *s_new;
 502     Py_ssize_t i, n;
 503     PyObject *newstr;
 504     int changed;
 505 
 506     WARN;
 507     if (PyString_AsStringAndSize(args, &s, &n))
 508         return NULL;
 509     newstr = PyString_FromStringAndSize(NULL, n);
 510     if (newstr == NULL)
 511         return NULL;
 512     s_new = PyString_AsString(newstr);
 513     changed = 0;
 514     for (i = 0; i < n; i++) {
 515         int c = Py_CHARMASK(*s++);
 516         if (islower(c)) {
 517             changed = 1;
 518             *s_new = toupper(c);
 519         } else
 520             *s_new = c;
 521         s_new++;
 522     }
 523     if (!changed) {
 524         Py_DECREF(newstr);
 525         Py_INCREF(args);
 526         return args;
 527     }
 528     return newstr;
 529 }
 530 
 531 
 532 PyDoc_STRVAR(capitalize__doc__,
 533 "capitalize(s) -> string\n"
 534 "\n"
 535 "Return a copy of the string s with only its first character\n"
 536 "capitalized.");
 537 
 538 static PyObject *
 539 strop_capitalize(PyObject *self, PyObject *args)
 540 {
 541     char *s, *s_new;
 542     Py_ssize_t i, n;
 543     PyObject *newstr;
 544     int changed;
 545 
 546     WARN;
 547     if (PyString_AsStringAndSize(args, &s, &n))
 548         return NULL;
 549     newstr = PyString_FromStringAndSize(NULL, n);
 550     if (newstr == NULL)
 551         return NULL;
 552     s_new = PyString_AsString(newstr);
 553     changed = 0;
 554     if (0 < n) {
 555         int c = Py_CHARMASK(*s++);
 556         if (islower(c)) {
 557             changed = 1;
 558             *s_new = toupper(c);
 559         } else
 560             *s_new = c;
 561         s_new++;
 562     }
 563     for (i = 1; i < n; i++) {
 564         int c = Py_CHARMASK(*s++);
 565         if (isupper(c)) {
 566             changed = 1;
 567             *s_new = tolower(c);
 568         } else
 569             *s_new = c;
 570         s_new++;
 571     }
 572     if (!changed) {
 573         Py_DECREF(newstr);
 574         Py_INCREF(args);
 575         return args;
 576     }
 577     return newstr;
 578 }
 579 
 580 
 581 PyDoc_STRVAR(expandtabs__doc__,
 582 "expandtabs(string, [tabsize]) -> string\n"
 583 "\n"
 584 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
 585 "depending on the current column and the given tab size (default 8).\n"
 586 "The column number is reset to zero after each newline occurring in the\n"
 587 "string.  This doesn't understand other non-printing characters.");
 588 
 589 static PyObject *
 590 strop_expandtabs(PyObject *self, PyObject *args)
 591 {
 592     /* Original by Fredrik Lundh */
 593     char* e;
 594     char* p;
 595     char* q;
 596     Py_ssize_t i, j, old_j;
 597     PyObject* out;
 598     char* string;
 599     Py_ssize_t stringlen;
 600     int tabsize = 8;
 601 
 602     WARN;
 603     /* Get arguments */
 604     if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
 605         return NULL;
 606     if (tabsize < 1) {
 607         PyErr_SetString(PyExc_ValueError,
 608                         "tabsize must be at least 1");
 609         return NULL;
 610     }
 611 
 612     /* First pass: determine size of output string */
 613     i = j = old_j = 0; /* j: current column; i: total of previous lines */
 614     e = string + stringlen;
 615     for (p = string; p < e; p++) {
 616         if (*p == '\t') {
 617             j += tabsize - (j%tabsize);
 618             if (old_j > j) {
 619                 PyErr_SetString(PyExc_OverflowError,
 620                                 "new string is too long");
 621                 return NULL;
 622             }
 623             old_j = j;
 624         } else {
 625             j++;
 626             if (*p == '\n') {
 627                 i += j;
 628                 j = 0;
 629             }
 630         }
 631     }
 632 
 633     if ((i + j) < 0) {
 634         PyErr_SetString(PyExc_OverflowError, "new string is too long");
 635         return NULL;
 636     }
 637 
 638     /* Second pass: create output string and fill it */
 639     out = PyString_FromStringAndSize(NULL, i+j);
 640     if (out == NULL)
 641         return NULL;
 642 
 643     i = 0;
 644     q = PyString_AS_STRING(out);
 645 
 646     for (p = string; p < e; p++) {
 647         if (*p == '\t') {
 648             j = tabsize - (i%tabsize);
 649             i += j;
 650             while (j-- > 0)
 651                 *q++ = ' ';
 652         } else {
 653             *q++ = *p;
 654             i++;
 655             if (*p == '\n')
 656                 i = 0;
 657         }
 658     }
 659 
 660     return out;
 661 }
 662 
 663 
 664 PyDoc_STRVAR(count__doc__,
 665 "count(s, sub[, start[, end]]) -> int\n"
 666 "\n"
 667 "Return the number of occurrences of substring sub in string\n"
 668 "s[start:end].  Optional arguments start and end are\n"
 669 "interpreted as in slice notation.");
 670 
 671 static PyObject *
 672 strop_count(PyObject *self, PyObject *args)
 673 {
 674     char *s, *sub;
 675     Py_ssize_t len, n;
 676     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
 677     Py_ssize_t m, r;
 678 
 679     WARN;
 680     if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
 681         return NULL;
 682     if (last > len)
 683         last = len;
 684     if (last < 0)
 685         last += len;
 686     if (last < 0)
 687         last = 0;
 688     if (i < 0)
 689         i += len;
 690     if (i < 0)
 691         i = 0;
 692     m = last + 1 - n;
 693     if (n == 0)
 694         return PyInt_FromLong((long) (m-i));
 695 
 696     r = 0;
 697     while (i < m) {
 698         if (!memcmp(s+i, sub, n)) {
 699             r++;
 700             i += n;
 701         } else {
 702             i++;
 703         }
 704     }
 705     return PyInt_FromLong((long) r);
 706 }
 707 
 708 
 709 PyDoc_STRVAR(swapcase__doc__,
 710 "swapcase(s) -> string\n"
 711 "\n"
 712 "Return a copy of the string s with upper case characters\n"
 713 "converted to lowercase and vice versa.");
 714 
 715 static PyObject *
 716 strop_swapcase(PyObject *self, PyObject *args)
 717 {
 718     char *s, *s_new;
 719     Py_ssize_t i, n;
 720     PyObject *newstr;
 721     int changed;
 722 
 723     WARN;
 724     if (PyString_AsStringAndSize(args, &s, &n))
 725         return NULL;
 726     newstr = PyString_FromStringAndSize(NULL, n);
 727     if (newstr == NULL)
 728         return NULL;
 729     s_new = PyString_AsString(newstr);
 730     changed = 0;
 731     for (i = 0; i < n; i++) {
 732         int c = Py_CHARMASK(*s++);
 733         if (islower(c)) {
 734             changed = 1;
 735             *s_new = toupper(c);
 736         }
 737         else if (isupper(c)) {
 738             changed = 1;
 739             *s_new = tolower(c);
 740         }
 741         else
 742             *s_new = c;
 743         s_new++;
 744     }
 745     if (!changed) {
 746         Py_DECREF(newstr);
 747         Py_INCREF(args);
 748         return args;
 749     }
 750     return newstr;
 751 }
 752 
 753 
 754 PyDoc_STRVAR(atoi__doc__,
 755 "atoi(s [,base]) -> int\n"
 756 "\n"
 757 "Return the integer represented by the string s in the given\n"
 758 "base, which defaults to 10.  The string s must consist of one\n"
 759 "or more digits, possibly preceded by a sign.  If base is 0, it\n"
 760 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
 761 "0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
 762 "accepted.");
 763 
 764 static PyObject *
 765 strop_atoi(PyObject *self, PyObject *args)
 766 {
 767     char *s, *end;
 768     int base = 10;
 769     long x;
 770     char buffer[256]; /* For errors */
 771 
 772     WARN;
 773     if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
 774         return NULL;
 775 
 776     if ((base != 0 && base < 2) || base > 36) {
 777         PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
 778         return NULL;
 779     }
 780 
 781     while (*s && isspace(Py_CHARMASK(*s)))
 782         s++;
 783     errno = 0;
 784     if (base == 0 && s[0] == '0')
 785         x = (long) PyOS_strtoul(s, &end, base);
 786     else
 787         x = PyOS_strtol(s, &end, base);
 788     if (end == s || !isalnum(Py_CHARMASK(end[-1])))
 789         goto bad;
 790     while (*end && isspace(Py_CHARMASK(*end)))
 791         end++;
 792     if (*end != '\0') {
 793   bad:
 794         PyOS_snprintf(buffer, sizeof(buffer),
 795                       "invalid literal for atoi(): %.200s", s);
 796         PyErr_SetString(PyExc_ValueError, buffer);
 797         return NULL;
 798     }
 799     else if (errno != 0) {
 800         PyOS_snprintf(buffer, sizeof(buffer),
 801                       "atoi() literal too large: %.200s", s);
 802         PyErr_SetString(PyExc_ValueError, buffer);
 803         return NULL;
 804     }
 805     return PyInt_FromLong(x);
 806 }
 807 
 808 
 809 PyDoc_STRVAR(atol__doc__,
 810 "atol(s [,base]) -> long\n"
 811 "\n"
 812 "Return the long integer represented by the string s in the\n"
 813 "given base, which defaults to 10.  The string s must consist\n"
 814 "of one or more digits, possibly preceded by a sign.  If base\n"
 815 "is 0, it is chosen from the leading characters of s, 0 for\n"
 816 "octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
 817 "0x or 0X is accepted.  A trailing L or l is not accepted,\n"
 818 "unless base is 0.");
 819 
 820 static PyObject *
 821 strop_atol(PyObject *self, PyObject *args)
 822 {
 823     char *s, *end;
 824     int base = 10;
 825     PyObject *x;
 826     char buffer[256]; /* For errors */
 827 
 828     WARN;
 829     if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
 830         return NULL;
 831 
 832     if ((base != 0 && base < 2) || base > 36) {
 833         PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
 834         return NULL;
 835     }
 836 
 837     while (*s && isspace(Py_CHARMASK(*s)))
 838         s++;
 839     if (s[0] == '\0') {
 840         PyErr_SetString(PyExc_ValueError, "empty string for atol()");
 841         return NULL;
 842     }
 843     x = PyLong_FromString(s, &end, base);
 844     if (x == NULL)
 845         return NULL;
 846     if (base == 0 && (*end == 'l' || *end == 'L'))
 847         end++;
 848     while (*end && isspace(Py_CHARMASK(*end)))
 849         end++;
 850     if (*end != '\0') {
 851         PyOS_snprintf(buffer, sizeof(buffer),
 852                       "invalid literal for atol(): %.200s", s);
 853         PyErr_SetString(PyExc_ValueError, buffer);
 854         Py_DECREF(x);
 855         return NULL;
 856     }
 857     return x;
 858 }
 859 
 860 
 861 PyDoc_STRVAR(atof__doc__,
 862 "atof(s) -> float\n"
 863 "\n"
 864 "Return the floating point number represented by the string s.");
 865 
 866 static PyObject *
 867 strop_atof(PyObject *self, PyObject *args)
 868 {
 869     char *s, *end;
 870     double x;
 871     char buffer[256]; /* For errors */
 872 
 873     WARN;
 874     if (!PyArg_ParseTuple(args, "s:atof", &s))
 875         return NULL;
 876     while (*s && isspace(Py_CHARMASK(*s)))
 877         s++;
 878     if (s[0] == '\0') {
 879         PyErr_SetString(PyExc_ValueError, "empty string for atof()");
 880         return NULL;
 881     }
 882 
 883     PyFPE_START_PROTECT("strop_atof", return 0)
 884     x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
 885     PyFPE_END_PROTECT(x)
 886     if (x == -1 && PyErr_Occurred())
 887         return NULL;
 888     while (*end && isspace(Py_CHARMASK(*end)))
 889         end++;
 890     if (*end != '\0') {
 891         PyOS_snprintf(buffer, sizeof(buffer),
 892                       "invalid literal for atof(): %.200s", s);
 893         PyErr_SetString(PyExc_ValueError, buffer);
 894         return NULL;
 895     }
 896     return PyFloat_FromDouble(x);
 897 }
 898 
 899 
 900 PyDoc_STRVAR(maketrans__doc__,
 901 "maketrans(frm, to) -> string\n"
 902 "\n"
 903 "Return a translation table (a string of 256 bytes long)\n"
 904 "suitable for use in string.translate.  The strings frm and to\n"
 905 "must be of the same length.");
 906 
 907 static PyObject *
 908 strop_maketrans(PyObject *self, PyObject *args)
 909 {
 910     unsigned char *c, *from=NULL, *to=NULL;
 911     Py_ssize_t i, fromlen=0, tolen=0;
 912     PyObject *result;
 913 
 914     if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
 915         return NULL;
 916 
 917     if (fromlen != tolen) {
 918         PyErr_SetString(PyExc_ValueError,
 919                         "maketrans arguments must have same length");
 920         return NULL;
 921     }
 922 
 923     result = PyString_FromStringAndSize((char *)NULL, 256);
 924     if (result == NULL)
 925         return NULL;
 926     c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
 927     for (i = 0; i < 256; i++)
 928         c[i]=(unsigned char)i;
 929     for (i = 0; i < fromlen; i++)
 930         c[from[i]]=to[i];
 931 
 932     return result;
 933 }
 934 
 935 
 936 PyDoc_STRVAR(translate__doc__,
 937 "translate(s,table [,deletechars]) -> string\n"
 938 "\n"
 939 "Return a copy of the string s, where all characters occurring\n"
 940 "in the optional argument deletechars are removed, and the\n"
 941 "remaining characters have been mapped through the given\n"
 942 "translation table, which must be a string of length 256.");
 943 
 944 static PyObject *
 945 strop_translate(PyObject *self, PyObject *args)
 946 {
 947     register char *input, *table, *output;
 948     Py_ssize_t i;
 949     int c, changed = 0;
 950     PyObject *input_obj;
 951     char *table1, *output_start, *del_table=NULL;
 952     Py_ssize_t inlen, tablen, dellen = 0;
 953     PyObject *result;
 954     int trans_table[256];
 955 
 956     WARN;
 957     if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
 958                           &table1, &tablen, &del_table, &dellen))
 959         return NULL;
 960     if (tablen != 256) {
 961         PyErr_SetString(PyExc_ValueError,
 962                       "translation table must be 256 characters long");
 963         return NULL;
 964     }
 965 
 966     table = table1;
 967     inlen = PyString_GET_SIZE(input_obj);
 968     result = PyString_FromStringAndSize((char *)NULL, inlen);
 969     if (result == NULL)
 970         return NULL;
 971     output_start = output = PyString_AsString(result);
 972     input = PyString_AsString(input_obj);
 973 
 974     if (dellen == 0) {
 975         /* If no deletions are required, use faster code */
 976         for (i = inlen; --i >= 0; ) {
 977             c = Py_CHARMASK(*input++);
 978             if (Py_CHARMASK((*output++ = table[c])) != c)
 979                 changed = 1;
 980         }
 981         if (changed)
 982             return result;
 983         Py_DECREF(result);
 984         Py_INCREF(input_obj);
 985         return input_obj;
 986     }
 987 
 988     for (i = 0; i < 256; i++)
 989         trans_table[i] = Py_CHARMASK(table[i]);
 990 
 991     for (i = 0; i < dellen; i++)
 992         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
 993 
 994     for (i = inlen; --i >= 0; ) {
 995         c = Py_CHARMASK(*input++);
 996         if (trans_table[c] != -1)
 997             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
 998                 continue;
 999         changed = 1;
1000     }
1001     if (!changed) {
1002         Py_DECREF(result);
1003         Py_INCREF(input_obj);
1004         return input_obj;
1005     }
1006     /* Fix the size of the resulting string */
1007     if (inlen > 0)
1008         _PyString_Resize(&result, output - output_start);
1009     return result;
1010 }
1011 
1012 
1013 /* What follows is used for implementing replace().  Perry Stoll. */
1014 
1015 /*
1016   mymemfind
1017 
1018   strstr replacement for arbitrary blocks of memory.
1019 
1020   Locates the first occurrence in the memory pointed to by MEM of the
1021   contents of memory pointed to by PAT.  Returns the index into MEM if
1022   found, or -1 if not found.  If len of PAT is greater than length of
1023   MEM, the function returns -1.
1024 */
1025 static Py_ssize_t
1026 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1027 {
1028     register Py_ssize_t ii;
1029 
1030     /* pattern can not occur in the last pat_len-1 chars */
1031     len -= pat_len;
1032 
1033     for (ii = 0; ii <= len; ii++) {
1034         if (mem[ii] == pat[0] &&
1035             (pat_len == 1 ||
1036              memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1037             return ii;
1038         }
1039     }
1040     return -1;
1041 }
1042 
1043 /*
1044   mymemcnt
1045 
1046    Return the number of distinct times PAT is found in MEM.
1047    meaning mem=1111 and pat==11 returns 2.
1048        mem=11111 and pat==11 also return 2.
1049  */
1050 static Py_ssize_t
1051 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1052 {
1053     register Py_ssize_t offset = 0;
1054     Py_ssize_t nfound = 0;
1055 
1056     while (len >= 0) {
1057         offset = mymemfind(mem, len, pat, pat_len);
1058         if (offset == -1)
1059             break;
1060         mem += offset + pat_len;
1061         len -= offset + pat_len;
1062         nfound++;
1063     }
1064     return nfound;
1065 }
1066 
1067 /*
1068    mymemreplace
1069 
1070    Return a string in which all occurrences of PAT in memory STR are
1071    replaced with SUB.
1072 
1073    If length of PAT is less than length of STR or there are no occurrences
1074    of PAT in STR, then the original string is returned. Otherwise, a new
1075    string is allocated here and returned.
1076 
1077    on return, out_len is:
1078        the length of output string, or
1079        -1 if the input string is returned, or
1080        unchanged if an error occurs (no memory).
1081 
1082    return value is:
1083        the new string allocated locally, or
1084        NULL if an error occurred.
1085 */
1086 static char *
1087 mymemreplace(const char *str, Py_ssize_t len,           /* input string */
1088          const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
1089          const char *sub, Py_ssize_t sub_len,           /* substitution string */
1090          Py_ssize_t count,                              /* number of replacements */
1091          Py_ssize_t *out_len)
1092 {
1093     char *out_s;
1094     char *new_s;
1095     Py_ssize_t nfound, offset, new_len;
1096 
1097     if (len == 0 || pat_len > len)
1098         goto return_same;
1099 
1100     /* find length of output string */
1101     nfound = mymemcnt(str, len, pat, pat_len);
1102     if (count < 0)
1103         count = PY_SSIZE_T_MAX;
1104     else if (nfound > count)
1105         nfound = count;
1106     if (nfound == 0)
1107         goto return_same;
1108 
1109     new_len = len + nfound*(sub_len - pat_len);
1110     if (new_len == 0) {
1111         /* Have to allocate something for the caller to free(). */
1112         out_s = (char *)PyMem_MALLOC(1);
1113         if (out_s == NULL)
1114             return NULL;
1115         out_s[0] = '\0';
1116     }
1117     else {
1118         assert(new_len > 0);
1119         new_s = (char *)PyMem_MALLOC(new_len);
1120         if (new_s == NULL)
1121             return NULL;
1122         out_s = new_s;
1123 
1124         for (; count > 0 && len > 0; --count) {
1125             /* find index of next instance of pattern */
1126             offset = mymemfind(str, len, pat, pat_len);
1127             if (offset == -1)
1128                 break;
1129 
1130             /* copy non matching part of input string */
1131             memcpy(new_s, str, offset);
1132             str += offset + pat_len;
1133             len -= offset + pat_len;
1134 
1135             /* copy substitute into the output string */
1136             new_s += offset;
1137             memcpy(new_s, sub, sub_len);
1138             new_s += sub_len;
1139         }
1140         /* copy any remaining values into output string */
1141         if (len > 0)
1142             memcpy(new_s, str, len);
1143     }
1144     *out_len = new_len;
1145     return out_s;
1146 
1147   return_same:
1148     *out_len = -1;
1149     return (char *)str; /* cast away const */
1150 }
1151 
1152 
1153 PyDoc_STRVAR(replace__doc__,
1154 "replace (str, old, new[, maxsplit]) -> string\n"
1155 "\n"
1156 "Return a copy of string str with all occurrences of substring\n"
1157 "old replaced by new. If the optional argument maxsplit is\n"
1158 "given, only the first maxsplit occurrences are replaced.");
1159 
1160 static PyObject *
1161 strop_replace(PyObject *self, PyObject *args)
1162 {
1163     char *str, *pat,*sub,*new_s;
1164     Py_ssize_t len,pat_len,sub_len,out_len;
1165     Py_ssize_t count = -1;
1166     PyObject *newstr;
1167 
1168     WARN;
1169     if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1170                           &str, &len, &pat, &pat_len, &sub, &sub_len,
1171                           &count))
1172         return NULL;
1173     if (pat_len <= 0) {
1174         PyErr_SetString(PyExc_ValueError, "empty pattern string");
1175         return NULL;
1176     }
1177     /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
1178      * current (2.1) string.py and string methods.  Preserve this for
1179      * ... well, hard to say for what <wink>.
1180      */
1181     if (count == 0)
1182         count = -1;
1183     new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1184     if (new_s == NULL) {
1185         PyErr_NoMemory();
1186         return NULL;
1187     }
1188     if (out_len == -1) {
1189         /* we're returning another reference to the input string */
1190         newstr = PyTuple_GetItem(args, 0);
1191         Py_XINCREF(newstr);
1192     }
1193     else {
1194         newstr = PyString_FromStringAndSize(new_s, out_len);
1195         PyMem_FREE(new_s);
1196     }
1197     return newstr;
1198 }
1199 
1200 
1201 /* List of functions defined in the module */
1202 
1203 static PyMethodDef
1204 strop_methods[] = {
1205     {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
1206     {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
1207     {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
1208     {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
1209     {"count",           strop_count,       METH_VARARGS, count__doc__},
1210     {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
1211     {"find",            strop_find,        METH_VARARGS, find__doc__},
1212     {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
1213     {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
1214     {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
1215     {"lower",           strop_lower,       METH_O,       lower__doc__},
1216     {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
1217     {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
1218     {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
1219     {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
1220     {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
1221     {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
1222     {"strip",           strop_strip,       METH_O,       strip__doc__},
1223     {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
1224     {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
1225     {"upper",           strop_upper,       METH_O,       upper__doc__},
1226     {NULL,              NULL}   /* sentinel */
1227 };
1228 
1229 
1230 PyMODINIT_FUNC
1231 initstrop(void)
1232 {
1233     PyObject *m, *s;
1234     char buf[256];
1235     int c, n;
1236     m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1237                        (PyObject*)NULL, PYTHON_API_VERSION);
1238     if (m == NULL)
1239         return;
1240 
1241     /* Create 'whitespace' object */
1242     n = 0;
1243     for (c = 0; c < 256; c++) {
1244         if (isspace(c))
1245             buf[n++] = c;
1246     }
1247     s = PyString_FromStringAndSize(buf, n);
1248     if (s)
1249         PyModule_AddObject(m, "whitespace", s);
1250 
1251     /* Create 'lowercase' object */
1252     n = 0;
1253     for (c = 0; c < 256; c++) {
1254         if (islower(c))
1255             buf[n++] = c;
1256     }
1257     s = PyString_FromStringAndSize(buf, n);
1258     if (s)
1259         PyModule_AddObject(m, "lowercase", s);
1260 
1261     /* Create 'uppercase' object */
1262     n = 0;
1263     for (c = 0; c < 256; c++) {
1264         if (isupper(c))
1265             buf[n++] = c;
1266     }
1267     s = PyString_FromStringAndSize(buf, n);
1268     if (s)
1269         PyModule_AddObject(m, "uppercase", s);
1270 }