Python-2.7.3/Modules/_sre.c

Location Tool Test ID Function Issue
/builddir/build/BUILD/Python-2.7.3/Modules/_sre.c:2303:11 clang-analyzer Assigned value is garbage or undefined
/builddir/build/BUILD/Python-2.7.3/Modules/_sre.c:2303:11 clang-analyzer Assigned value is garbage or undefined
/builddir/build/BUILD/Python-2.7.3/Modules/_sre.c:2316:0 cppcheck uninitvar Uninitialized variable: literal
/builddir/build/BUILD/Python-2.7.3/Modules/_sre.c:2316:0 cppcheck uninitvar Uninitialized variable: literal
   1 /*
   2  * Secret Labs' Regular Expression Engine
   3  *
   4  * regular expression matching engine
   5  *
   6  * partial history:
   7  * 1999-10-24 fl  created (based on existing template matcher code)
   8  * 2000-03-06 fl  first alpha, sort of
   9  * 2000-08-01 fl  fixes for 1.6b1
  10  * 2000-08-07 fl  use PyOS_CheckStack() if available
  11  * 2000-09-20 fl  added expand method
  12  * 2001-03-20 fl  lots of fixes for 2.1b2
  13  * 2001-04-15 fl  export copyright as Python attribute, not global
  14  * 2001-04-28 fl  added __copy__ methods (work in progress)
  15  * 2001-05-14 fl  fixes for 1.5.2 compatibility
  16  * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
  17  * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
  18  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
  19  * 2001-10-21 fl  added sub/subn primitive
  20  * 2001-10-24 fl  added finditer primitive (for 2.2 only)
  21  * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
  22  * 2002-11-09 fl  fixed empty sub/subn return type
  23  * 2003-04-18 mvl fully support 4-byte codes
  24  * 2003-10-17 gn  implemented non recursive scheme
  25  *
  26  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  27  *
  28  * This version of the SRE library can be redistributed under CNRI's
  29  * Python 1.6 license.  For any other use, please contact Secret Labs
  30  * AB (info@pythonware.com).
  31  *
  32  * Portions of this engine have been developed in cooperation with
  33  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
  34  * other compatibility work.
  35  */
  36 
  37 #ifndef SRE_RECURSIVE
  38 
  39 static char copyright[] =
  40     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
  41 
  42 #define PY_SSIZE_T_CLEAN
  43 
  44 #include "Python.h"
  45 #include "structmember.h" /* offsetof */
  46 
  47 #include "sre.h"
  48 
  49 #include <ctype.h>
  50 
  51 /* name of this module, minus the leading underscore */
  52 #if !defined(SRE_MODULE)
  53 #define SRE_MODULE "sre"
  54 #endif
  55 
  56 #define SRE_PY_MODULE "re"
  57 
  58 /* defining this one enables tracing */
  59 #undef VERBOSE
  60 
  61 #if PY_VERSION_HEX >= 0x01060000
  62 #if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
  63 /* defining this enables unicode support (default under 1.6a1 and later) */
  64 #define HAVE_UNICODE
  65 #endif
  66 #endif
  67 
  68 /* -------------------------------------------------------------------- */
  69 /* optional features */
  70 
  71 /* enables fast searching */
  72 #define USE_FAST_SEARCH
  73 
  74 /* enables aggressive inlining (always on for Visual C) */
  75 #undef USE_INLINE
  76 
  77 /* enables copy/deepcopy handling (work in progress) */
  78 #undef USE_BUILTIN_COPY
  79 
  80 #if PY_VERSION_HEX < 0x01060000
  81 #define PyObject_DEL(op) PyMem_DEL((op))
  82 #endif
  83 
  84 /* -------------------------------------------------------------------- */
  85 
  86 #if defined(_MSC_VER)
  87 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
  88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
  89 /* fastest possible local call under MSVC */
  90 #define LOCAL(type) static __inline type __fastcall
  91 #elif defined(USE_INLINE)
  92 #define LOCAL(type) static inline type
  93 #else
  94 #define LOCAL(type) static type
  95 #endif
  96 
  97 /* error codes */
  98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
  99 #define SRE_ERROR_STATE -2 /* illegal state */
 100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
 101 #define SRE_ERROR_MEMORY -9 /* out of memory */
 102 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
 103 
 104 #if defined(VERBOSE)
 105 #define TRACE(v) printf v
 106 #else
 107 #define TRACE(v)
 108 #endif
 109 
 110 /* -------------------------------------------------------------------- */
 111 /* search engine state */
 112 
 113 /* default character predicates (run sre_chars.py to regenerate tables) */
 114 
 115 #define SRE_DIGIT_MASK 1
 116 #define SRE_SPACE_MASK 2
 117 #define SRE_LINEBREAK_MASK 4
 118 #define SRE_ALNUM_MASK 8
 119 #define SRE_WORD_MASK 16
 120 
 121 /* FIXME: this assumes ASCII.  create tables in init_sre() instead */
 122 
 123 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
 124 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
 125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
 126 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 127 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
 128 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 129 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
 130 
 131 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
 132 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
 133 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
 134 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
 135 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
 136 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
 137 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
 138 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
 139 120, 121, 122, 123, 124, 125, 126, 127 };
 140 
 141 #define SRE_IS_DIGIT(ch)\
 142     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
 143 #define SRE_IS_SPACE(ch)\
 144     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
 145 #define SRE_IS_LINEBREAK(ch)\
 146     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
 147 #define SRE_IS_ALNUM(ch)\
 148     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
 149 #define SRE_IS_WORD(ch)\
 150     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
 151 
 152 static unsigned int sre_lower(unsigned int ch)
 153 {
 154     return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
 155 }
 156 
 157 /* locale-specific character predicates */
 158 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 159  * warnings when c's type supports only numbers < N+1 */
 160 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
 161 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
 162 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
 163 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
 164 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
 165 
 166 static unsigned int sre_lower_locale(unsigned int ch)
 167 {
 168     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
 169 }
 170 
 171 /* unicode-specific character predicates */
 172 
 173 #if defined(HAVE_UNICODE)
 174 
 175 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
 176 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
 177 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
 178 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
 179 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
 180 
 181 static unsigned int sre_lower_unicode(unsigned int ch)
 182 {
 183     return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
 184 }
 185 
 186 #endif
 187 
 188 LOCAL(int)
 189 sre_category(SRE_CODE category, unsigned int ch)
 190 {
 191     switch (category) {
 192 
 193     case SRE_CATEGORY_DIGIT:
 194         return SRE_IS_DIGIT(ch);
 195     case SRE_CATEGORY_NOT_DIGIT:
 196         return !SRE_IS_DIGIT(ch);
 197     case SRE_CATEGORY_SPACE:
 198         return SRE_IS_SPACE(ch);
 199     case SRE_CATEGORY_NOT_SPACE:
 200         return !SRE_IS_SPACE(ch);
 201     case SRE_CATEGORY_WORD:
 202         return SRE_IS_WORD(ch);
 203     case SRE_CATEGORY_NOT_WORD:
 204         return !SRE_IS_WORD(ch);
 205     case SRE_CATEGORY_LINEBREAK:
 206         return SRE_IS_LINEBREAK(ch);
 207     case SRE_CATEGORY_NOT_LINEBREAK:
 208         return !SRE_IS_LINEBREAK(ch);
 209 
 210     case SRE_CATEGORY_LOC_WORD:
 211         return SRE_LOC_IS_WORD(ch);
 212     case SRE_CATEGORY_LOC_NOT_WORD:
 213         return !SRE_LOC_IS_WORD(ch);
 214 
 215 #if defined(HAVE_UNICODE)
 216     case SRE_CATEGORY_UNI_DIGIT:
 217         return SRE_UNI_IS_DIGIT(ch);
 218     case SRE_CATEGORY_UNI_NOT_DIGIT:
 219         return !SRE_UNI_IS_DIGIT(ch);
 220     case SRE_CATEGORY_UNI_SPACE:
 221         return SRE_UNI_IS_SPACE(ch);
 222     case SRE_CATEGORY_UNI_NOT_SPACE:
 223         return !SRE_UNI_IS_SPACE(ch);
 224     case SRE_CATEGORY_UNI_WORD:
 225         return SRE_UNI_IS_WORD(ch);
 226     case SRE_CATEGORY_UNI_NOT_WORD:
 227         return !SRE_UNI_IS_WORD(ch);
 228     case SRE_CATEGORY_UNI_LINEBREAK:
 229         return SRE_UNI_IS_LINEBREAK(ch);
 230     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
 231         return !SRE_UNI_IS_LINEBREAK(ch);
 232 #else
 233     case SRE_CATEGORY_UNI_DIGIT:
 234         return SRE_IS_DIGIT(ch);
 235     case SRE_CATEGORY_UNI_NOT_DIGIT:
 236         return !SRE_IS_DIGIT(ch);
 237     case SRE_CATEGORY_UNI_SPACE:
 238         return SRE_IS_SPACE(ch);
 239     case SRE_CATEGORY_UNI_NOT_SPACE:
 240         return !SRE_IS_SPACE(ch);
 241     case SRE_CATEGORY_UNI_WORD:
 242         return SRE_LOC_IS_WORD(ch);
 243     case SRE_CATEGORY_UNI_NOT_WORD:
 244         return !SRE_LOC_IS_WORD(ch);
 245     case SRE_CATEGORY_UNI_LINEBREAK:
 246         return SRE_IS_LINEBREAK(ch);
 247     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
 248         return !SRE_IS_LINEBREAK(ch);
 249 #endif
 250     }
 251     return 0;
 252 }
 253 
 254 /* helpers */
 255 
 256 static void
 257 data_stack_dealloc(SRE_STATE* state)
 258 {
 259     if (state->data_stack) {
 260         PyMem_FREE(state->data_stack);
 261         state->data_stack = NULL;
 262     }
 263     state->data_stack_size = state->data_stack_base = 0;
 264 }
 265 
 266 static int
 267 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
 268 {
 269     Py_ssize_t minsize, cursize;
 270     minsize = state->data_stack_base+size;
 271     cursize = state->data_stack_size;
 272     if (cursize < minsize) {
 273         void* stack;
 274         cursize = minsize+minsize/4+1024;
 275         TRACE(("allocate/grow stack %d\n", cursize));
 276         stack = PyMem_REALLOC(state->data_stack, cursize);
 277         if (!stack) {
 278             data_stack_dealloc(state);
 279             return SRE_ERROR_MEMORY;
 280         }
 281         state->data_stack = (char *)stack;
 282         state->data_stack_size = cursize;
 283     }
 284     return 0;
 285 }
 286 
 287 /* generate 8-bit version */
 288 
 289 #define SRE_CHAR unsigned char
 290 #define SRE_AT sre_at
 291 #define SRE_COUNT sre_count
 292 #define SRE_CHARSET sre_charset
 293 #define SRE_INFO sre_info
 294 #define SRE_MATCH sre_match
 295 #define SRE_MATCH_CONTEXT sre_match_context
 296 #define SRE_SEARCH sre_search
 297 #define SRE_LITERAL_TEMPLATE sre_literal_template
 298 
 299 #if defined(HAVE_UNICODE)
 300 
 301 #define SRE_RECURSIVE
 302 #include "_sre.c"
 303 #undef SRE_RECURSIVE
 304 
 305 #undef SRE_LITERAL_TEMPLATE
 306 #undef SRE_SEARCH
 307 #undef SRE_MATCH
 308 #undef SRE_MATCH_CONTEXT
 309 #undef SRE_INFO
 310 #undef SRE_CHARSET
 311 #undef SRE_COUNT
 312 #undef SRE_AT
 313 #undef SRE_CHAR
 314 
 315 /* generate 16-bit unicode version */
 316 
 317 #define SRE_CHAR Py_UNICODE
 318 #define SRE_AT sre_uat
 319 #define SRE_COUNT sre_ucount
 320 #define SRE_CHARSET sre_ucharset
 321 #define SRE_INFO sre_uinfo
 322 #define SRE_MATCH sre_umatch
 323 #define SRE_MATCH_CONTEXT sre_umatch_context
 324 #define SRE_SEARCH sre_usearch
 325 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
 326 #endif
 327 
 328 #endif /* SRE_RECURSIVE */
 329 
 330 /* -------------------------------------------------------------------- */
 331 /* String matching engine */
 332 
 333 /* the following section is compiled twice, with different character
 334    settings */
 335 
 336 LOCAL(int)
 337 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
 338 {
 339     /* check if pointer is at given position */
 340 
 341     Py_ssize_t thisp, thatp;
 342 
 343     switch (at) {
 344 
 345     case SRE_AT_BEGINNING:
 346     case SRE_AT_BEGINNING_STRING:
 347         return ((void*) ptr == state->beginning);
 348 
 349     case SRE_AT_BEGINNING_LINE:
 350         return ((void*) ptr == state->beginning ||
 351                 SRE_IS_LINEBREAK((int) ptr[-1]));
 352 
 353     case SRE_AT_END:
 354         return (((void*) (ptr+1) == state->end &&
 355                  SRE_IS_LINEBREAK((int) ptr[0])) ||
 356                 ((void*) ptr == state->end));
 357 
 358     case SRE_AT_END_LINE:
 359         return ((void*) ptr == state->end ||
 360                 SRE_IS_LINEBREAK((int) ptr[0]));
 361 
 362     case SRE_AT_END_STRING:
 363         return ((void*) ptr == state->end);
 364 
 365     case SRE_AT_BOUNDARY:
 366         if (state->beginning == state->end)
 367             return 0;
 368         thatp = ((void*) ptr > state->beginning) ?
 369             SRE_IS_WORD((int) ptr[-1]) : 0;
 370         thisp = ((void*) ptr < state->end) ?
 371             SRE_IS_WORD((int) ptr[0]) : 0;
 372         return thisp != thatp;
 373 
 374     case SRE_AT_NON_BOUNDARY:
 375         if (state->beginning == state->end)
 376             return 0;
 377         thatp = ((void*) ptr > state->beginning) ?
 378             SRE_IS_WORD((int) ptr[-1]) : 0;
 379         thisp = ((void*) ptr < state->end) ?
 380             SRE_IS_WORD((int) ptr[0]) : 0;
 381         return thisp == thatp;
 382 
 383     case SRE_AT_LOC_BOUNDARY:
 384         if (state->beginning == state->end)
 385             return 0;
 386         thatp = ((void*) ptr > state->beginning) ?
 387             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
 388         thisp = ((void*) ptr < state->end) ?
 389             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
 390         return thisp != thatp;
 391 
 392     case SRE_AT_LOC_NON_BOUNDARY:
 393         if (state->beginning == state->end)
 394             return 0;
 395         thatp = ((void*) ptr > state->beginning) ?
 396             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
 397         thisp = ((void*) ptr < state->end) ?
 398             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
 399         return thisp == thatp;
 400 
 401 #if defined(HAVE_UNICODE)
 402     case SRE_AT_UNI_BOUNDARY:
 403         if (state->beginning == state->end)
 404             return 0;
 405         thatp = ((void*) ptr > state->beginning) ?
 406             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
 407         thisp = ((void*) ptr < state->end) ?
 408             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
 409         return thisp != thatp;
 410 
 411     case SRE_AT_UNI_NON_BOUNDARY:
 412         if (state->beginning == state->end)
 413             return 0;
 414         thatp = ((void*) ptr > state->beginning) ?
 415             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
 416         thisp = ((void*) ptr < state->end) ?
 417             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
 418         return thisp == thatp;
 419 #endif
 420 
 421     }
 422 
 423     return 0;
 424 }
 425 
 426 LOCAL(int)
 427 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
 428 {
 429     /* check if character is a member of the given set */
 430 
 431     int ok = 1;
 432 
 433     for (;;) {
 434         switch (*set++) {
 435 
 436         case SRE_OP_FAILURE:
 437             return !ok;
 438 
 439         case SRE_OP_LITERAL:
 440             /* <LITERAL> <code> */
 441             if (ch == set[0])
 442                 return ok;
 443             set++;
 444             break;
 445 
 446         case SRE_OP_CATEGORY:
 447             /* <CATEGORY> <code> */
 448             if (sre_category(set[0], (int) ch))
 449                 return ok;
 450             set += 1;
 451             break;
 452 
 453         case SRE_OP_CHARSET:
 454             if (sizeof(SRE_CODE) == 2) {
 455                 /* <CHARSET> <bitmap> (16 bits per code word) */
 456                 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
 457                     return ok;
 458                 set += 16;
 459             }
 460             else {
 461                 /* <CHARSET> <bitmap> (32 bits per code word) */
 462                 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
 463                     return ok;
 464                 set += 8;
 465             }
 466             break;
 467 
 468         case SRE_OP_RANGE:
 469             /* <RANGE> <lower> <upper> */
 470             if (set[0] <= ch && ch <= set[1])
 471                 return ok;
 472             set += 2;
 473             break;
 474 
 475         case SRE_OP_NEGATE:
 476             ok = !ok;
 477             break;
 478 
 479         case SRE_OP_BIGCHARSET:
 480             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
 481         {
 482             Py_ssize_t count, block;
 483             count = *(set++);
 484 
 485             if (sizeof(SRE_CODE) == 2) {
 486                 block = ((unsigned char*)set)[ch >> 8];
 487                 set += 128;
 488                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
 489                     return ok;
 490                 set += count*16;
 491             }
 492             else {
 493                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 494                  * warnings when c's type supports only numbers < N+1 */
 495                 if (!(ch & ~65535))
 496                     block = ((unsigned char*)set)[ch >> 8];
 497                 else
 498                     block = -1;
 499                 set += 64;
 500                 if (block >=0 &&
 501                     (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
 502                     return ok;
 503                 set += count*8;
 504             }
 505             break;
 506         }
 507 
 508         default:
 509             /* internal error -- there's not much we can do about it
 510                here, so let's just pretend it didn't match... */
 511             return 0;
 512         }
 513     }
 514 }
 515 
 516 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
 517 
 518 LOCAL(Py_ssize_t)
 519 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
 520 {
 521     SRE_CODE chr;
 522     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
 523     SRE_CHAR* end = (SRE_CHAR *)state->end;
 524     Py_ssize_t i;
 525 
 526     /* adjust end */
 527     if (maxcount < end - ptr && maxcount != 65535)
 528         end = ptr + maxcount;
 529 
 530     switch (pattern[0]) {
 531 
 532     case SRE_OP_IN:
 533         /* repeated set */
 534         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
 535         while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
 536             ptr++;
 537         break;
 538 
 539     case SRE_OP_ANY:
 540         /* repeated dot wildcard. */
 541         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
 542         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
 543             ptr++;
 544         break;
 545 
 546     case SRE_OP_ANY_ALL:
 547         /* repeated dot wildcard.  skip to the end of the target
 548            string, and backtrack from there */
 549         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
 550         ptr = end;
 551         break;
 552 
 553     case SRE_OP_LITERAL:
 554         /* repeated literal */
 555         chr = pattern[1];
 556         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
 557         while (ptr < end && (SRE_CODE) *ptr == chr)
 558             ptr++;
 559         break;
 560 
 561     case SRE_OP_LITERAL_IGNORE:
 562         /* repeated literal */
 563         chr = pattern[1];
 564         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
 565         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
 566             ptr++;
 567         break;
 568 
 569     case SRE_OP_NOT_LITERAL:
 570         /* repeated non-literal */
 571         chr = pattern[1];
 572         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
 573         while (ptr < end && (SRE_CODE) *ptr != chr)
 574             ptr++;
 575         break;
 576 
 577     case SRE_OP_NOT_LITERAL_IGNORE:
 578         /* repeated non-literal */
 579         chr = pattern[1];
 580         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
 581         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
 582             ptr++;
 583         break;
 584 
 585     default:
 586         /* repeated single character pattern */
 587         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
 588         while ((SRE_CHAR*) state->ptr < end) {
 589             i = SRE_MATCH(state, pattern);
 590             if (i < 0)
 591                 return i;
 592             if (!i)
 593                 break;
 594         }
 595         TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
 596                (SRE_CHAR*) state->ptr - ptr));
 597         return (SRE_CHAR*) state->ptr - ptr;
 598     }
 599 
 600     TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
 601     return ptr - (SRE_CHAR*) state->ptr;
 602 }
 603 
 604 #if 0 /* not used in this release */
 605 LOCAL(int)
 606 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
 607 {
 608     /* check if an SRE_OP_INFO block matches at the current position.
 609        returns the number of SRE_CODE objects to skip if successful, 0
 610        if no match */
 611 
 612     SRE_CHAR* end = state->end;
 613     SRE_CHAR* ptr = state->ptr;
 614     Py_ssize_t i;
 615 
 616     /* check minimal length */
 617     if (pattern[3] && (end - ptr) < pattern[3])
 618         return 0;
 619 
 620     /* check known prefix */
 621     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
 622         /* <length> <skip> <prefix data> <overlap data> */
 623         for (i = 0; i < pattern[5]; i++)
 624             if ((SRE_CODE) ptr[i] != pattern[7 + i])
 625                 return 0;
 626         return pattern[0] + 2 * pattern[6];
 627     }
 628     return pattern[0];
 629 }
 630 #endif
 631 
 632 /* The macros below should be used to protect recursive SRE_MATCH()
 633  * calls that *failed* and do *not* return immediately (IOW, those
 634  * that will backtrack). Explaining:
 635  *
 636  * - Recursive SRE_MATCH() returned true: that's usually a success
 637  *   (besides atypical cases like ASSERT_NOT), therefore there's no
 638  *   reason to restore lastmark;
 639  *
 640  * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
 641  *   is returning to the caller: If the current SRE_MATCH() is the
 642  *   top function of the recursion, returning false will be a matching
 643  *   failure, and it doesn't matter where lastmark is pointing to.
 644  *   If it's *not* the top function, it will be a recursive SRE_MATCH()
 645  *   failure by itself, and the calling SRE_MATCH() will have to deal
 646  *   with the failure by the same rules explained here (it will restore
 647  *   lastmark by itself if necessary);
 648  *
 649  * - Recursive SRE_MATCH() returned false, and will continue the
 650  *   outside 'for' loop: must be protected when breaking, since the next
 651  *   OP could potentially depend on lastmark;
 652  *
 653  * - Recursive SRE_MATCH() returned false, and will be called again
 654  *   inside a local for/while loop: must be protected between each
 655  *   loop iteration, since the recursive SRE_MATCH() could do anything,
 656  *   and could potentially depend on lastmark.
 657  *
 658  * For more information, check the discussion at SF patch #712900.
 659  */
 660 #define LASTMARK_SAVE()     \
 661     do { \
 662         ctx->lastmark = state->lastmark; \
 663         ctx->lastindex = state->lastindex; \
 664     } while (0)
 665 #define LASTMARK_RESTORE()  \
 666     do { \
 667         state->lastmark = ctx->lastmark; \
 668         state->lastindex = ctx->lastindex; \
 669     } while (0)
 670 
 671 #define RETURN_ERROR(i) do { return i; } while(0)
 672 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
 673 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
 674 
 675 #define RETURN_ON_ERROR(i) \
 676     do { if (i < 0) RETURN_ERROR(i); } while (0)
 677 #define RETURN_ON_SUCCESS(i) \
 678     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
 679 #define RETURN_ON_FAILURE(i) \
 680     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
 681 
 682 #define SFY(x) #x
 683 
 684 #define DATA_STACK_ALLOC(state, type, ptr) \
 685 do { \
 686     alloc_pos = state->data_stack_base; \
 687     TRACE(("allocating %s in %d (%d)\n", \
 688            SFY(type), alloc_pos, sizeof(type))); \
 689     if (state->data_stack_size < alloc_pos+sizeof(type)) { \
 690         int j = data_stack_grow(state, sizeof(type)); \
 691         if (j < 0) return j; \
 692         if (ctx_pos != -1) \
 693             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
 694     } \
 695     ptr = (type*)(state->data_stack+alloc_pos); \
 696     state->data_stack_base += sizeof(type); \
 697 } while (0)
 698 
 699 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
 700 do { \
 701     TRACE(("looking up %s at %d\n", SFY(type), pos)); \
 702     ptr = (type*)(state->data_stack+pos); \
 703 } while (0)
 704 
 705 #define DATA_STACK_PUSH(state, data, size) \
 706 do { \
 707     TRACE(("copy data in %p to %d (%d)\n", \
 708            data, state->data_stack_base, size)); \
 709     if (state->data_stack_size < state->data_stack_base+size) { \
 710         int j = data_stack_grow(state, size); \
 711         if (j < 0) return j; \
 712         if (ctx_pos != -1) \
 713             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
 714     } \
 715     memcpy(state->data_stack+state->data_stack_base, data, size); \
 716     state->data_stack_base += size; \
 717 } while (0)
 718 
 719 #define DATA_STACK_POP(state, data, size, discard) \
 720 do { \
 721     TRACE(("copy data to %p from %d (%d)\n", \
 722            data, state->data_stack_base-size, size)); \
 723     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
 724     if (discard) \
 725         state->data_stack_base -= size; \
 726 } while (0)
 727 
 728 #define DATA_STACK_POP_DISCARD(state, size) \
 729 do { \
 730     TRACE(("discard data from %d (%d)\n", \
 731            state->data_stack_base-size, size)); \
 732     state->data_stack_base -= size; \
 733 } while(0)
 734 
 735 #define DATA_PUSH(x) \
 736     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
 737 #define DATA_POP(x) \
 738     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
 739 #define DATA_POP_DISCARD(x) \
 740     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
 741 #define DATA_ALLOC(t,p) \
 742     DATA_STACK_ALLOC(state, t, p)
 743 #define DATA_LOOKUP_AT(t,p,pos) \
 744     DATA_STACK_LOOKUP_AT(state,t,p,pos)
 745 
 746 #define MARK_PUSH(lastmark) \
 747     do if (lastmark > 0) { \
 748         i = lastmark; /* ctx->lastmark may change if reallocated */ \
 749         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
 750     } while (0)
 751 #define MARK_POP(lastmark) \
 752     do if (lastmark > 0) { \
 753         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
 754     } while (0)
 755 #define MARK_POP_KEEP(lastmark) \
 756     do if (lastmark > 0) { \
 757         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
 758     } while (0)
 759 #define MARK_POP_DISCARD(lastmark) \
 760     do if (lastmark > 0) { \
 761         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
 762     } while (0)
 763 
 764 #define JUMP_NONE            0
 765 #define JUMP_MAX_UNTIL_1     1
 766 #define JUMP_MAX_UNTIL_2     2
 767 #define JUMP_MAX_UNTIL_3     3
 768 #define JUMP_MIN_UNTIL_1     4
 769 #define JUMP_MIN_UNTIL_2     5
 770 #define JUMP_MIN_UNTIL_3     6
 771 #define JUMP_REPEAT          7
 772 #define JUMP_REPEAT_ONE_1    8
 773 #define JUMP_REPEAT_ONE_2    9
 774 #define JUMP_MIN_REPEAT_ONE  10
 775 #define JUMP_BRANCH          11
 776 #define JUMP_ASSERT          12
 777 #define JUMP_ASSERT_NOT      13
 778 
 779 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
 780     DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
 781     nextctx->last_ctx_pos = ctx_pos; \
 782     nextctx->jump = jumpvalue; \
 783     nextctx->pattern = nextpattern; \
 784     ctx_pos = alloc_pos; \
 785     ctx = nextctx; \
 786     goto entrance; \
 787     jumplabel: \
 788     while (0) /* gcc doesn't like labels at end of scopes */ \
 789 
 790 typedef struct {
 791     Py_ssize_t last_ctx_pos;
 792     Py_ssize_t jump;
 793     SRE_CHAR* ptr;
 794     SRE_CODE* pattern;
 795     Py_ssize_t count;
 796     Py_ssize_t lastmark;
 797     Py_ssize_t lastindex;
 798     union {
 799         SRE_CODE chr;
 800         SRE_REPEAT* rep;
 801     } u;
 802 } SRE_MATCH_CONTEXT;
 803 
 804 /* check if string matches the given pattern.  returns <0 for
 805    error, 0 for failure, and 1 for success */
 806 LOCAL(Py_ssize_t)
 807 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 808 {
 809     SRE_CHAR* end = (SRE_CHAR *)state->end;
 810     Py_ssize_t alloc_pos, ctx_pos = -1;
 811     Py_ssize_t i, ret = 0;
 812     Py_ssize_t jump;
 813     unsigned int sigcount=0;
 814 
 815     SRE_MATCH_CONTEXT* ctx;
 816     SRE_MATCH_CONTEXT* nextctx;
 817 
 818     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
 819 
 820     DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
 821     ctx->last_ctx_pos = -1;
 822     ctx->jump = JUMP_NONE;
 823     ctx->pattern = pattern;
 824     ctx_pos = alloc_pos;
 825 
 826 entrance:
 827 
 828     ctx->ptr = (SRE_CHAR *)state->ptr;
 829 
 830     if (ctx->pattern[0] == SRE_OP_INFO) {
 831         /* optimization info block */
 832         /* <INFO> <1=skip> <2=flags> <3=min> ... */
 833         if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
 834             TRACE(("reject (got %d chars, need %d)\n",
 835                    (end - ctx->ptr), ctx->pattern[3]));
 836             RETURN_FAILURE;
 837         }
 838         ctx->pattern += ctx->pattern[1] + 1;
 839     }
 840 
 841     for (;;) {
 842         ++sigcount;
 843         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
 844             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
 845 
 846         switch (*ctx->pattern++) {
 847 
 848         case SRE_OP_MARK:
 849             /* set mark */
 850             /* <MARK> <gid> */
 851             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
 852                    ctx->ptr, ctx->pattern[0]));
 853             i = ctx->pattern[0];
 854             if (i & 1)
 855                 state->lastindex = i/2 + 1;
 856             if (i > state->lastmark) {
 857                 /* state->lastmark is the highest valid index in the
 858                    state->mark array.  If it is increased by more than 1,
 859                    the intervening marks must be set to NULL to signal
 860                    that these marks have not been encountered. */
 861                 Py_ssize_t j = state->lastmark + 1;
 862                 while (j < i)
 863                     state->mark[j++] = NULL;
 864                 state->lastmark = i;
 865             }
 866             state->mark[i] = ctx->ptr;
 867             ctx->pattern++;
 868             break;
 869 
 870         case SRE_OP_LITERAL:
 871             /* match literal string */
 872             /* <LITERAL> <code> */
 873             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
 874                    ctx->ptr, *ctx->pattern));
 875             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
 876                 RETURN_FAILURE;
 877             ctx->pattern++;
 878             ctx->ptr++;
 879             break;
 880 
 881         case SRE_OP_NOT_LITERAL:
 882             /* match anything that is not literal character */
 883             /* <NOT_LITERAL> <code> */
 884             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
 885                    ctx->ptr, *ctx->pattern));
 886             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
 887                 RETURN_FAILURE;
 888             ctx->pattern++;
 889             ctx->ptr++;
 890             break;
 891 
 892         case SRE_OP_SUCCESS:
 893             /* end of pattern */
 894             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
 895             state->ptr = ctx->ptr;
 896             RETURN_SUCCESS;
 897 
 898         case SRE_OP_AT:
 899             /* match at given position */
 900             /* <AT> <code> */
 901             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
 902             if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
 903                 RETURN_FAILURE;
 904             ctx->pattern++;
 905             break;
 906 
 907         case SRE_OP_CATEGORY:
 908             /* match at given category */
 909             /* <CATEGORY> <code> */
 910             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
 911                    ctx->ptr, *ctx->pattern));
 912             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
 913                 RETURN_FAILURE;
 914             ctx->pattern++;
 915             ctx->ptr++;
 916             break;
 917 
 918         case SRE_OP_ANY:
 919             /* match anything (except a newline) */
 920             /* <ANY> */
 921             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
 922             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
 923                 RETURN_FAILURE;
 924             ctx->ptr++;
 925             break;
 926 
 927         case SRE_OP_ANY_ALL:
 928             /* match anything */
 929             /* <ANY_ALL> */
 930             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
 931             if (ctx->ptr >= end)
 932                 RETURN_FAILURE;
 933             ctx->ptr++;
 934             break;
 935 
 936         case SRE_OP_IN:
 937             /* match set member (or non_member) */
 938             /* <IN> <skip> <set> */
 939             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
 940             if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
 941                 RETURN_FAILURE;
 942             ctx->pattern += ctx->pattern[0];
 943             ctx->ptr++;
 944             break;
 945 
 946         case SRE_OP_LITERAL_IGNORE:
 947             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
 948                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
 949             if (ctx->ptr >= end ||
 950                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
 951                 RETURN_FAILURE;
 952             ctx->pattern++;
 953             ctx->ptr++;
 954             break;
 955 
 956         case SRE_OP_NOT_LITERAL_IGNORE:
 957             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
 958                    ctx->pattern, ctx->ptr, *ctx->pattern));
 959             if (ctx->ptr >= end ||
 960                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
 961                 RETURN_FAILURE;
 962             ctx->pattern++;
 963             ctx->ptr++;
 964             break;
 965 
 966         case SRE_OP_IN_IGNORE:
 967             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
 968             if (ctx->ptr >= end
 969                 || !SRE_CHARSET(ctx->pattern+1,
 970                                 (SRE_CODE)state->lower(*ctx->ptr)))
 971                 RETURN_FAILURE;
 972             ctx->pattern += ctx->pattern[0];
 973             ctx->ptr++;
 974             break;
 975 
 976         case SRE_OP_JUMP:
 977         case SRE_OP_INFO:
 978             /* jump forward */
 979             /* <JUMP> <offset> */
 980             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
 981                    ctx->ptr, ctx->pattern[0]));
 982             ctx->pattern += ctx->pattern[0];
 983             break;
 984 
 985         case SRE_OP_BRANCH:
 986             /* alternation */
 987             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
 988             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
 989             LASTMARK_SAVE();
 990             ctx->u.rep = state->repeat;
 991             if (ctx->u.rep)
 992                 MARK_PUSH(ctx->lastmark);
 993             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
 994                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
 995                     (ctx->ptr >= end ||
 996                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
 997                     continue;
 998                 if (ctx->pattern[1] == SRE_OP_IN &&
 999                     (ctx->ptr >= end ||
1000                      !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1001                     continue;
1002                 state->ptr = ctx->ptr;
1003                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1004                 if (ret) {
1005                     if (ctx->u.rep)
1006                         MARK_POP_DISCARD(ctx->lastmark);
1007                     RETURN_ON_ERROR(ret);
1008                     RETURN_SUCCESS;
1009                 }
1010                 if (ctx->u.rep)
1011                     MARK_POP_KEEP(ctx->lastmark);
1012                 LASTMARK_RESTORE();
1013             }
1014             if (ctx->u.rep)
1015                 MARK_POP_DISCARD(ctx->lastmark);
1016             RETURN_FAILURE;
1017 
1018         case SRE_OP_REPEAT_ONE:
1019             /* match repeated sequence (maximizing regexp) */
1020 
1021             /* this operator only works if the repeated item is
1022                exactly one character wide, and we're not already
1023                collecting backtracking points.  for other cases,
1024                use the MAX_REPEAT operator */
1025 
1026             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1027 
1028             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1029                    ctx->pattern[1], ctx->pattern[2]));
1030 
1031             if (ctx->ptr + ctx->pattern[1] > end)
1032                 RETURN_FAILURE; /* cannot match */
1033 
1034             state->ptr = ctx->ptr;
1035 
1036             ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1037             RETURN_ON_ERROR(ret);
1038             DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1039             ctx->count = ret;
1040             ctx->ptr += ctx->count;
1041 
1042             /* when we arrive here, count contains the number of
1043                matches, and ctx->ptr points to the tail of the target
1044                string.  check if the rest of the pattern matches,
1045                and backtrack if not. */
1046 
1047             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1048                 RETURN_FAILURE;
1049 
1050             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1051                 /* tail is empty.  we're finished */
1052                 state->ptr = ctx->ptr;
1053                 RETURN_SUCCESS;
1054             }
1055 
1056             LASTMARK_SAVE();
1057 
1058             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1059                 /* tail starts with a literal. skip positions where
1060                    the rest of the pattern cannot possibly match */
1061                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1062                 for (;;) {
1063                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1064                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1065                         ctx->ptr--;
1066                         ctx->count--;
1067                     }
1068                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1069                         break;
1070                     state->ptr = ctx->ptr;
1071                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1072                             ctx->pattern+ctx->pattern[0]);
1073                     if (ret) {
1074                         RETURN_ON_ERROR(ret);
1075                         RETURN_SUCCESS;
1076                     }
1077 
1078                     LASTMARK_RESTORE();
1079 
1080                     ctx->ptr--;
1081                     ctx->count--;
1082                 }
1083 
1084             } else {
1085                 /* general case */
1086                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1087                     state->ptr = ctx->ptr;
1088                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1089                             ctx->pattern+ctx->pattern[0]);
1090                     if (ret) {
1091                         RETURN_ON_ERROR(ret);
1092                         RETURN_SUCCESS;
1093                     }
1094                     ctx->ptr--;
1095                     ctx->count--;
1096                     LASTMARK_RESTORE();
1097                 }
1098             }
1099             RETURN_FAILURE;
1100 
1101         case SRE_OP_MIN_REPEAT_ONE:
1102             /* match repeated sequence (minimizing regexp) */
1103 
1104             /* this operator only works if the repeated item is
1105                exactly one character wide, and we're not already
1106                collecting backtracking points.  for other cases,
1107                use the MIN_REPEAT operator */
1108 
1109             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1110 
1111             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1112                    ctx->pattern[1], ctx->pattern[2]));
1113 
1114             if (ctx->ptr + ctx->pattern[1] > end)
1115                 RETURN_FAILURE; /* cannot match */
1116 
1117             state->ptr = ctx->ptr;
1118 
1119             if (ctx->pattern[1] == 0)
1120                 ctx->count = 0;
1121             else {
1122                 /* count using pattern min as the maximum */
1123                 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1124                 RETURN_ON_ERROR(ret);
1125                 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1126                 if (ret < (Py_ssize_t) ctx->pattern[1])
1127                     /* didn't match minimum number of times */
1128                     RETURN_FAILURE;
1129                 /* advance past minimum matches of repeat */
1130                 ctx->count = ret;
1131                 ctx->ptr += ctx->count;
1132             }
1133 
1134             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1135                 /* tail is empty.  we're finished */
1136                 state->ptr = ctx->ptr;
1137                 RETURN_SUCCESS;
1138 
1139             } else {
1140                 /* general case */
1141                 LASTMARK_SAVE();
1142                 while ((Py_ssize_t)ctx->pattern[2] == 65535
1143                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1144                     state->ptr = ctx->ptr;
1145                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1146                             ctx->pattern+ctx->pattern[0]);
1147                     if (ret) {
1148                         RETURN_ON_ERROR(ret);
1149                         RETURN_SUCCESS;
1150                     }
1151                     state->ptr = ctx->ptr;
1152                     ret = SRE_COUNT(state, ctx->pattern+3, 1);
1153                     RETURN_ON_ERROR(ret);
1154                     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1155                     if (ret == 0)
1156                         break;
1157                     assert(ret == 1);
1158                     ctx->ptr++;
1159                     ctx->count++;
1160                     LASTMARK_RESTORE();
1161                 }
1162             }
1163             RETURN_FAILURE;
1164 
1165         case SRE_OP_REPEAT:
1166             /* create repeat context.  all the hard work is done
1167                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1168             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1169             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1170                    ctx->pattern[1], ctx->pattern[2]));
1171 
1172             /* install new repeat context */
1173             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1174             if (!ctx->u.rep) {
1175                 PyErr_NoMemory();
1176                 RETURN_FAILURE;
1177             }
1178             ctx->u.rep->count = -1;
1179             ctx->u.rep->pattern = ctx->pattern;
1180             ctx->u.rep->prev = state->repeat;
1181             ctx->u.rep->last_ptr = NULL;
1182             state->repeat = ctx->u.rep;
1183 
1184             state->ptr = ctx->ptr;
1185             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1186             state->repeat = ctx->u.rep->prev;
1187             PyObject_FREE(ctx->u.rep);
1188 
1189             if (ret) {
1190                 RETURN_ON_ERROR(ret);
1191                 RETURN_SUCCESS;
1192             }
1193             RETURN_FAILURE;
1194 
1195         case SRE_OP_MAX_UNTIL:
1196             /* maximizing repeat */
1197             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1198 
1199             /* FIXME: we probably need to deal with zero-width
1200                matches in here... */
1201 
1202             ctx->u.rep = state->repeat;
1203             if (!ctx->u.rep)
1204                 RETURN_ERROR(SRE_ERROR_STATE);
1205 
1206             state->ptr = ctx->ptr;
1207 
1208             ctx->count = ctx->u.rep->count+1;
1209 
1210             TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1211                    ctx->ptr, ctx->count));
1212 
1213             if (ctx->count < ctx->u.rep->pattern[1]) {
1214                 /* not enough matches */
1215                 ctx->u.rep->count = ctx->count;
1216                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1217                         ctx->u.rep->pattern+3);
1218                 if (ret) {
1219                     RETURN_ON_ERROR(ret);
1220                     RETURN_SUCCESS;
1221                 }
1222                 ctx->u.rep->count = ctx->count-1;
1223                 state->ptr = ctx->ptr;
1224                 RETURN_FAILURE;
1225             }
1226 
1227             if ((ctx->count < ctx->u.rep->pattern[2] ||
1228                 ctx->u.rep->pattern[2] == 65535) &&
1229                 state->ptr != ctx->u.rep->last_ptr) {
1230                 /* we may have enough matches, but if we can
1231                    match another item, do so */
1232                 ctx->u.rep->count = ctx->count;
1233                 LASTMARK_SAVE();
1234                 MARK_PUSH(ctx->lastmark);
1235                 /* zero-width match protection */
1236                 DATA_PUSH(&ctx->u.rep->last_ptr);
1237                 ctx->u.rep->last_ptr = state->ptr;
1238                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1239                         ctx->u.rep->pattern+3);
1240                 DATA_POP(&ctx->u.rep->last_ptr);
1241                 if (ret) {
1242                     MARK_POP_DISCARD(ctx->lastmark);
1243                     RETURN_ON_ERROR(ret);
1244                     RETURN_SUCCESS;
1245                 }
1246                 MARK_POP(ctx->lastmark);
1247                 LASTMARK_RESTORE();
1248                 ctx->u.rep->count = ctx->count-1;
1249                 state->ptr = ctx->ptr;
1250             }
1251 
1252             /* cannot match more repeated items here.  make sure the
1253                tail matches */
1254             state->repeat = ctx->u.rep->prev;
1255             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1256             RETURN_ON_SUCCESS(ret);
1257             state->repeat = ctx->u.rep;
1258             state->ptr = ctx->ptr;
1259             RETURN_FAILURE;
1260 
1261         case SRE_OP_MIN_UNTIL:
1262             /* minimizing repeat */
1263             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1264 
1265             ctx->u.rep = state->repeat;
1266             if (!ctx->u.rep)
1267                 RETURN_ERROR(SRE_ERROR_STATE);
1268 
1269             state->ptr = ctx->ptr;
1270 
1271             ctx->count = ctx->u.rep->count+1;
1272 
1273             TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1274                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
1275 
1276             if (ctx->count < ctx->u.rep->pattern[1]) {
1277                 /* not enough matches */
1278                 ctx->u.rep->count = ctx->count;
1279                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1280                         ctx->u.rep->pattern+3);
1281                 if (ret) {
1282                     RETURN_ON_ERROR(ret);
1283                     RETURN_SUCCESS;
1284                 }
1285                 ctx->u.rep->count = ctx->count-1;
1286                 state->ptr = ctx->ptr;
1287                 RETURN_FAILURE;
1288             }
1289 
1290             LASTMARK_SAVE();
1291 
1292             /* see if the tail matches */
1293             state->repeat = ctx->u.rep->prev;
1294             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1295             if (ret) {
1296                 RETURN_ON_ERROR(ret);
1297                 RETURN_SUCCESS;
1298             }
1299 
1300             state->repeat = ctx->u.rep;
1301             state->ptr = ctx->ptr;
1302 
1303             LASTMARK_RESTORE();
1304 
1305             if (ctx->count >= ctx->u.rep->pattern[2]
1306                 && ctx->u.rep->pattern[2] != 65535)
1307                 RETURN_FAILURE;
1308 
1309             ctx->u.rep->count = ctx->count;
1310             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1311                     ctx->u.rep->pattern+3);
1312             if (ret) {
1313                 RETURN_ON_ERROR(ret);
1314                 RETURN_SUCCESS;
1315             }
1316             ctx->u.rep->count = ctx->count-1;
1317             state->ptr = ctx->ptr;
1318             RETURN_FAILURE;
1319 
1320         case SRE_OP_GROUPREF:
1321             /* match backreference */
1322             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1323                    ctx->ptr, ctx->pattern[0]));
1324             i = ctx->pattern[0];
1325             {
1326                 Py_ssize_t groupref = i+i;
1327                 if (groupref >= state->lastmark) {
1328                     RETURN_FAILURE;
1329                 } else {
1330                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1331                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1332                     if (!p || !e || e < p)
1333                         RETURN_FAILURE;
1334                     while (p < e) {
1335                         if (ctx->ptr >= end || *ctx->ptr != *p)
1336                             RETURN_FAILURE;
1337                         p++; ctx->ptr++;
1338                     }
1339                 }
1340             }
1341             ctx->pattern++;
1342             break;
1343 
1344         case SRE_OP_GROUPREF_IGNORE:
1345             /* match backreference */
1346             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1347                    ctx->ptr, ctx->pattern[0]));
1348             i = ctx->pattern[0];
1349             {
1350                 Py_ssize_t groupref = i+i;
1351                 if (groupref >= state->lastmark) {
1352                     RETURN_FAILURE;
1353                 } else {
1354                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1355                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1356                     if (!p || !e || e < p)
1357                         RETURN_FAILURE;
1358                     while (p < e) {
1359                         if (ctx->ptr >= end ||
1360                             state->lower(*ctx->ptr) != state->lower(*p))
1361                             RETURN_FAILURE;
1362                         p++; ctx->ptr++;
1363                     }
1364                 }
1365             }
1366             ctx->pattern++;
1367             break;
1368 
1369         case SRE_OP_GROUPREF_EXISTS:
1370             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1371                    ctx->ptr, ctx->pattern[0]));
1372             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1373             i = ctx->pattern[0];
1374             {
1375                 Py_ssize_t groupref = i+i;
1376                 if (groupref >= state->lastmark) {
1377                     ctx->pattern += ctx->pattern[1];
1378                     break;
1379                 } else {
1380                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1381                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1382                     if (!p || !e || e < p) {
1383                         ctx->pattern += ctx->pattern[1];
1384                         break;
1385                     }
1386                 }
1387             }
1388             ctx->pattern += 2;
1389             break;
1390 
1391         case SRE_OP_ASSERT:
1392             /* assert subpattern */
1393             /* <ASSERT> <skip> <back> <pattern> */
1394             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1395                    ctx->ptr, ctx->pattern[1]));
1396             state->ptr = ctx->ptr - ctx->pattern[1];
1397             if (state->ptr < state->beginning)
1398                 RETURN_FAILURE;
1399             DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1400             RETURN_ON_FAILURE(ret);
1401             ctx->pattern += ctx->pattern[0];
1402             break;
1403 
1404         case SRE_OP_ASSERT_NOT:
1405             /* assert not subpattern */
1406             /* <ASSERT_NOT> <skip> <back> <pattern> */
1407             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1408                    ctx->ptr, ctx->pattern[1]));
1409             state->ptr = ctx->ptr - ctx->pattern[1];
1410             if (state->ptr >= state->beginning) {
1411                 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1412                 if (ret) {
1413                     RETURN_ON_ERROR(ret);
1414                     RETURN_FAILURE;
1415                 }
1416             }
1417             ctx->pattern += ctx->pattern[0];
1418             break;
1419 
1420         case SRE_OP_FAILURE:
1421             /* immediate failure */
1422             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1423             RETURN_FAILURE;
1424 
1425         default:
1426             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1427                    ctx->pattern[-1]));
1428             RETURN_ERROR(SRE_ERROR_ILLEGAL);
1429         }
1430     }
1431 
1432 exit:
1433     ctx_pos = ctx->last_ctx_pos;
1434     jump = ctx->jump;
1435     DATA_POP_DISCARD(ctx);
1436     if (ctx_pos == -1)
1437         return ret;
1438     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1439 
1440     switch (jump) {
1441         case JUMP_MAX_UNTIL_2:
1442             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1443             goto jump_max_until_2;
1444         case JUMP_MAX_UNTIL_3:
1445             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1446             goto jump_max_until_3;
1447         case JUMP_MIN_UNTIL_2:
1448             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1449             goto jump_min_until_2;
1450         case JUMP_MIN_UNTIL_3:
1451             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1452             goto jump_min_until_3;
1453         case JUMP_BRANCH:
1454             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1455             goto jump_branch;
1456         case JUMP_MAX_UNTIL_1:
1457             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1458             goto jump_max_until_1;
1459         case JUMP_MIN_UNTIL_1:
1460             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1461             goto jump_min_until_1;
1462         case JUMP_REPEAT:
1463             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1464             goto jump_repeat;
1465         case JUMP_REPEAT_ONE_1:
1466             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1467             goto jump_repeat_one_1;
1468         case JUMP_REPEAT_ONE_2:
1469             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1470             goto jump_repeat_one_2;
1471         case JUMP_MIN_REPEAT_ONE:
1472             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1473             goto jump_min_repeat_one;
1474         case JUMP_ASSERT:
1475             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1476             goto jump_assert;
1477         case JUMP_ASSERT_NOT:
1478             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1479             goto jump_assert_not;
1480         case JUMP_NONE:
1481             TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1482             break;
1483     }
1484 
1485     return ret; /* should never get here */
1486 }
1487 
1488 LOCAL(Py_ssize_t)
1489 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1490 {
1491     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1492     SRE_CHAR* end = (SRE_CHAR *)state->end;
1493     Py_ssize_t status = 0;
1494     Py_ssize_t prefix_len = 0;
1495     Py_ssize_t prefix_skip = 0;
1496     SRE_CODE* prefix = NULL;
1497     SRE_CODE* charset = NULL;
1498     SRE_CODE* overlap = NULL;
1499     int flags = 0;
1500 
1501     if (pattern[0] == SRE_OP_INFO) {
1502         /* optimization info block */
1503         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1504 
1505         flags = pattern[2];
1506 
1507         if (pattern[3] > 1) {
1508             /* adjust end point (but make sure we leave at least one
1509                character in there, so literal search will work) */
1510             end -= pattern[3]-1;
1511             if (end <= ptr)
1512                 end = ptr+1;
1513         }
1514 
1515         if (flags & SRE_INFO_PREFIX) {
1516             /* pattern starts with a known prefix */
1517             /* <length> <skip> <prefix data> <overlap data> */
1518             prefix_len = pattern[5];
1519             prefix_skip = pattern[6];
1520             prefix = pattern + 7;
1521             overlap = prefix + prefix_len - 1;
1522         } else if (flags & SRE_INFO_CHARSET)
1523             /* pattern starts with a character from a known set */
1524             /* <charset> */
1525             charset = pattern + 5;
1526 
1527         pattern += 1 + pattern[1];
1528     }
1529 
1530     TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1531     TRACE(("charset = %p\n", charset));
1532 
1533 #if defined(USE_FAST_SEARCH)
1534     if (prefix_len > 1) {
1535         /* pattern starts with a known prefix.  use the overlap
1536            table to skip forward as fast as we possibly can */
1537         Py_ssize_t i = 0;
1538         end = (SRE_CHAR *)state->end;
1539         while (ptr < end) {
1540             for (;;) {
1541                 if ((SRE_CODE) ptr[0] != prefix[i]) {
1542                     if (!i)
1543                         break;
1544                     else
1545                         i = overlap[i];
1546                 } else {
1547                     if (++i == prefix_len) {
1548                         /* found a potential match */
1549                         TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1550                         state->start = ptr + 1 - prefix_len;
1551                         state->ptr = ptr + 1 - prefix_len + prefix_skip;
1552                         if (flags & SRE_INFO_LITERAL)
1553                             return 1; /* we got all of it */
1554                         status = SRE_MATCH(state, pattern + 2*prefix_skip);
1555                         if (status != 0)
1556                             return status;
1557                         /* close but no cigar -- try again */
1558                         i = overlap[i];
1559                     }
1560                     break;
1561                 }
1562             }
1563             ptr++;
1564         }
1565         return 0;
1566     }
1567 #endif
1568 
1569     if (pattern[0] == SRE_OP_LITERAL) {
1570         /* pattern starts with a literal character.  this is used
1571            for short prefixes, and if fast search is disabled */
1572         SRE_CODE chr = pattern[1];
1573         end = (SRE_CHAR *)state->end;
1574         for (;;) {
1575             while (ptr < end && (SRE_CODE) ptr[0] != chr)
1576                 ptr++;
1577             if (ptr >= end)
1578                 return 0;
1579             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1580             state->start = ptr;
1581             state->ptr = ++ptr;
1582             if (flags & SRE_INFO_LITERAL)
1583                 return 1; /* we got all of it */
1584             status = SRE_MATCH(state, pattern + 2);
1585             if (status != 0)
1586                 break;
1587         }
1588     } else if (charset) {
1589         /* pattern starts with a character from a known set */
1590         end = (SRE_CHAR *)state->end;
1591         for (;;) {
1592             while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1593                 ptr++;
1594             if (ptr >= end)
1595                 return 0;
1596             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1597             state->start = ptr;
1598             state->ptr = ptr;
1599             status = SRE_MATCH(state, pattern);
1600             if (status != 0)
1601                 break;
1602             ptr++;
1603         }
1604     } else
1605         /* general case */
1606         while (ptr <= end) {
1607             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1608             state->start = state->ptr = ptr++;
1609             status = SRE_MATCH(state, pattern);
1610             if (status != 0)
1611                 break;
1612         }
1613 
1614     return status;
1615 }
1616 
1617 LOCAL(int)
1618 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
1619 {
1620     /* check if given string is a literal template (i.e. no escapes) */
1621     while (len-- > 0)
1622         if (*ptr++ == '\\')
1623             return 0;
1624     return 1;
1625 }
1626 
1627 #if !defined(SRE_RECURSIVE)
1628 
1629 /* -------------------------------------------------------------------- */
1630 /* factories and destructors */
1631 
1632 /* see sre.h for object declarations */
1633 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1634 static PyObject*pattern_scanner(PatternObject*, PyObject*);
1635 
1636 static PyObject *
1637 sre_codesize(PyObject* self, PyObject *unused)
1638 {
1639     return Py_BuildValue("l", sizeof(SRE_CODE));
1640 }
1641 
1642 static PyObject *
1643 sre_getlower(PyObject* self, PyObject* args)
1644 {
1645     int character, flags;
1646     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1647         return NULL;
1648     if (flags & SRE_FLAG_LOCALE)
1649         return Py_BuildValue("i", sre_lower_locale(character));
1650     if (flags & SRE_FLAG_UNICODE)
1651 #if defined(HAVE_UNICODE)
1652         return Py_BuildValue("i", sre_lower_unicode(character));
1653 #else
1654         return Py_BuildValue("i", sre_lower_locale(character));
1655 #endif
1656     return Py_BuildValue("i", sre_lower(character));
1657 }
1658 
1659 LOCAL(void)
1660 state_reset(SRE_STATE* state)
1661 {
1662     /* FIXME: dynamic! */
1663     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1664 
1665     state->lastmark = -1;
1666     state->lastindex = -1;
1667 
1668     state->repeat = NULL;
1669 
1670     data_stack_dealloc(state);
1671 }
1672 
1673 static void*
1674 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
1675 {
1676     /* given a python object, return a data pointer, a length (in
1677        characters), and a character size.  return NULL if the object
1678        is not a string (or not compatible) */
1679 
1680     PyBufferProcs *buffer;
1681     Py_ssize_t size, bytes;
1682     int charsize;
1683     void* ptr;
1684 
1685 #if defined(HAVE_UNICODE)
1686     if (PyUnicode_Check(string)) {
1687         /* unicode strings doesn't always support the buffer interface */
1688         ptr = (void*) PyUnicode_AS_DATA(string);
1689         /* bytes = PyUnicode_GET_DATA_SIZE(string); */
1690         size = PyUnicode_GET_SIZE(string);
1691         charsize = sizeof(Py_UNICODE);
1692 
1693     } else {
1694 #endif
1695 
1696     /* get pointer to string buffer */
1697     buffer = Py_TYPE(string)->tp_as_buffer;
1698     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1699         buffer->bf_getsegcount(string, NULL) != 1) {
1700         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1701         return NULL;
1702     }
1703 
1704     /* determine buffer size */
1705     bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1706     if (bytes < 0) {
1707         PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1708         return NULL;
1709     }
1710 
1711     /* determine character size */
1712 #if PY_VERSION_HEX >= 0x01060000
1713     size = PyObject_Size(string);
1714 #else
1715     size = PyObject_Length(string);
1716 #endif
1717 
1718     if (PyString_Check(string) || bytes == size)
1719         charsize = 1;
1720 #if defined(HAVE_UNICODE)
1721     else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
1722         charsize = sizeof(Py_UNICODE);
1723 #endif
1724     else {
1725         PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1726         return NULL;
1727     }
1728 
1729 #if defined(HAVE_UNICODE)
1730     }
1731 #endif
1732 
1733     *p_length = size;
1734     *p_charsize = charsize;
1735 
1736     return ptr;
1737 }
1738 
1739 LOCAL(PyObject*)
1740 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1741            Py_ssize_t start, Py_ssize_t end)
1742 {
1743     /* prepare state object */
1744 
1745     Py_ssize_t length;
1746     int charsize;
1747     void* ptr;
1748 
1749     memset(state, 0, sizeof(SRE_STATE));
1750 
1751     state->lastmark = -1;
1752     state->lastindex = -1;
1753 
1754     ptr = getstring(string, &length, &charsize);
1755     if (!ptr)
1756         return NULL;
1757 
1758     /* adjust boundaries */
1759     if (start < 0)
1760         start = 0;
1761     else if (start > length)
1762         start = length;
1763 
1764     if (end < 0)
1765         end = 0;
1766     else if (end > length)
1767         end = length;
1768 
1769     state->charsize = charsize;
1770 
1771     state->beginning = ptr;
1772 
1773     state->start = (void*) ((char*) ptr + start * state->charsize);
1774     state->end = (void*) ((char*) ptr + end * state->charsize);
1775 
1776     Py_INCREF(string);
1777     state->string = string;
1778     state->pos = start;
1779     state->endpos = end;
1780 
1781     if (pattern->flags & SRE_FLAG_LOCALE)
1782         state->lower = sre_lower_locale;
1783     else if (pattern->flags & SRE_FLAG_UNICODE)
1784 #if defined(HAVE_UNICODE)
1785         state->lower = sre_lower_unicode;
1786 #else
1787         state->lower = sre_lower_locale;
1788 #endif
1789     else
1790         state->lower = sre_lower;
1791 
1792     return string;
1793 }
1794 
1795 LOCAL(void)
1796 state_fini(SRE_STATE* state)
1797 {
1798     Py_XDECREF(state->string);
1799     data_stack_dealloc(state);
1800 }
1801 
1802 /* calculate offset from start of string */
1803 #define STATE_OFFSET(state, member)\
1804     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1805 
1806 LOCAL(PyObject*)
1807 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
1808 {
1809     Py_ssize_t i, j;
1810 
1811     index = (index - 1) * 2;
1812 
1813     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
1814         if (empty)
1815             /* want empty string */
1816             i = j = 0;
1817         else {
1818             Py_INCREF(Py_None);
1819             return Py_None;
1820         }
1821     } else {
1822         i = STATE_OFFSET(state, state->mark[index]);
1823         j = STATE_OFFSET(state, state->mark[index+1]);
1824     }
1825 
1826     return PySequence_GetSlice(string, i, j);
1827 }
1828 
1829 static void
1830 pattern_error(int status)
1831 {
1832     switch (status) {
1833     case SRE_ERROR_RECURSION_LIMIT:
1834         PyErr_SetString(
1835             PyExc_RuntimeError,
1836             "maximum recursion limit exceeded"
1837             );
1838         break;
1839     case SRE_ERROR_MEMORY:
1840         PyErr_NoMemory();
1841         break;
1842     case SRE_ERROR_INTERRUPTED:
1843     /* An exception has already been raised, so let it fly */
1844         break;
1845     default:
1846         /* other error codes indicate compiler/engine bugs */
1847         PyErr_SetString(
1848             PyExc_RuntimeError,
1849             "internal error in regular expression engine"
1850             );
1851     }
1852 }
1853 
1854 static void
1855 pattern_dealloc(PatternObject* self)
1856 {
1857     if (self->weakreflist != NULL)
1858         PyObject_ClearWeakRefs((PyObject *) self);
1859     Py_XDECREF(self->pattern);
1860     Py_XDECREF(self->groupindex);
1861     Py_XDECREF(self->indexgroup);
1862     PyObject_DEL(self);
1863 }
1864 
1865 static PyObject*
1866 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1867 {
1868     SRE_STATE state;
1869     int status;
1870 
1871     PyObject* string;
1872     Py_ssize_t start = 0;
1873     Py_ssize_t end = PY_SSIZE_T_MAX;
1874     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1875     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
1876                                      &string, &start, &end))
1877         return NULL;
1878 
1879     string = state_init(&state, self, string, start, end);
1880     if (!string)
1881         return NULL;
1882 
1883     state.ptr = state.start;
1884 
1885     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1886 
1887     if (state.charsize == 1) {
1888         status = sre_match(&state, PatternObject_GetCode(self));
1889     } else {
1890 #if defined(HAVE_UNICODE)
1891         status = sre_umatch(&state, PatternObject_GetCode(self));
1892 #endif
1893     }
1894 
1895     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1896     if (PyErr_Occurred())
1897         return NULL;
1898 
1899     state_fini(&state);
1900 
1901     return pattern_new_match(self, &state, status);
1902 }
1903 
1904 static PyObject*
1905 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1906 {
1907     SRE_STATE state;
1908     int status;
1909 
1910     PyObject* string;
1911     Py_ssize_t start = 0;
1912     Py_ssize_t end = PY_SSIZE_T_MAX;
1913     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1914     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
1915                                      &string, &start, &end))
1916         return NULL;
1917 
1918     string = state_init(&state, self, string, start, end);
1919     if (!string)
1920         return NULL;
1921 
1922     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1923 
1924     if (state.charsize == 1) {
1925         status = sre_search(&state, PatternObject_GetCode(self));
1926     } else {
1927 #if defined(HAVE_UNICODE)
1928         status = sre_usearch(&state, PatternObject_GetCode(self));
1929 #endif
1930     }
1931 
1932     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1933 
1934     state_fini(&state);
1935 
1936     if (PyErr_Occurred())
1937         return NULL;
1938 
1939     return pattern_new_match(self, &state, status);
1940 }
1941 
1942 static PyObject*
1943 call(char* module, char* function, PyObject* args)
1944 {
1945     PyObject* name;
1946     PyObject* mod;
1947     PyObject* func;
1948     PyObject* result;
1949 
1950     if (!args)
1951         return NULL;
1952     name = PyString_FromString(module);
1953     if (!name)
1954         return NULL;
1955     mod = PyImport_Import(name);
1956     Py_DECREF(name);
1957     if (!mod)
1958         return NULL;
1959     func = PyObject_GetAttrString(mod, function);
1960     Py_DECREF(mod);
1961     if (!func)
1962         return NULL;
1963     result = PyObject_CallObject(func, args);
1964     Py_DECREF(func);
1965     Py_DECREF(args);
1966     return result;
1967 }
1968 
1969 #ifdef USE_BUILTIN_COPY
1970 static int
1971 deepcopy(PyObject** object, PyObject* memo)
1972 {
1973     PyObject* copy;
1974 
1975     copy = call(
1976         "copy", "deepcopy",
1977         PyTuple_Pack(2, *object, memo)
1978         );
1979     if (!copy)
1980         return 0;
1981 
1982     Py_DECREF(*object);
1983     *object = copy;
1984 
1985     return 1; /* success */
1986 }
1987 #endif
1988 
1989 static PyObject*
1990 join_list(PyObject* list, PyObject* string)
1991 {
1992     /* join list elements */
1993 
1994     PyObject* joiner;
1995 #if PY_VERSION_HEX >= 0x01060000
1996     PyObject* function;
1997     PyObject* args;
1998 #endif
1999     PyObject* result;
2000 
2001     joiner = PySequence_GetSlice(string, 0, 0);
2002     if (!joiner)
2003         return NULL;
2004 
2005     if (PyList_GET_SIZE(list) == 0) {
2006         Py_DECREF(list);
2007         return joiner;
2008     }
2009 
2010 #if PY_VERSION_HEX >= 0x01060000
2011     function = PyObject_GetAttrString(joiner, "join");
2012     if (!function) {
2013         Py_DECREF(joiner);
2014         return NULL;
2015     }
2016     args = PyTuple_New(1);
2017     if (!args) {
2018         Py_DECREF(function);
2019         Py_DECREF(joiner);
2020         return NULL;
2021     }
2022     PyTuple_SET_ITEM(args, 0, list);
2023     result = PyObject_CallObject(function, args);
2024     Py_DECREF(args); /* also removes list */
2025     Py_DECREF(function);
2026 #else
2027     result = call(
2028         "string", "join",
2029         PyTuple_Pack(2, list, joiner)
2030         );
2031 #endif
2032     Py_DECREF(joiner);
2033 
2034     return result;
2035 }
2036 
2037 static PyObject*
2038 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
2039 {
2040     SRE_STATE state;
2041     PyObject* list;
2042     int status;
2043     Py_ssize_t i, b, e;
2044 
2045     PyObject* string;
2046     Py_ssize_t start = 0;
2047     Py_ssize_t end = PY_SSIZE_T_MAX;
2048     static char* kwlist[] = { "source", "pos", "endpos", NULL };
2049     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
2050                                      &string, &start, &end))
2051         return NULL;
2052 
2053     string = state_init(&state, self, string, start, end);
2054     if (!string)
2055         return NULL;
2056 
2057     list = PyList_New(0);
2058     if (!list) {
2059         state_fini(&state);
2060         return NULL;
2061     }
2062 
2063     while (state.start <= state.end) {
2064 
2065         PyObject* item;
2066 
2067         state_reset(&state);
2068 
2069         state.ptr = state.start;
2070 
2071         if (state.charsize == 1) {
2072             status = sre_search(&state, PatternObject_GetCode(self));
2073         } else {
2074 #if defined(HAVE_UNICODE)
2075             status = sre_usearch(&state, PatternObject_GetCode(self));
2076 #endif
2077         }
2078 
2079 	if (PyErr_Occurred())
2080 	    goto error;
2081 
2082         if (status <= 0) {
2083             if (status == 0)
2084                 break;
2085             pattern_error(status);
2086             goto error;
2087         }
2088 
2089         /* don't bother to build a match object */
2090         switch (self->groups) {
2091         case 0:
2092             b = STATE_OFFSET(&state, state.start);
2093             e = STATE_OFFSET(&state, state.ptr);
2094             item = PySequence_GetSlice(string, b, e);
2095             if (!item)
2096                 goto error;
2097             break;
2098         case 1:
2099             item = state_getslice(&state, 1, string, 1);
2100             if (!item)
2101                 goto error;
2102             break;
2103         default:
2104             item = PyTuple_New(self->groups);
2105             if (!item)
2106                 goto error;
2107             for (i = 0; i < self->groups; i++) {
2108                 PyObject* o = state_getslice(&state, i+1, string, 1);
2109                 if (!o) {
2110                     Py_DECREF(item);
2111                     goto error;
2112                 }
2113                 PyTuple_SET_ITEM(item, i, o);
2114             }
2115             break;
2116         }
2117 
2118         status = PyList_Append(list, item);
2119         Py_DECREF(item);
2120         if (status < 0)
2121             goto error;
2122 
2123         if (state.ptr == state.start)
2124             state.start = (void*) ((char*) state.ptr + state.charsize);
2125         else
2126             state.start = state.ptr;
2127 
2128     }
2129 
2130     state_fini(&state);
2131     return list;
2132 
2133 error:
2134     Py_DECREF(list);
2135     state_fini(&state);
2136     return NULL;
2137 
2138 }
2139 
2140 #if PY_VERSION_HEX >= 0x02020000
2141 static PyObject*
2142 pattern_finditer(PatternObject* pattern, PyObject* args)
2143 {
2144     PyObject* scanner;
2145     PyObject* search;
2146     PyObject* iterator;
2147 
2148     scanner = pattern_scanner(pattern, args);
2149     if (!scanner)
2150         return NULL;
2151 
2152     search = PyObject_GetAttrString(scanner, "search");
2153     Py_DECREF(scanner);
2154     if (!search)
2155         return NULL;
2156 
2157     iterator = PyCallIter_New(search, Py_None);
2158     Py_DECREF(search);
2159 
2160     return iterator;
2161 }
2162 #endif
2163 
2164 static PyObject*
2165 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2166 {
2167     SRE_STATE state;
2168     PyObject* list;
2169     PyObject* item;
2170     int status;
2171     Py_ssize_t n;
2172     Py_ssize_t i;
2173     void* last;
2174 
2175     PyObject* string;
2176     Py_ssize_t maxsplit = 0;
2177     static char* kwlist[] = { "source", "maxsplit", NULL };
2178     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
2179                                      &string, &maxsplit))
2180         return NULL;
2181 
2182     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2183     if (!string)
2184         return NULL;
2185 
2186     list = PyList_New(0);
2187     if (!list) {
2188         state_fini(&state);
2189         return NULL;
2190     }
2191 
2192     n = 0;
2193     last = state.start;
2194 
2195     while (!maxsplit || n < maxsplit) {
2196 
2197         state_reset(&state);
2198 
2199         state.ptr = state.start;
2200 
2201         if (state.charsize == 1) {
2202             status = sre_search(&state, PatternObject_GetCode(self));
2203         } else {
2204 #if defined(HAVE_UNICODE)
2205             status = sre_usearch(&state, PatternObject_GetCode(self));
2206 #endif
2207         }
2208 
2209 	if (PyErr_Occurred())
2210 	    goto error;
2211 
2212         if (status <= 0) {
2213             if (status == 0)
2214                 break;
2215             pattern_error(status);
2216             goto error;
2217         }
2218 
2219         if (state.start == state.ptr) {
2220             if (last == state.end)
2221                 break;
2222             /* skip one character */
2223             state.start = (void*) ((char*) state.ptr + state.charsize);
2224             continue;
2225         }
2226 
2227         /* get segment before this match */
2228         item = PySequence_GetSlice(
2229             string, STATE_OFFSET(&state, last),
2230             STATE_OFFSET(&state, state.start)
2231             );
2232         if (!item)
2233             goto error;
2234         status = PyList_Append(list, item);
2235         Py_DECREF(item);
2236         if (status < 0)
2237             goto error;
2238 
2239         /* add groups (if any) */
2240         for (i = 0; i < self->groups; i++) {
2241             item = state_getslice(&state, i+1, string, 0);
2242             if (!item)
2243                 goto error;
2244             status = PyList_Append(list, item);
2245             Py_DECREF(item);
2246             if (status < 0)
2247                 goto error;
2248         }
2249 
2250         n = n + 1;
2251 
2252         last = state.start = state.ptr;
2253 
2254     }
2255 
2256     /* get segment following last match (even if empty) */
2257     item = PySequence_GetSlice(
2258         string, STATE_OFFSET(&state, last), state.endpos
2259         );
2260     if (!item)
2261         goto error;
2262     status = PyList_Append(list, item);
2263     Py_DECREF(item);
2264     if (status < 0)
2265         goto error;
2266 
2267     state_fini(&state);
2268     return list;
2269 
2270 error:
2271     Py_DECREF(list);
2272     state_fini(&state);
2273     return NULL;
2274 
2275 }
2276 
2277 static PyObject*
2278 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
2279              Py_ssize_t count, Py_ssize_t subn)
2280 {
2281     SRE_STATE state;
2282     PyObject* list;
2283     PyObject* item;
2284     PyObject* filter;
2285     PyObject* args;
2286     PyObject* match;
2287     void* ptr;
2288     int status;
2289     Py_ssize_t n;
2290     Py_ssize_t i, b, e;
2291     int bint;
2292     int filter_is_callable;
2293 
2294     if (PyCallable_Check(ptemplate)) {
2295         /* sub/subn takes either a function or a template */
2296         filter = ptemplate;
2297         Py_INCREF(filter);
2298         filter_is_callable = 1;
2299     } else {
2300         /* if not callable, check if it's a literal string */
2301         int literal;
2302         ptr = getstring(ptemplate, &n, &bint);
2303         b = bint;
Assigned value is garbage or undefined
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Assigned value is garbage or undefined
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

2304 if (ptr) { 2305 if (b == 1) { 2306 literal = sre_literal_template((unsigned char *)ptr, n); 2307 } else { 2308 #if defined(HAVE_UNICODE) 2309 literal = sre_uliteral_template((Py_UNICODE *)ptr, n); 2310 #endif 2311 } 2312 } else { 2313 PyErr_Clear(); 2314 literal = 0; 2315 } 2316 if (literal) {
Uninitialized variable: literal
(emitted by cppcheck)
Uninitialized variable: literal
(emitted by cppcheck)
2317 filter = ptemplate; 2318 Py_INCREF(filter); 2319 filter_is_callable = 0; 2320 } else { 2321 /* not a literal; hand it over to the template compiler */ 2322 filter = call( 2323 SRE_PY_MODULE, "_subx", 2324 PyTuple_Pack(2, self, ptemplate) 2325 ); 2326 if (!filter) 2327 return NULL; 2328 filter_is_callable = PyCallable_Check(filter); 2329 } 2330 } 2331 2332 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX); 2333 if (!string) { 2334 Py_DECREF(filter); 2335 return NULL; 2336 } 2337 2338 list = PyList_New(0); 2339 if (!list) { 2340 Py_DECREF(filter); 2341 state_fini(&state); 2342 return NULL; 2343 } 2344 2345 n = i = 0; 2346 2347 while (!count || n < count) { 2348 2349 state_reset(&state); 2350 2351 state.ptr = state.start; 2352 2353 if (state.charsize == 1) { 2354 status = sre_search(&state, PatternObject_GetCode(self)); 2355 } else { 2356 #if defined(HAVE_UNICODE) 2357 status = sre_usearch(&state, PatternObject_GetCode(self)); 2358 #endif 2359 } 2360 2361 if (PyErr_Occurred()) 2362 goto error; 2363 2364 if (status <= 0) { 2365 if (status == 0) 2366 break; 2367 pattern_error(status); 2368 goto error; 2369 } 2370 2371 b = STATE_OFFSET(&state, state.start); 2372 e = STATE_OFFSET(&state, state.ptr); 2373 2374 if (i < b) { 2375 /* get segment before this match */ 2376 item = PySequence_GetSlice(string, i, b); 2377 if (!item) 2378 goto error; 2379 status = PyList_Append(list, item); 2380 Py_DECREF(item); 2381 if (status < 0) 2382 goto error; 2383 2384 } else if (i == b && i == e && n > 0) 2385 /* ignore empty match on latest position */ 2386 goto next; 2387 2388 if (filter_is_callable) { 2389 /* pass match object through filter */ 2390 match = pattern_new_match(self, &state, 1); 2391 if (!match) 2392 goto error; 2393 args = PyTuple_Pack(1, match); 2394 if (!args) { 2395 Py_DECREF(match); 2396 goto error; 2397 } 2398 item = PyObject_CallObject(filter, args); 2399 Py_DECREF(args); 2400 Py_DECREF(match); 2401 if (!item) 2402 goto error; 2403 } else { 2404 /* filter is literal string */ 2405 item = filter; 2406 Py_INCREF(item); 2407 } 2408 2409 /* add to list */ 2410 if (item != Py_None) { 2411 status = PyList_Append(list, item); 2412 Py_DECREF(item); 2413 if (status < 0) 2414 goto error; 2415 } 2416 2417 i = e; 2418 n = n + 1; 2419 2420 next: 2421 /* move on */ 2422 if (state.ptr == state.start) 2423 state.start = (void*) ((char*) state.ptr + state.charsize); 2424 else 2425 state.start = state.ptr; 2426 2427 } 2428 2429 /* get segment following last match */ 2430 if (i < state.endpos) { 2431 item = PySequence_GetSlice(string, i, state.endpos); 2432 if (!item) 2433 goto error; 2434 status = PyList_Append(list, item); 2435 Py_DECREF(item); 2436 if (status < 0) 2437 goto error; 2438 } 2439 2440 state_fini(&state); 2441 2442 Py_DECREF(filter); 2443 2444 /* convert list to single string (also removes list) */ 2445 item = join_list(list, string); 2446 2447 if (!item) 2448 return NULL; 2449 2450 if (subn) 2451 return Py_BuildValue("Ni", item, n); 2452 2453 return item; 2454 2455 error: 2456 Py_DECREF(list); 2457 state_fini(&state); 2458 Py_DECREF(filter); 2459 return NULL; 2460 2461 } 2462 2463 static PyObject* 2464 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw) 2465 { 2466 PyObject* ptemplate; 2467 PyObject* string; 2468 Py_ssize_t count = 0; 2469 static char* kwlist[] = { "repl", "string", "count", NULL }; 2470 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist, 2471 &ptemplate, &string, &count)) 2472 return NULL; 2473 2474 return pattern_subx(self, ptemplate, string, count, 0); 2475 } 2476 2477 static PyObject* 2478 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw) 2479 { 2480 PyObject* ptemplate; 2481 PyObject* string; 2482 Py_ssize_t count = 0; 2483 static char* kwlist[] = { "repl", "string", "count", NULL }; 2484 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist, 2485 &ptemplate, &string, &count)) 2486 return NULL; 2487 2488 return pattern_subx(self, ptemplate, string, count, 1); 2489 } 2490 2491 static PyObject* 2492 pattern_copy(PatternObject* self, PyObject *unused) 2493 { 2494 #ifdef USE_BUILTIN_COPY 2495 PatternObject* copy; 2496 int offset; 2497 2498 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); 2499 if (!copy) 2500 return NULL; 2501 2502 offset = offsetof(PatternObject, groups); 2503 2504 Py_XINCREF(self->groupindex); 2505 Py_XINCREF(self->indexgroup); 2506 Py_XINCREF(self->pattern); 2507 2508 memcpy((char*) copy + offset, (char*) self + offset, 2509 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset); 2510 copy->weakreflist = NULL; 2511 2512 return (PyObject*) copy; 2513 #else 2514 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object"); 2515 return NULL; 2516 #endif 2517 } 2518 2519 static PyObject* 2520 pattern_deepcopy(PatternObject* self, PyObject* memo) 2521 { 2522 #ifdef USE_BUILTIN_COPY 2523 PatternObject* copy; 2524 2525 copy = (PatternObject*) pattern_copy(self); 2526 if (!copy) 2527 return NULL; 2528 2529 if (!deepcopy(&copy->groupindex, memo) || 2530 !deepcopy(&copy->indexgroup, memo) || 2531 !deepcopy(&copy->pattern, memo)) { 2532 Py_DECREF(copy); 2533 return NULL; 2534 } 2535 2536 #else 2537 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object"); 2538 return NULL; 2539 #endif 2540 } 2541 2542 PyDoc_STRVAR(pattern_match_doc, 2543 "match(string[, pos[, endpos]]) --> match object or None.\n\ 2544 Matches zero or more characters at the beginning of the string"); 2545 2546 PyDoc_STRVAR(pattern_search_doc, 2547 "search(string[, pos[, endpos]]) --> match object or None.\n\ 2548 Scan through string looking for a match, and return a corresponding\n\ 2549 MatchObject instance. Return None if no position in the string matches."); 2550 2551 PyDoc_STRVAR(pattern_split_doc, 2552 "split(string[, maxsplit = 0]) --> list.\n\ 2553 Split string by the occurrences of pattern."); 2554 2555 PyDoc_STRVAR(pattern_findall_doc, 2556 "findall(string[, pos[, endpos]]) --> list.\n\ 2557 Return a list of all non-overlapping matches of pattern in string."); 2558 2559 PyDoc_STRVAR(pattern_finditer_doc, 2560 "finditer(string[, pos[, endpos]]) --> iterator.\n\ 2561 Return an iterator over all non-overlapping matches for the \n\ 2562 RE pattern in string. For each match, the iterator returns a\n\ 2563 match object."); 2564 2565 PyDoc_STRVAR(pattern_sub_doc, 2566 "sub(repl, string[, count = 0]) --> newstring\n\ 2567 Return the string obtained by replacing the leftmost non-overlapping\n\ 2568 occurrences of pattern in string by the replacement repl."); 2569 2570 PyDoc_STRVAR(pattern_subn_doc, 2571 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ 2572 Return the tuple (new_string, number_of_subs_made) found by replacing\n\ 2573 the leftmost non-overlapping occurrences of pattern with the\n\ 2574 replacement repl."); 2575 2576 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); 2577 2578 static PyMethodDef pattern_methods[] = { 2579 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, 2580 pattern_match_doc}, 2581 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, 2582 pattern_search_doc}, 2583 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, 2584 pattern_sub_doc}, 2585 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS, 2586 pattern_subn_doc}, 2587 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, 2588 pattern_split_doc}, 2589 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, 2590 pattern_findall_doc}, 2591 #if PY_VERSION_HEX >= 0x02020000 2592 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS, 2593 pattern_finditer_doc}, 2594 #endif 2595 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, 2596 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS}, 2597 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O}, 2598 {NULL, NULL} 2599 }; 2600 2601 #define PAT_OFF(x) offsetof(PatternObject, x) 2602 static PyMemberDef pattern_members[] = { 2603 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY}, 2604 {"flags", T_INT, PAT_OFF(flags), READONLY}, 2605 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY}, 2606 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY}, 2607 {NULL} /* Sentinel */ 2608 }; 2609 2610 statichere PyTypeObject Pattern_Type = { 2611 PyObject_HEAD_INIT(NULL) 2612 0, "_" SRE_MODULE ".SRE_Pattern", 2613 sizeof(PatternObject), sizeof(SRE_CODE), 2614 (destructor)pattern_dealloc, /*tp_dealloc*/ 2615 0, /* tp_print */ 2616 0, /* tp_getattrn */ 2617 0, /* tp_setattr */ 2618 0, /* tp_compare */ 2619 0, /* tp_repr */ 2620 0, /* tp_as_number */ 2621 0, /* tp_as_sequence */ 2622 0, /* tp_as_mapping */ 2623 0, /* tp_hash */ 2624 0, /* tp_call */ 2625 0, /* tp_str */ 2626 0, /* tp_getattro */ 2627 0, /* tp_setattro */ 2628 0, /* tp_as_buffer */ 2629 Py_TPFLAGS_DEFAULT, /* tp_flags */ 2630 pattern_doc, /* tp_doc */ 2631 0, /* tp_traverse */ 2632 0, /* tp_clear */ 2633 0, /* tp_richcompare */ 2634 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ 2635 0, /* tp_iter */ 2636 0, /* tp_iternext */ 2637 pattern_methods, /* tp_methods */ 2638 pattern_members, /* tp_members */ 2639 }; 2640 2641 static int _validate(PatternObject *self); /* Forward */ 2642 2643 static PyObject * 2644 _compile(PyObject* self_, PyObject* args) 2645 { 2646 /* "compile" pattern descriptor to pattern object */ 2647 2648 PatternObject* self; 2649 Py_ssize_t i, n; 2650 2651 PyObject* pattern; 2652 int flags = 0; 2653 PyObject* code; 2654 Py_ssize_t groups = 0; 2655 PyObject* groupindex = NULL; 2656 PyObject* indexgroup = NULL; 2657 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, 2658 &PyList_Type, &code, &groups, 2659 &groupindex, &indexgroup)) 2660 return NULL; 2661 2662 n = PyList_GET_SIZE(code); 2663 /* coverity[ampersand_in_size] */ 2664 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); 2665 if (!self) 2666 return NULL; 2667 self->weakreflist = NULL; 2668 self->pattern = NULL; 2669 self->groupindex = NULL; 2670 self->indexgroup = NULL; 2671 2672 self->codesize = n; 2673 2674 for (i = 0; i < n; i++) { 2675 PyObject *o = PyList_GET_ITEM(code, i); 2676 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) 2677 : PyLong_AsUnsignedLong(o); 2678 self->code[i] = (SRE_CODE) value; 2679 if ((unsigned long) self->code[i] != value) { 2680 PyErr_SetString(PyExc_OverflowError, 2681 "regular expression code size limit exceeded"); 2682 break; 2683 } 2684 } 2685 2686 if (PyErr_Occurred()) { 2687 Py_DECREF(self); 2688 return NULL; 2689 } 2690 2691 Py_INCREF(pattern); 2692 self->pattern = pattern; 2693 2694 self->flags = flags; 2695 2696 self->groups = groups; 2697 2698 Py_XINCREF(groupindex); 2699 self->groupindex = groupindex; 2700 2701 Py_XINCREF(indexgroup); 2702 self->indexgroup = indexgroup; 2703 2704 self->weakreflist = NULL; 2705 2706 if (!_validate(self)) { 2707 Py_DECREF(self); 2708 return NULL; 2709 } 2710 2711 return (PyObject*) self; 2712 } 2713 2714 /* -------------------------------------------------------------------- */ 2715 /* Code validation */ 2716 2717 /* To learn more about this code, have a look at the _compile() function in 2718 Lib/sre_compile.py. The validation functions below checks the code array 2719 for conformance with the code patterns generated there. 2720 2721 The nice thing about the generated code is that it is position-independent: 2722 all jumps are relative jumps forward. Also, jumps don't cross each other: 2723 the target of a later jump is always earlier than the target of an earlier 2724 jump. IOW, this is okay: 2725 2726 J---------J-------T--------T 2727 \ \_____/ / 2728 \______________________/ 2729 2730 but this is not: 2731 2732 J---------J-------T--------T 2733 \_________\_____/ / 2734 \____________/ 2735 2736 It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4 2737 bytes wide (the latter if Python is compiled for "wide" unicode support). 2738 */ 2739 2740 /* Defining this one enables tracing of the validator */ 2741 #undef VVERBOSE 2742 2743 /* Trace macro for the validator */ 2744 #if defined(VVERBOSE) 2745 #define VTRACE(v) printf v 2746 #else 2747 #define VTRACE(v) do {} while(0) /* do nothing */ 2748 #endif 2749 2750 /* Report failure */ 2751 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) 2752 2753 /* Extract opcode, argument, or skip count from code array */ 2754 #define GET_OP \ 2755 do { \ 2756 VTRACE(("%p: ", code)); \ 2757 if (code >= end) FAIL; \ 2758 op = *code++; \ 2759 VTRACE(("%lu (op)\n", (unsigned long)op)); \ 2760 } while (0) 2761 #define GET_ARG \ 2762 do { \ 2763 VTRACE(("%p= ", code)); \ 2764 if (code >= end) FAIL; \ 2765 arg = *code++; \ 2766 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ 2767 } while (0) 2768 #define GET_SKIP_ADJ(adj) \ 2769 do { \ 2770 VTRACE(("%p= ", code)); \ 2771 if (code >= end) FAIL; \ 2772 skip = *code; \ 2773 VTRACE(("%lu (skip to %p)\n", \ 2774 (unsigned long)skip, code+skip)); \ 2775 if (code+skip-adj < code || code+skip-adj > end)\ 2776 FAIL; \ 2777 code++; \ 2778 } while (0) 2779 #define GET_SKIP GET_SKIP_ADJ(0) 2780 2781 static int 2782 _validate_charset(SRE_CODE *code, SRE_CODE *end) 2783 { 2784 /* Some variables are manipulated by the macros above */ 2785 SRE_CODE op; 2786 SRE_CODE arg; 2787 SRE_CODE offset; 2788 int i; 2789 2790 while (code < end) { 2791 GET_OP; 2792 switch (op) { 2793 2794 case SRE_OP_NEGATE: 2795 break; 2796 2797 case SRE_OP_LITERAL: 2798 GET_ARG; 2799 break; 2800 2801 case SRE_OP_RANGE: 2802 GET_ARG; 2803 GET_ARG; 2804 break; 2805 2806 case SRE_OP_CHARSET: 2807 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */ 2808 if (code+offset < code || code+offset > end) 2809 FAIL; 2810 code += offset; 2811 break; 2812 2813 case SRE_OP_BIGCHARSET: 2814 GET_ARG; /* Number of blocks */ 2815 offset = 256/sizeof(SRE_CODE); /* 256-byte table */ 2816 if (code+offset < code || code+offset > end) 2817 FAIL; 2818 /* Make sure that each byte points to a valid block */ 2819 for (i = 0; i < 256; i++) { 2820 if (((unsigned char *)code)[i] >= arg) 2821 FAIL; 2822 } 2823 code += offset; 2824 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */ 2825 if (code+offset < code || code+offset > end) 2826 FAIL; 2827 code += offset; 2828 break; 2829 2830 case SRE_OP_CATEGORY: 2831 GET_ARG; 2832 switch (arg) { 2833 case SRE_CATEGORY_DIGIT: 2834 case SRE_CATEGORY_NOT_DIGIT: 2835 case SRE_CATEGORY_SPACE: 2836 case SRE_CATEGORY_NOT_SPACE: 2837 case SRE_CATEGORY_WORD: 2838 case SRE_CATEGORY_NOT_WORD: 2839 case SRE_CATEGORY_LINEBREAK: 2840 case SRE_CATEGORY_NOT_LINEBREAK: 2841 case SRE_CATEGORY_LOC_WORD: 2842 case SRE_CATEGORY_LOC_NOT_WORD: 2843 case SRE_CATEGORY_UNI_DIGIT: 2844 case SRE_CATEGORY_UNI_NOT_DIGIT: 2845 case SRE_CATEGORY_UNI_SPACE: 2846 case SRE_CATEGORY_UNI_NOT_SPACE: 2847 case SRE_CATEGORY_UNI_WORD: 2848 case SRE_CATEGORY_UNI_NOT_WORD: 2849 case SRE_CATEGORY_UNI_LINEBREAK: 2850 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 2851 break; 2852 default: 2853 FAIL; 2854 } 2855 break; 2856 2857 default: 2858 FAIL; 2859 2860 } 2861 } 2862 2863 return 1; 2864 } 2865 2866 static int 2867 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 2868 { 2869 /* Some variables are manipulated by the macros above */ 2870 SRE_CODE op; 2871 SRE_CODE arg; 2872 SRE_CODE skip; 2873 2874 VTRACE(("code=%p, end=%p\n", code, end)); 2875 2876 if (code > end) 2877 FAIL; 2878 2879 while (code < end) { 2880 GET_OP; 2881 switch (op) { 2882 2883 case SRE_OP_MARK: 2884 /* We don't check whether marks are properly nested; the 2885 sre_match() code is robust even if they don't, and the worst 2886 you can get is nonsensical match results. */ 2887 GET_ARG; 2888 if (arg > 2*groups+1) { 2889 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); 2890 FAIL; 2891 } 2892 break; 2893 2894 case SRE_OP_LITERAL: 2895 case SRE_OP_NOT_LITERAL: 2896 case SRE_OP_LITERAL_IGNORE: 2897 case SRE_OP_NOT_LITERAL_IGNORE: 2898 GET_ARG; 2899 /* The arg is just a character, nothing to check */ 2900 break; 2901 2902 case SRE_OP_SUCCESS: 2903 case SRE_OP_FAILURE: 2904 /* Nothing to check; these normally end the matching process */ 2905 break; 2906 2907 case SRE_OP_AT: 2908 GET_ARG; 2909 switch (arg) { 2910 case SRE_AT_BEGINNING: 2911 case SRE_AT_BEGINNING_STRING: 2912 case SRE_AT_BEGINNING_LINE: 2913 case SRE_AT_END: 2914 case SRE_AT_END_LINE: 2915 case SRE_AT_END_STRING: 2916 case SRE_AT_BOUNDARY: 2917 case SRE_AT_NON_BOUNDARY: 2918 case SRE_AT_LOC_BOUNDARY: 2919 case SRE_AT_LOC_NON_BOUNDARY: 2920 case SRE_AT_UNI_BOUNDARY: 2921 case SRE_AT_UNI_NON_BOUNDARY: 2922 break; 2923 default: 2924 FAIL; 2925 } 2926 break; 2927 2928 case SRE_OP_ANY: 2929 case SRE_OP_ANY_ALL: 2930 /* These have no operands */ 2931 break; 2932 2933 case SRE_OP_IN: 2934 case SRE_OP_IN_IGNORE: 2935 GET_SKIP; 2936 /* Stop 1 before the end; we check the FAILURE below */ 2937 if (!_validate_charset(code, code+skip-2)) 2938 FAIL; 2939 if (code[skip-2] != SRE_OP_FAILURE) 2940 FAIL; 2941 code += skip-1; 2942 break; 2943 2944 case SRE_OP_INFO: 2945 { 2946 /* A minimal info field is 2947 <INFO> <1=skip> <2=flags> <3=min> <4=max>; 2948 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, 2949 more follows. */ 2950 SRE_CODE flags, i; 2951 SRE_CODE *newcode; 2952 GET_SKIP; 2953 newcode = code+skip-1; 2954 GET_ARG; flags = arg; 2955 GET_ARG; /* min */ 2956 GET_ARG; /* max */ 2957 /* Check that only valid flags are present */ 2958 if ((flags & ~(SRE_INFO_PREFIX | 2959 SRE_INFO_LITERAL | 2960 SRE_INFO_CHARSET)) != 0) 2961 FAIL; 2962 /* PREFIX and CHARSET are mutually exclusive */ 2963 if ((flags & SRE_INFO_PREFIX) && 2964 (flags & SRE_INFO_CHARSET)) 2965 FAIL; 2966 /* LITERAL implies PREFIX */ 2967 if ((flags & SRE_INFO_LITERAL) && 2968 !(flags & SRE_INFO_PREFIX)) 2969 FAIL; 2970 /* Validate the prefix */ 2971 if (flags & SRE_INFO_PREFIX) { 2972 SRE_CODE prefix_len; 2973 GET_ARG; prefix_len = arg; 2974 GET_ARG; /* prefix skip */ 2975 /* Here comes the prefix string */ 2976 if (code+prefix_len < code || code+prefix_len > newcode) 2977 FAIL; 2978 code += prefix_len; 2979 /* And here comes the overlap table */ 2980 if (code+prefix_len < code || code+prefix_len > newcode) 2981 FAIL; 2982 /* Each overlap value should be < prefix_len */ 2983 for (i = 0; i < prefix_len; i++) { 2984 if (code[i] >= prefix_len) 2985 FAIL; 2986 } 2987 code += prefix_len; 2988 } 2989 /* Validate the charset */ 2990 if (flags & SRE_INFO_CHARSET) { 2991 if (!_validate_charset(code, newcode-1)) 2992 FAIL; 2993 if (newcode[-1] != SRE_OP_FAILURE) 2994 FAIL; 2995 code = newcode; 2996 } 2997 else if (code != newcode) { 2998 VTRACE(("code=%p, newcode=%p\n", code, newcode)); 2999 FAIL; 3000 } 3001 } 3002 break; 3003 3004 case SRE_OP_BRANCH: 3005 { 3006 SRE_CODE *target = NULL; 3007 for (;;) { 3008 GET_SKIP; 3009 if (skip == 0) 3010 break; 3011 /* Stop 2 before the end; we check the JUMP below */ 3012 if (!_validate_inner(code, code+skip-3, groups)) 3013 FAIL; 3014 code += skip-3; 3015 /* Check that it ends with a JUMP, and that each JUMP 3016 has the same target */ 3017 GET_OP; 3018 if (op != SRE_OP_JUMP) 3019 FAIL; 3020 GET_SKIP; 3021 if (target == NULL) 3022 target = code+skip-1; 3023 else if (code+skip-1 != target) 3024 FAIL; 3025 } 3026 } 3027 break; 3028 3029 case SRE_OP_REPEAT_ONE: 3030 case SRE_OP_MIN_REPEAT_ONE: 3031 { 3032 SRE_CODE min, max; 3033 GET_SKIP; 3034 GET_ARG; min = arg; 3035 GET_ARG; max = arg; 3036 if (min > max) 3037 FAIL; 3038 #ifdef Py_UNICODE_WIDE 3039 if (max > 65535) 3040 FAIL; 3041 #endif 3042 if (!_validate_inner(code, code+skip-4, groups)) 3043 FAIL; 3044 code += skip-4; 3045 GET_OP; 3046 if (op != SRE_OP_SUCCESS) 3047 FAIL; 3048 } 3049 break; 3050 3051 case SRE_OP_REPEAT: 3052 { 3053 SRE_CODE min, max; 3054 GET_SKIP; 3055 GET_ARG; min = arg; 3056 GET_ARG; max = arg; 3057 if (min > max) 3058 FAIL; 3059 #ifdef Py_UNICODE_WIDE 3060 if (max > 65535) 3061 FAIL; 3062 #endif 3063 if (!_validate_inner(code, code+skip-3, groups)) 3064 FAIL; 3065 code += skip-3; 3066 GET_OP; 3067 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) 3068 FAIL; 3069 } 3070 break; 3071 3072 case SRE_OP_GROUPREF: 3073 case SRE_OP_GROUPREF_IGNORE: 3074 GET_ARG; 3075 if (arg >= groups) 3076 FAIL; 3077 break; 3078 3079 case SRE_OP_GROUPREF_EXISTS: 3080 /* The regex syntax for this is: '(?(group)then|else)', where 3081 'group' is either an integer group number or a group name, 3082 'then' and 'else' are sub-regexes, and 'else' is optional. */ 3083 GET_ARG; 3084 if (arg >= groups) 3085 FAIL; 3086 GET_SKIP_ADJ(1); 3087 code--; /* The skip is relative to the first arg! */ 3088 /* There are two possibilities here: if there is both a 'then' 3089 part and an 'else' part, the generated code looks like: 3090 3091 GROUPREF_EXISTS 3092 <group> 3093 <skipyes> 3094 ...then part... 3095 JUMP 3096 <skipno> 3097 (<skipyes> jumps here) 3098 ...else part... 3099 (<skipno> jumps here) 3100 3101 If there is only a 'then' part, it looks like: 3102 3103 GROUPREF_EXISTS 3104 <group> 3105 <skip> 3106 ...then part... 3107 (<skip> jumps here) 3108 3109 There is no direct way to decide which it is, and we don't want 3110 to allow arbitrary jumps anywhere in the code; so we just look 3111 for a JUMP opcode preceding our skip target. 3112 */ 3113 if (skip >= 3 && code+skip-3 >= code && 3114 code[skip-3] == SRE_OP_JUMP) 3115 { 3116 VTRACE(("both then and else parts present\n")); 3117 if (!_validate_inner(code+1, code+skip-3, groups)) 3118 FAIL; 3119 code += skip-2; /* Position after JUMP, at <skipno> */ 3120 GET_SKIP; 3121 if (!_validate_inner(code, code+skip-1, groups)) 3122 FAIL; 3123 code += skip-1; 3124 } 3125 else { 3126 VTRACE(("only a then part present\n")); 3127 if (!_validate_inner(code+1, code+skip-1, groups)) 3128 FAIL; 3129 code += skip-1; 3130 } 3131 break; 3132 3133 case SRE_OP_ASSERT: 3134 case SRE_OP_ASSERT_NOT: 3135 GET_SKIP; 3136 GET_ARG; /* 0 for lookahead, width for lookbehind */ 3137 code--; /* Back up over arg to simplify math below */ 3138 if (arg & 0x80000000) 3139 FAIL; /* Width too large */ 3140 /* Stop 1 before the end; we check the SUCCESS below */ 3141 if (!_validate_inner(code+1, code+skip-2, groups)) 3142 FAIL; 3143 code += skip-2; 3144 GET_OP; 3145 if (op != SRE_OP_SUCCESS) 3146 FAIL; 3147 break; 3148 3149 default: 3150 FAIL; 3151 3152 } 3153 } 3154 3155 VTRACE(("okay\n")); 3156 return 1; 3157 } 3158 3159 static int 3160 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 3161 { 3162 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) 3163 FAIL; 3164 if (groups == 0) /* fix for simplejson */ 3165 groups = 100; /* 100 groups should always be safe */ 3166 return _validate_inner(code, end-1, groups); 3167 } 3168 3169 static int 3170 _validate(PatternObject *self) 3171 { 3172 if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) 3173 { 3174 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); 3175 return 0; 3176 } 3177 else 3178 VTRACE(("Success!\n")); 3179 return 1; 3180 } 3181 3182 /* -------------------------------------------------------------------- */ 3183 /* match methods */ 3184 3185 static void 3186 match_dealloc(MatchObject* self) 3187 { 3188 Py_XDECREF(self->regs); 3189 Py_XDECREF(self->string); 3190 Py_DECREF(self->pattern); 3191 PyObject_DEL(self); 3192 } 3193 3194 static PyObject* 3195 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) 3196 { 3197 if (index < 0 || index >= self->groups) { 3198 /* raise IndexError if we were given a bad group number */ 3199 PyErr_SetString( 3200 PyExc_IndexError, 3201 "no such group" 3202 ); 3203 return NULL; 3204 } 3205 3206 index *= 2; 3207 3208 if (self->string == Py_None || self->mark[index] < 0) { 3209 /* return default value if the string or group is undefined */ 3210 Py_INCREF(def); 3211 return def; 3212 } 3213 3214 return PySequence_GetSlice( 3215 self->string, self->mark[index], self->mark[index+1] 3216 ); 3217 } 3218 3219 static Py_ssize_t 3220 match_getindex(MatchObject* self, PyObject* index) 3221 { 3222 Py_ssize_t i; 3223 3224 if (PyInt_Check(index)) 3225 return PyInt_AsSsize_t(index); 3226 3227 i = -1; 3228 3229 if (self->pattern->groupindex) { 3230 index = PyObject_GetItem(self->pattern->groupindex, index); 3231 if (index) { 3232 if (PyInt_Check(index) || PyLong_Check(index)) 3233 i = PyInt_AsSsize_t(index); 3234 Py_DECREF(index); 3235 } else 3236 PyErr_Clear(); 3237 } 3238 3239 return i; 3240 } 3241 3242 static PyObject* 3243 match_getslice(MatchObject* self, PyObject* index, PyObject* def) 3244 { 3245 return match_getslice_by_index(self, match_getindex(self, index), def); 3246 } 3247 3248 static PyObject* 3249 match_expand(MatchObject* self, PyObject* ptemplate) 3250 { 3251 /* delegate to Python code */ 3252 return call( 3253 SRE_PY_MODULE, "_expand", 3254 PyTuple_Pack(3, self->pattern, self, ptemplate) 3255 ); 3256 } 3257 3258 static PyObject* 3259 match_group(MatchObject* self, PyObject* args) 3260 { 3261 PyObject* result; 3262 Py_ssize_t i, size; 3263 3264 size = PyTuple_GET_SIZE(args); 3265 3266 switch (size) { 3267 case 0: 3268 result = match_getslice(self, Py_False, Py_None); 3269 break; 3270 case 1: 3271 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); 3272 break; 3273 default: 3274 /* fetch multiple items */ 3275 result = PyTuple_New(size); 3276 if (!result) 3277 return NULL; 3278 for (i = 0; i < size; i++) { 3279 PyObject* item = match_getslice( 3280 self, PyTuple_GET_ITEM(args, i), Py_None 3281 ); 3282 if (!item) { 3283 Py_DECREF(result); 3284 return NULL; 3285 } 3286 PyTuple_SET_ITEM(result, i, item); 3287 } 3288 break; 3289 } 3290 return result; 3291 } 3292 3293 static PyObject* 3294 match_groups(MatchObject* self, PyObject* args, PyObject* kw) 3295 { 3296 PyObject* result; 3297 Py_ssize_t index; 3298 3299 PyObject* def = Py_None; 3300 static char* kwlist[] = { "default", NULL }; 3301 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def)) 3302 return NULL; 3303 3304 result = PyTuple_New(self->groups-1); 3305 if (!result) 3306 return NULL; 3307 3308 for (index = 1; index < self->groups; index++) { 3309 PyObject* item; 3310 item = match_getslice_by_index(self, index, def); 3311 if (!item) { 3312 Py_DECREF(result); 3313 return NULL; 3314 } 3315 PyTuple_SET_ITEM(result, index-1, item); 3316 } 3317 3318 return result; 3319 } 3320 3321 static PyObject* 3322 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw) 3323 { 3324 PyObject* result; 3325 PyObject* keys; 3326 Py_ssize_t index; 3327 3328 PyObject* def = Py_None; 3329 static char* kwlist[] = { "default", NULL }; 3330 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def)) 3331 return NULL; 3332 3333 result = PyDict_New(); 3334 if (!result || !self->pattern->groupindex) 3335 return result; 3336 3337 keys = PyMapping_Keys(self->pattern->groupindex); 3338 if (!keys) 3339 goto failed; 3340 3341 for (index = 0; index < PyList_GET_SIZE(keys); index++) { 3342 int status; 3343 PyObject* key; 3344 PyObject* value; 3345 key = PyList_GET_ITEM(keys, index); 3346 if (!key) 3347 goto failed; 3348 value = match_getslice(self, key, def); 3349 if (!value) { 3350 Py_DECREF(key); 3351 goto failed; 3352 } 3353 status = PyDict_SetItem(result, key, value); 3354 Py_DECREF(value); 3355 if (status < 0) 3356 goto failed; 3357 } 3358 3359 Py_DECREF(keys); 3360 3361 return result; 3362 3363 failed: 3364 Py_XDECREF(keys); 3365 Py_DECREF(result); 3366 return NULL; 3367 } 3368 3369 static PyObject* 3370 match_start(MatchObject* self, PyObject* args) 3371 { 3372 Py_ssize_t index; 3373 3374 PyObject* index_ = Py_False; /* zero */ 3375 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_)) 3376 return NULL; 3377 3378 index = match_getindex(self, index_); 3379 3380 if (index < 0 || index >= self->groups) { 3381 PyErr_SetString( 3382 PyExc_IndexError, 3383 "no such group" 3384 ); 3385 return NULL; 3386 } 3387 3388 /* mark is -1 if group is undefined */ 3389 return Py_BuildValue("i", self->mark[index*2]); 3390 } 3391 3392 static PyObject* 3393 match_end(MatchObject* self, PyObject* args) 3394 { 3395 Py_ssize_t index; 3396 3397 PyObject* index_ = Py_False; /* zero */ 3398 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_)) 3399 return NULL; 3400 3401 index = match_getindex(self, index_); 3402 3403 if (index < 0 || index >= self->groups) { 3404 PyErr_SetString( 3405 PyExc_IndexError, 3406 "no such group" 3407 ); 3408 return NULL; 3409 } 3410 3411 /* mark is -1 if group is undefined */ 3412 return Py_BuildValue("i", self->mark[index*2+1]); 3413 } 3414 3415 LOCAL(PyObject*) 3416 _pair(Py_ssize_t i1, Py_ssize_t i2) 3417 { 3418 PyObject* pair; 3419 PyObject* item; 3420 3421 pair = PyTuple_New(2); 3422 if (!pair) 3423 return NULL; 3424 3425 item = PyInt_FromSsize_t(i1); 3426 if (!item) 3427 goto error; 3428 PyTuple_SET_ITEM(pair, 0, item); 3429 3430 item = PyInt_FromSsize_t(i2); 3431 if (!item) 3432 goto error; 3433 PyTuple_SET_ITEM(pair, 1, item); 3434 3435 return pair; 3436 3437 error: 3438 Py_DECREF(pair); 3439 return NULL; 3440 } 3441 3442 static PyObject* 3443 match_span(MatchObject* self, PyObject* args) 3444 { 3445 Py_ssize_t index; 3446 3447 PyObject* index_ = Py_False; /* zero */ 3448 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_)) 3449 return NULL; 3450 3451 index = match_getindex(self, index_); 3452 3453 if (index < 0 || index >= self->groups) { 3454 PyErr_SetString( 3455 PyExc_IndexError, 3456 "no such group" 3457 ); 3458 return NULL; 3459 } 3460 3461 /* marks are -1 if group is undefined */ 3462 return _pair(self->mark[index*2], self->mark[index*2+1]); 3463 } 3464 3465 static PyObject* 3466 match_regs(MatchObject* self) 3467 { 3468 PyObject* regs; 3469 PyObject* item; 3470 Py_ssize_t index; 3471 3472 regs = PyTuple_New(self->groups); 3473 if (!regs) 3474 return NULL; 3475 3476 for (index = 0; index < self->groups; index++) { 3477 item = _pair(self->mark[index*2], self->mark[index*2+1]); 3478 if (!item) { 3479 Py_DECREF(regs); 3480 return NULL; 3481 } 3482 PyTuple_SET_ITEM(regs, index, item); 3483 } 3484 3485 Py_INCREF(regs); 3486 self->regs = regs; 3487 3488 return regs; 3489 } 3490 3491 static PyObject* 3492 match_copy(MatchObject* self, PyObject *unused) 3493 { 3494 #ifdef USE_BUILTIN_COPY 3495 MatchObject* copy; 3496 Py_ssize_t slots, offset; 3497 3498 slots = 2 * (self->pattern->groups+1); 3499 3500 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots); 3501 if (!copy) 3502 return NULL; 3503 3504 /* this value a constant, but any compiler should be able to 3505 figure that out all by itself */ 3506 offset = offsetof(MatchObject, string); 3507 3508 Py_XINCREF(self->pattern); 3509 Py_XINCREF(self->string); 3510 Py_XINCREF(self->regs); 3511 3512 memcpy((char*) copy + offset, (char*) self + offset, 3513 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset); 3514 3515 return (PyObject*) copy; 3516 #else 3517 PyErr_SetString(PyExc_TypeError, "cannot copy this match object"); 3518 return NULL; 3519 #endif 3520 } 3521 3522 static PyObject* 3523 match_deepcopy(MatchObject* self, PyObject* memo) 3524 { 3525 #ifdef USE_BUILTIN_COPY 3526 MatchObject* copy; 3527 3528 copy = (MatchObject*) match_copy(self); 3529 if (!copy) 3530 return NULL; 3531 3532 if (!deepcopy((PyObject**) &copy->pattern, memo) || 3533 !deepcopy(&copy->string, memo) || 3534 !deepcopy(&copy->regs, memo)) { 3535 Py_DECREF(copy); 3536 return NULL; 3537 } 3538 3539 #else 3540 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object"); 3541 return NULL; 3542 #endif 3543 } 3544 3545 static struct PyMethodDef match_methods[] = { 3546 {"group", (PyCFunction) match_group, METH_VARARGS}, 3547 {"start", (PyCFunction) match_start, METH_VARARGS}, 3548 {"end", (PyCFunction) match_end, METH_VARARGS}, 3549 {"span", (PyCFunction) match_span, METH_VARARGS}, 3550 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS}, 3551 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS}, 3552 {"expand", (PyCFunction) match_expand, METH_O}, 3553 {"__copy__", (PyCFunction) match_copy, METH_NOARGS}, 3554 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O}, 3555 {NULL, NULL} 3556 }; 3557 3558 static PyObject * 3559 match_lastindex_get(MatchObject *self) 3560 { 3561 if (self->lastindex >= 0) 3562 return Py_BuildValue("i", self->lastindex); 3563 Py_INCREF(Py_None); 3564 return Py_None; 3565 } 3566 3567 static PyObject * 3568 match_lastgroup_get(MatchObject *self) 3569 { 3570 if (self->pattern->indexgroup && self->lastindex >= 0) { 3571 PyObject* result = PySequence_GetItem( 3572 self->pattern->indexgroup, self->lastindex 3573 ); 3574 if (result) 3575 return result; 3576 PyErr_Clear(); 3577 } 3578 Py_INCREF(Py_None); 3579 return Py_None; 3580 } 3581 3582 static PyObject * 3583 match_regs_get(MatchObject *self) 3584 { 3585 if (self->regs) { 3586 Py_INCREF(self->regs); 3587 return self->regs; 3588 } else 3589 return match_regs(self); 3590 } 3591 3592 static PyGetSetDef match_getset[] = { 3593 {"lastindex", (getter)match_lastindex_get, (setter)NULL}, 3594 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL}, 3595 {"regs", (getter)match_regs_get, (setter)NULL}, 3596 {NULL} 3597 }; 3598 3599 #define MATCH_OFF(x) offsetof(MatchObject, x) 3600 static PyMemberDef match_members[] = { 3601 {"string", T_OBJECT, MATCH_OFF(string), READONLY}, 3602 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY}, 3603 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY}, 3604 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY}, 3605 {NULL} 3606 }; 3607 3608 3609 /* FIXME: implement setattr("string", None) as a special case (to 3610 detach the associated string, if any */ 3611 3612 static PyTypeObject Match_Type = { 3613 PyVarObject_HEAD_INIT(NULL, 0) 3614 "_" SRE_MODULE ".SRE_Match", 3615 sizeof(MatchObject), sizeof(Py_ssize_t), 3616 (destructor)match_dealloc, /* tp_dealloc */ 3617 0, /* tp_print */ 3618 0, /* tp_getattr */ 3619 0, /* tp_setattr */ 3620 0, /* tp_compare */ 3621 0, /* tp_repr */ 3622 0, /* tp_as_number */ 3623 0, /* tp_as_sequence */ 3624 0, /* tp_as_mapping */ 3625 0, /* tp_hash */ 3626 0, /* tp_call */ 3627 0, /* tp_str */ 3628 0, /* tp_getattro */ 3629 0, /* tp_setattro */ 3630 0, /* tp_as_buffer */ 3631 Py_TPFLAGS_DEFAULT, 3632 0, /* tp_doc */ 3633 0, /* tp_traverse */ 3634 0, /* tp_clear */ 3635 0, /* tp_richcompare */ 3636 0, /* tp_weaklistoffset */ 3637 0, /* tp_iter */ 3638 0, /* tp_iternext */ 3639 match_methods, /* tp_methods */ 3640 match_members, /* tp_members */ 3641 match_getset, /* tp_getset */ 3642 }; 3643 3644 static PyObject* 3645 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status) 3646 { 3647 /* create match object (from state object) */ 3648 3649 MatchObject* match; 3650 Py_ssize_t i, j; 3651 char* base; 3652 int n; 3653 3654 if (status > 0) { 3655 3656 /* create match object (with room for extra group marks) */ 3657 /* coverity[ampersand_in_size] */ 3658 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 3659 2*(pattern->groups+1)); 3660 if (!match) 3661 return NULL; 3662 3663 Py_INCREF(pattern); 3664 match->pattern = pattern; 3665 3666 Py_INCREF(state->string); 3667 match->string = state->string; 3668 3669 match->regs = NULL; 3670 match->groups = pattern->groups+1; 3671 3672 /* fill in group slices */ 3673 3674 base = (char*) state->beginning; 3675 n = state->charsize; 3676 3677 match->mark[0] = ((char*) state->start - base) / n; 3678 match->mark[1] = ((char*) state->ptr - base) / n; 3679 3680 for (i = j = 0; i < pattern->groups; i++, j+=2) 3681 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { 3682 match->mark[j+2] = ((char*) state->mark[j] - base) / n; 3683 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; 3684 } else 3685 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ 3686 3687 match->pos = state->pos; 3688 match->endpos = state->endpos; 3689 3690 match->lastindex = state->lastindex; 3691 3692 return (PyObject*) match; 3693 3694 } else if (status == 0) { 3695 3696 /* no match */ 3697 Py_INCREF(Py_None); 3698 return Py_None; 3699 3700 } 3701 3702 /* internal error */ 3703 pattern_error(status); 3704 return NULL; 3705 } 3706 3707 3708 /* -------------------------------------------------------------------- */ 3709 /* scanner methods (experimental) */ 3710 3711 static void 3712 scanner_dealloc(ScannerObject* self) 3713 { 3714 state_fini(&self->state); 3715 Py_XDECREF(self->pattern); 3716 PyObject_DEL(self); 3717 } 3718 3719 static PyObject* 3720 scanner_match(ScannerObject* self, PyObject *unused) 3721 { 3722 SRE_STATE* state = &self->state; 3723 PyObject* match; 3724 int status; 3725 3726 state_reset(state); 3727 3728 state->ptr = state->start; 3729 3730 if (state->charsize == 1) { 3731 status = sre_match(state, PatternObject_GetCode(self->pattern)); 3732 } else { 3733 #if defined(HAVE_UNICODE) 3734 status = sre_umatch(state, PatternObject_GetCode(self->pattern)); 3735 #endif 3736 } 3737 if (PyErr_Occurred()) 3738 return NULL; 3739 3740 match = pattern_new_match((PatternObject*) self->pattern, 3741 state, status); 3742 3743 if (status == 0 || state->ptr == state->start) 3744 state->start = (void*) ((char*) state->ptr + state->charsize); 3745 else 3746 state->start = state->ptr; 3747 3748 return match; 3749 } 3750 3751 3752 static PyObject* 3753 scanner_search(ScannerObject* self, PyObject *unused) 3754 { 3755 SRE_STATE* state = &self->state; 3756 PyObject* match; 3757 int status; 3758 3759 state_reset(state); 3760 3761 state->ptr = state->start; 3762 3763 if (state->charsize == 1) { 3764 status = sre_search(state, PatternObject_GetCode(self->pattern)); 3765 } else { 3766 #if defined(HAVE_UNICODE) 3767 status = sre_usearch(state, PatternObject_GetCode(self->pattern)); 3768 #endif 3769 } 3770 if (PyErr_Occurred()) 3771 return NULL; 3772 3773 match = pattern_new_match((PatternObject*) self->pattern, 3774 state, status); 3775 3776 if (status == 0 || state->ptr == state->start) 3777 state->start = (void*) ((char*) state->ptr + state->charsize); 3778 else 3779 state->start = state->ptr; 3780 3781 return match; 3782 } 3783 3784 static PyMethodDef scanner_methods[] = { 3785 {"match", (PyCFunction) scanner_match, METH_NOARGS}, 3786 {"search", (PyCFunction) scanner_search, METH_NOARGS}, 3787 {NULL, NULL} 3788 }; 3789 3790 #define SCAN_OFF(x) offsetof(ScannerObject, x) 3791 static PyMemberDef scanner_members[] = { 3792 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, 3793 {NULL} /* Sentinel */ 3794 }; 3795 3796 statichere PyTypeObject Scanner_Type = { 3797 PyObject_HEAD_INIT(NULL) 3798 0, "_" SRE_MODULE ".SRE_Scanner", 3799 sizeof(ScannerObject), 0, 3800 (destructor)scanner_dealloc, /*tp_dealloc*/ 3801 0, /* tp_print */ 3802 0, /* tp_getattr */ 3803 0, /* tp_setattr */ 3804 0, /* tp_reserved */ 3805 0, /* tp_repr */ 3806 0, /* tp_as_number */ 3807 0, /* tp_as_sequence */ 3808 0, /* tp_as_mapping */ 3809 0, /* tp_hash */ 3810 0, /* tp_call */ 3811 0, /* tp_str */ 3812 0, /* tp_getattro */ 3813 0, /* tp_setattro */ 3814 0, /* tp_as_buffer */ 3815 Py_TPFLAGS_DEFAULT, /* tp_flags */ 3816 0, /* tp_doc */ 3817 0, /* tp_traverse */ 3818 0, /* tp_clear */ 3819 0, /* tp_richcompare */ 3820 0, /* tp_weaklistoffset */ 3821 0, /* tp_iter */ 3822 0, /* tp_iternext */ 3823 scanner_methods, /* tp_methods */ 3824 scanner_members, /* tp_members */ 3825 0, /* tp_getset */ 3826 }; 3827 3828 static PyObject* 3829 pattern_scanner(PatternObject* pattern, PyObject* args) 3830 { 3831 /* create search state object */ 3832 3833 ScannerObject* self; 3834 3835 PyObject* string; 3836 Py_ssize_t start = 0; 3837 Py_ssize_t end = PY_SSIZE_T_MAX; 3838 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end)) 3839 return NULL; 3840 3841 /* create scanner object */ 3842 self = PyObject_NEW(ScannerObject, &Scanner_Type); 3843 if (!self) 3844 return NULL; 3845 self->pattern = NULL; 3846 3847 string = state_init(&self->state, pattern, string, start, end); 3848 if (!string) { 3849 Py_DECREF(self); 3850 return NULL; 3851 } 3852 3853 Py_INCREF(pattern); 3854 self->pattern = (PyObject*) pattern; 3855 3856 return (PyObject*) self; 3857 } 3858 3859 static PyMethodDef _functions[] = { 3860 {"compile", _compile, METH_VARARGS}, 3861 {"getcodesize", sre_codesize, METH_NOARGS}, 3862 {"getlower", sre_getlower, METH_VARARGS}, 3863 {NULL, NULL} 3864 }; 3865 3866 #if PY_VERSION_HEX < 0x02030000 3867 DL_EXPORT(void) init_sre(void) 3868 #else 3869 PyMODINIT_FUNC init_sre(void) 3870 #endif 3871 { 3872 PyObject* m; 3873 PyObject* d; 3874 PyObject* x; 3875 3876 /* Patch object types */ 3877 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) || 3878 PyType_Ready(&Scanner_Type)) 3879 return; 3880 3881 m = Py_InitModule("_" SRE_MODULE, _functions); 3882 if (m == NULL) 3883 return; 3884 d = PyModule_GetDict(m); 3885 3886 x = PyInt_FromLong(SRE_MAGIC); 3887 if (x) { 3888 PyDict_SetItemString(d, "MAGIC", x); 3889 Py_DECREF(x); 3890 } 3891 3892 x = PyInt_FromLong(sizeof(SRE_CODE)); 3893 if (x) { 3894 PyDict_SetItemString(d, "CODESIZE", x); 3895 Py_DECREF(x); 3896 } 3897 3898 x = PyString_FromString(copyright); 3899 if (x) { 3900 PyDict_SetItemString(d, "copyright", x); 3901 Py_DECREF(x); 3902 } 3903 } 3904 3905 #endif /* !defined(SRE_RECURSIVE) */ 3906 3907 /* vim:ts=4:sw=4:et 3908 */