Python-2.7.3/Modules/cjkcodecs/_codecs_iso2022.c

Location Tool Test ID Function Issue
/builddir/build/BUILD/Python-2.7.3/Modules/cjkcodecs/_codecs_iso2022.c:126:39 clang-analyzer Value stored to 'desig' during its initialization is never read
/builddir/build/BUILD/Python-2.7.3/Modules/cjkcodecs/_codecs_iso2022.c:126:39 clang-analyzer Value stored to 'desig' during its initialization is never read
   1 /*
   2  * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
   3  *
   4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
   5  */
   6 
   7 #define USING_IMPORTED_MAPS
   8 #define USING_BINARY_PAIR_SEARCH
   9 #define EXTERN_JISX0213_PAIR
  10 #define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE
  11 #define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE
  12 
  13 #include "cjkcodecs.h"
  14 #include "alg_jisx0201.h"
  15 #include "emu_jisx0213_2000.h"
  16 #include "mappings_jisx0213_pair.h"
  17 
  18 /* STATE
  19 
  20    state->c[0-3]
  21 
  22     00000000
  23     ||^^^^^|
  24     |+-----+----  G0-3 Character Set
  25     +-----------  Is G0-3 double byte?
  26 
  27    state->c[4]
  28 
  29     00000000
  30           ||
  31           |+----  Locked-Shift?
  32           +-----  ESC Throughout
  33 */
  34 
  35 #define ESC                     0x1B
  36 #define SO                      0x0E
  37 #define SI                      0x0F
  38 #define LF                      0x0A
  39 
  40 #define MAX_ESCSEQLEN           16
  41 
  42 #define CHARSET_ISO8859_1       'A'
  43 #define CHARSET_ASCII           'B'
  44 #define CHARSET_ISO8859_7       'F'
  45 #define CHARSET_JISX0201_K      'I'
  46 #define CHARSET_JISX0201_R      'J'
  47 
  48 #define CHARSET_GB2312          ('A'|CHARSET_DBCS)
  49 #define CHARSET_JISX0208        ('B'|CHARSET_DBCS)
  50 #define CHARSET_KSX1001         ('C'|CHARSET_DBCS)
  51 #define CHARSET_JISX0212        ('D'|CHARSET_DBCS)
  52 #define CHARSET_GB2312_8565     ('E'|CHARSET_DBCS)
  53 #define CHARSET_CNS11643_1      ('G'|CHARSET_DBCS)
  54 #define CHARSET_CNS11643_2      ('H'|CHARSET_DBCS)
  55 #define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
  56 #define CHARSET_JISX0213_2      ('P'|CHARSET_DBCS)
  57 #define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
  58 #define CHARSET_JISX0208_O      ('@'|CHARSET_DBCS)
  59 
  60 #define CHARSET_DBCS            0x80
  61 #define ESCMARK(mark)           ((mark) & 0x7f)
  62 
  63 #define IS_ESCEND(c)    (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
  64 #define IS_ISO2022ESC(c2) \
  65         ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
  66          (c2) == '.' || (c2) == '&')
  67     /* this is not a complete list of ISO-2022 escape sequence headers.
  68      * but, it's enough to implement CJK instances of iso-2022. */
  69 
  70 #define MAP_UNMAPPABLE          0xFFFF
  71 #define MAP_MULTIPLE_AVAIL      0xFFFE /* for JIS X 0213 */
  72 
  73 #define F_SHIFTED               0x01
  74 #define F_ESCTHROUGHOUT         0x02
  75 
  76 #define STATE_SETG(dn, v)       ((state)->c[dn]) = (v);  /* store charset v in slot dn; expands to a full statement (own ';') and needs `state` in scope */
  77 #define STATE_GETG(dn)          ((state)->c[dn])  /* read the charset stored in slot dn; plain expression */
  78 
  79 #define STATE_G0                STATE_GETG(0)
  80 #define STATE_G1                STATE_GETG(1)
  81 #define STATE_G2                STATE_GETG(2)
  82 #define STATE_G3                STATE_GETG(3)
  83 #define STATE_SETG0(v)          STATE_SETG(0, v)
  84 #define STATE_SETG1(v)          STATE_SETG(1, v)
  85 #define STATE_SETG2(v)          STATE_SETG(2, v)
  86 #define STATE_SETG3(v)          STATE_SETG(3, v)
  87 
  88 #define STATE_SETFLAG(f)        ((state)->c[4]) |= (f);  /* set flag bit(s) f in state->c[4]; statement macro, carries its own ';' */
  89 #define STATE_GETFLAG(f)        ((state)->c[4] & (f))  /* nonzero when any bit of f is set; plain expression */
  90 #define STATE_CLEARFLAG(f)      ((state)->c[4]) &= ~(f);  /* clear flag bit(s) f; statement macro, carries its own ';' */
  91 #define STATE_CLEARFLAGS()      ((state)->c[4]) = 0;  /* clear every flag; statement macro, carries its own ';' */
  92 
  93 #define ISO2022_CONFIG          ((const struct iso2022_config *)config)  /* casts the `config` identifier in scope at the use site */
  94 #define CONFIG_ISSET(flag)      (ISO2022_CONFIG->flags & (flag))  /* nonzero when the codec config has `flag` set */
  95 #define CONFIG_DESIGNATIONS     (ISO2022_CONFIG->designations)  /* designation table of the current codec config */
  96 
  97 /* iso2022_config.flags */
  98 #define NO_SHIFT                0x01
  99 #define USE_G2                  0x02
 100 #define USE_JISX0208_EXT        0x04
 101 
 102 /*-*- internal data structures -*-*/
 103 
 104 typedef int (*iso2022_init_func)(void);
 105 typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
 106 typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
 107 
 108 struct iso2022_designation {  /* one entry of a codec's designation table; an entry whose mark is 0 ends the table */
 109     unsigned char mark;  /* charset identifier (a CHARSET_* value); compared against the G0-G3 state */
 110     unsigned char plane;  /* G-set the charset is designated to; the encoder handles 0 (G0) and 1 (G1) only */
 111     unsigned char width;  /* bytes per encoded character; the encoder asserts 1 or 2 */
 112     iso2022_init_func initializer;  /* optional setup hook (may be NULL); nonzero return means failure */
 113     iso2022_decode_func decoder;  /* bytes -> code point; MAP_UNMAPPABLE when there is no mapping */
 114     iso2022_encode_func encoder;  /* code point(s) -> DBCHAR; MAP_UNMAPPABLE when there is no mapping */
 115 };
 116 
 117 struct iso2022_config {  /* per-codec configuration, reached through the ISO2022_CONFIG cast of `config` */
 118     int flags;  /* bitwise OR of NO_SHIFT, USE_G2, USE_JISX0208_EXT */
 119     const struct iso2022_designation *designations; /* non-ascii desigs */
 120 };
 121 
 122 /*-*- iso-2022 codec implementation -*-*/
 123 
 124 CODEC_INIT(iso2022)
 125 {
 126     const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
Value stored to 'desig' during its initialization is never read
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

Value stored to 'desig' during its initialization is never read
(emitted by clang-analyzer)

TODO: a detailed trace is available in the data model (not yet rendered in this report)

127 for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) 128 if (desig->initializer != NULL && desig->initializer() != 0) 129 return -1; 130 return 0; 131 } 132 133 ENCODER_INIT(iso2022) 134 { 135 STATE_CLEARFLAGS() 136 STATE_SETG0(CHARSET_ASCII) 137 STATE_SETG1(CHARSET_ASCII) 138 return 0; 139 } 140 141 ENCODER_RESET(iso2022) 142 { 143 if (STATE_GETFLAG(F_SHIFTED)) { 144 WRITE1(SI) 145 NEXT_OUT(1) 146 STATE_CLEARFLAG(F_SHIFTED) 147 } 148 if (STATE_G0 != CHARSET_ASCII) { 149 WRITE3(ESC, '(', 'B') 150 NEXT_OUT(3) 151 STATE_SETG0(CHARSET_ASCII) 152 } 153 return 0; 154 } 155 156 ENCODER(iso2022) 157 { 158 while (inleft > 0) { 159 const struct iso2022_designation *dsg; 160 DBCHAR encoded; 161 ucs4_t c = **inbuf; 162 Py_ssize_t insize; 163 164 if (c < 0x80) { 165 if (STATE_G0 != CHARSET_ASCII) { 166 WRITE3(ESC, '(', 'B') 167 STATE_SETG0(CHARSET_ASCII) 168 NEXT_OUT(3) 169 } 170 if (STATE_GETFLAG(F_SHIFTED)) { 171 WRITE1(SI) 172 STATE_CLEARFLAG(F_SHIFTED) 173 NEXT_OUT(1) 174 } 175 WRITE1((unsigned char)c) 176 NEXT(1, 1) 177 continue; 178 } 179 180 DECODE_SURROGATE(c) 181 insize = GET_INSIZE(c); 182 183 encoded = MAP_UNMAPPABLE; 184 for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { 185 Py_ssize_t length = 1; 186 encoded = dsg->encoder(&c, &length); 187 if (encoded == MAP_MULTIPLE_AVAIL) { 188 /* this implementation won't work for pair 189 * of non-bmp characters. 
*/ 190 if (inleft < 2) { 191 if (!(flags & MBENC_FLUSH)) 192 return MBERR_TOOFEW; 193 length = -1; 194 } 195 else 196 length = 2; 197 #if Py_UNICODE_SIZE == 2 198 if (length == 2) { 199 ucs4_t u4in[2]; 200 u4in[0] = (ucs4_t)IN1; 201 u4in[1] = (ucs4_t)IN2; 202 encoded = dsg->encoder(u4in, &length); 203 } else 204 encoded = dsg->encoder(&c, &length); 205 #else 206 encoded = dsg->encoder(&c, &length); 207 #endif 208 if (encoded != MAP_UNMAPPABLE) { 209 insize = length; 210 break; 211 } 212 } 213 else if (encoded != MAP_UNMAPPABLE) 214 break; 215 } 216 217 if (!dsg->mark) 218 return 1; 219 assert(dsg->width == 1 || dsg->width == 2); 220 221 switch (dsg->plane) { 222 case 0: /* G0 */ 223 if (STATE_GETFLAG(F_SHIFTED)) { 224 WRITE1(SI) 225 STATE_CLEARFLAG(F_SHIFTED) 226 NEXT_OUT(1) 227 } 228 if (STATE_G0 != dsg->mark) { 229 if (dsg->width == 1) { 230 WRITE3(ESC, '(', ESCMARK(dsg->mark)) 231 STATE_SETG0(dsg->mark) 232 NEXT_OUT(3) 233 } 234 else if (dsg->mark == CHARSET_JISX0208) { 235 WRITE3(ESC, '$', ESCMARK(dsg->mark)) 236 STATE_SETG0(dsg->mark) 237 NEXT_OUT(3) 238 } 239 else { 240 WRITE4(ESC, '$', '(', 241 ESCMARK(dsg->mark)) 242 STATE_SETG0(dsg->mark) 243 NEXT_OUT(4) 244 } 245 } 246 break; 247 case 1: /* G1 */ 248 if (STATE_G1 != dsg->mark) { 249 if (dsg->width == 1) { 250 WRITE3(ESC, ')', ESCMARK(dsg->mark)) 251 STATE_SETG1(dsg->mark) 252 NEXT_OUT(3) 253 } 254 else { 255 WRITE4(ESC, '$', ')', 256 ESCMARK(dsg->mark)) 257 STATE_SETG1(dsg->mark) 258 NEXT_OUT(4) 259 } 260 } 261 if (!STATE_GETFLAG(F_SHIFTED)) { 262 WRITE1(SO) 263 STATE_SETFLAG(F_SHIFTED) 264 NEXT_OUT(1) 265 } 266 break; 267 default: /* G2 and G3 is not supported: no encoding in 268 * CJKCodecs are using them yet */ 269 return MBERR_INTERNAL; 270 } 271 272 if (dsg->width == 1) { 273 WRITE1((unsigned char)encoded) 274 NEXT_OUT(1) 275 } 276 else { 277 WRITE2(encoded >> 8, encoded & 0xff) 278 NEXT_OUT(2) 279 } 280 NEXT_IN(insize) 281 } 282 283 return 0; 284 } 285 286 DECODER_INIT(iso2022) 287 { 288 
STATE_CLEARFLAGS() 289 STATE_SETG0(CHARSET_ASCII) 290 STATE_SETG1(CHARSET_ASCII) 291 STATE_SETG2(CHARSET_ASCII) 292 return 0; 293 } 294 295 DECODER_RESET(iso2022) 296 { 297 STATE_SETG0(CHARSET_ASCII) 298 STATE_CLEARFLAG(F_SHIFTED) 299 return 0; 300 } 301 302 static Py_ssize_t 303 iso2022processesc(const void *config, MultibyteCodec_State *state, 304 const unsigned char **inbuf, Py_ssize_t *inleft) 305 { 306 unsigned char charset, designation; 307 Py_ssize_t i, esclen; 308 309 for (i = 1;i < MAX_ESCSEQLEN;i++) { 310 if (i >= *inleft) 311 return MBERR_TOOFEW; 312 if (IS_ESCEND((*inbuf)[i])) { 313 esclen = i + 1; 314 break; 315 } 316 else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && 317 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') 318 i += 2; 319 } 320 321 if (i >= MAX_ESCSEQLEN) 322 return 1; /* unterminated escape sequence */ 323 324 switch (esclen) { 325 case 3: 326 if (IN2 == '$') { 327 charset = IN3 | CHARSET_DBCS; 328 designation = 0; 329 } 330 else { 331 charset = IN3; 332 if (IN2 == '(') designation = 0; 333 else if (IN2 == ')') designation = 1; 334 else if (CONFIG_ISSET(USE_G2) && IN2 == '.') 335 designation = 2; 336 else return 3; 337 } 338 break; 339 case 4: 340 if (IN2 != '$') 341 return 4; 342 343 charset = IN4 | CHARSET_DBCS; 344 if (IN3 == '(') designation = 0; 345 else if (IN3 == ')') designation = 1; 346 else return 4; 347 break; 348 case 6: /* designation with prefix */ 349 if (CONFIG_ISSET(USE_JISX0208_EXT) && 350 (*inbuf)[3] == ESC && (*inbuf)[4] == '$' && 351 (*inbuf)[5] == 'B') { 352 charset = 'B' | CHARSET_DBCS; 353 designation = 0; 354 } 355 else 356 return 6; 357 break; 358 default: 359 return esclen; 360 } 361 362 /* raise error when the charset is not designated for this encoding */ 363 if (charset != CHARSET_ASCII) { 364 const struct iso2022_designation *dsg; 365 366 for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) 367 if (dsg->mark == charset) 368 break; 369 if (!dsg->mark) 370 return esclen; 371 } 372 373 STATE_SETG(designation, 
charset) 374 *inleft -= esclen; 375 (*inbuf) += esclen; 376 return 0; 377 } 378 379 #define ISO8859_7_DECODE(c, assi) \ 380 if ((c) < 0xa0) (assi) = (c); \ 381 else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ 382 (assi) = (c); \ 383 else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ 384 (0xbffffd77L & (1L << ((c)-0xb4))))) \ 385 (assi) = 0x02d0 + (c); \ 386 else if ((c) == 0xa1) (assi) = 0x2018; \ 387 else if ((c) == 0xa2) (assi) = 0x2019; \ 388 else if ((c) == 0xaf) (assi) = 0x2015; 389 390 static Py_ssize_t 391 iso2022processg2(const void *config, MultibyteCodec_State *state, 392 const unsigned char **inbuf, Py_ssize_t *inleft, 393 Py_UNICODE **outbuf, Py_ssize_t *outleft) 394 { 395 /* not written to use encoder, decoder functions because only few 396 * encodings use G2 designations in CJKCodecs */ 397 if (STATE_G2 == CHARSET_ISO8859_1) { 398 if (IN3 < 0x80) 399 OUT1(IN3 + 0x80) 400 else 401 return 3; 402 } 403 else if (STATE_G2 == CHARSET_ISO8859_7) { 404 ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) 405 else return 3; 406 } 407 else if (STATE_G2 == CHARSET_ASCII) { 408 if (IN3 & 0x80) return 3; 409 else **outbuf = IN3; 410 } 411 else 412 return MBERR_INTERNAL; 413 414 (*inbuf) += 3; 415 *inleft -= 3; 416 (*outbuf) += 1; 417 *outleft -= 1; 418 return 0; 419 } 420 421 DECODER(iso2022) 422 { 423 const struct iso2022_designation *dsgcache = NULL; 424 425 while (inleft > 0) { 426 unsigned char c = IN1; 427 Py_ssize_t err; 428 429 if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { 430 /* ESC throughout mode: 431 * for non-iso2022 escape sequences */ 432 WRITE1(c) /* assume as ISO-8859-1 */ 433 NEXT(1, 1) 434 if (IS_ESCEND(c)) { 435 STATE_CLEARFLAG(F_ESCTHROUGHOUT) 436 } 437 continue; 438 } 439 440 switch (c) { 441 case ESC: 442 REQUIRE_INBUF(2) 443 if (IS_ISO2022ESC(IN2)) { 444 err = iso2022processesc(config, state, 445 inbuf, &inleft); 446 if (err != 0) 447 return err; 448 } 449 else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ 450 REQUIRE_INBUF(3) 451 err = 
iso2022processg2(config, state, 452 inbuf, &inleft, outbuf, &outleft); 453 if (err != 0) 454 return err; 455 } 456 else { 457 WRITE1(ESC) 458 STATE_SETFLAG(F_ESCTHROUGHOUT) 459 NEXT(1, 1) 460 } 461 break; 462 case SI: 463 if (CONFIG_ISSET(NO_SHIFT)) 464 goto bypass; 465 STATE_CLEARFLAG(F_SHIFTED) 466 NEXT_IN(1) 467 break; 468 case SO: 469 if (CONFIG_ISSET(NO_SHIFT)) 470 goto bypass; 471 STATE_SETFLAG(F_SHIFTED) 472 NEXT_IN(1) 473 break; 474 case LF: 475 STATE_CLEARFLAG(F_SHIFTED) 476 WRITE1(LF) 477 NEXT(1, 1) 478 break; 479 default: 480 if (c < 0x20) /* C0 */ 481 goto bypass; 482 else if (c >= 0x80) 483 return 1; 484 else { 485 const struct iso2022_designation *dsg; 486 unsigned char charset; 487 ucs4_t decoded; 488 489 if (STATE_GETFLAG(F_SHIFTED)) 490 charset = STATE_G1; 491 else 492 charset = STATE_G0; 493 494 if (charset == CHARSET_ASCII) { 495 bypass: WRITE1(c) 496 NEXT(1, 1) 497 break; 498 } 499 500 if (dsgcache != NULL && 501 dsgcache->mark == charset) 502 dsg = dsgcache; 503 else { 504 for (dsg = CONFIG_DESIGNATIONS; 505 dsg->mark != charset 506 #ifdef Py_DEBUG 507 && dsg->mark != '\0' 508 #endif 509 ;dsg++) 510 /* noop */; 511 assert(dsg->mark != '\0'); 512 dsgcache = dsg; 513 } 514 515 REQUIRE_INBUF(dsg->width) 516 decoded = dsg->decoder(*inbuf); 517 if (decoded == MAP_UNMAPPABLE) 518 return dsg->width; 519 520 if (decoded < 0x10000) { 521 WRITE1(decoded) 522 NEXT_OUT(1) 523 } 524 else if (decoded < 0x30000) { 525 WRITEUCS4(decoded) 526 } 527 else { /* JIS X 0213 pairs */ 528 WRITE2(decoded >> 16, decoded & 0xffff) 529 NEXT_OUT(2) 530 } 531 NEXT_IN(dsg->width) 532 } 533 break; 534 } 535 } 536 return 0; 537 } 538 539 /*-*- mapping table holders -*-*/ 540 541 #define ENCMAP(enc) static const encode_map *enc##_encmap = NULL; 542 #define DECMAP(enc) static const decode_map *enc##_decmap = NULL; 543 544 /* kr */ 545 ENCMAP(cp949) 546 DECMAP(ksx1001) 547 548 /* jp */ 549 ENCMAP(jisxcommon) 550 DECMAP(jisx0208) 551 DECMAP(jisx0212) 552 ENCMAP(jisx0213_bmp) 553 
DECMAP(jisx0213_1_bmp) 554 DECMAP(jisx0213_2_bmp) 555 ENCMAP(jisx0213_emp) 556 DECMAP(jisx0213_1_emp) 557 DECMAP(jisx0213_2_emp) 558 559 /* cn */ 560 ENCMAP(gbcommon) 561 DECMAP(gb2312) 562 563 /* tw */ 564 565 /*-*- mapping access functions -*-*/ 566 567 static int 568 ksx1001_init(void) 569 { 570 static int initialized = 0; 571 572 if (!initialized && ( 573 IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) || 574 IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap))) 575 return -1; 576 initialized = 1; 577 return 0; 578 } 579 580 static ucs4_t 581 ksx1001_decoder(const unsigned char *data) 582 { 583 ucs4_t u; 584 TRYMAP_DEC(ksx1001, u, data[0], data[1]) 585 return u; 586 else 587 return MAP_UNMAPPABLE; 588 } 589 590 static DBCHAR 591 ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) 592 { 593 DBCHAR coded; 594 assert(*length == 1); 595 if (*data < 0x10000) { 596 TRYMAP_ENC(cp949, coded, *data) 597 if (!(coded & 0x8000)) 598 return coded; 599 } 600 return MAP_UNMAPPABLE; 601 } 602 603 static int 604 jisx0208_init(void) 605 { 606 static int initialized = 0; 607 608 if (!initialized && ( 609 IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || 610 IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap))) 611 return -1; 612 initialized = 1; 613 return 0; 614 } 615 616 static ucs4_t 617 jisx0208_decoder(const unsigned char *data) 618 { 619 ucs4_t u; 620 if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ 621 return 0xff3c; 622 else TRYMAP_DEC(jisx0208, u, data[0], data[1]) 623 return u; 624 else 625 return MAP_UNMAPPABLE; 626 } 627 628 static DBCHAR 629 jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) 630 { 631 DBCHAR coded; 632 assert(*length == 1); 633 if (*data < 0x10000) { 634 if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ 635 return 0x2140; 636 else TRYMAP_ENC(jisxcommon, coded, *data) { 637 if (!(coded & 0x8000)) 638 return coded; 639 } 640 } 641 return MAP_UNMAPPABLE; 642 } 643 644 static int 645 jisx0212_init(void) 646 { 647 static int initialized = 
0; 648 649 if (!initialized && ( 650 IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || 651 IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap))) 652 return -1; 653 initialized = 1; 654 return 0; 655 } 656 657 static ucs4_t 658 jisx0212_decoder(const unsigned char *data) 659 { 660 ucs4_t u; 661 TRYMAP_DEC(jisx0212, u, data[0], data[1]) 662 return u; 663 else 664 return MAP_UNMAPPABLE; 665 } 666 667 static DBCHAR 668 jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) 669 { 670 DBCHAR coded; 671 assert(*length == 1); 672 if (*data < 0x10000) { 673 TRYMAP_ENC(jisxcommon, coded, *data) { 674 if (coded & 0x8000) 675 return coded & 0x7fff; 676 } 677 } 678 return MAP_UNMAPPABLE; 679 } 680 681 static int 682 jisx0213_init(void) 683 { 684 static int initialized = 0; 685 686 if (!initialized && ( 687 jisx0208_init() || 688 IMPORT_MAP(jp, jisx0213_bmp, 689 &jisx0213_bmp_encmap, NULL) || 690 IMPORT_MAP(jp, jisx0213_1_bmp, 691 NULL, &jisx0213_1_bmp_decmap) || 692 IMPORT_MAP(jp, jisx0213_2_bmp, 693 NULL, &jisx0213_2_bmp_decmap) || 694 IMPORT_MAP(jp, jisx0213_emp, 695 &jisx0213_emp_encmap, NULL) || 696 IMPORT_MAP(jp, jisx0213_1_emp, 697 NULL, &jisx0213_1_emp_decmap) || 698 IMPORT_MAP(jp, jisx0213_2_emp, 699 NULL, &jisx0213_2_emp_decmap) || 700 IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap, 701 &jisx0213_pair_decmap))) 702 return -1; 703 initialized = 1; 704 return 0; 705 } 706 707 #define config ((void *)2000) 708 static ucs4_t 709 jisx0213_2000_1_decoder(const unsigned char *data) 710 { 711 ucs4_t u; 712 EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) 713 else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ 714 return 0xff3c; 715 else TRYMAP_DEC(jisx0208, u, data[0], data[1]); 716 else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); 717 else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) 718 u |= 0x20000; 719 else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); 720 else 721 return MAP_UNMAPPABLE; 722 return u; 723 } 724 725 static 
ucs4_t 726 jisx0213_2000_2_decoder(const unsigned char *data) 727 { 728 ucs4_t u; 729 EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1]) 730 TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); 731 else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) 732 u |= 0x20000; 733 else 734 return MAP_UNMAPPABLE; 735 return u; 736 } 737 #undef config 738 739 static ucs4_t 740 jisx0213_2004_1_decoder(const unsigned char *data) 741 { 742 ucs4_t u; 743 if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ 744 return 0xff3c; 745 else TRYMAP_DEC(jisx0208, u, data[0], data[1]); 746 else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); 747 else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) 748 u |= 0x20000; 749 else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); 750 else 751 return MAP_UNMAPPABLE; 752 return u; 753 } 754 755 static ucs4_t 756 jisx0213_2004_2_decoder(const unsigned char *data) 757 { 758 ucs4_t u; 759 TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); 760 else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) 761 u |= 0x20000; 762 else 763 return MAP_UNMAPPABLE; 764 return u; 765 } 766 767 static DBCHAR 768 jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) 769 { 770 DBCHAR coded; 771 772 switch (*length) { 773 case 1: /* first character */ 774 if (*data >= 0x10000) { 775 if ((*data) >> 16 == 0x20000 >> 16) { 776 EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data) 777 else TRYMAP_ENC(jisx0213_emp, coded, 778 (*data) & 0xffff) 779 return coded; 780 } 781 return MAP_UNMAPPABLE; 782 } 783 784 EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data) 785 else TRYMAP_ENC(jisx0213_bmp, coded, *data) { 786 if (coded == MULTIC) 787 return MAP_MULTIPLE_AVAIL; 788 } 789 else TRYMAP_ENC(jisxcommon, coded, *data) { 790 if (coded & 0x8000) 791 return MAP_UNMAPPABLE; 792 } 793 else 794 return MAP_UNMAPPABLE; 795 return coded; 796 case 2: /* second character of unicode pair */ 797 coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], 798 jisx0213_pair_encmap, 
JISX0213_ENCPAIRS); 799 if (coded == DBCINV) { 800 *length = 1; 801 coded = find_pairencmap((ucs2_t)data[0], 0, 802 jisx0213_pair_encmap, JISX0213_ENCPAIRS); 803 if (coded == DBCINV) 804 return MAP_UNMAPPABLE; 805 } 806 else 807 return coded; 808 case -1: /* flush unterminated */ 809 *length = 1; 810 coded = find_pairencmap((ucs2_t)data[0], 0, 811 jisx0213_pair_encmap, JISX0213_ENCPAIRS); 812 if (coded == DBCINV) 813 return MAP_UNMAPPABLE; 814 else 815 return coded; 816 default: 817 return MAP_UNMAPPABLE; 818 } 819 } 820 821 static DBCHAR 822 jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) 823 { 824 DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); 825 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 826 return coded; 827 else if (coded & 0x8000) 828 return MAP_UNMAPPABLE; 829 else 830 return coded; 831 } 832 833 static DBCHAR 834 jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) 835 { 836 DBCHAR coded; 837 Py_ssize_t ilength = *length; 838 839 coded = jisx0213_encoder(data, length, (void *)2000); 840 switch (ilength) { 841 case 1: 842 if (coded == MAP_MULTIPLE_AVAIL) 843 return MAP_MULTIPLE_AVAIL; 844 else 845 return MAP_UNMAPPABLE; 846 case 2: 847 if (*length != 2) 848 return MAP_UNMAPPABLE; 849 else 850 return coded; 851 default: 852 return MAP_UNMAPPABLE; 853 } 854 } 855 856 static DBCHAR 857 jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) 858 { 859 DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); 860 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 861 return coded; 862 else if (coded & 0x8000) 863 return coded & 0x7fff; 864 else 865 return MAP_UNMAPPABLE; 866 } 867 868 static DBCHAR 869 jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) 870 { 871 DBCHAR coded = jisx0213_encoder(data, length, NULL); 872 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 873 return coded; 874 else if (coded & 0x8000) 875 return MAP_UNMAPPABLE; 876 else 877 
return coded; 878 } 879 880 static DBCHAR 881 jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) 882 { 883 DBCHAR coded; 884 Py_ssize_t ilength = *length; 885 886 coded = jisx0213_encoder(data, length, NULL); 887 switch (ilength) { 888 case 1: 889 if (coded == MAP_MULTIPLE_AVAIL) 890 return MAP_MULTIPLE_AVAIL; 891 else 892 return MAP_UNMAPPABLE; 893 case 2: 894 if (*length != 2) 895 return MAP_UNMAPPABLE; 896 else 897 return coded; 898 default: 899 return MAP_UNMAPPABLE; 900 } 901 } 902 903 static DBCHAR 904 jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) 905 { 906 DBCHAR coded = jisx0213_encoder(data, length, NULL); 907 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 908 return coded; 909 else if (coded & 0x8000) 910 return coded & 0x7fff; 911 else 912 return MAP_UNMAPPABLE; 913 } 914 915 static ucs4_t 916 jisx0201_r_decoder(const unsigned char *data) 917 { 918 ucs4_t u; 919 JISX0201_R_DECODE(*data, u) 920 else return MAP_UNMAPPABLE; 921 return u; 922 } 923 924 static DBCHAR 925 jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) 926 { 927 DBCHAR coded; 928 JISX0201_R_ENCODE(*data, coded) 929 else return MAP_UNMAPPABLE; 930 return coded; 931 } 932 933 static ucs4_t 934 jisx0201_k_decoder(const unsigned char *data) 935 { 936 ucs4_t u; 937 JISX0201_K_DECODE(*data ^ 0x80, u) 938 else return MAP_UNMAPPABLE; 939 return u; 940 } 941 942 static DBCHAR 943 jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) 944 { 945 DBCHAR coded; 946 JISX0201_K_ENCODE(*data, coded) 947 else return MAP_UNMAPPABLE; 948 return coded - 0x80; 949 } 950 951 static int 952 gb2312_init(void) 953 { 954 static int initialized = 0; 955 956 if (!initialized && ( 957 IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) || 958 IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap))) 959 return -1; 960 initialized = 1; 961 return 0; 962 } 963 964 static ucs4_t 965 gb2312_decoder(const unsigned char *data) 966 { 967 ucs4_t u; 968 TRYMAP_DEC(gb2312, u, 
data[0], data[1]) 969 return u; 970 else 971 return MAP_UNMAPPABLE; 972 } 973 974 static DBCHAR 975 gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) 976 { 977 DBCHAR coded; 978 assert(*length == 1); 979 if (*data < 0x10000) { 980 TRYMAP_ENC(gbcommon, coded, *data) { 981 if (!(coded & 0x8000)) 982 return coded; 983 } 984 } 985 return MAP_UNMAPPABLE; 986 } 987 988 989 static ucs4_t 990 dummy_decoder(const unsigned char *data) 991 { 992 return MAP_UNMAPPABLE; 993 } 994 995 static DBCHAR 996 dummy_encoder(const ucs4_t *data, Py_ssize_t *length) 997 { 998 return MAP_UNMAPPABLE; 999 } 1000 1001 /*-*- registry tables -*-*/ 1002 1003 #define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \ 1004 ksx1001_init, \ 1005 ksx1001_decoder, ksx1001_encoder } 1006 #define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \ 1007 ksx1001_init, \ 1008 ksx1001_decoder, ksx1001_encoder } 1009 #define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \ 1010 NULL, \ 1011 jisx0201_r_decoder, jisx0201_r_encoder } 1012 #define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \ 1013 NULL, \ 1014 jisx0201_k_decoder, jisx0201_k_encoder } 1015 #define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \ 1016 jisx0208_init, \ 1017 jisx0208_decoder, jisx0208_encoder } 1018 #define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \ 1019 jisx0208_init, \ 1020 jisx0208_decoder, jisx0208_encoder } 1021 #define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \ 1022 jisx0212_init, \ 1023 jisx0212_decoder, jisx0212_encoder } 1024 #define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \ 1025 jisx0213_init, \ 1026 jisx0213_2000_1_decoder, \ 1027 jisx0213_2000_1_encoder } 1028 #define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \ 1029 jisx0213_init, \ 1030 jisx0213_2000_1_decoder, \ 1031 jisx0213_2000_1_encoder_paironly } 1032 #define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \ 1033 jisx0213_init, \ 1034 jisx0213_2000_2_decoder, \ 1035 jisx0213_2000_2_encoder } 1036 #define 
REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \ 1037 jisx0213_init, \ 1038 jisx0213_2004_1_decoder, \ 1039 jisx0213_2004_1_encoder } 1040 #define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \ 1041 jisx0213_init, \ 1042 jisx0213_2004_1_decoder, \ 1043 jisx0213_2004_1_encoder_paironly } 1044 #define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \ 1045 jisx0213_init, \ 1046 jisx0213_2004_2_decoder, \ 1047 jisx0213_2004_2_encoder } 1048 #define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \ 1049 gb2312_init, \ 1050 gb2312_decoder, gb2312_encoder } 1051 #define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \ 1052 cns11643_init, \ 1053 cns11643_1_decoder, cns11643_1_encoder } 1054 #define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \ 1055 cns11643_init, \ 1056 cns11643_2_decoder, cns11643_2_encoder } 1057 #define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \ 1058 NULL, dummy_decoder, dummy_encoder } 1059 #define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \ 1060 NULL, dummy_decoder, dummy_encoder } 1061 #define REGISTRY_SENTINEL { 0, } 1062 #define CONFIGDEF(var, attrs) \ 1063 static const struct iso2022_config iso2022_##var##_config = { \ 1064 attrs, iso2022_##var##_designations \ 1065 }; 1066 1067 static const struct iso2022_designation iso2022_kr_designations[] = { 1068 REGISTRY_KSX1001_G1, REGISTRY_SENTINEL 1069 }; 1070 CONFIGDEF(kr, 0) 1071 1072 static const struct iso2022_designation iso2022_jp_designations[] = { 1073 REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, 1074 REGISTRY_SENTINEL 1075 }; 1076 CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT) 1077 1078 static const struct iso2022_designation iso2022_jp_1_designations[] = { 1079 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, 1080 REGISTRY_JISX0208_O, REGISTRY_SENTINEL 1081 }; 1082 CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT) 1083 1084 static const struct iso2022_designation iso2022_jp_2_designations[] = { 1085 REGISTRY_JISX0208, 
REGISTRY_JISX0212, REGISTRY_KSX1001_G0, 1086 REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, 1087 REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL 1088 }; 1089 CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT) 1090 1091 static const struct iso2022_designation iso2022_jp_2004_designations[] = { 1092 REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208, 1093 REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL 1094 }; 1095 CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT) 1096 1097 static const struct iso2022_designation iso2022_jp_3_designations[] = { 1098 REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208, 1099 REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL 1100 }; 1101 CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT) 1102 1103 static const struct iso2022_designation iso2022_jp_ext_designations[] = { 1104 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, 1105 REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL 1106 }; 1107 CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT) 1108 1109 1110 BEGIN_MAPPINGS_LIST 1111 /* no mapping table here */ 1112 END_MAPPINGS_LIST 1113 1114 #define ISO2022_CODEC(variation) { \ 1115 "iso2022_" #variation, \ 1116 &iso2022_##variation##_config, \ 1117 iso2022_codec_init, \ 1118 _STATEFUL_METHODS(iso2022) \ 1119 }, 1120 1121 BEGIN_CODECS_LIST 1122 ISO2022_CODEC(kr) 1123 ISO2022_CODEC(jp) 1124 ISO2022_CODEC(jp_1) 1125 ISO2022_CODEC(jp_2) 1126 ISO2022_CODEC(jp_2004) 1127 ISO2022_CODEC(jp_3) 1128 ISO2022_CODEC(jp_ext) 1129 END_CODECS_LIST 1130 1131 I_AM_A_MODULE_FOR(iso2022)