Python-2.7.3/Modules/cjkcodecs/_codecs_hk.c

No issues found

  1 /*
  2  * _codecs_hk.c: Codecs collection for encodings from Hong Kong
  3  *
  4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
  5  */
  6 
  7 #define USING_IMPORTED_MAPS
  8 
  9 #include "cjkcodecs.h"
 10 #include "mappings_hk.h"
 11 
 12 /*
 13  * BIG5HKSCS codec
 14  */
 15 
 16 static const encode_map *big5_encmap = NULL; /* NULL until CODEC_INIT(big5hkscs) hands its address to IMPORT_MAP(tw, big5, ...) */
 17 static const decode_map *big5_decmap = NULL; /* NULL until CODEC_INIT(big5hkscs) hands its address to IMPORT_MAP(tw, big5, ...) */
 18 
 19 CODEC_INIT(big5hkscs)
 20 {
        /*
         * One-time setup for the big5hkscs codec: calls
         * IMPORT_MAP(tw, big5, ...) with the addresses of big5_encmap and
         * big5_decmap.
         *
         * Returns -1 when IMPORT_MAP yields a non-zero result, 0 otherwise.
         * NOTE(review): the function name and parameter list are produced by
         * the CODEC_INIT macro, which is not defined in this file.
         */
 21     static int initialized = 0;
 22 
        /* The && short-circuits, so IMPORT_MAP is not called again once a
         * previous call has set the flag. */
 23     if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
 24         return -1;
 25     initialized = 1;
 26     return 0;
 27 }
 28 
  29 /*
  30  * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
  31  *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
  32  *  U+00CA U+030C -> 8864
  33  *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
  34  *  U+00EA U+030C -> 88a5
  35  * These are handled not by mapping tables but by hand-written code.
      *
      * The table below lists the four pair codes in the order the encoder
      * indexes them, ((base >> 4) | (combining >> 3)) & 3:
      *   index 0: U+00CA U+0304    index 1: U+00CA U+030C
      *   index 2: U+00EA U+0304    index 3: U+00EA U+030C
      * i.e. bit 1 selects the base letter and bit 0 the combining mark.
  36  */
  37 static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
 38 
 39 ENCODER(big5hkscs)
 40 {
        /*
         * Encode Unicode input into Big5-HKSCS, one character (or one
         * base + combining-mark pair) per loop iteration.
         *
         * Returns 0 once inleft reaches 0; MBERR_TOOFEW when a character
         * mapped to MULTIC is the last available input unit and MBENC_FLUSH
         * is not set; and a positive value (1 or insize) when the current
         * character has no mapping.
         * NOTE(review): the parameter list (inbuf, inleft, outbuf, flags, ...)
         * and the helper macros REQUIRE_OUTBUF, NEXT, OUT1/OUT2,
         * DECODE_SURROGATE, GET_INSIZE and TRYMAP_ENC are defined outside
         * this file; some of them presumably return early on their own
         * (e.g. when the output buffer is too small) - confirm in
         * cjkcodecs.h.
         */
 41     while (inleft > 0) {
 42         ucs4_t c = **inbuf;
 43         DBCHAR code;
 44         Py_ssize_t insize;
 45 
            /* Values below 0x80 are copied through as a single byte. */
 46         if (c < 0x80) {
 47             REQUIRE_OUTBUF(1)
 48             **outbuf = (unsigned char)c;
 49             NEXT(1, 1)
 50             continue;
 51         }
 52 
            /* presumably folds a UTF-16 surrogate pair into one code point
             * and sets insize to the number of input units it occupies -
             * TODO confirm against the macro definitions */
 53         DECODE_SURROGATE(c)
 54         insize = GET_INSIZE(c);
 55 
            /* Every path that reaches the end of the loop body emits exactly
             * two bytes (OUT1 + OUT2). */
 56         REQUIRE_OUTBUF(2)
 57 
 58         if (c < 0x10000) {
                /* BMP: the HKSCS table is consulted first, plain big5 second. */
 59             TRYMAP_ENC(big5hkscs_bmp, code, c) {
                    /* MULTIC is what the table yields for a character that
                     * needs the pair handling below; per the branches that
                     * follow, that is U+00CA or U+00EA. */
 60                 if (code == MULTIC) {
                        /* (c & 0xffdf) == 0x00ca accepts 0x00ca and 0x00ea;
                         * (next & 0xfff7) == 0x0304 accepts 0x0304 and
                         * 0x030c. */
 61                     if (inleft >= 2 &&
 62                         ((c & 0xffdf) == 0x00ca) &&
 63                         (((*inbuf)[1] & 0xfff7) == 0x0304)) {
                            /* Index 0..3: bit 1 from the base letter, bit 0
                             * from the combining mark. */
 64                         code = big5hkscs_pairenc_table[
 65                             ((c >> 4) |
 66                              ((*inbuf)[1] >> 3)) & 3];
                            /* Base and combining mark are consumed together. */
 67                         insize = 2;
 68                     }
                        /* No following unit is available yet and the caller
                         * has not requested a flush: report too-few input
                         * instead of committing to the standalone code. */
 69                     else if (inleft < 2 &&
 70                              !(flags & MBENC_FLUSH))
 71                         return MBERR_TOOFEW;
 72                     else {
                            /* Standalone U+00CA / U+00EA. */
 73                         if (c == 0xca)
 74                             code = 0x8866;
 75                         else /* c == 0xea */
 76                             code = 0x88a7;
 77                     }
 78                 }
 79             }
 80             else TRYMAP_ENC(big5, code, c);
 81             else return 1;
 82         }
            /* 0x10000..0x1ffff: never mapped. */
 83         else if (c < 0x20000)
 84             return insize;
            /* 0x20000..0x2ffff: looked up by the low 16 bits only. */
 85         else if (c < 0x30000) {
 86             TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
 87             else return insize;
 88         }
 89         else
 90             return insize;
 91 
            /* High byte of the code first, then the low byte. */
 92         OUT1(code >> 8)
 93         OUT2(code & 0xFF)
 94         NEXT(insize, 2)
 95     }
 96 
 97     return 0;
 98 }
 99 
100 #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
    /*
     * BH2S(c1, c2): linear index of the byte pair (c1, c2), counted from
     * (0x87, 0x40) with 0xfe - 0x40 + 1 = 191 second-byte slots per first
     * byte. For example BH2S(0x87, 0x40) == 0, BH2S(0xc6, 0xa1) == 12130 and
     * BH2S(0xf9, 0xd6) == 21924.
     */
101 
102 DECODER(big5hkscs)
103 {
        /*
         * Decode Big5-HKSCS bytes into Unicode, one source character per
         * loop iteration.
         *
         * Returns 0 once inleft reaches 0, MBERR_INTERNAL when a byte pair
         * found in the big5hkscs table lies outside all three hint ranges,
         * and 2 when a byte pair matches neither table nor one of the four
         * hand-coded pairs.
         * NOTE(review): the parameter list and the helper macros IN1/IN2,
         * REQUIRE_INBUF, REQUIRE_OUTBUF, OUT1, NEXT, NEXT_IN, WRITE2,
         * WRITEUCS4 and TRYMAP_DEC are defined outside this file; some of
         * them presumably return early on their own (e.g. on short
         * buffers) - confirm in cjkcodecs.h.
         */
104     while (inleft > 0) {
105         unsigned char c = IN1;
106         ucs4_t decoded;
107 
108         REQUIRE_OUTBUF(1)
109 
            /* Bytes below 0x80 are copied through unchanged. */
110         if (c < 0x80) {
111             OUT1(c)
112             NEXT(1, 1)
113             continue;
114         }
115 
            /* Everything else is handled as a two-byte sequence. */
116         REQUIRE_INBUF(2)
117 
            /* Try plain big5 first, except for the pairs 0xc6a1..0xc8ff:
             * the condition holds when c < 0xc6, c > 0xc8, or c == 0xc6 with
             * a second byte below 0xa1. */
118         if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
119             TRYMAP_DEC(big5, **outbuf, c, IN2) {
120                 NEXT(2, 1)
121                 continue;
122             }
123         }
124 
125         TRYMAP_DEC(big5hkscs, decoded, c, IN2)
126         {
                /* s indexes a bitmap with one bit per byte pair; hintbase
                 * selects the bitmap for the range that contains the pair. */
127             int s = BH2S(c, IN2);
128             const unsigned char *hintbase;
129 
130             assert(0x87 <= c && c <= 0xfe);
131             assert(0x40 <= IN2 && IN2 <= 0xfe);
132 
                /* Three ranges, each with its own bitmap; s is rebased to the
                 * start of the matching range. The numeric suffixes 12130 and
                 * 21924 equal BH2S(0xc6, 0xa1) and BH2S(0xf9, 0xd6). */
133             if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
134                     hintbase = big5hkscs_phint_0;
135                     s -= BH2S(0x87, 0x40);
136             }
137             else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
138                     hintbase = big5hkscs_phint_12130;
139                     s -= BH2S(0xc6, 0xa1);
140             }
141             else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
142                     hintbase = big5hkscs_phint_21924;
143                     s -= BH2S(0xf9, 0xd6);
144             }
145             else
146                     return MBERR_INTERNAL;
147 
                /* Bit set: the table value is the low 16 bits of a code point
                 * in 0x20000..0x2ffff. Bit clear: it is the code point
                 * itself. */
148             if (hintbase[s >> 3] & (1 << (s & 7))) {
                        /* presumably WRITEUCS4 advances the output itself,
                         * since only the input is advanced afterwards -
                         * TODO confirm */
149                     WRITEUCS4(decoded | 0x20000)
150                     NEXT_IN(2)
151             }
152             else {
153                     OUT1(decoded)
154                     NEXT(2, 1)
155             }
156             continue;
157         }
158 
            /* The four codes that decode to a base letter followed by a
             * combining mark; they are not in the mapping tables. */
159         switch ((c << 8) | IN2) {
160         case 0x8862: WRITE2(0x00ca, 0x0304); break;
161         case 0x8864: WRITE2(0x00ca, 0x030c); break;
162         case 0x88a3: WRITE2(0x00ea, 0x0304); break;
163         case 0x88a5: WRITE2(0x00ea, 0x030c); break;
164         default: return 2;
165         }
166 
167         NEXT(2, 2) /* all decoded codepoints are pairs, above. */
168     }
169 
170     return 0;
171 }
172 
173 
174 BEGIN_MAPPINGS_LIST
    /*
     * Lists the mapping tables this file refers to by name: big5hkscs is the
     * name passed to TRYMAP_DEC in the decoder, big5hkscs_bmp and
     * big5hkscs_nonbmp are the names passed to TRYMAP_ENC in the encoder.
     * NOTE(review): presumably DECONLY / ENCONLY register a table for one
     * direction only - confirm against the macro definitions.
     */
175   MAPPING_DECONLY(big5hkscs)
176   MAPPING_ENCONLY(big5hkscs_bmp)
177   MAPPING_ENCONLY(big5hkscs_nonbmp)
178 END_MAPPINGS_LIST
179 
180 BEGIN_CODECS_LIST
    /*
     * The single codec provided by this file.
     * NOTE(review): presumably STATELESS_WINIT means "no per-stream state,
     * with an init hook", the hook being CODEC_INIT(big5hkscs) - confirm
     * against the macro definition.
     */
181   CODEC_STATELESS_WINIT(big5hkscs)
182 END_CODECS_LIST
183 
184 I_AM_A_MODULE_FOR(hk) /* NOTE(review): presumably expands to the extension-module init code for the "hk" collection - confirm against the macro definition */