/*
 * _codecs_kr.c: Codecs collection for Korean encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
7 #include "cjkcodecs.h"
8 #include "mappings_kr.h"
9
/*
 * EUC-KR codec
 */

/* Lead byte of every two-byte element in the 8-byte make-up sequence
 * that the EUC-KR encoder emits and the decoder recognises. */
#define EUCKR_JAMO_FIRSTBYTE    0xA4
/* Trail byte of the pair that opens a make-up sequence.  The same value
 * is also used in the final-consonant slot to mean "no final consonant". */
#define EUCKR_JAMO_FILLER       0xD4
16
/*
 * Trail bytes used when a Hangul syllable is spelled out as a make-up
 * sequence.  With s = (code point - 0xac00), the EUC-KR encoder indexes:
 *   u2cgk_choseong  by s / 588         (19 initial consonants)
 *   u2cgk_jungseong by (s / 28) % 21   (21 medial vowels)
 *   u2cgk_jongseong by s % 28          (28 finals; index 0 is 0xd4,
 *                                       the "no final consonant" filler)
 */
static const unsigned char u2cgk_choseong[19] = {
    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
    0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
    0xbc, 0xbd, 0xbe
};
static const unsigned char u2cgk_jungseong[21] = {
    0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
    0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
    0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};
static const unsigned char u2cgk_jongseong[28] = {
    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
    0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
    0xbb, 0xbc, 0xbd, 0xbe
};
33
34 ENCODER(euc_kr)
35 {
36 while (inleft > 0) {
37 Py_UNICODE c = IN1;
38 DBCHAR code;
39
40 if (c < 0x80) {
41 WRITE1((unsigned char)c)
42 NEXT(1, 1)
43 continue;
44 }
45 UCS4INVALID(c)
46
47 REQUIRE_OUTBUF(2)
48 TRYMAP_ENC(cp949, code, c);
49 else return 1;
50
51 if ((code & 0x8000) == 0) {
52 /* KS X 1001 coded character */
53 OUT1((code >> 8) | 0x80)
54 OUT2((code & 0xFF) | 0x80)
55 NEXT(1, 2)
56 }
57 else { /* Mapping is found in CP949 extension,
58 * but we encode it in KS X 1001:1998 Annex 3,
59 * make-up sequence for EUC-KR. */
60
61 REQUIRE_OUTBUF(8)
62
63 /* syllable composition precedence */
64 OUT1(EUCKR_JAMO_FIRSTBYTE)
65 OUT2(EUCKR_JAMO_FILLER)
66
67 /* All codepoints in CP949 extension are in unicode
68 * Hangul Syllable area. */
69 assert(0xac00 <= c && c <= 0xd7a3);
70 c -= 0xac00;
71
72 OUT3(EUCKR_JAMO_FIRSTBYTE)
73 OUT4(u2cgk_choseong[c / 588])
74 NEXT_OUT(4)
75
76 OUT1(EUCKR_JAMO_FIRSTBYTE)
77 OUT2(u2cgk_jungseong[(c / 28) % 21])
78 OUT3(EUCKR_JAMO_FIRSTBYTE)
79 OUT4(u2cgk_jongseong[c % 28])
80 NEXT(1, 4)
81 }
82 }
83
84 return 0;
85 }
86
/* Marker stored in the tables below for a trail byte that has no
 * corresponding initial/final consonant index. */
#define NONE    127

/*
 * Reverse lookups for the make-up sequence: both tables are indexed by
 * (trail byte - 0xa1) for trail bytes in [0xa1, 0xbe] (30 entries each).
 * cgk2u_choseong yields the initial-consonant index (0..18);
 * cgk2u_jongseong yields the final-consonant index (1..27).
 */
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
       0,    1, NONE,    2, NONE, NONE,    3,    4,
       5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
       6,    7,    8, NONE,    9,   10,   11,   12,
      13,   14,   15,   16,   17,   18
};
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
       1,    2,    3,    4,    5,    6,    7, NONE,
       8,    9,   10,   11,   12,   13,   14,   15,
      16,   17, NONE,   18,   19,   20,   21,   22,
    NONE,   23,   24,   25,   26,   27
};
101
102 DECODER(euc_kr)
103 {
104 while (inleft > 0) {
105 unsigned char c = IN1;
106
107 REQUIRE_OUTBUF(1)
108
109 if (c < 0x80) {
110 OUT1(c)
111 NEXT(1, 1)
112 continue;
113 }
114
115 REQUIRE_INBUF(2)
116
117 if (c == EUCKR_JAMO_FIRSTBYTE &&
118 IN2 == EUCKR_JAMO_FILLER) {
119 /* KS X 1001:1998 Annex 3 make-up sequence */
120 DBCHAR cho, jung, jong;
121
122 REQUIRE_INBUF(8)
123 if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
124 (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
125 (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
126 return 8;
127
128 c = (*inbuf)[3];
129 if (0xa1 <= c && c <= 0xbe)
130 cho = cgk2u_choseong[c - 0xa1];
131 else
132 cho = NONE;
133
134 c = (*inbuf)[5];
135 jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
136
137 c = (*inbuf)[7];
138 if (c == EUCKR_JAMO_FILLER)
139 jong = 0;
140 else if (0xa1 <= c && c <= 0xbe)
141 jong = cgk2u_jongseong[c - 0xa1];
142 else
143 jong = NONE;
144
145 if (cho == NONE || jung == NONE || jong == NONE)
146 return 8;
147
148 OUT1(0xac00 + cho*588 + jung*28 + jong);
149 NEXT(8, 1)
150 }
151 else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
152 NEXT(2, 1)
153 }
154 else
155 return 2;
156 }
157
158 return 0;
159 }
160 #undef NONE
161
162
163 /*
164 * CP949 codec
165 */
166
167 ENCODER(cp949)
168 {
169 while (inleft > 0) {
170 Py_UNICODE c = IN1;
171 DBCHAR code;
172
173 if (c < 0x80) {
174 WRITE1((unsigned char)c)
175 NEXT(1, 1)
176 continue;
177 }
178 UCS4INVALID(c)
179
180 REQUIRE_OUTBUF(2)
181 TRYMAP_ENC(cp949, code, c);
182 else return 1;
183
184 OUT1((code >> 8) | 0x80)
185 if (code & 0x8000)
186 OUT2(code & 0xFF) /* MSB set: CP949 */
187 else
188 OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
189 NEXT(1, 2)
190 }
191
192 return 0;
193 }
194
195 DECODER(cp949)
196 {
197 while (inleft > 0) {
198 unsigned char c = IN1;
199
200 REQUIRE_OUTBUF(1)
201
202 if (c < 0x80) {
203 OUT1(c)
204 NEXT(1, 1)
205 continue;
206 }
207
208 REQUIRE_INBUF(2)
209 TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
210 else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
211 else return 2;
212
213 NEXT(2, 1)
214 }
215
216 return 0;
217 }
218
219
/*
 * JOHAB codec
 */

/*
 * 5-bit field values used by the JOHAB encoder to build a Hangul
 * syllable code.  With s = (code point - 0xac00), the tables are indexed:
 *   u2johabidx_choseong  by s / 588         (19 entries used)
 *   u2johabidx_jungseong by (s / 28) % 21   (21 entries used)
 *   u2johabidx_jongseong by s % 28          (28 entries used)
 * Each array is declared with 32 elements; the entries not listed are
 * zero-initialized and are not reached by the indices above.
 */
static const unsigned char u2johabidx_choseong[32] = {
                0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
                      0x03, 0x04, 0x05, 0x06, 0x07,
                0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
                0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
          0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
241 static const DBCHAR u2johabjamo[] = {
242 0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
243 0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
244 0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
245 0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
246 0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
247 0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
248 0x8741, 0x8761, 0x8781, 0x87a1,
249 };
250
251 ENCODER(johab)
252 {
253 while (inleft > 0) {
254 Py_UNICODE c = IN1;
255 DBCHAR code;
256
257 if (c < 0x80) {
258 WRITE1((unsigned char)c)
259 NEXT(1, 1)
260 continue;
261 }
262 UCS4INVALID(c)
263
264 REQUIRE_OUTBUF(2)
265
266 if (c >= 0xac00 && c <= 0xd7a3) {
267 c -= 0xac00;
268 code = 0x8000 |
269 (u2johabidx_choseong[c / 588] << 10) |
270 (u2johabidx_jungseong[(c / 28) % 21] << 5) |
271 u2johabidx_jongseong[c % 28];
272 }
273 else if (c >= 0x3131 && c <= 0x3163)
274 code = u2johabjamo[c - 0x3131];
275 else TRYMAP_ENC(cp949, code, c) {
276 unsigned char c1, c2, t2;
277 unsigned short t1;
278
279 assert((code & 0x8000) == 0);
280 c1 = code >> 8;
281 c2 = code & 0xff;
282 if (((c1 >= 0x21 && c1 <= 0x2c) ||
283 (c1 >= 0x4a && c1 <= 0x7d)) &&
284 (c2 >= 0x21 && c2 <= 0x7e)) {
285 t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
286 (c1 - 0x21 + 0x197));
287 t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
288 OUT1(t1 >> 1)
289 OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
290 NEXT(1, 2)
291 continue;
292 }
293 else
294 return 1;
295 }
296 else
297 return 1;
298
299 OUT1(code >> 8)
300 OUT2(code & 0xff)
301 NEXT(1, 2)
302 }
303
304 return 0;
305 }
306
/* Table markers: FILL means the 5-bit field is the "empty slot" value,
 * NONE means the field value is not valid in that position. */
#define FILL    0xfd
#define NONE    0xff

/*
 * johabidx_*: indexed by a 5-bit field extracted from a JOHAB byte pair;
 * yield the initial/medial/final index used in the syllable formula
 * 0xac00 + cho*588 + jung*28 + jong, or FILL / NONE.
 */
static const unsigned char johabidx_choseong[32] = {
    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/*
 * johabjamo_*: indexed by the same 5-bit fields; yield the low byte that
 * the decoder ORs with 0x3100 when only one of the three slots is filled.
 */
static const unsigned char johabjamo_choseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
347
348 DECODER(johab)
349 {
350 while (inleft > 0) {
351 unsigned char c = IN1, c2;
352
353 REQUIRE_OUTBUF(1)
354
355 if (c < 0x80) {
356 OUT1(c)
357 NEXT(1, 1)
358 continue;
359 }
360
361 REQUIRE_INBUF(2)
362 c2 = IN2;
363
364 if (c < 0xd8) {
365 /* johab hangul */
366 unsigned char c_cho, c_jung, c_jong;
367 unsigned char i_cho, i_jung, i_jong;
368
369 c_cho = (c >> 2) & 0x1f;
370 c_jung = ((c << 3) | c2 >> 5) & 0x1f;
371 c_jong = c2 & 0x1f;
372
373 i_cho = johabidx_choseong[c_cho];
374 i_jung = johabidx_jungseong[c_jung];
375 i_jong = johabidx_jongseong[c_jong];
376
377 if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
378 return 2;
379
380 /* we don't use U+1100 hangul jamo yet. */
381 if (i_cho == FILL) {
382 if (i_jung == FILL) {
383 if (i_jong == FILL)
384 OUT1(0x3000)
385 else
386 OUT1(0x3100 |
387 johabjamo_jongseong[c_jong])
388 }
389 else {
390 if (i_jong == FILL)
391 OUT1(0x3100 |
392 johabjamo_jungseong[c_jung])
393 else
394 return 2;
395 }
396 } else {
397 if (i_jung == FILL) {
398 if (i_jong == FILL)
399 OUT1(0x3100 |
400 johabjamo_choseong[c_cho])
401 else
402 return 2;
403 }
404 else
405 OUT1(0xac00 +
406 i_cho * 588 +
407 i_jung * 28 +
408 (i_jong == FILL ? 0 : i_jong))
409 }
410 NEXT(2, 1)
411 } else {
412 /* KS X 1001 except hangul jamos and syllables */
413 if (c == 0xdf || c > 0xf9 ||
414 c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
415 (c2 & 0x7f) == 0x7f ||
416 (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
417 return 2;
418 else {
419 unsigned char t1, t2;
420
421 t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
422 2 * c - 0x197);
423 t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
424 t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
425 t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
426
427 TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
428 else return 2;
429 NEXT(2, 1)
430 }
431 }
432 }
433
434 return 0;
435 }
436 #undef NONE
437 #undef FILL
438
439
440 BEGIN_MAPPINGS_LIST
441 MAPPING_DECONLY(ksx1001)
442 MAPPING_ENCONLY(cp949)
443 MAPPING_DECONLY(cp949ext)
444 END_MAPPINGS_LIST
445
446 BEGIN_CODECS_LIST
447 CODEC_STATELESS(euc_kr)
448 CODEC_STATELESS(cp949)
449 CODEC_STATELESS(johab)
450 END_CODECS_LIST
451
452 I_AM_A_MODULE_FOR(kr)