1 /*
2 * Copyright (c) 2001, Dr Martin Porter
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 * Neither the name of the <ORGANIZATION> nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9 *
10 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11 */
12
13
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18
19 #include "header.h"
20
21 #define unless(C) if(!(C))
22
23 #define CREATE_SIZE 1
24
25 extern symbol * create_s(void) {
26 symbol * p;
27 void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
28 if (mem == NULL) return NULL;
29 p = (symbol *) (HEAD + (char *) mem);
30 CAPACITY(p) = CREATE_SIZE;
31 SET_SIZE(p, CREATE_SIZE);
32 return p;
Memory leak: mem
(emitted by cppcheck)
33 }
34
35 extern void lose_s(symbol * p) {
36 if (p == NULL) return;
37 free((char *) p - HEAD);
38 }
39
40 /*
41 new_p = X_skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
42 if n +ve, or n characters backwards from p +c - 1 if n -ve. new_p is the new
43 position, or 0 on failure.
44
45 -- used to implement hop and next in the utf8 case.
46 */
47
48 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
49 int b;
50 if (n >= 0) {
51 for (; n > 0; n--) {
52 if (c >= l) return -1;
53 b = p[c++];
54 if (b >= 0xC0) { /* 1100 0000 */
55 while (c < l) {
56 b = p[c];
57 if (b >= 0xC0 || b < 0x80) break;
58 /* break unless b is 10------ */
59 c++;
60 }
61 }
62 }
63 } else {
64 for (; n < 0; n++) {
65 if (c <= lb) return -1;
66 b = p[--c];
67 if (b >= 0x80) { /* 1000 0000 */
68 while (c > lb) {
69 b = p[c];
70 if (b >= 0xC0) break; /* 1100 0000 */
71 c--;
72 }
73 }
74 }
75 }
76 return c;
77 }
78
79 /* Code for character groupings: utf8 cases */
80
81 static int get_utf8(const symbol * p, int c, int l, int * slot) {
82 int b0, b1;
83 if (c >= l) return 0;
84 b0 = p[c++];
85 if (b0 < 0xC0 || c == l) { /* 1100 0000 */
86 * slot = b0; return 1;
87 }
88 b1 = p[c++];
89 if (b0 < 0xE0 || c == l) { /* 1110 0000 */
90 * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
91 }
92 * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (*p & 0x3F); return 3;
93 }
94
95 static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
96 int b0, b1;
97 if (c <= lb) return 0;
98 b0 = p[--c];
99 if (b0 < 0x80 || c == lb) { /* 1000 0000 */
100 * slot = b0; return 1;
101 }
102 b1 = p[--c];
103 if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */
104 * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
105 }
106 * slot = (*p & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
107 }
108
109 extern int in_grouping_U(struct SN_env * z, unsigned char * s, int min, int max) {
110 int ch;
111 int w = get_utf8(z->p, z->c, z->l, & ch);
112 unless (w) return 0;
113 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
114 z->c += w; return 1;
115 }
116
117 extern int in_grouping_b_U(struct SN_env * z, unsigned char * s, int min, int max) {
118 int ch;
119 int w = get_b_utf8(z->p, z->c, z->lb, & ch);
120 unless (w) return 0;
121 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
122 z->c -= w; return 1;
123 }
124
125 extern int out_grouping_U(struct SN_env * z, unsigned char * s, int min, int max) {
126 int ch;
127 int w = get_utf8(z->p, z->c, z->l, & ch);
128 unless (w) return 0;
129 unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
130 z->c += w; return 1;
131 }
132
133 extern int out_grouping_b_U(struct SN_env * z, unsigned char * s, int min, int max) {
134 int ch;
135 int w = get_b_utf8(z->p, z->c, z->lb, & ch);
136 unless (w) return 0;
137 unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
138 z->c -= w; return 1;
139 }
140
141 /* Code for character groupings: non-utf8 cases */
142
143 extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max) {
144 int ch;
145 if (z->c >= z->l) return 0;
146 ch = z->p[z->c];
147 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
148 z->c++; return 1;
149 }
150
151 extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max) {
152 int ch;
153 if (z->c <= z->lb) return 0;
154 ch = z->p[z->c - 1];
155 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
156 z->c--; return 1;
157 }
158
159 extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max) {
160 int ch;
161 if (z->c >= z->l) return 0;
162 ch = z->p[z->c];
163 unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
164 z->c++; return 1;
165 }
166
167 extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max) {
168 int ch;
169 if (z->c <= z->lb) return 0;
170 ch = z->p[z->c - 1];
171 unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
172 z->c--; return 1;
173 }
174
175 extern int eq_s(struct SN_env * z, int s_size, symbol * s) {
176 if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
177 z->c += s_size; return 1;
178 }
179
180 extern int eq_s_b(struct SN_env * z, int s_size, symbol * s) {
181 if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
182 z->c -= s_size; return 1;
183 }
184
185 extern int eq_v(struct SN_env * z, symbol * p) {
186 return eq_s(z, SIZE(p), p);
187 }
188
189 extern int eq_v_b(struct SN_env * z, symbol * p) {
190 return eq_s_b(z, SIZE(p), p);
191 }
192
193 extern int find_among(struct SN_env * z, struct among * v, int v_size) {
194
195 int i = 0;
196 int j = v_size;
197
198 int c = z->c; int l = z->l;
199 symbol * q = z->p + c;
200
201 struct among * w;
202
203 int common_i = 0;
204 int common_j = 0;
205
206 int first_key_inspected = 0;
207
208 while(1) {
209 int k = i + ((j - i) >> 1);
210 int diff = 0;
211 int common = common_i < common_j ? common_i : common_j; /* smaller */
212 w = v + k;
213 {
214 int i; for (i = common; i < w->s_size; i++) {
215 if (c + common == l) { diff = -1; break; }
216 diff = q[common] - w->s[i];
217 if (diff != 0) break;
218 common++;
219 }
220 }
221 if (diff < 0) { j = k; common_j = common; }
222 else { i = k; common_i = common; }
223 if (j - i <= 1) {
224 if (i > 0) break; /* v->s has been inspected */
225 if (j == i) break; /* only one item in v */
226
227 /* - but now we need to go round once more to get
228 v->s inspected. This looks messy, but is actually
229 the optimal approach. */
230
231 if (first_key_inspected) break;
232 first_key_inspected = 1;
233 }
234 }
235 while(1) {
236 w = v + i;
237 if (common_i >= w->s_size) {
238 z->c = c + w->s_size;
239 if (w->function == 0) return w->result;
240 {
241 int res = w->function(z);
242 z->c = c + w->s_size;
243 if (res) return w->result;
244 }
245 }
246 i = w->substring_i;
247 if (i < 0) return 0;
248 }
249 }
250
251 /* find_among_b is for backwards processing. Same comments apply */
252
253 extern int find_among_b(struct SN_env * z, struct among * v, int v_size) {
254
255 int i = 0;
256 int j = v_size;
257
258 int c = z->c; int lb = z->lb;
259 symbol * q = z->p + c - 1;
260
261 struct among * w;
262
263 int common_i = 0;
264 int common_j = 0;
265
266 int first_key_inspected = 0;
267
268 while(1) {
269 int k = i + ((j - i) >> 1);
270 int diff = 0;
271 int common = common_i < common_j ? common_i : common_j;
272 w = v + k;
273 {
274 int i; for (i = w->s_size - 1 - common; i >= 0; i--) {
275 if (c - common == lb) { diff = -1; break; }
276 diff = q[- common] - w->s[i];
277 if (diff != 0) break;
278 common++;
279 }
280 }
281 if (diff < 0) { j = k; common_j = common; }
282 else { i = k; common_i = common; }
283 if (j - i <= 1) {
284 if (i > 0) break;
285 if (j == i) break;
286 if (first_key_inspected) break;
287 first_key_inspected = 1;
288 }
289 }
290 while(1) {
291 w = v + i;
292 if (common_i >= w->s_size) {
293 z->c = c - w->s_size;
294 if (w->function == 0) return w->result;
295 {
296 int res = w->function(z);
297 z->c = c - w->s_size;
298 if (res) return w->result;
299 }
300 }
301 i = w->substring_i;
302 if (i < 0) return 0;
303 }
304 }
305
306
307 /* Increase the size of the buffer pointed to by p to at least n symbols.
308 * If insufficient memory, returns NULL and frees the old buffer.
309 */
310 static symbol * increase_size(symbol * p, int n) {
311 symbol * q;
312 int new_size = n + 20;
313 void * mem = realloc((char *) p - HEAD,
314 HEAD + (new_size + 1) * sizeof(symbol));
315 if (mem == NULL) {
316 lose_s(p);
317 return NULL;
318 }
319 q = (symbol *) (HEAD + (char *)mem);
320 CAPACITY(q) = new_size;
321 return q;
322 }
323
324 /* to replace symbols between c_bra and c_ket in z->p by the
325 s_size symbols at s.
326 Returns 0 on success, -1 on error.
327 Also, frees z->p (and sets it to NULL) on error.
328 */
329 extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
330 {
331 int adjustment;
332 int len;
333 if (z->p == NULL) {
334 z->p = create_s();
335 if (z->p == NULL) return -1;
336 }
337 adjustment = s_size - (c_ket - c_bra);
338 len = SIZE(z->p);
339 if (adjustment != 0) {
340 if (adjustment + len > CAPACITY(z->p)) {
341 z->p = increase_size(z->p, adjustment + len);
342 if (z->p == NULL) return -1;
343 }
344 memmove(z->p + c_ket + adjustment,
345 z->p + c_ket,
346 (len - c_ket) * sizeof(symbol));
347 SET_SIZE(z->p, adjustment + len);
348 z->l += adjustment;
349 if (z->c >= c_ket)
350 z->c += adjustment;
351 else
352 if (z->c > c_bra)
353 z->c = c_bra;
354 }
355 unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
356 if (adjptr != NULL)
357 *adjptr = adjustment;
358 return 0;
359 }
360
361 static int slice_check(struct SN_env * z) {
362
363 if (z->bra < 0 ||
364 z->bra > z->ket ||
365 z->ket > z->l ||
366 z->p == NULL ||
367 z->l > SIZE(z->p)) /* this line could be removed */
368 {
369 #if 0
370 fprintf(stderr, "faulty slice operation:\n");
371 debug(z, -1, 0);
372 #endif
373 return -1;
374 }
375 return 0;
376 }
377
378 extern int slice_from_s(struct SN_env * z, int s_size, symbol * s) {
379 if (slice_check(z)) return -1;
380 return replace_s(z, z->bra, z->ket, s_size, s, NULL);
381 }
382
383 extern int slice_from_v(struct SN_env * z, symbol * p) {
384 return slice_from_s(z, SIZE(p), p);
385 }
386
387 extern int slice_del(struct SN_env * z) {
388 return slice_from_s(z, 0, 0);
389 }
390
391 extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s) {
392 int adjustment;
393 if (replace_s(z, bra, ket, s_size, s, &adjustment))
394 return -1;
395 if (bra <= z->bra) z->bra += adjustment;
396 if (bra <= z->ket) z->ket += adjustment;
397 return 0;
398 }
399
400 extern int insert_v(struct SN_env * z, int bra, int ket, symbol * p) {
401 int adjustment;
402 if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
403 return -1;
404 if (bra <= z->bra) z->bra += adjustment;
405 if (bra <= z->ket) z->ket += adjustment;
406 return 0;
407 }
408
409 extern symbol * slice_to(struct SN_env * z, symbol * p) {
410 if (slice_check(z)) {
411 lose_s(p);
412 return NULL;
413 }
414 {
415 int len = z->ket - z->bra;
416 if (CAPACITY(p) < len) {
417 p = increase_size(p, len);
418 if (p == NULL)
419 return NULL;
420 }
421 memmove(p, z->p + z->bra, len * sizeof(symbol));
422 SET_SIZE(p, len);
423 }
424 return p;
425 }
426
427 extern symbol * assign_to(struct SN_env * z, symbol * p) {
428 int len = z->l;
429 if (CAPACITY(p) < len) {
430 p = increase_size(p, len);
431 if (p == NULL)
432 return NULL;
433 }
434 memmove(p, z->p, len * sizeof(symbol));
435 SET_SIZE(p, len);
436 return p;
437 }
438
439 #if 0
440 extern void debug(struct SN_env * z, int number, int line_count) {
441 int i;
442 int limit = SIZE(z->p);
443 /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
444 if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
445 for (i = 0; i <= limit; i++) {
446 if (z->lb == i) printf("{");
447 if (z->bra == i) printf("[");
448 if (z->c == i) printf("|");
449 if (z->ket == i) printf("]");
450 if (z->l == i) printf("}");
451 if (i < limit)
452 { int ch = z->p[i];
453 if (ch == 0) ch = '#';
454 printf("%c", ch);
455 }
456 }
457 printf("'\n");
458 }
459 #endif