1 /* $MirOS: contrib/code/jupp/regex.c,v 1.5 2012/12/22 00:06:13 tg Exp $ */
3 * Regular expression subroutines
5 * (C) 1992 Joseph H. Allen
7 * This file is part of JOE (Joe's Own Editor)
19 int escape(int utf8_,unsigned char **a, int *b)
22 unsigned char *s = *a;
25 if (*s == '\\' && l >= 2) {
74 if (l > 0 && *s >= '0' && *s <= '7') {
75 c = c * 8 + s[1] - '0';
78 if (l > 0 && *s >= '0' && *s <= '7') {
79 c = c * 8 + s[1] - '0';
87 if (l > 0 && *s >= '0' && *s <= '9') {
88 c = c * 16 + *s - '0';
90 } else if (l > 0 && *s >= 'A' && *s <= 'F') {
91 c = c * 16 + *s - 'A' + 10;
93 } else if (l > 0 && *s >= 'a' && *s <= 'f') {
94 c = c * 16 + *s - 'a' + 10;
98 if (l > 0 && *s >= '0' && *s <= '9') {
99 c = c * 16 + *s - '0';
101 } else if (l > 0 && *s >= 'A' && *s <= 'F') {
102 c = c * 16 + *s - 'A' + 10;
104 } else if (l > 0 && *s >= 'a' && *s <= 'f') {
105 c = c * 16 + *s - 'a' + 10;
111 c = utf8_decode_fwrd(&s, &l);
119 c = utf8_decode_fwrd(&s,&l);
129 static int brack(int utf8_,unsigned char **a, int *la, int c)
133 unsigned char *s = *a;
138 if (*s == '^' || *s == '*') {
143 if (l && *s == ']') {
157 cl = escape(utf8_, &s, &l);
159 if (l >= 2 && s[0] == '-' && s[1] != ']') {
162 cr = escape(utf8_, &s, &l);
163 if (c >= cl && c <= cr)
176 static void savec(int utf8_,unsigned char **pieces, int n, int c)
178 unsigned char buf[16];
180 unsigned char *s = NULL;
183 len = utf8_encode(buf,c);
191 s = vsncpy(s, 0, buf, len);
195 #define MAX_REGEX_SAVED 16384 /* Largest regex string we will save; when szz exceeds this, saves() calls vstrunc(pieces[n], 0) */
197 static void saves(unsigned char **pieces, int n, P *p, long int szz)
199 if (szz > MAX_REGEX_SAVED)
200 pieces[n] = vstrunc(pieces[n], 0);
202 pieces[n] = vstrunc(pieces[n], (int) szz);
203 brmem(p, pieces[n], (int) szz);
207 /* Returns -1 (NO_MORE_DATA) for end of file.
208 * Returns -2 if we skipped a special sequence and didn't take the character
209 * after it (this happens for "strings").
210 * Otherwise returns character after sequence (character will be >=0).
213 static int skip_special(P *p)
217 switch (s = pgetc(p)) {
220 if ((s = pgetc(p)) == '\\') {
224 } while (s != NO_MORE_DATA && s != '"');
229 if ((s = pgetc(p)) == '\\') {
235 if ((s = pgetc(p)) == '\'')
237 if ((s = pgetc(p)) == '\'')
251 } while (s != to && s != NO_MORE_DATA);
261 if ((s = pgetc(p)) == '/')
263 } while (s != NO_MORE_DATA);
264 else if (s != NO_MORE_DATA)
273 int pmatch(unsigned char **pieces, unsigned char *regex, int len, P *p, int n, int icase)
278 int utf8_ = p->b->o.charmap->type;
279 struct charmap *map = p->b->o.charmap;
287 c = utf8_decode(&sm,*regex++);
289 } while (len && c<0);
301 switch (c = *regex++) {
304 if (d == NO_MORE_DATA)
306 savec(utf8_, pieces, n++, d);
329 if (pgetc(p) != escape(utf8_, ®ex, &len))
333 /* Find shortest matching sequence */
338 if (pmatch(pieces, regex, len, p, n + 1, icase)) {
339 saves(pieces, n, o, pb - o->byte);
343 } while (c != NO_MORE_DATA && c != '\n');
350 if (pmatch(pieces, regex, len, p, n + 1, icase)) {
351 saves(pieces, n, o, pb - o->byte);
354 } while (skip_special(p) != NO_MORE_DATA);
358 if (d == NO_MORE_DATA)
360 if (!brack(utf8_, ®ex, &len, d))
362 savec(utf8_, pieces, n++, d);
366 unsigned char *oregex = regex; /* Point to character to skip */
369 unsigned char *tregex;
380 /* Advance over character to skip. Save character in d unless
381 we're skipping over a \[..] */
382 if (len >= 2 && regex[0] == '\\') {
383 if (regex[1] == '[') {
386 brack(utf8_, ®ex, &len, 0);
388 d = escape(utf8_, ®ex, &len);
390 d = joe_tolower(map,d);
393 if ((d = utf8_decode_fwrd(®ex, &len)) < 0)
396 d = joe_tolower(map,d);
402 d = joe_tolower(map,d);
407 /* Now oregex/olen point to character to skip over and
408 regex/len point to sequence which follows */
413 if (pmatch(pieces, regex, len, p, n + 1, icase)) {
414 saves(pieces, n, o, z->byte - o->byte);
424 if (*oregex == '\\') {
425 if (oregex[1] == '[') {
428 match = brack(utf8_, &tregex, &tlen, c);
433 match = (joe_tolower(map,c) == d);
437 } while (c != NO_MORE_DATA && match);
477 if (joe_tolower(map,d) != joe_tolower(map,c))