2 * Syntax highlighting DFA interpreter
4 * (C) 2004 Joseph H. Allen
6 * This file is part of JOE (Joe's Own Editor)
12 __RCSID("$MirOS: contrib/code/jupp/syntax.c,v 1.20 2017/12/02 04:36:56 tg Exp $");
43 if (utfstate.start < utfstate.limit)
44 return (utfstate.buf[utfstate.start++]);
46 return (NO_MORE_DATA);
47 utfstate.eaten = utfstate.limit = 0;
49 if (!utfstate.limit) {
52 c = utfstate.buf[utfstate.start];
56 if ((c == NO_MORE_DATA) || (c < 0x80))
58 if ((c < 0xC2) || (c >= 0xFE))
61 utfstate.buf[utfstate.start++] = (unsigned char)c;
62 utfstate.limit = (c < 0xE0) ? 2 : (c < 0xF0) ? 3 :
63 (c < 0xF8) ? 4 : (c < 0xFC) ? 5 : 6;
65 while (utfstate.start < utfstate.limit) {
66 if (((c = pgetb(p)) == NO_MORE_DATA) || ((c ^ 0x80) > 0x3F)) {
67 /* invalid follow byte, invalidate all previous ones */
69 while (utfstate.limit < utfstate.start)
70 utfstate.buf[utfstate.limit++] = 0xFF;
71 /* append this as ungetch unless the well is dry */
72 if (c == NO_MORE_DATA)
75 utfstate.buf[utfstate.limit] = (unsigned char)c;
78 /* now return those bytes */
81 utfstate.buf[utfstate.start++] = (unsigned char)c;
94 if (!(utfstate.start < utfstate.limit)) {
95 if ((c = pgetb(p)) == NO_MORE_DATA)
96 return (NO_MORE_DATA);
98 utfstate.limit = utf8_encode(utfstate.buf,
99 to_uni(p->b->o.charmap, c));
103 return (utfstate.buf[utfstate.start++]);
106 /* Parse one line. Returns new state.
107 'syntax' is the loaded syntax definition for this buffer.
108 'line' is advanced to start of next line.
109 Global array 'attr_buf' ends up with coloring for each character of line.
110 'state' is initial parser state for the line (0 is initial state).
116 int parse(struct high_syntax *syntax, P *line, int state)
118 struct high_state *h = syntax->states[state];
120 unsigned char buf[20]; /* Name buffer (trunc after 19 characters) */
121 int buf_idx = 0; /* Index into buffer */
122 int buf_len = 0; /* counts only starting characters */
123 int buf_en = 0; /* Set for name buffering */
124 int *attr_end = attr_buf+attr_size;
125 int *attr = attr_buf;
126 int c; /* Current character */
127 int ofst = 0; /* record length after we've stopped buffering */
/* Pick the octet reader from the buffer's charmap: utfoctet when
   charmap->type is non-zero, octetutf otherwise. */
128 int (*getoctet)(P *) = line->b->o.charmap->type ? utfoctet : octetutf;
/* Clear the shared decoder state (utfstate) before scanning this line. */
130 memset(&utfstate, 0, sizeof(utfstate));
133 /* Get next character */
134 while((c = getoctet(line)) != NO_MORE_DATA) {
135 struct high_cmd *cmd, *kw_cmd;
138 /* Expand attribute array if necessary */
/* NOTE(review): the realloc result is stored straight back into attr_buf
   and no NULL check is visible in this excerpt; on failure the old buffer
   pointer would be lost. Confirm the intended out-of-memory policy. */
140 attr_buf = realloc(attr_buf,sizeof(int)*(attr_size*2));
/* The array may have moved, so both cursors are rebuilt from the new
   base.  The statement that updates attr_size between these two lines is
   not visible in this excerpt. */
141 attr = attr_buf + attr_size;
143 attr_end = attr_buf + attr_size;
146 /* Advance to next attribute position (note attr[-1] below) */
150 /* Loop while noeat */
152 /* Color with current state */
154 /* Get command for this character */
156 /* Determine new state */
/* Keyword lookup: when cmd has a keyword table, look up the buffered name
   in it; if cmd->ignore is set the name is lowercased via
   joe_strtolower() first.  A hit leaves the matching command in kw_cmd. */
157 if (cmd->keywords && (cmd->ignore ?
158 (kw_cmd = htfind(cmd->keywords, joe_strtolower(buf))) :
159 (kw_cmd = htfind(cmd->keywords, buf)))) {
162 /* Recolor keyword */
/* Walks negative offsets relative to attr, covering buf_len cells, each
   shifted back by a further ofst cells. */
163 for (x = -(buf_len + 1); x < -1; ++x)
164 attr[x - ofst] = h->color;
168 /* Recolor if necessary */
/* Loops while &attr[x] lies before the start of attr_buf; the loop body
   is not visible in this excerpt. */
170 while (&attr[x] < attr_buf)
173 attr[x++] = h->color;
175 /* Start buffering? */
176 if (cmd->start_buffering) {
183 /* Stop buffering? */
184 if (cmd->stop_buffering)
188 /* Save character in buffer */
/* utfstate.first is added to ofst on one branch and to buf_len on the
   other; the condition choosing between them is not visible here.  The
   bound of 19 matches the "trunc after 19 characters" note on buf[20]. */
190 ofst += utfstate.first;
191 else if (buf_idx < 19) {
194 buf_len += utfstate.first;
200 /* Return new state number */
204 /* Subroutines for load_dfa() */
/* Look up the state called 'name' in syntax->states.  When the scan reaches
   syntax->nstates without a strcmp() match, a new high_state is allocated,
   given the next state number and the FG_WHITE color, appended to
   syntax->states, and all 256 of its cmd slots are pointed at
   syntax->default_cmd.  Otherwise the existing entry at index x is used.
   The return statement is not visible in this excerpt. */
206 static struct high_state *find_state(struct high_syntax *syntax, const unsigned char *name)
209 struct high_state *state;
/* Linear search by name over the states registered so far. */
212 for(x=0;x!=syntax->nstates;++x)
213 if(!strcmp(syntax->states[x]->name,name))
216 /* It doesn't exist, so create it */
217 if(x==syntax->nstates) {
/* NOTE(review): no NULL check on the malloc()/strdup() results is visible
   in this excerpt -- confirm how allocation failure is handled. */
219 state=malloc(sizeof(struct high_state));
220 state->name=(const unsigned char *)strdup((const char *)name);
221 state->no=syntax->nstates;
222 state->color=FG_WHITE;
224 /* We're the first state */
/* The condition guarding this assignment is not visible in this excerpt. */
225 syntax->default_cmd.new_state = state;
/* Table full: szstates is doubled inside the realloc size expression.
   NOTE(review): the realloc result overwrites syntax->states directly. */
226 if(syntax->nstates==syntax->szstates)
227 syntax->states=realloc(syntax->states,sizeof(struct high_state *)*(syntax->szstates*=2));
228 syntax->states[syntax->nstates++]=state;
/* Every input byte value starts out mapped to the syntax-wide default command. */
229 for(y=0; y!=256; ++y)
230 state->cmd[y] = &syntax->default_cmd;
232 state = syntax->states[x];
236 /* Load syntax file */
238 struct high_syntax *syntax_list;
/* Load the syntax definition called 'name'.
   Visible steps: scan syntax_list for an entry whose name already matches;
   try to open "$HOME/.jupp/syntax/NAME.jsf", then, if that did not open and
   has_JOERC is set, "<get_JOERC>syntax/NAME.jsf"; allocate a new
   high_syntax and link it at the head of syntax_list; then read the file
   line by line, handling ':' (state definition), '=' (color class
   definition), '-' (number of sync lines) and '"' or '*' (command) lines.
   Problems in the file are reported on stderr as "name:line: message".
   The return statements are not visible in this excerpt. */
240 struct high_syntax *load_dfa(const unsigned char *name)
242 unsigned char buf[1024];
243 unsigned char bf[256];
244 unsigned char bf1[256];
249 struct high_state *state=0; /* Current state */
250 struct high_syntax *syntax; /* New syntax table */
/* Allocates the global attribute array; the condition guarding this
   allocation is not visible in this excerpt. */
259 attr_buf = malloc(sizeof(int)*attr_size);
262 /* Find syntax table */
264 /* Already loaded? */
265 for(syntax=syntax_list;syntax;syntax=syntax->next)
266 if(!strcmp(syntax->name,name))
/* NOTE(review): getenv() may return NULL; whether p is checked before the
   format call below is not visible in this excerpt -- confirm. */
270 p = (unsigned char *)getenv("HOME");
272 joe_snprintf_2((char *)buf,sizeof(buf),"%s/.jupp/syntax/%s.jsf",p,name);
273 f = fopen((char *)buf,"r");
/* Second location, tried only when the first open did not succeed. */
276 if (!f && has_JOERC) {
277 joe_snprintf_2((char *)buf,sizeof(buf),"%ssyntax/%s.jsf",get_JOERC,name);
278 f = fopen((char *)buf,"r");
/* Create the zero-filled syntax record and push it onto the head of
   syntax_list.  The state table starts with room for 64 pointers and
   sync_lines starts at 120. */
284 syntax = calloc(1, sizeof(struct high_syntax));
285 syntax->name = (const unsigned char *)strdup((const char *)name);
286 syntax->next = syntax_list;
287 syntax_list = syntax;
288 syntax->states = malloc(sizeof(struct high_state *)*(syntax->szstates=64));
289 syntax->sync_lines = 120;
291 memset(clist, 0, sizeof(clist));
/* Main read loop, one line of the file per iteration.  fgets() is passed
   1023 here although buf holds 1024 bytes. */
294 while(fgets((char *)buf,1023,f)) {
/* ':' line: a state name followed by the name of a color class. */
298 if(!parse_char(&p, ':')) {
299 if(!parse_ident(&p, bf, 255)) {
301 state = find_state(syntax,bf);
304 if(!parse_ident(&p,bf,255)) {
305 struct high_color *color;
/* Search the syntax's color list for the class named in bf. */
306 for(color=syntax->color;color;color=color->next)
307 if(!strcmp(color->name,bf))
310 state->color=color->color;
313 fprintf(stderr,"%s:%d: Unknown class '%s'\n", name, line, bf);
316 fprintf(stderr,"%s:%d: Missing color for state definition\n", name, line);
318 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
/* '=' line: definition of a color class. */
319 } else if(!parse_char(&p, '=')) {
320 if(!parse_ident(&p, bf, 255)) {
321 struct high_color *color;
324 for(color=syntax->color;color;color=color->next)
325 if(!strcmp(color->name,bf))
327 /* If it doesn't exist, create it */
329 color = calloc(1, sizeof(struct high_color));
330 color->name = (unsigned char *)strdup((char *)bf);
331 color->next = syntax->color;
332 syntax->color = color;
334 fprintf(stderr,"%s:%d: Class '%s' already defined\n", name, line, bf);
337 /* Parse color definition */
/* Each further identifier on the line is passed to meta_color() and the
   result is OR-ed into the class color. */
338 while(parse_ws(&p,'#'), !parse_ident(&p,bf,255)) {
339 color->color |= meta_color(bf);
342 } else if(!parse_char(&p, '-')) { /* No. sync lines */
343 syntax->sync_lines = (int)ustolb(p, &np,
344 INT_MIN, INT_MAX, USTOL_TRIM);
/* The condition selecting this -1 assignment is not visible in this excerpt. */
346 syntax->sync_lines = -1;
350 c = parse_ws(&p,'#');
/* Command line: begins with a quoted character list or with '*'. */
353 } else if (c=='"' || c=='*') {
355 struct high_cmd *cmd;
356 if(!parse_field(&p, US "*")) {
361 c = parse_string(&p, bf, 255);
363 fprintf(stderr,"%s:%d: Bad string\n", name, line);
367 unsigned char *t = bf;
/* Consume the string as a sequence of ranges, yielding first/second for
   each; what is done with each range is not visible in this excerpt. */
370 while(!parse_range(&t, &first, &second)) {
/* Fresh zero-filled command for this line. */
379 cmd = calloc(1, sizeof(struct high_cmd));
381 if(!parse_ident(&p,bf,255)) {
/* Jump target; find_state() creates the state if it is not known yet. */
383 cmd->new_state = find_state(syntax,bf);
/* Remaining identifiers on the line are options of the command. */
386 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
387 if(!strcmp(bf,"buffer")) {
388 cmd->start_buffering = 1;
389 } else if(!strcmp(bf,"hold")) {
390 cmd->stop_buffering = 1;
391 } else if(!strcmp(bf,"recolor")) {
393 if(!parse_char(&p,'=')) {
395 cmd->recolor = (int)ustolb(p, &np,
396 INT_MIN, INT_MAX, USTOL_TRIM);
398 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
402 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
/* Keyword list: further lines are read from the file until one whose
   field is "done".  Each listed string gets its own command (kw_cmd),
   stored under that string in the cmd->keywords hash table. */
403 } else if(!strcmp(bf,"strings") || !strcmp(bf,"istrings")) {
406 while(fgets((char *)buf,1023,f)) {
411 if(!parse_field(&p,US "done"))
413 if(!parse_string(&p,bf,255)) {
417 if(!parse_ident(&p,bf1,255)) {
418 struct high_cmd *kw_cmd = calloc(1, sizeof(struct high_cmd));
420 kw_cmd->new_state = find_state(syntax,bf1);
/* The condition guarding creation of the table is not visible in this excerpt. */
422 cmd->keywords = htmk(64);
423 htadd(cmd->keywords,(unsigned char *)strdup((char *)bf),kw_cmd);
/* Per-keyword options; the visible names are the same as for a command. */
424 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
425 if(!strcmp(bf,"buffer")) {
426 kw_cmd->start_buffering = 1;
427 } else if(!strcmp(bf,"hold")) {
428 kw_cmd->stop_buffering = 1;
429 } else if(!strcmp(bf,"recolor")) {
431 if(!parse_char(&p,'=')) {
433 kw_cmd->recolor = (int)ustolb(p, &np,
434 INT_MIN, INT_MAX, USTOL_TRIM);
436 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
440 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
442 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
444 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
446 fprintf(stderr,"%s:%d: Missing string\n", name, line);
449 } else if(!strcmp(bf,"noeat")) {
/* The next three option names are recognised but perform no action. */
451 } else if(!strcmp(bf,"mark")) {
452 /* not implemented yet */ ;
453 } else if(!strcmp(bf,"markend")) {
454 /* not implemented yet */ ;
455 } else if(!strcmp(bf,"recolormark")) {
456 /* not implemented yet */ ;
458 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
460 /* Install command */
465 fprintf(stderr,"%s:%d: Missing jump\n", name, line);
467 fprintf(stderr,"%s:%d: No state\n", name, line);
469 fprintf(stderr,"%s:%d: Unknown character\n", name, line);