1 /* $MirOS: contrib/code/jupp/syntax.c,v 1.16 2017/01/11 22:04:33 tg Exp $ */
3 * Syntax highlighting DFA interpreter
5 * (C) 2004 Joseph H. Allen
7 * This file is part of JOE (Joe's Own Editor)
40 if (utfstate.start < utfstate.limit)
41 return (utfstate.buf[utfstate.start++]);
43 return (NO_MORE_DATA);
44 utfstate.eaten = utfstate.limit = 0;
46 if (!utfstate.limit) {
49 c = utfstate.buf[utfstate.start];
53 if ((c == NO_MORE_DATA) || (c < 0x80))
55 if ((c < 0xC2) || (c >= 0xFE))
58 utfstate.buf[utfstate.start++] = (unsigned char)c;
59 utfstate.limit = (c < 0xE0) ? 2 : (c < 0xF0) ? 3 :
60 (c < 0xF8) ? 4 : (c < 0xFC) ? 5 : 6;
62 while (utfstate.start < utfstate.limit) {
63 if (((c = pgetb(p)) == NO_MORE_DATA) || ((c ^ 0x80) > 0x3F)) {
64 /* invalid follow byte, invalidate all previous ones */
66 while (utfstate.limit < utfstate.start)
67 utfstate.buf[utfstate.limit++] = 0xFF;
68 /* append this as ungetch unless the well is dry */
69 if (c == NO_MORE_DATA)
72 utfstate.buf[utfstate.limit] = (unsigned char)c;
75 /* now return those bytes */
78 utfstate.buf[utfstate.start++] = (unsigned char)c;
91 if (!(utfstate.start < utfstate.limit)) {
92 if ((c = pgetb(p)) == NO_MORE_DATA)
93 return (NO_MORE_DATA);
95 utfstate.limit = utf8_encode(utfstate.buf,
96 to_uni(p->b->o.charmap, c));
100 return (utfstate.buf[utfstate.start++]);
103 /* Parse one line. Returns new state.
104 'syntax' is the loaded syntax definition for this buffer.
105 'line' is advanced to start of next line.
106 Global array 'attr_buf' ends up with coloring for each character of line.
107 'state' is initial parser state for the line (0 is initial state).
113 int parse(struct high_syntax *syntax, P *line, int state)
115 struct high_state *h = syntax->states[state];
117 unsigned char buf[20]; /* Name buffer (trunc after 19 characters) */
118 int buf_idx = 0; /* Index into buffer */
119 int buf_len = 0; /* counts only starting characters */
120 int buf_en = 0; /* Set for name buffering */
121 int *attr_end = attr_buf+attr_size;
122 int *attr = attr_buf;
123 int c; /* Current character */
124 int ofst = 0; /* record length after we've stopped buffering */
125 int (*getoctet)(P *) = line->b->o.charmap->type ? utfoctet : octetutf;
127 memset(&utfstate, 0, sizeof(utfstate));
130 /* Get next character */
131 while((c = getoctet(line)) != NO_MORE_DATA) {
132 struct high_cmd *cmd, *kw_cmd;
135 /* Expand attribute array if necessary */
137 attr_buf = realloc(attr_buf,sizeof(int)*(attr_size*2));
138 attr = attr_buf + attr_size;
140 attr_end = attr_buf + attr_size;
143 /* Advance to next attribute position (note attr[-1] below) */
147 /* Loop while noeat */
149 /* Color with current state */
151 /* Get command for this character */
153 /* Determine new state */
154 if (cmd->keywords && (cmd->ignore ?
155 (kw_cmd = htfind(cmd->keywords, joe_strtolower(buf))) :
156 (kw_cmd = htfind(cmd->keywords, buf)))) {
159 /* Recolor keyword */
160 for (x = -(buf_len + 1); x < -1; ++x)
161 attr[x - ofst] = h->color;
165 /* Recolor if necessary */
167 while (&attr[x] < attr_buf)
170 attr[x++] = h->color;
172 /* Start buffering? */
173 if (cmd->start_buffering) {
180 /* Stop buffering? */
181 if (cmd->stop_buffering)
185 /* Save character in buffer */
187 ofst += utfstate.first;
188 else if (buf_idx < 19) {
191 buf_len += utfstate.first;
197 /* Return new state number */
201 /* Subroutines for load_dfa() */
203 static struct high_state *find_state(struct high_syntax *syntax, const unsigned char *name)
206 struct high_state *state;
209 for(x=0;x!=syntax->nstates;++x)
210 if(!strcmp(syntax->states[x]->name,name))
213 /* It doesn't exist, so create it */
214 if(x==syntax->nstates) {
216 state=malloc(sizeof(struct high_state));
217 state->name=(const unsigned char *)strdup((const char *)name);
218 state->no=syntax->nstates;
219 state->color=FG_WHITE;
221 /* We're the first state */
222 syntax->default_cmd.new_state = state;
223 if(syntax->nstates==syntax->szstates)
224 syntax->states=realloc(syntax->states,sizeof(struct high_state *)*(syntax->szstates*=2));
225 syntax->states[syntax->nstates++]=state;
226 for(y=0; y!=256; ++y)
227 state->cmd[y] = &syntax->default_cmd;
229 state = syntax->states[x];
233 /* Load syntax file */
235 struct high_syntax *syntax_list;
237 struct high_syntax *load_dfa(const unsigned char *name)
239 unsigned char buf[1024];
240 unsigned char bf[256];
241 unsigned char bf1[256];
246 struct high_state *state=0; /* Current state */
247 struct high_syntax *syntax; /* New syntax table */
255 attr_buf = malloc(sizeof(int)*attr_size);
258 /* Find syntax table */
260 /* Already loaded? */
261 for(syntax=syntax_list;syntax;syntax=syntax->next)
262 if(!strcmp(syntax->name,name))
266 p = (unsigned char *)getenv("HOME");
268 joe_snprintf_2((char *)buf,sizeof(buf),"%s/.jupp/syntax/%s.jsf",p,name);
269 f = fopen((char *)buf,"r");
272 if (!f && has_JOERC) {
273 joe_snprintf_2((char *)buf,sizeof(buf),"%ssyntax/%s.jsf",get_JOERC,name);
274 f = fopen((char *)buf,"r");
280 syntax = calloc(1, sizeof(struct high_syntax));
281 syntax->name = (const unsigned char *)strdup((const char *)name);
282 syntax->next = syntax_list;
283 syntax_list = syntax;
284 syntax->states = malloc(sizeof(struct high_state *)*(syntax->szstates=64));
285 syntax->sync_lines = 120;
287 memset(clist, 0, sizeof(clist));
290 while(fgets((char *)buf,1023,f)) {
294 if(!parse_char(&p, ':')) {
295 if(!parse_ident(&p, bf, 255)) {
297 state = find_state(syntax,bf);
300 if(!parse_ident(&p,bf,255)) {
301 struct high_color *color;
302 for(color=syntax->color;color;color=color->next)
303 if(!strcmp(color->name,bf))
306 state->color=color->color;
309 fprintf(stderr,"%s:%d: Unknown class '%s'\n", name, line, bf);
312 fprintf(stderr,"%s:%d: Missing color for state definition\n", name, line);
314 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
315 } else if(!parse_char(&p, '=')) {
316 if(!parse_ident(&p, bf, 255)) {
317 struct high_color *color;
320 for(color=syntax->color;color;color=color->next)
321 if(!strcmp(color->name,bf))
323 /* If it doesn't exist, create it */
325 color = calloc(1, sizeof(struct high_color));
326 color->name = (unsigned char *)strdup((char *)bf);
327 color->next = syntax->color;
328 syntax->color = color;
330 fprintf(stderr,"%s:%d: Class '%s' already defined\n", name, line, bf);
333 /* Parse color definition */
334 while(parse_ws(&p,'#'), !parse_ident(&p,bf,255)) {
335 color->color |= meta_color(bf);
338 } else if(!parse_char(&p, '-')) { /* No. sync lines */
339 if(parse_int(&p, &syntax->sync_lines))
340 syntax->sync_lines = -1;
342 c = parse_ws(&p,'#');
345 } else if (c=='"' || c=='*') {
347 struct high_cmd *cmd;
348 if(!parse_field(&p, US "*")) {
353 c = parse_string(&p, bf, 255);
355 fprintf(stderr,"%s:%d: Bad string\n", name, line);
359 unsigned char *t = bf;
362 while(!parse_range(&t, &first, &second)) {
371 cmd = calloc(1, sizeof(struct high_cmd));
373 if(!parse_ident(&p,bf,255)) {
375 cmd->new_state = find_state(syntax,bf);
378 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
379 if(!strcmp(bf,"buffer")) {
380 cmd->start_buffering = 1;
381 } else if(!strcmp(bf,"hold")) {
382 cmd->stop_buffering = 1;
383 } else if(!strcmp(bf,"recolor")) {
385 if(!parse_char(&p,'=')) {
387 if(parse_int(&p,&cmd->recolor))
388 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
390 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
391 } else if(!strcmp(bf,"strings") || !strcmp(bf,"istrings")) {
394 while(fgets((char *)buf,1023,f)) {
399 if(!parse_field(&p,US "done"))
401 if(!parse_string(&p,bf,255)) {
405 if(!parse_ident(&p,bf1,255)) {
406 struct high_cmd *kw_cmd = calloc(1, sizeof(struct high_cmd));
408 kw_cmd->new_state = find_state(syntax,bf1);
410 cmd->keywords = htmk(64);
411 htadd(cmd->keywords,(unsigned char *)strdup((char *)bf),kw_cmd);
412 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
413 if(!strcmp(bf,"buffer")) {
414 kw_cmd->start_buffering = 1;
415 } else if(!strcmp(bf,"hold")) {
416 kw_cmd->stop_buffering = 1;
417 } else if(!strcmp(bf,"recolor")) {
419 if(!parse_char(&p,'=')) {
421 if(parse_int(&p,&kw_cmd->recolor))
422 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
424 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
426 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
428 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
430 fprintf(stderr,"%s:%d: Missing string\n", name, line);
433 } else if(!strcmp(bf,"noeat")) {
435 } else if(!strcmp(bf,"mark")) {
436 /* not implemented yet */ ;
437 } else if(!strcmp(bf,"markend")) {
438 /* not implemented yet */ ;
439 } else if(!strcmp(bf,"recolormark")) {
440 /* not implemented yet */ ;
442 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
444 /* Install command */
449 fprintf(stderr,"%s:%d: Missing jump\n", name, line);
451 fprintf(stderr,"%s:%d: No state\n", name, line);
453 fprintf(stderr,"%s:%d: Unknown character\n", name, line);