use split CVS repo address, like in dietlibc
[alioth/jupp.git] / syntax.c
1 /*
2  *      Syntax highlighting DFA interpreter
3  *      Copyright
4  *              (C) 2004 Joseph H. Allen
5  *
6  *      This file is part of JOE (Joe's Own Editor)
7  */
8
9 #include "config.h"
10 #include "types.h"
11
12 __RCSID("$MirOS: contrib/code/jupp/syntax.c,v 1.24 2018/01/07 23:51:35 tg Exp $");
13
14 #include <stdlib.h>
15 #include <string.h>
16 #include "b.h"
17 #include "scrn.h"
18 #include "utils.h"
19 #include "hash.h"
20 #include "path.h"
21 #include "charmap.h"
22 #include "syntax.h"
23
/*
 * Octet reader state shared by utfoctet(), octetutf() and parse().
 * parse() zeroes it at the start of every line.
 */
static struct {
	/* octets waiting to be handed out; utfoctet() uses at most 6 */
	unsigned char buf[7];
	/* index of the next octet of buf to hand out (or to fill) */
	unsigned char start;
	/* number of octets of buf belonging to the current character */
	unsigned char limit;
	/* utfoctet(): buf has been filled and is being handed out */
	unsigned int eaten : 1;
	/* utfoctet(): input ran dry in the middle of a sequence */
	unsigned int ebbed : 1;
	/* utfoctet(): buf[limit] holds an octet to be read again */
	unsigned int unget : 1;
	/* the octet just returned is the first one of a character */
	unsigned int first : 1;
} utfstate;
33
34 static int
35 utfoctet(P *p)
36 {
37         int c;
38
39         utfstate.first = 0;
40         if (utfstate.eaten) {
41  ate:
42                 if (utfstate.start < utfstate.limit)
43                         return (utfstate.buf[utfstate.start++]);
44                 if (utfstate.ebbed)
45                         return (NO_MORE_DATA);
46                 utfstate.eaten = utfstate.limit = 0;
47         }
48         if (!utfstate.limit) {
49                 utfstate.first = 1;
50                 if (utfstate.unget) {
51                         c = utfstate.buf[utfstate.start];
52                         utfstate.unget = 0;
53                 } else
54                         c = pgetb(p);
55                 if ((c == NO_MORE_DATA) || (c < 0x80))
56                         return (c);
57                 if ((c < 0xC2) || (c >= 0xFE))
58                         return (0xFF);
59                 utfstate.start = 0;
60                 utfstate.buf[utfstate.start++] = (unsigned char)c;
61                 utfstate.limit = (c < 0xE0) ? 2 : (c < 0xF0) ? 3 :
62                     (c < 0xF8) ? 4 : (c < 0xFC) ? 5 : 6;
63         }
64         while (utfstate.start < utfstate.limit) {
65                 if (((c = pgetb(p)) == NO_MORE_DATA) || ((c ^ 0x80) > 0x3F)) {
66                         /* invalid follow byte, invalidate all previous ones */
67                         utfstate.limit = 0;
68                         while (utfstate.limit < utfstate.start)
69                                 utfstate.buf[utfstate.limit++] = 0xFF;
70                         /* append this as ungetch unless the well is dry */
71                         if (c == NO_MORE_DATA)
72                                 utfstate.ebbed = 1;
73                         else {
74                                 utfstate.buf[utfstate.limit] = (unsigned char)c;
75                                 utfstate.unget = 1;
76                         }
77                         /* now return those bytes */
78                         break;
79                 }
80                 utfstate.buf[utfstate.start++] = (unsigned char)c;
81         }
82         utfstate.start = 0;
83         utfstate.eaten = 1;
84         goto ate;
85 }
86
87 static int
88 octetutf(P *p)
89 {
90         int c;
91
92         utfstate.first = 0;
93         if (!(utfstate.start < utfstate.limit)) {
94                 if ((c = pgetb(p)) == NO_MORE_DATA)
95                         return (NO_MORE_DATA);
96
97                 utfstate.limit = utf8_encode(utfstate.buf,
98                     to_uni(p->b->o.charmap, c));
99                 utfstate.start = 0;
100                 utfstate.first = 1;
101         }
102         return (utfstate.buf[utfstate.start++]);
103 }
104
105 /* Parse one line.  Returns new state.
106    'syntax' is the loaded syntax definition for this buffer.
107    'line' is advanced to start of next line.
108    Global array 'attr_buf' end up with coloring for each character of line.
109    'state' is initial parser state for the line (0 is initial state).
110 */
111
112 int *attr_buf = 0;
113 int attr_size = 0;
114
115 int parse(struct high_syntax *syntax, P *line, int state)
116 {
117         struct high_state *h = syntax->states[state];
118                         /* Current state */
119         unsigned char buf[20];  /* Name buffer (trunc after 19 characters) */
120         int buf_idx = 0;        /* Index into buffer */
121         int buf_len = 0;        /* counts only starting characters */
122         int buf_en = 0;         /* Set for name buffering */
123         int *attr_end = attr_buf+attr_size;
124         int *attr = attr_buf;
125         int c;                  /* Current character */
126         int ofst = 0;   /* record length after we've stopped buffering */
127         int (*getoctet)(P *) = line->b->o.charmap->type ? utfoctet : octetutf;
128
129         memset(&utfstate, 0, sizeof(utfstate));
130         buf[0] = 0;
131
132         /* Get next character */
133         while((c = getoctet(line)) != NO_MORE_DATA) {
134                 struct high_cmd *cmd, *kw_cmd;
135                 int x;
136
137                 /* Expand attribute array if necessary */
138                 if(attr==attr_end) {
139                         attr_buf = realloc(attr_buf,
140                             sizeof(int) * (attr_size * 2));
141                         attr = attr_buf + attr_size;
142                         attr_size *= 2;
143                         attr_end = attr_buf + attr_size;
144                 }
145
146                 /* Advance to next attribute position (note attr[-1] below) */
147                 if (utfstate.first)
148                         attr++;
149
150                 /* Loop while noeat */
151                 do {
152                         /* Color with current state */
153                         attr[-1] = h->color;
154                         /* Get command for this character */
155                         cmd = h->cmd[c];
156                         /* Determine new state */
157                         if (cmd->keywords && (cmd->ignore ?
158                             (kw_cmd = htfind(cmd->keywords, joe_strtolower(buf))) :
159                             (kw_cmd = htfind(cmd->keywords, buf)))) {
160                                 cmd = kw_cmd;
161                                 h = cmd->new_state;
162                                 /* Recolor keyword */
163                                 for (x = -(buf_len + 1); x < -1; ++x)
164                                         attr[x - ofst] = h->color;
165                         } else {
166                                 h = cmd->new_state;
167                         }
168                         /* Recolor if necessary */
169                         x = cmd->recolor;
170                         while (&attr[x] < attr_buf)
171                                 ++x;
172                         while (x < 0)
173                                 attr[x++] = h->color;
174
175                         /* Start buffering? */
176                         if (cmd->start_buffering) {
177                                 buf_idx = 0;
178                                 buf_len = 0;
179                                 buf_en = 1;
180                                 ofst = 0;
181                         }
182
183                         /* Stop buffering? */
184                         if (cmd->stop_buffering)
185                                 buf_en = 0;
186                 } while(cmd->noeat);
187
188                 /* Save character in buffer */
189                 if (!buf_en)
190                         ofst += utfstate.first;
191                 else if (buf_idx < 19) {
192                         buf[buf_idx++] = c;
193                         buf[buf_idx] = 0;
194                         buf_len += utfstate.first;
195                 }
196
197                 if (c == '\n')
198                         break;
199         }
200         /* Return new state number */
201         return h->no;
202 }
203
204 /* Subroutines for load_dfa() */
205
206 static struct high_state *find_state(struct high_syntax *syntax, const unsigned char *name)
207 {
208         int x;
209         struct high_state *state;
210
211         /* Find state */
212         for(x=0;x!=syntax->nstates;++x)
213                 if(!strcmp(syntax->states[x]->name,name))
214                         break;
215
216         /* It doesn't exist, so create it */
217         if(x==syntax->nstates) {
218                 int y;
219                 state = malloc(sizeof(struct high_state));
220                 state->name=(const unsigned char *)strdup((const char *)name);
221                 state->no=syntax->nstates;
222                 state->color=FG_WHITE;
223                 if(!syntax->nstates)
224                         /* We're the first state */
225                         syntax->default_cmd.new_state = state;
226                 if(syntax->nstates==syntax->szstates)
227                         syntax->states = realloc(syntax->states,
228                            sizeof(struct high_state *) * (syntax->szstates *= 2));
229                 syntax->states[syntax->nstates++]=state;
230                 for(y=0; y!=256; ++y)
231                         state->cmd[y] = &syntax->default_cmd;
232         } else
233                 state = syntax->states[x];
234         return state;
235 }
236
237 /* Load syntax file */
238
239 struct high_syntax *syntax_list;
240
241 struct high_syntax *load_dfa(const unsigned char *name)
242 {
243         unsigned char buf[1024];
244         unsigned char bf[256];
245         unsigned char bf1[256];
246         int clist[256];
247         unsigned char *p;
248         int c;
249         FILE *f = NULL;
250         struct high_state *state=0;     /* Current state */
251         struct high_syntax *syntax;     /* New syntax table */
252         int line = 0;
253         void *np;
254
255         if (!name)
256                 return NULL;
257
258         if (!attr_buf)
259                 attr_buf = ralloc((size_t)(attr_size = 1024), sizeof(int));
260
261         /* Find syntax table */
262
263         /* Already loaded? */
264         for(syntax=syntax_list;syntax;syntax=syntax->next)
265                 if(!strcmp(syntax->name,name))
266                         return syntax;
267
268         /* Load it */
269         p = (unsigned char *)getenv("HOME");
270         if (p) {
271                 joe_snprintf_2((char *)buf,sizeof(buf),"%s/.jupp/syntax/%s.jsf",p,name);
272                 f = fopen((char *)buf,"r");
273         }
274
275         if (!f && has_JOERC) {
276                 joe_snprintf_2((char *)buf,sizeof(buf),"%ssyntax/%s.jsf",get_JOERC,name);
277                 f = fopen((char *)buf,"r");
278         }
279         if(!f)
280                 return 0;
281
282         /* Create new one */
283         syntax = calloc(1, sizeof(struct high_syntax));
284         syntax->name = (const unsigned char *)strdup((const char *)name);
285         syntax->next = syntax_list;
286         syntax_list = syntax;
287         syntax->states = ralloc((size_t)(syntax->szstates = 64),
288             sizeof(struct high_state *));
289         syntax->sync_lines = 120;
290
291         memset(clist, 0, sizeof(clist));
292
293         /* Parse file */
294         while(fgets((char *)buf,1023,f)) {
295                 ++line;
296                 p = buf;
297                 parse_ws(&p,'#');
298                 if(!parse_char(&p, ':')) {
299                         if(!parse_ident(&p, bf, 255)) {
300
301                                 state = find_state(syntax,bf);
302
303                                 parse_ws(&p,'#');
304                                 if(!parse_ident(&p,bf,255)) {
305                                         struct high_color *color;
306                                         for(color=syntax->color;color;color=color->next)
307                                                 if(!strcmp(color->name,bf))
308                                                         break;
309                                         if(color)
310                                                 state->color=color->color;
311                                         else {
312                                                 state->color=0;
313                                                 fprintf(stderr,"%s:%d: Unknown class '%s'\n", name, line, bf);
314                                         }
315                                 } else
316                                         fprintf(stderr,"%s:%d: Missing color for state definition\n", name, line);
317                         } else
318                                 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
319                 } else if(!parse_char(&p, '=')) {
320                         if(!parse_ident(&p, bf, 255)) {
321                                 struct high_color *color;
322
323                                 /* Find color */
324                                 for(color=syntax->color;color;color=color->next)
325                                         if(!strcmp(color->name,bf))
326                                                 break;
327                                 /* If it doesn't exist, create it */
328                                 if(!color) {
329                                         color = calloc(1, sizeof(struct high_color));
330                                         color->name = (unsigned char *)strdup((char *)bf);
331                                         color->next = syntax->color;
332                                         syntax->color = color;
333                                 } else {
334                                         fprintf(stderr,"%s:%d: Class '%s' already defined\n", name, line, bf);
335                                 }
336
337                                 /* Parse color definition */
338                                 while(parse_ws(&p,'#'), !parse_ident(&p,bf,255)) {
339                                         color->color |= meta_color(bf);
340                                 }
341                         }
342                 } else if(!parse_char(&p, '-')) { /* No. sync lines */
343                         syntax->sync_lines = (int)ustolb(p, &np,
344                             INT_MIN, INT_MAX, USTOL_TRIM);
345                         if (!np)
346                                 syntax->sync_lines = -1;
347                         else
348                                 p = np;
349                 } else {
350                         c = parse_ws(&p,'#');
351
352                         if (!c) {
353                         } else if (c=='"' || c=='*') {
354                                 if (state) {
355                                         struct high_cmd *cmd;
356                                         if (!parse_field(&p, UC "*")) {
357                                                 int z;
358                                                 for(z=0;z!=256;++z)
359                                                         clist[z] = 1;
360                                         } else {
361                                                 c = parse_string(&p, bf, 255);
362                                                 if(c)
363                                                         fprintf(stderr,"%s:%d: Bad string\n", name, line);
364                                                 else {
365                                                         int z;
366                                                         int first, second;
367                                                         unsigned char *t = bf;
368                                                         for(z=0;z!=256;++z)
369                                                                 clist[z] = 0;
370                                                         while(!parse_range(&t, &first, &second)) {
371                                                                 if(first>second)
372                                                                         second = first;
373                                                                 while(first<=second)
374                                                                         clist[first++] = 1;
375                                                         }
376                                                 }
377                                         }
378                                         /* Create command */
379                                         cmd = calloc(1, sizeof(struct high_cmd));
380                                         parse_ws(&p,'#');
381                                         if(!parse_ident(&p,bf,255)) {
382                                                 int z;
383                                                 cmd->new_state = find_state(syntax,bf);
384
385                                                 /* Parse options */
386                                                 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
387                                                         if(!strcmp(bf,"buffer")) {
388                                                                 cmd->start_buffering = 1;
389                                                         } else if(!strcmp(bf,"hold")) {
390                                                                 cmd->stop_buffering = 1;
391                                                         } else if(!strcmp(bf,"recolor")) {
392                                                                 parse_ws(&p,'#');
393                                                                 if(!parse_char(&p,'=')) {
394                                                                         parse_ws(&p,'#');
395                                                                         cmd->recolor = (int)ustolb(p, &np,
396                                                                             INT_MIN, INT_MAX, USTOL_TRIM);
397                                                                         if (!np)
398                                                                                 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
399                                                                         else
400                                                                                 p = np;
401                                                                 } else
402                                                                         fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
403                                                         } else if(!strcmp(bf,"strings") || !strcmp(bf,"istrings")) {
404                                                                 if (bf[0]=='i')
405                                                                         cmd->ignore = 1;
406                                                                 while(fgets((char *)buf,1023,f)) {
407                                                                         ++line;
408                                                                         p = buf;
409                                                                         parse_ws(&p,'#');
410                                                                         if (*p) {
411                                                                                 if (!parse_field(&p, UC "done"))
412                                                                                         break;
413                                                                                 if(!parse_string(&p,bf,255)) {
414                                                                                         parse_ws(&p,'#');
415                                                                                         if (cmd->ignore)
416                                                                                                 joe_strtolower(bf);
417                                                                                         if(!parse_ident(&p,bf1,255)) {
418                                                                                                 struct high_cmd *kw_cmd = calloc(1, sizeof(struct high_cmd));
419                                                                                                 kw_cmd->noeat=1;
420                                                                                                 kw_cmd->new_state = find_state(syntax,bf1);
421                                                                                                 if(!cmd->keywords)
422                                                                                                         cmd->keywords = htmk(64);
423                                                                                                 htadd(cmd->keywords,(unsigned char *)strdup((char *)bf),kw_cmd);
424                                                                                                 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
425                                                                                                         if(!strcmp(bf,"buffer")) {
426                                                                                                                 kw_cmd->start_buffering = 1;
427                                                                                                         } else if(!strcmp(bf,"hold")) {
428                                                                                                                 kw_cmd->stop_buffering = 1;
429                                                                                                         } else if(!strcmp(bf,"recolor")) {
430                                                                                                                 parse_ws(&p,'#');
431                                                                                                                 if(!parse_char(&p,'=')) {
432                                                                                                                         parse_ws(&p,'#');
433                                                                                                                         kw_cmd->recolor = (int)ustolb(p, &np,
434                                                                                                                             INT_MIN, INT_MAX, USTOL_TRIM);
435                                                                                                                         if (!np)
436                                                                                                                                 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
437                                                                                                                         else
438                                                                                                                                 p = np;
439                                                                                                                 } else
440                                                                                                                         fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
441                                                                                                         } else
442                                                                                                                 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
443                                                                                         } else
444                                                                                                 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
445                                                                                 } else
446                                                                                         fprintf(stderr,"%s:%d: Missing string\n", name, line);
447                                                                         }
448                                                                 }
449                                                         } else if(!strcmp(bf,"noeat")) {
450                                                                 cmd->noeat = 1;
451                                                         } else if(!strcmp(bf,"mark")) {
452                                                                 /* not implemented yet */ ;
453                                                         } else if(!strcmp(bf,"markend")) {
454                                                                 /* not implemented yet */ ;
455                                                         } else if(!strcmp(bf,"recolormark")) {
456                                                                 /* not implemented yet */ ;
457                                                         } else
458                                                                 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
459
460                                                 /* Install command */
461                                                 for(z=0;z!=256;++z)
462                                                         if(clist[z])
463                                                                 state->cmd[z]=cmd;
464                                         } else
465                                                 fprintf(stderr,"%s:%d: Missing jump\n", name, line);
466                                 } else
467                                         fprintf(stderr,"%s:%d: No state\n", name, line);
468                         } else
469                                 fprintf(stderr,"%s:%d: Unknown character\n", name, line);
470                 }
471         }
472
473         fclose(f);
474
475         return syntax;
476 }