add a CVS snapshot, to thoroughly test on the Debian side
[alioth/jupp.git] / syntax.c
1 /* $MirOS: contrib/code/jupp/syntax.c,v 1.15 2017/01/10 20:55:03 tg Exp $ */
2 /*
3  *      Syntax highlighting DFA interpreter
4  *      Copyright
5  *              (C) 2004 Joseph H. Allen
6  *
7  *      This file is part of JOE (Joe's Own Editor)
8  */
9
10 #include "config.h"
11 #include <stdlib.h>
12 #include <string.h>
13 #include "b.h"
14 #include "types.h"
15 #include "scrn.h"
16 #include "utils.h"
17 #include "hash.h"
18 #include "path.h"
19 #include "charmap.h"
20 #include "syntax.h"
21
/*
 * Decoder state shared by utfoctet() and octetutf().
 * parse() zeroes it before it starts reading a line.
 */
static struct {
	unsigned char buf[6];		/* octets collected for the current character */
	unsigned char start;		/* index of the next octet in buf to hand out */
	unsigned char limit;		/* number of octets buf holds / is expected to hold */
	unsigned int eaten : 1;		/* buf is complete and is being drained */
	unsigned int ebbed : 1;		/* input ran dry in the middle of a sequence */
	unsigned int unget : 1;		/* buf[start] holds an octet to be re-read */
	unsigned int first : 1;		/* the octet just returned started a new character */
} utfstate;
31
32 static int
33 utfoctet(P *p)
34 {
35         int c;
36
37         utfstate.first = 0;
38         if (utfstate.eaten) {
39  ate:
40                 if (utfstate.start < utfstate.limit)
41                         return (utfstate.buf[utfstate.start++]);
42                 if (utfstate.ebbed)
43                         return (NO_MORE_DATA);
44                 utfstate.eaten = utfstate.limit = 0;
45         }
46         if (!utfstate.limit) {
47                 utfstate.first = 1;
48                 if (utfstate.unget) {
49                         c = utfstate.buf[utfstate.start];
50                         utfstate.unget = 0;
51                 } else
52                         c = pgetb(p);
53                 if ((c == NO_MORE_DATA) || (c < 0x80))
54                         return (c);
55                 if ((c < 0xC2) || (c >= 0xFE))
56                         return (0xFF);
57                 utfstate.start = 0;
58                 utfstate.buf[utfstate.start++] = (unsigned char)c;
59                 utfstate.limit = (c < 0xE0) ? 2 : (c < 0xF0) ? 3 :
60                     (c < 0xF8) ? 4 : (c < 0xFC) ? 5 : 6;
61         }
62         while (utfstate.start < utfstate.limit) {
63                 if (((c = pgetb(p)) == NO_MORE_DATA) || ((c ^ 0x80) > 0x3F)) {
64                         /* invalid follow byte, invalidate all previous ones */
65                         utfstate.limit = 0;
66                         while (utfstate.limit < utfstate.start)
67                                 utfstate.buf[utfstate.limit++] = 0xFF;
68                         /* append this as ungetch unless the well is dry */
69                         if (c == NO_MORE_DATA)
70                                 utfstate.ebbed = 1;
71                         else {
72                                 utfstate.buf[utfstate.limit] = (unsigned char)c;
73                                 utfstate.unget = 1;
74                         }
75                         /* now return those bytes */
76                         break;
77                 }
78                 utfstate.buf[utfstate.start++] = (unsigned char)c;
79         }
80         utfstate.start = 0;
81         utfstate.eaten = 1;
82         goto ate;
83 }
84
85 static int
86 octetutf(P *p)
87 {
88         int c;
89
90         utfstate.first = 0;
91         if (!(utfstate.start < utfstate.limit)) {
92                 if ((c = pgetb(p)) == NO_MORE_DATA)
93                         return (NO_MORE_DATA);
94
95                 utfstate.limit = utf8_encode(utfstate.buf,
96                     to_uni(p->b->o.charmap, c));
97                 utfstate.start = 0;
98                 utfstate.first = 1;
99         }
100         return (utfstate.buf[utfstate.start++]);
101 }
102
103 /* Parse one line.  Returns new state.
104    'syntax' is the loaded syntax definition for this buffer.
105    'line' is advanced to start of next line.
106    Global array 'attr_buf' ends up with the coloring for each character of the line.
107    'state' is initial parser state for the line (0 is initial state).
108 */
109
/* Attribute (color) array filled in by parse(), one entry per character */
int *attr_buf = NULL;
/* Number of entries allocated for attr_buf */
int attr_size = 0;
112
113 int parse(struct high_syntax *syntax, P *line, int state)
114 {
115         struct high_state *h = syntax->states[state];
116                         /* Current state */
117         unsigned char buf[20];  /* Name buffer (trunc after 19 characters) */
118         int buf_idx = 0;        /* Index into buffer */
119         int buf_len = 0;        /* counts only starting characters */
120         int buf_en = 0;         /* Set for name buffering */
121         int *attr_end = attr_buf+attr_size;
122         int *attr = attr_buf;
123         int c;                  /* Current character */
124         int ofst = 0;   /* record length after we've stopped buffering */
125         int (*getoctet)(P *) = line->b->o.charmap->type ? utfoctet : octetutf;
126
127         memset(&utfstate, 0, sizeof(utfstate));
128         buf[0] = 0;
129
130         /* Get next character */
131         while((c = getoctet(line)) != NO_MORE_DATA) {
132                 struct high_cmd *cmd, *kw_cmd;
133                 int x;
134
135                 /* Expand attribute array if necessary */
136                 if(attr==attr_end) {
137                         attr_buf = realloc(attr_buf,sizeof(int)*(attr_size*2));
138                         attr = attr_buf + attr_size;
139                         attr_size *= 2;
140                         attr_end = attr_buf + attr_size;
141                 }
142
143                 /* Advance to next attribute position (note attr[-1] below) */
144                 if (utfstate.first)
145                         attr++;
146
147                 /* Loop while noeat */
148                 do {
149                         /* Color with current state */
150                         attr[-1] = h->color;
151                         /* Get command for this character */
152                         cmd = h->cmd[c];
153                         /* Determine new state */
154                         if (cmd->keywords && (cmd->ignore ?
155                             (kw_cmd = htfind(cmd->keywords, joe_strtolower(buf))) :
156                             (kw_cmd = htfind(cmd->keywords, buf)))) {
157                                 cmd = kw_cmd;
158                                 h = cmd->new_state;
159                                 /* Recolor keyword */
160                                 for (x = -(buf_len + 1); x < -1; ++x)
161                                         attr[x - ofst] = h->color;
162                         } else {
163                                 h = cmd->new_state;
164                         }
165                         /* Recolor if necessary */
166                         x = cmd->recolor;
167                         while (&attr[x] < attr_buf)
168                                 ++x;
169                         while (x < 0)
170                                 attr[x++] = h->color;
171
172                         /* Start buffering? */
173                         if (cmd->start_buffering) {
174                                 buf_idx = 0;
175                                 buf_len = 0;
176                                 buf_en = 1;
177                                 ofst = 0;
178                         }
179
180                         /* Stop buffering? */
181                         if (cmd->stop_buffering)
182                                 buf_en = 0;
183                 } while(cmd->noeat);
184
185                 /* Save character in buffer */
186                 if (!buf_en)
187                         ofst += utfstate.first;
188                 else if (buf_idx < 19) {
189                         buf[buf_idx++] = c;
190                         buf[buf_idx] = 0;
191                         buf_len += utfstate.first;
192                 }
193
194                 if (c == '\n')
195                         break;
196         }
197         /* Return new state number */
198         return h->no;
199 }
200
201 /* Subroutines for load_dfa() */
202
203 static struct high_state *find_state(struct high_syntax *syntax, const unsigned char *name)
204 {
205         int x;
206         struct high_state *state;
207
208         /* Find state */
209         for(x=0;x!=syntax->nstates;++x)
210                 if(!strcmp(syntax->states[x]->name,name))
211                         break;
212
213         /* It doesn't exist, so create it */
214         if(x==syntax->nstates) {
215                 int y;
216                 state=malloc(sizeof(struct high_state));
217                 state->name=(const unsigned char *)strdup((const char *)name);
218                 state->no=syntax->nstates;
219                 state->color=FG_WHITE;
220                 if(!syntax->nstates)
221                         /* We're the first state */
222                         syntax->default_cmd.new_state = state;
223                 if(syntax->nstates==syntax->szstates)
224                         syntax->states=realloc(syntax->states,sizeof(struct high_state *)*(syntax->szstates*=2));
225                 syntax->states[syntax->nstates++]=state;
226                 for(y=0; y!=256; ++y)
227                         state->cmd[y] = &syntax->default_cmd;
228         } else
229                 state = syntax->states[x];
230         return state;
231 }
232
233 /* Load syntax file */
234
235 struct high_syntax *syntax_list;
236
237 struct high_syntax *load_dfa(const unsigned char *name)
238 {
239         unsigned char buf[1024];
240         unsigned char bf[256];
241         unsigned char bf1[256];
242         int clist[256];
243         unsigned char *p;
244         int c;
245         FILE *f = NULL;
246         struct high_state *state=0;     /* Current state */
247         struct high_syntax *syntax;     /* New syntax table */
248         int line = 0;
249
250         if (!name)
251                 return NULL;
252
253         if(!attr_buf) {
254                 attr_size = 1024;
255                 attr_buf = malloc(sizeof(int)*attr_size);
256         }
257
258         /* Find syntax table */
259
260         /* Already loaded? */
261         for(syntax=syntax_list;syntax;syntax=syntax->next)
262                 if(!strcmp(syntax->name,name))
263                         return syntax;
264
265         /* Load it */
266         p = (unsigned char *)getenv("HOME");
267         if (p) {
268                 joe_snprintf_2((char *)buf,sizeof(buf),"%s/.joe/syntax/%s.jsf",p,name);
269                 f = fopen((char *)buf,"r");
270         }
271
272         if (!f && has_JOERC) {
273                 joe_snprintf_2((char *)buf,sizeof(buf),"%ssyntax/%s.jsf",get_JOERC,name);
274                 f = fopen((char *)buf,"r");
275         }
276         if(!f)
277                 return 0;
278
279         /* Create new one */
280         syntax = calloc(1, sizeof(struct high_syntax));
281         syntax->name = (const unsigned char *)strdup((const char *)name);
282         syntax->next = syntax_list;
283         syntax_list = syntax;
284         syntax->states = malloc(sizeof(struct high_state *)*(syntax->szstates=64));
285         syntax->sync_lines = 120;
286
287         memset(clist, 0, sizeof(clist));
288
289         /* Parse file */
290         while(fgets((char *)buf,1023,f)) {
291                 ++line;
292                 p = buf;
293                 parse_ws(&p,'#');
294                 if(!parse_char(&p, ':')) {
295                         if(!parse_ident(&p, bf, 255)) {
296
297                                 state = find_state(syntax,bf);
298
299                                 parse_ws(&p,'#');
300                                 if(!parse_ident(&p,bf,255)) {
301                                         struct high_color *color;
302                                         for(color=syntax->color;color;color=color->next)
303                                                 if(!strcmp(color->name,bf))
304                                                         break;
305                                         if(color)
306                                                 state->color=color->color;
307                                         else {
308                                                 state->color=0;
309                                                 fprintf(stderr,"%s:%d: Unknown class '%s'\n", name, line, bf);
310                                         }
311                                 } else
312                                         fprintf(stderr,"%s:%d: Missing color for state definition\n", name, line);
313                         } else
314                                 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
315                 } else if(!parse_char(&p, '=')) {
316                         if(!parse_ident(&p, bf, 255)) {
317                                 struct high_color *color;
318
319                                 /* Find color */
320                                 for(color=syntax->color;color;color=color->next)
321                                         if(!strcmp(color->name,bf))
322                                                 break;
323                                 /* If it doesn't exist, create it */
324                                 if(!color) {
325                                         color = calloc(1, sizeof(struct high_color));
326                                         color->name = (unsigned char *)strdup((char *)bf);
327                                         color->next = syntax->color;
328                                         syntax->color = color;
329                                 } else {
330                                         fprintf(stderr,"%s:%d: Class '%s' already defined\n", name, line, bf);
331                                 }
332
333                                 /* Parse color definition */
334                                 while(parse_ws(&p,'#'), !parse_ident(&p,bf,255)) {
335                                         color->color |= meta_color(bf);
336                                 }
337                         }
338                 } else if(!parse_char(&p, '-')) { /* No. sync lines */
339                         if(parse_int(&p, &syntax->sync_lines))
340                                 syntax->sync_lines = -1;
341                 } else {
342                         c = parse_ws(&p,'#');
343
344                         if (!c) {
345                         } else if (c=='"' || c=='*') {
346                                 if (state) {
347                                         struct high_cmd *cmd;
348                                         if(!parse_field(&p, US "*")) {
349                                                 int z;
350                                                 for(z=0;z!=256;++z)
351                                                         clist[z] = 1;
352                                         } else {
353                                                 c = parse_string(&p, bf, 255);
354                                                 if(c)
355                                                         fprintf(stderr,"%s:%d: Bad string\n", name, line);
356                                                 else {
357                                                         int z;
358                                                         int first, second;
359                                                         unsigned char *t = bf;
360                                                         for(z=0;z!=256;++z)
361                                                                 clist[z] = 0;
362                                                         while(!parse_range(&t, &first, &second)) {
363                                                                 if(first>second)
364                                                                         second = first;
365                                                                 while(first<=second)
366                                                                         clist[first++] = 1;
367                                                         }
368                                                 }
369                                         }
370                                         /* Create command */
371                                         cmd = calloc(1, sizeof(struct high_cmd));
372                                         parse_ws(&p,'#');
373                                         if(!parse_ident(&p,bf,255)) {
374                                                 int z;
375                                                 cmd->new_state = find_state(syntax,bf);
376
377                                                 /* Parse options */
378                                                 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
379                                                         if(!strcmp(bf,"buffer")) {
380                                                                 cmd->start_buffering = 1;
381                                                         } else if(!strcmp(bf,"hold")) {
382                                                                 cmd->stop_buffering = 1;
383                                                         } else if(!strcmp(bf,"recolor")) {
384                                                                 parse_ws(&p,'#');
385                                                                 if(!parse_char(&p,'=')) {
386                                                                         parse_ws(&p,'#');
387                                                                         if(parse_int(&p,&cmd->recolor))
388                                                                                 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
389                                                                 } else
390                                                                         fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
391                                                         } else if(!strcmp(bf,"strings") || !strcmp(bf,"istrings")) {
392                                                                 if (bf[0]=='i')
393                                                                         cmd->ignore = 1;
394                                                                 while(fgets((char *)buf,1023,f)) {
395                                                                         ++line;
396                                                                         p = buf;
397                                                                         parse_ws(&p,'#');
398                                                                         if (*p) {
399                                                                                 if(!parse_field(&p,US "done"))
400                                                                                         break;
401                                                                                 if(!parse_string(&p,bf,255)) {
402                                                                                         parse_ws(&p,'#');
403                                                                                         if (cmd->ignore)
404                                                                                                 joe_strtolower(bf);
405                                                                                         if(!parse_ident(&p,bf1,255)) {
406                                                                                                 struct high_cmd *kw_cmd = calloc(1, sizeof(struct high_cmd));
407                                                                                                 kw_cmd->noeat=1;
408                                                                                                 kw_cmd->new_state = find_state(syntax,bf1);
409                                                                                                 if(!cmd->keywords)
410                                                                                                         cmd->keywords = htmk(64);
411                                                                                                 htadd(cmd->keywords,(unsigned char *)strdup((char *)bf),kw_cmd);
412                                                                                                 while (parse_ws(&p,'#'), !parse_ident(&p,bf,255))
413                                                                                                         if(!strcmp(bf,"buffer")) {
414                                                                                                                 kw_cmd->start_buffering = 1;
415                                                                                                         } else if(!strcmp(bf,"hold")) {
416                                                                                                                 kw_cmd->stop_buffering = 1;
417                                                                                                         } else if(!strcmp(bf,"recolor")) {
418                                                                                                                 parse_ws(&p,'#');
419                                                                                                                 if(!parse_char(&p,'=')) {
420                                                                                                                         parse_ws(&p,'#');
421                                                                                                                         if(parse_int(&p,&kw_cmd->recolor))
422                                                                                                                                 fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
423                                                                                                                 } else
424                                                                                                                         fprintf(stderr,"%s:%d: Missing value for option %s\n", name, line, bf);
425                                                                                                         } else
426                                                                                                                 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
427                                                                                         } else
428                                                                                                 fprintf(stderr,"%s:%d: Missing state name\n", name, line);
429                                                                                 } else
430                                                                                         fprintf(stderr,"%s:%d: Missing string\n", name, line);
431                                                                         }
432                                                                 }
433                                                         } else if(!strcmp(bf,"noeat")) {
434                                                                 cmd->noeat = 1;
435                                                         } else if(!strcmp(bf,"mark")) {
436                                                                 /* not implemented yet */ ;
437                                                         } else if(!strcmp(bf,"markend")) {
438                                                                 /* not implemented yet */ ;
439                                                         } else if(!strcmp(bf,"recolormark")) {
440                                                                 /* not implemented yet */ ;
441                                                         } else
442                                                                 fprintf(stderr,"%s:%d: Unknown option '%s'\n", name, line, bf);
443
444                                                 /* Install command */
445                                                 for(z=0;z!=256;++z)
446                                                         if(clist[z])
447                                                                 state->cmd[z]=cmd;
448                                         } else
449                                                 fprintf(stderr,"%s:%d: Missing jump\n", name, line);
450                                 } else
451                                         fprintf(stderr,"%s:%d: No state\n", name, line);
452                         } else
453                                 fprintf(stderr,"%s:%d: Unknown character\n", name, line);
454                 }
455         }
456
457         fclose(f);
458
459         return syntax;
460 }