#include <stdio.h>

/* A rewrite rule: text matching pattern `a` is replaced by template `b`.
   Both point at NUL-terminated strings stored in dict[]. */
typedef struct {
	char *a, *b;
} Rule;

static int rules_len;
static Rule rules[0x100];
static char dict[0x8000], *dict_ = dict; /* rule strings, append-only */
static char prog[0x1000], *prog_ = prog; /* current program text */
static char outp[0x1000], *outp_ = outp; /* scratch output of one rewrite pass */
static char *regs[0x100];                /* capture start, indexed by register byte */

#define spacer(c) ((c) == ' ' || (c) == '(' || (c) == ')')

/* Return the end of the term starting at s: just past the matching ')' when
   s starts with '(', otherwise the first space, paren or terminator.
   Never advances beyond the terminating NUL. */
static char *
walk(char *s)
{
	int depth = 0;
	if(*s == '(') {
		for(; *s; s++) {
			if(*s == '(')
				depth++;
			else if(*s == ')' && --depth == 0)
				return s + 1;
		}
		return s; /* unbalanced group: stop on the terminator */
	}
	while(*s && !spacer(*s))
		s++;
	return s;
}

/* Try to match rule r's pattern against the text at p.
   "?x" in the pattern captures one term (see walk) into regs['x'].
   Returns the first unconsumed text position on success, NULL otherwise. */
static char *
match(char *p, Rule *r)
{
	char c, *a = r->a, *b = p;
	while((c = *a)) {
		/* a '?' with no register name after it is compared literally */
		if(c == '?' && a[1]) {
			/* unsigned char: a plain char index may be negative */
			regs[(unsigned char)a[1]] = b;
			a += 2;
			b = walk(b);
			if(!*a) return b;
			/* pattern continues but the text is exhausted */
			if(!*b) return NULL;
			/* the byte following a capture is skipped on both sides
			   without being compared */
			a++, b++;
			continue;
		}
		if(c != *b) return NULL;
		a++, b++;
	}
	return b;
}

/* Append one byte to outp, always keeping the last slot free for the
   terminator; bytes that do not fit are dropped (rewrite() detects this). */
static void
emit(char c)
{
	if(outp_ < outp + sizeof outp - 1)
		*outp_++ = c;
}

/* Copy the term captured in register r to the output.
   A register that was never captured writes nothing. */
static void
writereg(char r)
{
	char *s = regs[(unsigned char)r], *end;
	if(!s) return;
	end = walk(s);
	while(s < end)
		emit(*s++);
}

/* Make the freshly written output (including its terminator) the current
   program, reset both cursors and print the new program. */
static void
save(void)
{
	int i, end = (int)(outp_ - outp);
	/* todo: change pointer instead of copying memory */
	for(i = 0; i < end; i++)
		prog[i] = outp[i];
	prog_ = prog, outp_ = outp;
	printf(".. %s\n", prog);
}

/* Apply the first rule that matches at the leftmost possible position.
   Returns 1 after one successful rewrite (prog updated and printed),
   0 when no rule matches anywhere or the result does not fit in the buffer. */
static int
rewrite(void)
{
	char c, *p = prog;
	while((c = *p)) {
		int i;
		for(i = 0; i < rules_len; i++) {
			Rule *r = &rules[i];
			char *res = match(p, r);
			if(res != NULL) {
				char cc, *b = r->b;
				/* expand the replacement template */
				while((cc = *b++)) {
					if(cc == '?' && *b)
						writereg(*b++);
					else
						emit(cc);
				}
				/* then the untouched remainder of the program */
				while(*res)
					emit(*res++);
				/* a completely full buffer is treated as overflow */
				if(outp_ >= outp + sizeof outp - 1) {
					outp_ = outp;
					printf("Program too long\n");
					return 0;
				}
				*outp_++ = 0;
				save();
				return 1;
			}
		}
		/* nothing matched here: keep this byte and try the next position */
		emit(c);
		p++;
	}
	/* no rule applied; discard the scratch copy */
	outp_ = outp;
	return 0;
}

/* Print every rule as "Rule #i: pattern -> replacement", then a blank line. */
static void
print_rules(void)
{
	int i;
	for(i = 0; i < rules_len; i++)
		printf("Rule #%d: %s -> %s\n", i, rules[i].a, rules[i].b);
	printf("\n");
}

/* Read one rule fragment from f into dict[]: leading spaces are skipped, the
   outermost pair of parentheses is dropped, and the fragment ends at a space
   outside parentheses, a newline, a NUL byte or end of file.
   Returns the stored string, or NULL when dict[] is full. */
static char *
parse_rulefrag(FILE *f)
{
	int depth = 0, trim = 0;
	char c, *origin = dict_, *limit = dict + sizeof dict - 1;
	while(fread(&c, 1, 1, f) == 1 && c && c != 0xa) {
		if(c == ' ' && !trim) continue;
		trim = 1;
		if(c == '(' && ++depth == 1) continue;
		if(c == ')' && --depth == 0) continue;
		if(c == ' ' && !depth) break;
		if(dict_ >= limit) return NULL; /* keep room for the terminator */
		*dict_++ = c;
	}
	if(dict_ > limit) return NULL;
	*dict_++ = 0;
	return origin;
}

/* Append one byte to prog, keeping the final byte as terminator
   (prog[] is zero-initialised). Returns 0 when the buffer is full. */
static int
push(char c)
{
	if(prog_ >= prog + sizeof prog - 1) return 0;
	*prog_++ = c;
	return 1;
}

/* Handle one token. A token starting with "<>" introduces a rule whose two
   fragments are read from f. Any other non-empty token starts the program:
   the token, a space, and the rest of f (newlines turned into spaces, stopping
   at a NUL byte) are appended to prog.
   Returns 1 on success, 0 after printing a message when a limit is exceeded. */
static int
tokenize(char *t, FILE *f)
{
	char c;
	if(!t[0]) return 1;
	if(t[0] == '<' && t[1] == '>') {
		char *a, *b;
		if(rules_len >= (int)(sizeof rules / sizeof rules[0])) {
			printf("Too many rules\n");
			return 0;
		}
		a = parse_rulefrag(f);
		b = parse_rulefrag(f);
		if(!a || !b) {
			printf("Rules too long\n");
			return 0;
		}
		rules[rules_len].a = a;
		rules[rules_len].b = b;
		rules_len++;
		return 1;
	}
	while(*t)
		if(!push(*t++)) goto full;
	if(!push(' ')) goto full;
	while(fread(&c, 1, 1, f) == 1 && c)
		if(!push(c == 0xa ? ' ' : c)) goto full;
	return 1;
full:
	printf("Program too long\n");
	return 0;
}

/* Load rules and program from the file at path.
   Bytes below 0x21 separate tokens; each token is passed to tokenize().
   Returns 1 on success, 0 (after printing a message) on failure.
   The file is closed on every path once it has been opened. */
static int
parse(char *path)
{
	FILE *f;
	char c, token[0x40], *token_ = token;
	int ok = 1;
	if(!(f = fopen(path, "r"))) {
		printf("Invalid file: %s\n", path);
		return 0;
	}
	while(ok && fread(&c, 1, 1, f) == 1) {
		/* unsigned compare so bytes >= 0x80 are not taken for separators */
		if((unsigned char)c < 0x21) {
			*token_ = 0;
			ok = tokenize(token, f);
			token_ = token;
		} else if(token_ - token < 0x3f) {
			*token_++ = c;
		} else {
			*token_ = 0; /* terminate before printing */
			printf("Token too long: %s\n", token);
			ok = 0;
		}
	}
	if(ok) {
		/* flush a token that was ended by end of file */
		*token_ = 0;
		ok = tokenize(token, f);
	}
	fclose(f);
	return ok;
}

/* Entry point: parse the source file, list its rules, print the program and
   rewrite it until no rule applies. Exits with 1 when parsing fails. */
int
main(int argc, char **argv)
{
	if(argc < 2)
		return !printf("usage: modal [-v] source.modal\n");
	if(argc < 3 && argv[1][0] == '-' && argv[1][1] == 'v')
		return !printf("Modal - Modal Interpreter, 3 Apr 2024.\n");
	if(!parse(argv[1]))
		return 1;
	print_rules();
	printf(".. %s\n", prog);
	while(rewrite())
		;
	return 0;
}