/* texuni, a program to convert TeX's accented characters to Unicode. Version 2015-10-17. Copyright 2015 Dmitri Pavlov. Distributed under the terms of GNU General Public License version 3. */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/*
 * TeX accent commands (the character following the backslash) and the
 * combining mark each one maps to.  Every string starts with a space that
 * only serves as a base character so the mark is visible in an editor; the
 * program always emits `s + 1`, i.e. the combining mark alone, right after
 * the letter it applies to.
 */
static const struct {
    char c;
    const char *s;
} accents[] = {
    {'`', " ̀"},  /* grave */
    {'\'', " ́"}, /* acute */
    {'^', " ̂"},  /* circumflex */
    {'"', " ̈"},  /* diaeresis */
    {'~', " ̃"},  /* tilde */
    {'=', " ̄"},  /* macron */
    {'.', " ̇"},  /* dot above */
    {'u', " ̆"},  /* breve */
    {'v', " ̌"},  /* caron */
    {'H', " ̋"},  /* double acute */
    {'t', " ͡"},  /* tie */
    {'c', " ̧"},  /* cedilla */
    {'d', " ̣"},  /* dot below */
    {'b', " ̱"},  /* bar below */
};

/*
 * One- and two-letter TeX commands that stand for a letter of their own,
 * and the text that replaces them.
 */
static const struct {
    const char *c, *s;
} sletters[] = {
    {"oe", "œ"},
    {"OE", "Œ"},
    {"ae", "æ"},
    {"AE", "Æ"},
    {"aa", "å"},
    {"AA", "Å"},
    {"o", "ø"},
    {"O", "Ø"},
    {"l", "ł"},
    {"L", "Ł"},
    {"ss", "ß"},
    {"i", "i"},
    {"j", "j"},
    /* The following does not work with accents:
    {"i", "ı"},
    {"j", "ȷ"},
    */
};

/* Position of the most recently read input character, for diagnostics. */
int line = 1, column = 0;

/* Report `s` together with the current input position and exit with status 1. */
void die(const char *s)
{
    fprintf(stderr, "line %d, column %d: %s\n", line, column, s);
    exit(1);
}

/*
 * Read one character from stdin and keep line/column up to date.
 * Returns the character, or EOF at end of input.  Exits on a read error.
 */
int get(void)
{
    int c = getchar();

    if (ferror(stdin)) {
        perror("getchar");
        exit(1);
    }
    if (c == '\n') {
        column = 0;
        line++;
    } else if (c != EOF) {
        column++;
    }
    return c;
}

/* Like get(), but end of input is a fatal error: never returns EOF. */
int sget(void)
{
    int c = get();

    if (c == EOF)
        die("unexpected end of file");
    return c;
}

/*
 * Write one character to stdout and flush it.  Exits on a write error.
 * The per-character flush is kept from the original program (presumably so
 * that everything converted so far precedes any diagnostic on stderr).
 */
void put(int c)
{
    if (putchar(c) == EOF) {
        perror("putchar");
        exit(1);
    }
    fflush(stdout);
}

/* Write a NUL-terminated string to stdout.  Exits on a write error. */
void putstr(const char *s)
{
    if (fputs(s, stdout) == EOF) {
        perror("fputs");
        exit(1);
    }
}

/*
 * Dispose of a lookahead character that turned out not to belong to the
 * construct just processed.  A backslash or an opening brace may start a
 * construct of its own, so it is pushed back for the main loop to scan
 * again; anything else is copied to the output.
 */
static void hand_back(int c)
{
    if (c == '\\' || c == '{') {
        /* Only one character is ever pending, which ungetc() guarantees. */
        if (ungetc(c, stdin) == EOF)
            die("cannot push back character");
        column--; /* get() will count it again */
    } else {
        put(c);
    }
}

/*
 * Re-emit the input that was consumed in front of a command that is being
 * passed through unchanged: the opening brace before the backslash (if
 * `outer`), then the accent command `accent` (index into accents[], or -1
 * for none) and the brace that followed it (if `inner`).
 */
static void put_prefix(int outer, int accent, int inner)
{
    if (outer)
        put('{');
    if (accent >= 0) {
        put('\\');
        put(accents[accent].c);
        if (inner)
            put('{');
    }
}

/*
 * Process one construct whose first character `c` ('\\' or '{') has just
 * been read.  Recognised forms are converted and written to stdout:
 *
 *   \'e  \'{e}  {\'e}            letter followed by a combining mark
 *   \ss  \ss{}  {\ss}            special letter
 *   \'\i  \'{\i}  {\'{\i}}       special letter followed by a combining mark
 *
 * Everything else is written back exactly as it was read.
 */
static void convert(int c)
{
    int outer = 0;   /* a '{' was consumed in front of the backslash */
    int inner = 0;   /* a '{' was consumed between accent and nested command */
    int arg = 0;     /* a '{' was consumed after the command name */
    int accent = -1; /* pending accent for a nested command, or -1 */
    size_t k;

    if (c == '{') {
        c = sget();
        if (c != '\\') {
            /* An ordinary group: nothing to convert here. */
            put('{');
            put(c);
            return;
        }
        outer = 1;
    }
    c = sget(); /* first character of the command name */

    for (k = 0; k < ARRAY_SIZE(accents); k++) {
        int d;

        if (accents[k].c != c)
            continue;

        /* sget(), not get(): at end of input EOF must not be written out. */
        d = sget();
        if (isalpha(c) && isalpha(d)) {
            /* A longer command such as \begin or \times, not an accent. */
            put_prefix(outer, -1, 0);
            put('\\');
            put(c);
            put(d);
            return;
        }
        while (d == ' ')
            d = sget();

        if (d == '\\') {
            /* \'\i : the accent applies to a nested command. */
            accent = (int)k;
            c = sget();
            break;
        }
        if (d == '{') {
            d = sget();
            if (d == '\\') {
                /* \'{\i} : same, with the nested command in braces. */
                accent = (int)k;
                inner = 1;
                c = sget();
                break;
            }
            put(d);
            if (sget() != '}')
                die("expected }");
        } else {
            put(d);
        }
        putstr(accents[k].s + 1);
        if (outer && sget() != '}')
            die("expected }");
        return;
    }

    /* Not a plain accent: collect a command name of up to two characters. */
    char seq[3] = {(char)c, 0, 0};
    int d = sget();

    if (isalpha(d)) {
        seq[1] = (char)d;
        d = sget();
    }
    if (isalpha(d)) {
        /* Three or more letters: no special letter is that long. */
        put_prefix(outer, accent, inner);
        put('\\');
        put(c);
        if (seq[1] != 0)
            put(seq[1]);
        put(d);
        return;
    }
    if (d == '{') {
        arg = 1;
        d = sget();
    }

    int depth = outer + inner + arg; /* opening braces consumed so far */

    for (k = 0; k < ARRAY_SIZE(sletters); k++) {
        if (strcmp(sletters[k].c, seq) != 0)
            continue;

        if (depth && d != '}')
            die("expected }");
        putstr(sletters[k].s);
        if (accent >= 0)
            putstr(accents[accent].s + 1);
        if (depth) {
            /* `d` closed one brace; every other consumed brace must be
               closed right here as well. */
            for (int i = 1; i < depth; i++)
                if (sget() != '}')
                    die("expected }");
        } else if (!isspace(d)) {
            /* White space after the command is swallowed, as TeX does;
               anything else still has to be processed. */
            hand_back(d);
        }
        return;
    }

    /* Unknown command: reproduce the input in its original order. */
    put_prefix(outer, accent, inner);
    put('\\');
    put(c);
    if (seq[1] != 0)
        put(seq[1]);
    if (arg)
        put('{');
    hand_back(d);
}

/*
 * Copy stdin to stdout, replacing TeX accent and special-letter commands by
 * their Unicode equivalents.  Returns 0 on success; malformed input and I/O
 * errors terminate the program with status 1 and a message on stderr.
 */
int main(void)
{
    int c;

    while ((c = get()) != EOF) {
        if (c == '\\' || c == '{')
            convert(c);
        else
            put(c);
    }
    /* putstr() does not flush; make sure a late write error is reported. */
    if (fflush(stdout) == EOF) {
        perror("fflush");
        exit(1);
    }
    return 0;
}