/* fetchbib: generate a Makefile that will automatically fetch the bibliography data from various databases. Version 2020-03-18. Copyright 2015 Dmitri Pavlov. Distributed under the terms of GNU General Public License version 3. */

// NOTE(review): the reviewed text carried six #include directives whose header
// names had been lost.  The five headers below declare every library name this
// file uses; the identity of the sixth could not be recovered -- confirm.
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Shell prologue placed at the start of every generated recipe line.
#define preamble "\tset -o pipefail; set -e; "

enum {
  WORD_SIZE = 256,             // capacity of every buffer filled by get_word()
  NAME_SIZE = WORD_SIZE + 8,   // a word plus the longest prefix ("data.")
  TARGET_SIZE = 1 << 11        // prerequisite list of one coll.* rule
};

// Position in the input, maintained by get() and reported by kprintf().
int line = 1, column = 0;

// Print a diagnostic (source location, input position, formatted message) to
// stderr and terminate with exit status 1.  Never returns.
void kprintf(const char *file, int sline, const char *func, const char *format, ...) {
  // Finish the current line of generated output before the message appears.
  putchar('\n');
  fflush(stdout);
  fprintf(stderr, "Error: source file %s, line %d, function %s (line %d, column %d in the input file):\n", file, sline, func, line, column);
  va_list ap;
  va_start(ap, format);
  vfprintf(stderr, format, ap);
  va_end(ap);
  fprintf(stderr, "\n");
  exit(1);
}

#define die(...) kprintf(__FILE__, __LINE__, __func__, __VA_ARGS__)
// Dies if errno is set.  errno is only meaningful immediately after a library
// call has reported failure, so use this only in that situation.
#define sdie(s) (errno != 0 ? die("system error: function %s, error %s", s, strerror(errno)) : (void)0)

// Read one character from stdin, updating line/column.
// Returns EOF, '\n', or a character in 32..126 or >= 160; any other value, or
// a read error, terminates the program.
int get(void) {
  int c = getchar();
  // A stale non-zero errno does not mean getchar failed; ask the stream.
  if (c == EOF && ferror(stdin))
    die("system error: function %s, error %s", "getchar", strerror(errno));
  if (c == '\n') {
    line++;
    column = 0;
  } else if (c != EOF) {
    column++;
  }
  int acceptable = c == EOF || c == '\n' || (c >= 32 && c < 127) || c >= 160;
  if (!acceptable)
    die("invalid character: %d = %c", c, c);
  return c;
}

// Like get(), but end of input is a fatal error.
int get_char(void) {
  int c = get();
  if (c == EOF)
    die("unexpected end of file");
  return c;
}

// Read one word (terminated by a space or a newline) into s, which must have
// room for WORD_SIZE bytes.
// Returns 0 at end of input before any character, 1 if the word was ended by
// a newline, 2 if it was ended by a space.  An empty word, end of input inside
// a word, or a word that does not fit in s terminates the program.
int get_word(char *s) {
  int n = 0;
  while (1) {
    int c = get();
    if (c == EOF) {
      if (n == 0)
        return 0;
      die("unexpected end of file");
    }
    if (c == ' ' || c == '\n') {
      if (n == 0)
        die("word expected");
      s[n] = 0;
      return c == '\n' ? 1 : 2;
    }
    // Leave room for the terminator instead of running past the buffer.
    if (n >= WORD_SIZE - 1)
      die("word too long (more than %d characters)", WORD_SIZE - 1);
    s[n++] = (char)c;
  }
}

// Print s so that it is safe inside a single-quoted shell string: each '
// is emitted as '"'"' (close quote, double-quoted ', reopen quote).
void quot(const char *s) {
  for (int k = 0; s[k] != 0; k++) {
    putchar(s[k]);
    if (s[k] == '\'')
      printf("\"'\"'");
  }
}

// Copy src to dst, replacing every character that occurs in bad by '.'.
// dst must have room for strlen(src) + 1 bytes.
static void sanitize(char *dst, const char *src, const char *bad) {
  size_t k;
  for (k = 0; src[k] != 0; k++)
    dst[k] = strchr(bad, src[k]) != NULL ? '.' : src[k];
  dst[k] = 0;
}

// Append src to the string in dst (capacity size bytes); dies if it does not fit.
static void append(char *dst, size_t size, const char *src) {
  size_t used = strlen(dst);
  size_t extra = strlen(src);
  if (extra >= size - used)
    die("too many or too long identifiers for one entry");
  memcpy(dst + used, src, extra + 1);
}

// Append src to the heap string *buf (length *len, capacity *cap), growing it
// as needed; dies if memory cannot be obtained.
static void grow_append(char **buf, size_t *len, size_t *cap, const char *src) {
  size_t extra = strlen(src);
  size_t need = *len + extra + 1;
  if (need > *cap) {
    size_t ncap = *cap == 0 ? (size_t)1 << 12 : *cap;
    while (ncap < need)
      ncap *= 2;
    char *tmp = realloc(*buf, ncap);
    if (tmp == NULL)
      die("out of memory");
    *buf = tmp;
    *cap = ncap;
  }
  memcpy(*buf + *len, src, extra + 1);
  *len += extra;
}

// Read entries from stdin and print the Makefile to stdout.
// Each input line is a key followed by identifiers (MR..., Zbl:..., arXiv:...,
// doi:..., gen:..., http(s)://...) and/or inline "authors: title" data.  For
// every identifier a fetch rule is printed, then a coll.<key> rule that merges
// the fetched files; at end of input the refs.bib rule lists all coll.* files.
// With -z, zbMATH data is left out of the merged entries.
int main(int argc, char *argv[]) {
  int ignorezb = 0;
  if (argc == 2 && strcmp(argv[1], "-z") == 0)
    ignorezb = 1;
  else if (argc != 1) {
    fprintf(stderr, "Usage: %s [ -z ]\n -z: ignore zbMATH entries\n", argv[0]);
    return 1;
  }
  int verbose = 0;
  // Prerequisites of refs.bib; heap-allocated so that the number of entries is
  // not limited by a fixed buffer.
  char *all = NULL;
  size_t all_len = 0, all_cap = 0;
  while (1) {
    char s[WORD_SIZE];
    switch (get_word(s)) {
    case 0:
      printf("refs.bib:%s\n" preamble "cat $+ >prelim.$@; mv prelim.$@ $@\n", all != NULL ? all : "");
      free(all);
      return 0;
    case 1:
      die("no data");
      break;
    case 2: {
      if (verbose)
        fprintf(stderr, "> %s\n", s);
      char t[WORD_SIZE], target[TARGET_SIZE] = "", san[WORD_SIZE];
      // The key with ':' replaced, used in the names of generated targets.
      sanitize(san, s, ":");
      char mr[NAME_SIZE] = "", zbl[NAME_SIZE] = "", arxiv[NAME_SIZE] = "", doi[NAME_SIZE] = "", gen[NAME_SIZE] = "", url[NAME_SIZE] = "", data[NAME_SIZE] = "";
      while (1) {
        int status = get_word(t);
        if (status == 0)
          die("unexpected end of file");
        if (verbose)
          fprintf(stderr, ">>%d %s\n", status, t);
        if (strncmp(t, "MR", 2) == 0) {
          // The `<pre>` address was restored here: the reviewed text had a raw
          // line break in its place, which is not valid inside a C string
          // literal, and the second address of the same sed command is the
          // matching closing tag.
          printf("%s:\n" preamble "curl -v -f -s -S https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=bibtex\\&pg1=MR\\&s1=%s | sed -n '1,/<pre>/d;/<\\/pre>/,$$d;p' | sed -n '/@article\\|@book\\|@incollection\\|@inproceedings/,$$p' | sed '1s/{.*//;$$d' | tr '\\n' '\\t' | sed 's/\\t             //g' | tr '\\t' '\\n' | sed 's/^ *\\([a-zA-Z0-9]* = \\)/\\L\\1/' >prelim.$@; mv prelim.$@ $@\n", t, t + 2);
          snprintf(mr, sizeof mr, "%s", t);
          append(target, sizeof target, " ");
          append(target, sizeof target, mr);
        } else if (strncmp(t, "Zbl:", 4) == 0) {
          // The fetch rule is printed even with -z; only its use is suppressed.
          printf("Zbl.%s:\n" preamble "zcurl() { data=\"$$(curl -f -s -S \"$$@\")\"; while [[ $$data == *captcha* ]]; do id=\"$$(printf '%%s\\n' \"$$data\" | grep captcha_id | sed 's/.*value=\"\\([^\"]*\\)\".*/\\1/')\"; show \"https://zbmath.org/captcha/$$id\"; echo Enter CAPTCHA:; read -r captcha; data=\"$$(curl -f -s -S -F captcha_id=\"$$id\" -F captcha_solution=\"$$captcha\" \"$$@\")\"; done; printf '%%s\\n' \"$$data\"; }; zcurl https://zbmath.org/bibtex/$$(zcurl https://zbmath.org/?q=an:%s | sed -n 's@.*bibtex/\\([^\"]*\\)\".*@\\1@gp') | sed '1s/{.*//;$$d' | sed '$$s/$$/,/' | sed 's/^ *\\([a-zA-Z0-9]* = \\)/\\L\\1/;s/@[a-zA-Z0-9]*$$/\\L&/' >prelim.$@; mv prelim.$@ $@\n", t + 4, t + 4);
          snprintf(zbl, sizeof zbl, "Zbl.%s", t + 4);
          if (!ignorezb) {
            append(target, sizeof target, " ");
            append(target, sizeof target, zbl);
          }
        } else if (strncmp(t, "arXiv:", 6) == 0) {
          // Target name: the identifier with ':' and '/' replaced by '.'.
          sanitize(arxiv, t, ":/");
          // NOTE(review): several sed patterns in this recipe (`s@]*>@`,
          // `s@^$$@`, and the `\1` in the primaryclass rule, which has no
          // capture group) look as if XML tag text was stripped from them
          // before this file reached review.  They are reproduced unchanged;
          // confirm against the arXiv Atom response format before relying on
          // this rule.
          printf("%s:\n" preamble "data=`curl -v -f -s -S http://export.arxiv.org/api/query?id_list=%s | tr \\\\\\n \\  | sed '"
                 "s@<[^>]*/>@\\n&\\n@g;s@<[^/][^>]*>@\\n&@g;s@]*>@&\\n@g' | sed -n '"
                 "s@^$$@archiveprefix = {arXiv},@p;"
                 "s@^http://arxiv.org/abs/\\(.*\\)$$@eprint = {\\1},@p;"
                 "s@^\\(.*\\)$$@title = {{\\1}},@p;"
                 "s@^\\(.*\\)$$@author:\\1@p;"
                 "s@^\\(....\\).*$$@year = {\\1},@p;"
                 "s@^\\(.*\\)$$@doi = {\\1},@p;"
                 "s@^$$@primaryclass = {{\\1}},@p;"
                 "'`; "
                 "echo \"$$data\"; "
                 "{ printf '@misc\\nauthor = {'; "
                 "echo \"$$data\" | sed -n 's@^author:@@p' | tr \\\\\\n \\\\\\f | sed 's/\\f$$//' | sed 's/\\f/ and /g'; "
                 "echo '},'; "
                 "echo \"$$data\" | grep -v '^author:'; } >prelim.$@; mv prelim.$@ $@\n", arxiv, t + 6);
          append(target, sizeof target, " ");
          append(target, sizeof target, arxiv);
        } else if (strncmp(t, "doi:", 4) == 0) {
          // Target name: the identifier with ':', '/', '(' and ')' replaced by '.'.
          sanitize(doi, t, ":/()");
          printf("%s:\n" preamble "curl -v -f -s -S -LH 'Accept: application/x-bibtex' 'https://doi.org/%s' | grep -v '^}$$' | tr \\\\\\n \\\\\\f | sed 's/\\f\\t\\t//g' | tr \\\\\\f \\\\\\n | sed '1s/{.*//;s/}$$/},/' | sed 's/^\\t//g' >prelim.$@; mv prelim.$@ $@\n", doi, t + 4);
          append(target, sizeof target, " ");
          append(target, sizeof target, doi);
        } else if (strncmp(t, "gen:", 4) == 0) {
          printf("gen.%s:\n" preamble "echo 'url = {http://gen.lib.rus.ec/book/index.php?md5=%s},' >prelim.$@; mv prelim.$@ $@\n", t + 4, t + 4);
          printf("\tcurl -v -f -I -s -S 'http://gen.lib.rus.ec/book/index.php?md5=%s' >/dev/null\n", t + 4);
          snprintf(gen, sizeof gen, "gen.%s", t + 4);
          append(target, sizeof target, " ");
          append(target, sizeof target, gen);
        } else if (strncmp(t, "http://", 7) == 0 || strncmp(t, "https://", 8) == 0) {
          printf("url.%s:\n" preamble "echo 'url = {", san);
          // Every '%' of the URL is written as \% in the url field.
          for (int k = 0; t[k] != 0; k++) {
            if (t[k] == '%')
              printf("\\%%");
            else
              putchar(t[k]);
          }
          printf("},' >prelim.$@; mv prelim.$@ $@\n");
          printf("\tcurl -v -f -I -s -S '%s' >/dev/null\n", t);
          snprintf(url, sizeof url, "url.%s", san);
          append(target, sizeof target, " ");
          append(target, sizeof target, url);
        } else {
          // Inline data: author words up to a word ending in ':', then the
          // rest of the line as the title.
          snprintf(data, sizeof data, "data.%s", san);
          append(target, sizeof target, " ");
          append(target, sizeof target, data);
          printf("data.%s:\n" preamble "{ echo 'author = {", san);
          int authors_done = 0;
          while (!authors_done) {
            // get_word never yields an empty word, so len >= 1.
            size_t len = strlen(t);
            switch (t[len - 1]) {
            case ',':
              // A trailing comma separates two authors.
              t[len - 1] = 0;
              quot(t);
              printf(" and ");
              break;
            case ':':
              // A trailing colon ends the author list.
              t[len - 1] = 0;
              quot(t);
              printf("},'; ");
              authors_done = 1;
              break;
            default:
              quot(t);
              printf(" ");
              break;
            }
            if (!authors_done) {
              status = get_word(t);
              if (status != 2)
                die("expected author name");
            }
          }
          printf("echo 'title = {{");
          int c;
          while ((c = get_char()) != '\n') {
            putchar(c);
            // Same single-quote escaping as quot().
            if (c == '\'')
              printf("\"'\"'");
          }
          printf("}},'; } >prelim.$@; mv prelim.$@ $@\n");
          // The title consumed the rest of the line, so the entry is complete.
          break;
        }
        if (status == 1)
          break;
      }
      // psrc counts primary sources given, esrc those actually used.
      int psrc = 0, esrc = 0;
      printf("coll.%s:%s\n" preamble "{ ", san, target);
      // The first line of the entry (its BibTeX type) comes from the first
      // available source in this order of preference.
      if (strcmp(mr, "") != 0)
        printf("sed 1q <%s; ", mr);
      else if (strcmp(zbl, "") != 0 && !ignorezb)
        printf("sed 1q <%s; ", zbl);
      else if (strcmp(doi, "") != 0)
        printf("sed 1q <%s; ", doi);
      else if (strcmp(arxiv, "") != 0)
        printf("sed 1q <%s; ", arxiv);
      else
        printf("echo @misc; ");
      printf("echo {%s,; ", s);
      if (strcmp(url, "") != 0)
        printf("cat %s; ", url);
      if (strcmp(mr, "") != 0) {
        printf("sed 1d <%s | sed 's/mrnumber = {\\([0-9]*\\) .*}/mrnumber = {\\1}/;/author/s/\\\\\"./{&}/g;/author/s/\\\\u\\\\i /{&}/g' | grep -v 'url = {http://dx.doi.org/'; ", mr);
        psrc++;
        esrc++;
      }
      if (strcmp(zbl, "") != 0) {
        psrc++;
        if (!ignorezb) {
          esrc++;
          printf("sed 1d <%s; ", zbl);
        }
      }
      if (strcmp(doi, "") != 0) {
        printf("sed 1d <%s | grep -v 'url = {https://doi.org/'; ", doi);
        psrc++;
        esrc++;
      }
      if (strcmp(arxiv, "") != 0) {
        printf("sed 1d <%s; ", arxiv);
        psrc++;
        esrc++;
      }
      if (strcmp(gen, "") != 0)
        printf("cat %s; ", gen);
      if (strcmp(data, "") != 0)
        printf("cat %s; ", data);
      // Number the lines, drop lines whose second field repeats, then restore
      // the original order and strip the numbers.
      printf("echo }; } | nl -nln -w1 -s ' ' | sort -u -t ' ' -k2,2 | sort -n -k1 -t ' ' | cut -d ' ' -f2- >prelim.$@; mv prelim.$@ $@\n");
      grow_append(&all, &all_len, &all_cap, " coll.");
      grow_append(&all, &all_len, &all_cap, san);
      if (psrc > 0 && esrc == 0)
        die("Not enough primary sources to generate an entry for %s (i.e., -z is specified and only a zbMATH id is given)", s);
      break;
    }
    }
  }
}