#!/bin/bash # article: a bash script to download a paper indexed by arXiv, Mathematical Reviews, Zentralblatt MATH, CrossRef, CiteSeerX, Project Euclid # and save it under a file name like smith-brown-an-example-of-a-model-category.pdf. # See the source code for the list of supported HTTP URLs. # Copyright 2014--2021 Dmitri Pavlov. Distributed under the terms of the GNU Affero General Public License version 3. version=2024-04-19 export SED="env LC_CTYPE=C sed" set -u -e -o pipefail echoerr() { >&2 echo "$@" } fatal() { echoerr "$@" exit 1 } echon() { if [[ ! -v quiet ]]; then echoerr "$@"; fi } echov() { if [[ -v verbose ]]; then echoerr "$@"; fi } syn() { fatal "Synopsis: $0" '[ options ] [ MR[0-9]* | Zbl:[0-9]*.[0-9]* | arXiv:[0-9]*.[0-9]* | arXiv:[-a-z]*/[0-9]* | doi:.* | isbn:[0-9-]* | cs:[0-9.]* | http://.* | https://.* ]' " -d directory: directory where to save the file -q: quiet, do not output any diagnostics -v: verbose, print additional diagnostics -i: offer an interactive choice of a full-text URL to download -u url: use url as a full text URL -f: fancy file names -n: dry run: print the final file name and the full text URLs, but do not download anything -a: extract and print abstract page URLs, do not download anything -e command arguments --: execute a command after a successful download -p pairing: use an AMS pairing key to access MathSciNet" } texsimp() { texuni | uconv -x any-nfc } texstrip() { sed 's/\\[a-zA-Z]*//g;s/[$^_{}]//g' } texnorm() { texsimp | texstrip } fetch() { echov Fetching "$@" cline=(curl -s -S -f) if [[ -v amspairing ]]; then case "${@:$#}" in *.ams.org/*) cline+=("-b" "amspairing=$amspairing") ;; esac fi set +e "${cline[@]}" "$@" || echov fetch "$@" failed set -e } fetchc() { fetch -b /dev/null "$@" } fetchr() { fetch -L "$@" } fetchcr() { fetchc -L "$@" } sfetch() { fetch -o /dev/null -I -w "%{redirect_url}\n" "$@" } fetchz() { data="$(fetch "$@")" while [[ $data == *captcha* ]]; do echon zbMATH demands a CAPTCHA, which means that 
no subscription is available. Manual entry. id="$(printf '%s\n' "$data" | grep -a captcha_id | sed 's/.*value="\([^"]*\)".*/\1/')" show "https://zbmath.org/captcha/$id" echo Enter zbMATH CAPTCHA: read -r captcha echov Entered CAPTCHA: "$captcha" data="$(fetch -F captcha_id="$id" -F captcha_solution="$captcha" "$@")" done printf '%s\n' "$data" } retft() { local pdf="$1" echon Attempting to retrieve the full text using URL "$pdf" local tname="$2" local jar="/dev/null" local -a addopts=("-L") case "$pdf" in https://www.jstor.org/*) #jar="$(mktemp /tmp/article-XXX)" #echov Special cookie treatment for JSTOR #echov Cookie jar file: "$jar" #fetchcr -c "$jar" -I "$pdf" addopts+=("-L" "-A" 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36') #curl -L -b /dev/null -A 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36' https://www.jstor.org/tc/verify?origin=/stable/pdf/1969099.pdf pdf="$pdf"'?acceptTC=true' ;; http*://projecteuclid.org/*) echov Publisher: Project Euclid echov Adding referrer addopts+=("-e" "https://projecteuclid.org") ;; http*://*oup.com/*) echov Publisher: OUP echov Added user agent addopts+=("-A" "Mozilla/5.0") ;; http*://*cms.math.ca/*) echov Publisher: CMS echov Adding referrer addopts+=("-e" "https://cms.math.ca/") ;; http*://libgen.pw*) echov Repository: Library Genesis pw echov Adding referrer addopts+=("-e" "https://libgen.pw") ;; http*://sci.libgen.pw*) echov Repository: Sci Library Genesis pw echov Adding insecure option addopts+=("-k") ;; http*://*eudml.org*) echov Repository: EuDML echov Adding insecure option addopts+=("-k") ;; http*://*bookfi.net*) echov Repository: Library Genesis bookfi echov Adding referrer addopts+=("-e" "https://bookfi.net") ;; #http*://*b-ok.cc*) # echov Repository: Library Genesis b-ok # echov Adding referrer # addopts+=("-e" "https://b-ok.cc") ;; #http*://booksc.xyz*) # echov Repository: 
Library Genesis BookSC # echov Adding referrer # addopts+=("-e" "https://booksc.xyz") ;; http*://*sciencedirect*) echov Repository: Elsevier echov Adding user agent addopts+=("-A" "Mozilla") ;; http*://*ams.org/*) echov Publisher: AMS if [[ -v amspairing ]]; then echov Adding pairing key jar="amspairing=$amspairing" else echov No pairing key specified fi ;; esac cline=(curl -b "$jar" "${addopts[@]}" "$pdf" -w '%{content_type}\n%{http_code}\n' -o "$tname") echov "Command line for full text download: ${cline[*]}" result="$("${cline[@]}")" type="$(echo "$result" | head -1)" echov Content-Type: "$type" code="$(echo "$result" | tail -1)" echov HTTP code: "$code" case "$code" in 200) ;; 401) echon "HTTP code 401 (Unauthorized) most likely means that you have no subscription to this resource." echon "Check whether you have a subscription; if you can successfully download the full text file, please email me." return 1 ;; 404) echon "HTTP code 404 (Not Found) usually means that there is a bug in the script, unless downloading from one of Library Genesis mirrors." echon "Please email me the command line and the output of the script so that I can fix the bug." return 1 ;; *) echon "Error: HTTP code is $code, not 200, downloaded file ignored." echon "Check whether you have a subscription; if you can successfully download the full text file, please email me." 
return 1 ;; esac case "$type" in application/pdf) echon PDF file; dextn="pdf" ;; application/pdf*) echon PDF file from JSTOR; dextn="pdf" ;; # JSTOR server is run by incompetent people text/pdf) echon PDF file from CUP; dextn="pdf" ;; # CUP server is also run by incompetent people "multipart/form-data;charset=utf-8") echon PDF file from AIM; dextn="pdf" ;; # so is the AIM server application/postscript) echon PostScript; dextn="ps" ;; image/vnd.djvu) echon DjVu; dextn="djvu" ;; application/x-dvi) echon DVI; dextn="dvi" ;; application/x-tar) echon DVI in a TAR file; dextn="tar" ;; application/octet-stream*|application/download*) if [[ -v extn ]]; then echon File from Library Genesis, extension "$extn" dextn="$extn" else echon "No extension supplied for application/octet-stream. Report this error to me by email." return 1 fi ;; *) echon "Unrecognized Content-Type: not PDF, PostScript, DjVu, or DVI, downloaded file ignored." echon "This might mean that you have no subscription to this content, because many scientific repositories use a brain-damaged way to report authorization errors." echon "If you do have a subscription and can successfully download and view the full text file, please email me." 
# NOTE(review): the physical line breaks in this copy of the file appear mangled by extraction
# (several commands and sed patterns above/below are split mid-token, and some <...> fragments
# inside sed patterns look stripped); diff against the upstream 'article' script before executing.
return 1 ;; esac echon Extension: "$dextn" } declare -A xtried fturl() { if [[ -v abstract ]]; then echo "$1" return fi local url="$1" echon Abstract page URL: "$url" set +e # allow curl to fail so that doi links still get processed meta="$(fetchcr -k -I -A / -w "%{content_type}\n%{url_effective}\n" "$url")" set -e ctype="$(echo "$meta" | tail -2 | head -1)" echon Content-Type: "$ctype" crurl="$(echo "$meta" | tail -1)" echon Completely resolved URL: "$crurl" case "$ctype" in application/pdf*) urls+=("$crurl") echov "Content-Type is application/pdf" echov "URL added: PDF: $crurl" #data="" ;; set +e data="$(fetchcr -k -A / "$url")" set -e ;; *) set +e data="$(fetchcr -k -A / "$url")" set -e ;; esac if [[ -v auti ]]; then if echo "$data" | grep -aEc 'doi.org/|"doi"' >/dev/null; then dois="$(echo "$data" | sed -n 's@.*doi.org/\([^ "<]*\).*@\1@p;s/"doi":"\([^"]*\)"/\1/p')" echov Extracted DOIs: echov "$dois" unset auti doi "$(echo "$dois" | head -1)" return fi; fi if echo "$data" | grep -ac citation_pdf_url >/dev/null; then echon 'Generic method (citation_pdf_url), e.g., AMS, EMS, Project Euclid, CUP, OUP, Springer, de Gruyter, Wiley' local pdf echo "$data" >/tmp/debugdata pdf="$(echo "$data" | tr \\n \ | $SED -n 's@.*\(<[^>]*citation_pdf_url[^>]*>\).*@\1@p' | $SED -n 's@.*content=[^"]*"\([^"]*\)".*@\1@p')" echon citation_pdf_url: "$pdf" case "$pdf" in http*://onlinelibrary.wiley.com/doi/*) echov Publisher: Wiley pdf="$(fetch "$pdf" | sed -n 's@.*id="pdfDocument" src="\([^"]*\)".*@\1@p' | sed 's/&/\&/g')" if [[ -z "$pdf" ]]; then echon 'No Wiley PDF URL found; possible cause: no subscription' unset pdf else echov Adjusted Wiley PDF URL: "$pdf" fi ;; http*://journals.cambridge.org/*) echov Publisher: CUP pdf="$(sfetch "$pdf")""&toPdf=true" echov Adjusted CUP PDF URL: "$pdf" ;; http://*) echov Generic HTTP URL ;; https://*) echov Generic HTTPS URL ;; *) echov Generic relative URL urlbase="$(sfetch "$url" | sed -n 's@^\(http.*//[^/]*\).*@\1@p')" echov Base "$urlbase" 
pdf="$urlbase$pdf" echov Adjusted URL: "$pdf" ;; esac case "$data" in *"Duke Mathematical Journal"*) echov Switching to the nonenhanced PDF for the Duke Mathematical Journal due to an insane color scheme pdf="${pdf//pdfview/pdf}" ;; esac if [[ -v pdf ]]; then urls+=("$pdf") echon URL added: citation_pdf_url: "$pdf" fi fi if [[ -v auti ]]; then echov Attempting to extract title and authors from the HTML data="$(echo "$data" | iconv -f "$(echo "$data" | file -b --mime-encoding -)" -t utf-8 -c)" if echo "$data" | grep -ac citation_title >/dev/null; then title="$(echo "$data" | tr \\n \ | $SED -n 's@.*\(<[^>]*citation_title[^>]*>\).*@\1@p' | tee | sed -n 's@.*content=[^"]*"\([^"]*\)".*@\1@p')" echov citation_title: "$title" fi if echo "$data" | grep -ac citation_author >/dev/null; then authors="$(echo "$data" | sed -n 's@.*\(<[^>]*citation_author[^>]*>\).*@\1@p' | sed -n 's@.*content=[^"]*"\([^"]*\)".*@\1@p')" echov citation_author: "$authors" if echo "$authors" | grep -a ,; then authors="$(echo "$authors" | sed 's/,.*//')" echov Author last names before commas: "$authors" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" echov Author last names: "$authors" fi fi fi local pdf case "$url" in http*://*doi.org/*) doi="${url##*doi.org/}" echon DOI: "$doi" echov DOI URL: "$url" rurl="$(sfetch "$url")" echon Resolved DOI: "$rurl" ;; *) rurl="$url" ;; esac url="$rurl" case "$rurl" in http*://mr.crossref.org/*) echon CrossRef fork links="$(fetch "$rurl" | { grep -ao "href=['\"][^'\"]*['\"]" || true; } | sed 's/href=.//;s/.$//' | { grep -aEv '^https?://.*doi.org' || true; } | uniq)" echon Detected links: echon "$links" for i in $links; do echon Recursively processing link "$i" fturl "$i" done ;; http*://linkinghub.elsevier.com/retrieve/pii/*) echov Publisher: Elsevier eid="${rurl##*/pii/}" echov Elsevier ID: "$eid" #pdf="$(echo "$data" | sed -n 's/.*pdfurl="\([^"]*\)".*/\1/p')" set +e pdf="$(fetch -L -A / "https://www.sciencedirect.com/science/article/pii/$eid" | 
grep application/json | sed 's@]*>@@;s@@@' | jq -r '.article.pdfDownload.urlMetadata|"https://www.sciencedirect.com/\(.path)/\(.pii)\(.pdfExtension)?md5=\(.queryParams.md5)&pid=\(.queryParams.pid)"')" #pdf="$(fetch -L -A / "https://www.sciencedirect.com/science/article/pii/$eid/pdfft" | sed -n "s@.*'\(https://pdf[^']*\)'.*@\1@p")" set -e #pdf="$(echo "$eapdata" | sed -n 's/.*pdfurl="\([^"]*\)".*/\1/p')" #pdf="$(echo "$eapdata" | sed -n 's@.*Read article.*@\1@p')" ;; http*://www.intlpress.com/*) echov Publisher: International Press pdf="$rurl"$(fetch "$rurl/body.html" | sed -n 's@.*"\([^"]*.pdf\)".*@\1@p') ;; http*://*.impan.pl/cgi-bin/doi*) echov Publisher: IMPAN pdf="${rurl//\/doi/\/pdf}" if [[ "${pdf: -2:1}" == "-" ]]; then pdf="${pdf:0:-2}-0${pdf: -1:1}" fi ;; http*://retro.seals.ch/digbib/view?rid=*) echov Publisher: retro.seals pdf="${rurl//digbib\/view?rid=/cntmng?pid=}" ;; # end of DOI URLs http*://www.numdam.org/item?id=*) echov Publisher: Numdam numdam="${url:30}" echov Numdam ID: "$numdam" pdf="http://archive.numdam.org/article/$numdam.pdf" ;; http*://www.numdam.org/item/*) echov Publisher: Numdam numdam="${url:27}" echov Numdam ID: "$numdam" pdf="http://archive.numdam.org/article/$numdam.pdf" ;; http*://*.cedram.org/item?id=*) echov Publisher: Cedram numdam="${url#*item?id=}" echov Numdam ID: "$numdam" pdf="http://archive.numdam.org/article/$numdam.pdf" ;; http*://eudml.org/*) echov Publisher: EuDML pdf="$(echo "$data" | sed -n "s@.*Full (PDF).*@\1@p')" if [ -z "$pdf" ]; then link="$(echo "$data" | sed -n "s@.*Access to full text.*@\1@p')" echov Intermediate link: "$link" fturl "$link" fi ;; http*://muse.jhu.edu/*.pdf) echov Publisher: MUSE pdf="$url" ;; http*://www.emis.de/*abs.html) echov Publisher: EMIS pdf="${url//abs.html/.pdf}" ;; http*://www.emis.de/*.html) echov Publisher: EMIS pdf="${url//html/pdf}" ;; http*://www.digizeitschriften.de/dms/*) echov Publisher: DigiZeitschriften link="${url//resolveppn/img}" echov PDF page for DigiZeitschriften: 
"$link" data="$(fetch "$link")" pdf="$(echo "$data" | sed -n 's@.*.*@\1@p')" ;; http*://gdz.sub.uni-goettingen.de/*) echov Publisher: GDZ "$url" gdzppn="${url#*PPN=}" case "$gdzppn" in *DMDID=dmdlog*) dmd="${gdzppn#*dmdlog}" echov DMD: "$dmd" gdzppn="${gdzppn%&*}" echov Trimmed GDZ PPN: "$gdzppn" pdf="https://gdz.sub.uni-goettingen.de/download/pdf/$gdzppn/LOG_$(printf %04d "$dmd").pdf" ;; *) echov GDZ PPN: "$gdzppn" data="$(fetchr "$url")" manifest="$(echo "$data" | sed -n 's@.*data-manifest-url="\([^"]*\)".*@\1@p')" echov GDZ manifest URL: "$manifest" mdata="$(fetchr "$manifest")" #echov Manifest data: #echov "$mdata" pdf="$(echo "$mdata"| jq -r '.structures[] | select(.metadata[].value == "http://resolver.sub.uni-goettingen.de/purl?'"$gdzppn"'") | .rendering[]."@id"')" ;; esac ;; http*://*tac.mta.ca*) echov Publisher: TAC pdf="${url//abs.html/.pdf}" ;; http*://www.pnas.org/cgi/doi/*) echov Publisher: PNAS pdf="$crurl" ;; http*://tcms.org.ge/*) echov Publisher: TCMS volume="$(echo "$data" | sed -n 's@.*Vol. \([^(]*\)(.*@\1@p')" echov Volume: "$volume" trim="${url%/abstract.htm}" echov Trimmed URL: "$trim" stem="${trim##*/}" echov URL stem: "$stem" pdf="${trim//volumes/xvolumes}/v${volume}${stem}hl.pdf" ;; http*://*mathematik.uni-bielefeld.de/documenta/*) echov Publisher: Documenta Mathematica pdf="${url//html/pdf}" ;; http*://d-nb.info/*) echov Publisher: DNB pdf="$url" ;; *) echov Unknown URL "$url" echov If the script is unable to download the full text, please email me so that I can add support for this type of URL. ;; esac case "$crurl" in http*://link.springer.com/book/*) echon 'Publisher: Springer (book)' pdf="${crurl/book/content/pdf}.pdf" #https://link.springer.com/content/pdf/10.1007%2F978-3-319-09354-3.pdf #https://link.springer.com/book/10.1007%2F978-3-319-09354-3 #echon Springer books are typically split into many individual files, which does not fit the operational model of this script. Aborting. 
esac if [[ -v pdf && "$pdf" != "" ]]; then echon URL added: publisher: "$pdf" urls+=("$pdf") fi url="$1" case "$url" in http*://*doi.org/*) doi="${url##*doi.org/}" mapfile -t arxivurls < <(fetch http://export.arxiv.org/api/query --data-urlencode "search_query=doi:\"$doi\"" | xidel - -s -e "//feed/entry/link[@title='pdf']/@href" | sed '/^$/d') if [[ -v arxivurls ]]; then echon "URL added: arXiv URLs obtained using DOI:" echon "${arxivurls[@]}" urls+=("${arxivurls[@]}") fi #eteka="$(fetchr -G http://eteka.info/scimag/ads.php --data-urlencode "doi=$doi" | grep -o '"http[^"]*"' | tr -d '"')" #if [[ -v eteka ]]; then # echov "eteka.info URL: $eteka" # urls+=("$eteka") #fi #set +e #pwid="$(fetchr -k -G https://sci.libgen.pw/search --data-urlencode "q=$doi" | sed -n 's@.*/item/detail/id/\([0-9a-fA-F]*\).*@\1@p')" #set -e #if [[ -v pwid && "$pwid" != "" ]]; then # echov PW id: "$pwid" # urls+=("https://sci.libgen.pw/download/sci/$pwid") #fi #set +e #scurl="$(fetchr -G https://booksc.org/s/ --data-urlencode "q=$doi" | grep dlButton | sed -n 's@.*href="\([^"]*\)".*@\1@p')" #set -e #if [[ -v scurl && "$scurl" != "" ]]; then # echov "BookSC.org URL: $scurl" # urls+=("https://booksc.org$scurl") #fi set +e lolurl="$(fetchr "http://library.lol/scimag/$doi" | grep GET | sed -n 's@.*href="\([^"]*\)".*@\1@p')" set -e if [[ -v lolurl && "$lolurl" != "" ]]; then echov "Library.LOL URL: $lolurl" urls+=("$lolurl") extn=pdf # ??? 
fi set +e gsurl="$(fetchr -G http://libgen.gs/scimag/ads.php --data-urlencode "doi=$doi" | grep GET | sed -n 's@.*href="\([^"]*\)".*@\1@p')" set -e if [[ -v gsurl && "$gsurl" != "" ]]; then echov "LibGen.GS URL: $gsurl" #urls+=("http://libgen.gs/$gsurl") #gsurl="${gsurl/\\get.php/\/get.php}" gsurl="http://libgen.gs/$gsurl" urls+=("$gsurl") extn=pdf # libgen.gs returns application/octet-stream fi #set +e #bookscurl="$(fetchr -G https://booksc.xyz/s/ --data-urlencode "q=$doi" | sed '/fit your search query exactly but very close/,$d' | grep 'itemprop="name"' | head -1 | sed -n 's@.*href="\([^"]*\)".*@\1@p')" #set -e #if [[ -v bookscurl && "$bookscurl" != "" ]]; then # echov "BookSC URL: $bookscurl" # bsc="$(fetchr "https://booksc.xyz$bookscurl" | sed -n 's@.*href="\(/dl/[^"]*\)".*@\1@p' | head -1)" # urls+=("https://booksc.xyz$bsc") # extn="pdf" # echon "URL added: Library Genesis BookSC DOI URL: $bsc with extension $extn" #else # echon BookSC search unsuccessful. #fi #lgpdf="$(fetchr -G "http://booksdescr.org/scimag/" --data-urlencode "s=$doi" --data-urlencode "redirect=1" | sed 's/

/\n

/' | grep -a scimag/get | sed 's/.*href=".*\(http:[^"]*\)".*/\1/g' ||:)" #lgpdf="$(fetch -G "http://booksdescr.org/scimag/ads.php" --data-urlencode "doi=$doi" | sed 's/

/\n

/' | grep -a scimag/get | sed 's/.*href=".*\(http:[^"]*\)".*/\1/g' ||:)" #if [[ -v lgpdf && "$lgpdf" != "" ]]; then # urls+=("$lgpdf") # extn="pdf" # echon "URL added: Library Genesis DOI URL: $lgpdf with extension $extn" #fi #lgmd5="$(fetchr -G "https://sci.booksdescr.com/search" --data-urlencode "q=$doi" | sed -n 's@.*/item/detail/id/\([0-9a-zA-Z]*\).*@\1@p')" #if [[ -v lgmd5 && "$lgmd5" != "" ]]; then # echov Library Genesis MD5: "$lgmd5" # data="$(fetchr "http://libgen.rs/book/index.php?md5=$lgmd5")" # genurls "$lgmd5" "$data" #else # set +e # shub="$(fetchcr -e "https://sci-hub.tw/$doi" "https://sci-hub.tw/$doi")" # set -e # shuburl="$(printf '%s\n' "$shub" | sed -n 's@.*\(http[^"'\'']*\).*save.*@\1@p')" # if [[ -v shuburl && "$shuburl" != "" ]]; then # echon Sci-Hub URL: "$shuburl" # urls+=("$shuburl") # #lgid="${shubid#*/}" # #echon Sci-Hub redirected to Library Genesis with extended ID "$shubid" and ordinary ID "$lgid" # #lone="http://download.library1.org/main/$shubid/" # #urls+=("$lone") # #echon URL added: Sci-Hub redirect to Library1.org: "$lone" # #genurls "$lgid" "$shub" # fi #fi ;; esac echon Attempting to extract raw URLs from the abstract page "$crurl": mapfile -t newurls < <(echo "$data" | xidel - -s -e "(//@href, //@src)/resolve-uri(.,\"$crurl\")" | sed 's/#.*//' | grep -a pdf | grep -av "degruyter.com/flyer/\|degruyter.com/.*.toc.xml\|degruyter.com/.*.fm.xml\|ams.org/publications/\|ams.org/firefox\|endmatter\|msp.org/forms\|math.ca/Membership\|math.ca/Docs\|math.ca/.*/abstract/\|pdf-preview\|/marketing/\|\.gif$") for i in "${!newurls[@]}"; do if [[ ! 
${xtried["${newurls["$i"]}"]+_} ]]; then vnewurls+=("${newurls[$i]}") xtried["${newurls["$i"]}"]=1 echon 'Added a last resort URL extracted from the abstract page (only used in the interactive mode):' echon "${newurls["$i"]}" fi done if [[ -v vnewurls ]]; then if [[ -v interactive ]]; then urls+=("${vnewurls[@]}") fi echon Warning: some publishers link irrelevant PDFs from the abstract page, e.g., license agreements, abstracts, etc. echon If the script ends up downloading such an irrelevant PDF, please email me so that I can add it to the list of exceptions. fi } arXiv() { echon Database: arXiv "$1" data="$(fetch http://export.arxiv.org/api/query?id_list="$1" | tr \\n \ | $SED 's@<[^/][^>]*/>@\n&\n@g;s@<[^/][^>]*[^/>]>@\n&@g;s@]*>@&\n@g')" echov Processed output: echov "$data" arxiverr="$(echo "$data" | sed -n '\@^http://arxiv.org/api/errors.*$@{p;q1}')" if [[ -n "$arxiverr" ]]; then echon "$arxiverr"; fi id="$(echo "$data" | sed -n 's@^http://arxiv.org/abs/\(.*\)$@\1@p')" echov arXiv ID: "$id" title="$(echo "$data" | sed -n 's@^\(.*\)$@\1@p')" authors="$(echo "$data" | sed -n 's@^\(.*\)$@\1@p' | sed 's/.* \([^ ]*\)/\1/')" urls+=("http://arxiv.org/pdf/$id.pdf") } msn() { echon Database: MathSciNet "$1" data="$(fetch https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=endnote\&pg1=MR\&s1=MR"$1" | sed -n '1,/.*
/d;/.*<\/pre>/,$d;p')"
  # 'data' now holds the EndNote-format record extracted from the MathSciNet search page.
  echov EndNote:
  echov "$data"
  #data="$(echo "$data" | tr \\n \\t | sed 's/\t  //g' | tr \\t \\n)"
  # %T field = title; join wrapped record lines with '@' before extracting, then strip TeX markup.
  title="$(echo "$data" | tr \\n @ | sed -n 's/.*@%T \([^%]*\)@ *%.*/\1/p' | tr @ \  | texnorm)"
  echov Title: "$title"
  # %A fields = authors, one per line; keep the surname, i.e. the text before the first comma.
  authors="$(echo "$data" | sed -n 's/^%A //p' | sed 's/\([^,]*\),.*/\1/' | texnorm)"
  echov Authors: "$authors"

  local url
  url="$(echo "$data" | sed -n 's/^%U //p')"
  # No %U (full-text URL) field in the EndNote record - fall back to the item's HTML page.
  if [ -z "$url" ]; then
    echov No URL found in EndNote data, attempting to extract a URL from the HTML file
    hdata="$(fetch https://mathscinet.ams.org/mathscinet-getitem?mr=MR"$1" | sed -n "1,/.*MR0*$1<\/strong>.*/d;/.*Make Link.*/,\$d;p")"
    echov Processed output:
    echov "$hdata"
    #authors="$(echo "$hdata" | sed 's/@\n@g' | sed -n 's@\([^<]*\)@\1@p' | sed 's/\([^,]*\),.*/\1/')"
    #title="$(echo "$hdata" | tr \\n \  | sed -n 's@.*\([^<]*\).*@\1@p')"

    url="$(echo "$hdata" | sed -n 's@.*\(Article\|Chapter\|Book\).*@\1@p')"
    # Still no URL: special-case journals recognizable by the ISSN in the %@ field,
    # whose full-text URLs can be constructed directly from volume/number data.
    if [ -z "$url" ]; then
      case "$data" in
      *"%@ 1201-561X"*)
        echov Journal: Theory and Applications of Categories
        volume="$(echo "$data" | sed -n 's/^%V //p')"
        number="$(echo "$data" | sed -n 's/^%P [^0-9]*\([0-9]*\),.*/\1/p')"
        echov Volume "$volume", number "$number"
        stem="$number.pdf"
        # Build the TAC path: volumes below 10 use an n$number/ layout (volume 1 adds a 'v1'
        # prefix), later volumes use $number/$volume-...; volumes 1-5 are keyed by year (volume+1994).
        if (( volume < 10 )); then
          stem="n$stem"
          if (( volume == 1 )); then
            stem="v1$stem"
          fi
          stem="n$number/$stem"
        else
          if (( number < 10 )); then
            stem="0$stem"
          fi
          stem="$number/$volume-$stem"
        fi
        if (( volume < 6 )); then
          ((volume+=1994))
        fi
        stem="$volume/$stem"
        echov Stem "$stem"
        urls+=("http://tac.mta.ca/tac/volumes/$stem")
        return ;;
      *"%@ 1431-0635"*)
        echov Journal: Documenta Mathematica
        volume="$(echo "$data" | sed -n 's/^%V //p')"
        pages="$(echo "$data" | sed -n 's/^%P //p')"
        echov "Volume $volume, $pages"
        # Scrape the volume's contents page and pick the article whose page range matches %P.
        url="http://mathematik.uni-bielefeld.de/documenta/vol-$volume/"$(fetch "http://mathematik.uni-bielefeld.de/documenta/vol-$volume/vol-$volume.html" | tr \\n @ | sed -n "s|.* $pages@"'[^@]*Abstract.*|\1|p') ;;
      *)
        echon 'No full text URL supplied by MathSciNet.  Try zbMATH, sometimes it gives a full text URL when MathSciNet does not.'
        return ;;
      esac
    fi
  fi
  # Resolve the chosen abstract-page URL into candidate full-text URLs (fturl appends to 'urls').
  fturl "$url"
}


zbl() {
  echon Database: zbMATH "$1"
  data="$(fetchz https://zbmath.org/?q=an:"$1")"
  #authors="$(echo "$data" | sed 's/@\n@g' | sed -n 's@\([^<]*\)@\1@p' | sed 's/\([^,]*\),.*/\1/')"
  #title="$(echo "$data" | sed -n 's@.*
\([^<]*\)<.*@\1@p')" #echo "$data" >/tmp/zb url="$(echo "$data" | sed -n 's@.*/tmp/zb2 #grep -aEo ' {([^{}]|({[^}]*}))*} ' | authors="$(echo "$data" | sed -n 's@^ *Author = {\(.*\)},$@ \1 @p' | sed 's/^.//;s/.$//' | texsimp)" echov BibTeX authors: "$authors" title="$(echo "$data" | sed -n 's@^ *Title = {\(.*\)},$@\1@p' | texnorm)" echov BibTeX title: "$title" if [ -z "$url" ]; then echon 'No full text URL supplied by zbMATH. Try MathSciNet, sometimes it gives a full text URL when zbMATH does not.' return fi while read -r iurl; do echon Trying zbMATH abstract page URL "$iurl" fturl "$iurl" done <<< "$url" } urldecode() { a="$(cat)" printf '%b' "${a//%/\\x}" } doi() { echon Database: CrossRef "$1" ##data="$(fetch -H "Accept: application/vnd.citationstyles.csl+json" "https://data.crossref.org/$1")" #data="$(fetch "https://api.crossref.org/v1/works/$1")" data="$(fetch "https://doi.crossref.org/servlet/query?pid=some.address@mailinator.com&format=json&id=$1")" echov CrossRef data: "$data" #title="$(echo "$data" | jq -r .message.title[] | tr \\n \ )" title="$(echo "$data" | jq -r .created.title | tr \\n \ )" echov CrossRef raw title: "$title" title="$(echo "$title" | sed 's@<[^>]*>@@g')" echov CrossRef processed title with stripped tags: "$title" set +e #authors="$(echo "$data" | jq -r .message.author[].family)" authors="$(echo "$data" | jq -r .author[].family)" set -e echov CrossRef authors: "$authors" if [[ ! 
-v stoprecursion ]]; then stoprecursion=1 fturl "https://doi.org/$1" unset stoprecursion fi } cs() { echon Database: CiteSeerX "$1" data="$(fetch "http://citeseerx.ist.psu.edu/oai2?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:CiteSeerX.psu:$1")" title="$(echo "$data" | sed -n 's@.*\(.*\).*@\1@p')" authors="$(echo "$data" | sed -n 's@.*\(.*\).*@\1@gp' | sed 's/.* \([^ ]*\)/\1/')" urls+=("http://citeseerx.ist.psu.edu/viewdoc/download?doi=$1&rep=rep1&type=pdf") } pe() { echon Database: Project Euclid "$1" eucit="$(fetch https://projecteuclid.org/citation/download -H 'Content-Type: application/json; charset=utf-8' --data '{"contentType":"0","formatType":"0","referenceType":"","urlid":"'"$1"'"}')" echov Project Euclid internal citation id: "$eucit" data="$(fetch "https://projecteuclid.org/citation/download/$eucit")" #data="$(fetch http://projecteuclid.org/export_citations --data format=ris --data-urlencode "h=$1")" echov Project Euclid bibliographic data: echov "$data" title="$(echo "$data" | sed -n 's/^TI - //p')" authors="$(echo "$data" | sed -n 's/^AU - //p' | sed 's/.* \([^ ]*\)/\1/')" urls+=("http://projecteuclid.org/download/pdf_1/$1") } eudml() { echon Database: EuDML "$1" data="$(fetch -k "https://eudml.org/api/rest/urn:$1?format=oai_dc")" echov EuDML data: echov "$data" title="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p' | head -1)" authors="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p')" if echo "$authors" | grep -ac , >/dev/null; then authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" fi fturl "https://eudml.org/doc/${1##eudml:doc:}" } numdam() { echon Database: Numdam "$1" data="$(fetch "http://www.numdam.org/oai/" --data-urlencode "verb=GetRecord" --data-urlencode "metadataPrefix=oai_dc" --data-urlencode "identifier=oai:numdam.org:$1")" echov Numdam data: echov "$data" title="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p' | head -1)" authors="$(echo "$data" | sed -n 
's|.*>\([^>]*\).*|\1|p')" if echo "$authors" | grep -ac , >/dev/null; then authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" fi fturl "http://www.numdam.org/item/$1" } mathnet() { echon Database: Math-Net.Ru "$1" data="$(fetchr "http://mi.mathnet.ru/eng/$1")" echov Math-Net.Ru data: echov "$data" #title="$(echo "$data" | sed -n 's@.*\([^<]*\).*@\1@p' | python3 -c 'import html,sys; print(html.unescape(sys.stdin.read()), end="")')" title="$(echo "$data" | sed -n 's@.*\([^<]*\).*@\1@p' | sed 's/&[^;]*;//g')" authors="$(echo "$data" | grep personid= | sed -n 's@.*]*>\([^<]*\).*@\1@;s/ / /gp')" if echo "$authors" | grep -ac , >/dev/null; then authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" fi fulltext="$(echo "$data" | grep getFT | sed -n 's@.*href=\([^>]*\)>.*@\1@p')" fturl "http://mathnet.ru$fulltext" } isbn() { echon "Database: ISBN (via Library Genesis) $1" data="$(fetchr "http://libgen.rs/search.php?req=$1&column=identifier")" isbn=("$1") } gen() { echon Database: Library Genesis "$1" data="$(fetchr "http://libgen.rs/book/index.php?md5=$1")" #isbn=($(echo "$data" | sed -n 's@.*ISBN:\([^<]*\).*@\1@p' | tr -cs 0-9- \ )) mapfile -t isbn < <(echo "$data" | sed -n 's@.*ISBN:\([^<]*\).*@\1@p' | tr -cs 0-9- \ | sed 's/^ *//;s/ *$//') if [[ ${#isbn[@]} -ne 0 ]]; then echov ISBNs from Library Genesis: "${isbn[@]}" fi title="$(echo "$data" | sed -n 's@.*Title: ]*>\([^<]*\).*@\1@p')" authors="$(echo "$data" | sed -n 's@.*Author(s):\([^<]*\).*@\1@p' | sed 's/(auth.)//g' | sed 's/, /\n/g' | sed 's/^ *//;s/ *$//' | sed -n 's/.* \([^ ]*\)/\1/p')" #for i in "${!isbn[@]}"; do # echov Trying ISBN "${isbn[$i]}" with WorldCat # set +e # wdata="$(fetch "http://xisbn.worldcat.org/webservices/xid/isbn/${isbn[$i]}?method=getMetadata&format=json&fl=*")" # set -e # echov ISBN bibliographic data from WorldCat: "$wdata" # if [[ "ok" != "$(echo "$wdata" | jq 
-r .stat)" ]]; then # continue # fi # if ! authors="$(echo "$wdata" | jq -e -r '.list[0].author' | sed 's/\.$//;s/ ; /\n/g;s/ and /\n/g;s/, /\n/g' | sed 's/.* \([^ ]*\)/\1/')"; then # oclc="$(echo "$wdata" | jq -e -r '.list[0].oclcnum[0]')" # echov OCLC number: "$oclc" # wwdata="$(fetchr "http://www.worldcat.org/oclc/$oclc?page=endnote&client=worldcat.org-detailed_record")" # echov EndNote bibliographic data from WorldCat: "$wwdata" # authors="$(echo "$wwdata" | sed -n 's/^AU - //p' | sed 's/\(.*\),.*/\1/')" # fi # echov Authors from WorldCat: "$authors" # title="$(echo "$wdata" | jq -r .list[0].title)" # echov Title from WorldCat: "$title" # if [[ -n "$authors" && -n "$title" ]]; then # break # fi #done if [[ -z "$authors" || -z "$title" ]]; then for i in "${!isbn[@]}"; do echov Trying ISBN "${isbn[$i]}" with Google Books isbns="${isbn[*]}" allisbn="${isbns// /+}" printf 'allisbn=%s\n' "$allisbn" bdata="$(fetch "https://www.googleapis.com/books/v1/volumes?q=isbn+$allisbn&fields=items/volumeInfo(title,authors)&maxResults=1")" echov ISBN bibliographic data from Google Books: "$bdata" if [[ "$bdata" = "{}" ]]; then continue fi authors="$(echo "$bdata" | jq -r .items[0].volumeInfo.authors[] | sed 's/.* \([^ ]*\)/\1/')" echov Authors from Google Books: "$authors" title="$(echo "$bdata" | jq -r .items[0].volumeInfo.title)" echov Title from Google Books: "$title" if [[ -n "$authors" && -n "$title" ]]; then break fi done fi genurls "$1" "$data" } genurls() { data="$2" extn="$(echo "$data" | sed -n 's@.*Extension:\([^<]*\).*@\1@p')" id="$(echo "$data" | sed -n 's@.*ID:\([^<]*\).*@\1@p')" echov Library Genesis extension: "$extn" echov Library Genesis ID: "$id" annas="$(fetchr "https://annas-archive.org/md5/$1")" pinataurl="$(echo "$annas" | sed -n 's@.*href='"'"'\(https://gateway.pinata.cloud[^'"'"']*\)'"'"'.*@\1@p' | head -1)" echov 'IPFS URL 1 from annas-archive.org: ' "$pinataurl" # https://gateway.pinata.cloud/ipfs/$ipfs cfurl="$(echo "$annas" | sed -n 
's@.*href='"'"'\(https://cloudflare-ipfs.com[^'"'"']*\)'"'"'.*@\1@p' | head -1)" echov 'IPFS URL 2 from annas-archive.org: ' "$cfurl" # https://cloudflare-ipfs.com/ipfs/$ipfs #sdurl="$(fetchr "https://annas-archive.org/slow_download/$1/0/1")" rocksurl="$(fetchr "https://libgen.rocks/ads.php?md5=$1" | sed -n 's@.*\(get\.php[^"]*\).*@https://libgen.rocks/\1@p')" echov 'URL from libgen.rocks:' "$rocksurl" lolurl="$(fetchr "http://library.lol/main/$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')" echov 'URL from library.lol:' "$lolurl" liurl="http://libgen.li/$(fetchr "http://libgen.li/ads.php?md5=$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')" echov 'URL from libgen.li:' "$liurl" urls+=("$cfurl") urls+=("$pinataurl") urls+=("$rocksurl") urls+=("$liurl") urls+=("$lolurl") } #urls+=("http://93.174.95.29/main/${id::-3}000/${1,,}/filename") #lgkey="$(fetchr "http://libgen.rs/ads.php?md5=$1" | tr -d \\r | sed -n 's/.*key=\([^"]*\)".*/\1/p')" #echov Library Genesis Key: "$lgkey" #lgurl="$(fetchr "http://libgen.rs/ads.php?md5=$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')" #pwid="$(fetchr "https://libgen.pw/item/detail/id/$id" | sed -n 's@.*download/book/\([^"]*\)".*@\1@p')" #if [[ "$pwid" != "" ]]; then # echov PW id: "$pwid" # urls+=("https://libgen.pw/download/book/$pwid") # ${id::-3}000/ &hidden0=name.pdf&hidden1=${1,,} # urls+=("http://dl.lux.bookfi.net/genesis/${id::-3}000/${1,,}/_as/name.pdf") #else # echov PW id not found #fi #ambryid="$(fetchr "https://ambry.pw/item/detail/id/$id" | sed -n 's@.*download/book/\([^"]*\).*@\1@p')" #echov Ambry ID: "$ambryid" #urls+=("https://dnld.ambry.cx/download/book/$ambryid") #xyzida="$(fetchr "http://b-ok.cc/s/?q=$1" | sed -n 's@.* 0 )); do case "$1" in -d) shift if (( $# == 0 )); then fatal "Option -d requires an argument" fi if [[ -d "$1" ]]; then dirname="$1" else fatal "No such directory: $1" fi ;; -q) quiet=1 ;; -v) set -v verbose=1 ;; -i) interactive=1 ;; -u) shift if (( $# == 0 )); then fatal "Option 
-u requires an argument" fi echov "URL added: command line option -u: $1" urls+=("$1") ;; -f) fancy=1 ;; -n) dryrun=1 ;; -a) abstract=1 ;; -e) shift if (( $# == 0 )); then fatal "Option -e requires arguments" fi cmd="$1" args=() shift while (( $# > 0 )); do if [[ "$1" == "--" ]]; then break fi args+=("$1") shift done if (( $# == 0 )); then fatal "Unterminated -e option" fi ;; -p) shift if (( $# == 0 )); then fatal "Option -p requires an argument" fi amspairing="$1" ;; *) if (( $# == 1 )); then artid="$1" else fatal "Unrecognized option $1" fi ;; esac shift done if [[ ! -v artid ]]; then echoerr "No article id specified" syn fi echon "Scientific article full text downloader by Dmitri Pavlov, version $version." echon "To report bugs and missing features, please email me (host math.berkeley.edu, user pavlov). Please include the command line and the output of the script when run with the -v option in your email. Before submitting a bug report, please make sure that you can download the full text using your browser; the inability of the script to download the full text is often an indication that you don't have a subscription. " echov "Supported databases: MathSciNet, zbMATH, arXiv, DOI, Library Genesis. Email me if you want the script to support other databases. An incomplete list of supported repositories: ScienceDirect, SpringerLink, Taylor and Francis, Walter de Gruyter, World Scientific, SIAM, AMS, OUP, CUP, CMS, MSP, MUSE, TAC, JSTOR, Project Euclid, NUMDAM, CEDRAM, EuDML. Many additional repositories are supported by virtue of generic methods. Email me if you want the script to support other repositories. 
" echon Article ID: "$artid" case "${artid,,}" in */leavingmsn?url=*) artid="${artid##*/leavingmsn?url=}" ;; esac case "${artid,,}" in http*://*arxiv.org/abs/*) echov arXiv URL "$artid" arXiv "${artid##*abs/}" ;; http*://*arxiv.org/pdf/*) echov arXiv URL "$artid" trimurl="${artid##*pdf/}" arXiv "${trimurl%.pdf}" ;; *arxiv.org/abs/*) echov arXiv URL "$artid" arXiv "${artid##*abs/}" ;; *arxiv.org/pdf/*) echov arXiv URL "$artid" trimurl="${artid##*pdf/}" arXiv "${trimurl%.pdf}" ;; http://front.math.ucdavis.edu/*) echov Front for the arXiv URL "$artid" arXiv "${artid##*ucdavis.edu/}" ;; http*://*ams.org/mathscinet-getitem?mr=*) echov MathSciNet getitem URL "$artid" msnid="${artid##*mr=}" msn "${msnid##MR}" ;; http*://*ams.org/mathscinet/search/publdoc.html*mx-pid=*) echov MathSciNet search URL "$artid" msnid="${artid##*mx-pid=}" msn "${msnid%%&*}" ;; http*://*ams.org/mathscinet/search/*) echov MathSciNet generic search URL "$artid" data="$(fetch "$artid")" msnid="$(echo "$data" | grep -a mathscinet-getitem | sed 's/.*mathscinet-getitem?mr=\([^"]*\)".*/\1/')" msn "$msnid" ;; https://*zbmath.org/?q=an:*) echov zbMATH URL "$artid" zbl "${artid##*q=an:}" ;; https://zbmath.org/*) echov zbMATH URL "$artid" zbl "${artid##*zbmath.org/}" ;; http://*doi.org/* | https://*doi.org/*) echov DOI URL "$artid" doi "${artid##*doi.org/}" ;; http://*gen*md5=*) echov Library Genesis URL "$artid" genid="${artid##*md5=}" gen "${genid%%&*}" ;; http*://*citeseerx*/*doi=*) echov CiteSeerX URL "$artid" csid="${artid##*doi=}" cs "${csid%%&*}" ;; http://projecteuclid.org/euclid.*) echov Project Euclid URL "$artid" pe "${artid##http://projecteuclid.org/}" ;; http*://www.numdam.org/item/?id=*) echov Numdam URL "$artid" nid="${artid##http*://www.numdam.org/item/?id=}" numdam "${nid%%/}" ;; http*://www.numdam.org/item/*) echov Numdam URL "$artid" nid="${artid##http*://www.numdam.org/item/}" numdam "${nid%%/}" ;; http://mi.mathnet.ru/eng/*) echov Math-Net.Ru URL "$artid" 
mnid="${artid##*mathnet.ru/eng/}" mathnet "$mnid" ;; http://mi.mathnet.ru/rus/*) echov Math-Net.Ru URL "$artid" mnid="${artid##*mathnet.ru/rus/}" mathnet "$mnid" ;; #http://www.mathnet.ru/php/archive.phtml?wshow=paper&jrnid=ivm&paperid=4812&option_lang=eng http*://*mathnet.ru/*jrnid=*) echov Math-Net.Ru URL "$artid" mnid="$(echo "$artid" | sed 's@.*jrnid=\([^&]*\)&paperid=\([^&]*\).*@\1\2@')" mathnet "$mnid" ;; https://link.springer.com/article/*) echov Springer Link URL "$artid" slid="${artid##*springer.com/article/}" echov Springer DOI "$slid" doi "$slid" ;; http*://*) echon Unknown HTTP URL: "$artid" echon Attempting generic full-text URL extraction title=unknown-title authors=unknown-authors auti=1 fturl "$artid" ;; arxiv:*) arXiv "${artid:6}" ;; mr*) msn "${artid:2}" ;; zbl:*) zbl "${artid:4}" ;; doi:*) doi "${artid:4}" ;; gen:*) gen "${artid:4}" ;; cs:*) cs "${artid:3}" ;; pe:*) pe "${artid:3}" ;; eudml:*) eudml "$artid" ;; numdam:*) numdam "${artid:7}" ;; mathnet:*) mathnet "${artid:8}" ;; *) fatal "Unrecognized article ID: $artid" ;; esac echov Title: "$title" echov Authors: echov "$authors" stripp() { sed 's/\[[^]]*\]//g;s/\\[a-zA-Z]*//g;s|/|-|g' | if [[ -v fancy ]]; then sed 's/[[:space:]]\+/ /g;s/^ //;s/ $//' | tr \\n \\f | sed 's/\.$//g' | sed 's/\f$/. 
/;s/\f/, /g' else sed 's/.*/\L&/' | sed 's/'"'"'/\f/g;s/[[:punct:]]/ /g;s/\f/'"'"'/g;s/'"''"'//g;s/[[:space:]]\+/-/g;s/^-//;s/-$//' | tr \\n - fi } title="$(echo -n "$title" | stripp)" authors="$(echo "$authors" | stripp)" if [[ "$authors" == "-" ]]; then authors= fi name="$authors$title" echov Stripped title: "$title" echov Combined authors: "$authors" echov Local file name without extension: "$name" if [[ -v dirname ]]; then echov Directory: "$dirname" name="$dirname/$name" echon Directory and file name without extension: "$name" fi if [[ -v dryrun ]]; then echo "$name" printf '%s\n' "${urls[@]}" exit 0 fi tryft() { tname="$(mktemp /tmp/article-XXX)" echon Temporary file name: "$tname" if retft "$1" "$tname"; then if [ -s "$tname" ]; then echon Successfully downloaded "$1" fqname="$name.$dextn" echon Moving "$tname" to "$fqname" mv "$tname" "$fqname" if [[ -v cmd ]]; then echon Launching "$cmd" "${args[@]:+${args[@]}}" "$fqname" "$cmd" "${args[@]:+${args[@]}}" "$fqname" fi exit 0 else echon Downloaded an empty file, skipping. fi fi } if [ ${#urls[@]} -eq 0 ]; then echon No full text URLs found for "$artid" echon Email me if you can access the full text. exit 1 fi if [[ -v interactive ]]; then echo Full text URLs: for i in "${!urls[@]}"; do echo "$i) ${urls[$i]}" done if [[ "${#urls[@]}" == 1 ]]; then echon Automatically selecting the only URL tryft "${urls[0]}" exit 1 fi while true; do read -r if [ -z "$REPLY" ]; then echon Nothing selected exit 1 else echon Selected "$REPLY": "${urls["$REPLY"]}" tryft "${urls["$REPLY"]}" fi done else echon Full text URLs: for i in "${!urls[@]}"; do echon "$i) ${urls[$i]}" done declare -A tried for i in "${!urls[@]}"; do echov "i=$i" echov "urls[i]=${urls["$i"]}" if [[ ${tried["${urls["$i"]}"]+_} ]]; then echon Skipping the duplicate URL "$i": "${urls[$i]}" continue fi echon Attempting to download full text URL "$i": "${urls[$i]}" tried["${urls["$i"]}"]=1 tryft "${urls[$i]}" done echon No working full text URLs exit 1 fi