#!/bin/bash # article: a bash script to download a paper indexed by arXiv, Mathematical Reviews, Zentralblatt MATH, CrossRef, CiteSeerX, Project Euclid # and save it under a file name like smith-brown-an-example-of-a-model-category.pdf. # See the source code for the list of supported HTTP URLs. # Copyright 2014--2021 Dmitri Pavlov. Distributed under the terms of the GNU Affero General Public License version 3. version=2024-04-19 export SED="env LC_CTYPE=C sed" set -u -e -o pipefail echoerr() { >&2 echo "$@" } fatal() { echoerr "$@" exit 1 } echon() { if [[ ! -v quiet ]]; then echoerr "$@"; fi } echov() { if [[ -v verbose ]]; then echoerr "$@"; fi } syn() { fatal "Synopsis: $0" '[ options ] [ MR[0-9]* | Zbl:[0-9]*.[0-9]* | arXiv:[0-9]*.[0-9]* | arXiv:[-a-z]*/[0-9]* | doi:.* | isbn:[0-9-]* | cs:[0-9.]* | http://.* | https://.* ]' " -d directory: directory where to save the file -q: quiet, do not output any diagnostics -v: verbose, print additional diagnostics -i: offer an interactive choice of a full-text URL to download -u url: use url as a full text URL -f: fancy file names -n: dry run: print the final file name and the full text URLs, but do not download anything -a: extract and print abstract page URLs, do not download anything -e command arguments --: execute a command after a successful download -p pairing: use an AMS pairing key to access MathSciNet" } texsimp() { texuni | uconv -x any-nfc } texstrip() { sed 's/\\[a-zA-Z]*//g;s/[$^_{}]//g' } texnorm() { texsimp | texstrip } fetch() { echov Fetching "$@" cline=(curl -s -S -f) if [[ -v amspairing ]]; then case "${@:$#}" in *.ams.org/*) cline+=("-b" "amspairing=$amspairing") ;; esac fi set +e "${cline[@]}" "$@" || echov fetch "$@" failed set -e } fetchc() { fetch -b /dev/null "$@" } fetchr() { fetch -L "$@" } fetchcr() { fetchc -L "$@" } sfetch() { fetch -o /dev/null -I -w "%{redirect_url}\n" "$@" } fetchz() { data="$(fetch "$@")" while [[ $data == *captcha* ]]; do echon zbMATH demands a CAPTCHA, which means that 
no subscription is available. Manual entry. id="$(printf '%s\n' "$data" | grep -a captcha_id | sed 's/.*value="\([^"]*\)".*/\1/')" show "https://zbmath.org/captcha/$id" echo Enter zbMATH CAPTCHA: read -r captcha echov Entered CAPTCHA: "$captcha" data="$(fetch -F captcha_id="$id" -F captcha_solution="$captcha" "$@")" done printf '%s\n' "$data" } retft() { local pdf="$1" echon Attempting to retrieve the full text using URL "$pdf" local tname="$2" local jar="/dev/null" local -a addopts=("-L") case "$pdf" in https://www.jstor.org/*) #jar="$(mktemp /tmp/article-XXX)" #echov Special cookie treatment for JSTOR #echov Cookie jar file: "$jar" #fetchcr -c "$jar" -I "$pdf" addopts+=("-L" "-A" 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36') #curl -L -b /dev/null -A 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36' https://www.jstor.org/tc/verify?origin=/stable/pdf/1969099.pdf pdf="$pdf"'?acceptTC=true' ;; http*://projecteuclid.org/*) echov Publisher: Project Euclid echov Adding referrer addopts+=("-e" "https://projecteuclid.org") ;; http*://*oup.com/*) echov Publisher: OUP echov Added user agent addopts+=("-A" "Mozilla/5.0") ;; http*://*cms.math.ca/*) echov Publisher: CMS echov Adding referrer addopts+=("-e" "https://cms.math.ca/") ;; http*://libgen.pw*) echov Repository: Library Genesis pw echov Adding referrer addopts+=("-e" "https://libgen.pw") ;; http*://sci.libgen.pw*) echov Repository: Sci Library Genesis pw echov Adding insecure option addopts+=("-k") ;; http*://*eudml.org*) echov Repository: EuDML echov Adding insecure option addopts+=("-k") ;; http*://*bookfi.net*) echov Repository: Library Genesis bookfi echov Adding referrer addopts+=("-e" "https://bookfi.net") ;; #http*://*b-ok.cc*) # echov Repository: Library Genesis b-ok # echov Adding referrer # addopts+=("-e" "https://b-ok.cc") ;; #http*://booksc.xyz*) # echov Repository: 
Library Genesis BookSC # echov Adding referrer # addopts+=("-e" "https://booksc.xyz") ;; http*://*sciencedirect*) echov Repository: Elsevier echov Adding user agent addopts+=("-A" "Mozilla") ;; http*://*ams.org/*) echov Publisher: AMS if [[ -v amspairing ]]; then echov Adding pairing key jar="amspairing=$amspairing" else echov No pairing key specified fi ;; esac cline=(curl -b "$jar" "${addopts[@]}" "$pdf" -w '%{content_type}\n%{http_code}\n' -o "$tname") echov "Command line for full text download: ${cline[*]}" result="$("${cline[@]}")" type="$(echo "$result" | head -1)" echov Content-Type: "$type" code="$(echo "$result" | tail -1)" echov HTTP code: "$code" case "$code" in 200) ;; 401) echon "HTTP code 401 (Unauthorized) most likely means that you have no subscription to this resource." echon "Check whether you have a subscription; if you can successfully download the full text file, please email me." return 1 ;; 404) echon "HTTP code 404 (Not Found) usually means that there is a bug in the script, unless downloading from one of Library Genesis mirrors." echon "Please email me the command line and the output of the script so that I can fix the bug." return 1 ;; *) echon "Error: HTTP code is $code, not 200, downloaded file ignored." echon "Check whether you have a subscription; if you can successfully download the full text file, please email me." 
return 1 ;; esac case "$type" in application/pdf) echon PDF file; dextn="pdf" ;; application/pdf*) echon PDF file from JSTOR; dextn="pdf" ;; # JSTOR server is run by incompetent people text/pdf) echon PDF file from CUP; dextn="pdf" ;; # CUP server is also run by incompetent people "multipart/form-data;charset=utf-8") echon PDF file from AIM; dextn="pdf" ;; # so is the AIM server application/postscript) echon PostScript; dextn="ps" ;; image/vnd.djvu) echon DjVu; dextn="djvu" ;; application/x-dvi) echon DVI; dextn="dvi" ;; application/x-tar) echon DVI in a TAR file; dextn="tar" ;; application/octet-stream*|application/download*) if [[ -v extn ]]; then echon File from Library Genesis, extension "$extn" dextn="$extn" else echon "No extension supplied for application/octet-stream. Report this error to me by email." return 1 fi ;; *) echon "Unrecognized Content-Type: not PDF, PostScript, DjVu, or DVI, downloaded file ignored." echon "This might mean that you have no subscription to this content, because many scientific repositories use a brain-damaged way to report authorization errors." echon "If you do have a subscription and can successfully download and view the full text file, please email me." 
# NOTE(review): the physical line breaks in this copy of the file appear mangled by extraction
# (several commands and sed patterns above/below are split mid-token, and some <...> fragments
# inside sed patterns look stripped); diff against the upstream 'article' script before executing.
return 1 ;; esac echon Extension: "$dextn" } declare -A xtried fturl() { if [[ -v abstract ]]; then echo "$1" return fi local url="$1" echon Abstract page URL: "$url" set +e # allow curl to fail so that doi links still get processed meta="$(fetchcr -k -I -A / -w "%{content_type}\n%{url_effective}\n" "$url")" set -e ctype="$(echo "$meta" | tail -2 | head -1)" echon Content-Type: "$ctype" crurl="$(echo "$meta" | tail -1)" echon Completely resolved URL: "$crurl" case "$ctype" in application/pdf*) urls+=("$crurl") echov "Content-Type is application/pdf" echov "URL added: PDF: $crurl" #data="" ;; set +e data="$(fetchcr -k -A / "$url")" set -e ;; *) set +e data="$(fetchcr -k -A / "$url")" set -e ;; esac if [[ -v auti ]]; then if echo "$data" | grep -aEc 'doi.org/|"doi"' >/dev/null; then dois="$(echo "$data" | sed -n 's@.*doi.org/\([^ "<]*\).*@\1@p;s/"doi":"\([^"]*\)"/\1/p')" echov Extracted DOIs: echov "$dois" unset auti doi "$(echo "$dois" | head -1)" return fi; fi if echo "$data" | grep -ac citation_pdf_url >/dev/null; then echon 'Generic method (citation_pdf_url), e.g., AMS, EMS, Project Euclid, CUP, OUP, Springer, de Gruyter, Wiley' local pdf echo "$data" >/tmp/debugdata pdf="$(echo "$data" | tr \\n \ | $SED -n 's@.*\(<[^>]*citation_pdf_url[^>]*>\).*@\1@p' | $SED -n 's@.*content=[^"]*"\([^"]*\)".*@\1@p')" echon citation_pdf_url: "$pdf" case "$pdf" in http*://onlinelibrary.wiley.com/doi/*) echov Publisher: Wiley pdf="$(fetch "$pdf" | sed -n 's@.*id="pdfDocument" src="\([^"]*\)".*@\1@p' | sed 's/&/\&/g')" if [[ -z "$pdf" ]]; then echon 'No Wiley PDF URL found; possible cause: no subscription' unset pdf else echov Adjusted Wiley PDF URL: "$pdf" fi ;; http*://journals.cambridge.org/*) echov Publisher: CUP pdf="$(sfetch "$pdf")""&toPdf=true" echov Adjusted CUP PDF URL: "$pdf" ;; http://*) echov Generic HTTP URL ;; https://*) echov Generic HTTPS URL ;; *) echov Generic relative URL urlbase="$(sfetch "$url" | sed -n 's@^\(http.*//[^/]*\).*@\1@p')" echov Base "$urlbase" 
pdf="$urlbase$pdf" echov Adjusted URL: "$pdf" ;; esac case "$data" in *"Duke Mathematical Journal"*) echov Switching to the nonenhanced PDF for the Duke Mathematical Journal due to an insane color scheme pdf="${pdf//pdfview/pdf}" ;; esac if [[ -v pdf ]]; then urls+=("$pdf") echon URL added: citation_pdf_url: "$pdf" fi fi if [[ -v auti ]]; then echov Attempting to extract title and authors from the HTML data="$(echo "$data" | iconv -f "$(echo "$data" | file -b --mime-encoding -)" -t utf-8 -c)" if echo "$data" | grep -ac citation_title >/dev/null; then title="$(echo "$data" | tr \\n \ | $SED -n 's@.*\(<[^>]*citation_title[^>]*>\).*@\1@p' | tee | sed -n 's@.*content=[^"]*"\([^"]*\)".*@\1@p')" echov citation_title: "$title" fi if echo "$data" | grep -ac citation_author >/dev/null; then authors="$(echo "$data" | sed -n 's@.*\(<[^>]*citation_author[^>]*>\).*@\1@p' | sed -n 's@.*content=[^"]*"\([^"]*\)".*@\1@p')" echov citation_author: "$authors" if echo "$authors" | grep -a ,; then authors="$(echo "$authors" | sed 's/,.*//')" echov Author last names before commas: "$authors" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" echov Author last names: "$authors" fi fi fi local pdf case "$url" in http*://*doi.org/*) doi="${url##*doi.org/}" echon DOI: "$doi" echov DOI URL: "$url" rurl="$(sfetch "$url")" echon Resolved DOI: "$rurl" ;; *) rurl="$url" ;; esac url="$rurl" case "$rurl" in http*://mr.crossref.org/*) echon CrossRef fork links="$(fetch "$rurl" | { grep -ao "href=['\"][^'\"]*['\"]" || true; } | sed 's/href=.//;s/.$//' | { grep -aEv '^https?://.*doi.org' || true; } | uniq)" echon Detected links: echon "$links" for i in $links; do echon Recursively processing link "$i" fturl "$i" done ;; http*://linkinghub.elsevier.com/retrieve/pii/*) echov Publisher: Elsevier eid="${rurl##*/pii/}" echov Elsevier ID: "$eid" #pdf="$(echo "$data" | sed -n 's/.*pdfurl="\([^"]*\)".*/\1/p')" set +e pdf="$(fetch -L -A / "https://www.sciencedirect.com/science/article/pii/$eid" | 
grep application/json | sed 's@]*>@@;s@@@' | jq -r '.article.pdfDownload.urlMetadata|"https://www.sciencedirect.com/\(.path)/\(.pii)\(.pdfExtension)?md5=\(.queryParams.md5)&pid=\(.queryParams.pid)"')" #pdf="$(fetch -L -A / "https://www.sciencedirect.com/science/article/pii/$eid/pdfft" | sed -n "s@.*'\(https://pdf[^']*\)'.*@\1@p")" set -e #pdf="$(echo "$eapdata" | sed -n 's/.*pdfurl="\([^"]*\)".*/\1/p')" #pdf="$(echo "$eapdata" | sed -n 's@.*Read article.*@\1@p')" ;; http*://www.intlpress.com/*) echov Publisher: International Press pdf="$rurl"$(fetch "$rurl/body.html" | sed -n 's@.*"\([^"]*.pdf\)".*@\1@p') ;; http*://*.impan.pl/cgi-bin/doi*) echov Publisher: IMPAN pdf="${rurl//\/doi/\/pdf}" if [[ "${pdf: -2:1}" == "-" ]]; then pdf="${pdf:0:-2}-0${pdf: -1:1}" fi ;; http*://retro.seals.ch/digbib/view?rid=*) echov Publisher: retro.seals pdf="${rurl//digbib\/view?rid=/cntmng?pid=}" ;; # end of DOI URLs http*://www.numdam.org/item?id=*) echov Publisher: Numdam numdam="${url:30}" echov Numdam ID: "$numdam" pdf="http://archive.numdam.org/article/$numdam.pdf" ;; http*://www.numdam.org/item/*) echov Publisher: Numdam numdam="${url:27}" echov Numdam ID: "$numdam" pdf="http://archive.numdam.org/article/$numdam.pdf" ;; http*://*.cedram.org/item?id=*) echov Publisher: Cedram numdam="${url#*item?id=}" echov Numdam ID: "$numdam" pdf="http://archive.numdam.org/article/$numdam.pdf" ;; http*://eudml.org/*) echov Publisher: EuDML pdf="$(echo "$data" | sed -n "s@.*Full (PDF).*@\1@p')" if [ -z "$pdf" ]; then link="$(echo "$data" | sed -n "s@.*Access to full text.*@\1@p')" echov Intermediate link: "$link" fturl "$link" fi ;; http*://muse.jhu.edu/*.pdf) echov Publisher: MUSE pdf="$url" ;; http*://www.emis.de/*abs.html) echov Publisher: EMIS pdf="${url//abs.html/.pdf}" ;; http*://www.emis.de/*.html) echov Publisher: EMIS pdf="${url//html/pdf}" ;; http*://www.digizeitschriften.de/dms/*) echov Publisher: DigiZeitschriften link="${url//resolveppn/img}" echov PDF page for DigiZeitschriften: 
"$link" data="$(fetch "$link")" pdf="$(echo "$data" | sed -n 's@.*.*@\1@p')" ;; http*://gdz.sub.uni-goettingen.de/*) echov Publisher: GDZ "$url" gdzppn="${url#*PPN=}" case "$gdzppn" in *DMDID=dmdlog*) dmd="${gdzppn#*dmdlog}" echov DMD: "$dmd" gdzppn="${gdzppn%&*}" echov Trimmed GDZ PPN: "$gdzppn" pdf="https://gdz.sub.uni-goettingen.de/download/pdf/$gdzppn/LOG_$(printf %04d "$dmd").pdf" ;; *) echov GDZ PPN: "$gdzppn" data="$(fetchr "$url")" manifest="$(echo "$data" | sed -n 's@.*data-manifest-url="\([^"]*\)".*@\1@p')" echov GDZ manifest URL: "$manifest" mdata="$(fetchr "$manifest")" #echov Manifest data: #echov "$mdata" pdf="$(echo "$mdata"| jq -r '.structures[] | select(.metadata[].value == "http://resolver.sub.uni-goettingen.de/purl?'"$gdzppn"'") | .rendering[]."@id"')" ;; esac ;; http*://*tac.mta.ca*) echov Publisher: TAC pdf="${url//abs.html/.pdf}" ;; http*://www.pnas.org/cgi/doi/*) echov Publisher: PNAS pdf="$crurl" ;; http*://tcms.org.ge/*) echov Publisher: TCMS volume="$(echo "$data" | sed -n 's@.*Vol. \([^(]*\)(.*@\1@p')" echov Volume: "$volume" trim="${url%/abstract.htm}" echov Trimmed URL: "$trim" stem="${trim##*/}" echov URL stem: "$stem" pdf="${trim//volumes/xvolumes}/v${volume}${stem}hl.pdf" ;; http*://*mathematik.uni-bielefeld.de/documenta/*) echov Publisher: Documenta Mathematica pdf="${url//html/pdf}" ;; http*://d-nb.info/*) echov Publisher: DNB pdf="$url" ;; *) echov Unknown URL "$url" echov If the script is unable to download the full text, please email me so that I can add support for this type of URL. ;; esac case "$crurl" in http*://link.springer.com/book/*) echon 'Publisher: Springer (book)' pdf="${crurl/book/content/pdf}.pdf" #https://link.springer.com/content/pdf/10.1007%2F978-3-319-09354-3.pdf #https://link.springer.com/book/10.1007%2F978-3-319-09354-3 #echon Springer books are typically split into many individual files, which does not fit the operational model of this script. Aborting. 
esac if [[ -v pdf && "$pdf" != "" ]]; then echon URL added: publisher: "$pdf" urls+=("$pdf") fi url="$1" case "$url" in http*://*doi.org/*) doi="${url##*doi.org/}" mapfile -t arxivurls < <(fetch http://export.arxiv.org/api/query --data-urlencode "search_query=doi:\"$doi\"" | xidel - -s -e "//feed/entry/link[@title='pdf']/@href" | sed '/^$/d') if [[ -v arxivurls ]]; then echon "URL added: arXiv URLs obtained using DOI:" echon "${arxivurls[@]}" urls+=("${arxivurls[@]}") fi #eteka="$(fetchr -G http://eteka.info/scimag/ads.php --data-urlencode "doi=$doi" | grep -o '"http[^"]*"' | tr -d '"')" #if [[ -v eteka ]]; then # echov "eteka.info URL: $eteka" # urls+=("$eteka") #fi #set +e #pwid="$(fetchr -k -G https://sci.libgen.pw/search --data-urlencode "q=$doi" | sed -n 's@.*/item/detail/id/\([0-9a-fA-F]*\).*@\1@p')" #set -e #if [[ -v pwid && "$pwid" != "" ]]; then # echov PW id: "$pwid" # urls+=("https://sci.libgen.pw/download/sci/$pwid") #fi #set +e #scurl="$(fetchr -G https://booksc.org/s/ --data-urlencode "q=$doi" | grep dlButton | sed -n 's@.*href="\([^"]*\)".*@\1@p')" #set -e #if [[ -v scurl && "$scurl" != "" ]]; then # echov "BookSC.org URL: $scurl" # urls+=("https://booksc.org$scurl") #fi set +e lolurl="$(fetchr "http://library.lol/scimag/$doi" | grep GET | sed -n 's@.*href="\([^"]*\)".*@\1@p')" set -e if [[ -v lolurl && "$lolurl" != "" ]]; then echov "Library.LOL URL: $lolurl" urls+=("$lolurl") extn=pdf # ??? 
fi set +e gsurl="$(fetchr -G http://libgen.gs/scimag/ads.php --data-urlencode "doi=$doi" | grep GET | sed -n 's@.*href="\([^"]*\)".*@\1@p')" set -e if [[ -v gsurl && "$gsurl" != "" ]]; then echov "LibGen.GS URL: $gsurl" #urls+=("http://libgen.gs/$gsurl") #gsurl="${gsurl/\\get.php/\/get.php}" gsurl="http://libgen.gs/$gsurl" urls+=("$gsurl") extn=pdf # libgen.gs returns application/octet-stream fi #set +e #bookscurl="$(fetchr -G https://booksc.xyz/s/ --data-urlencode "q=$doi" | sed '/fit your search query exactly but very close/,$d' | grep 'itemprop="name"' | head -1 | sed -n 's@.*href="\([^"]*\)".*@\1@p')" #set -e #if [[ -v bookscurl && "$bookscurl" != "" ]]; then # echov "BookSC URL: $bookscurl" # bsc="$(fetchr "https://booksc.xyz$bookscurl" | sed -n 's@.*href="\(/dl/[^"]*\)".*@\1@p' | head -1)" # urls+=("https://booksc.xyz$bsc") # extn="pdf" # echon "URL added: Library Genesis BookSC DOI URL: $bsc with extension $extn" #else # echon BookSC search unsuccessful. #fi #lgpdf="$(fetchr -G "http://booksdescr.org/scimag/" --data-urlencode "s=$doi" --data-urlencode "redirect=1" | sed 's/

/\n

/' | grep -a scimag/get | sed 's/.*href=".*\(http:[^"]*\)".*/\1/g' ||:)" #lgpdf="$(fetch -G "http://booksdescr.org/scimag/ads.php" --data-urlencode "doi=$doi" | sed 's/

/\n

/' | grep -a scimag/get | sed 's/.*href=".*\(http:[^"]*\)".*/\1/g' ||:)" #if [[ -v lgpdf && "$lgpdf" != "" ]]; then # urls+=("$lgpdf") # extn="pdf" # echon "URL added: Library Genesis DOI URL: $lgpdf with extension $extn" #fi #lgmd5="$(fetchr -G "https://sci.booksdescr.com/search" --data-urlencode "q=$doi" | sed -n 's@.*/item/detail/id/\([0-9a-zA-Z]*\).*@\1@p')" #if [[ -v lgmd5 && "$lgmd5" != "" ]]; then # echov Library Genesis MD5: "$lgmd5" # data="$(fetchr "http://libgen.rs/book/index.php?md5=$lgmd5")" # genurls "$lgmd5" "$data" #else # set +e # shub="$(fetchcr -e "https://sci-hub.tw/$doi" "https://sci-hub.tw/$doi")" # set -e # shuburl="$(printf '%s\n' "$shub" | sed -n 's@.*\(http[^"'\'']*\).*save.*@\1@p')" # if [[ -v shuburl && "$shuburl" != "" ]]; then # echon Sci-Hub URL: "$shuburl" # urls+=("$shuburl") # #lgid="${shubid#*/}" # #echon Sci-Hub redirected to Library Genesis with extended ID "$shubid" and ordinary ID "$lgid" # #lone="http://download.library1.org/main/$shubid/" # #urls+=("$lone") # #echon URL added: Sci-Hub redirect to Library1.org: "$lone" # #genurls "$lgid" "$shub" # fi #fi ;; esac echon Attempting to extract raw URLs from the abstract page "$crurl": mapfile -t newurls < <(echo "$data" | xidel - -s -e "(//@href, //@src)/resolve-uri(.,\"$crurl\")" | sed 's/#.*//' | grep -a pdf | grep -av "degruyter.com/flyer/\|degruyter.com/.*.toc.xml\|degruyter.com/.*.fm.xml\|ams.org/publications/\|ams.org/firefox\|endmatter\|msp.org/forms\|math.ca/Membership\|math.ca/Docs\|math.ca/.*/abstract/\|pdf-preview\|/marketing/\|\.gif$") for i in "${!newurls[@]}"; do if [[ ! 
${xtried["${newurls["$i"]}"]+_} ]]; then vnewurls+=("${newurls[$i]}") xtried["${newurls["$i"]}"]=1 echon 'Added a last resort URL extracted from the abstract page (only used in the interactive mode):' echon "${newurls["$i"]}" fi done if [[ -v vnewurls ]]; then if [[ -v interactive ]]; then urls+=("${vnewurls[@]}") fi echon Warning: some publishers link irrelevant PDFs from the abstract page, e.g., license agreements, abstracts, etc. echon If the script ends up downloading such an irrelevant PDF, please email me so that I can add it to the list of exceptions. fi } arXiv() { echon Database: arXiv "$1" data="$(fetch http://export.arxiv.org/api/query?id_list="$1" | tr \\n \ | $SED 's@<[^/][^>]*/>@\n&\n@g;s@<[^/][^>]*[^/>]>@\n&@g;s@]*>@&\n@g')" echov Processed output: echov "$data" arxiverr="$(echo "$data" | sed -n '\@^http://arxiv.org/api/errors.*$@{p;q1}')" if [[ -n "$arxiverr" ]]; then echon "$arxiverr"; fi id="$(echo "$data" | sed -n 's@^http://arxiv.org/abs/\(.*\)$@\1@p')" echov arXiv ID: "$id" title="$(echo "$data" | sed -n 's@^\(.*\)$@\1@p')" authors="$(echo "$data" | sed -n 's@^\(.*\)$@\1@p' | sed 's/.* \([^ ]*\)/\1/')" urls+=("http://arxiv.org/pdf/$id.pdf") } msn() { echon Database: MathSciNet "$1" data="$(fetch https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=endnote\&pg1=MR\&s1=MR"$1" | sed -n '1,/.*
/d;/.*<\/pre>/,$d;p')"
  # 'data' now holds the EndNote-format record extracted from the MathSciNet search page.
  echov EndNote:
  echov "$data"
  #data="$(echo "$data" | tr \\n \\t | sed 's/\t  //g' | tr \\t \\n)"
  # %T field = title; join wrapped record lines with '@' before extracting, then strip TeX markup.
  title="$(echo "$data" | tr \\n @ | sed -n 's/.*@%T \([^%]*\)@ *%.*/\1/p' | tr @ \  | texnorm)"
  echov Title: "$title"
  # %A fields = authors, one per line; keep the surname, i.e. the text before the first comma.
  authors="$(echo "$data" | sed -n 's/^%A //p' | sed 's/\([^,]*\),.*/\1/' | texnorm)"
  echov Authors: "$authors"

  local url
  url="$(echo "$data" | sed -n 's/^%U //p')"
  # No %U (full-text URL) field in the EndNote record - fall back to the item's HTML page.
  if [ -z "$url" ]; then
    echov No URL found in EndNote data, attempting to extract a URL from the HTML file
    hdata="$(fetch https://mathscinet.ams.org/mathscinet-getitem?mr=MR"$1" | sed -n "1,/.*MR0*$1<\/strong>.*/d;/.*Make Link.*/,\$d;p")"
    echov Processed output:
    echov "$hdata"
    #authors="$(echo "$hdata" | sed 's/@\n@g' | sed -n 's@\([^<]*\)@\1@p' | sed 's/\([^,]*\),.*/\1/')"
    #title="$(echo "$hdata" | tr \\n \  | sed -n 's@.*\([^<]*\).*@\1@p')"

    url="$(echo "$hdata" | sed -n 's@.*\(Article\|Chapter\|Book\).*@\1@p')"
    # Still no URL: special-case journals recognizable by the ISSN in the %@ field,
    # whose full-text URLs can be constructed directly from volume/number data.
    if [ -z "$url" ]; then
      case "$data" in
      *"%@ 1201-561X"*)
        echov Journal: Theory and Applications of Categories
        volume="$(echo "$data" | sed -n 's/^%V //p')"
        number="$(echo "$data" | sed -n 's/^%P [^0-9]*\([0-9]*\),.*/\1/p')"
        echov Volume "$volume", number "$number"
        stem="$number.pdf"
        # Build the TAC path: volumes below 10 use an n$number/ layout (volume 1 adds a 'v1'
        # prefix), later volumes use $number/$volume-...; volumes 1-5 are keyed by year (volume+1994).
        if (( volume < 10 )); then
          stem="n$stem"
          if (( volume == 1 )); then
            stem="v1$stem"
          fi
          stem="n$number/$stem"
        else
          if (( number < 10 )); then
            stem="0$stem"
          fi
          stem="$number/$volume-$stem"
        fi
        if (( volume < 6 )); then
          ((volume+=1994))
        fi
        stem="$volume/$stem"
        echov Stem "$stem"
        urls+=("http://tac.mta.ca/tac/volumes/$stem")
        return ;;
      *"%@ 1431-0635"*)
        echov Journal: Documenta Mathematica
        volume="$(echo "$data" | sed -n 's/^%V //p')"
        pages="$(echo "$data" | sed -n 's/^%P //p')"
        echov "Volume $volume, $pages"
        # Scrape the volume's contents page and pick the article whose page range matches %P.
        url="http://mathematik.uni-bielefeld.de/documenta/vol-$volume/"$(fetch "http://mathematik.uni-bielefeld.de/documenta/vol-$volume/vol-$volume.html" | tr \\n @ | sed -n "s|.* $pages@"'[^@]*Abstract.*|\1|p') ;;
      *)
        echon 'No full text URL supplied by MathSciNet.  Try zbMATH, sometimes it gives a full text URL when MathSciNet does not.'
        return ;;
      esac
    fi
  fi
  # Resolve the chosen abstract-page URL into candidate full-text URLs (fturl appends to 'urls').
  fturl "$url"
}


zbl() {
  echon Database: zbMATH "$1"
  data="$(fetchz https://zbmath.org/?q=an:"$1")"
  #authors="$(echo "$data" | sed 's/@\n@g' | sed -n 's@\([^<]*\)@\1@p' | sed 's/\([^,]*\),.*/\1/')"
  #title="$(echo "$data" | sed -n 's@.*
\([^<]*\)<.*@\1@p')" #echo "$data" >/tmp/zb url="$(echo "$data" | sed -n 's@.*/tmp/zb2 #grep -aEo ' {([^{}]|({[^}]*}))*} ' | authors="$(echo "$data" | sed -n 's@^ *Author = {\(.*\)},$@ \1 @p' | sed 's/^.//;s/.$//' | texsimp)" echov BibTeX authors: "$authors" title="$(echo "$data" | sed -n 's@^ *Title = {\(.*\)},$@\1@p' | texnorm)" echov BibTeX title: "$title" if [ -z "$url" ]; then echon 'No full text URL supplied by zbMATH. Try MathSciNet, sometimes it gives a full text URL when zbMATH does not.' return fi while read -r iurl; do echon Trying zbMATH abstract page URL "$iurl" fturl "$iurl" done <<< "$url" } urldecode() { a="$(cat)" printf '%b' "${a//%/\\x}" } doi() { echon Database: CrossRef "$1" ##data="$(fetch -H "Accept: application/vnd.citationstyles.csl+json" "https://data.crossref.org/$1")" #data="$(fetch "https://api.crossref.org/v1/works/$1")" data="$(fetch "https://doi.crossref.org/servlet/query?pid=some.address@mailinator.com&format=json&id=$1")" echov CrossRef data: "$data" #title="$(echo "$data" | jq -r .message.title[] | tr \\n \ )" title="$(echo "$data" | jq -r .created.title | tr \\n \ )" echov CrossRef raw title: "$title" title="$(echo "$title" | sed 's@<[^>]*>@@g')" echov CrossRef processed title with stripped tags: "$title" set +e #authors="$(echo "$data" | jq -r .message.author[].family)" authors="$(echo "$data" | jq -r .author[].family)" set -e echov CrossRef authors: "$authors" if [[ ! 
-v stoprecursion ]]; then stoprecursion=1 fturl "https://doi.org/$1" unset stoprecursion fi } cs() { echon Database: CiteSeerX "$1" data="$(fetch "http://citeseerx.ist.psu.edu/oai2?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:CiteSeerX.psu:$1")" title="$(echo "$data" | sed -n 's@.*\(.*\).*@\1@p')" authors="$(echo "$data" | sed -n 's@.*\(.*\).*@\1@gp' | sed 's/.* \([^ ]*\)/\1/')" urls+=("http://citeseerx.ist.psu.edu/viewdoc/download?doi=$1&rep=rep1&type=pdf") } pe() { echon Database: Project Euclid "$1" eucit="$(fetch https://projecteuclid.org/citation/download -H 'Content-Type: application/json; charset=utf-8' --data '{"contentType":"0","formatType":"0","referenceType":"","urlid":"'"$1"'"}')" echov Project Euclid internal citation id: "$eucit" data="$(fetch "https://projecteuclid.org/citation/download/$eucit")" #data="$(fetch http://projecteuclid.org/export_citations --data format=ris --data-urlencode "h=$1")" echov Project Euclid bibliographic data: echov "$data" title="$(echo "$data" | sed -n 's/^TI - //p')" authors="$(echo "$data" | sed -n 's/^AU - //p' | sed 's/.* \([^ ]*\)/\1/')" urls+=("http://projecteuclid.org/download/pdf_1/$1") } eudml() { echon Database: EuDML "$1" data="$(fetch -k "https://eudml.org/api/rest/urn:$1?format=oai_dc")" echov EuDML data: echov "$data" title="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p' | head -1)" authors="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p')" if echo "$authors" | grep -ac , >/dev/null; then authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" fi fturl "https://eudml.org/doc/${1##eudml:doc:}" } numdam() { echon Database: Numdam "$1" data="$(fetch "http://www.numdam.org/oai/" --data-urlencode "verb=GetRecord" --data-urlencode "metadataPrefix=oai_dc" --data-urlencode "identifier=oai:numdam.org:$1")" echov Numdam data: echov "$data" title="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p' | head -1)" authors="$(echo "$data" | sed -n 
's|.*>\([^>]*\).*|\1|p')" if echo "$authors" | grep -ac , >/dev/null; then authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" fi fturl "http://www.numdam.org/item/$1" } mathnet() { echon Database: Math-Net.Ru "$1" data="$(fetchr "http://mi.mathnet.ru/eng/$1")" echov Math-Net.Ru data: echov "$data" #title="$(echo "$data" | sed -n 's@.*\([^<]*\).*@\1@p' | python3 -c 'import html,sys; print(html.unescape(sys.stdin.read()), end="")')" title="$(echo "$data" | sed -n 's@.*\([^<]*\).*@\1@p' | sed 's/&[^;]*;//g')" authors="$(echo "$data" | grep personid= | sed -n 's@.*]*>\([^<]*\).*@\1@;s/ / /gp')" if echo "$authors" | grep -ac , >/dev/null; then authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')" else authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')" fi fulltext="$(echo "$data" | grep getFT | sed -n 's@.*href=\([^>]*\)>.*@\1@p')" fturl "http://mathnet.ru$fulltext" } isbn() { echon "Database: ISBN (via Library Genesis) $1" data="$(fetchr "http://libgen.rs/search.php?req=$1&column=identifier")" isbn=("$1") } gen() { echon Database: Library Genesis "$1" data="$(fetchr "http://libgen.rs/book/index.php?md5=$1")" #isbn=($(echo "$data" | sed -n 's@.*ISBN:\([^<]*\).*@\1@p' | tr -cs 0-9- \ )) mapfile -t isbn < <(echo "$data" | sed -n 's@.*ISBN:\([^<]*\).*@\1@p' | tr -cs 0-9- \ | sed 's/^ *//;s/ *$//') if [[ ${#isbn[@]} -ne 0 ]]; then echov ISBNs from Library Genesis: "${isbn[@]}" fi title="$(echo "$data" | sed -n 's@.*Title: ]*>\([^<]*\).*@\1@p')" authors="$(echo "$data" | sed -n 's@.*Author(s):\([^<]*\).*@\1@p' | sed 's/(auth.)//g' | sed 's/, /\n/g' | sed 's/^ *//;s/ *$//' | sed -n 's/.* \([^ ]*\)/\1/p')" #for i in "${!isbn[@]}"; do # echov Trying ISBN "${isbn[$i]}" with WorldCat # set +e # wdata="$(fetch "http://xisbn.worldcat.org/webservices/xid/isbn/${isbn[$i]}?method=getMetadata&format=json&fl=*")" # set -e # echov ISBN bibliographic data from WorldCat: "$wdata" # if [[ "ok" != "$(echo "$wdata" | jq 
-r .stat)" ]]; then # continue # fi # if ! authors="$(echo "$wdata" | jq -e -r '.list[0].author' | sed 's/\.$//;s/ ; /\n/g;s/ and /\n/g;s/, /\n/g' | sed 's/.* \([^ ]*\)/\1/')"; then # oclc="$(echo "$wdata" | jq -e -r '.list[0].oclcnum[0]')" # echov OCLC number: "$oclc" # wwdata="$(fetchr "http://www.worldcat.org/oclc/$oclc?page=endnote&client=worldcat.org-detailed_record")" # echov EndNote bibliographic data from WorldCat: "$wwdata" # authors="$(echo "$wwdata" | sed -n 's/^AU - //p' | sed 's/\(.*\),.*/\1/')" # fi # echov Authors from WorldCat: "$authors" # title="$(echo "$wdata" | jq -r .list[0].title)" # echov Title from WorldCat: "$title" # if [[ -n "$authors" && -n "$title" ]]; then # break # fi #done if [[ -z "$authors" || -z "$title" ]]; then for i in "${!isbn[@]}"; do echov Trying ISBN "${isbn[$i]}" with Google Books isbns="${isbn[*]}" allisbn="${isbns// /+}" printf 'allisbn=%s\n' "$allisbn" bdata="$(fetch "https://www.googleapis.com/books/v1/volumes?q=isbn+$allisbn&fields=items/volumeInfo(title,authors)&maxResults=1")" echov ISBN bibliographic data from Google Books: "$bdata" if [[ "$bdata" = "{}" ]]; then continue fi authors="$(echo "$bdata" | jq -r .items[0].volumeInfo.authors[] | sed 's/.* \([^ ]*\)/\1/')" echov Authors from Google Books: "$authors" title="$(echo "$bdata" | jq -r .items[0].volumeInfo.title)" echov Title from Google Books: "$title" if [[ -n "$authors" && -n "$title" ]]; then break fi done fi genurls "$1" "$data" } genurls() { data="$2" extn="$(echo "$data" | sed -n 's@.*Extension:\([^<]*\).*@\1@p')" id="$(echo "$data" | sed -n 's@.*ID:\([^<]*\).*@\1@p')" echov Library Genesis extension: "$extn" echov Library Genesis ID: "$id" annas="$(fetchr "https://annas-archive.org/md5/$1")" pinataurl="$(echo "$annas" | sed -n 's@.*href='"'"'\(https://gateway.pinata.cloud[^'"'"']*\)'"'"'.*@\1@p' | head -1)" echov 'IPFS URL 1 from annas-archive.org: ' "$pinataurl" # https://gateway.pinata.cloud/ipfs/$ipfs cfurl="$(echo "$annas" | sed -n 
's@.*href='"'"'\(https://cloudflare-ipfs.com[^'"'"']*\)'"'"'.*@\1@p' | head -1)" echov 'IPFS URL 2 from annas-archive.org: ' "$cfurl" # https://cloudflare-ipfs.com/ipfs/$ipfs #sdurl="$(fetchr "https://annas-archive.org/slow_download/$1/0/1")" rocksurl="$(fetchr "https://libgen.rocks/ads.php?md5=$1" | sed -n 's@.*\(get\.php[^"]*\).*@https://libgen.rocks/\1@p')" echov 'URL from libgen.rocks:' "$rocksurl" lolurl="$(fetchr "http://library.lol/main/$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')" echov 'URL from library.lol:' "$lolurl" liurl="http://libgen.li/$(fetchr "http://libgen.li/ads.php?md5=$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')" echov 'URL from libgen.li:' "$liurl" urls+=("$cfurl") urls+=("$pinataurl") urls+=("$rocksurl") urls+=("$liurl") urls+=("$lolurl") } #urls+=("http://93.174.95.29/main/${id::-3}000/${1,,}/filename") #lgkey="$(fetchr "http://libgen.rs/ads.php?md5=$1" | tr -d \\r | sed -n 's/.*key=\([^"]*\)".*/\1/p')" #echov Library Genesis Key: "$lgkey" #lgurl="$(fetchr "http://libgen.rs/ads.php?md5=$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')" #pwid="$(fetchr "https://libgen.pw/item/detail/id/$id" | sed -n 's@.*download/book/\([^"]*\)".*@\1@p')" #if [[ "$pwid" != "" ]]; then # echov PW id: "$pwid" # urls+=("https://libgen.pw/download/book/$pwid") # ${id::-3}000/ &hidden0=name.pdf&hidden1=${1,,} # urls+=("http://dl.lux.bookfi.net/genesis/${id::-3}000/${1,,}/_as/name.pdf") #else # echov PW id not found #fi #ambryid="$(fetchr "https://ambry.pw/item/detail/id/$id" | sed -n 's@.*download/book/\([^"]*\).*@\1@p')" #echov Ambry ID: "$ambryid" #urls+=("https://dnld.ambry.cx/download/book/$ambryid") #xyzida="$(fetchr "http://b-ok.cc/s/?q=$1" | sed -n 's@.* 0 )); do case "$1" in -d) shift if (( $# == 0 )); then fatal "Option -d requires an argument" fi if [[ -d "$1" ]]; then dirname="$1" else fatal "No such directory: $1" fi ;; -q) quiet=1 ;; -v) set -v verbose=1 ;; -i) interactive=1 ;; -u) shift if (( $# == 0 )); then fatal "Option 
-u requires an argument" fi echov "URL added: command line option -u: $1" urls+=("$1") ;; -f) fancy=1 ;; -n) dryrun=1 ;; -a) abstract=1 ;; -e) shift if (( $# == 0 )); then fatal "Option -e requires arguments" fi cmd="$1" args=() shift while (( $# > 0 )); do if [[ "$1" == "--" ]]; then break fi args+=("$1") shift done if (( $# == 0 )); then fatal "Unterminated -e option" fi ;; -p) shift if (( $# == 0 )); then fatal "Option -p requires an argument" fi amspairing="$1" ;; *) if (( $# == 1 )); then artid="$1" else fatal "Unrecognized option $1" fi ;; esac shift done if [[ ! -v artid ]]; then echoerr "No article id specified" syn fi echon "Scientific article full text downloader by Dmitri Pavlov, version $version." echon "To report bugs and missing features, please email me (host math.berkeley.edu, user pavlov). Please include the command line and the output of the script when run with the -v option in your email. Before submitting a bug report, please make sure that you can download the full text using your browser; the inability of the script to download the full text is often an indication that you don't have a subscription. " echov "Supported databases: MathSciNet, zbMATH, arXiv, DOI, Library Genesis. Email me if you want the script to support other databases. An incomplete list of supported repositories: ScienceDirect, SpringerLink, Taylor and Francis, Walter de Gruyter, World Scientific, SIAM, AMS, OUP, CUP, CMS, MSP, MUSE, TAC, JSTOR, Project Euclid, NUMDAM, CEDRAM, EuDML. Many additional repositories are supported by virtue of generic methods. Email me if you want the script to support other repositories. 
" echon Article ID: "$artid" case "${artid,,}" in */leavingmsn?url=*) artid="${artid##*/leavingmsn?url=}" ;; esac case "${artid,,}" in http*://*arxiv.org/abs/*) echov arXiv URL "$artid" arXiv "${artid##*abs/}" ;; http*://*arxiv.org/pdf/*) echov arXiv URL "$artid" trimurl="${artid##*pdf/}" arXiv "${trimurl%.pdf}" ;; *arxiv.org/abs/*) echov arXiv URL "$artid" arXiv "${artid##*abs/}" ;; *arxiv.org/pdf/*) echov arXiv URL "$artid" trimurl="${artid##*pdf/}" arXiv "${trimurl%.pdf}" ;; http://front.math.ucdavis.edu/*) echov Front for the arXiv URL "$artid" arXiv "${artid##*ucdavis.edu/}" ;; http*://*ams.org/mathscinet-getitem?mr=*) echov MathSciNet getitem URL "$artid" msnid="${artid##*mr=}" msn "${msnid##MR}" ;; http*://*ams.org/mathscinet/search/publdoc.html*mx-pid=*) echov MathSciNet search URL "$artid" msnid="${artid##*mx-pid=}" msn "${msnid%%&*}" ;; http*://*ams.org/mathscinet/search/*) echov MathSciNet generic search URL "$artid" data="$(fetch "$artid")" msnid="$(echo "$data" | grep -a mathscinet-getitem | sed 's/.*mathscinet-getitem?mr=\([^"]*\)".*/\1/')" msn "$msnid" ;; https://*zbmath.org/?q=an:*) echov zbMATH URL "$artid" zbl "${artid##*q=an:}" ;; https://zbmath.org/*) echov zbMATH URL "$artid" zbl "${artid##*zbmath.org/}" ;; http://*doi.org/* | https://*doi.org/*) echov DOI URL "$artid" doi "${artid##*doi.org/}" ;; http://*gen*md5=*) echov Library Genesis URL "$artid" genid="${artid##*md5=}" gen "${genid%%&*}" ;; http*://*citeseerx*/*doi=*) echov CiteSeerX URL "$artid" csid="${artid##*doi=}" cs "${csid%%&*}" ;; http://projecteuclid.org/euclid.*) echov Project Euclid URL "$artid" pe "${artid##http://projecteuclid.org/}" ;; http*://www.numdam.org/item/?id=*) echov Numdam URL "$artid" nid="${artid##http*://www.numdam.org/item/?id=}" numdam "${nid%%/}" ;; http*://www.numdam.org/item/*) echov Numdam URL "$artid" nid="${artid##http*://www.numdam.org/item/}" numdam "${nid%%/}" ;; http://mi.mathnet.ru/eng/*) echov Math-Net.Ru URL "$artid" 
mnid="${artid##*mathnet.ru/eng/}" mathnet "$mnid" ;; http://mi.mathnet.ru/rus/*) echov Math-Net.Ru URL "$artid" mnid="${artid##*mathnet.ru/rus/}" mathnet "$mnid" ;; #http://www.mathnet.ru/php/archive.phtml?wshow=paper&jrnid=ivm&paperid=4812&option_lang=eng http*://*mathnet.ru/*jrnid=*) echov Math-Net.Ru URL "$artid" mnid="$(echo "$artid" | sed 's@.*jrnid=\([^&]*\)&paperid=\([^&]*\).*@\1\2@')" mathnet "$mnid" ;; https://link.springer.com/article/*) echov Springer Link URL "$artid" slid="${artid##*springer.com/article/}" echov Springer DOI "$slid" doi "$slid" ;; http*://*) echon Unknown HTTP URL: "$artid" echon Attempting generic full-text URL extraction title=unknown-title authors=unknown-authors auti=1 fturl "$artid" ;; arxiv:*) arXiv "${artid:6}" ;; mr*) msn "${artid:2}" ;; zbl:*) zbl "${artid:4}" ;; doi:*) doi "${artid:4}" ;; gen:*) gen "${artid:4}" ;; cs:*) cs "${artid:3}" ;; pe:*) pe "${artid:3}" ;; eudml:*) eudml "$artid" ;; numdam:*) numdam "${artid:7}" ;; mathnet:*) mathnet "${artid:8}" ;; *) fatal "Unrecognized article ID: $artid" ;; esac echov Title: "$title" echov Authors: echov "$authors" stripp() { sed 's/\[[^]]*\]//g;s/\\[a-zA-Z]*//g;s|/|-|g' | if [[ -v fancy ]]; then sed 's/[[:space:]]\+/ /g;s/^ //;s/ $//' | tr \\n \\f | sed 's/\.$//g' | sed 's/\f$/. 
/;s/\f/, /g' else sed 's/.*/\L&/' | sed 's/'"'"'/\f/g;s/[[:punct:]]/ /g;s/\f/'"'"'/g;s/'"''"'//g;s/[[:space:]]\+/-/g;s/^-//;s/-$//' | tr \\n - fi } title="$(echo -n "$title" | stripp)" authors="$(echo "$authors" | stripp)" if [[ "$authors" == "-" ]]; then authors= fi name="$authors$title" echov Stripped title: "$title" echov Combined authors: "$authors" echov Local file name without extension: "$name" if [[ -v dirname ]]; then echov Directory: "$dirname" name="$dirname/$name" echon Directory and file name without extension: "$name" fi if [[ -v dryrun ]]; then echo "$name" printf '%s\n' "${urls[@]}" exit 0 fi tryft() { tname="$(mktemp /tmp/article-XXX)" echon Temporary file name: "$tname" if retft "$1" "$tname"; then if [ -s "$tname" ]; then echon Successfully downloaded "$1" fqname="$name.$dextn" echon Moving "$tname" to "$fqname" mv "$tname" "$fqname" if [[ -v cmd ]]; then echon Launching "$cmd" "${args[@]:+${args[@]}}" "$fqname" "$cmd" "${args[@]:+${args[@]}}" "$fqname" fi exit 0 else echon Downloaded an empty file, skipping. fi fi } if [ ${#urls[@]} -eq 0 ]; then echon No full text URLs found for "$artid" echon Email me if you can access the full text. exit 1 fi if [[ -v interactive ]]; then echo Full text URLs: for i in "${!urls[@]}"; do echo "$i) ${urls[$i]}" done if [[ "${#urls[@]}" == 1 ]]; then echon Automatically selecting the only URL tryft "${urls[0]}" exit 1 fi while true; do read -r if [ -z "$REPLY" ]; then echon Nothing selected exit 1 else echon Selected "$REPLY": "${urls["$REPLY"]}" tryft "${urls["$REPLY"]}" fi done else echon Full text URLs: for i in "${!urls[@]}"; do echon "$i) ${urls[$i]}" done declare -A tried for i in "${!urls[@]}"; do echov "i=$i" echov "urls[i]=${urls["$i"]}" if [[ ${tried["${urls["$i"]}"]+_} ]]; then echon Skipping the duplicate URL "$i": "${urls[$i]}" continue fi echon Attempting to download full text URL "$i": "${urls[$i]}" tried["${urls["$i"]}"]=1 tryft "${urls[$i]}" done echon No working full text URLs exit 1 fi