.*@\1@p')" ;;
http*://gdz.sub.uni-goettingen.de/*)
echov Publisher: GDZ "$url"
gdzppn="${url#*PPN=}"
case "$gdzppn" in
*DMDID=dmdlog*)
dmd="${gdzppn#*dmdlog}"
echov DMD: "$dmd"
gdzppn="${gdzppn%&*}"
echov Trimmed GDZ PPN: "$gdzppn"
pdf="https://gdz.sub.uni-goettingen.de/download/pdf/$gdzppn/LOG_$(printf %04d "$dmd").pdf" ;;
*)
echov GDZ PPN: "$gdzppn"
data="$(fetchr "$url")"
manifest="$(echo "$data" | sed -n 's@.*data-manifest-url="\([^"]*\)".*@\1@p')"
echov GDZ manifest URL: "$manifest"
mdata="$(fetchr "$manifest")"
#echov Manifest data:
#echov "$mdata"
pdf="$(echo "$mdata"| jq -r '.structures[] | select(.metadata[].value == "http://resolver.sub.uni-goettingen.de/purl?'"$gdzppn"'") | .rendering[]."@id"')" ;;
esac ;;
http*://*tac.mta.ca*)
echov Publisher: TAC
pdf="${url//abs.html/.pdf}" ;;
http*://www.pnas.org/cgi/doi/*)
echov Publisher: PNAS
pdf="$crurl" ;;
http*://tcms.org.ge/*)
echov Publisher: TCMS
volume="$(echo "$data" | sed -n 's@.*Vol. \([^(]*\)(.*@\1@p')"
echov Volume: "$volume"
trim="${url%/abstract.htm}"
echov Trimmed URL: "$trim"
stem="${trim##*/}"
echov URL stem: "$stem"
pdf="${trim//volumes/xvolumes}/v${volume}${stem}hl.pdf" ;;
http*://*mathematik.uni-bielefeld.de/documenta/*)
echov Publisher: Documenta Mathematica
pdf="${url//html/pdf}" ;;
http*://*iumj.indiana.edu/*)
echov Publisher: Indiana University Mathematical Journal
pdf=http://www.iumj.indiana.edu/"$(echo "$data" | LC_ALL=C sed -n 's@.*"\(/IUMJ/FTDLOAD/[^"]*/pdf\)".*@\1@p')" ;;
http*://d-nb.info/*)
echov Publisher: DNB
pdf="$url" ;;
*)
echov Unknown URL "$url"
echov If the script is unable to download the full text, please email me so that I can add support for this type of URL. ;;
esac
case "$crurl" in
http*://link.springer.com/book/*)
echon 'Publisher: Springer (book)'
pdf="${crurl/book/content/pdf}.pdf"
#https://link.springer.com/content/pdf/10.1007%2F978-3-319-09354-3.pdf
#https://link.springer.com/book/10.1007%2F978-3-319-09354-3
#echon Springer books are typically split into many individual files, which does not fit the operational model of this script. Aborting.
esac
if [[ -v pdf && "$pdf" != "" ]]; then
echon URL added: publisher: "$pdf"
urls+=("$pdf")
fi
url="$1"
case "$url" in
http*://*doi.org/*)
doi="${url##*doi.org/}"
mapfile -t arxivurls < <(fetch https://export.arxiv.org/api/query --data-urlencode "search_query=doi:\"$doi\"" | xidel - -s -e "//feed/entry/link[@title='pdf']/@href" | sed '/^$/d')
if [[ -v arxivurls ]]; then
echon "URL added: arXiv URLs obtained using DOI:"
echon "${arxivurls[@]}"
urls+=("${arxivurls[@]}")
fi
set +e
aaurl="$(fetchr https://annas-archive.org/scidb/"$doi"/ | rg -o 'href="([^"]*)">Download[^a-zA-Z]' -r '$1')"
set -e
if [[ -v aaurl && "$aaurl" != "" ]]; then
echov "Anna's Archive URL: $aaurl"
urls+=("$aaurl")
fi
set +e
lgplusurl="$(fetchr -G http://$lgplusdomain/scimag/ads.php --data-urlencode "doi=$doi" | grep GET | sed -n 's@.*href="\([^"]*\)".*@\1@p')"
set -e
if [[ -v lgplusurl && "$lgplusurl" != "" ]]; then
echov "$lgplusdomain URL: $lgplusurl"
#urls+=("http://$lgplusdomain/$lgplusurl")
#lgplusurl="${lgplusurl/\\get.php/\/get.php}"
lgplusurl="http://$lgplusdomain/$lgplusurl"
urls+=("$lgplusurl")
#extn=pdf # $lgplusdomain returns application/octet-stream
fi
;;
esac
#http://libgen.gs/index.php?req=10.1017/CBO9781107325609
#set +e
#gsburl="$(fetchr -G http://libgen.gs/index.php --data-urlencode "req=$doi" | grep GET | sed -n 's@.*href="\([^"]*\)".*@\1@p')"
#set -e
#if [[ -v gsburl && "$gsburl" != "" ]]; then
# echov "LibGen.GS URL: $gsburl"
# #urls+=("http://libgen.gs/$gsburl")
# #gsburl="${gsburl/\\get.php/\/get.php}"
# gsburl="http://libgen.gs/$gsburl"
# urls+=("$gsburl")
# #extn=pdf # libgen.gs returns application/octet-stream
#fi
#set +e
#scurl="$(fetchr -G https://booksc.org/s/ --data-urlencode "q=$doi" | grep dlButton | sed -n 's@.*href="\([^"]*\)".*@\1@p')"
#set -e
#if [[ -v scurl && "$scurl" != "" ]]; then
# echov "BookSC.org URL: $scurl"
# urls+=("https://booksc.org$scurl")
#fi
#set +e
#bookscurl="$(fetchr -G https://booksc.xyz/s/ --data-urlencode "q=$doi" | sed '/fit your search query exactly but very close/,$d' | grep 'itemprop="name"' | head -1 | sed -n 's@.*href="\([^"]*\)".*@\1@p')"
#set -e
#if [[ -v bookscurl && "$bookscurl" != "" ]]; then
# echov "BookSC URL: $bookscurl"
# bsc="$(fetchr "https://booksc.xyz$bookscurl" | sed -n 's@.*href="\(/dl/[^"]*\)".*@\1@p' | head -1)"
# urls+=("https://booksc.xyz$bsc")
# extn="pdf"
# echon "URL added: Library Genesis BookSC DOI URL: $bsc with extension $extn"
#else
# echon BookSC search unsuccessful.
#fi
echon Attempting to extract raw URLs from the abstract page "$crurl":
mapfile -t newurls < <(echo "$data" | xidel - -s -e "(//@href, //@src)/resolve-uri(.,\"$crurl\")" | sed 's/#.*//' | grep -a pdf | grep -av "degruyter.com/flyer/\|degruyter.com/.*.toc.xml\|degruyter.com/.*.fm.xml\|ams.org/publications/\|ams.org/firefox\|endmatter\|msp.org/forms\|math.ca/Membership\|math.ca/Docs\|math.ca/.*/abstract/\|pdf-preview\|/marketing/\|\.gif$")
for i in "${!newurls[@]}"; do
if [[ ! ${xtried["${newurls["$i"]}"]+_} ]]; then
vnewurls+=("${newurls[$i]}")
xtried["${newurls["$i"]}"]=1
echon 'Added a last resort URL extracted from the abstract page (only used in the interactive mode):'
echon "${newurls["$i"]}"
fi
done
if [[ -v vnewurls ]]; then
if [[ -v interactive ]]; then
urls+=("${vnewurls[@]}")
fi
echon Warning: some publishers link irrelevant PDFs from the abstract page, e.g., license agreements, abstracts, etc.
echon If the script ends up downloading such an irrelevant PDF, please email me so that I can add it to the list of exceptions.
fi
}
# Look up an arXiv article by identifier and queue its PDF URL.
# Globals written: data, arxiverr, id, title, authors; appends to urls.
# $1 - arXiv identifier (e.g. 1234.5678 or math/0211159).
arXiv() {
echon Database: arXiv "$1"
# Flatten the Atom feed to one line, then re-split it so that each tag and
# each text chunk sits on its own line for the line-oriented seds below.
data="$(fetch https://export.arxiv.org/api/query?id_list="$1" | tr \\n \ | $SED 's@<[^/][^>]*/>@\n&\n@g;s@<[^/][^>]*[^/>]>@\n&@g;s@[^>]*>@&\n@g')"
echov Processed output:
echov "$data"
# The API reports failures by linking to arxiv.org/api/errors.
arxiverr="$(echo "$data" | sed -n '\@^https://arxiv.org/api/errors.*$@{p;q1}')"
if [[ -n "$arxiverr" ]]; then echon "$arxiverr"; fi
id="$(echo "$data" | sed -n 's@^https\?://arxiv.org/abs/\(.*\)$@\1@p')"
echov arXiv ID: "$id"
# NOTE(review): the next two patterns match *every* line, so as written
# they cannot isolate the title/authors -- it looks like markup was lost
# from these sed expressions; verify against the original script.
title="$(echo "$data" | sed -n 's@^\(.*\)$@\1@p')"
authors="$(echo "$data" | sed -n 's@^\(.*\)$@\1@p' | sed 's/.* \([^ ]*\)/\1/')"
urls+=("https://arxiv.org/pdf/$id.pdf")
}
# Look up an article on MathSciNet by MR number, extract title/authors
# from the EndNote record, and pass a full text URL on to fturl.
# Globals written: data, title, authors, hdata (and volume/number/pages/stem
# in the journal-specific branches); $1 - MR number without the MR prefix.
msn() {
echon Database: MathSciNet "$1"
# Cut the EndNote payload out of the search page.
# NOTE(review): the sed addresses look like they lost markup (probably a
# leading <pre> pattern) -- verify against the original script.
data="$(fetch https://mathscinet.ams.org/mathscinet/search/publications.html?fmt=endnote\&pg1=MR\&s1=MR"$1" | sed -n '1,/.*/d;/.*<\/pre>/,$d;p')"
echov EndNote:
echov "$data"
#data="$(echo "$data" | tr \\n \\t | sed 's/\t //g' | tr \\t \\n)"
# %T carries the title, possibly wrapped over several lines: join lines
# with @, capture up to the next % field, then undo the joining.
title="$(echo "$data" | tr \\n @ | sed -n 's/.*@%T \([^%]*\)@ *%.*/\1/p' | tr @ \ | texnorm)"
echov Title: "$title"
# %A lines are "Last, First" -- keep only the last names.
authors="$(echo "$data" | sed -n 's/^%A //p' | sed 's/\([^,]*\),.*/\1/' | texnorm)"
echov Authors: "$authors"
local url
url="$(echo "$data" | sed -n 's/^%U //p')"
if [ -z "$url" ]; then
echov No URL found in EndNote data, attempting to extract a URL from the HTML file
hdata="$(fetch https://mathscinet.ams.org/mathscinet-getitem?mr=MR"$1" | sed -n "1,/.*MR0*$1<\/strong>.*/d;/.*Make Link.*/,\$d;p")"
echov Processed output:
echov "$hdata"
#authors="$(echo "$hdata" | sed 's/@\n@g' | sed -n 's@\([^<]*\)@\1@p' | sed 's/\([^,]*\),.*/\1/')"
#title="$(echo "$hdata" | tr \\n \ | sed -n 's@.*\([^<]*\).*@\1@p')"
# NOTE(review): this captures just the word Article/Chapter/Book rather
# than a URL -- the href part of the pattern appears to have been lost;
# verify against the original script.
url="$(echo "$hdata" | sed -n 's@.*\(Article\|Chapter\|Book\).*@\1@p')"
if [ -z "$url" ]; then
# No link anywhere: reconstruct URLs for journals identified by the
# ISSN found in the %@ field of the EndNote record.
case "$data" in
*"%@ 1201-561X"*)
echov Journal: Theory and Applications of Categories
volume="$(echo "$data" | sed -n 's/^%V //p')"
number="$(echo "$data" | sed -n 's/^%P [^0-9]*\([0-9]*\),.*/\1/p')"
echov Volume "$volume", number "$number"
# TAC's file layout differs by volume/number range, so the path is
# rebuilt piece by piece below.
stem="$number.pdf"
if (( volume < 10 )); then
stem="n$stem"
if (( volume == 1 )); then
stem="v1$stem"
fi
stem="n$number/$stem"
else
if (( number < 10 )); then
stem="0$stem"
fi
stem="$number/$volume-$stem"
fi
# Volumes 1-5 are addressed as volume+1994 (i.e. 1995-1999).
if (( volume < 6 )); then
((volume+=1994))
fi
stem="$volume/$stem"
echov Stem "$stem"
urls+=("http://tac.mta.ca/tac/volumes/$stem")
return ;;
*"%@ 1431-0635"*)
echov Journal: Documenta Mathematica
volume="$(echo "$data" | sed -n 's/^%V //p')"
pages="$(echo "$data" | sed -n 's/^%P //p')"
echov "Volume $volume, $pages"
# NOTE(review): the \1 backreference below has no matching group --
# part of this pattern (an href capture?) appears to have been lost;
# verify against the original script.
url="http://mathematik.uni-bielefeld.de/documenta/vol-$volume/"$(fetch "http://mathematik.uni-bielefeld.de/documenta/vol-$volume/vol-$volume.html" | tr \\n @ | sed -n "s|.* $pages@"'[^@]*Abstract.*|\1|p') ;;
*)
echon 'No full text URL supplied by MathSciNet. Try zbMATH, sometimes it gives a full text URL when MathSciNet does not.'
return ;;
esac
fi
fi
fturl "$url"
}
# Look up an article on zbMATH, extract BibTeX author/title metadata,
# and try every full text URL found for it.
# Globals written: data, url, title, authors; $1 - Zbl number.
zbl() {
echon Database: zbMATH "$1"
data="$(fetchz https://zbmath.org/?q=an:"$1")"
#authors="$(echo "$data" | sed 's/@\n@g' | sed -n 's@\([^<]*\)@\1@p' | sed 's/\([^,]*\),.*/\1/')"
#title="$(echo "$data" | sed -n 's@.*\([^<]*\)<.*@\1@p')"
#echo "$data" >/tmp/zb
# NOTE(review): the url= assignment below is corrupted -- its sed script is
# unterminated, which leaves a quote open across the remainder of this
# function, so none of the following lines can parse as written.  Restore
# this region from the original script before relying on it.  No comments
# are inserted below this point because the quoting state there is
# indeterminate.
url="$(echo "$data" | sed -n 's@.*
/tmp/zb2
#grep -aEo ' {([^{}]|({[^}]*}))*} ' |
authors="$(echo "$data" | sed -n 's@^ *Author = {\(.*\)},$@ \1 @p' | sed 's/^.//;s/.$//' | texsimp)"
echov BibTeX authors: "$authors"
title="$(echo "$data" | sed -n 's@^ *Title = {\(.*\)},$@\1@p' | texnorm)"
echov BibTeX title: "$title"
if [ -z "$url" ]; then
echon 'No full text URL supplied by zbMATH. Try MathSciNet, sometimes it gives a full text URL when zbMATH does not.'
return
fi
while read -r iurl; do
echon Trying zbMATH abstract page URL "$iurl"
fturl "$iurl"
done <<< "$url"
}
# Percent-decode standard input.
# Reads the whole of stdin, rewrites every '%' as '\x', and lets
# printf %b expand the resulting \xHH escapes into bytes.
# Global written: a (raw input as read from stdin).
urldecode() {
a="$(cat)"
local escaped="${a//%/\\x}"
printf '%b' "$escaped"
}
# Look up a DOI on CrossRef, record title/authors, and (once only) chase
# the DOI resolver link for a full text URL via fturl.
# Globals written: data, title, authors, stoprecursion (temporarily).
# $1 - the DOI.
doi() {
echon Database: CrossRef "$1"
##data="$(fetch -H "Accept: application/vnd.citationstyles.csl+json" "https://data.crossref.org/$1")"
#data="$(fetch "https://doi.crossref.org/servlet/query?pid=some.address@mailinator.com&format=json&id=$1")"
data="$(fetch "https://api.crossref.org/works/$1")"
echov CrossRef data: "$data"
# A CrossRef title can span several list entries; join them with spaces.
title="$(jq -r '.message.title[]' <<<"$data" | tr '\n' ' ')"
#title="$(echo "$data" | jq -r .created.title | tr \\n \ )" # for servlet
echov CrossRef raw title: "$title"
title="$(sed 's@<[^>]*>@@g' <<<"$title")"
echov CrossRef processed title with stripped tags: "$title"
# Some records carry no author family names; tolerate jq failing here.
set +e
authors="$(jq -r '.message.author[].family' <<<"$data")"
#authors="$(echo "$data" | jq -r .author[].family)" # for servlet
set -e
echov CrossRef authors: "$authors"
# Follow the DOI to the publisher page, but only one level deep:
# fturl may itself discover DOI links and call back into this function.
if [[ ! -v stoprecursion ]]; then
stoprecursion=1
fturl "https://doi.org/$1"
unset stoprecursion
fi
}
# Look up a paper in CiteSeerX via its OAI interface and queue the cached
# PDF download URL.
# Globals written: data, title, authors; appends to urls.
# $1 - CiteSeerX identifier (e.g. 10.1.1.x.y).
cs() {
echon Database: CiteSeerX "$1"
data="$(fetch "http://citeseerx.ist.psu.edu/oai2?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:CiteSeerX.psu:$1")"
# NOTE(review): these patterns match any line and capture greedily -- the
# XML tag names they originally matched on appear to have been lost from
# this file; verify against the original script.
title="$(echo "$data" | sed -n 's@.*\(.*\).*@\1@p')"
authors="$(echo "$data" | sed -n 's@.*\(.*\).*@\1@gp' | sed 's/.* \([^ ]*\)/\1/')"
urls+=("http://citeseerx.ist.psu.edu/viewdoc/download?doi=$1&rep=rep1&type=pdf")
}
# Look up an article on Project Euclid, pull its bibliographic record,
# and queue the PDF download URL.
# Globals written: eucit, data, title, authors; appends to urls.
# $1 - Project Euclid id (e.g. euclid.<journal>/<number>).
pe() {
echon Database: Project Euclid "$1"
# Step 1: resolve the article id to Project Euclid's internal citation id.
local payload
payload='{"contentType":"0","formatType":"0","referenceType":"","urlid":"'"$1"'"}'
eucit="$(fetch https://projecteuclid.org/citation/download -H 'Content-Type: application/json; charset=utf-8' --data "$payload")"
echov Project Euclid internal citation id: "$eucit"
# Step 2: fetch the RIS-style record for that citation id.
data="$(fetch "https://projecteuclid.org/citation/download/$eucit")"
#data="$(fetch https://projecteuclid.org/export_citations --data format=ris --data-urlencode "h=$1")"
echov Project Euclid bibliographic data:
echov "$data"
# TI/AU are RIS tags; authors are reduced to their last word (last name).
title="$(sed -n 's/^TI - //p' <<<"$data")"
authors="$(sed -n 's/^AU - //p' <<<"$data" | sed 's/.* \([^ ]*\)/\1/')"
urls+=("https://projecteuclid.org/download/pdf_1/$1")
}
# Look up an article in EuDML by URN and pass its document page to fturl.
# Globals written: data, title, authors; $1 - identifier (eudml:doc:NNN).
eudml() {
echon Database: EuDML "$1"
# -k is passed through to the fetcher (presumably curl --insecure,
# i.e. skip certificate verification) -- TODO confirm against fetch().
data="$(fetch -k "https://eudml.org/api/rest/urn:$1?format=oai_dc")"
echov EuDML data:
echov "$data"
# NOTE(review): these capture patterns look like they lost the XML tag
# names they matched on -- verify against the original script.
title="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p' | head -1)"
authors="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p')"
# "Last, First" style names keep the first word; otherwise the last word.
if echo "$authors" | grep -ac , >/dev/null; then
authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')"
else
authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')"
fi
fturl "https://eudml.org/doc/${1##eudml:doc:}"
}
# Look up an article in Numdam via its OAI-PMH endpoint and pass the item
# page to fturl.
# Globals written: data, title, authors; $1 - Numdam identifier.
numdam() {
echon Database: Numdam "$1"
data="$(fetch "https://www.numdam.org/oai/" --data-urlencode "verb=GetRecord" --data-urlencode "metadataPrefix=oai_dc" --data-urlencode "identifier=oai:numdam.org:$1")"
echov Numdam data:
echov "$data"
# NOTE(review): these capture patterns look like they lost the XML tag
# names they matched on -- verify against the original script.
title="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p' | head -1)"
authors="$(echo "$data" | sed -n 's|.*>\([^>]*\).*|\1|p')"
# "Last, First" style names keep the first word; otherwise the last word.
if echo "$authors" | grep -ac , >/dev/null; then
authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')"
else
authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')"
fi
fturl "https://www.numdam.org/item/$1"
}
# Look up an article on Math-Net.Ru and pass its full-text link to fturl.
# Globals written: data, title, authors, fulltext.
# $1 - Math-Net.Ru article id (journal code + paper id).
mathnet() {
echon Database: Math-Net.Ru "$1"
data="$(fetchr "http://mi.mathnet.ru/eng/$1")"
echov Math-Net.Ru data:
echov "$data"
#title="$(echo "$data" | sed -n 's@.*\([^<]*\).*@\1@p' | python3 -c 'import html,sys; print(html.unescape(sys.stdin.read()), end="")')"
# Strip HTML entities from the title.
# NOTE(review): the capture patterns here appear to have lost the HTML
# markup they matched on -- verify against the original script.
title="$(echo "$data" | sed -n 's@.*\([^<]*\).*@\1@p' | sed 's/&[^;]*;//g')"
authors="$(echo "$data" | grep personid= | sed -n 's@.*]*>\([^<]*\).*@\1@;s/ / /gp')"
# "Last, First" style names keep the first word; otherwise the last word.
if echo "$authors" | grep -ac , >/dev/null; then
authors="$(echo "$authors" | sed 's/\([^ ,]*\).*/\1/')"
else
authors="$(echo "$authors" | sed 's/.* \([^ ]*\)/\1/')"
fi
# The download link is attached to the getFT JavaScript handler.
fulltext="$(echo "$data" | grep getFT | sed -n 's@.*href=\([^>]*\)>.*@\1@p')"
fturl "http://mathnet.ru$fulltext"
}
# Look up an ISBN on the Library Genesis mirror.  The result page is kept
# in the global data, and the ISBN itself is remembered (global isbn
# array) for later metadata lookups.
# $1 - the ISBN.
isbn() {
echon "Database: ISBN (via Library Genesis) $1"
local query="https://$lgplusdomain/search.php?req=$1&column=identifier"
data="$(fetchr "$query")"
isbn=("$1")
}
# Look up a Library Genesis item by md5, harvest ISBNs/title/authors from
# its detail page (falling back to Google Books when that yields nothing),
# then queue download URLs via genurls.
# Globals written: data, isbn, title, authors, isbns, allisbn, bdata.
# $1 - md5 identifier of the item.
# Fix: two commented-out lines below had lost the leading '#' on their
# continuation lines (a stray unquoted fragment that broke parsing); they
# are rejoined into single comment lines here.
gen() {
echon Database: Library Genesis "$1"
#data="$(fetchr "https://$lgplusdomain/book/index.php?md5=$1")"
data="$(fetchr "https://$lgplusdomain/ads$1")"
#isbn=($(echo "$data" | sed -n 's@.*ISBN:\([^<]*\) | .*@\1@p' | tr -cs 0-9- \ ))
#mapfile -t isbn < <(echo "$data" | sed -n 's@.*ISBN:\([^<]*\) | .*@\1@p' | tr -cs 0-9- \ | sed 's/^ *//;s/ *$//')
# NOTE(review): the sed pattern below contains a literal newline inside
# the s@@@ command, which sed rejects at run time -- markup was likely
# lost from this pattern; verify against the original script.
mapfile -t isbn < <(echo "$data" | sed -n 's@.*ISBN: \([^<]*\)
.*@\1@p' | tr -cs 0-9- \ | sed 's/^ *//;s/ *$//')
if [[ ${#isbn[@]} -ne 0 ]]; then
echov ISBNs from Library Genesis: "${isbn[@]}"
fi
#title="$(echo "$data" | sed -n 's@.*Title:]*>\([^<]*\).*@\1@p')"
#authors="$(echo "$data" | sed -n 's@.*Author(s): | \([^<]*\).*@\1@p' | sed 's/(auth.)//g' | sed 's/, /\n/g' | sed 's/^ *//;s/ *$//' | sed -n 's/.* \([^ ]*\)/\1/p')"
title="$(echo "$data" | sed -n 's@.*Title: \([^<]*\) .*@\1@p')"
authors="$(echo "$data" | sed -n 's@.*Author(s): \([^<]*\) .*@\1@p' | sed 's/(auth.)//g' | sed 's/, /\n/g' | sed 's/^ *//;s/ *$//' | sed -n 's/.* \([^ ]*\)/\1/p')"
# WorldCat metadata lookup, disabled upstream:
#for i in "${!isbn[@]}"; do
# echov Trying ISBN "${isbn[$i]}" with WorldCat
# set +e
# wdata="$(fetch "http://xisbn.worldcat.org/webservices/xid/isbn/${isbn[$i]}?method=getMetadata&format=json&fl=*")"
# set -e
# echov ISBN bibliographic data from WorldCat: "$wdata"
# if [[ "ok" != "$(echo "$wdata" | jq -r .stat)" ]]; then
# continue
# fi
# if ! authors="$(echo "$wdata" | jq -e -r '.list[0].author' | sed 's/\.$//;s/ ; /\n/g;s/ and /\n/g;s/, /\n/g' | sed 's/.* \([^ ]*\)/\1/')"; then
# oclc="$(echo "$wdata" | jq -e -r '.list[0].oclcnum[0]')"
# echov OCLC number: "$oclc"
# wwdata="$(fetchr "http://www.worldcat.org/oclc/$oclc?page=endnote&client=worldcat.org-detailed_record")"
# echov EndNote bibliographic data from WorldCat: "$wwdata"
# authors="$(echo "$wwdata" | sed -n 's/^AU - //p' | sed 's/\(.*\),.*/\1/')"
# fi
# echov Authors from WorldCat: "$authors"
# title="$(echo "$wdata" | jq -r .list[0].title)"
# echov Title from WorldCat: "$title"
# if [[ -n "$authors" && -n "$title" ]]; then
# break
# fi
#done
# Fall back to Google Books when the LibGen page yielded no usable metadata.
if [[ -z "$authors" || -z "$title" ]]; then
for i in "${!isbn[@]}"; do
echov Trying ISBN "${isbn[$i]}" with Google Books
isbns="${isbn[*]}"
allisbn="${isbns// /+}"
printf 'allisbn=%s\n' "$allisbn"
bdata="$(fetch "https://www.googleapis.com/books/v1/volumes?q=isbn+$allisbn&fields=items/volumeInfo(title,authors)&maxResults=1")"
echov ISBN bibliographic data from Google Books: "$bdata"
if [[ "$bdata" = "{}" ]]; then
continue
fi
authors="$(echo "$bdata" | jq -r .items[0].volumeInfo.authors[] | sed 's/.* \([^ ]*\)/\1/')"
echov Authors from Google Books: "$authors"
title="$(echo "$bdata" | jq -r .items[0].volumeInfo.title)"
echov Title from Google Books: "$title"
if [[ -n "$authors" && -n "$title" ]]; then
break
fi
done
fi
genurls "$1" "$data"
}
# Collect candidate full-text URLs for a Library Genesis item and append
# them to the global urls array.
# $1 - md5 of the item; $2 - HTML of its detail page (also stored in data).
# Fixes: (1) a commented-out line had lost the '#' on its continuation
# line, leaving an unbalanced quote that swallowed the following lines;
# (2) the log message for the mirror URL used single quotes, so
# $lgplusdomain was printed literally instead of being expanded.
genurls() {
data="$2"
#extn="$(echo "$data" | sed -n 's@.*Extension: \([^<]*\) | .*@\1@p')"
#id="$(echo "$data" | sed -n 's@.*ID:\([^<]*\) | .*@\1@p')"
#echov Library Genesis extension: "$extn"
#echov Library Genesis ID: "$id"
# NOTE(review): this fetch result is currently unused (the IPFS mirrors
# below are disabled); kept so the request pattern matches the original.
annas="$(fetchr "https://annas-archive.org/md5/$1")"
#pinataurl="$(echo "$annas" | sed -n 's@.*href='"'"'\(https://gateway.pinata.cloud[^'"'"']*\)'"'"'.*@\1@p' | head -1)"
#echov 'IPFS URL 1 from annas-archive.org: ' "$pinataurl" # https://gateway.pinata.cloud/ipfs/$ipfs
#cfurl="$(echo "$annas" | sed -n 's@.*href='"'"'\(https://cloudflare-ipfs.com[^'"'"']*\)'"'"'.*@\1@p' | head -1)"
#echov 'IPFS URL 2 from annas-archive.org: ' "$cfurl" # https://cloudflare-ipfs.com/ipfs/$ipfs
#msurl="$(fetchr "https://books.ms/main/$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')"
#echov 'URL from books.ms:' "$msurl"
# The mirror's ads.php page carries a GET link to the actual file.
liurl="https://$lgplusdomain/$(fetchr "https://$lgplusdomain/ads.php?md5=$1" | grep GET | sed -n 's/.*href="\([^"]*\)".*/\1/p')"
# Double quotes so the mirror domain is actually expanded in the log.
echov "URL from $lgplusdomain:" "$liurl"
#for i in "$cfurl" "$pinataurl" "$msurl" "$liurl"; do
for i in "$liurl"; do
if [ -z "$i" ]; then
echov Skipping an empty URL
else
urls+=("$i")
fi
done
}
#sdurl="$(fetchr "https://annas-archive.org/slow_download/$1/0/1")"
# Candidate full-text URLs accumulated by the database handlers; they are
# tried in order at the end of the script.
urls=()
# Hand-rolled command-line parsing (the -e option takes a variable number
# of arguments terminated by --, which getopts cannot express).
while (( $# > 0 )); do
case "$1" in
-d)
# -d DIR: place the downloaded file into DIR (must exist).
shift
if (( $# == 0 )); then
fatal "Option -d requires an argument"
fi
if [[ -d "$1" ]]; then
dirname="$1"
else
fatal "No such directory: $1"
fi ;;
-q)
# -q: quiet mode (flag read elsewhere in the script).
quiet=1 ;;
-v)
# -v: verbose mode; set -v additionally echoes script lines as read.
set -v
verbose=1 ;;
-i)
# -i: interactively choose which candidate URL to download.
interactive=1 ;;
-u)
# -u URL: add URL directly to the candidate list.
shift
if (( $# == 0 )); then
fatal "Option -u requires an argument"
fi
echov "URL added: command line option -u: $1"
urls+=("$1") ;;
-f)
# -f: fancy (human-readable) local file names -- see stripp below.
fancy=1 ;;
-n)
# -n: dry run; print the file name and URLs, download nothing.
dryrun=1 ;;
-a)
# -a: abstract mode (flag presumably read elsewhere -- not used in
# this portion of the script).
abstract=1 ;;
-e)
# -e CMD ARG... --: after a successful download, run CMD with the
# given arguments plus the downloaded file name (see tryft).
shift
if (( $# == 0 )); then
fatal "Option -e requires arguments"
fi
cmd="$1"
args=()
shift
# Collect arguments up to the mandatory -- terminator; the outer
# shift below then consumes the -- itself.
while (( $# > 0 )); do
if [[ "$1" == "--" ]]; then
break
fi
args+=("$1")
shift
done
if (( $# == 0 )); then
fatal "Unterminated -e option"
fi ;;
-p)
# -p VALUE: AMS pairing value (presumably used by the AMS download
# path -- not visible in this portion of the script).
shift
if (( $# == 0 )); then
fatal "Option -p requires an argument"
fi
amspairing="$1" ;;
*)
# The final non-option argument is the article id.
if (( $# == 1 )); then
artid="$1"
else
fatal "Unrecognized option $1"
fi ;;
esac
shift
done
# An article id is mandatory; print the synopsis and exit otherwise.
if [[ ! -v artid ]]; then
echoerr "No article id specified"
syn
fi
echon "Scientific article full text downloader by Dmitri Pavlov, version $version."
echon "To report bugs and missing features, please email me (host math.berkeley.edu, user pavlov).
Please include the command line and the output of the script when run with the -v option in your email.
Before submitting a bug report, please make sure that you can download the full text using your browser;
the inability of the script to download the full text is often an indication that you don't have a subscription.
"
echov "Supported databases: MathSciNet, zbMATH, arXiv, DOI, Library Genesis.
Email me if you want the script to support other databases.
An incomplete list of supported repositories: ScienceDirect, SpringerLink,
Taylor and Francis, Walter de Gruyter, World Scientific, SIAM, AMS, OUP,
CUP, CMS, MSP, MUSE, TAC, JSTOR, Project Euclid, NUMDAM, CEDRAM, EuDML.
Many additional repositories are supported by virtue of generic methods.
Email me if you want the script to support other repositories.
"
echon Article ID: "$artid"
# MathSciNet wraps outbound links in /leavingmsn?url=...; unwrap that to
# get the real target.  ${artid,,} lowercases only for the match; the
# assignment operates on the original string.
case "${artid,,}" in
*/leavingmsn?url=*)
artid="${artid##*/leavingmsn?url=}" ;;
esac
# Dispatch on the article id: recognize database/repository URLs and
# shorthand prefixes (arxiv:, mr..., zbl:, doi:, gen:, ...) and invoke
# the corresponding handler, which fills title/authors and the urls array.
# Matching is case-insensitive via ${artid,,}; handlers get the original.
case "${artid,,}" in
http*://*arxiv.org/abs/*)
echov arXiv URL "$artid"
arXiv "${artid##*abs/}" ;;
http*://*arxiv.org/pdf/*)
echov arXiv URL "$artid"
trimurl="${artid##*pdf/}"
arXiv "${trimurl%.pdf}" ;;
*arxiv.org/abs/*)
echov arXiv URL "$artid"
arXiv "${artid##*abs/}" ;;
*arxiv.org/pdf/*)
echov arXiv URL "$artid"
trimurl="${artid##*pdf/}"
arXiv "${trimurl%.pdf}" ;;
http*://front.math.ucdavis.edu/*)
echov Front for the arXiv URL "$artid"
arXiv "${artid##*ucdavis.edu/}" ;;
http*://*ams.org/mathscinet-getitem?mr=*)
echov MathSciNet getitem URL "$artid"
msnid="${artid##*mr=}"
msn "${msnid##MR}" ;;
http*://*ams.org/mathscinet/search/publdoc.html*mx-pid=*)
echov MathSciNet search URL "$artid"
msnid="${artid##*mx-pid=}"
msn "${msnid%%&*}" ;;
http*://*ams.org/mathscinet/search/*)
# Generic search result page: fetch it and dig out the MR number.
echov MathSciNet generic search URL "$artid"
data="$(fetch "$artid")"
msnid="$(echo "$data" | grep -a mathscinet-getitem | sed 's/.*mathscinet-getitem?mr=\([^"]*\)".*/\1/')"
msn "$msnid" ;;
http*://*zbmath.org/?q=an:*)
echov zbMATH URL "$artid"
zbl "${artid##*q=an:}" ;;
http*://zbmath.org/*)
echov zbMATH URL "$artid"
zbl "${artid##*zbmath.org/}" ;;
http*://*doi.org/*)
echov DOI URL "$artid"
doi "${artid##*doi.org/}" ;;
http*://hdl.handle.net/*)
# Handles resolve the same way as DOIs.
echov Handle URL "$artid"
doi "${artid##*handle.net/}" ;;
http*://*libgen*md5=*)
echov Library Genesis URL "$artid"
genid="${artid##*md5=}"
gen "${genid%%&*}" ;;
http*://*citeseerx*/*doi=*)
echov CiteSeerX URL "$artid"
csid="${artid##*doi=}"
cs "${csid%%&*}" ;;
http*://*jstor.org/stable/*)
# JSTOR stable ids map onto DOIs under the 10.2307 prefix.
echov "JSTOR URL $artid"
jstorid="${artid##*/stable/}"
echov "JSTOR id: $jstorid"
doi "10.2307/$jstorid" ;;
http*://projecteuclid.org/euclid.*)
echov Project Euclid URL "$artid"
pe "${artid##*projecteuclid.org/}" ;;
http*://eudml.org/doc/*)
echov EuDML URL "$artid"
eudml "${artid##*eudml.org/doc/}" ;;
http*://*numdam.org/item/?id=*)
echov Numdam URL "$artid"
nid="${artid##*numdam.org/item/?id=}"
numdam "${nid%%/}" ;;
http*://*numdam.org/item/*)
echov Numdam URL "$artid"
nid="${artid##*numdam.org/item/}"
numdam "${nid%%/}" ;;
http*://mi.mathnet.ru/eng/*)
echov Math-Net.Ru URL "$artid"
mnid="${artid##*mathnet.ru/eng/}"
mathnet "$mnid" ;;
http*://mi.mathnet.ru/rus/*)
echov Math-Net.Ru URL "$artid"
mnid="${artid##*mathnet.ru/rus/}"
mathnet "$mnid" ;;
#http://www.mathnet.ru/php/archive.phtml?wshow=paper&jrnid=ivm&paperid=4812&option_lang=eng
http*://*mathnet.ru/*jrnid=*)
echov Math-Net.Ru URL "$artid"
mnid="$(echo "$artid" | sed 's@.*jrnid=\([^&]*\)&paperid=\([^&]*\).*@\1\2@')"
mathnet "$mnid" ;;
https://link.springer.com/article/*)
# Springer article paths embed the DOI directly.
echov Springer Link URL "$artid"
slid="${artid##*springer.com/article/}"
echov Springer DOI "$slid"
doi "$slid" ;;
http*://annas-archive.org/md5/*)
echov "Anna's Archive URL $artid"
aaid="${artid##*annas-archive.org/md5/}"
echov "Anna's Archive MD5: $aaid"
gen "$aaid" ;;
http*://*)
# Any other URL: try generic full-text extraction with placeholder
# metadata (auti flag is presumably read elsewhere -- not used here).
echon Unknown HTTP URL: "$artid"
echon Attempting generic full-text URL extraction
title=unknown-title
authors=unknown-authors
auti=1
fturl "$artid" ;;
# Shorthand prefixes: strip the prefix and call the handler directly.
arxiv:*)
arXiv "${artid:6}" ;;
mr*)
msn "${artid:2}" ;;
zbl:*)
zbl "${artid:4}" ;;
doi:*)
doi "${artid:4}" ;;
gen:*)
gen "${artid:4}" ;;
cs:*)
cs "${artid:3}" ;;
pe:*)
pe "${artid:3}" ;;
eudml:*)
eudml "$artid" ;;
numdam:*)
numdam "${artid:7}" ;;
mathnet:*)
mathnet "${artid:8}" ;;
*)
fatal "Unrecognized article ID: $artid" ;;
esac
# Report the bibliographic metadata collected by the handler above.
echov Title: "$title"
echov Authors:
echov "$authors"
# Normalize a title/author string (read from stdin) into a chunk of the
# local file name.  First pass: drop [bracketed] spans and TeX \commands,
# and map / to - so the result is path-safe.  Then, in the default mode,
# lowercase everything, turn punctuation/whitespace runs into single
# hyphens, and join lines with "-"; in fancy mode (-f), keep the text
# readable and join lines with ", ", ending the author part with ". ".
# Note: a trailing newline on stdin deliberately becomes a trailing
# separator ("-" or ". "), which later separates authors from the title.
stripp() {
sed 's/\[[^]]*\]//g;s/\\[a-zA-Z]*//g;s|/|-|g' | if [[ ! -v fancy ]]; then
sed 's/.*/\L&/' | sed 's/'"'"'/\f/g;s/[[:punct:]]/ /g;s/\f/'"'"'/g;s/'"''"'//g;s/[[:space:]]\+/-/g;s/^-//;s/-$//' | tr \\n -
else
sed 's/[[:space:]]\+/ /g;s/^ //;s/ $//' | tr \\n \\f | sed 's/\.$//g' | sed 's/\f$/. /;s/\f/, /g'
fi
}
# Build the local file name from the collected metadata.
# echo -n: no trailing newline, so the title gets no trailing separator.
title="$(echo -n "$title" | stripp)"
# Plain echo keeps the trailing newline, which stripp turns into a
# trailing separator ("-" or ". ") between the authors and the title.
authors="$(echo "$authors" | stripp)"
# An empty author list reduces to a lone "-"; drop it entirely.
if [[ "$authors" == "-" ]]; then
authors=
fi
name="$authors$title"
echov Stripped title: "$title"
echov Combined authors: "$authors"
echov Local file name without extension: "$name"
if [[ -v dirname ]]; then
echov Directory: "$dirname"
name="$dirname/$name"
echon Directory and file name without extension: "$name"
fi
# -n: report what would be downloaded and stop.
if [[ -v dryrun ]]; then
echo "$name"
printf '%s\n' "${urls[@]}"
exit 0
fi
# Try to download URL $1 into a temporary file.  On success the file is
# moved to $name.<extension> (extension from $dextn or sniffed via file(1)),
# the optional -e command is launched on it, and the whole script exits 0.
# On any failure the function returns so the caller can try the next URL.
# Fix: the mktemp file used to be leaked on every failure path (fetch
# failure, empty download, unrecognized file type); it is now removed.
tryft() {
tname="$(mktemp /tmp/article-XXX)"
echon Temporary file name: "$tname"
if retft "$1" "$tname"; then
if [ -s "$tname" ]; then
echon Successfully downloaded "$1"
if [[ ! -v dextn || -z "$dextn" ]]; then
echon "Last resort attempt to determine file extension using file type"
fitype="$(file -b --mime-type "$tname")"
echon "File type: $fitype"
case "$fitype" in
application/pdf) echon PDF; vextn="pdf" ;;
application/postscript) echon PostScript; vextn="ps" ;;
image/vnd.djvu) echon DjVu; vextn="djvu" ;;
application/x-dvi) echon DVI; vextn="dvi" ;;
application/x-tar) echon DVI in a TAR file; vextn="tar" ;;
application/octet-stream)
echon "No extension supplied for application/octet-stream. Report this error to me by email."
rm -f -- "$tname"
return 1 ;;
*)
echon "Unrecognized file type: not PDF, PostScript, DjVu, or DVI, the downloaded file will have no extension."
rm -f -- "$tname"
return 1 ;;
esac
echon Extension: "$vextn"
fqname="$name.$vextn"
else
fqname="$name.$dextn"
fi
echon Moving "$tname" to "$fqname"
mv "$tname" "$fqname"
if [[ -v cmd ]]; then
# ${args[@]:+...} expands to nothing when args is empty (safe under set -u).
echon Launching "$cmd" "${args[@]:+${args[@]}}" "$fqname"
"$cmd" "${args[@]:+${args[@]}}" "$fqname"
fi
# Success: the whole script is done.
exit 0
else
echon Downloaded an empty file, skipping.
rm -f -- "$tname"
fi
else
# The fetch itself failed; discard the possibly partial download.
rm -f -- "$tname"
fi
}
# Nothing to try: every lookup failed to produce a candidate URL.
if [ ${#urls[@]} -eq 0 ]; then
echon No full text URLs found for "$artid"
echon Email me if you can access the full text.
exit 1
fi
if [[ -v interactive ]]; then
echo Full text URLs:
for i in "${!urls[@]}"; do
echo "$i) ${urls[$i]}"
done
# With a single candidate there is nothing to choose; just try it.
# (tryft exits the script on success, so reaching exit 1 means failure.)
if [[ "${#urls[@]}" == 1 ]]; then
echon Automatically selecting the only URL
tryft "${urls[0]}"
exit 1
fi
# Prompt for an index repeatedly until a download succeeds (tryft exits
# the script on success) or the user enters an empty line.
while true; do
read -r
if [ -z "$REPLY" ]; then
echon Nothing selected
exit 1
else
echon Selected "$REPLY": "${urls["$REPLY"]}"
tryft "${urls["$REPLY"]}"
fi
done
else
echon Full text URLs:
for i in "${!urls[@]}"; do
echon "$i) ${urls[$i]}"
done
# Non-interactive mode: try each URL once in order, skipping duplicates;
# tryft exits the script as soon as one download succeeds.
declare -A tried
for i in "${!urls[@]}"; do
echov "i=$i"
echov "urls[i]=${urls["$i"]}"
if [[ ${tried["${urls["$i"]}"]+_} ]]; then
echon Skipping the duplicate URL "$i": "${urls[$i]}"
continue
fi
echon Attempting to download full text URL "$i": "${urls[$i]}"
tried["${urls["$i"]}"]=1
tryft "${urls[$i]}"
done
echon No working full text URLs
exit 1
fi