#!/bin/bash
# mathid: a bash script to identify and possibly download a paper indexed by
# arXiv, Mathematical Reviews, Zentralblatt MATH, CrossRef DOI, Library Genesis, or Google Scholar
# using its title.
# Copyright 2015, 2016 Dmitri Pavlov. Distributed under the terms of the GNU Affero General Public License version 3.
#
# Flow: parse options, then for every selected database letter run a query,
# show a numbered list of hits on stderr, read the user's choice from stdin,
# and collect the resulting identifier.  Depending on -e / -x a command is run
# on each identifier immediately or on all identifiers at the end.
#
# External tools invoked below: curl, sed, tr, tail, grep, jq, xidel, less.
#
# NOTE(review): several sed scripts and xidel templates further down contain
# raw line breaks and empty patterns.  They look like they once held HTML tag
# text that was stripped from this copy of the file.  They are kept exactly as
# found and must be restored from a pristine copy before those branches can
# work.

version=2023-11-21

# $databases: one letter per enabled database, in dbnames order.
# $dbd: human-readable list "Name (letter), ..." used in help and banner text.
databases=
dbd=
dbnames=("CrossRef / DOI" "EuDML" "MathSciNet" "zbMATH" "arXiv" "Library Genesis" "ProjectEuclid")
# Not enabled by default: "CiteSeerX" "ISBN" "Google Scholar"
for (( i=0; i < ${#dbnames[@]}; i++ )); do
  if (( i>0 )); then dbd+=", "; fi
  # The letter is the lower-cased first character of the last word of the name.
  dbword="${dbnames[i]##* }"
  dbletter="${dbword:0:1}"
  databases+="${dbletter,,}"
  dbd+="${dbnames[i]} (${databases:$i:1})"
done

set -u -o pipefail

# Print all arguments to stderr.
echoerr() {
  >&2 echo "$@"
}

# Print all arguments to stderr and terminate the script with status 1.
fatal() {
  echoerr "$@"
  exit 1
}

# Normal (always shown) progress message; goes to stderr.
echon() {
  echoerr "$@"
}

# Progress message shown only when -v was given (i.e. $verbose is set).
echov() {
  if [[ -v verbose ]]; then echoerr "$@"; fi
}

# Print the synopsis and exit with status 1.
# NOTE(review): the help text is on a single line in this copy; it reads like
# it originally had one option per line.
syn() {
  fatal "Synopsis: $0 [ options ] title -v: be verbose -b: format entries for TeX bibliography -d [$databases]: only query the specified databases: $dbd -a author: author name -p pairing: use an AMS pairing key to access MathSciNet -e command arguments ---: execute a command after a successful choice was made -x command arguments ---: execute a command after all ids were collected"
}

# Option parsing.  The first word that is not a recognised option starts the
# title; everything from there on is left in "$@".
while (( $# > 0 )); do
  curopt="$1"
  case "$1" in
    -v)
      # Two separate commands: as a single command, "set -v verbose=1" would
      # replace the positional parameters with the word "verbose=1".
      set -v
      verbose=1 ;;
    -b)
      biblio=1 ;;
    -p)
      shift
      if (( $# == 0 )); then
        fatal "Option -p requires an argument"
      fi
      amspairing="$1" ;;
    -a)
      shift
      if (( $# == 0 )); then
        fatal "Option -a requires an argument"
      fi
      author="$1" ;;
    -d)
      shift
      if (( $# == 0 )); then
        fatal "Option -d requires an argument"
      fi
      # Reject any character that is not one of the known database letters.
      if [[ "$1" =~ [^"$databases"] ]]; then
        fatal "Synopsis: -d [$databases]"
      fi
      databases="$1" ;;
    -e|-x)
      shift
      if (( $# == 0 )); then
        fatal "Option $curopt requires arguments"
      fi
      cmd="$1"
      args=()
      shift
      # Collect the command's arguments up to the "---" terminator; the
      # terminator itself is consumed by the shift at the bottom of the loop.
      while (( $# > 0 )); do
        if [[ "$1" == "---" ]]; then
          break
        fi
        args+=("$1")
        shift
      done
      if (( $# == 0 )); then
        fatal "Unterminated $curopt option"
      fi
      if [[ "$curopt" == "-e" ]]; then
        execimm=1
      fi ;;
    *)
      break ;;
  esac
  shift
done

if (( $# == 0 )); then
  echoerr "No article title specified"
  syn
fi

echon "Math article identifier by Dmitri Pavlov, version $version. To report bugs and missing features, please email me (host math.berkeley.edu, user pavlov). Please include the command line and the output of the script in your email. Supported databases: $dbd. Email me if you want the script to support other databases. "

echon Query: "$@"

# curl wrapper: silent, but still report errors; extra arguments go to curl.
fetch() {
  echov Fetching "$@"
  curl -s -S "$@"
}

# fetch with the cookie engine enabled and no cookies loaded.
fetchc() {
  fetch -b /dev/null "$@"
}

# fetch, following redirects.
fetchr() {
  fetch -L "$@"
}

# fetch with cookies enabled, following redirects.
fetchcr() {
  fetchc -L "$@"
}

# Issue a HEAD request and print only the redirect target URL.
sfetch() {
  fetch -o /dev/null -I -w "%{redirect_url}\n" "$@"
}

# Fetch from MathSciNet, sending the AMS pairing cookie when -p was given.
fetchmsn() {
  echov Fetching MSN "$@"
  if [[ -v amspairing ]]; then
    fetchr -b amspairing="$amspairing" "$@"
  else
    fetchr "$@"
  fi
}

# Fetch from zbMATH and print the page on stdout.  While the response mentions
# a CAPTCHA, ask the user to solve it and retry with the solution attached.
# Callers run this inside $(...), so everything meant for the user must go to
# stderr; only the final page may reach stdout.
fetchz() {
  local data id captcha
  data="$(fetch "$@")"
  while [[ $data == *captcha* ]]; do
    echon zbMATH demands a CAPTCHA, which means that no subscription is available. Manual entry.
    id="$(printf '%s\n' "$data" | grep captcha_id | sed 's/.*value="\([^"]*\)".*/\1/')"
    # NOTE(review): `show` is not defined anywhere in this file; presumably an
    # external helper on PATH that displays the CAPTCHA image - confirm.
    show "https://zbmath.org/captcha/$id"
    # Prompt on stderr: stdout is captured by the caller's command substitution.
    echon Enter zbMATH CAPTCHA:
    read -r captcha
    echov Entered CAPTCHA: "$captcha"
    data="$(fetch -F captcha_id="$id" -F captcha_solution="$captcha" "$@")"
  done
  printf '%s\n' "$data"
}

# Let the user pick one record.
#   $1: candidate records, one per line.
# Prints the records numbered from 0 on stderr and reads the chosen number
# from stdin.  On success stores the chosen record in the global $id and
# returns 0.  Returns 1 when there are no records, when an empty line is
# entered, or when the entry is not a valid record number.
selid() {
  if [ -z "$1" ]; then
    echon Nothing found
    return 1
  fi
  local -a records=()
  local j=0 line
  local numre='^[[:space:]]*([0-9]+)[[:space:]]*$'
  while read -r line; do
    echon "$j)" "$line"
    j=$((j+1))
    records+=("$line")
  done <<<"$1"
  read -r
  if [ -z "$REPLY" ]; then
    echon Nothing selected
    unset id
    return 1
  fi
  # Accept only a plain decimal index inside the list.  Using the raw reply as
  # an array subscript would abort the whole script under `set -u` on a typo
  # or an out-of-range number, and would evaluate it as arithmetic.
  if [[ "$REPLY" =~ $numre ]] && (( 10#${BASH_REMATCH[1]} < ${#records[@]} )); then
    id="${records[10#${BASH_REMATCH[1]}]}"
    echon Selected "$id"
    return 0
  fi
  echon Invalid selection: "$REPLY"
  unset id
  return 1
}

# Record the identifier currently held in $id.  With -e, first run the user's
# command on it and exit the script with status 0 if that command succeeds.
append() {
  if [[ -v execimm ]]; then
    echon Launching "$cmd" "${args[@]:+${args[@]}}" "$id"
    if "$cmd" "${args[@]:+${args[@]}}" "$id"; then
      exit 0
    fi
  fi
  r+=("$id")
}

# Identifiers collected so far.
declare -a r
#zburl=()

# Query each requested database in the order given by $databases.
for (( i=0; i < ${#databases}; i++ )); do
  case "${databases:$i:1}" in
    m)
      echon Querying MathSciNet:
      msnquery=(--data extend=1 --data pg1=TI --data-urlencode "s1=$*")
      if [[ -v author ]]; then
        msnquery+=(--data pg2=AUCN --data-urlencode "s2=$author")
      fi
      echov Query: "${msnquery[@]}"
      # -w appends the HTTP status code as the last line of the output.
      if rawdata=$(fetchmsn -w '\n%{http_code}\n' -G "https://mathscinet.ams.org/mathscinet/search/publications.html" "${msnquery[@]}"); then
        if [[ "$(echo "$rawdata" | tail -1)" == 401 ]]; then
          echon No subscription to MathSciNet, skipping
        else
          echov Success
          list=$(echo "$rawdata" | sed -e '/./{H;$!d;}' -e 'x;/"mrnum"/b' -e '/MR[0-9]*<\/strong>/b' -e '/"item_status"/b' -e '/"title"/b' -e d | tr \\n @ | sed 's:@[^@]*\(MR[^<]*\)[^@]*@:@///\1@:g;s:@[^@]*"item_status"[^@]*\([^<]*\)[^@]*@:@(\1)@:g;s:<[^>]*>::g;s:@@:@:g;s:@///::;s:@///:\n:g;s:@: :g;s/$/\n/')
          if selid "$list"; then
            # The MR number is the first word of the selected record.
            id="${id%% *}"
            echon MR ID: "$id"
            append
          fi
        fi
      fi ;;
    z)
      echon Querying zbMATH:
      zbquery="q=ti:$*"
      if [[ -v author ]]; then
        zbquery="$zbquery & au:$author"
      fi
      echov Query: "$zbquery"
      data="$(fetchz "https://zbmath.org/" -G --data-urlencode "$zbquery")"
      # Verbose-only dump of the page, as in the other branches (this replaces
      # an unconditional write to the fixed path /tmp/zb).
      echov Raw data:
      echov "$data"
      # NOTE(review): the sed scripts below contain raw line breaks and an
      # empty pattern where HTML tag text appears to have been lost; kept
      # verbatim, to be restored from a pristine copy.
      list="$(echo "$data" | tr \\n $'\x1E' | sed 's@
@\n
@g' | sed 1d | tr -d $'\x1E' | tr -d $'\t' | tr -s ' ' | sed 's@
.*@@g' | sed 's@

@ :: @g' | sed 's@
@ :: @g' | sed 's@
@ :: @g' | sed 's@
.*
@@g' | sed 's@(English)@@g' | sed 's@@: \1@g' | sed 's@>Zbl @> :: https://zbmath.org/?q=an:@g' | sed 's@<[^>]*>@@g' | sed 's/Full Text://' | tr -s ' ' | sed 's/^ //;s/ $//;s/’/'"'"'/g')"
      #list="$(echo "$data" | sed 's/^[[:space:]]*//g;s/ / /g;s/^
      #/\n&/g' | sed -e '/./{H;$!d;}' -e 'x;/"source"/b' -e d | tr \\n @ | sed 's/^@//;s/@@/\n/g;s/@$/\n/' | sed 's^.*>\(Zbl\|JFM\) \([^<]*\).*^Zbl:\2 &^g' | tr @ \ | sed 's/<[^<]*>//g')"
      if selid "$list"; then
        #id="$(echo "$id" | sed 's/.*Zbl:/Zbl:/;s/ .*//')"
        id="$(echo "$id" | sed 's@.*https://zbmath.org/?q=an:@Zbl:@;s/ .*//')"
        echon Zbl ID: "$id"
        append
      fi ;;
    a)
      echon Querying arXiv:
      #arxivquery="search_query=ti:\"$*\""
      # Hyphens in the title are replaced by spaces before querying.
      arxivquery="search_query=ti:\"${*//-/ }\""
      if [[ -v author ]]; then
        arxivquery="$arxivquery AND au:\"$author\""
      fi
      echov Query: "$arxivquery"
      list=$(fetch https://export.arxiv.org/api/query --data-urlencode "$arxivquery" | xidel - -s -e 'feed/entry/(string-join(author/name,", ")||": "||title||" ("||updated||"): "||id||"@@@")' | sed 's/^[[:space:]]*//g' | tr \\n ' ' | sed 's/ $//;s/@@@/\n/g')
      if selid "$list"; then
        id=arXiv:"${id##*http://arxiv.org/abs/}"
        echon arXiv ID: "$id"
        append
      fi ;;
    d)
      echon Querying CrossRef:
      #crossrefquery="q=$*"
      #if [[ -v author ]]; then
      #  crossrefquery="$crossrefquery $author"
      #fi
      #echov Query: "$crossrefquery"
      #list=$(fetch -f -G https://search.crossref.org/dois --data-urlencode "$crossrefquery" | jq -r '.[]|@text"\(.fullCitation): https://doi.org/\(.doi)@@@"' | tr -d \\n | sed 's|||g;s|||g;s/ */ /g;s/&/\&/g;s/@@@/\n/g')
      # | jq -r '.[]|@text"\(.fullCitation): https://doi.org/\(.doi)@@@"' | tr -d \\n | sed 's|||g;s|||g;s/ */ /g;s/&/\&/g;s/@@@/\n/g')
      crossrefquery=(--data-urlencode "query.bibliographic=$*")
      if [[ -v author ]]; then
        crossrefquery+=(--data-urlencode "query.author=$author")
      fi
      crossrefquery+=(--data-urlencode "select=author,title,container-title,volume,issue,page,publisher,published-print,URL")
      crossrefquery+=(--data-urlencode "mailto=somebody@somewhere.info")
      echov Query: "${crossrefquery[@]}"
      crdata="$(fetch -f -G https://api.crossref.org/works "${crossrefquery[@]}")"
      echov Result: "$crdata"
      # In both filters gsub(" +";" ") squeezes runs of spaces.  The pattern
      # must not be able to match the empty string (" *" can), otherwise it
      # matches between every pair of characters.
      if [[ -v biblio ]]; then
        # -b: page a multi-line bibliography-style listing instead of selecting.
        printf '%s\n' "$crdata" | jq -r '.message.items[]|"\([.author[]?|[.given?,.family?]|map(select(.!=null))|join(" ")]|join(", ")).\n\(.title[]?|gsub("\\p{Cc}";"")|gsub(" +";" ")).\n\((."container-title")?|join(" :: ")|gsub("\\p{Cc}"; "")|gsub(" +";" ")) \(.volume?):\(.issue?) (\(."published-print"."date-parts"[0][0])), \(.page?).\n\(.publisher|gsub("\\p{Cc}";"")|gsub(" +";" "))\n\(.URL)\n"' | less
      else
        list="$(printf '%s\n' "$crdata" | jq -r '.message.items[]|"\([.author[]?|[.given?,.family?]|map(select(.!=null))|join(" ")]|join(", ")). \(.title[]?|gsub("\\p{Cc}";"")|gsub(" +";" ")). \((."container-title")?|join(" :: ")|gsub("\\p{Cc}"; "")|gsub(" +";" ")) \(.volume?):\(.issue?) (\(."published-print"."date-parts"[0][0])), \(.page?). \(.publisher|gsub("\\p{Cc}";"")|gsub(" +";" ")) :: \(.URL)"')"
        if selid "$list"; then
          # The URL is the last space-separated word of the record.
          id="${id##* }"
          echon DOI: "$id"
          append
        fi
      fi ;;
    e)
      echon Querying EuDML:
      if [[ -v author ]]; then
        eudmlquery=(--data format=xml --data-urlencode "q=authorCoauthorName all \"$author\" and defName all \"$*\"")
      else
        eudmlquery=(--data format=xml --data-urlencode "q=defName all \"$*\"")
      fi
      echov Query: "${eudmlquery[@]}"
      # NOTE(review): -k turns off TLS certificate verification for this host.
      list=$(fetch -G -k https://eudml.org/api/search "${eudmlquery[@]}" | xidel - -s -e 'response/results/result/(string-join(author,", ")||": "||title/normalize-space(.)||" :: "||id)')
      if selid "$list"; then
        id="${id##* :: urn:}"
        echon EuDML id: "$id"
        append
      fi ;;
    g)
      echon "Querying Library Genesis via Anna's Archive:"
      if [[ -v author ]]; then
        aaquery=(--data-urlencode "q=$author $*")
      else
        aaquery=(--data-urlencode "q=$*")
      fi
      echov Query: "${aaquery[@]}"
      # NOTE(review): the sed scripts below contain raw line breaks and no-op
      # substitutions (s/&/\&/, s/'/'/, s/"/"/) where HTML tags and entities
      # appear to have been lost; kept verbatim, to be restored from a
      # pristine copy.
      list=$(fetch -G https://annas-archive.org/search "${aaquery[@]}" \
        | tr -d '\r' \
        | sed '/
/,$d' \
        | sed -n '/
]*>//g;s/^ */:: /;p;d};/
]*>//g;s/^ */:: /;p;d};//g;s/"/"/g' \
        | sed "s/&/\&/g;s/'/'/g" \
        | sed 's/<[^>]*>//g' \
        | sed '/./{H;$!d} ; x ; s/\n\t/@/g' \
        | sed '/./{H;$!d} ; x ; s/\n\([^\n]*\)\n:: \(.*\)\n:: \(.*\)\n:: \(.*\)\n:: \(.*\)/\5 :: \x1b[1m\3\x1b[0m :: \2 :: \4 :: gen:\1/' \
        | sed '/^$/d')
      #echon Querying Library Genesis:
      #if [[ -v author ]]; then
      #  libgenquery=(--data "column=author,title" --data-urlencode "req=$* $author")
      #else
      #  libgenquery=(--data "column=title" --data-urlencode "req=$*")
      #fi
      #echov Query: "${libgenquery[@]}"
      #list=$(fetch -G http://libgen.rs/search.php "${libgenquery[@]}" | sed 's/]*>/b' -e d | tr -d \\r | tr \\n @ | sed 's/^@//;s/@$//;s/@@/\n/g' | sed 's/^.*md5=\([0-9A-Fa-f]*\).*$/&: gen:\1/' | sed 's/<[^>]*>//g' | sed 's/^[0-9]*@//;s/@/ /g' | sed 's/ / /g')
      if selid "$list"; then
        id=gen:"${id##*gen:}"
        echon "Library Genesis (Anna's Archive) ID: $id"
        append
      fi ;;
    p)
      echon Querying Project Euclid
      pequery=(--data-urlencode "q.a.title=$*")
      if [[ -v author ]]; then
        pequery+=(--data-urlencode "q.a.author=$author")
      fi
      pequery+=(--data "type=index")
      # NOTE(review): the xidel expression is just '*' here, while the sed
      # below expects "link := / title := / author :=" lines; the extraction
      # template appears to have been lost.
      data="$(fetch -G http://projecteuclid.org/search_result "${pequery[@]}" | xidel - -s -e '*')"
      echov Raw data:
      echov "$data"
      list="$(printf %s "$data" | tr \\n @ | sed 's/link := \/\([^@]*\)@title := \([^@]*\)@\(\(author := [^@]*@\)*\)/\3: \2 pe:\1\n/g' | sed 's/author := \([^@]*\)@/\1, /g;s/, : /: /')"
      echov List: "$list"
      if selid "$list"; then
        id=pe:"${id##* pe:}"
        echon Project Euclid ID: "$id"
        append
      fi ;;
    c)
      echon Querying CiteSeerX:
      if [[ -v author ]]; then
        csquery=(--data-urlencode "q=title:($*)+AND+author:$author")
      else
        csquery=(--data-urlencode "q=title:($*)")
      fi
      echov Query: "${csquery[@]}"
      # NOTE(review): the xidel template below has lost the HTML elements that
      # surrounded the {...} capture expressions; kept verbatim.
      data="$(fetch -G http://citeseerx.ist.psu.edu/search "${csquery[@]}" | xidel - -s -e '

{link:=@href,title:=normalize-space(.)}

{authors:=normalize-space(.)}
{abstract := normalize-space(.)}
*')"
      echov Raw data:
      echov "$data"
      list="$(printf %s "$data" | tr \\n @ | sed 's/link := [^@]*doi=\([0-9.]*\)[^@]*@title := \([^@]*\)@authors := \([^@]*\)@abstract := \([^@]*\)@/\3: \2 :: \4 cs:\1\n/g')"
      if selid "$list"; then
        id=cs:"${id##* cs:}"
        echon CiteSeerX ID: "$id"
        append
      fi ;;
    s)
      echon Querying Google Scholar:
      scholarquery=(--data-urlencode "as_q=$*" --data as_occt=title)
      if [[ -v author ]]; then
        scholarquery+=(--data-urlencode "as_sauthors=$author")
      fi
      echov Query: "${scholarquery[@]}"
      # NOTE(review): the xidel template and the first sed script below have
      # lost their HTML tag text; kept verbatim.
      data="$(fetch -A Mozilla -G https://scholar.google.com/scholar "${scholarquery[@]}" | xidel - -s -e '

{title:=inner-xml(.)}

{author:=inner-xml(.)}
{abstract:=inner-xml(.)}
*')"
      echov Raw data:
      echov "$data"
      data="$(echo "$data" | sed 's@@@g;s@@@g;s@
@@g;s@link := /scholar?\(cites\|cluster\)=\([0-9]*\).*@link := \1@g')"
      echov Results:
      echov "$data"
      list="$(printf %s "$data" | tr \\n @ | sed 's/title := \([^@]*\)@author := \([^@]*\)@abstract := \([^@]*\)@link := \([^@]*\)@/\2: \1 :: \3 gs:\4\n/g')"
      if selid "$list"; then
        id=gs:"${id##*gs:}"
        echon Google Scholar ID: "$id"
        append
      fi ;;
    i)
      # Only announces the lookup; no query is performed in this branch.
      echon 'Querying Google Books for ISBN numbers:' ;;
      #jq -C -r '.items[].volumeInfo|"\(select(.authors!=null)|.authors|join(", ")) :: \([.title?,.subtitle?]|map(select(.!=null))|join("; ")) :: \(.publisher?), \(.publishedDate?)"'
    *)
      fatal "Unrecognized database letter: ${databases:$i:1}" ;;
  esac
done

# Report what was collected and, if a command was given with -e or -x, replace
# this process with that command applied to all collected identifiers.
echon Collected IDs:
echon "${r[@]:+${r[@]}}"
if [[ ! -v r ]]; then
  echon Nothing found
elif [[ -v cmd ]]; then
  echon Launching "$cmd" "${args[@]:+${args[@]}}" "${r[@]:+${r[@]}}"
  exec "$cmd" "${args[@]:+${args[@]}}" "${r[@]:+${r[@]}}"
fi