Source code for bibmanager.ads_manager.ads_manager

# Copyright (c) 2018-2024 Patricio Cubillos.
# bibmanager is open-source software under the MIT license (see LICENSE).

# Public names of this module (what a star-import exposes):
__all__ = [
    'manager',
    'search',
    'display',
    'add_bibtex',
    'update',
    'key_update',
]

import json
import os
import pickle
import sys
import textwrap
import urllib

import prompt_toolkit
import pygments
from pygments.token import Token
import requests

from .. import bib_manager as bm
from .. import config_manager as cm
from .. import utils as u


def manager(query=None):
    """
    Run an ADS search, or page through the results of the previous one.

    The search itself and the printing are delegated to search() and
    display(); this function only keeps track of the paging state,
    which is pickled into the file pointed to by u.BM_CACHE().

    Parameters
    ----------
    query: String
        The ADS query to execute.  If None, show the next set of
        entries of the query stored in the cache file (if any).
    """
    rows = int(cm.get('ads_display'))

    if query is not None:
        # Brand new search, start from the first match:
        start = index = 0
        results, nmatch = search(query, start=start)
    elif not os.path.exists(u.BM_CACHE()):
        # Nothing was cached, thus there is nothing left to show:
        print("There are no more entries for this query.")
        return
    else:
        # Resume from where the previous call stopped:
        with open(u.BM_CACHE(), 'rb') as cache:
            results, query, start, index, nmatch = pickle.load(cache)
        n_cached = start + len(results)
        # Fetch more entries only when the cached ones cannot fill
        # the next page and ADS still has more matches to offer:
        if n_cached < nmatch and index + rows > n_cached:
            fetched, nmatch = search(query, start=n_cached)
            results = results[index-start:] + fetched
            start = index

    display(results, start, index, rows, nmatch)

    index += rows
    if index < nmatch:
        # There are entries left, remember the state for the next call:
        state = [results, query, start, index, nmatch]
        with open(u.BM_CACHE(), 'wb') as cache:
            pickle.dump(state, cache, protocol=4)
    else:
        # All matches have been shown, the cache is no longer needed:
        with u.ignored(OSError):
            os.remove(u.BM_CACHE())
def display(results, start, index, rows, nmatch, short=True):
    """
    Show on the prompt a list of entries from an ADS search.

    Parameters
    ----------
    results: List of dicts
        Subset of entries returned by a query.
    start: Integer
        Index assigned to first entry in results.
    index: Integer
        First index to display.
    rows: Integer
        Number of entries to display.
    nmatch: Integer
        Total number of entries corresponding to query (not necessarily
        the number of entries in results).
    short: Bool
        Format for author list. If True, truncate with 'et al' after
        the second author.

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> start = index = 0
    >>> rows = 20
    >>> query = 'author:"^cubillos, p" property:refereed'
    >>> results, nmatch = am.search(query, start=start)
    >>> am.display(results, start, index, rows, nmatch)
    """
    # Entries are indexed relative to the whole query, shift by start:
    entries = results[index-start:index-start+rows]

    # The style is the same for all entries, look it up only once
    # (and only if there is something to show):
    if entries:
        style = prompt_toolkit.styles.style_from_pygments_cls(
            pygments.styles.get_style_by_name(cm.get('style')))
    author_format = 'short' if short else 'long'

    for result in entries:
        tokens = [(Token.Text, '\n')]

        # Not every record comes with a title, use a placeholder then
        # (same as for records without authors):
        titles = result.get('title') or ['---']
        # The label is included so that textwrap accounts for its width,
        # it is stripped afterwards since the tokenizer takes the value.
        # NOTE(review): confirm the continuation indent widths below
        # (the labels are 7 and 9 characters long).
        title = textwrap.fill(
            f"Title: {titles[0]}",
            width=78,
            subsequent_indent=' ')
        tokens += u.tokenizer('Title', title[7:])

        if 'author' in result:
            author_list = [u.parse_name(author) for author in result['author']]
            authors = textwrap.fill(
                f"Authors: {u.get_authors(author_list, format=author_format)}",
                width=78,
                subsequent_indent=' ',
            )
        else:
            authors = 'Authors: ---'
        tokens += u.tokenizer('Authors', authors[9:])

        bibcode = result['bibcode']
        adsurl = f"https://ui.adsabs.harvard.edu/abs/{bibcode}"
        tokens += u.tokenizer('ADS URL', adsurl)
        tokens += u.tokenizer('bibcode', bibcode, Token.Name.Label)

        prompt_toolkit.print_formatted_text(
            prompt_toolkit.formatted_text.PygmentsTokens(tokens),
            end="",
            style=style,
            output=prompt_toolkit.output.defaults.create_output(sys.stdout))

    # Tell the user how to get the next page when there are entries left:
    if index + rows < nmatch:
        more = " To show the next set, execute:\nbibm ads-search -n"
    else:
        more = ""
    print(
        f"\nShowing entries {index+1}--{min(index+rows, nmatch)} out of "
        f"{nmatch} matches.{more}")
def add_bibtex(
        input_bibcodes, input_keys, eprints=None, dois=None,
        update_keys=True, base=None, tags=None, return_replacements=False,
    ):
    """
    Add bibtex entries from a list of ADS bibcodes, with specified keys.
    New entries will replace old ones without asking if they are
    duplicates.

    Parameters
    ----------
    input_bibcodes: List of strings
        A list of ADS bibcodes.
    input_keys: List of strings
        BibTeX keys to assign to each bibcode.
    eprints: List of strings
        List of ArXiv IDs corresponding to the input bibcodes.
        If None, no ArXiv IDs are used to match the ADS results.
    dois: List of strings
        List of DOIs corresponding to the input bibcodes.
        If None, no DOIs are used to match the ADS results.
    update_keys: Bool
        If True, attempt to update keys of entries that were updated
        from arxiv to published versions.
    base: List of Bib() objects
        If None, merge new entries into the bibmanager database.
        If not None, merge new entries into base.
    tags: Nested list of strings
        The list of tags for each input bibcode.
    return_replacements: Bool
        If True, also return a dictionary of replaced keys.

    Returns
    -------
    bibs: List of Bib() objects
        Updated list of BibTeX entries.
    reps: Dict
        A dictionary of replaced key names (only returned if
        return_replacements is True).

    Raises
    ------
    ValueError if any of the HTTP requests to ADS is not successful.

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> # A successful add call:
    >>> bibcodes = ['1925PhDT.........1P']
    >>> keys = ['Payne1925phdStellarAtmospheres']
    >>> am.add_bibtex(bibcodes, keys)

    >>> # A failing add call (when ADS answers with a 404 status):
    >>> bibcodes = ['1925PhDT....X....1P']
    >>> am.add_bibtex(bibcodes, keys)
    ValueError: There were no entries found for the requested bibcodes.

    >>> # A successful add call with multiple entries:
    >>> bibcodes = ['1925PhDT.........1P', '2018MNRAS.481.5286F']
    >>> keys = ['Payne1925phdStellarAtmospheres', 'FolsomEtal2018mnrasHD219134']
    >>> am.add_bibtex(bibcodes, keys)

    >>> # A partially failing call will still add those that succeed,
    >>> # and print a warning listing the bibcodes that were not found:
    >>> bibcodes = ['1925PhDT.....X...1P', '2018MNRAS.481.5286F']
    >>> am.add_bibtex(bibcodes, keys)
    """
    token = cm.get('ads_token')
    # None stands for 'not provided' (avoids mutable default arguments):
    eprints = [] if eprints is None else eprints
    dois = [] if dois is None else dois
    # Work on copies to keep the caller's lists untouched:
    bibcodes, keys = input_bibcodes.copy(), input_keys.copy()
    if tags is None:
        tags = [[] for _ in bibcodes]

    # Make request (at most 'size' bibcodes per call):
    size = 2000
    bibcode_chunks = [
        bibcodes[i:i+size] for i in range(0, len(bibcodes), size)]

    nfound = 0
    exports = []
    for bc_chunk in bibcode_chunks:
        r = requests.post(
            "https://api.adsabs.harvard.edu/v1/export/bibtex",
            headers={
                "Authorization": f'Bearer {token}',
                "Content-type": "application/json",
            },
            data=json.dumps({"bibcode":bc_chunk}))
        # No valid outputs:
        if not r.ok:
            if r.status_code == 500:
                raise ValueError(
                    'HTTP request has failed (500): '
                    'Internal Server Error')
            if r.status_code == 401:
                raise ValueError(
                    'Unauthorized access to ADS. '
                    'Check that the ADS token is valid.')
            if r.status_code == 404:
                raise ValueError(
                    'There were no entries found for the requested bibcodes.')
            # Any other failure, report the server's reason if it sent
            # one in a JSON body, otherwise fall back to the raw text:
            try:
                reason = r.json()['error']
            except (ValueError, KeyError, TypeError):
                reason = r.text
            raise ValueError(f'HTTP request failed ({r.status_code}): {reason}')

        resp = r.json()
        # The number of retrieved entries is the second word of 'msg':
        nfound += int(resp['msg'].split()[1])
        exports.append(resp["export"])

    # Keep counts of things:
    nreqs = len(bibcodes)

    # Split output into separate BibTeX entries (keep as strings),
    # dropping empty chunks (e.g., when nothing was exported):
    entries = [
        entry
        for entry in "".join(exports).strip().split("\n\n")
        if entry.strip()
    ]

    new_keys = {}
    new_bibs = []
    unmatched = []
    founds = [False for _ in bibcodes]
    arxiv_updates = 0
    # Match results to bibcodes,keys (from last to first entry):
    for result in reversed(entries):
        ibib = None
        new = bm.Bib(result)
        # Output bibcode is one of the input bibcodes:
        if new.bibcode in bibcodes:
            ibib = bibcodes.index(new.bibcode)
        # Else, check for bibcode updates in remaining bibcodes:
        elif new.eprint is not None and new.eprint in eprints:
            ibib = eprints.index(new.eprint)
        elif new.doi is not None and new.doi in dois:
            ibib = dois.index(new.doi)

        if ibib is None:
            # Collect instead of removing from the list being iterated:
            unmatched.append(result)
            continue

        new.tags = tags[ibib]
        new_key = keys[ibib]
        updated_key = key_update(new_key, new.bibcode, bibcodes[ibib])
        if update_keys and updated_key != new_key:
            new_key = updated_key
            new_keys[keys[ibib]] = updated_key
        if 'arXiv' in bibcodes[ibib] and 'arXiv' not in new.bibcode:
            arxiv_updates += 1

        new.update_key(new_key)
        new_bibs.append(new)
        founds[ibib] = True
    # Report unmatched entries in the order they were received:
    unmatched.reverse()

    # Warnings:
    if nfound < nreqs or unmatched:
        warning = u.BANNER + "Warning:\n"
        # bibcodes not found
        missing = [
            bibcode
            for bibcode,found in zip(bibcodes, founds)
            if not found]
        if nfound < nreqs:
            warning += (
                '\nThere were bibcodes unmatched or not found in ADS:\n - '
                + '\n - '.join(missing) + "\n")
        # bibcodes not matched:
        if unmatched:
            warning += '\nThese ADS results did not match input bibcodes:\n\n'
            warning += '\n\n'.join(unmatched) + "\n"
        warning += u.BANNER
        print(warning)

    n_new = len(new_bibs)
    if base is None:
        nbibs = len(bm.load())
    else:
        nbibs = len(base)

    # Add to bibmanager database or base:
    updated = bm.merge(new=new_bibs, take='new', base=base)

    # Entries that did not increase the size of the database
    # replaced entries that already existed:
    actually_new = len(updated) - nbibs
    updated_existing = n_new - actually_new
    if updated_existing > 0:
        print(f'Updated {updated_existing} existing entries.')

    # Report arXiv updates:
    if arxiv_updates > 0:
        print(
            f"\nThere were {arxiv_updates} entries updated from ArXiv to "
            "their peer-reviewed version."
        )
        # NOTE(review): key changes are reported only along with arXiv
        # updates -- confirm that this nesting is the intended one.
        if len(new_keys) > 0:
            print("These entries changed their key:")
            for old_key,new_key in new_keys.items():
                print(f' {old_key} -> {new_key}')

    if return_replacements:
        return updated, new_keys
    return updated
def update(update_keys=True, base=None, return_replacements=False):
    """
    Query ADS by bibcode for all entries that have an ADS bibcode and
    are not frozen, replacing the old entries with the new ones.
    The main use of this function is to update arxiv versions of
    articles.

    Parameters
    ----------
    update_keys: Bool
        If True, attempt to update keys of entries that were updated
        from arxiv to published versions.
    base: List of Bib() objects
        The bibfile entries to update.  If None, use the entries from
        the bibmanager database as base.
    return_replacements: Bool
        If True, also return a dictionary of replaced keys.

    Returns
    -------
    bibs: List of Bib() objects
        The list of entries as returned by add_bibtex().
    reps: Dict
        A dictionary of replaced key names (only returned if
        return_replacements is True).
    """
    entries = bm.load() if base is None else base

    # Only entries with a bibcode that are not frozen get queried:
    selected = [
        bib
        for bib in entries
        if bib.bibcode is not None and not bib.freeze
    ]
    keys = [bib.key for bib in selected]
    bibcodes = [bib.bibcode for bib in selected]
    eprints = [bib.eprint for bib in selected]
    dois = [bib.doi for bib in selected]
    tags = [bib.tags for bib in selected]

    # Query-replace (always ask for the replacements, then decide
    # whether to hand them over to the caller):
    bibs, replacements = add_bibtex(
        bibcodes, keys, eprints, dois, update_keys, base, tags,
        return_replacements=True,
    )
    if not return_replacements:
        return bibs
    return bibs, replacements
def key_update(key, bibcode, alternate_bibcode):
    r"""
    Update key with year and journal of arxiv version of a key.

    This function will search and update the year in a key,
    and the journal if the key contains the word 'arxiv' (case
    insensitive).  The journal is left untouched when the new bibcode
    is still an arXiv bibcode, such that unpublished entries do not
    get their keys renamed.

    The function extracts the info from the old and new bibcodes.
    ADS bibcode format: http://adsabs.github.io/help/actions/bibcode

    Parameters
    ----------
    key: String
        The BibTeX key to update.
    bibcode: String
        The new ADS bibcode of the entry.
    alternate_bibcode: String
        The old ADS bibcode of the entry.

    Returns
    -------
    key: String
        The updated key.

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> key = 'BeaulieuEtal2010arxivGJ436b'
    >>> bibcode           = '2011ApJ...731...16B'
    >>> alternate_bibcode = '2010arXiv1007.0324B'
    >>> new_key = am.key_update(key, bibcode, alternate_bibcode)
    >>> print(f'{key}\n{new_key}')
    BeaulieuEtal2010arxivGJ436b
    BeaulieuEtal2011apjGJ436b

    >>> key = 'CubillosEtal2018arXivRetrievals'
    >>> bibcode           = '2019A&A...550A.100B'
    >>> alternate_bibcode = '2018arXiv123401234B'
    >>> new_key = am.key_update(key, bibcode, alternate_bibcode)
    >>> print(f'{key}\n{new_key}')
    CubillosEtal2018arXivRetrievals
    CubillosEtal2019aaRetrievals

    >>> # Still on arXiv, the key remains the same:
    >>> bibcode = alternate_bibcode = '2018arXiv123401234B'
    >>> new_key = am.key_update(key, bibcode, alternate_bibcode)
    >>> print(f'{key}\n{new_key}')
    CubillosEtal2018arXivRetrievals
    CubillosEtal2018arXivRetrievals
    """
    # The first four characters of a bibcode are the year:
    old_year = alternate_bibcode[0:4]
    year = bibcode[0:4]
    # Update year (only its first occurrence in the key):
    if old_year != year and old_year in key:
        key = key.replace(old_year, year, 1)

    # Journal abbreviation, without the '.' padding nor ampersands:
    journal = bibcode[4:9].replace('.','').replace('&','').lower()
    # The new bibcode is still an arXiv one, there is no journal to
    # update to (replacing would only alter the case of 'arXiv'):
    if journal == 'arxiv':
        return key

    # Search for the word 'arxiv' in key:
    ijournal = key.lower().find('arxiv')
    if ijournal >= 0:
        key = "".join([key[:ijournal], journal, key[ijournal+5:]])

    return key