Source code for bibmanager.ads_manager.ads_manager

# Copyright (c) 2018-2026 Patricio Cubillos.
# bibmanager is open-source software under the MIT license (see LICENSE).

__all__ = [
    'manager',
    'search',
    'display',
    'add_bibtex',
    'update',
    'key_update',
]

import json
import os
import pickle
import sys
import textwrap
import urllib

import prompt_toolkit
import pygments
from pygments.token import Token
import requests

from .. import bib_manager as bm
from .. import config_manager as cm
from .. import utils as u



[docs]
def manager(query=None):
    """
    Front-end for paginated ADS searches.

    With a query, run a new ADS search and show its first page.
    Without a query, resume the previous search from the cache file,
    requesting more results from ADS when the cached ones do not cover
    the next page.  After displaying a page, store the search state in
    the cache file if there are entries left to show, otherwise remove
    the cache file.

    Parameters
    ----------
    query: String
        ADS query to search for.  If None, show the next page of the
        cached query (or print a notice if there is no cache file).
    """
    rows = int(cm.get('ads_display'))

    if query is not None:
        # Brand-new search, begin at the first match:
        start = index = 0
        results, nmatch = search(query, start=start)
    elif not os.path.exists(u.BM_CACHE()):
        print("There are no more entries for this query.")
        return
    else:
        # Resume from the state saved by the previous call:
        with open(u.BM_CACHE(), 'rb') as cache_file:
            results, query, start, index, nmatch = pickle.load(cache_file)
        last = start + len(results)
        more_in_ads = last < nmatch
        page_exceeds_cache = index + rows > last
        if more_in_ads and page_exceeds_cache:
            # Drop the entries already shown and append a fresh batch:
            extra_results, nmatch = search(query, start=last)
            results = results[index-start:] + extra_results
            start = index

    display(results, start, index, rows, nmatch)
    index += rows

    if index < nmatch:
        # There is more to show, remember where we are:
        state = [results, query, start, index, nmatch]
        with open(u.BM_CACHE(), 'wb') as cache_file:
            pickle.dump(state, cache_file, protocol=4)
    else:
        # Everything was shown, the cache is no longer needed:
        with u.ignored(OSError):
            os.remove(u.BM_CACHE())




[docs]
def search(query, start=0, cache_rows=200, sort='pubdate+desc'):
    """
    Make a query from ADS.

    Parameters
    ----------
    query: String
        A query string like an entry in the new ADS interface:
        https://ui.adsabs.harvard.edu/
        The string is URL-quoted before being sent.
    start: Integer
        Starting index of entry to return.
    cache_rows: Integer
        Maximum number of entries to return.
    sort: String
        Sorting field and direction to use.  This value is inserted
        verbatim into the request URL (it is not URL-quoted here).

    Returns
    -------
    results: List of dicts
        Query outputs between indices start and start+cache_rows.
        The requested fields are title, author, year, bibcode, and pub.
    nmatch: Integer
        Total number of entries matched by the query.

    Raises
    ------
    ValueError
        If the HTTP request was not successful (e.g., an invalid ADS
        token or an invalid query).

    Resources
    ---------
    A comprehensive description of the query format:
    - http://adsabs.github.io/help/search/
    Description of the query parameters:
    - https://github.com/adsabs/adsabs-dev-api/blob/master/Search_API.ipynb

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> # Search entries by author (note the need for double quotes,
    >>> # otherwise, the search might produce bogus results):
    >>> query = 'author:"cubillos, p"'
    >>> results, nmatch = am.search(query)
    >>> # Search entries by first author:
    >>> query = 'author:"^cubillos, p"'
    >>> # Combine search by first author and year:
    >>> query = 'author:"^cubillos, p" year:2017'
    >>> # Restrict search to article-type entries:
    >>> query = 'author:"^cubillos, p" property:article'
    >>> # Restrict search to peer-reviewed articles:
    >>> query = 'author:"^cubillos, p" property:refereed'

    >>> # Attempt with invalid token (HTTP status 401):
    >>> results, nmatch = am.search(query)
    ValueError: Unauthorized access to ADS.  Check that the ADS token is valid.
    >>> # Attempt with invalid query ('properties' instead of 'property'):
    >>> results, nmatch = am.search('author:"^cubillos, p" properties:refereed')
    ValueError: HTTP request failed (<status code>): <reason reported by ADS>
    """
    token = cm.get('ads_token')
    query = urllib.parse.quote(query)

    r = requests.get(
        'https://api.adsabs.harvard.edu/v1/search/query?'
        f'q={query}&start={start}&rows={cache_rows}'
        f'&sort={sort}&fl=title,author,year,bibcode,pub',
        headers={'Authorization': f'Bearer {token}'})
    if not r.ok:
        if r.status_code == 401:
            raise ValueError(
                'Unauthorized access to ADS.  Check that the ADS token is valid.')
        # Prefer the error message in the JSON payload; fall back to the
        # raw body when the payload is not JSON, has no 'error' entry,
        # or is not a mapping.  (Not a bare 'except', so that
        # KeyboardInterrupt/SystemExit are never swallowed here.)
        try:
            reason = r.json()['error']
        except (ValueError, KeyError, TypeError):
            reason = r.text
        raise ValueError(f'HTTP request failed ({r.status_code}): {reason}')

    resp = r.json()

    nmatch  = resp['response']['numFound']
    results = resp['response']['docs']

    return results, nmatch




[docs]
def display(results, start, index, rows, nmatch, short=True):
    """
    Show on the prompt a list of entries from an ADS search.

    For each displayed entry, print its title, authors, ADS URL, and
    bibcode; then print a summary line with the range of entries shown
    and the total number of matches.

    Parameters
    ----------
    results: List of dicts
        Subset of entries returned by a query.
    start: Integer
        Index assigned to first entry in results.
    index: Integer
        First index to display.
    rows: Integer
        Number of entries to display.
    nmatch: Integer
        Total number of entries corresponding to query (not necessarily
        the number of entries in results).
    short: Bool
        Format for author list: 'short' format if True, 'long' format
        otherwise (as understood by utils.get_authors()).

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> start = index = 0
    >>> rows = 20
    >>> query = 'author:"^cubillos, p" property:refereed'
    >>> results, nmatch = am.search(query, start=start)
    >>> am.display(results, start, index, rows, nmatch)
    """
    page = results[index-start:index-start+rows]
    author_format = 'short' if short else 'long'
    if page:
        # The style is the same for every entry, look it up only once
        # (and only if there is something to print):
        style = prompt_toolkit.styles.style_from_pygments_cls(
            pygments.styles.get_style_by_name(cm.get('style')))

    for result in page:
        tokens = [(Token.Text, '\n')]
        # Entries with a missing or empty title get a placeholder, same
        # as entries without authors:
        titles = result.get('title')
        title_text = titles[0] if titles else '---'
        title = textwrap.fill(
            f"Title: {title_text}",
            width=78,
            subsequent_indent='    ')
        # Strip the 'Title: ' prefix, which was only there to wrap the
        # text at the right width:
        tokens += u.tokenizer('Title', title[7:])

        if 'author' in result:
            author_list = [u.parse_name(author) for author in result['author']]
            authors = textwrap.fill(
                f"Authors: {u.get_authors(author_list, format=author_format)}",
                width=78,
                subsequent_indent='    ',
            )
        else:
            authors = 'Authors: ---'
        # Strip the 'Authors: ' prefix:
        tokens += u.tokenizer('Authors', authors[9:])

        bibcode = result['bibcode']
        adsurl = f"https://ui.adsabs.harvard.edu/abs/{bibcode}"
        tokens += u.tokenizer('ADS URL', adsurl)
        tokens += u.tokenizer('bibcode', bibcode, Token.Name.Label)

        prompt_toolkit.print_formatted_text(
            prompt_toolkit.formatted_text.PygmentsTokens(tokens),
            end="",
            style=style,
            output=prompt_toolkit.output.defaults.create_output(sys.stdout))

    if index + rows < nmatch:
        more = "  To show the next set, execute:\nbibm ads-search -n"
    else:
        more = ""
    print(
        f"\nShowing entries {index+1}--{min(index+rows, nmatch)} out of "
        f"{nmatch} matches.{more}")




[docs]
def add_bibtex(
        input_bibcodes, input_keys, eprints=None, dois=None,
        update_keys=True, base=None, tags=None, return_replacements=False,
    ):
    """
    Add bibtex entries from a list of ADS bibcodes, with specified keys.
    New entries will replace old ones without asking if they are
    duplicates.

    Parameters
    ----------
    input_bibcodes: List of strings
        A list of ADS bibcodes.
    input_keys: List of strings
        BibTeX keys to assign to each bibcode.
    eprints: List of strings
        List of ArXiv IDs corresponding to the input bibcodes
        (index-aligned with input_bibcodes).  Default is no eprints.
    dois: List of strings
        List of DOIs corresponding to the input bibcodes
        (index-aligned with input_bibcodes).  Default is no DOIs.
    update_keys: Bool
        If True, attempt to update keys of entries that were updated
        from arxiv to published versions.
    base: List of Bib() objects
        If None, merge new entries into the bibmanager database.
        If not None, merge new entries into base.
    tags: Nested list of strings
        The list of tags for each input bibcode.
    return_replacements: Bool
        If True, also return a dictionary of replaced keys.

    Returns
    -------
    bibs: List of Bib() objects
        Updated list of BibTeX entries.
    reps: Dict
        A dictionary of replaced key names (old key -> new key).
        Only returned if return_replacements is True.

    Raises
    ------
    ValueError
        If an HTTP request to ADS was not successful.

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> # A successful add call:
    >>> bibcodes = ['1925PhDT.........1P']
    >>> keys = ['Payne1925phdStellarAtmospheres']
    >>> am.add_bibtex(bibcodes, keys)
    >>> # A failing add call (when ADS replies with a 404 status):
    >>> bibcodes = ['1925PhDT....X....1P']
    >>> am.add_bibtex(bibcodes, keys)
    ValueError: There were no entries found for the requested bibcodes.

    >>> # A successful add call with multiple entries:
    >>> bibcodes = ['1925PhDT.........1P', '2018MNRAS.481.5286F']
    >>> keys = ['Payne1925phdStellarAtmospheres', 'FolsomEtal2018mnrasHD219134']
    >>> am.add_bibtex(bibcodes, keys)
    >>> # A partially failing call will still add those that succeed,
    >>> # and print a warning listing the bibcodes that were not found:
    >>> bibcodes = ['1925PhDT.....X...1P', '2018MNRAS.481.5286F']
    >>> am.add_bibtex(bibcodes, keys)
    Warning:
    There were bibcodes unmatched or not found in ADS:
     - 1925PhDT.....X...1P
    """
    token = cm.get('ads_token')
    # Work on copies, keep the caller's lists untouched:
    bibcodes, keys = input_bibcodes.copy(), input_keys.copy()

    # None defaults instead of mutable [] defaults in the signature:
    if eprints is None:
        eprints = []
    if dois is None:
        dois = []
    if tags is None:
        tags = [[] for _ in bibcodes]

    # Make request (in chunks of at most 'size' bibcodes per call):
    size = 2000
    bibcode_chunks = [
        bibcodes[i:i+size]
        for i in range(0, len(bibcodes), size)
    ]

    nfound = 0
    exports = []
    for bc_chunk in bibcode_chunks:
        r = requests.post(
            "https://api.adsabs.harvard.edu/v1/export/bibtex",
            headers={"Authorization": f'Bearer {token}',
                     "Content-type": "application/json"},
            data=json.dumps({"bibcode":bc_chunk}))
        # No valid outputs:
        if not r.ok:
            if r.status_code == 500:
                raise ValueError(
                    'HTTP request has failed (500): '
                    'Internal Server Error')
            if r.status_code == 401:
                raise ValueError(
                    'Unauthorized access to ADS.  '
                    'Check that the ADS token is valid.')
            if r.status_code == 404:
                raise ValueError(
                    'There were no entries found for the requested bibcodes.')
            # Prefer the error message in the JSON payload; fall back to
            # the raw body otherwise.  (Not a bare 'except', so that
            # KeyboardInterrupt/SystemExit are never swallowed here.)
            try:
                reason = r.json()['error']
            except (ValueError, KeyError, TypeError):
                reason = r.text
            raise ValueError(f'HTTP request failed ({r.status_code}): {reason}')
        resp = r.json()
        # The second word of the 'msg' field is read as the number of
        # entries retrieved:
        nfound += int(resp['msg'].split()[1])
        exports.append(resp["export"])

    # Keep counts of things:
    nreqs = len(bibcodes)

    # Split output into separate BibTeX entries (keep as strings).
    # Blank fragments (e.g., when nothing was requested or exported)
    # are dropped so they are not handed to bm.Bib() as entries:
    results = [
        entry
        for entry in "".join(exports).strip().split("\n\n")
        if entry.strip()
    ]

    new_keys = {}
    new_bibs = []
    # ADS outputs that could not be paired with any input bibcode.
    # Collected separately rather than removing items from 'results'
    # while iterating over it:
    unmatched = []
    founds = [False for _ in bibcodes]
    arxiv_updates = 0
    # Match results to bibcodes,keys:
    for result in reversed(results):
        ibib = None
        new = bm.Bib(result)
        # Output bibcode is one of the input bibcodes:
        if new.bibcode in bibcodes:
            ibib = bibcodes.index(new.bibcode)
        # Else, check for bibcode updates in remaining bibcodes:
        elif new.eprint is not None and new.eprint in eprints:
            ibib = eprints.index(new.eprint)
        elif new.doi is not None and new.doi in dois:
            ibib = dois.index(new.doi)

        if ibib is None:
            unmatched.append(result)
            continue

        new.tags = tags[ibib]
        new_key = keys[ibib]
        updated_key = key_update(new_key, new.bibcode, bibcodes[ibib])
        if update_keys and updated_key != new_key:
            new_key = updated_key
            new_keys[keys[ibib]] = updated_key
        if 'arXiv' in bibcodes[ibib] and 'arXiv' not in new.bibcode:
            arxiv_updates += 1

        new.update_key(new_key)
        new_bibs.append(new)
        founds[ibib] = True

    # Results were visited back to front, restore the ADS output order:
    unmatched.reverse()

    # Warnings:
    if nfound < nreqs or len(unmatched) > 0:
        warning = u.BANNER + "Warning:\n"
        # bibcodes not found
        if nfound < nreqs:
            missing = [
                bibcode
                for bibcode,found in zip(bibcodes, founds)
                if not found]
            warning += (
                '\nThere were bibcodes unmatched or not found in ADS:\n - '
                + '\n - '.join(missing) + "\n")
        # bibcodes not matched:
        if len(unmatched) > 0:
            warning += '\nThese ADS results did not match input bibcodes:\n\n'
            warning += '\n\n'.join(unmatched) + "\n"
        warning += u.BANNER
        print(warning)

    n_new = len(new_bibs)
    if base is None:
        nbibs = len(bm.load())
    else:
        nbibs = len(base)

    # Add to bibmanager database or base:
    updated = bm.merge(new=new_bibs, take='new', base=base)
    # Entries that did not increase the size of the database replaced
    # existing ones:
    actually_new = len(updated) - nbibs
    updated_existing = n_new - actually_new
    if updated_existing > 0:
        print(f'Updated {updated_existing} existing entries.')

    # Report arXiv updates:
    if arxiv_updates > 0:
        print(
            f"\nThere were {arxiv_updates} entries updated from ArXiv to "
            "their peer-reviewed version."
        )
    if len(new_keys) > 0:
        print("These entries changed their key:")
        for old_key,new_key in new_keys.items():
            print(f'  {old_key} -> {new_key}')

    if return_replacements:
        return updated, new_keys
    return updated




[docs]
def update(update_keys=True, base=None, return_replacements=False):
    """
    Do an ADS query by bibcode for all entries that have an ADS bibcode
    and are not frozen, replacing the old entries with the new ones.
    The main use of this function is to update arxiv versions of
    articles.

    Parameters
    ----------
    update_keys: Bool
        If True, attempt to update keys of entries that were updated
        from arxiv to published versions.
    base: List of Bib() objects
        The bibfile entries to update.  If None, use the entries from
        the bibmanager database as base.
    return_replacements: Bool
        If True, also return a dictionary of replaced keys.

    Returns
    -------
    bibs: List of Bib() objects
        The updated entries, as returned by add_bibtex().
    reps: Dict
        A dictionary of replaced key names.  Only returned if
        return_replacements is True.
    """
    entries = bm.load() if base is None else base

    # Only entries with a bibcode that are not frozen get queried:
    selected = [
        bib for bib in entries
        if bib.bibcode is not None and not bib.freeze
    ]
    keys = [bib.key for bib in selected]
    bibcodes = [bib.bibcode for bib in selected]
    eprints = [bib.eprint for bib in selected]
    dois = [bib.doi for bib in selected]
    tags = [bib.tags for bib in selected]

    # Query-replace (always ask for the replacements, decide below
    # whether to hand them back to the caller):
    updated, replacements = add_bibtex(
        bibcodes, keys, eprints, dois, update_keys, base, tags,
        return_replacements=True,
    )

    if not return_replacements:
        return updated
    return updated, replacements




[docs]
def key_update(key, bibcode, alternate_bibcode):
    r"""
    Update the year and journal in a key after a bibcode change.

    This function will search and update the year in a key,
    and the journal if the key contains the word 'arxiv' (case
    insensitive).  The journal is left untouched when the new
    bibcode is itself an arXiv bibcode, since there is no journal
    to switch to.

    The function extracts the info from the old and new bibcodes.
    ADS bibcode format: http://adsabs.github.io/help/actions/bibcode

    Parameters
    ----------
    key: String
        The BibTeX key to update.
    bibcode: String
        The new (current) ADS bibcode of the entry.
    alternate_bibcode: String
        The old ADS bibcode of the entry.

    Returns
    -------
    key: String
        The updated key (same as the input if nothing needed to change).

    Examples
    --------
    >>> import bibmanager.ads_manager as am
    >>> key = 'BeaulieuEtal2010arxivGJ436b'
    >>> bibcode           = '2011ApJ...731...16B'
    >>> alternate_bibcode = '2010arXiv1007.0324B'
    >>> new_key = am.key_update(key, bibcode, alternate_bibcode)
    >>> print(f'{key}\n{new_key}')
    BeaulieuEtal2010arxivGJ436b
    BeaulieuEtal2011apjGJ436b

    >>> key = 'CubillosEtal2018arXivRetrievals'
    >>> bibcode           = '2019A&A...550A.100B'
    >>> alternate_bibcode = '2018arXiv123401234B'
    >>> new_key = am.key_update(key, bibcode, alternate_bibcode)
    >>> print(f'{key}\n{new_key}')
    CubillosEtal2018arXivRetrievals
    CubillosEtal2019aaRetrievals

    >>> # An entry that is still on arXiv keeps its key as is:
    >>> key = 'CubillosEtal2018arXivRetrievals'
    >>> bibcode = alternate_bibcode = '2018arXiv123401234B'
    >>> print(am.key_update(key, bibcode, alternate_bibcode))
    CubillosEtal2018arXivRetrievals
    """
    # The first four characters of a bibcode are the year:
    old_year = alternate_bibcode[0:4]
    year = bibcode[0:4]
    # Update year (only the first occurrence in the key):
    if old_year != year and old_year in key:
        key = key.replace(old_year, year, 1)

    # Journal abbreviation from the five characters after the year,
    # without the '.' padding and '&' characters, in lower case:
    journal = bibcode[4:9].replace('.', '').replace('&', '').lower()
    # Still an arXiv entry: replacing 'arxiv' by 'arxiv' would only
    # alter the capitalization of the key (e.g., 'arXiv' -> 'arxiv'),
    # which callers would then take as a key change:
    if journal == 'arxiv':
        return key

    # Search for the word 'arxiv' in key:
    ijournal = key.lower().find('arxiv')
    if ijournal >= 0:
        # Replace the 5-character 'arxiv' word with the journal:
        key = "".join([key[:ijournal], journal, key[ijournal+5:]])

    return key