Source code for eli5.ipython

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from typing import Any, Dict, Tuple
import warnings

from IPython.display import HTML, Image

from .explain import explain_weights, explain_prediction
from .formatters import format_as_html, fields
try:
    from .formatters.image import format_as_image
except ImportError as e:
    # missing dependencies
    format_as_image = e  # type: ignore


FORMAT_KWARGS = {'include_styles', 'force_weights',
                 'show', 'preserve_density',
                 'highlight_spaces', 'horizontal_layout',
                 'show_feature_values',
                 # kwargs for image formatter
                 'resampling_filter', 'colormap', 
                 'alpha_limit',
}
# TODO: automatically get FORMAT_KWARGS from function signatures



[docs]
def show_weights(estimator, **kwargs):
    """ Return an explanation of estimator parameters (weights)
    as an IPython.display.HTML object. Use this function
    to show classifier weights in IPython.

    :func:`show_weights` accepts all
    :func:`eli5.explain_weights` arguments and all
    :func:`eli5.formatters.html.format_as_html`
    keyword arguments, so it is possible to get explanation and
    customize formatting in a single call.

    Parameters
    ----------
    estimator : object
        Estimator instance. This argument must be positional.

    top : int or (int, int) tuple, optional
        Number of features to show. When ``top`` is int, ``top`` features with
        a highest absolute values are shown. When it is (pos, neg) tuple,
        no more than ``pos`` positive features and no more than ``neg``
        negative features is shown. ``None`` value means no limit.

        This argument may be supported or not, depending on estimator type.

    target_names : list[str] or {'old_name': 'new_name'} dict, optional
        Names of targets or classes. This argument can be used to provide
        human-readable class/target names for estimators which don't expose
        clss names themselves. It can be also used to rename estimator-provided
        classes before displaying them.

        This argument may be supported or not, depending on estimator type.

    targets : list, optional
        Order of class/target names to show. This argument can be also used
        to show information only for a subset of classes. It should be a list
        of class / target names which match either names provided by
        an estimator or names defined in ``target_names`` parameter.

        This argument may be supported or not, depending on estimator type.

    feature_names : list, optional
        A list of feature names. It allows to specify feature
        names when they are not provided by an estimator object.

        This argument may be supported or not, depending on estimator type.

    feature_re : str, optional
        Only feature names which match ``feature_re`` regex are shown
        (more precisely, ``re.search(feature_re, x)`` is checked).

    feature_filter : Callable[[str], bool], optional
        Only feature names for which ``feature_filter`` function returns True
        are shown.

    show : List[str], optional
        List of sections to show. Allowed values:

        * 'targets' - per-target feature weights;
        * 'transition_features' - transition features of a CRF model;
        * 'feature_importances' - feature importances of a decision tree or
          an ensemble-based estimator;
        * 'decision_tree' - decision tree in a graphical form;
        * 'method' - a string with explanation method;
        * 'description' - description of explanation method and its caveats.

        ``eli5.formatters.fields`` provides constants that cover common cases:
        ``INFO`` (method and description), ``WEIGHTS`` (all the rest),
        and ``ALL`` (all).

    horizontal_layout : bool
        When True, feature weight tables are printed horizontally
        (left to right); when False, feature weight tables are printed
        vertically (top to down). Default is True.

    highlight_spaces : bool or None, optional
        Whether to highlight spaces in feature names. This is useful if
        you work with text and have ngram features which may include spaces
        at left or right. Default is None, meaning that the value used
        is set automatically based on vectorizer and feature values.

    include_styles : bool
        Most styles are inline, but some are included separately in <style> tag;
        you can omit them by passing ``include_styles=False``. Default is True.

    **kwargs: dict
        Keyword arguments. All keyword arguments are passed to
        concrete explain_weights... implementations.

    Returns
    -------
    IPython.display.HTML
        The result is printed in IPython notebook as an HTML widget.
        If you need to display several explanations as an output of a single
        cell, or if you want to display it from a function then use
        IPython.display.display::

            from IPython.display import display
            display(eli5.show_weights(clf1))
            display(eli5.show_weights(clf2))

    """
    format_kwargs, explain_kwargs = _split_kwargs(kwargs)
    expl = explain_weights(estimator, **explain_kwargs)
    _set_html_kwargs_defaults(format_kwargs)
    html = format_as_html(expl, **format_kwargs)
    return HTML(html)




[docs]
def show_prediction(estimator, doc, **kwargs):
    """ Return an explanation of estimator prediction
    as an IPython.display.HTML object. Use this function
    to show information about classifier prediction in IPython.

    :func:`show_prediction` accepts all
    :func:`eli5.explain_prediction` arguments and all
    :func:`eli5.formatters.html.format_as_html`
    keyword arguments, so it is possible to get explanation and
    customize formatting in a single call.


    If :func:`explain_prediction` returns an :class:`base.Explanation` object with
    the ``image`` attribute not set to None, i.e. if explaining image based models,
    then formatting is dispatched to an image display implementation, 
    and image explanations are shown in an IPython cell.
    Extra keyword arguments are passed to :func:`eli5.format_as_image`.

    Note that this image display implementation 
    requires ``matplotlib`` and ``Pillow`` as extra dependencies.
    If the dependencies are missing, no formatting is done
    and the original :class:`base.Explanation` object is returned.


    Parameters
    ----------
    estimator : object
        Estimator instance. This argument must be positional.

    doc : object
        Example to run estimator on. Estimator makes a prediction for this
        example, and :func:`show_prediction` tries to show information
        about this prediction. Pass a single element, not a one-element array:
        if you fitted your estimator on ``X``, that would be ``X[i]`` for
        most containers, and ``X.iloc[i]`` for ``pandas.DataFrame``.

    top : int or (int, int) tuple, optional
        Number of features to show. When ``top`` is int, ``top`` features with
        a highest absolute values are shown. When it is (pos, neg) tuple,
        no more than ``pos`` positive features and no more than ``neg``
        negative features is shown. ``None`` value means no limit (default).

        This argument may be supported or not, depending on estimator type.

    top_targets : int, optional
        Number of targets to show. When ``top_targets`` is provided,
        only specified number of targets with highest scores are shown.
        Negative value means targets with lowest scores are shown.
        Must not be given with ``targets`` argument.
        ``None`` value means no limit: all targets are shown (default).

        This argument may be supported or not, depending on estimator type.

    target_names : list[str] or {'old_name': 'new_name'} dict, optional
        Names of targets or classes. This argument can be used to provide
        human-readable class/target names for estimators which don't expose
        clss names themselves. It can be also used to rename estimator-provided
        classes before displaying them.

        This argument may be supported or not, depending on estimator type.

    targets : list, optional
        Order of class/target names to show. This argument can be also used
        to show information only for a subset of classes. It should be a list
        of class / target names which match either names provided by
        an estimator or names defined in ``target_names`` parameter.

        In case of binary classification you can use this argument to
        set the class which probability or score should be displayed, with
        an appropriate explanation. By default a result for predicted class
        is shown. For example, you can use ``targets=[True]`` to always show
        result for a positive class, even if the predicted label is False.

        This argument may be supported or not, depending on estimator type.

    feature_names : list, optional
        A list of feature names. It allows to specify feature
        names when they are not provided by an estimator object.

        This argument may be supported or not, depending on estimator type.

    feature_re : str, optional
        Only feature names which match ``feature_re`` regex are shown
        (more precisely, ``re.search(feature_re, x)`` is checked).

    feature_filter : Callable[[str, float], bool], optional
        Only feature names for which ``feature_filter`` function returns True
        are shown. It must accept feature name and feature value.
        Missing features always have a NaN value.

    show : List[str], optional
        List of sections to show. Allowed values:

        * 'targets' - per-target feature weights;
        * 'transition_features' - transition features of a CRF model;
        * 'feature_importances' - feature importances of a decision tree or
          an ensemble-based estimator;
        * 'decision_tree' - decision tree in a graphical form;
        * 'method' - a string with explanation method;
        * 'description' - description of explanation method and its caveats.

        ``eli5.formatters.fields`` provides constants that cover common cases:
        ``INFO`` (method and description), ``WEIGHTS`` (all the rest),
        and ``ALL`` (all).

    horizontal_layout : bool
        When True, feature weight tables are printed horizontally
        (left to right); when False, feature weight tables are printed
        vertically (top to down). Default is True.

    highlight_spaces : bool or None, optional
        Whether to highlight spaces in feature names. This is useful if
        you work with text and have ngram features which may include spaces
        at left or right. Default is None, meaning that the value used
        is set automatically based on vectorizer and feature values.

    include_styles : bool
        Most styles are inline, but some are included separately in <style> tag;
        you can omit them by passing ``include_styles=False``. Default is True.

    force_weights : bool
        When True, a table with feature weights is displayed even if all
        features are already highlighted in text. Default is False.

    preserve_density: bool or None
        This argument currently only makes sense when used with text data
        and vectorizers from scikit-learn.

        If preserve_density is True, then color for longer fragments will be
        less intensive than for shorter fragments, so that "sum" of intensities
        will correspond to feature weight.

        If preserve_density is None, then it's value is chosen depending on
        analyzer kind: it is preserved for "char" and "char_wb" analyzers,
        and not preserved for "word" analyzers.

        Default is None.

    show_feature_values : bool
        When True, feature values are shown along with feature contributions.
        Default is False.

    **kwargs: dict
        Keyword arguments. All keyword arguments are passed to
        concrete explain_prediction... implementations.

    Returns
    -------
    IPython.display.HTML
        The result is printed in IPython notebook as an HTML widget.
        If you need to display several explanations as an output of a single
        cell, or if you want to display it from a function then use
        IPython.display.display::

            from IPython.display import display
            display(eli5.show_weights(clf1))
            display(eli5.show_weights(clf2))

    PIL.Image.Image
        Image with a heatmap overlay, *if explaining image based models*.
        The image is shown in an IPython notebook cell
        if it is the last thing returned.
        To display the image in a loop, function, or other case, 
        use IPython.display.display::

            from IPython.display import display
            for cls_idx in [0, 432]:
                display(eli5.show_prediction(clf, doc, targets=[cls_idx]))

    """
    format_kwargs, explain_kwargs = _split_kwargs(kwargs)
    expl = explain_prediction(estimator, doc, **explain_kwargs)
    if expl.image is not None:
        # dispatch to image display implementation
        if isinstance(format_as_image, ImportError):
            warnings.warn('Missing dependencies: "{}". ' 
                          'Returning original Explanation.'.format(
                            format_as_image))
            return expl
        else:
            return format_as_image(expl, **format_kwargs)
    else:
        # use default implementation
        # TODO: a better design / refactorings might be needed
        _set_html_kwargs_defaults(format_kwargs)
        html = format_as_html(expl, **format_kwargs)
        return HTML(html)



def _split_kwargs(kwargs):
    # type: (Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]
    format_kwargs = {k: v for k, v in kwargs.items() if k in FORMAT_KWARGS}
    explain_kwargs = {k: v for k, v in kwargs.items() if k not in FORMAT_KWARGS}
    return format_kwargs, explain_kwargs
    # TODO: consider moving this to utils.py as a function that splits kwargs based on an argset


def _set_html_kwargs_defaults(format_kwargs):
    # type: (Dict[str, Any]) -> None
    format_kwargs.setdefault('show', fields.WEIGHTS)
    format_kwargs.setdefault('force_weights', False)