Source code for eli5.ipython

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from typing import Any, Dict, Tuple
import warnings

from IPython.display import HTML, Image

from .explain import explain_weights, explain_prediction
from .formatters import format_as_html, fields
try:
    from .formatters.image import format_as_image
except ImportError as e:
    # missing dependencies
    format_as_image = e  # type: ignore


# Names of keyword arguments that are routed to the formatter; every other
# keyword argument is forwarded to the explain_* call (see _split_kwargs).
FORMAT_KWARGS = {
    # kwargs for the HTML formatter
    'include_styles',
    'force_weights',
    'show',
    'preserve_density',
    'highlight_spaces',
    'horizontal_layout',
    'show_feature_values',
    # kwargs for image formatter
    'resampling_filter',
    'colormap',
    'alpha_limit',
}
# TODO: automatically get FORMAT_KWARGS from function signatures


def show_weights(estimator, **kwargs):
    """
    Explain estimator parameters (weights) and return the result as
    an IPython.display.HTML object. Use this function to show classifier
    weights in IPython.

    :func:`show_weights` accepts all
    :func:`eli5.explain_weights` arguments and all
    :func:`eli5.formatters.html.format_as_html`
    keyword arguments, so the explanation can be computed and its
    formatting customized in a single call.

    Parameters
    ----------
    estimator : object
        Estimator instance. This argument must be positional.

    top : int or (int, int) tuple, optional
        Number of features to show. When ``top`` is int, ``top`` features
        with the highest absolute values are shown. When it is a
        (pos, neg) tuple, no more than ``pos`` positive features and no
        more than ``neg`` negative features are shown.
        ``None`` value means no limit.

        This argument may be supported or not, depending on estimator type.

    target_names : list[str] or {'old_name': 'new_name'} dict, optional
        Names of targets or classes. This argument can be used to provide
        human-readable class/target names for estimators which don't expose
        class names themselves. It can also be used to rename
        estimator-provided classes before displaying them.

        This argument may be supported or not, depending on estimator type.

    targets : list, optional
        Order of class/target names to show. This argument can also be
        used to show information only for a subset of classes. It should
        be a list of class / target names which match either names
        provided by an estimator or names defined in the ``target_names``
        parameter.

        This argument may be supported or not, depending on estimator type.

    feature_names : list, optional
        A list of feature names. It allows specifying feature names when
        they are not provided by an estimator object.

        This argument may be supported or not, depending on estimator type.

    feature_re : str, optional
        Only feature names which match the ``feature_re`` regex are shown
        (more precisely, ``re.search(feature_re, x)`` is checked).

    feature_filter : Callable[[str], bool], optional
        Only feature names for which the ``feature_filter`` function
        returns True are shown.

    show : List[str], optional
        List of sections to show. Allowed values:

        * 'targets' - per-target feature weights;
        * 'transition_features' - transition features of a CRF model;
        * 'feature_importances' - feature importances of a decision tree or
          an ensemble-based estimator;
        * 'decision_tree' - decision tree in a graphical form;
        * 'method' - a string with explanation method;
        * 'description' - description of explanation method and its caveats.

        ``eli5.formatters.fields`` provides constants that cover common cases:
        ``INFO`` (method and description), ``WEIGHTS`` (all the rest),
        and ``ALL`` (all).

    horizontal_layout : bool
        When True, feature weight tables are printed horizontally
        (left to right); when False, feature weight tables are printed
        vertically (top to down). Default is True.

    highlight_spaces : bool or None, optional
        Whether to highlight spaces in feature names. This is useful if
        you work with text and have ngram features which may include spaces
        at left or right. Default is None, meaning that the value used
        is set automatically based on vectorizer and feature values.

    include_styles : bool
        Most styles are inline, but some are included separately in a
        <style> tag; you can omit them by passing ``include_styles=False``.
        Default is True.

    **kwargs: dict
        Keyword arguments. All keyword arguments are passed to
        concrete explain_weights... implementations.

    Returns
    -------
    IPython.display.HTML
        The result is printed in IPython notebook as an HTML widget.
        If you need to display several explanations as an output of a single
        cell, or if you want to display it from a function then use
        IPython.display.display::

            from IPython.display import display
            display(eli5.show_weights(clf1))
            display(eli5.show_weights(clf2))

    """
    # Formatter options and explainer options arrive mixed in **kwargs.
    fmt_options, expl_options = _split_kwargs(kwargs)
    explanation = explain_weights(estimator, **expl_options)
    # Fill in HTML formatting defaults for options the caller did not pass.
    _set_html_kwargs_defaults(fmt_options)
    return HTML(format_as_html(explanation, **fmt_options))
def show_prediction(estimator, doc, **kwargs):
    """
    Explain an estimator prediction and return the result as
    an IPython.display.HTML object. Use this function to show information
    about a classifier prediction in IPython.

    :func:`show_prediction` accepts all
    :func:`eli5.explain_prediction` arguments and all
    :func:`eli5.formatters.html.format_as_html`
    keyword arguments, so the explanation can be computed and its
    formatting customized in a single call.

    If :func:`explain_prediction` returns an :class:`base.Explanation` object
    with the ``image`` attribute not set to None, i.e. if explaining image
    based models, then formatting is dispatched to an image display
    implementation, and image explanations are shown in an IPython cell.
    Extra keyword arguments are passed to :func:`eli5.format_as_image`.

    Note that this image display implementation
    requires ``matplotlib`` and ``Pillow`` as extra dependencies.
    If the dependencies are missing, no formatting is done
    and the original :class:`base.Explanation` object is returned.

    Parameters
    ----------
    estimator : object
        Estimator instance. This argument must be positional.

    doc : object
        Example to run estimator on. Estimator makes a prediction for this
        example, and :func:`show_prediction` tries to show information
        about this prediction. Pass a single element, not a one-element
        array: if you fitted your estimator on ``X``, that would be
        ``X[i]`` for most containers, and ``X.iloc[i]`` for
        ``pandas.DataFrame``.

    top : int or (int, int) tuple, optional
        Number of features to show. When ``top`` is int, ``top`` features
        with the highest absolute values are shown. When it is a
        (pos, neg) tuple, no more than ``pos`` positive features and no
        more than ``neg`` negative features are shown.
        ``None`` value means no limit (default).

        This argument may be supported or not, depending on estimator type.

    top_targets : int, optional
        Number of targets to show. When ``top_targets`` is provided,
        only the specified number of targets with highest scores are shown.
        Negative value means targets with lowest scores are shown.
        Must not be given with the ``targets`` argument.
        ``None`` value means no limit: all targets are shown (default).

        This argument may be supported or not, depending on estimator type.

    target_names : list[str] or {'old_name': 'new_name'} dict, optional
        Names of targets or classes. This argument can be used to provide
        human-readable class/target names for estimators which don't expose
        class names themselves. It can also be used to rename
        estimator-provided classes before displaying them.

        This argument may be supported or not, depending on estimator type.

    targets : list, optional
        Order of class/target names to show. This argument can also be
        used to show information only for a subset of classes. It should
        be a list of class / target names which match either names
        provided by an estimator or names defined in the ``target_names``
        parameter.

        In case of binary classification you can use this argument to
        set the class which probability or score should be displayed, with
        an appropriate explanation. By default a result for the predicted
        class is shown. For example, you can use ``targets=[True]`` to
        always show the result for a positive class, even if the predicted
        label is False.

        This argument may be supported or not, depending on estimator type.

    feature_names : list, optional
        A list of feature names. It allows specifying feature names when
        they are not provided by an estimator object.

        This argument may be supported or not, depending on estimator type.

    feature_re : str, optional
        Only feature names which match the ``feature_re`` regex are shown
        (more precisely, ``re.search(feature_re, x)`` is checked).

    feature_filter : Callable[[str, float], bool], optional
        Only feature names for which the ``feature_filter`` function
        returns True are shown. It must accept feature name and feature
        value. Missing features always have a NaN value.

    show : List[str], optional
        List of sections to show. Allowed values:

        * 'targets' - per-target feature weights;
        * 'transition_features' - transition features of a CRF model;
        * 'feature_importances' - feature importances of a decision tree or
          an ensemble-based estimator;
        * 'decision_tree' - decision tree in a graphical form;
        * 'method' - a string with explanation method;
        * 'description' - description of explanation method and its caveats.

        ``eli5.formatters.fields`` provides constants that cover common cases:
        ``INFO`` (method and description), ``WEIGHTS`` (all the rest),
        and ``ALL`` (all).

    horizontal_layout : bool
        When True, feature weight tables are printed horizontally
        (left to right); when False, feature weight tables are printed
        vertically (top to down). Default is True.

    highlight_spaces : bool or None, optional
        Whether to highlight spaces in feature names. This is useful if
        you work with text and have ngram features which may include spaces
        at left or right. Default is None, meaning that the value used
        is set automatically based on vectorizer and feature values.

    include_styles : bool
        Most styles are inline, but some are included separately in a
        <style> tag; you can omit them by passing ``include_styles=False``.
        Default is True.

    force_weights : bool
        When True, a table with feature weights is displayed even if all
        features are already highlighted in text. Default is False.

    preserve_density: bool or None
        This argument currently only makes sense when used with text data
        and vectorizers from scikit-learn.

        If preserve_density is True, then color for longer fragments will be
        less intensive than for shorter fragments, so that "sum" of
        intensities will correspond to feature weight.

        If preserve_density is None, then its value is chosen depending on
        analyzer kind: it is preserved for "char" and "char_wb" analyzers,
        and not preserved for "word" analyzers.

        Default is None.

    show_feature_values : bool
        When True, feature values are shown along with feature contributions.
        Default is False.

    **kwargs: dict
        Keyword arguments. All keyword arguments are passed to
        concrete explain_prediction... implementations.

    Returns
    -------
    IPython.display.HTML
        The result is printed in IPython notebook as an HTML widget.
        If you need to display several explanations as an output of a single
        cell, or if you want to display it from a function then use
        IPython.display.display::

            from IPython.display import display
            display(eli5.show_prediction(clf, doc1))
            display(eli5.show_prediction(clf, doc2))

    PIL.Image.Image
        Image with a heatmap overlay, *if explaining image based models*.
        The image is shown in an IPython notebook cell if it is the last
        thing returned. To display the image in a loop, function, or other
        case, use IPython.display.display::

            from IPython.display import display
            for cls_idx in [0, 432]:
                display(eli5.show_prediction(clf, doc, targets=[cls_idx]))

    """
    # Formatter options and explainer options arrive mixed in **kwargs.
    fmt_options, expl_options = _split_kwargs(kwargs)
    explanation = explain_prediction(estimator, doc, **expl_options)

    if explanation.image is None:
        # use default implementation
        # TODO: a better design / refactorings might be needed
        _set_html_kwargs_defaults(fmt_options)
        return HTML(format_as_html(explanation, **fmt_options))

    # dispatch to image display implementation
    if isinstance(format_as_image, ImportError):
        # The optional image formatter failed to import at module load, and
        # the name holds the ImportError instead of the function.
        warnings.warn('Missing dependencies: "{}". '
                      'Returning original Explanation.'.format(
                          format_as_image))
        return explanation
    return format_as_image(explanation, **fmt_options)
def _split_kwargs(kwargs):
    # type: (Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]
    """
    Partition ``kwargs`` into ``(format_kwargs, explain_kwargs)``.

    Keys listed in ``FORMAT_KWARGS`` go to the first dict, all other keys
    go to the second one. The input dict is not modified.
    """
    format_kwargs = {}
    explain_kwargs = {}
    for name, value in kwargs.items():
        bucket = format_kwargs if name in FORMAT_KWARGS else explain_kwargs
        bucket[name] = value
    return format_kwargs, explain_kwargs
    # TODO: consider moving this to utils.py as a function that splits kwargs based on an argset


def _set_html_kwargs_defaults(format_kwargs):
    # type: (Dict[str, Any]) -> None
    """
    Fill in HTML formatting defaults, in place, for keys not already set:
    ``show`` defaults to ``fields.WEIGHTS`` and ``force_weights`` to False.
    """
    html_defaults = (
        ('show', fields.WEIGHTS),
        ('force_weights', False),
    )
    for key, default in html_defaults:
        format_kwargs.setdefault(key, default)