Source code for eli5.formatters.text

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from itertools import chain
import six
from tabulate import tabulate
from typing import List, Optional, Iterator

from eli5.base import Explanation, FeatureImportances
from . import fields
from .features import FormattedFeatureName
from .utils import (
    format_signed, format_value, format_weight, has_any_values_for_weights,
    replace_spaces, should_highlight_spaces)
from .utils import tabulate as eli5_tabulate
from .trees import tree2text


# Glyphs used when rendering text output. On Python 2 plain-ASCII stand-ins
# are used; otherwise the Unicode characters are used.
if six.PY2:
    _PLUS_MINUS, _ELLIPSIS, _SPACE = "+-", '...', '_'
else:
    _PLUS_MINUS, _ELLIPSIS, _SPACE = "±", '…', '░'


def format_as_text(expl,  # type: Explanation
                   show=fields.ALL,
                   highlight_spaces=None,  # type: Optional[bool]
                   show_feature_values=False,  # type: bool
                   ):
    # type: (...) -> str
    """
    Render an explanation as plain text.

    Parameters
    ----------
    expl : eli5.base.Explanation
        Explanation returned by ``eli5.explain_weights`` or
        ``eli5.explain_prediction`` functions.

    highlight_spaces : bool or None, optional
        Whether to highlight spaces in feature names. This is useful if
        you work with text and have ngram features which may include spaces
        at left or right. Default is None, meaning that the value used
        is set automatically based on vectorizer and feature values.

    show_feature_values : bool
        When True, feature values are shown along with feature
        contributions. It only takes effect when the explanation has
        values for its weights. Default is False.

    show : List[str], optional
        List of sections to show, rendered in the order given. A section
        is skipped when the matching attribute of ``expl`` is missing or
        empty. Allowed values:

        * 'targets' - per-target feature weights;
        * 'transition_features' - transition features of a CRF model;
        * 'feature_importances' - feature importances of a decision tree or
          an ensemble-based estimator;
        * 'decision_tree' - decision tree in a graphical form;
        * 'method' - a string with explanation method;
        * 'description' - description of explanation method and its
          caveats.

        ``eli5.formatters.fields`` provides constants that cover common cases:
        ``INFO`` (method and description), ``WEIGHTS`` (all the rest),
        and ``ALL`` (all).

    Returns
    -------
    str
        All rendered sections joined with newlines. If ``expl.error`` is
        set, an error line always comes first, regardless of ``show``.
    """
    if highlight_spaces is None:
        highlight_spaces = should_highlight_spaces(expl)

    output = []  # type: List[str]
    if expl.error:  # always shown
        output.extend(_error_lines(expl))

    explaining_prediction = has_any_values_for_weights(expl)
    show_feature_values = show_feature_values and explaining_prediction

    # Section name -> zero-argument callable producing that section's lines.
    section_renderers = {
        'method': lambda: _method_lines(expl),
        'description': lambda: _description_lines(expl),
        'transition_features': lambda: _transition_features_lines(expl),
        'targets': lambda: _targets_lines(
            expl,
            hl_spaces=highlight_spaces,
            show_feature_values=show_feature_values,
            explaining_prediction=explaining_prediction,
        ),
        'feature_importances': lambda: _feature_importances_lines(
            expl, hl_spaces=highlight_spaces),
        'decision_tree': lambda: _decision_tree_lines(expl),
    }

    for key in show:
        if not getattr(expl, key, None):
            continue
        render = section_renderers.get(key)
        if render is not None:
            output.extend(render())

    return '\n'.join(output)
def _method_lines(explanation):
    # type: (Explanation) -> List[str]
    """ Return a single line naming the explanation method. """
    return ['Explained as: {}'.format(explanation.method)]


def _description_lines(explanation):
    # type: (Explanation) -> List[str]
    """ Return the explanation description, or an empty line if unset. """
    return [explanation.description or '']


def _error_lines(explanation):
    # type: (Explanation) -> List[str]
    """ Return a single line reporting ``explanation.error``. """
    return ['Error: {}'.format(explanation.error)]


def _feature_importances_lines(explanation, hl_spaces):
    # type: (Explanation, Optional[bool]) -> Iterator[str]
    """
    Yield one line per feature importance, followed by a centered
    "N more" line when ``feature_importances.remaining`` is non-zero.

    This is a generator, so the assertion below only runs once iteration
    starts.
    """
    max_width = 0
    assert explanation.feature_importances is not None
    for line in _fi_lines(explanation.feature_importances, hl_spaces):
        # Track the widest line so the trailing note can be centered on it.
        max_width = max(max_width, len(line))
        yield line
    if explanation.feature_importances.remaining:
        yield _format_remaining(
            explanation.feature_importances.remaining,
            kind='',
            width=max_width)


def _fi_lines(feature_importances, hl_spaces):
    # type: (FeatureImportances, Optional[bool]) -> Iterator[str]
    """
    Yield a formatted line for every entry of
    ``feature_importances.importances``: the weight, optionally
    "+- 2*std", and the feature name.
    """
    for fw in feature_importances.importances:
        featname = _format_feature(fw.feature, hl_spaces)
        if fw.std or fw.weight:
            w = u'{:0.4f}'.format(fw.weight)
        else:
            # Both weight and std are zero/None: show a bare "0" padded to
            # 6 characters, the width of a value such as "0.0000".
            w = u"0".rjust(6)
        if fw.std is None:
            yield u'{w} {feature}'.format(feature=featname, w=w)
        else:
            yield u'{w} {plus} {std:0.4f} {feature}'.format(
                feature=featname,
                w=w,
                plus=_PLUS_MINUS,
                std=2 * fw.std,
            )


def _decision_tree_lines(explanation):
    # type: (Explanation) -> List[str]
    """ Return a blank line followed by the text rendering of the tree. """
    assert explanation.decision_tree is not None
    return ["", tree2text(explanation.decision_tree)]


def _transition_features_lines(explanation):
    # type: (Explanation) -> List[str]
    """
    Return the "Transition features" section: a table of ``tf.coef``
    with ``tf.class_names`` used for both the column headers and the
    row index.
    """
    tf = explanation.transition_features
    assert tf is not None
    return [
        "",
        "Transition features:",
        tabulate(tf.coef,
                 headers=tf.class_names,
                 showindex=tf.class_names,
                 floatfmt="0.3f"),
        ""
    ]


def _targets_lines(explanation,  # type: Explanation
                   hl_spaces,  # type: Optional[bool]
                   show_feature_values,  # type: bool
                   explaining_prediction,  # type: bool
                   ):
    # type: (...) -> List[str]
    """
    Return the per-target sections of the explanation.

    For every target the output contains, in order: a header line, the
    table header together with the positive features, the
    "N more positive" / "N more negative" notes (when present), the
    negative features, and a blank separator line.

    Parameters
    ----------
    explanation : Explanation
        Explanation whose ``targets`` are rendered; must not be None.
    hl_spaces : bool or None
        Passed to ``_format_feature`` to control space highlighting.
    show_feature_values : bool
        When true, a third 'Value' column is added to the table.
    explaining_prediction : bool
        Selects the weight column title: 'Contribution' when true,
        'Weight' otherwise.
    """
    lines = []  # type: List[str]
    assert explanation.targets is not None

    # Table layout does not depend on the target, so decide it once.
    weight_title = 'Contribution' if explaining_prediction else 'Weight'
    if show_feature_values:
        table_header = [weight_title, 'Feature', 'Value']
        col_align = 'rlr'

        def table_line(fw):
            return [
                format_weight(fw.weight),
                _format_feature(fw.feature, hl_spaces),
                format_value(fw.value)]
    else:
        table_header = [weight_title, 'Feature']
        col_align = 'rl'

        def table_line(fw):
            return [
                format_weight(fw.weight),
                _format_feature(fw.feature, hl_spaces)]

    for target in explanation.targets:
        scores = _format_scores(target.proba, target.score)
        if scores:
            scores = " (%s)" % scores
        header = "%s%r%s top features" % (
            'y=' if not explanation.is_regression else '',
            target.target,
            scores)
        lines.append(header)

        w = target.feature_weights
        assert w is not None
        table = eli5_tabulate(
            [table_line(fw) for fw in chain(w.pos, reversed(w.neg))],
            header=list(table_header),
            col_align=col_align,
        )
        # table[1] is used as the reference width for centering the
        # "N more ..." notes.
        max_width = len(table[1])

        # Split the table into its leading part (header + positive rows)
        # and the trailing negative rows using an absolute index. Slicing
        # with ``-len(w.neg)`` breaks when there are no negative features:
        # ``-0`` is ``0``, which would leave the leading part empty and put
        # the whole table after the "N more ..." notes.
        split_at = max(len(table) - len(w.neg), 0)
        pos_table = '\n'.join(table[:split_at])
        neg_table = '\n'.join(table[split_at:])

        if pos_table:
            lines.append(pos_table)
        if w.pos_remaining:
            lines.append(
                _format_remaining(w.pos_remaining, 'positive', max_width))
        if w.neg_remaining:
            lines.append(
                _format_remaining(w.neg_remaining, 'negative', max_width))
        if neg_table:
            lines.append(neg_table)
        lines.append('')
    return lines


def _format_scores(proba, score):
    # type: (Optional[float], Optional[float]) -> str
    """
    Format probability and/or score as "probability=P, score=S".

    Values that are None are omitted; an empty string is returned when
    both are None.
    """
    scores = []
    if proba is not None:
        scores.append("probability=%0.3f" % proba)
    if score is not None:
        scores.append("score=%0.3f" % score)
    return ", ".join(scores)


def _format_remaining(remaining, kind, width):
    # type: (int, str, int) -> str
    """
    Return an "... N more [kind] ..." note centered within ``width``
    characters. ``kind`` may be empty, in which case it is left out.
    """
    s = '{ellipsis} {remaining} more {kind}{ellipsis}'.format(
        ellipsis=_ELLIPSIS,
        remaining=remaining,
        kind=(kind + ' ') if kind else '',
    )
    # Build a '{:^<width>}' format spec to center the note.
    return ('{:^%d}' % width).format(s)


def _format_feature(name, hl_spaces):
    # type: (...) -> str
    """
    Format a feature name for text output.

    Bytes are decoded as UTF-8. ``FormattedFeatureName`` instances format
    themselves; a list of dicts that each have 'name' and 'sign' keys is
    rendered by ``_format_unhashed_feature``; anything else is treated
    as a single feature name.
    """
    if isinstance(name, bytes):
        name = name.decode('utf8')
    if isinstance(name, FormattedFeatureName):
        return name.format()
    elif isinstance(name, list) and \
            all('name' in x and 'sign' in x for x in name):
        return _format_unhashed_feature(name, hl_spaces=hl_spaces)
    else:
        return _format_single_feature(name, hl_spaces=hl_spaces)


def _format_single_feature(feature, hl_spaces):
    # type: (str, bool) -> str
    """
    Return ``feature``, passing it through ``replace_spaces`` with a
    ``_SPACE``-based replacement when ``hl_spaces`` is true.
    """
    if hl_spaces:
        return replace_spaces(feature, lambda n, _: _SPACE * n)
    else:
        return feature


def _format_unhashed_feature(name, hl_spaces, sep=' | '):
    # type: (List, bool, str) -> str
    """
    Format feature name for hashed features: every candidate in ``name``
    is rendered with ``format_signed`` and the results are joined
    with ``sep``.
    """
    return sep.join(
        format_signed(n, _format_single_feature, hl_spaces=hl_spaces)
        for n in name)