Source code for eli5.formatters.text

from itertools import chain
from tabulate import tabulate
from typing import Optional, Iterator

from eli5.base import Explanation, FeatureImportances
from . import fields
from .features import FormattedFeatureName
from .utils import (
    format_signed, format_value, format_weight, has_any_values_for_weights,
    replace_spaces, should_highlight_spaces)
from .utils import tabulate as eli5_tabulate, numpy_to_python
from .trees import tree2text


# Glyphs used while rendering explanations as plain text.
# Placed between a feature importance and its spread (2 * std) in _fi_lines.
_PLUS_MINUS = "±"
# Wraps the "N more ..." summary line built by _format_remaining.
_ELLIPSIS = '…'
# Repeated n times by the replacer handed to replace_spaces when space
# highlighting is enabled (see _format_single_feature).
_SPACE = '░'



[docs]
def format_as_text(expl: Explanation,
                   show=fields.ALL,
                   highlight_spaces: Optional[bool] = None,
                   show_feature_values: bool = False,
                   ) -> str:
    """ Render an explanation as plain text.

    Parameters
    ----------
    expl : eli5.base.Explanation
        Explanation returned by ``eli5.explain_weights`` or
        ``eli5.explain_prediction`` functions.

    show : list[str], optional
        Sections to render, in the order given. A section is skipped when
        the matching attribute of ``expl`` is missing or falsy. Allowed
        values:

        * 'targets' - per-target feature weights;
        * 'transition_features' - transition features of a CRF model;
        * 'feature_importances' - feature importances of a decision tree or
          an ensemble-based estimator;
        * 'decision_tree' - decision tree in a graphical form;
        * 'method' - a string with explanation method;
        * 'description' - description of explanation method and its caveats.

        ``eli5.formatters.fields`` provides constants that cover common cases:
        ``INFO`` (method and description), ``WEIGHTS`` (all the rest),
        and ``ALL`` (all).

    highlight_spaces : bool or None, optional
        Whether to highlight spaces in feature names. This is useful if
        you work with text and have ngram features which may include spaces
        at left or right. Default is None, meaning that the value is
        obtained from ``should_highlight_spaces(expl)``.

    show_feature_values : bool
        When True, feature values are shown along with feature contributions.
        Only honoured when ``has_any_values_for_weights(expl)`` is true.
        Default is False.

    Returns
    -------
    str
        All rendered sections joined with newlines. If ``expl.error`` is
        set, an error line always comes first.
    """
    output: list[str] = []

    if highlight_spaces is None:
        highlight_spaces = should_highlight_spaces(expl)

    # The error line is unconditional: it does not depend on ``show``.
    if expl.error:
        output.extend(_error_lines(expl))

    explaining_prediction = has_any_values_for_weights(expl)
    with_values = show_feature_values and explaining_prediction

    # Section name -> zero-argument renderer; nothing runs until a section
    # is actually requested and present on the explanation.
    renderers = {
        'method': lambda: _method_lines(expl),
        'description': lambda: _description_lines(expl),
        'transition_features': lambda: _transition_features_lines(expl),
        'targets': lambda: _targets_lines(
            expl,
            hl_spaces=highlight_spaces,
            show_feature_values=with_values,
            explaining_prediction=explaining_prediction,
        ),
        'feature_importances': lambda: _feature_importances_lines(
            expl, hl_spaces=highlight_spaces),
        'decision_tree': lambda: _decision_tree_lines(expl),
    }

    for section in show:
        if not getattr(expl, section, None):
            continue
        render = renderers.get(section)
        if render is not None:
            output.extend(render())

    return '\n'.join(output)



def _method_lines(explanation: Explanation) -> list[str]:
    """ Return a single line naming the explanation method. """
    method = explanation.method
    return ['Explained as: {}'.format(method)]


def _description_lines(explanation: Explanation) -> list[str]:
    """ Return the description as one line; a falsy description becomes ''. """
    text = explanation.description
    return [text if text else '']


def _error_lines(explanation: Explanation) -> list[str]:
    """ Return a single line reporting the explanation's error. """
    message = 'Error: {}'.format(explanation.error)
    return [message]


def _feature_importances_lines(
        explanation: Explanation, hl_spaces: Optional[bool]) -> Iterator[str]:
    """ Yield one line per feature importance, then a summary of the rest.

    The trailing "N more" line is produced only when
    ``feature_importances.remaining`` is truthy, and is centred on the
    width of the longest line yielded before it.
    """
    importances = explanation.feature_importances
    assert importances is not None
    widest = 0
    for text in _fi_lines(importances, hl_spaces):
        if len(text) > widest:
            widest = len(text)
        yield text
    leftover = importances.remaining
    if leftover:
        yield _format_remaining(leftover, kind='', width=widest)


def _fi_lines(
        feature_importances: FeatureImportances, hl_spaces: Optional[bool],
    ) -> Iterator[str]:
    """ Yield a formatted line for every entry in ``importances``.

    A line is ``<weight>  <feature>``; when the entry has a std
    (``std is not None``) it becomes ``<weight> ± <2 * std>  <feature>``.
    """
    for entry in feature_importances.importances:
        label = _format_feature(entry.feature, hl_spaces)
        if entry.std or entry.weight:
            weight_text = '{:0.4f}'.format(entry.weight)
        else:
            # Weight and std are both falsy: print a bare, right-aligned
            # "0" padded to the width of a typical "0.1234" value.
            weight_text = "0".rjust(6)
        if entry.std is not None:
            yield '{w} {plus} {std:0.4f}  {feature}'.format(
                w=weight_text,
                plus=_PLUS_MINUS,
                std=2 * entry.std,
                feature=label,
            )
        else:
            yield '{w}  {feature}'.format(w=weight_text, feature=label)


def _decision_tree_lines(explanation: Explanation) -> list[str]:
    """ Return a blank line followed by the ``tree2text`` rendering. """
    tree = explanation.decision_tree
    assert tree is not None
    rendered = tree2text(tree)
    return ["", rendered]


def _transition_features_lines(explanation: Explanation) -> list[str]:
    """ Return the transition-feature table framed by blank lines.

    ``class_names`` is used both for the column headers and for the row
    index of the table; coefficients are printed with three decimals.
    """
    transitions = explanation.transition_features
    assert transitions is not None
    coef = transitions.coef
    labels = transitions.class_names
    table = tabulate(coef, headers=labels, showindex=labels, floatfmt="0.3f")
    return ["", "Transition features:", table, ""]


def _targets_lines(explanation: Explanation,
                   hl_spaces: Optional[bool],
                   show_feature_values: bool,
                   explaining_prediction: bool,
                   ) -> list[str]:
    """ Return the text lines describing every target of ``explanation``.

    For each target the output is, in order: a header line (target name
    plus probability/score when available), the table header and positive
    feature rows, the "N more positive" / "N more negative" summary lines
    when the corresponding counters are truthy, the negative feature rows,
    and a blank separator line.

    Parameters
    ----------
    explanation : Explanation
        Explanation whose ``targets`` must not be None.
    hl_spaces : bool or None
        Forwarded to ``_format_feature`` for every feature name.
    show_feature_values : bool
        When True, a third 'Value' column is added to the table.
    explaining_prediction : bool
        Selects the first column title: 'Contribution' when True,
        'Weight' otherwise.
    """
    lines: list[str] = []
    assert explanation.targets is not None

    def table_line(fw) -> list:
        # One table row; the value cell exists only in three-column mode.
        row = [format_weight(fw.weight),
               _format_feature(fw.feature, hl_spaces)]
        if show_feature_values:
            row.append(format_value(fw.value))
        return row

    for target in explanation.targets:
        scores = _format_scores(target.proba, target.score)
        if scores:
            scores = " (%s)" % scores

        header = "%s%r%s top features" % (
            'y=' if not explanation.is_regression else '',
            numpy_to_python(target.target),
            scores)
        lines.append(header)

        if explaining_prediction:
            table_header = ['Contribution', 'Feature']
        else:
            table_header = ['Weight', 'Feature']
        if show_feature_values:
            table_header.append('Value')
            col_align = 'rlr'
        else:
            col_align = 'rl'

        w = target.feature_weights
        assert w is not None
        table = eli5_tabulate(
            [table_line(fw) for fw in chain(w.pos, reversed(w.neg))],
            header=table_header,
            col_align=col_align,
        )
        max_width = len(table[1])
        # The last len(w.neg) rows are the negative features. Split by an
        # explicit index: slicing with -len(w.neg) breaks when w.neg is
        # empty, since -0 == 0 would make the positive part empty and push
        # the whole table below the "... more ..." lines.
        split = len(table) - len(w.neg)
        pos_table = '\n'.join(table[:split])
        neg_table = '\n'.join(table[split:])

        if pos_table:
            lines.append(pos_table)
        if w.pos_remaining:
            lines.append(
                _format_remaining(w.pos_remaining, 'positive', max_width))
        if w.neg_remaining:
            lines.append(
                _format_remaining(w.neg_remaining, 'negative', max_width))
        if neg_table:
            lines.append(neg_table)

        lines.append('')
    return lines


def _format_scores(proba: Optional[float], score: Optional[float]) -> str:
    scores = []
    if proba is not None:
        scores.append("probability=%0.3f" % proba)
    if score is not None:
        scores.append("score=%0.3f" % score)
    return ", ".join(scores)


def _format_remaining(remaining: int, kind: str, width: int) -> str:
    """ Build the centred "… N more <kind> …" summary line.

    ``kind`` is followed by a space when non-empty and dropped otherwise;
    the message is centred within ``width`` characters.
    """
    qualifier = kind + ' ' if kind else ''
    message = '{ellipsis} {remaining} more {kind}{ellipsis}'.format(
        ellipsis=_ELLIPSIS,
        remaining=remaining,
        kind=qualifier,
    )
    # Same format spec as '{:^<width>}', applied through format().
    return format(message, '^%d' % width)


def _format_feature(name, hl_spaces) -> str:
    """ Format a feature name, dispatching on the kind of value given.

    Bytes are decoded as UTF-8 first. A ``FormattedFeatureName`` formats
    itself; a list whose items all contain 'name' and 'sign' is handled by
    ``_format_unhashed_feature``; anything else is passed to
    ``_format_single_feature``.
    """
    if isinstance(name, bytes):
        name = name.decode('utf8')

    if isinstance(name, FormattedFeatureName):
        return name.format()

    is_unhashed = isinstance(name, list) and all(
        'name' in part and 'sign' in part for part in name)
    if is_unhashed:
        return _format_unhashed_feature(name, hl_spaces=hl_spaces)
    return _format_single_feature(name, hl_spaces=hl_spaces)


def _format_single_feature(feature: str, hl_spaces: bool) -> str:
    if hl_spaces:
        return replace_spaces(feature, lambda n, _: _SPACE * n)
    else:
        return feature


def _format_unhashed_feature(name: list, hl_spaces: bool, sep=' | ') -> str:
    """
    Format feature name for hashed features.
    """
    return sep.join(
        format_signed(n, _format_single_feature, hl_spaces=hl_spaces)
        for n in name)