Source code for eli5.formatters.as_dataframe

from itertools import chain
from typing import Any, Optional
import warnings

import pandas as pd

import eli5
from eli5.base import (
    Explanation, FeatureImportances, TargetExplanation,
    TransitionFeatureWeights,
)
from eli5.base_utils import singledispatch



[docs]
def explain_weights_df(estimator, **kwargs) -> pd.DataFrame:
    """ Explain weights of ``estimator`` and return the result
    as a single ``pandas.DataFrame``.
    Every keyword argument is forwarded to :func:`eli5.explain_weights`.
    Unless ``top`` is passed explicitly, it is set to ``None`` so that
    weights of all features are exported.
    """
    explanation = eli5.explain_weights(estimator, **_set_defaults(kwargs))
    return format_as_dataframe(explanation)




[docs]
def explain_weights_dfs(estimator, **kwargs) -> dict[str, pd.DataFrame]:
    """ Explain weights of ``estimator`` and return the result as a dict
    with ``pandas.DataFrame`` values, in the same form as
    :func:`eli5.formatters.as_dataframe.format_as_dataframes` produces.
    Every keyword argument is forwarded to :func:`eli5.explain_weights`.
    Unless ``top`` is passed explicitly, it is set to ``None`` so that
    weights of all features are exported.
    """
    explanation = eli5.explain_weights(estimator, **_set_defaults(kwargs))
    return format_as_dataframes(explanation)




[docs]
def explain_prediction_df(estimator, doc, **kwargs) -> pd.DataFrame:
    """ Explain the prediction of ``estimator`` for ``doc`` and return
    the result as a single ``pandas.DataFrame``.
    Every keyword argument is forwarded to :func:`eli5.explain_prediction`.
    Unless ``top`` is passed explicitly, it is set to ``None`` so that
    weights of all features are exported.
    """
    explanation = eli5.explain_prediction(
        estimator, doc, **_set_defaults(kwargs))
    return format_as_dataframe(explanation)




[docs]
def explain_prediction_dfs(estimator, doc, **kwargs) -> dict[str, pd.DataFrame]:
    """ Explain the prediction of ``estimator`` for ``doc`` and return
    the result as a dict with ``pandas.DataFrame`` values, in the same form
    as :func:`eli5.formatters.as_dataframe.format_as_dataframes` produces.
    Every keyword argument is forwarded to :func:`eli5.explain_prediction`.
    Unless ``top`` is passed explicitly, it is set to ``None`` so that
    weights of all features are exported.
    """
    explanation = eli5.explain_prediction(
        estimator, doc, **_set_defaults(kwargs))
    return format_as_dataframes(explanation)



def _set_defaults(kwargs):
    if 'top' not in kwargs:
        # No limit on number of features by default.
        kwargs['top'] = None
    return kwargs


# Names of the explanation attributes that can be exported to a DataFrame.
# The order is significant: format_as_dataframe exports the first attribute
# in this list that has a truthy value, and format_as_dataframes inserts
# keys into its result in this order.
_EXPORTED_ATTRIBUTES = ['transition_features', 'targets', 'feature_importances']



[docs]
def format_as_dataframes(explanation: Explanation) -> dict[str, pd.DataFrame]:
    """ Convert an explanation into a dictionary of ``pandas.DataFrame``
    objects keyed by the name of the explanation attribute each one
    was built from. Attributes with a falsy value are left out.
    Prefer this function when a single explanation can yield several
    dataframes (e.g. a CRF explanation which has both feature weights
    and a transition matrix).
    Note that :func:`eli5.explain_weights` limits number of features
    by default. If you need all features, pass ``top=None`` to
    :func:`eli5.explain_weights`, or use
    :func:`explain_weights_dfs`.
    """
    # Lazy (name, value) pairs, so attributes are read one at a time, in order.
    components = (
        (name, getattr(explanation, name)) for name in _EXPORTED_ATTRIBUTES
    )
    return {
        name: format_as_dataframe(component)
        for name, component in components
        if component
    }




[docs]
@singledispatch
def format_as_dataframe(explanation) -> Optional[pd.DataFrame]:
    """ Convert an explanation into a single ``pandas.DataFrame``.
    The first non-empty exportable attribute of the explanation is used.
    If more than one attribute could be exported (see
    :func:`eli5.formatters.as_dataframe.format_as_dataframes`),
    a warning is issued. ``None`` is returned when there is nothing to export.
    Individual components of an explanation are accepted as well:
    feature importances, targets, transition features.
    Note that :func:`eli5.explain_weights` limits number of features
    by default. If you need all features, pass ``top=None`` to
    :func:`eli5.explain_weights`, or use
    :func:`explain_weights_df`.
    """
    exportable = [name for name in _EXPORTED_ATTRIBUTES
                  if getattr(explanation, name)]
    if not exportable:
        return None
    chosen, others = exportable[0], exportable[1:]
    if others:
        message = (
            'Exporting {} to DataFrame, but also {} could be '
            'exported. Consider using eli5.format_as_dataframes.'
        ).format(chosen, ', '.join(others))
        warnings.warn(message)
    # Dispatch again on the type of the selected component.
    return format_as_dataframe(getattr(explanation, chosen))



@format_as_dataframe.register(FeatureImportances)
def _feature_importances_to_df(feature_importances: FeatureImportances) -> pd.DataFrame:
    """ Build a dataframe with one row per feature importance.
    The ``feature`` and ``weight`` columns are always present; ``std`` and
    ``value`` are added only when at least one entry has a non-``None``
    value for them.
    """
    entries = feature_importances.importances
    frame = pd.DataFrame(
        {
            'feature': [entry.feature for entry in entries],
            'weight': [entry.weight for entry in entries],
        },
        columns=['feature', 'weight'],
    )
    for optional in ('std', 'value'):
        column = [getattr(entry, optional) for entry in entries]
        if any(item is not None for item in column):
            frame[optional] = column
    return frame


@format_as_dataframe.register(list)
def _targets_to_df(targets: list[TargetExplanation]) -> pd.DataFrame:
    """ Build a dataframe with one row per (target, feature weight) pair.
    For every target, positive weights come first, followed by negative
    weights in reversed order. The ``std`` and ``value`` columns are
    dropped when none of the rows has a non-``None`` value for them.
    Raises ``ValueError`` if the first list item is not
    a ``TargetExplanation``.
    """
    if targets and not isinstance(targets[0], TargetExplanation):
        raise ValueError('Only lists of TargetExplanation are supported')

    # Collect (target name, feature weight) pairs in output row order.
    pairs: list[Any] = []
    for target in targets:
        weights = target.feature_weights
        assert weights is not None
        pairs.extend(
            (target.target, fw)
            for fw in chain(weights.pos, reversed(weights.neg))
        )

    data: dict[str, list[Any]] = {
        'target': [name for name, _ in pairs],
        'feature': [fw.feature for _, fw in pairs],
        'weight': [fw.weight for _, fw in pairs],
        'std': [fw.std for _, fw in pairs],
        'value': [fw.value for _, fw in pairs],
    }
    optional_fields = ('std', 'value')
    kept = [
        field for field in data
        if field not in optional_fields
        or any(item is not None for item in data[field])
    ]
    return pd.DataFrame({field: data[field] for field in kept}, columns=kept)


@format_as_dataframe.register(TransitionFeatureWeights)
def _transition_features_to_df(transition_features: TransitionFeatureWeights) -> pd.DataFrame:
    """ Build a dataframe with one row per ordered pair of class names,
    with columns ``from``, ``to`` and ``coef``.
    """
    labels = list(transition_features.class_names)
    # Every (from, to) combination, with ``to`` varying fastest.
    # NOTE(review): presumably this matches the order in which
    # ``coef.reshape(-1)`` flattens the matrix - confirm against the producer.
    pairs = [(source, destination)
             for source in labels
             for destination in labels]
    data = {
        'from': [source for source, _ in pairs],
        'to': [destination for _, destination in pairs],
        'coef': transition_features.coef.reshape(-1),
    }
    return pd.DataFrame(data, columns=['from', 'to', 'coef'])