# -*- coding: utf-8 -*-
from typing import Any, List, Tuple, Union, Optional
import numpy as np
from .base_utils import attrs
from .formatters.features import FormattedFeatureName
# @attrs decorator used in this file calls @attr.s(slots=True),
# creating attr.ib entries based on the signature of __init__.
@attrs
class Explanation(object):
    """ Explanation of a classifier or a regressor.

    One instance describes either the weights of an estimator or
    a single prediction. Every constructor argument is stored unchanged
    in the attribute of the same name; all arguments except
    :estimator: are optional.
    """
    def __init__(self,
                 estimator,  # type: str
                 description=None,  # type: Optional[str]
                 error=None,  # type: Optional[str]
                 method=None,  # type: Optional[str]
                 is_regression=False,  # type: bool
                 targets=None,  # type: Optional[List[TargetExplanation]]
                 feature_importances=None,  # type: Optional[FeatureImportances]
                 decision_tree=None,  # type: Optional[TreeInfo]
                 highlight_spaces=None,  # type: Optional[bool]
                 transition_features=None,  # type: Optional[TransitionFeatureWeights]
                 image=None,  # type: Any
                 ):
        # type: (...) -> None

        # General information about the explanation.
        self.estimator = estimator
        self.method = method
        self.description = description
        self.error = error
        self.is_regression = is_regression

        # Explanation payloads; each one is None unless the caller passes it.
        self.targets = targets
        self.feature_importances = feature_importances
        self.decision_tree = decision_tree
        self.transition_features = transition_features
        self.highlight_spaces = highlight_spaces
        # When image is not None, it is assumed that we are dealing
        # with images.
        self.image = image

    def _repr_html_(self):
        """ Return the HTML representation shown in the notebook.
        """
        # The formatters are imported locally, inside the method.
        from eli5.formatters import fields
        from eli5.formatters.html import format_as_html
        html = format_as_html(self, force_weights=False, show=fields.WEIGHTS)
        return html
@attrs
class FeatureImportances(object):
    """ Importances of features (:importances:) plus the number
    of remaining non-zero features (:remaining:).
    """
    def __init__(self, importances, remaining):
        # type: (...) -> None
        self.remaining = remaining  # type: int
        self.importances = importances  # type: List[FeatureWeight]

    @classmethod
    def from_names_values(cls, names, values, std=None, **kwargs):
        """ Alternative constructor working on parallel iterables.

        A FeatureWeight is created for every (name, value) pair, or for
        every (name, value, std) triple when :std: is given. Any extra
        keyword arguments are forwarded to the class constructor.
        """
        # Leave the std column out entirely when it was not provided.
        columns = (names, values) if std is None else (names, values, std)
        importances = [FeatureWeight(*row) for row in zip(*columns)]  # type: ignore
        return cls(importances, **kwargs)
@attrs
class TargetExplanation(object):
    """ Explanation of one target (or one class).

    :feature_weights: holds the feature weights, :weighted_spans: holds
    the features highlighted in text, and :heatmap: holds spatial values.
    Only :target: is required; the other arguments default to None.
    """
    def __init__(self,
                 target,  # type: Union[str, int]
                 feature_weights=None,  # type: Optional[FeatureWeights]
                 proba=None,  # type: Optional[float]
                 score=None,  # type: Optional[float]
                 weighted_spans=None,  # type: Optional[WeightedSpans]
                 heatmap=None,  # type: Optional[np.ndarray]
                 ):
        # type: (...) -> None
        self.target = target

        # Scalar outputs for the target.
        self.proba = proba
        self.score = score

        # Detailed explanation data.
        self.feature_weights = feature_weights
        self.weighted_spans = weighted_spans
        self.heatmap = heatmap
# Type alias for a feature: a plain string, a List or a FormattedFeatureName.
# List is currently used for unhashed features.
Feature = Union[str, List, FormattedFeatureName]
@attrs
class FeatureWeights(object):
    """ Weights of the top features.

    Positive weights go to :pos: and negative ones to :neg:, both sorted
    by descending absolute value. :pos_remaining: and :neg_remaining:
    store the number of remaining positive and negative features;
    both default to 0.
    """
    def __init__(self,
                 pos,  # type: List[FeatureWeight]
                 neg,  # type: List[FeatureWeight]
                 pos_remaining=0,  # type: int
                 neg_remaining=0,  # type: int
                 ):
        # type: (...) -> None

        # Positive side.
        self.pos = pos
        self.pos_remaining = pos_remaining

        # Negative side.
        self.neg = neg
        self.neg_remaining = neg_remaining
@attrs
class FeatureWeight(object):
    """ A single :feature: together with its :weight:.
    :std: and :value: are optional and default to None.
    """
    def __init__(self,
                 feature,  # type: Feature
                 weight,  # type: float
                 std=None,  # type: Optional[float]
                 value=None,  # type: Any
                 ):
        # type: (...) -> None

        # Required parts.
        self.feature = feature
        self.weight = weight

        # Optional extras.
        self.value = value
        self.std = std
@attrs
class WeightedSpans(object):
    """ Highlighted spans for parts of a document.

    :docs_weighted_spans: contains one DocWeightedSpans object per
    vectorizer, and :other: contains the features that are not
    highlighted anywhere (None by default).
    """
    def __init__(self,
                 docs_weighted_spans,  # type: List[DocWeightedSpans]
                 other=None,  # type: Optional[FeatureWeights]
                 ):
        # type: (...) -> None
        self.other = other
        self.docs_weighted_spans = docs_weighted_spans
# Type alias for one highlighted feature: the feature itself,
# the spans it covers and its weight.
WeightedSpan = Tuple[
Feature,
List[Tuple[int, int]], # list of spans (start, end) for this feature
float, # feature weight
]
@attrs
class DocWeightedSpans(object):
    """ Features highlighted in a text.

    :document: is the pre-processed document, before the analyzer
    is applied. :spans: is a list of spans for the features found in
    the text; span indices refer to :document:. :preserve_density:
    controls how features are colored during formatting - True is
    a better choice for char features, False for word features.
    :vec_name: is an optional string, None by default.
    """
    def __init__(self,
                 document,  # type: str
                 spans,  # type: List[WeightedSpan]
                 preserve_density=None,  # type: Optional[bool]
                 vec_name=None,  # type: Optional[str]
                 ):
        # type: (...) -> None

        # The text and the spans found in it.
        self.document = document
        self.spans = spans

        # Optional settings.
        self.vec_name = vec_name
        self.preserve_density = preserve_density
@attrs
class TransitionFeatureWeights(object):
    """ Matrix of weights for transition features (:coef:),
    stored along with the list of class names (:class_names:).
    """
    def __init__(self,
                 class_names,  # type: List[str]
                 coef,
                 ):
        # type: (...) -> None
        self.coef = coef
        self.class_names = class_names
@attrs
class TreeInfo(object):
    """ Description of a decision tree.

    :criterion: names the function which measures the quality of
    a split, :tree: contains all nodes of the tree, :graphviz: is
    the tree rendered in graphviz .dot format, and :is_classification:
    is a boolean flag stored as passed.
    """
    def __init__(self,
                 criterion,  # type: str
                 tree,  # type: NodeInfo
                 graphviz,  # type: str
                 is_classification,  # type: bool
                 ):
        # type: (...) -> None

        # Settings of the tree.
        self.criterion = criterion
        self.is_classification = is_classification

        # The tree itself and its rendering.
        self.tree = tree
        self.graphviz = graphviz
@attrs
class NodeInfo(object):
    """ One node of a binary tree.

    The :left: and :right: attributes point to the left and right
    children. They, as well as :feature_name:, :feature_id: and
    :threshold:, default to None.
    """
    def __init__(self,
                 id,  # type: int
                 is_leaf,  # type: bool
                 value,
                 value_ratio,
                 impurity,  # type: float
                 samples,  # type: int
                 sample_ratio,  # type: float
                 feature_name=None,  # type: Optional[str]
                 feature_id=None,  # type: Optional[int]
                 threshold=None,  # type: Optional[float]
                 left=None,  # type: Optional[NodeInfo]
                 right=None,  # type: Optional[NodeInfo]
                 ):
        # type: (...) -> None

        # Required node data.
        self.id = id
        self.is_leaf = is_leaf
        self.value = value
        self.value_ratio = value_ratio
        self.impurity = impurity
        self.samples = samples
        self.sample_ratio = sample_ratio

        # Optional split description.
        self.feature_id = feature_id
        self.feature_name = feature_name
        self.threshold = threshold

        # Optional children.
        self.left = left
        self.right = right