import abc

import numpy as np

from . import init
from . import line_searchers

__all__ = ['L2Loss', 'L1Loss', 'LogLoss']

class Loss(abc.ABC):
    """Base class for the losses used by StarBoost's boosting estimators.

    A loss knows how to evaluate itself, how to compute its gradient with respect to the
    predictions, and which estimator to use for the initial predictions. It may also provide a
    line searcher used to post-process the leaves of trained trees.
    """

    @abc.abstractmethod
    def __call__(self, y_true, y_pred):
        pass

    @abc.abstractmethod
    def gradient(self, y_true, y_pred):
        pass

    @property
    @abc.abstractmethod
    def default_init_estimator(self):
        pass

    @property
    def tree_line_searcher(self):
        return None
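
# A minimal sketch of how a custom loss could plug into this interface. This is a
# hypothetical example, not part of StarBoost's public API; `PseudoHuberLoss` and
# its formulas are illustrative assumptions only:
#
#     class PseudoHuberLoss(Loss):
#         """Smooth approximation of the L1 loss, with delta fixed to 1."""
#
#         def __call__(self, y_true, y_pred):
#             d = np.subtract(y_pred, y_true)
#             return (np.sqrt(1. + d ** 2) - 1.).mean()
#
#         def gradient(self, y_true, y_pred):
#             d = np.subtract(y_pred, y_true)
#             return d / np.sqrt(1. + d ** 2)
#
#         @property
#         def default_init_estimator(self):
#             return init.MeanEstimator()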

class L2Loss(Loss):
    """Computes the L2 loss, also known as the mean squared error.

    Mathematically, the L2 loss is defined as

    :math:`L = \\frac{1}{n} \\sum_i^n (p_i - y_i)^2`

    Its gradient is

    :math:`\\frac{\\partial L}{\\partial p_i} = p_i - y_i`

    Using ``L2Loss`` is equivalent to setting the ``loss`` parameter to ``'ls'`` in scikit-learn's
    ``GradientBoostingRegressor``.
    """

    def __call__(self, y_true, y_pred):
        """Returns the L2 loss.

        Example:

            >>> import starboost as sb
            >>> y_true = [10, 25, 0]
            >>> y_pred = [5, 30, 5]
            >>> sb.losses.L2Loss()(y_true, y_pred)
            25.0
        """
        return np.power(np.subtract(y_pred, y_true), 2).mean()

    def gradient(self, y_true, y_pred):
        """Returns the gradient of the L2 loss with respect to each prediction.

        Example:

            >>> import starboost as sb
            >>> y_true = [10, 25, 0]
            >>> y_pred = [5, 30, 5]
            >>> sb.losses.L2Loss().gradient(y_true, y_pred)
            array([-5,  5,  5])
        """
        return np.subtract(y_pred, y_true)

    @property
    def default_init_estimator(self):
        """Returns ``starboost.init.MeanEstimator()``."""
        return init.MeanEstimator()

    @property
    def tree_line_searcher(self):
        return None
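
# Illustrative sketch of how the gradient drives a plain gradient descent step
# (`learning_rate = 0.1` is an assumed hyperparameter, not StarBoost API):
#
#     >>> loss = L2Loss()
#     >>> y_true = np.array([10., 25., 0.])
#     >>> y_pred = np.array([5., 30., 5.])
#     >>> y_pred - 0.1 * loss.gradient(y_true, y_pred)
#     array([ 5.5, 29.5,  4.5])
#
# Each prediction moves towards its target by a tenth of the residual.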

class L1Loss(Loss):
    """Computes the L1 loss, also known as the mean absolute error.

    Mathematically, the L1 loss is defined as

    :math:`L = \\frac{1}{n} \\sum_i^n |p_i - y_i|`

    Its gradient is

    :math:`\\frac{\\partial L}{\\partial p_i} = sign(p_i - y_i)`

    where :math:`sign(p_i - y_i)` is equal to 0 if :math:`p_i` is equal to :math:`y_i`. Note that
    this is slightly different from scikit-learn, which replaces 0s with -1s.

    Using ``L1Loss`` produces mostly the same results as when setting the ``loss`` parameter to
    ``'lad'`` in scikit-learn's ``GradientBoostingRegressor``.
    """

    def __call__(self, y_true, y_pred):
        """Returns the L1 loss.

        Example:

            >>> import starboost as sb
            >>> y_true = [0, 0, 1]
            >>> y_pred = [0.5, 0.5, 0.5]
            >>> sb.losses.L1Loss()(y_true, y_pred)
            0.5
        """
        return np.abs(np.subtract(y_pred, y_true)).mean()

    def gradient(self, y_true, y_pred):
        """Returns the gradient of the L1 loss with respect to each prediction.

        Example:

            >>> import starboost as sb
            >>> y_true = [0, 0, 1]
            >>> y_pred = [0.3, 0, 0.8]
            >>> sb.losses.L1Loss().gradient(y_true, y_pred)
            array([ 1.,  0., -1.])
        """
        return np.sign(np.subtract(y_pred, y_true))

    @property
    def default_init_estimator(self):
        """Returns ``starboost.init.QuantileEstimator(alpha=0.5)``."""
        return init.QuantileEstimator(alpha=0.5)

    def _update_leaf(self, y_true, y_pred, gradient):
        # The median of the residuals is the constant that minimizes the mean absolute error
        return np.median(y_true - y_pred)

    @property
    def tree_line_searcher(self):
        """When using ``L1Loss`` the gradient descent procedure will chase the negative of
        ``L1Loss``'s gradient. The negative gradient is solely composed of 1s, -1s, and 0s. It
        turns out that replacing the estimated descent direction with the median of the
        corresponding residuals will minimize the overall mean absolute error much faster.

        This is exactly the same procedure scikit-learn uses to modify the leaves of decision
        trees in ``GradientBoostingRegressor``. However this procedure is more generic and works
        with any kind of weak learner.
        """
        return line_searchers.LeafLineSearcher(update_leaf=self._update_leaf)
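
# Illustrative check of the median update above (a sketch, with made-up residuals):
# the median is the constant that minimizes the mean absolute error, hence it beats
# the mean as a leaf value for the L1 loss.
#
#     >>> residuals = np.array([-2., 0.5, 1., 7.])
#     >>> np.abs(residuals - np.median(residuals)).mean()  # MAE at the median
#     2.375
#     >>> np.abs(residuals - residuals.mean()).mean()  # MAE at the mean
#     2.6875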

class LogLoss(Loss):
    """Computes the logarithmic loss.

    Mathematically, the log loss is defined as

    :math:`L = -\\frac{1}{n} \\sum_i^n \\left( y_i \\log(p_i) + (1 - y_i) \\log(1 - p_i) \\right)`

    Its gradient is

    :math:`\\frac{\\partial L}{\\partial p_i} = p_i - y_i`

    This loss works for binary classification as well as for multi-class cases (in which case the
    loss is usually referred to as "cross-entropy").
    """

    def __call__(self, y_true, y_pred):
        """Returns the log loss.

        Example:

            >>> import starboost as sb
            >>> y_true = [0, 0, 1]
            >>> y_pred = [0.5, 0.5, 0.5]
            >>> sb.losses.LogLoss()(y_true, y_pred)
            0.807410...
        """
        # The log loss expressed with log-odds scores: using logaddexp instead of the
        # sigmoid keeps the computation numerically stable
        loss = -(np.multiply(y_true, y_pred) - np.logaddexp(0., y_pred))
        return loss.mean()

    def gradient(self, y_true, y_pred):
        """Returns the gradient of the log loss with respect to each prediction.

        Example:

            >>> import starboost as sb
            >>> y_true = [0, 0, 1]
            >>> y_pred = [0.5, 0.5, 0.5]
            >>> sb.losses.LogLoss().gradient(y_true, y_pred)
            array([ 0.5,  0.5, -0.5])
        """
        return np.subtract(y_pred, y_true)

    @property
    def default_init_estimator(self):
        """Returns ``starboost.init.PriorProbabilityEstimator()``."""
        return init.PriorProbabilityEstimator()

    def _update_leaf(self, y_true, y_pred, gradient):
        # Newton-Raphson step: with gradient = p - y_true we have y_true + gradient = p,
        # so the numerator is the sum of the residuals y_true - p and the denominator is
        # the sum of p * (1 - p)
        numerator = np.sum(-gradient)
        denominator = np.sum((y_true + gradient) * (1 - y_true - gradient))
        return (numerator / denominator) if denominator > 1e-150 else 0.

    @property
    def tree_line_searcher(self):
        return line_searchers.LeafLineSearcher(update_leaf=self._update_leaf)
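
# Illustrative sketch of the Newton-Raphson leaf update above (made-up values;
# `_update_leaf` is private and called here purely for demonstration): with
# y_true = [0, 0, 1] and probabilities p = [0.5, 0.5, 0.5] the gradient is
# p - y_true, so the leaf value is sum(y - p) / sum(p * (1 - p)) = -0.5 / 0.75:
#
#     >>> y_true = np.array([0., 0., 1.])
#     >>> p = np.array([0.5, 0.5, 0.5])
#     >>> LogLoss()._update_leaf(y_true, p, p - y_true)
#     -0.6666666666666666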