# Source code for permetrics.classification

#!/usr/bin/env python
# Created by "Thieu" at 09:29, 23/09/2020 ----------%
#       Email: nguyenthieu2102@gmail.com            %
#       Github: https://github.com/thieu1995        %
# --------------------------------------------------%

from permetrics.evaluator import Evaluator
from permetrics.utils import data_util as du
from permetrics.utils import classifier_util as cu
import numpy as np



[docs]
class ClassificationMetric(Evaluator):
    """
    A class for evaluating classification metrics.

    Label-based metrics (precision, recall, F-scores, ...) work on hard class
    labels and are routed through ``get_processed_data``. Score-based metrics
    (ROC-AUC, Gini, Brier, cross-entropy, hinge, KL divergence) work on
    predicted probabilities or decision scores and are routed through
    ``get_processed_data2``.

    Every public metric is also reachable through a short alias bound at the
    bottom of the class body (for example ``PS`` for ``precision_score``).

    Parameters
    ----------
    y_true : tuple, list, np.ndarray, optional
        The ground truth values. Default is None.

    y_pred : tuple, list, np.ndarray, optional
        The predicted values. Default is None.

    Notes
    -----
    The options below are parameters of the individual label-based metric
    methods. NOTE(review): whether they may also be supplied to the
    constructor through ``**kwargs`` depends on the ``Evaluator`` base class,
    which is not defined in this module - confirm.

    labels : tuple, list, np.ndarray, optional
        List of labels to index the matrix. This may be used to reorder or select a subset of labels. Default is None.

    pos_label : int or str
        Positive label for binary classification.

    average : str or None, optional
        Determines the type of averaging performed on the data. Options are:

        - 'binary': Calculate for binary classification problem
        - 'micro': Calculate metrics globally by considering each element of the label indicator matrix as a label.
        - 'macro': Calculate metrics for each label and find their unweighted mean.
        - 'weighted': Calculate metrics for each label and find their average, weighted by support.
        - None: Scores for each class are returned.

        Default is "binary".

    Methods
    -------
    get_support(name=None, verbose=True)
        Retrieve the support information for a specific metric or all metrics.

    get_processed_data(y_true=None, y_pred=None)
        Process and format the input data for evaluation.

    get_processed_data2(y_true=None, y_pred=None)
        Process and format the input data for ROC and probability-based metrics.

    precision_score(...)
        Calculate the precision score.

    negative_predictive_value(...)
        Calculate the negative predictive value.

    specificity_score(...)
        Calculate the specificity score.

    recall_score(...)
        Calculate the recall score.

    f1_score(...)
        Calculate the F1 score.

    f2_score(...)
        Calculate the F2 score.

    fbeta_score(...)
        Calculate the F-beta score.

    matthews_correlation_coefficient(...)
        Calculate the Matthews correlation coefficient.

    hamming_loss(...)
        Calculate the hamming loss.

    lift_score(...)
        Calculate the lift score.

    cohen_kappa_score(...)
        Calculate the Cohen's kappa score.

    jaccard_similarity_index(...)
        Calculate the Jaccard similarity index.

    g_mean_score(...)
        Calculate the geometric mean score.

    accuracy_score(...)
        Calculate the accuracy score.

    confusion_matrix(...)
        Generate the confusion matrix.

    roc_auc_score(...)
        Calculate the ROC-AUC score.

    gini_index(...)
        Calculate the Gini index.

    brier_score_loss(...)
        Calculate the Brier score loss.

    crossentropy_loss(...)
        Calculate the cross-entropy loss.

    hinge_loss(...)
        Calculate the hinge loss.

    kullback_leibler_divergence_loss(...)
        Calculate the Kullback-Leibler divergence loss.
    """

    # Metadata table keyed by metric short name. Each entry records the
    # optimisation direction ("type": "max" or "min"), the value range and the
    # best achievable value, all as display strings. ``get_support`` returns
    # entries of this table directly (no copy is made).
    SUPPORT = {
        # Scores: larger is better.
        "AS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "PS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "NPV": {"type": "max", "range": "[0, 1]", "best": "1"},
        "RS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "SS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "F1S": {"type": "max", "range": "[0, 1]", "best": "1"},
        "F2S": {"type": "max", "range": "[0, 1]", "best": "1"},
        "FBS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "MCC": {"type": "max", "range": "[-1, +1]", "best": "1"},
        "CKS": {"type": "max", "range": "[-1, +1]", "best": "1"},
        "JSI": {"type": "max", "range": "[0, 1]", "best": "1"},
        "JSS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "GMS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "ROC-AUC": {"type": "max", "range": "[0, 1]", "best": "1"},
        "ROC": {"type": "max", "range": "[0, 1]", "best": "1"},
        "AUC": {"type": "max", "range": "[0, 1]", "best": "1"},
        "GINI": {"type": "max", "range": "[-1, 1]", "best": "1"},
        "LS": {"type": "max", "range": "[0, +inf)", "best": "unknown"},

        # Losses: smaller is better.
        "CEL": {"type": "min", "range": "[0, +inf)", "best": "0"},
        "HML": {"type": "min", "range": "[0, 1]", "best": "0"},
        "HGL": {"type": "min", "range": "[0, +inf)", "best": "0"},
        "KLDL": {"type": "min", "range": "[0, +inf)", "best": "0"},
        "BSL": {"type": "min", "range": "[0, 1]", "best": "0"}
    }

    def __init__(self, y_true=None, y_pred=None, **kwargs):
        """
        Store the data to evaluate and initialise the label bookkeeping.

        Parameters
        ----------
        y_true : tuple, list, np.ndarray, optional
            The ground truth values. Default is None.
        y_pred : tuple, list, np.ndarray, optional
            The predicted values. Default is None.
        **kwargs
            Forwarded to the ``Evaluator`` constructor and then to
            ``set_keyword_arguments``.
        """
        super().__init__(y_true, y_pred, **kwargs)
        # ``**kwargs`` always binds a dict (possibly empty), never None, so it
        # can be handed over without a None guard.
        self.set_keyword_arguments(kwargs)
        self.binary = True
        self.representor = "number"     # "number" or "string"
        self.le = None  # LabelEncoder


[docs]
    @staticmethod
    def get_support(name=None, verbose=True):
        """
        Retrieve the support information for a specific metric or all metrics.

        Parameters
        ----------
        name : str, optional
            Name of the metric to retrieve. Use "all" to retrieve all metrics.
        verbose : bool, optional
            Whether to print the metric details.

        Returns
        -------
        dict
            Support information for the specified metric(s).
        """
        if name == "all":
            if verbose:
                for key, value in ClassificationMetric.SUPPORT.items():
                    print(f"Metric {key} : {value}")
            return ClassificationMetric.SUPPORT
        if name not in list(ClassificationMetric.SUPPORT.keys()):
            raise ValueError(f"ClassificationMetric doesn't support metric named: {name}")
        if verbose:
            print(f"Metric {name}: {ClassificationMetric.SUPPORT[name]}")
        return ClassificationMetric.SUPPORT[name]



[docs]
    def get_processed_data(self, y_true=None, y_pred=None):
        """
        Format hard-label inputs for the label-based metrics.

        Explicit arguments take precedence; if either one is missing, the
        ``y_true`` / ``y_pred`` stored on the instance are used instead. The
        chosen pair is passed to ``du.format_classification_data``.

        Returns:
            y_true_final: y_true used in evaluation process.
            y_pred_final: y_pred used in evaluation process
            unique_classes: All unique classes from y_true and y_pred
            representor: the label is number or string

        Raises:
            ValueError: If neither the arguments nor the instance provide both arrays.
        """
        if y_true is None or y_pred is None:
            # Fall back to the data stored on the instance.
            y_true, y_pred = self.y_true, self.y_pred
            if y_true is None or y_pred is None:
                raise ValueError("y_true or y_pred is None. You need to pass y_true and y_pred to object creation or function called.")
        return du.format_classification_data(y_true, y_pred)



[docs]
    def get_processed_data2(self, y_true=None, y_pred=None):
        """
        Format label / score inputs for the score-based metrics.

        Explicit arguments take precedence; if either one is missing, the
        ``y_true`` / ``y_pred`` stored on the instance are used instead. The
        chosen pair is passed to ``du.format_y_score``.

        Returns:
            y_true_final: y_true used in evaluation process.
            y_pred_final: y_pred used in evaluation process
            binary: is problem binary or multi-class classification
            representor: the label is number or string

        Raises:
            ValueError: If neither the arguments nor the instance provide both arrays.
        """
        if y_true is None or y_pred is None:
            # Fall back to the data stored on the instance.
            y_true, y_pred = self.y_true, self.y_pred
            if y_true is None or y_pred is None:
                raise ValueError("y_true or y_pred is None. You need to pass y_true and y_pred to object creation or function called.")
        return du.format_y_score(y_true, y_pred)


    def _get_micro_stats(self, matrix):
        """Helper calculates accurate global components for multi-class classification"""
        N = matrix.sum()
        K = matrix.shape[0]
        tp = np.trace(matrix)
        fp = fn = N - tp
        tn = N * K - (tp + fp + fn)
        return tp, fp, fn, tn

    def _aggregate(self, metric_key, y_true, y_pred, labels, pos_label, average, beta=1.0):
        """
        Compute one label-based metric and reduce it according to ``average``.

        Parameters
        ----------
        metric_key : str
            Key of the metric inside the per-label result produced by
            ``cu.calculate_single_label_metric``.
        y_true : array-like
            Ground truth values (``None`` falls back to the instance data).
        y_pred : array-like
            Predicted values (``None`` falls back to the instance data).
        labels : list, optional
            Labels to report or average over. Only consulted for
            ``average`` in (None, 'macro', 'weighted'); the confusion matrix
            itself is always built over all classes.
        pos_label : int or str
            Positive label for binary classification.
        average : str or None
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        beta : float, optional
            Weight of recall in the F-beta score.

        Returns
        -------
        float or dict
            A float for 'binary', 'micro', 'macro' and 'weighted'; a dict
            keyed by label when ``average`` is None.

        Raises
        ------
        ValueError
            If ``average='binary'`` is used with more than two classes or an
            unknown ``pos_label``, if ``labels`` contains values absent from
            the data, or if ``average`` is not a supported setting.
        """
        truth, pred, classes, _ = self.get_processed_data(y_true, y_pred)
        binary_mode = average == "binary"

        # Binary averaging only makes sense for at most two classes and a known positive label.
        if binary_mode:
            n_classes = len(classes)
            if n_classes > 2:
                raise ValueError(f"Target is multiclass ({n_classes} classes) but average='binary'. "
                    "Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].")
            if n_classes > 1 and pos_label not in classes:
                raise ValueError(f"pos_label={pos_label} is not a valid label. Unique labels are {classes}")

        # Un-normalised confusion matrix over every class.
        cm, index_of, support_of = cu.calculate_confusion_matrix(truth, pred, labels=None, normalize=None)

        # Micro: collapse everything into a single pooled 2x2 matrix and score that.
        if average == "micro":
            tp, fp, fn, tn = self._get_micro_stats(cm)
            pooled = np.array([[tp, fn], [fp, tn]], dtype=float)
            pooled_scores = cu.calculate_single_label_metric(pooled, {"_m": 0}, {"_m": tp + fn}, beta=beta)
            return float(pooled_scores["_m"][metric_key])

        per_class = cu.calculate_single_label_metric(cm, index_of, support_of, beta=beta)

        # Binary: report the positive class only (0.0 if it has no entry).
        if binary_mode:
            if pos_label not in per_class:
                return 0.0
            return float(per_class[pos_label][metric_key])

        wanted = list(index_of.keys()) if labels is None else list(labels)
        if not np.all(np.isin(wanted, classes)):
            raise ValueError("Specified labels do not exist in data.")
        selected = [lbl for lbl in wanted if lbl in per_class]

        # None: per-label values, no reduction.
        if average is None:
            return {lbl: per_class[lbl][metric_key] for lbl in selected}

        scores = np.array([per_class[lbl][metric_key] for lbl in selected], dtype=float)
        supports = np.array([per_class[lbl]["n_true"] for lbl in selected], dtype=float)

        if average == "macro":
            if len(scores) == 0:
                return 0.0
            return float(np.mean(scores))

        if average == "weighted":
            total_support = np.sum(supports)
            if total_support > 0:
                return float(np.dot(scores, supports) / total_support)
            return 0.0

        raise ValueError(f"Unsupported average setting: {average}")


[docs]
    def precision_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the precision score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Precision score.
        """
        result = self._aggregate(
            metric_key="precision",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def negative_predictive_value(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the negative predictive value (NPV).

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Negative predictive value.
        """
        result = self._aggregate(
            metric_key="negative_predictive_value",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def specificity_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the specificity score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Specificity score.
        """
        result = self._aggregate(
            metric_key="specificity",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def recall_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the recall score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Recall score.
        """
        result = self._aggregate(
            metric_key="recall",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def f1_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the F1 score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            F1 score.
        """
        result = self._aggregate(
            metric_key="f1",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def f2_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the F2 score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            F2 score.
        """
        result = self._aggregate(
            metric_key="f2",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def fbeta_score(self, y_true=None, y_pred=None, beta=1.0, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the F-beta score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        beta : float, optional
            Weight of recall in the F-beta score.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            F-beta score.
        """
        result = self._aggregate(
            metric_key="fbeta",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
            beta=beta,
        )
        return result



[docs]
    def matthews_correlation_coefficient(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the Matthews correlation coefficient (MCC).

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Matthews correlation coefficient.
        """
        result = self._aggregate(
            metric_key="mcc",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def hamming_loss(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the Hamming loss.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Hamming loss.
        """
        result = self._aggregate(
            metric_key="hamming_loss",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def lift_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the lift score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Lift score.
        """
        result = self._aggregate(
            metric_key="lift_score",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def cohen_kappa_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate Cohen's kappa score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Cohen's kappa score.
        """
        result = self._aggregate(
            metric_key="kappa_score",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def jaccard_similarity_index(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the Jaccard similarity index.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Jaccard similarity index.
        """
        result = self._aggregate(
            metric_key="jaccard_score",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def g_mean_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the geometric mean (G-mean) score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Labels to report or average over (not consulted for 'binary' or 'micro').
        pos_label : int or str, optional
            Class reported when ``average='binary'``.
        average : str or None, optional
            'binary', 'micro', 'macro', 'weighted', or None for a per-label dict.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            Geometric mean (G-mean) score.
        """
        result = self._aggregate(
            metric_key="g_mean",
            y_true=y_true,
            y_pred=y_pred,
            labels=labels,
            pos_label=pos_label,
            average=average,
        )
        return result



[docs]
    def accuracy_score(self, y_true=None, y_pred=None, normalize=True, sample_weight=None, **kwargs):
        """
        Calculate the accuracy score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values; the instance data is used when omitted.
        y_pred : array-like, optional
            Estimated target values; the instance data is used when omitted.
        normalize : bool, optional
            If True, return the fraction of correctly classified samples (float).
            If False, return the number of correctly classified samples (int).
        sample_weight : array-like, optional
            Sample weights.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or int
            Accuracy score.
        """
        truth, predicted, _, _ = self.get_processed_data(y_true, y_pred)
        options = {"normalize": normalize, "sample_weight": sample_weight}
        return cu.calculate_accuracy_score(truth, predicted, **options)



[docs]
    def confusion_matrix(self, y_true=None, y_pred=None, labels=None, normalize=None, **kwargs):
        """
        Compute the confusion matrix for classification tasks.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) labels; the instance data is used when omitted.
        y_pred : array-like, optional
            Predicted labels; the instance data is used when omitted.
        labels : list, optional
            Subset of labels to include in the matrix. Default is None.
        normalize : str, optional
            Normalization method, one of {"true", "pred", "all"}:

            - "true": Normalize rows (true labels).
            - "pred": Normalize columns (predicted labels).
            - "all": Normalize the entire matrix.

            Default is None (no normalization).
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        tuple
            - matrix (ndarray): Confusion matrix (normalized if specified).
            - imap (dict): Mapping of labels to matrix indices.
            - imap_count (dict): Count of true labels for each class.

        Raises
        ------
        ValueError
            If specified labels do not exist in `y_true` or `y_pred`.
        """
        truth, predicted, _, _ = self.get_processed_data(y_true, y_pred)
        outcome = cu.calculate_confusion_matrix(truth, predicted, labels, normalize)
        return outcome



[docs]
    def roc_auc_score(self, y_true=None, y_pred=None, average="macro", **kwargs):
        """
        Compute the Area Under the Receiver Operating Characteristic Curve (ROC AUC).

        Binary targets yield a single AUC. Multiclass targets are scored
        one-vs-rest per class and then reduced according to ``average``.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values; the instance data is used when omitted.
        y_pred : array-like, optional
            Estimated probabilities or decision function; the instance data is used when omitted.
        average : str, optional
            Multiclass reduction: 'macro' (unweighted mean) or 'weighted'
            (support-weighted mean). Any other value, including None, returns
            the per-class scores as a dict. Not consulted for binary targets.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float or dict
            ROC AUC score.

        Raises
        ------
        ValueError
            If ``y_true`` contains a single class, or the target is binary but
            the score matrix has more than two columns.
        """
        labels_true, scores, is_binary, _ = self.get_processed_data2(y_true, y_pred)
        present = np.unique(labels_true)

        # AUC is undefined without both positive and negative samples.
        if len(present) == 1:
            raise ValueError("Only one class present in y_true. ROC AUC score is not defined in that case.")

        # NumPy renamed ``trapz`` to ``trapezoid``; use whichever this version provides.
        integrate = getattr(np, "trapezoid", getattr(np, "trapz", None))

        # Binary target: reduce a 2-D score matrix to the positive-class column.
        if is_binary or len(present) == 2:
            if scores.ndim == 2:
                n_columns = scores.shape[1]
                if n_columns == 2:
                    scores = scores[:, 1]  # probability of class Positive
                elif n_columns == 1:
                    scores = scores.ravel()
                else:
                    raise ValueError(f"Target is binary but y_score has {n_columns} columns.")
            tpr, fpr, _ = cu.calculate_roc_curve(labels_true, scores)
            return float(integrate(tpr, fpr))

        # Multiclass: one-vs-rest AUC per class.
        # NOTE(review): column i of the score matrix is paired with the i-th sorted
        # class present in y_true - assumes every class appears in y_true; confirm.
        classes = present.tolist()
        auc_per_class = []
        for column, cls in enumerate(classes):
            curve = cu.calculate_roc_curve(np.where(labels_true == cls, 1, 0), scores[:, column])
            auc_per_class.append(float(integrate(*curve[:2])))

        if average == "macro":
            return float(np.mean(auc_per_class))
        if average == "weighted":
            weights = cu.calculate_class_support(labels_true)
            return float(np.dot(weights, auc_per_class) / np.sum(weights))
        return dict(zip(classes, auc_per_class))



[docs]
    def gini_index(self, y_true=None, y_pred=None, **kwargs):
        """
        Compute the Gini index as ``2 * AUC - 1`` from the ROC AUC score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values.
        y_pred : array-like, optional
            Estimated probabilities or decision function.
        **kwargs
            Forwarded to ``roc_auc_score`` (for example ``average``).

        Returns
        -------
        float or dict
            Gini index; a dict keyed by class when ``roc_auc_score`` returns
            per-class values.
        """
        auc = self.roc_auc_score(y_true, y_pred, **kwargs)
        if isinstance(auc, dict):
            return {label: 2 * value - 1.0 for label, value in auc.items()}
        return float(2 * auc - 1.0)



[docs]
    def brier_score_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the Brier score loss.

        Two input layouts are supported after ``get_processed_data2``:

        * 2-D ``y_pred`` of shape (n_samples, n_classes): the squared error
          against the one-hot encoded labels is summed over classes and
          averaged over samples.
        * 1-D ``y_pred``: treated as the probability of the positive class of a
          binary problem and scored with the classical ``mean((y - p) ** 2)``.

        Note that for a binary problem the two layouts differ by a factor of
        two, because the 2-D form sums the squared error of both columns.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values. Assumed to be integer class
            indices (0/1 in the 1-D case) - TODO confirm against
            ``du.format_y_score``.
        y_pred : array-like, optional
            Predicted probabilities.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float
            Brier score loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)

        if y_pred.ndim == 1:
            # A (n, 2) one-hot matrix cannot be broadcast against a (n,) vector,
            # so the single-column binary case is handled on its own.
            return float(np.mean((y_true.ravel().astype(float) - y_pred) ** 2))

        one_hot = np.eye(y_pred.shape[1])[y_true.astype(int)]
        return float(np.mean(np.sum((one_hot - y_pred) ** 2, axis=1)))



[docs]
    def crossentropy_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the cross-entropy (log) loss.

        A 1-D ``y_pred`` is treated as the probability of the positive class of
        a binary problem and expanded to the two-column form ``[1 - p, p]``, so
        that it can be combined with the one-hot labels; the result is then the
        usual binary log loss.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values: either hard labels (1-D, or
            2-D with a single column) given as integer class indices, or a 2-D
            matrix of soft labels with one column per class.
        y_pred : array-like, optional
            Predicted probabilities.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float
            Cross-entropy loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)

        # 0. Single-column binary probabilities -> explicit two-class matrix.
        #    Without this, a (n, 2) one-hot matrix cannot be broadcast against (n,).
        if y_pred.ndim == 1:
            y_pred = np.column_stack((1.0 - y_pred, y_pred))

        # 1. Hard labels are one-hot encoded; soft labels are used as given.
        if y_true.ndim == 1 or (y_true.ndim == 2 and y_true.shape[1] == 1):
            y_t = np.eye(y_pred.shape[1])[y_true.ravel().astype(int)]
        else:
            y_t = y_true.astype(float)

        # 2. Only the lower bound needs clipping: it guards against log(0),
        #    while log(1.0) = 0 is harmless.
        y_p = np.clip(y_pred, self.EPSILON, 1.0)
        return float(-np.mean(np.sum(y_t * np.log(y_p), axis=1)))



[docs]
    def hinge_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the hinge loss.

        * 2-D ``y_pred`` of shape (n_samples, n_classes): multiclass
          (Crammer-Singer) hinge, ``max(0, 1 + max_{j != y} s_j - s_y)``.
          The true-class score is excluded from the maximum, so the result is
          also correct when every wrong-class score is negative.
        * 1-D ``y_pred``: treated as the decision value of the positive class of
          a binary problem, ``max(0, 1 - y * s)`` with ``y`` mapped to -1 / +1.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values. Assumed to be integer class
            indices - TODO confirm against ``du.format_y_score``. In the 1-D
            case any value greater than 0 counts as the positive class.
        y_pred : array-like, optional
            Predicted scores.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float
            Hinge loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)

        if y_pred.ndim == 1:
            # Binary margin form; a (n, 2) one-hot matrix cannot be combined
            # with a (n,) score vector.
            signed = np.where(y_true.ravel() > 0, 1.0, -1.0)
            return float(np.mean(np.maximum(0.0, 1.0 - signed * y_pred)))

        one_hot = np.eye(y_pred.shape[1])[y_true.astype(int)]
        true_score = np.sum(one_hot * y_pred, axis=1)
        # Mask the true class with -inf rather than multiplying by zero:
        # a zero would win the maximum whenever all wrong scores are negative.
        best_wrong = np.max(np.where(one_hot > 0, -np.inf, y_pred), axis=1)
        return float(np.mean(np.maximum(0.0, best_wrong - true_score + 1.0)))



[docs]
    def kullback_leibler_divergence_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the Kullback-Leibler divergence loss, ``mean(sum(p * log(p / q)))``.

        A 1-D ``y_pred`` is treated as the probability of the positive class of
        a binary problem and expanded to the two-column form ``[1 - q, q]`` so
        that it can be combined with the label distribution.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values: either hard labels (1-D, or
            2-D with a single column) given as integer class indices, or a 2-D
            matrix of soft labels with one column per class.
        y_pred : array-like, optional
            Predicted probabilities.
        **kwargs
            Extra keyword arguments are accepted and ignored.

        Returns
        -------
        float
            Kullback-Leibler divergence loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)

        # 0. Single-column binary probabilities -> explicit two-class matrix.
        #    Without this, a (n, 2) label matrix cannot be broadcast against (n,).
        if y_pred.ndim == 1:
            y_pred = np.column_stack((1.0 - y_pred, y_pred))

        # 1. Hard labels are one-hot encoded; soft labels are used as given.
        if y_true.ndim == 1 or (y_true.ndim == 2 and y_true.shape[1] == 1):
            y_t = np.eye(y_pred.shape[1])[y_true.ravel().astype(int)]
        else:
            y_t = y_true.astype(float)

        # 2. Clip only the predictions (avoids division by zero / log(0));
        #    the true distribution is left untouched.
        y_p = np.clip(y_pred, self.EPSILON, 1.0)

        # 3. Where y_t == 0 the term is defined as 0. Forcing the ratio to 1.0
        #    there gives log(1.0) = 0 and avoids the "0 * -inf = nan" trap.
        ratio = np.where(y_t > 0, y_t / y_p, 1.0)
        return float(np.mean(np.sum(y_t * np.log(ratio), axis=1)))



    # Short aliases: each name below is bound to the same function object as
    # the full method name, so ``metric.PS(...)`` is ``metric.precision_score(...)``.
    # Scores (larger is better) and the confusion matrix.
    CM = confusion_matrix
    PS = precision_score
    NPV = negative_predictive_value
    RS = recall_score
    AS = accuracy_score
    F1S = f1_score
    F2S = f2_score
    FBS = fbeta_score
    SS = specificity_score
    MCC = matthews_correlation_coefficient
    CKS = cohen_kappa_score
    ROC = AUC = RAS = roc_auc_score
    # Several spellings of the Jaccard metric all resolve to jaccard_similarity_index.
    JSI = jaccard_similarity_coefficient = JSS = jaccard_similarity_score = JSC = jaccard_similarity_index
    GMS = g_mean_score
    GINI = gini_index
    LS = lift_score

    # Losses (smaller is better).
    HML = hamming_loss
    HGL = hinge_loss
    KLDL = kullback_leibler_divergence_loss
    BSL = brier_score_loss
    CEL = crossentropy_loss