Source code for permetrics.classification

#!/usr/bin/env python
# Created by "Thieu" at 09:29, 23/09/2020 ----------%
#       Email: nguyenthieu2102@gmail.com            %
#       Github: https://github.com/thieu1995        %
# --------------------------------------------------%

from permetrics.evaluator import Evaluator
from permetrics.utils import data_util as du
from permetrics.utils import classifier_util as cu
import numpy as np


class ClassificationMetric(Evaluator):
    """
    A class for evaluating classification metrics.

    Parameters
    ----------
    y_true : tuple, list, np.ndarray, optional
        The ground truth values. Default is None.
    y_pred : tuple, list, np.ndarray, optional
        The predicted values. Default is None.
    labels : tuple, list, np.ndarray, optional
        List of labels to index the matrix. This may be used to reorder or
        select a subset of labels. Default is None.
    pos_label : int or str
        Positive label for binary classification.
    average : str or None, optional
        Determines the type of averaging performed on the data. Options are:

        - 'binary': Calculate for binary classification problem
        - 'micro': Calculate metrics globally by considering each element of
          the label indicator matrix as a label.
        - 'macro': Calculate metrics for each label and find their unweighted mean.
        - 'weighted': Calculate metrics for each label and find their average,
          weighted by support.
        - None: Scores for each class are returned.

        Default is "binary".

    Methods
    -------
    get_support(name=None, verbose=True)
        Retrieve the support information for a specific metric or all metrics.
    get_processed_data(y_true=None, y_pred=None)
        Process and format the input data for evaluation.
    get_processed_data2(y_true=None, y_pred=None)
        Process and format the input data for ROC and probability-based metrics.
    precision_score(...)
        Calculate the precision score.
    negative_predictive_value(...)
        Calculate the negative predictive value.
    specificity_score(...)
        Calculate the specificity score.
    recall_score(...)
        Calculate the recall score.
    f1_score(...)
        Calculate the F1 score.
    f2_score(...)
        Calculate the F2 score.
    fbeta_score(...)
        Calculate the F-beta score.
    matthews_correlation_coefficient(...)
        Calculate the Matthews correlation coefficient.
    hamming_loss(...)
        Calculate the hamming loss.
    lift_score(...)
        Calculate the lift score.
    cohen_kappa_score(...)
        Calculate the Cohen's kappa score.
    jaccard_similarity_index(...)
        Calculate the Jaccard similarity index.
    g_mean_score(...)
        Calculate the geometric mean score.
    accuracy_score(...)
        Calculate the accuracy score.
    confusion_matrix(...)
        Generate the confusion matrix.
    roc_auc_score(...)
        Calculate the ROC-AUC score.
    gini_index(...)
        Calculate the Gini index.
    brier_score_loss(...)
        Calculate the Brier score loss.
    crossentropy_loss(...)
        Calculate the cross-entropy loss.
    hinge_loss(...)
        Calculate the hinge loss.
    kullback_leibler_divergence_loss(...)
        Calculate the Kullback-Leibler divergence loss.
    """

    # Per-metric metadata: optimisation direction, value range and best value.
    SUPPORT = {
        "AS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "PS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "NPV": {"type": "max", "range": "[0, 1]", "best": "1"},
        "RS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "SS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "F1S": {"type": "max", "range": "[0, 1]", "best": "1"},
        "F2S": {"type": "max", "range": "[0, 1]", "best": "1"},
        "FBS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "MCC": {"type": "max", "range": "[-1, +1]", "best": "1"},
        "CKS": {"type": "max", "range": "[-1, +1]", "best": "1"},
        "JSI": {"type": "max", "range": "[0, 1]", "best": "1"},
        "JSS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "GMS": {"type": "max", "range": "[0, 1]", "best": "1"},
        "ROC-AUC": {"type": "max", "range": "[0, 1]", "best": "1"},
        "ROC": {"type": "max", "range": "[0, 1]", "best": "1"},
        "AUC": {"type": "max", "range": "[0, 1]", "best": "1"},
        "GINI": {"type": "max", "range": "[-1, 1]", "best": "1"},
        "LS": {"type": "max", "range": "[0, +inf)", "best": "unknown"},
        "CEL": {"type": "min", "range": "[0, +inf)", "best": "0"},
        "HML": {"type": "min", "range": "[0, 1]", "best": "0"},
        "HGL": {"type": "min", "range": "[0, +inf)", "best": "0"},
        "KLDL": {"type": "min", "range": "[0, +inf)", "best": "0"},
        "BSL": {"type": "min", "range": "[0, 1]", "best": "0"}
    }

    def __init__(self, y_true=None, y_pred=None, **kwargs):
        """
        Store the data on the base evaluator and register keyword arguments.

        Parameters
        ----------
        y_true : tuple, list, np.ndarray, optional
            The ground truth values. Default is None.
        y_pred : tuple, list, np.ndarray, optional
            The predicted values. Default is None.
        **kwargs
            Forwarded to the base class constructor and to
            ``set_keyword_arguments``.
        """
        super().__init__(y_true, y_pred, **kwargs)
        # ``**kwargs`` always arrives as a dict (possibly empty), never None,
        # so it can be forwarded without a None guard.
        self.set_keyword_arguments(kwargs)
        self.binary = True
        self.representor = "number"  # "number" or "string"
        self.le = None  # LabelEncoder

    @staticmethod
    def get_support(name=None, verbose=True):
        """
        Retrieve the support information for a specific metric or all metrics.

        Parameters
        ----------
        name : str, optional
            Name of the metric to retrieve. Use "all" to retrieve all metrics.
        verbose : bool, optional
            Whether to print the metric details.

        Returns
        -------
        dict
            Support information for the specified metric(s).

        Raises
        ------
        ValueError
            If ``name`` is neither "all" nor a key of ``SUPPORT`` (this
            includes the default ``None``).
        """
        support = ClassificationMetric.SUPPORT
        if name == "all":
            if verbose:
                for key, value in support.items():
                    print(f"Metric {key} : {value}")
            return support
        if name not in support:
            raise ValueError(f"ClassificationMetric doesn't support metric named: {name}")
        if verbose:
            print(f"Metric {name}: {ClassificationMetric.SUPPORT[name]}")
        return support[name]

    def get_processed_data(self, y_true=None, y_pred=None):
        """
        Process and format the input data for evaluation.

        Data passed to the call takes precedence; otherwise the data stored
        on the instance is used.

        Returns:
            y_true_final: y_true used in evaluation process.
            y_pred_final: y_pred used in evaluation process
            unique_classes: All unique classes from y_true and y_pred
            representor: the label is number or string

        Raises:
            ValueError: If neither the arguments nor the instance provide
                both y_true and y_pred.
        """
        if (y_true is not None) and (y_pred is not None):
            return du.format_classification_data(y_true, y_pred)
        if (self.y_true is not None) and (self.y_pred is not None):
            return du.format_classification_data(self.y_true, self.y_pred)
        raise ValueError("y_true or y_pred is None. You need to pass y_true and y_pred to object creation or function called.")

    def get_processed_data2(self, y_true=None, y_pred=None):
        """
        Process and format the input data for ROC and probability-based metrics.

        Data passed to the call takes precedence; otherwise the data stored
        on the instance is used.

        Returns:
            y_true_final: y_true used in evaluation process.
            y_pred_final: y_pred used in evaluation process
            binary: is problem binary or multi-class classification
            representor: the label is number or string

        Raises:
            ValueError: If neither the arguments nor the instance provide
                both y_true and y_pred.
        """
        if (y_true is not None) and (y_pred is not None):
            return du.format_y_score(y_true, y_pred)
        if (self.y_true is not None) and (self.y_pred is not None):
            return du.format_y_score(self.y_true, self.y_pred)
        raise ValueError("y_true or y_pred is None. You need to pass y_true and y_pred to object creation or function called.")

    def _get_micro_stats(self, matrix):
        """
        Collapse a K x K confusion matrix into global (tp, fp, fn, tn) counts.

        Each of the N samples contributes one entry to each of the K
        one-vs-rest problems, hence N * K decisions in total. The diagonal
        holds the true positives; every off-diagonal entry is at once a false
        positive (for the predicted class) and a false negative (for the true
        class), so fp == fn == N - tp.
        """
        total = matrix.sum()
        n_classes = matrix.shape[0]
        tp = np.trace(matrix)
        fp = fn = total - tp
        tn = total * n_classes - (tp + fp + fn)
        return tp, fp, fn, tn

    def _aggregate(self, metric_key, y_true, y_pred, labels, pos_label, average, beta=1.0):
        """
        Aggregate metrics based on the specified averaging method.

        Parameters
        ----------
        metric_key : str
            Metric key to calculate.
        y_true : array-like
            Ground truth values.
        y_pred : array-like
            Predicted values.
        labels : list, optional
            List of labels to consider. Only used for the None, 'macro' and
            'weighted' settings; 'binary' and 'micro' return before it is read.
        pos_label : int or str
            Positive label for binary classification.
        average : str or None
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        beta : float, optional
            Weight of recall in the F-beta score.

        Returns
        -------
        float or dict
            Aggregated metric value(s).

        Raises
        ------
        ValueError
            If ``average='binary'`` is used with more than two classes or an
            unknown ``pos_label``, if ``labels`` contains a label absent from
            the data, or if ``average`` is not a supported setting.
        """
        y_true, y_pred, unique_classes, _ = self.get_processed_data(y_true, y_pred)

        # 1. Check binary classification problem
        if average == "binary":
            if len(unique_classes) > 2:
                raise ValueError(f"Target is multiclass ({len(unique_classes)} classes) but average='binary'. "
                                 "Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].")
            if len(unique_classes) > 1 and pos_label not in unique_classes:
                raise ValueError(f"pos_label={pos_label} is not a valid label. Unique labels are {unique_classes}")

        # 2. Calculate the original confusion matrix over every class in the
        #    data; the `labels` subset is applied later, when selecting results.
        matrix, imap, imap_count = cu.calculate_confusion_matrix(y_true, y_pred, labels=None, normalize=None)

        # 3. Micro: combine the multi-class problem into one composite 2x2 matrix
        if average == "micro":
            tp, fp, fn, tn = self._get_micro_stats(matrix)
            m_micro = np.array([[tp, fn], [fp, tn]], dtype=float)
            res_micro = cu.calculate_single_label_metric(m_micro, {"_m": 0}, {"_m": tp + fn}, beta=beta)
            return float(res_micro["_m"][metric_key])

        # 4. Calculate the metrics of every class
        all_metrics = cu.calculate_single_label_metric(matrix, imap, imap_count, beta=beta)

        # 5. Binary: return the value of pos_label (0.0 when it has no entry)
        if average == "binary":
            if pos_label in all_metrics:
                return float(all_metrics[pos_label][metric_key])
            return 0.0

        target_labels = list(labels) if labels is not None else list(imap.keys())
        if not np.all(np.isin(target_labels, unique_classes)):
            raise ValueError("Specified labels do not exist in data.")
        selected = [lbl for lbl in target_labels if lbl in all_metrics]

        # 6. None: return a dict keyed by label
        if average is None:
            return {lbl: all_metrics[lbl][metric_key] for lbl in selected}

        vals = np.array([all_metrics[lbl][metric_key] for lbl in selected], dtype=float)
        supps = np.array([all_metrics[lbl]["n_true"] for lbl in selected], dtype=float)

        if average == "macro":
            return float(np.mean(vals)) if len(vals) > 0 else 0.0
        if average == "weighted":
            total_s = np.sum(supps)
            return float(np.dot(vals, supps) / total_s) if total_s > 0 else 0.0
        raise ValueError(f"Unsupported average setting: {average}")

    def precision_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the precision score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Precision score.
        """
        return self._aggregate("precision", y_true, y_pred, labels, pos_label, average)

    def negative_predictive_value(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the negative predictive value.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Negative predictive value.
        """
        return self._aggregate("negative_predictive_value", y_true, y_pred, labels, pos_label, average)

    def specificity_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the specificity score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Specificity score.
        """
        return self._aggregate("specificity", y_true, y_pred, labels, pos_label, average)

    def recall_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the recall score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Recall score.
        """
        return self._aggregate("recall", y_true, y_pred, labels, pos_label, average)

    def f1_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the F1 score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            F1 score.
        """
        return self._aggregate("f1", y_true, y_pred, labels, pos_label, average)

    def f2_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the F2 score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            F2 score.
        """
        return self._aggregate("f2", y_true, y_pred, labels, pos_label, average)

    def fbeta_score(self, y_true=None, y_pred=None, beta=1.0, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the F-beta score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        beta : float, optional
            Weight of recall in the F-beta score.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            F-beta score.
        """
        return self._aggregate("fbeta", y_true, y_pred, labels, pos_label, average, beta=beta)

    def matthews_correlation_coefficient(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the Matthews correlation coefficient.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Matthews correlation coefficient.
        """
        return self._aggregate("mcc", y_true, y_pred, labels, pos_label, average)

    def hamming_loss(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the hamming loss.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Hamming loss.
        """
        return self._aggregate("hamming_loss", y_true, y_pred, labels, pos_label, average)

    def lift_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the lift score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Lift score.
        """
        return self._aggregate("lift_score", y_true, y_pred, labels, pos_label, average)

    def cohen_kappa_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the Cohen's kappa score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Cohen's kappa score.
        """
        return self._aggregate("kappa_score", y_true, y_pred, labels, pos_label, average)

    def jaccard_similarity_index(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the Jaccard similarity index.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Jaccard similarity index.
        """
        return self._aggregate("jaccard_score", y_true, y_pred, labels, pos_label, average)

    def g_mean_score(self, y_true=None, y_pred=None, labels=None, pos_label=1, average="binary", **kwargs):
        """
        Calculate the geometric mean (G-mean) score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth values.
        y_pred : array-like, optional
            Predicted values.
        labels : list, optional
            List of labels to include in the calculation.
        pos_label : int or str, optional
            The positive class label for binary classification.
        average : str, optional
            Averaging method ('binary', 'micro', 'macro', 'weighted', or None).
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            Geometric mean (G-mean) score.
        """
        return self._aggregate("g_mean", y_true, y_pred, labels, pos_label, average)

    def accuracy_score(self, y_true=None, y_pred=None, normalize=True, sample_weight=None, **kwargs):
        """
        Calculate the accuracy score.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values.
        y_pred : array-like, optional
            Estimated target values.
        normalize : bool, optional
            If True, return the fraction of correctly classified samples (float).
            If False, return the number of correctly classified samples (int).
        sample_weight : array-like, optional
            Sample weights.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or int
            Accuracy score.
        """
        y_true, y_pred, _, _ = self.get_processed_data(y_true, y_pred)
        return cu.calculate_accuracy_score(y_true, y_pred, normalize=normalize, sample_weight=sample_weight)

    def confusion_matrix(self, y_true=None, y_pred=None, labels=None, normalize=None, **kwargs):
        """
        Compute the confusion matrix for classification tasks.

        Args:
            y_true (array-like): Ground truth (correct) labels.
            y_pred (array-like): Predicted labels.
            labels (list, optional): Subset of labels to include in the matrix.
                Default is None.
            normalize (str, optional): Normalization method. One of
                {"true", "pred", "all"}.

                - "true": Normalize rows (true labels).
                - "pred": Normalize columns (predicted labels).
                - "all": Normalize the entire matrix.

                Default is None (no normalization).

        Returns:
            tuple:
                - matrix (ndarray): Confusion matrix (normalized if specified).
                - imap (dict): Mapping of labels to matrix indices.
                - imap_count (dict): Count of true labels for each class.

        Raises:
            ValueError: If specified labels do not exist in `y_true` or `y_pred`.
        """
        y_true, y_pred, _, _ = self.get_processed_data(y_true, y_pred)
        return cu.calculate_confusion_matrix(y_true, y_pred, labels, normalize)

    def roc_auc_score(self, y_true=None, y_pred=None, average="macro", **kwargs):
        """
        Compute the Area Under the Receiver Operating Characteristic Curve (ROC AUC).

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values.
        y_pred : array-like, optional
            Estimated probabilities or decision function.
        average : str, optional
            Averaging method ('macro', 'weighted', or None). Only used in the
            multiclass case; any other value yields the per-class dict.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float or dict
            ROC AUC score.

        Raises
        ------
        ValueError
            If y_true holds a single class, if a binary target comes with more
            than two score columns, or if a multiclass target does not come
            with exactly one score column per class.
        """
        y_true, y_score, binary, _ = self.get_processed_data2(y_true, y_pred)
        classes = np.unique(y_true).tolist()

        # 1. Only 1 class in y_true
        if len(classes) == 1:
            raise ValueError("Only one class present in y_true. ROC AUC score is not defined in that case.")

        # NumPy 2.x names the trapezoidal rule `trapezoid`; older releases use `trapz`.
        trapz = getattr(np, "trapezoid", getattr(np, "trapz", None))

        # 2. Binary cases
        if binary or len(classes) == 2:
            if y_score.ndim == 2:
                if y_score.shape[1] == 2:
                    y_score = y_score[:, 1]  # probability of class Positive
                elif y_score.shape[1] == 1:
                    y_score = y_score.ravel()
                else:
                    raise ValueError(f"Target is binary but y_score has {y_score.shape[1]} columns.")
            tpr, fpr, _ = cu.calculate_roc_curve(y_true, y_score)
            return float(trapz(tpr, fpr))

        # 3. Multiclass (One-vs-Rest). Column i is paired with the i-th sorted
        #    class, so a different column count would index out of range or
        #    silently score the wrong class.
        if y_score.ndim != 2 or y_score.shape[1] != len(classes):
            raise ValueError(f"Target has {len(classes)} classes but y_score has shape {y_score.shape}; "
                             "expected one score column per class.")
        auc_list = []
        for i, cls in enumerate(classes):
            one_vs_rest = np.where(y_true == cls, 1, 0)
            tpr, fpr = cu.calculate_roc_curve(one_vs_rest, y_score[:, i])[:2]
            auc_list.append(float(trapz(tpr, fpr)))

        if average == "macro":
            return float(np.mean(auc_list))
        if average == "weighted":
            weights = cu.calculate_class_support(y_true)
            return float(np.dot(weights, auc_list) / np.sum(weights))
        return dict(zip(classes, auc_list))

    def gini_index(self, y_true=None, y_pred=None, **kwargs):
        """
        Compute the Gini index based on the ROC AUC score (``2 * AUC - 1``).

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values.
        y_pred : array-like, optional
            Estimated probabilities or decision function.
        **kwargs
            Forwarded to ``roc_auc_score`` (for example ``average``).

        Returns
        -------
        float or dict
            Gini index; a dict keyed by class when ``roc_auc_score`` returns one.
        """
        auc_val = self.roc_auc_score(y_true, y_pred, **kwargs)
        if isinstance(auc_val, dict):
            return {k: 2 * v - 1.0 for k, v in auc_val.items()}
        return float(2 * auc_val - 1.0)

    @staticmethod
    def _to_probability_matrix(y_pred):
        """Return y_pred as a float (n_samples, n_classes) matrix; a 1-D vector p becomes [1 - p, p]."""
        probs = np.asarray(y_pred, dtype=float)
        if probs.ndim == 1:
            # A 1-D input is read as the probability of class 1 of a
            # two-class problem, matching the two-class fallback below.
            probs = np.column_stack((1.0 - probs, probs))
        return probs

    @staticmethod
    def _to_target_matrix(y_true, n_classes):
        """Return y_true as an (n_samples, n_classes) matrix: hard labels are one-hot encoded, soft labels kept."""
        targets = np.asarray(y_true)
        if targets.ndim == 1 or (targets.ndim == 2 and targets.shape[1] == 1):
            return np.eye(n_classes)[targets.ravel().astype(int)]
        return targets.astype(float)

    def brier_score_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the Brier score loss.

        The squared differences between the target matrix and the predicted
        probabilities are summed over the class columns and averaged over the
        samples.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values: 1-D hard labels, an (n, 1)
            column of hard labels, or a 2-D matrix of per-class targets.
        y_pred : array-like, optional
            Predicted probabilities. A 1-D vector is interpreted as the
            probability of class 1 in a two-class problem.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float
            Brier score loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)
        probs = self._to_probability_matrix(y_pred)
        targets = self._to_target_matrix(y_true, probs.shape[1])
        return float(np.mean(np.sum((targets - probs) ** 2, axis=1)))

    def crossentropy_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the cross-entropy loss.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values: 1-D hard labels, an (n, 1)
            column of hard labels, or a 2-D matrix of soft labels.
        y_pred : array-like, optional
            Predicted probabilities. A 1-D vector is interpreted as the
            probability of class 1 in a two-class problem.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float
            Cross-entropy loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)
        probs = self._to_probability_matrix(y_pred)
        # 1. Hard labels such as [0, 2] are one-hot encoded; soft labels such
        #    as [[0.9, 0.1]] are used as given.
        targets = self._to_target_matrix(y_true, probs.shape[1])
        # 2. Only the lower bound is clipped, to avoid log(0); log(1.0) is safe.
        probs = np.clip(probs, self.EPSILON, 1.0)
        return float(-np.mean(np.sum(targets * np.log(probs), axis=1)))

    def hinge_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the hinge loss.

        For 2-D scores the per-sample loss is
        ``max(0, 1 + max_{k != y} s_k - s_y)``. For 1-D scores the binary form
        ``max(0, 1 - t * s)`` with ``t = 2 * y - 1`` is used.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values, as integer class indices
            (1-D, or an (n, 1) column).
        y_pred : array-like, optional
            Predicted scores: one column per class, or a 1-D vector of scores
            for class 1 of a two-class problem.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float
            Hinge loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)
        scores = np.asarray(y_pred, dtype=float)
        labels = np.asarray(y_true).astype(int)
        if labels.ndim == 2 and labels.shape[1] == 1:
            labels = labels.ravel()

        if scores.ndim == 1:
            # NOTE(review): assumes labels are encoded as {0, 1} - confirm
            # against what `du.format_y_score` returns.
            signs = 2.0 * labels - 1.0
            return float(np.mean(np.maximum(0.0, 1.0 - signs * scores)))

        is_true = np.eye(scores.shape[1], dtype=bool)[labels]
        true_score = scores[np.arange(scores.shape[0]), labels]
        # Mask the true class with -inf rather than 0: a zero would win the
        # max whenever every competing score is negative and inflate the loss.
        best_other = np.max(np.where(is_true, -np.inf, scores), axis=1)
        return float(np.mean(np.maximum(0.0, best_other - true_score + 1.0)))

    def kullback_leibler_divergence_loss(self, y_true=None, y_pred=None, **kwargs):
        """
        Calculate the Kullback-Leibler divergence loss.

        Parameters
        ----------
        y_true : array-like, optional
            Ground truth (correct) target values: 1-D hard labels, an (n, 1)
            column of hard labels, or a 2-D matrix of soft labels.
        y_pred : array-like, optional
            Predicted probabilities. A 1-D vector is interpreted as the
            probability of class 1 in a two-class problem.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        float
            Kullback-Leibler divergence loss.
        """
        y_true, y_pred, _, _ = self.get_processed_data2(y_true, y_pred)
        probs = self._to_probability_matrix(y_pred)
        # 1. Hard labels such as [0, 2, 1] are one-hot encoded; soft labels
        #    such as [[0.8, 0.2]] are used as given.
        targets = self._to_target_matrix(y_true, probs.shape[1])
        # 2. Only y_pred is clipped, to avoid log(0); y_true is left untouched.
        probs = np.clip(probs, self.EPSILON, 1.0)
        # 3. Avoid the "0 * -inf = nan" trap: where the target is 0 the ratio
        #    is forced to 1.0, so log(1.0) = 0 and the term contributes 0.
        ratio = np.where(targets > 0, targets / probs, 1.0)
        return float(np.mean(np.sum(targets * np.log(ratio), axis=1)))

    # Short aliases for the metric methods.
    CM = confusion_matrix
    PS = precision_score
    NPV = negative_predictive_value
    RS = recall_score
    AS = accuracy_score
    F1S = f1_score
    F2S = f2_score
    FBS = fbeta_score
    SS = specificity_score
    MCC = matthews_correlation_coefficient
    CKS = cohen_kappa_score
    ROC = AUC = RAS = roc_auc_score
    JSI = jaccard_similarity_coefficient = JSS = jaccard_similarity_score = JSC = jaccard_similarity_index
    GMS = g_mean_score
    GINI = gini_index
    LS = lift_score
    HML = hamming_loss
    HGL = hinge_loss
    KLDL = kullback_leibler_divergence_loss
    BSL = brier_score_loss
    CEL = crossentropy_loss