Source code for modelsight.curves._delong

"""
This file deals with the implementation of the DeLong test for the comparison of
pairs of correlated areas under the receiver-operating characteristics curves.
"""

import pandas as pd
import numpy as np
import scipy.stats
from typing import Tuple

# AUC comparison adapted from
# https://github.com/Netflix/vmaf/
def compute_midrank(x: np.ndarray) -> np.ndarray:
    """
    Computes 1-based midranks.

    Tied values all receive the average of the ranks they jointly occupy,
    e.g. ``[0.1, 0.3, 0.3, 0.2]`` is mapped to ``[1.0, 3.5, 3.5, 2.0]``.

    Parameters
    ----------
    x : np.ndarray
        a 1-d array of predicted probabilities (any 1-d array-like is accepted).

    Returns
    -------
    T2 : np.ndarray
        float64 array of midranks, aligned with the order of ``x``.

    Raises
    ------
    ValueError
        if ``x`` contains NaN. NaN never compares equal to itself, so no
        tie group (and therefore no midrank) can be defined for it.
    """
    x = np.asarray(x)
    if x.dtype.kind == "f" and np.isnan(x).any():
        raise ValueError("compute_midrank does not support NaN values")

    N = len(x)
    T2 = np.empty(N, dtype=np.float64)
    if N == 0:
        return T2

    J = np.argsort(x)
    Z = x[J]

    # A tie group starts at position 0 and wherever the sorted value changes.
    is_group_start = np.empty(N, dtype=bool)
    is_group_start[0] = True
    is_group_start[1:] = Z[1:] != Z[:-1]
    starts = np.flatnonzero(is_group_start)
    ends = np.append(starts[1:], N)  # exclusive end of every group

    # The 0-based mean position of the group [start, end) is
    # (start + end - 1) / 2.
    # Note(kazeevn) +1 is due to Python using 0-based indexing
    # instead of 1-based in the AUC formula in the paper
    T = 0.5 * (starts + ends - 1) + 1

    # Broadcast each group's midrank to its members and undo the sort.
    T2[J] = np.repeat(T, ends - starts)
    return T2
def fastDeLong(predictions_sorted_transposed: np.ndarray, label_1_count: int) -> Tuple[np.ndarray, np.ndarray]:
    """
    The fast version of DeLong's method for computing the covariance of
    unadjusted AUC.

    Parameters
    ----------
    predictions_sorted_transposed : np.ndarray
        a (n_classifiers, n_obs) numpy array containing the predicted
        probabilities by the classifiers in the comparison. The columns are
        sorted such that the examples with label "1" come first.
    label_1_count : int
        number of leading columns that belong to the positive class.

    Returns
    -------
    aucs, delongcov : Tuple[np.ndarray, np.ndarray]
        aucs: array of AUC values, one per classifier
        delongcov: array of DeLong covariance

    Reference
    ---------
    @article{sun2014fast,
        title={Fast Implementation of DeLong's Algorithm for Comparing the
               Areas Under Correlated Receiver Operating Characteristic Curves},
        author={Xu Sun and Weichao Xu},
        journal={IEEE Signal Processing Letters},
        volume={21},
        number={11},
        pages={1389--1393},
        year={2014},
        publisher={IEEE}
    }
    """
    # m (positives), n (negatives) and k (classifiers) follow the paper's notation
    m = label_1_count
    n = predictions_sorted_transposed.shape[1] - m
    k = predictions_sorted_transposed.shape[0]

    # Midranks within the positives, within the negatives, and within the
    # pooled sample, computed independently for every classifier.
    positive_ranks = np.empty([k, m], dtype=np.float64)
    negative_ranks = np.empty([k, n], dtype=np.float64)
    pooled_ranks = np.empty([k, m + n], dtype=np.float64)
    for row, scores in enumerate(predictions_sorted_transposed):
        positive_ranks[row, :] = compute_midrank(scores[:m])
        negative_ranks[row, :] = compute_midrank(scores[m:])
        pooled_ranks[row, :] = compute_midrank(scores)

    pooled_positive = pooled_ranks[:, :m]
    pooled_negative = pooled_ranks[:, m:]

    aucs = pooled_positive.sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n

    # Structural components of the positive (v01) and negative (v10) examples
    v01 = (pooled_positive - positive_ranks) / n
    v10 = 1.0 - (pooled_negative - negative_ranks) / m

    delongcov = np.cov(v01) / m + np.cov(v10) / n
    return aucs, delongcov
def calc_pvalue(aucs: np.ndarray, sigma: np.ndarray) -> float:
    """
    Computes log10 of the two-sided p-value for the difference of two AUCs.

    Parameters
    ----------
    aucs : np.array
        a 1-d array of AUCs
    sigma : np.array
        an array of AUC DeLong covariances

    Returns
    -------
    p : float
        log10(pvalue). For two AUCs and a 2x2 covariance the value is
        returned inside a (1, 1) array; use ``.item()`` to get a scalar.
    """
    # Contrast vector selecting the difference between the two AUCs
    contrast = np.array([[1, -1]])
    difference_variance = contrast.dot(sigma).dot(contrast.T)
    z = np.abs(np.diff(aucs)) / np.sqrt(difference_variance)

    # Stay in log space so that very small p-values do not underflow:
    # log10(2 * sf(z)) = log10(2) + ln(sf(z)) / ln(10)
    log_survival = scipy.stats.norm.logsf(z, loc=0, scale=1)
    return np.log10(2) + log_survival / np.log(10)
def compute_ground_truth_statistics(ground_truth: np.ndarray) -> Tuple[np.ndarray, int]:
    """
    Compute statistics of ground-truth array.

    Parameters
    ----------
    ground_truth : np.ndarray
        a (n_obs,) array of 0 and 1 values representing the ground-truth.
        Boolean and unsigned-integer arrays are accepted as well.

    Returns
    -------
    order, label_1_count : Tuple[np.ndarray, int]
        order is a numpy array of indexes that places the positive class
        (label 1) first.
        label_1_count is the count of data points of the positive class.

    Raises
    ------
    AssertionError
        if ``ground_truth`` does not contain exactly the two values 0 and 1.
    """
    labels = np.asarray(ground_truth)

    # Raised explicitly (instead of using an ``assert`` statement) so that the
    # check is not stripped when Python runs with -O; the exception type is
    # kept for backward compatibility with existing callers.
    if not np.array_equal(np.unique(labels), [0, 1]):
        raise AssertionError(
            "ground_truth must contain both classes and only the values 0 and 1"
        )

    # Negate a signed copy: negating an unsigned array wraps around
    # (1 -> 255 for uint8), which would sort the negatives first, and
    # negating a boolean array raises a TypeError.
    order = (-labels.astype(np.int64)).argsort()
    label_1_count = int(labels.sum())
    return order, label_1_count
def delong_roc_test(ground_truth: np.ndarray, predictions_one: np.ndarray, predictions_two: np.ndarray) -> float:
    """
    Compare areas-under-curve of two estimators using the DeLong test.
    Concretely, it computes the two-sided p-value for the hypothesis that
    two ROC AUCs are different.

    Parameters
    ----------
    ground_truth : np.ndarray
        a (n_obs,) array of 0 and 1 representing ground-truths.
    predictions_one : np.ndarray
        a (n_obs,) array of probabilities of class 1 predicted by the first model.
    predictions_two : np.ndarray
        a (n_obs,) array of probabilities of class 1 predicted by the second model.

    Returns
    -------
    p : float
        the p-value for hypothesis that two ROC AUCs are different.

    Raises
    ------
    ValueError
        if the two prediction arrays do not each provide exactly one value
        per ground-truth label.
    """
    ground_truth = np.asarray(ground_truth)
    order, label_1_count = compute_ground_truth_statistics(ground_truth)

    predictions = np.vstack((predictions_one, predictions_two))
    # Without this check a ground truth shorter than the predictions would
    # silently score only the first len(ground_truth) predictions.
    if predictions.shape != (2, len(order)):
        raise ValueError(
            "predictions_one and predictions_two must each contain one value "
            f"per ground-truth label ({len(order)}); stacked predictions have "
            f"shape {predictions.shape}"
        )

    # Reorder the columns so that positive examples come first, as required
    # by fastDeLong.
    predictions_sorted_transposed = predictions[:, order]
    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)

    # calc_pvalue returns log10(p) wrapped in an array; convert back to a
    # plain float p-value.
    p = 10**calc_pvalue(aucs, delongcov).item()
    return p