データサイエンティスト上がりのDX参謀・起業家

データサイエンティスト上がりのDX参謀・起業家のブログ。データ分析や事業について。自身はアーティスト、経営者、事業家。

メモ:ROC曲線の最適カットオフを計算する関数

epiライブラリのROC関数ではカットオフまで計算してくれなかったので、ROC関数を計算するように書き換え。

library(epi)

ROC1 <- function (test = NULL, stat = NULL, form = NULL, plot = c("sp", 
    "ROC"), PS = is.null(test), PV = TRUE, MX = TRUE, MI = TRUE, 
    AUC = TRUE, grid = seq(0, 100, 10), col.grid = gray(0.9), 
    cuts = NULL, lwd = 2, data = parent.frame(), ...) 
{
    rnam <- if (!missing(test)) 
        deparse(substitute(test))
    else "lr.eta"
    if (is.null(form)) {
        if (is.null(stat) | is.null(test)) 
            stop("Either 'test' AND 'stat' OR 'formula' must be supplied!")
        lr <- glm(stat ~ test, family = binomial)
        resp <- stat
        Model.inf <- paste("Model: ", deparse(substitute(stat)), 
            "~", deparse(substitute(test)))
    }
    else {
        lr <- glm(form, family = binomial, data = data)
        resp <- eval(parse(text = deparse(form2)), envir = lr$model)
        Model.inf <- paste("Model: ", paste(paste(form)[c(2, 
            1, 3)], collapse = " "))
    }
    m <- as.matrix(base::table(switch(PS + 1, test, lr$fit), 
        resp))
    m <- addmargins(rbind(0, m), 2)
    fv <- c(-Inf, sort(unique(switch(PS + 1, test, lr$fit))))
    nr <- nrow(m)
    m <- apply(m, 2, cumsum)
    sns <- (m[nr, 2] - m[, 2])/m[nr, 2]
    spc <- m[, 1]/m[nr, 1]
    pvp <- m[, 2]/m[, 3]
    pvn <- (m[nr, 1] - m[, 1])/(m[nr, 3] - m[, 3])
    res <- data.frame(cbind(sns, spc, pvp, pvn, fv))
    ddaattaa <- data.frame(lr[21], lr$fit)
    names(res) <- c("sens", "spec", "pvp", "pvn", rnam)
    auc <- sum((res[-1, "sens"] + res[-nr, "sens"])/2 * abs(diff(1 - 
        res[, "spec"])))
    
    mx <- max(res[, 1] + res[, 2])
    mhv <- which((res[, 1] + res[, 2]) == mx)
    mxf <- fv[mhv]
    
    cutoff <- ddaattaa[ddaattaa$lr.fit == mxf, ]
    
    invisible(list(res = res, AUC = auc, lr = lr, cutoff = cutoff, optimal = res[mhv, ], 
                   n = nrow(ddaattaa), freq = table(ddaattaa[, 1])))
}

b <- ROC(form = a[, 1] ~ a[, 2], plot="ROC")
c <- ROC1(form = a[, 1] ~ a[, 2], plot="ROC")
c$cutoff
c$AUC
c$optimal
c$n
c$freq