Merge pull request #142 from s3alfisc/fix-CI

fix CI inversion bug #138, version 0.14.3
s3alfisc · Sep 12, 2023 · 336bb57 · 336bb57
2 parents ecc6d5b + 569ffd1
commit 336bb57
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 48 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: fwildclusterboot
 Title: Fast Wild Cluster Bootstrap Inference for Linear Models
-Version: 0.14.2
+Version: 0.14.3
 Authors@R: c(
     person("Alexander", "Fischer", , "[email protected]", role = c("aut", "cre")),
     person("David", "Roodman", role = "aut"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# fwildclusterboot 0.14.3
+
+- Fix a bug with CI inversion when `r` was set to be close to the estimated parameter. (CI inversion failed). See [#138](https://github.com/s3alfisc/fwildclusterboot/issues/138). Thanks to Achim Zeileis & team for raising this issue!
+
+
 # fwildclusterboot 0.14.2
 
 Minor fixes for CRAN release.

diff --git a/R/boot_algo_fastnwild.R b/R/boot_algo_fastnwild.R
@@ -13,9 +13,9 @@ boot_algo_fastnwild <-
            small_sample_correction,
            conf_int,
            maxiter,
-           tol, 
+           tol,
            sampling) {
-    
+
     #' Fast wild cluster bootstrap algorithm
     #'
     #' function that implements the fast bootstrap algorithm as described
@@ -63,11 +63,11 @@ boot_algo_fastnwild <-
     #'  finding procedure to find the confidence interval.
     #'        10 by default.
     #' @param sampling 'dqrng' or 'standard'. If 'dqrng', the 'dqrng' package is
-    #' used for random number generation (when available). If 'standard', 
-    #' functions from the 'stats' package are used when available. 
-    #' This argument is mostly a convenience to control random number generation in 
-    #' a wrapper package around `fwildclusterboot`, `wildrwolf`. 
-    #' I recommend to use the fast' option. 
+    #' used for random number generation (when available). If 'standard',
+    #' functions from the 'stats' package are used when available.
+    #' This argument is mostly a convenience to control random number generation in
+    #' a wrapper package around `fwildclusterboot`, `wildrwolf`.
+    #' I recommend to use the fast' option.
     #' @return A list of ...
     #' @importFrom collapse fsum GRP
     #' @importFrom stats as.formula coef model.matrix model.response
@@ -99,13 +99,13 @@ boot_algo_fastnwild <-
     N_G_bootcluster <- length(unique(bootcluster[[1]]))
 
     v <- get_weights(
-      type = type, 
-      full_enumeration = full_enumeration, 
-      N_G_bootcluster = N_G_bootcluster, 
-      boot_iter = boot_iter, 
+      type = type,
+      full_enumeration = full_enumeration,
+      N_G_bootcluster = N_G_bootcluster,
+      boot_iter = boot_iter,
       sampling = sampling
     )
-    
+
     # prepare "key" for use with collapse::fsum()
     g <- collapse::GRP(bootcluster[[1]], call = FALSE)
 
@@ -143,17 +143,17 @@ boot_algo_fastnwild <-
     # dim = N_G x k
     Q1 <-
       collapse::fsum(WX * as.vector(Q), g)
-    
+
     P2_bootcluster <- vec2mat(
       x = as.vector(WXARP),
       group_id = g$group.id
     )
-        
+
     Q2_bootcluster <- vec2mat(
       x = as.vector(WXARQ),
       group_id = g$group.id
     )
-    
+
     # P2_bootcluster <- Matrix::t(Matrix.utils::aggregate.Matrix(
     #   # see notes; formerly diag_XinvXXRuS_a
     #   Matrix::Diagonal(
@@ -305,28 +305,46 @@ boot_algo_fastnwild <-
 
     if (is.null(conf_int) || conf_int == TRUE) {
       # guess for standard errors
-      if (impose_null == TRUE) {
-        # should always be positive, point_estimate and t_stat need to have same
-        # sign, abs for security
-        se_guess <- abs(point_estimate / t_stat)
-      } else if (impose_null == FALSE) {
-        se_guess <- abs((point_estimate - r) / t_stat)
-      }
-
-
-      conf_int <- invert_p_val(
-        ABCD = ABCD,
-        small_sample_correction = small_sample_correction,
-        boot_iter = boot_iter,
-        point_estimate = point_estimate,
-        se_guess = se_guess,
-        clustid = clustid,
-        sign_level = sign_level,
-        vcov_sign = vcov_sign,
-        impose_null = impose_null,
-        p_val_type = p_val_type,
-        maxiter = maxiter,
-        tol = tol
+
+        #browser()
+        conf_int <- tryCatch({
+
+          if (impose_null == TRUE) {
+            # should always be positive, point_estimate and t_stat need to have same
+            # sign, abs for security
+            pval_peak <- abs(point_estimate)
+          } else if (impose_null == FALSE) {
+            pval_peak <- abs((point_estimate - r))
+          }
+
+          invert_p_val(
+            ABCD = ABCD,
+            small_sample_correction = small_sample_correction,
+            boot_iter = boot_iter,
+            point_estimate = point_estimate,
+            pval_peak = pval_peak,
+            clustid = clustid,
+            sign_level = sign_level,
+            vcov_sign = vcov_sign,
+            impose_null = impose_null,
+            p_val_type = p_val_type,
+            maxiter = maxiter,
+            tol = tol
+          )
+
+        },
+        error = function(e){
+          message("Unfortunately, confidence inversion failed due to poorly chosen starting values.",
+                  "Please let the maintainer know about this by opening an issue on GitHub.",
+                  "No confidence interval is computed.")
+          conf_int <- list(
+            conf_int = NA,
+            p_test_vals = NA,
+            test_vals = NA
+          )
+          conf_int
+
+        }
       )
     } else {
       conf_int <- list(
@@ -336,6 +354,7 @@ boot_algo_fastnwild <-
       )
     }
 
+
     res <- list(
       p_val = p_val,
       conf_int = conf_int$conf_int,

diff --git a/R/invert_p_val.R b/R/invert_p_val.R
@@ -7,7 +7,7 @@
 #' @param boot_iter An integer. Number of bootstrap iterations
 #' @param point_estimate A scalar. Point estimate of the coefficient
 #'  of interest from the regression model
-#' @param se_guess A scalar vector of dimension 2. A guess of the standard
+#' @param pval_peak A scalar vector of dimension 2. A guess of the standard
 #'  error that initiates the p-value inversion.
 #' @param clustid A vector with the clusters
 #' @param sign_level A numeric between 0 and 1. Sets to confidence level:
@@ -33,7 +33,7 @@ invert_p_val <- function(ABCD,
                          small_sample_correction,
                          boot_iter,
                          point_estimate,
-                         se_guess,
+                         pval_peak,
                          clustid,
                          sign_level,
                          vcov_sign,
@@ -42,7 +42,7 @@ invert_p_val <- function(ABCD,
                          tol,
                          maxiter) {
   check_arg(point_estimate, "numeric scalar")
-  check_arg(se_guess, "numeric scalar")
+  check_arg(pval_peak, "numeric scalar")
   check_arg(clustid, "data.frame")
   check_arg(sign_level, "numeric scalar")
   # check_arg(vcov_sign)
@@ -107,19 +107,20 @@ invert_p_val <- function(ABCD,
 
   # define functions to find boundaries separately
   get_start_vals <- function(point_estimate,
-                             se_guess,
+                             pval_peak,
                              sign_level,
                              upper) {
     #' @param point_estimate the point estimate of the model
-    #' @param se_guess A guess for the standard deviation
+    #' @param pval_peak A guess for the standard deviation
     #' @param sign_level the significance / 1-coverage level of the confidence
     #' interval
     #' @param upper logical. Should the upper or lower starting value be
     #' searched for?
     #' @noRd
 
+
     check <- FALSE
-    inflate_se <- c(2^(0:100 / 2))
+    inflate_se <- (1:100 / 10) ** 2
     len_inflate <- length(inflate_se)
     j <- 1
 
@@ -135,10 +136,10 @@ invert_p_val <- function(ABCD,
       # start guesses by taking confidence interval guess times inflation factor
       if (upper == TRUE) {
         starting_vals <-
-          as.numeric(point_estimate + inflate_se[j] * se_guess)
+          as.numeric(point_estimate + inflate_se[j] * pval_peak)
       } else if (upper == FALSE) {
         starting_vals <-
-          as.numeric(point_estimate - inflate_se[j] * se_guess)
+          as.numeric(point_estimate - inflate_se[j] * pval_peak)
       }
 
       # find starting value
@@ -166,7 +167,7 @@ invert_p_val <- function(ABCD,
   start_vals <- lapply(c(TRUE, FALSE), function(x) {
     get_start_vals(
       point_estimate = point_estimate,
-      se_guess = se_guess,
+      pval_peak = pval_peak,
       sign_level = sign_level,
       upper = x
     )
@@ -275,4 +276,4 @@ invert_p_val <- function(ABCD,
   )
 
   res_all
-}
+}