Deterministic Functions • ltmle

library(ltmle)

deterministic.g.function

Time ordering of data is W, C1, L1, A1, Y1, C2, L2, A2, Y2
True value of E[Y_(1,1,1,1)] (expected value of Y setting C1, A1, C2, A2 all to 1) is approximately 0.413.
A1 is known to always be 1 if L1 < -2, and is 1 with probability 0.1 if L1 > 2
A2 is known to always be 1 if A1 is 1
We can incorporate this knowledge using deterministic.g.function.

Generate data:

set.seed(123)
rexpit <- function(x) rbinom(n=length(x), size=1, prob=plogis(x))
n <- 1000
ua <- rep(TRUE, n)   #ua = uncensored and alive
L1 <- A1 <- Y1 <- C2.binary <- L2 <- A2 <- Y2 <- rep(NA_real_, n)
W <- rnorm(n)

C1 <- BinaryToCensoring(is.uncensored=rexpit(2 + W))
ua <- ua & C1 == "uncensored"
L1[ua] <- rnorm(n)[ua] + W[ua]
A1[ua] <- rexpit(L1[ua])
A1[ua & L1 < -2] <- 1
A1[ua & L1 >  2] <- rbinom(n, size=1, prob=0.1)[ua & L1 >  2]
Y1[ua] <- rexpit((W + L1 - A1)[ua])
ua <- ua & !Y1

C2.binary[ua] <- rexpit((1 + 0.7 * L1 - A1)[ua])
C2 <- BinaryToCensoring(is.uncensored=C2.binary)
ua <- ua & C2 == "uncensored"
L2[ua] <- (0.5 * L1 - 0.9 * A1 + rnorm(n))[ua]
A2[ua] <- rexpit((0.5 * L1 + 0.8 * L2)[ua]) | A1[ua]
Y2[ua] <- rexpit((0.7 * L1 + L2 - 0.8 * A1 - A2)[ua])
Y2[Y1 == 1] <- 1  # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, C1, L1, A1, Y1, C2, L2, A2, Y2)

Without considering deterministic knowledge of A1 and A2:

result <- ltmle(data, Anodes=c("A1","A2"), Cnodes=c("C1", "C2"), 
                Lnodes=c("L1", "L2"), Ynodes=c("Y1", "Y2"), abar=c(1, 1), 
                survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for L1:
#> Q.kplus1 ~ W
#> formula for Y1:
#> Q.kplus1 ~ W + L1 + A1
#> formula for L2:
#> Q.kplus1 ~ W + L1 + A1
#> formula for Y2:
#> Q.kplus1 ~ W + L1 + A1 + L2 + A2
#> 
#> gform not specified, using defaults:
#> formula for C1:
#> C1 ~ W
#> formula for A1:
#> A1 ~ W + L1
#> formula for C2:
#> C2 ~ W + L1 + A1
#> formula for A2:
#> A2 ~ W + L1 + A1 + L2
#> 
#> Estimate of time to completion: < 1 minute
summary(result) 
#> Estimator:  tmle 
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Cnodes = c("C1", "C2"), 
#>     Lnodes = c("L1", "L2"), Ynodes = c("Y1", "Y2"), survivalOutcome = TRUE, 
#>     abar = c(1, 1))
#> 
#>    Parameter Estimate:  0.42358 
#>     Estimated Std Err:  0.022199 
#>               p-value:  <2e-16 
#>     95% Conf Interval: (0.38007, 0.46709)

Now we use deterministic.g.function to include our deterministic knowledge of A1 and A2:

deterministic.g.function <- function(data, current.node, nodes) {
  if (names(data)[current.node] == "A1") {
    det <- (data$L1 < -2 | data$L1 > 2) & !is.na(data$L1)
    prob1 <- ((data$L1 < -2) * 1 + (data$L1 > 2) * 0.1)[det]
  } else if (names(data)[current.node] == "A2") {
    det <- data$A1 == 1 & !is.na(data$A1)
    prob1 <- 1
  } else if (names(data[current.node]) %in% c("C1", "C2")){
    return(NULL)  #this returns the default of no deterministic links 
    #note that it is not necessary to specify that prior censoring indicates future censoring
  } else {
    stop("unexpected current.node")
  }
  return(list(is.deterministic=det, prob1=prob1))  
}
result <- ltmle(data, Anodes=c("A1","A2"), Cnodes=c("C1", "C2"), 
                Lnodes=c("L1", "L2"), Ynodes=c("Y1", "Y2"), abar=c(1, 1), 
                survivalOutcome=TRUE, deterministic.g.function = deterministic.g.function)
#> Qform not specified, using defaults:
#> formula for L1:
#> Q.kplus1 ~ W
#> formula for Y1:
#> Q.kplus1 ~ W + L1 + A1
#> formula for L2:
#> Q.kplus1 ~ W + L1 + A1
#> formula for Y2:
#> Q.kplus1 ~ W + L1 + A1 + L2 + A2
#> 
#> gform not specified, using defaults:
#> formula for C1:
#> C1 ~ W
#> formula for A1:
#> A1 ~ W + L1
#> formula for C2:
#> C2 ~ W + L1 + A1
#> formula for A2:
#> A2 ~ W + L1 + A1 + L2
#> 
#> Estimate of time to completion: < 1 minute
summary(result) 
#> Estimator:  tmle 
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Cnodes = c("C1", "C2"), 
#>     Lnodes = c("L1", "L2"), Ynodes = c("Y1", "Y2"), survivalOutcome = TRUE, 
#>     abar = c(1, 1), deterministic.g.function = deterministic.g.function)
#> 
#>    Parameter Estimate:  0.42013 
#>     Estimated Std Err:  0.026763 
#>               p-value:  <2e-16 
#>     95% Conf Interval: (0.36768, 0.47259)

deterministic.Q.function

In this example, when L2 is positive, the patient leaves the study and we consider her final outcome to be 0.

W -> A1 -> Y1 -> L2 -> A2 -> Y2

n <- 1000
L2 <- A2 <- Y2 <- as.numeric(rep(NA, n))
W <- rnorm(n)
A1 <- rexpit(W)
Y1 <- rexpit(W - A1)
alive <- Y1 == 0
L2[alive] <- (0.5 * W - 0.9 * A1 + rnorm(n))[alive]
completed.study <- alive & L2 > 0

Scenario 1: patients don’t change treatment after leaving study; leave their A2 as NA

A2[alive & !completed.study] <- rexpit((0.5 * W + 0.8 * L2)[alive & !completed.study])

Y2[alive & !completed.study] <- rexpit((L2 - 0.8 * A1 - A2)[alive & !completed.study])
Y2[alive & completed.study] <- 0
Y2[!alive] <- 1  # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, A1, Y1, L2, A2, Y2)

Specify that Q is deterministically 0 when L2 is in the history of the current Q regression and L2 > 0
It is not necessary to specify that Q is deterministically 1 if Y1 is 1; this is automatic. Also note that det.Q.fun doesn’t condition on called.from.estimate.g so g will also be set deterministically after L2 > 0.

det.Q.fun.1 <- function(data, current.node, nodes, called.from.estimate.g) {
  L2.index <- which(names(data) == "L2")
  stopifnot(length(L2.index) == 1)
  L2.in.history <- L2.index < current.node
  if (! L2.in.history) return(NULL)
  
  is.deterministic <- data$L2 > 0 & !is.na(data$L2)
  return(list(is.deterministic=is.deterministic, Q.value=0))
}

result.scenario1 <- ltmle(data, Anodes=c("A1","A2"), Lnodes="L2", Ynodes=c("Y1", "Y2"), abar=c(1, 1), 
  SL.library=NULL, estimate.time=FALSE, deterministic.Q.function=det.Q.fun.1, survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for Y1:
#> Q.kplus1 ~ W + A1
#> formula for Y2:
#> Q.kplus1 ~ W + A1 + L2 + A2
#> 
#> gform not specified, using defaults:
#> formula for A1:
#> A1 ~ W
#> formula for A2:
#> A2 ~ W + A1 + L2
#> 
summary(result.scenario1)
#> Estimator:  tmle 
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Lnodes = "L2", Ynodes = c("Y1", 
#>     "Y2"), survivalOutcome = TRUE, abar = c(1, 1), SL.library = NULL, 
#>     estimate.time = FALSE, deterministic.Q.function = det.Q.fun.1)
#> 
#>    Parameter Estimate:  0.33534 
#>     Estimated Std Err:  0.02463 
#>               p-value:  <2e-16 
#>     95% Conf Interval: (0.28707, 0.38361)

Scenario 2: treatment can still change after a patient leaves the study

A2[alive] <- rexpit((0.5 * W + 0.8 * L2)[alive])  #patients can change treatment after leaving study
Y2[alive & !completed.study] <- rexpit((L2 - 0.8 * A1 - A2)[alive & !completed.study])
Y2[alive & completed.study] <- 0
Y2[!alive] <- 1  # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, A1, Y1, L2, A2, Y2)

det.Q.fun.2 <- function(data, current.node, nodes, called.from.estimate.g) {
  #there is no deterministic information when calculating g - treatment may still change
  if (called.from.estimate.g) return(NULL)  
  
  L2.index <- which(names(data) == "L2")
  stopifnot(length(L2.index) == 1)
  L2.in.history <- L2.index < current.node
  if (! L2.in.history) return(NULL)
  
  is.deterministic <- data$L2 > 0 & !is.na(data$L2)
  return(list(is.deterministic=is.deterministic, Q.value=0))
}

result.scenario2 <- ltmle(data, Anodes=c("A1","A2"), Lnodes="L2", Ynodes=c("Y1", "Y2"), abar=c(1, 1), 
 SL.library=NULL, estimate.time=FALSE, deterministic.Q.function=det.Q.fun.2, survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for Y1:
#> Q.kplus1 ~ W + A1
#> formula for Y2:
#> Q.kplus1 ~ W + A1 + L2 + A2
#> 
#> gform not specified, using defaults:
#> formula for A1:
#> A1 ~ W
#> formula for A2:
#> A2 ~ W + A1 + L2
#> 
#> Warning in CheckForVarianceWarning(inputs, g.ratio): Variance estimate is based
#> on influence curve only, which may be significantly anticonservative because
#> your data appears to contain positivity violations. Robust variance estimate is
#> not currently available with deterministic.Q.function but this will be
#> addressed in a future release.
summary(result.scenario2)
#> Estimator:  tmle 
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Lnodes = "L2", Ynodes = c("Y1", 
#>     "Y2"), survivalOutcome = TRUE, abar = c(1, 1), SL.library = NULL, 
#>     estimate.time = FALSE, deterministic.Q.function = det.Q.fun.2)
#> 
#>    Parameter Estimate:  0.3223 
#>     Estimated Std Err:  0.023814 
#>               p-value:  <2e-16 
#>     95% Conf Interval: (0.27562, 0.36897)