Deterministic Functions
Source:vignettes/articles/a04_deterministic-functions.Rmd
a04_deterministic-functions.Rmd
deterministic.g.function
Time ordering of data is W, C1, L1, A1, Y1, C2, L2, A2, Y2
True value of E[Y_(1,1,1,1)] (expected value of Y setting C1, A1, C2, A2
all to 1) is approximately 0.413.
A1 is known to always be 1 if L1 < -2, and is 1 with probability 0.1
if L1 > 2
A2 is known to always be 1 if A1 is 1
We can incorporate this knowledge using deterministic.g.function.
Generate data:
set.seed(123)
rexpit <- function(x) rbinom(n=length(x), size=1, prob=plogis(x))
n <- 1000
ua <- rep(TRUE, n) #ua = uncensored and alive
L1 <- A1 <- Y1 <- C2.binary <- L2 <- A2 <- Y2 <- rep(NA_real_, n)
W <- rnorm(n)
C1 <- BinaryToCensoring(is.uncensored=rexpit(2 + W))
ua <- ua & C1 == "uncensored"
L1[ua] <- rnorm(n)[ua] + W[ua]
A1[ua] <- rexpit(L1[ua])
A1[ua & L1 < -2] <- 1
A1[ua & L1 > 2] <- rbinom(n, size=1, prob=0.1)[ua & L1 > 2]
Y1[ua] <- rexpit((W + L1 - A1)[ua])
ua <- ua & !Y1
C2.binary[ua] <- rexpit((1 + 0.7 * L1 - A1)[ua])
C2 <- BinaryToCensoring(is.uncensored=C2.binary)
ua <- ua & C2 == "uncensored"
L2[ua] <- (0.5 * L1 - 0.9 * A1 + rnorm(n))[ua]
A2[ua] <- rexpit((0.5 * L1 + 0.8 * L2)[ua]) | A1[ua]
Y2[ua] <- rexpit((0.7 * L1 + L2 - 0.8 * A1 - A2)[ua])
Y2[Y1 == 1] <- 1 # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, C1, L1, A1, Y1, C2, L2, A2, Y2)
Without considering deterministic knowledge of A1 and A2:
result <- ltmle(data, Anodes=c("A1","A2"), Cnodes=c("C1", "C2"),
Lnodes=c("L1", "L2"), Ynodes=c("Y1", "Y2"), abar=c(1, 1),
survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for L1:
#> Q.kplus1 ~ W
#> formula for Y1:
#> Q.kplus1 ~ W + L1 + A1
#> formula for L2:
#> Q.kplus1 ~ W + L1 + A1
#> formula for Y2:
#> Q.kplus1 ~ W + L1 + A1 + L2 + A2
#>
#> gform not specified, using defaults:
#> formula for C1:
#> C1 ~ W
#> formula for A1:
#> A1 ~ W + L1
#> formula for C2:
#> C2 ~ W + L1 + A1
#> formula for A2:
#> A2 ~ W + L1 + A1 + L2
#>
#> Estimate of time to completion: < 1 minute
summary(result)
#> Estimator: tmle
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Cnodes = c("C1", "C2"),
#> Lnodes = c("L1", "L2"), Ynodes = c("Y1", "Y2"), survivalOutcome = TRUE,
#> abar = c(1, 1))
#>
#> Parameter Estimate: 0.42358
#> Estimated Std Err: 0.022199
#> p-value: <2e-16
#> 95% Conf Interval: (0.38007, 0.46709)
Now we use deterministic.g.function to include our deterministic knowledge of A1 and A2:
deterministic.g.function <- function(data, current.node, nodes) {
if (names(data)[current.node] == "A1") {
det <- (data$L1 < -2 | data$L1 > 2) & !is.na(data$L1)
prob1 <- ((data$L1 < -2) * 1 + (data$L1 > 2) * 0.1)[det]
} else if (names(data)[current.node] == "A2") {
det <- data$A1 == 1 & !is.na(data$A1)
prob1 <- 1
} else if (names(data[current.node]) %in% c("C1", "C2")){
return(NULL) #this returns the default of no deterministic links
#note that it is not necessary to specify that prior censoring indicates future censoring
} else {
stop("unexpected current.node")
}
return(list(is.deterministic=det, prob1=prob1))
}
result <- ltmle(data, Anodes=c("A1","A2"), Cnodes=c("C1", "C2"),
Lnodes=c("L1", "L2"), Ynodes=c("Y1", "Y2"), abar=c(1, 1),
survivalOutcome=TRUE, deterministic.g.function = deterministic.g.function)
#> Qform not specified, using defaults:
#> formula for L1:
#> Q.kplus1 ~ W
#> formula for Y1:
#> Q.kplus1 ~ W + L1 + A1
#> formula for L2:
#> Q.kplus1 ~ W + L1 + A1
#> formula for Y2:
#> Q.kplus1 ~ W + L1 + A1 + L2 + A2
#>
#> gform not specified, using defaults:
#> formula for C1:
#> C1 ~ W
#> formula for A1:
#> A1 ~ W + L1
#> formula for C2:
#> C2 ~ W + L1 + A1
#> formula for A2:
#> A2 ~ W + L1 + A1 + L2
#>
#> Estimate of time to completion: < 1 minute
summary(result)
#> Estimator: tmle
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Cnodes = c("C1", "C2"),
#> Lnodes = c("L1", "L2"), Ynodes = c("Y1", "Y2"), survivalOutcome = TRUE,
#> abar = c(1, 1), deterministic.g.function = deterministic.g.function)
#>
#> Parameter Estimate: 0.42013
#> Estimated Std Err: 0.026763
#> p-value: <2e-16
#> 95% Conf Interval: (0.36768, 0.47259)
deterministic.Q.function
In this example, when L2 is positive, the patient leaves the study and we consider her final outcome to be 0.
W -> A1 -> Y1 -> L2 -> A2 -> Y2
n <- 1000
L2 <- A2 <- Y2 <- as.numeric(rep(NA, n))
W <- rnorm(n)
A1 <- rexpit(W)
Y1 <- rexpit(W - A1)
alive <- Y1 == 0
L2[alive] <- (0.5 * W - 0.9 * A1 + rnorm(n))[alive]
completed.study <- alive & L2 > 0
- Scenario 1: patients don’t change treatment after leaving study; leave their A2 as NA
A2[alive & !completed.study] <- rexpit((0.5 * W + 0.8 * L2)[alive & !completed.study])
Y2[alive & !completed.study] <- rexpit((L2 - 0.8 * A1 - A2)[alive & !completed.study])
Y2[alive & completed.study] <- 0
Y2[!alive] <- 1 # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, A1, Y1, L2, A2, Y2)
Specify that Q is deterministically 0 when L2 is in the history of
the current Q regression and L2 > 0
It is not necessary to specify that Q is deterministically 1 if Y1 is 1;
this is automatic. Also note that det.Q.fun doesn’t condition on
called.from.estimate.g
so g will also be set
deterministically after L2 > 0.
det.Q.fun.1 <- function(data, current.node, nodes, called.from.estimate.g) {
L2.index <- which(names(data) == "L2")
stopifnot(length(L2.index) == 1)
L2.in.history <- L2.index < current.node
if (! L2.in.history) return(NULL)
is.deterministic <- data$L2 > 0 & !is.na(data$L2)
return(list(is.deterministic=is.deterministic, Q.value=0))
}
result.scenario1 <- ltmle(data, Anodes=c("A1","A2"), Lnodes="L2", Ynodes=c("Y1", "Y2"), abar=c(1, 1),
SL.library=NULL, estimate.time=FALSE, deterministic.Q.function=det.Q.fun.1, survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for Y1:
#> Q.kplus1 ~ W + A1
#> formula for Y2:
#> Q.kplus1 ~ W + A1 + L2 + A2
#>
#> gform not specified, using defaults:
#> formula for A1:
#> A1 ~ W
#> formula for A2:
#> A2 ~ W + A1 + L2
#>
summary(result.scenario1)
#> Estimator: tmle
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Lnodes = "L2", Ynodes = c("Y1",
#> "Y2"), survivalOutcome = TRUE, abar = c(1, 1), SL.library = NULL,
#> estimate.time = FALSE, deterministic.Q.function = det.Q.fun.1)
#>
#> Parameter Estimate: 0.33534
#> Estimated Std Err: 0.02463
#> p-value: <2e-16
#> 95% Conf Interval: (0.28707, 0.38361)
- Scenario 2: treatment can still change after a patient leaves the study
A2[alive] <- rexpit((0.5 * W + 0.8 * L2)[alive]) #patients can change treatment after leaving study
Y2[alive & !completed.study] <- rexpit((L2 - 0.8 * A1 - A2)[alive & !completed.study])
Y2[alive & completed.study] <- 0
Y2[!alive] <- 1 # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, A1, Y1, L2, A2, Y2)
det.Q.fun.2 <- function(data, current.node, nodes, called.from.estimate.g) {
#there is no deterministic information when calculating g - treatment may still change
if (called.from.estimate.g) return(NULL)
L2.index <- which(names(data) == "L2")
stopifnot(length(L2.index) == 1)
L2.in.history <- L2.index < current.node
if (! L2.in.history) return(NULL)
is.deterministic <- data$L2 > 0 & !is.na(data$L2)
return(list(is.deterministic=is.deterministic, Q.value=0))
}
result.scenario2 <- ltmle(data, Anodes=c("A1","A2"), Lnodes="L2", Ynodes=c("Y1", "Y2"), abar=c(1, 1),
SL.library=NULL, estimate.time=FALSE, deterministic.Q.function=det.Q.fun.2, survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for Y1:
#> Q.kplus1 ~ W + A1
#> formula for Y2:
#> Q.kplus1 ~ W + A1 + L2 + A2
#>
#> gform not specified, using defaults:
#> formula for A1:
#> A1 ~ W
#> formula for A2:
#> A2 ~ W + A1 + L2
#>
#> Warning in CheckForVarianceWarning(inputs, g.ratio): Variance estimate is based
#> on influence curve only, which may be significantly anticonservative because
#> your data appears to contain positivity violations. Robust variance estimate is
#> not currently available with deterministic.Q.function but this will be
#> addressed in a future release.
summary(result.scenario2)
#> Estimator: tmle
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Lnodes = "L2", Ynodes = c("Y1",
#> "Y2"), survivalOutcome = TRUE, abar = c(1, 1), SL.library = NULL,
#> estimate.time = FALSE, deterministic.Q.function = det.Q.fun.2)
#>
#> Parameter Estimate: 0.3223
#> Estimated Std Err: 0.023814
#> p-value: <2e-16
#> 95% Conf Interval: (0.27562, 0.36897)