Skip to content

Commit

Permalink
Simplify R code and markdown for case study 1
Browse files Browse the repository at this point in the history
  • Loading branch information
NightlordTW committed Oct 2, 2022
1 parent 8142a66 commit 459f560
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 712 deletions.
Binary file added Data/dat_cs1.rda
Binary file not shown.
181 changes: 0 additions & 181 deletions R/functions.r
Original file line number Diff line number Diff line change
Expand Up @@ -34,187 +34,6 @@ tdw <- function(a, x, data, estimand = "ate",
return(TDW)
}

load_imputed_data <- function(fname,
seed = 944,
adam.ades = "ades.sas7bdat", # Source data
adam.adsl = "adsl.sas7bdat" # Source data
) {
dat_cc <- NULL

if (file.exists(fname)) {
message("Loading previously saved object ",fname)
load(fname) # Load dat_cc
return(dat_cc)
}

dsout <- NULL

rawdata <- read.sas7bdat(adam.ades)
patinfo <- read.sas7bdat(adam.adsl)

dsout <- subset(rawdata, STUDYID %in% c("C-1801","105MS301", "109MS301", "109MS302") & AVISITN >= 0)
dsout$TRIAL <- factor(dsout$STUDYID, levels = c("C-1801","105MS301", "109MS301", "109MS302"),
labels = c("AFFIRM", "ADVANCE", "DEFINE", "CONFIRM"))
dsout <- left_join(dsout, patinfo, by = "USUBJID", keep = TRUE)

# Convert variables
dsout <- dsout %>% transform(WhiteRace = ifelse(dsout$RACE == "WHITE", 1, 0),
MaleGender = ifelse(dsout$SEX == "M", 1, 0))

set.seed(seed)

# Start multiple imputation
impvars <- c("STUDYID.x", "USUBJID.x", "SUBJID.x", "TRIAL", "TRTA", "TRTAN",
"VISIT", "AVISIT", "AVISITN", "AVAL",
"AGE", "MaleGender", "WhiteRace", "BASE", "HEIGHTBL", "WEIGHTBL", "ONSYRS",
"DIAGYRS", "PRDMTGR", "PRMSGR", "RLPS1YR", "RLPS3YR", "GDLESBL", "T1LESBL", "T2LESBL",
"NHPTMBL", "PASATABL", "T25FWABL", "EDSSBL", "TRELMOS", "SFPCSBL", "SFMCSBL", "BVZBL", "VFT25BL")
mice.prep <- mice(dsout[,impvars], maxit = 0)

pM <- mice.prep$predictorMatrix
pM[, "SUBJID.x"] <- -2
pM[, c("STUDYID.x", "USUBJID.x", "AVISIT")] <- 0

fit <- mice(dsout[,impvars], method = mice.prep$method, predictorMatrix = pM, m = 1)

dat_cc <- complete(fit,1)
save(dat_cc, file = fname)

return(dat_cc)
}

prepare_nrs <- function(data, # List with data sets
STUDYID_control = c("DEFINE", "CONFIRM"), # Merge DEINFE + CONFIRM
STUDYID_treat = c("AFFIRM"), # AFFIRM
END_VISIT = c("WEEK 36", "VISIT 9 WK 36")
) {

ds <- data %>%
group_by(USUBJID.x, TRIAL) %>%
filter(AVISITN == 0 & TRIAL %in% c(STUDYID_control, STUDYID_treat) & TRTA == "Placebo") %>%
summarize(
AGE = first(AGE),
SEX = first(MaleGender),
RACE = first(WhiteRace),
WEIGHTBL = first(WEIGHTBL),
HEIGHTBL = first(HEIGHTBL),
ONSYRS = first(ONSYRS),
DIAGYRS = first(DIAGYRS),
RLPS1YR = first(RLPS1YR),
RLPS3YR = first(RLPS3YR),
EDSSBL = first(EDSSBL),
GDLESBL = first(GDLESBL),
T1LESBL = first(T1LESBL),
T2LESBL = first(T2LESBL),
PRMSGR = first(PRMSGR),
NHPTMBL = first(NHPTMBL),
PASATABL = first(PASATABL),
T25FWABL = first(T25FWABL),
TRELMOS = first(TRELMOS),
SFMCSBL = first(SFMCSBL),
SFPCSBL = first(SFPCSBL),
BVZBL = first(BVZBL),
VFT25BL = first(VFT25BL)
)

EDSS_END <- subset(data, TRIAL %in% c(STUDYID_control, STUDYID_treat) & TRTA == "Placebo" & VISIT %in% END_VISIT)[,c("USUBJID.x", "AVAL")]

ds_full <- left_join(ds, EDSS_END, "USUBJID.x")
ds_full <- na.omit(ds_full)

ds_full$Trt <- NA
ds_full$Trt[which(ds_full$TRIAL %in% STUDYID_control)] <- 0
ds_full$Trt[which(ds_full$TRIAL %in% STUDYID_treat)] <- 1
ds_full$y <- ds_full$AVAL
ds_full$TrtGroup <- ifelse(ds_full$Trt == 0, "Control", "Active")

ds_full
}

simulate_nrs <- function(data, load = TRUE, seed = 944, dir = "../Data/") {
set.seed(seed)

dat_cc <- NULL
if (load & file.exists(paste0(dir, "dat_cs1.rda"))) {
load(paste0(dir, "dat_cs1.rda"))
return(dat_cc)
}

# Remove grouping variable
data <- ungroup(data)
data <- data %>% dplyr::select(-c(USUBJID.x, TRIAL)) # Remove key columns
data <- data %>% mutate(imputed = 0)

ncontrol <- 500
ntreated <- 2000

# Add empty rows to data
data <- data %>% add_row(
Trt = rep(0, ncontrol),
imputed = 1)
data <- data %>% add_row(
Trt = rep(1, ntreated),
imputed = 1
)

# Impute!
impvars <- c("AGE", "SEX", "RACE", "WEIGHTBL", "ONSYRS",
"DIAGYRS", "PRMSGR", "RLPS1YR", "RLPS3YR", "GDLESBL", "T1LESBL", "T2LESBL",
"NHPTMBL", "PASATABL", "T25FWABL", "EDSSBL", "TRELMOS", "SFPCSBL", "SFMCSBL", "BVZBL", "VFT25BL",
"Trt", "imputed", "y")
mice.prep <- mice(data[,impvars], maxit = 0)

pM <- mice.prep$predictorMatrix
pM[, "imputed"] <- 0
pM["y", "Trt"] <- 0 # Ensure treatment allocation does not affect the outcome

imeth <- mice.prep$method
imeth["AGE"] <- "pmm"
imeth["WEIGHTBL"] <- "rf"
imeth["NHPTMBL"] <- "pmm"
imeth["T25FWABL"] <- "pmm"
imeth["BVZBL"] <- "norm"
imeth["ONSYRS"] <- "pmm"
imeth["DIAGYRS"] <- "pmm"
imeth["RLPS1YR"] <- "pmm"
imeth["RLPS3YR"] <- "pmm"
imeth["TRELMOS"] <- "rf"
imeth["SFMCSBL"] <- "rf"
imeth["SFMCSBL"] <- "rf"

fit <- mice(data[,impvars], method = imeth,
predictorMatrix = pM, m = 1,
maxit = 10, printFlag = FALSE)

# Keep only the artificial patients
dat_cc <- subset(complete(fit, 1), imputed == 1)

# Selectively remove patients from the control group
lp_include <- -log(dat_cc$T25FWABL + 1) -
0.09 * dat_cc$SFPCSBL -
0.05 * dat_cc$SFMCSBL -
0.7 * dat_cc$GDLESBL -
0.5 * log(dat_cc$T1LESBL + 1) -
0.05 * dat_cc$AGE -
0.8 * dat_cc$EDSSBL -
1 * dat_cc$RLPS3YR -
1 * dat_cc$SEX

p_include <- 1/(1 + exp(-(-mean(lp_include) + lp_include)))

dat_cc$include <- rbinom(n = nrow(dat_cc), size = 1, prob = p_include)
dat_cc$include[dat_cc$Trt == 0] <- 1 # Include all control patients

dat_cc <- subset(dat_cc, include == 1)

# Regenerate outcome y
dat_cc <- dat_cc %>% dplyr::select(-imputed)
dat_cc$TrtGroup <- ifelse(dat_cc$Trt == 0, "Control", "Active")

save(dat_cc, file = paste0(dir, "dat_cs1.rda"))

return(dat_cc)
}

plot_density <- function(data, facet = "TRIAL", palette = "Set1") {

Expand Down
97 changes: 7 additions & 90 deletions Report/case_study_1.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,107 +15,24 @@ source("../R/functions.r")
library(table1)
library(ggplot2)
################################################################################
# Prepare data
################################################################################
dat <- load_imputed_data("F:/OneDrive Smart Data/OneDrive - Smart Data Analysis and Statistics/Research/Manuscripts/Long - PGS weighting/data/IDB_control_imputed.rda")
dat_baseline <- dat %>%
group_by(USUBJID.x, TRIAL) %>%
filter(AVISITN == 0 & TRTA == "Placebo") %>%
summarize(
AGE = first(AGE),
SEX = first(MaleGender),
RACE = first(WhiteRace),
WEIGHTBL = first(WEIGHTBL),
ONSYRS = first(ONSYRS),
DIAGYRS = first(DIAGYRS),
RLPS1YR = first(RLPS1YR),
RLPS3YR = first(RLPS3YR),
EDSSBL = first(EDSSBL),
GDLESBL = first(GDLESBL),
T1LESBL = first(T1LESBL),
T2LESBL = first(T2LESBL),
PRMSGR = first(PRMSGR),
NHPTMBL = first(NHPTMBL),
PASATABL = first(PASATABL),
T25FWABL = first(T25FWABL),
TRELMOS = first(TRELMOS),
SFMCSBL = first(SFMCSBL),
SFPCSBL = first(SFPCSBL),
BVZBL = first(BVZBL),
VFT25BL = first(VFT25BL)
)
```

# Original data sources
Previously, four RCTs of relapsing-remitting multiple sclerosis (RRMS) were combined in an integrated clinical trial database. This database includes patient-level data from the following trials:

* The ADVANCE trial includes `r length(unique(subset(dat, TRIAL == "ADVANCE" & TRTA == "Placebo")$USUBJID.x))` patients who received placebo every 2 weeks for 48 weeks followed by 125 $\mu$g peginterferon beta-1a subcutaneously every 2 or 4 weeks for 48 weeks (https://www.clinicaltrials.gov/ct2/show/NCT00906399)
* The DEFINE trial includes `r length(unique(subset(dat, TRIAL == "DEFINE" & TRTA == "Placebo")$USUBJID.x))` patients who received two placebo capsules orally three times daily (https://www.clinicaltrials.gov/ct2/show/NCT00420212)
* The CONFIRM trial includes `r length(unique(subset(dat, TRIAL == "CONFIRM" & TRTA == "Placebo")$USUBJID.x))` patients who received two placebo capsules orally three times daily (https://www.clinicaltrials.gov/ct2/show/NCT00451451)
* The AFFIRM trial includes `r length(unique(subset(dat, TRIAL == "AFFIRM" & TRTA == "Placebo")$USUBJID.x))` patients who received placebo, intravenous infusion, every 4 weeks, for up to 116 weeks. (https://www.clinicaltrials.gov/ct2/show/NCT00027300)

We here consider the following baseline covariates (some of which were imputed):

```{r}
dat_baseline$SEX <- factor(dat_baseline$SEX, levels = c(1,0), labels = c("Male", "Female"))
dat_baseline$RACE <- factor(dat_baseline$RACE, levels = c(1,0), labels = c("White", "Non-white"))
dat_baseline$PRMSGR <- factor(dat_baseline$PRMSGR, levels = c(1,0), labels = c("Yes", "No"))
label(dat_baseline$AGE) <- "Age"
label(dat_baseline$SEX) <- "Sex"
label(dat_baseline$RACE) <- "Ethnicity"
label(dat_baseline$WEIGHTBL) <- "Weight"
label(dat_baseline$ONSYRS) <- "Time since first multiple sclerosis symptoms"
label(dat_baseline$DIAGYRS) <- "Time since multiple sclerosis diagnosis"
label(dat_baseline$EDSSBL) <- "Expanded Disability Status Scale"
label(dat_baseline$PRMSGR) <- "Prior MS Treatment Group"
label(dat_baseline$NHPTMBL) <- "9 Hole Peg Test Average Score"
label(dat_baseline$SFMCSBL) <- "SF-36 mental component score"
label(dat_baseline$SFPCSBL) <- "SF-36 physical component score"
label(dat_baseline$PASATABL) <- "PASAT 3"
label(dat_baseline$T25FWABL) <- "Timed 25 Foot Walk"
label(dat_baseline$VFT25BL) <- "VFT 2.5%"
label(dat_baseline$RLPS1YR) <- "No. of Relapses within the previous 12 months"
label(dat_baseline$RLPS3YR) <- "No. of Relapses within the previous 3 years"
label(dat_baseline$TRELMOS) <- "No. of months Since Recent Pre-Study Relapse"
label(dat_baseline$BVZBL) <- "Brain Volume Z-Score"
label(dat_baseline$GDLESBL) <- "Number of Gd+ lesions"
label(dat_baseline$T1LESBL) <- "Number of T1 weighted lesions"
label(dat_baseline$T2LESBL) <- "Number of T2 weighted lesions"
units(dat_baseline$AGE) <- "years"
units(dat_baseline$WEIGHTBL) <- "kg"
units(dat_baseline$ONSYRS) <- "years"
units(dat_baseline$DIAGYRS) <- "years"
table1(~ SEX + AGE + RACE + WEIGHTBL + ONSYRS + DIAGYRS + EDSSBL +
NHPTMBL + SFMCSBL + SFPCSBL + PASATABL + T25FWABL +
VFT25BL + RLPS1YR + RLPS3YR + TRELMOS +
BVZBL + GDLESBL + T1LESBL + T2LESBL| TRIAL,
data = dat_baseline, overall = NULL,
caption = "Baseline characteristics of the Placebo patients in the original trials.")
```


```{r, fig.height = 20, fig.width = 10}
# Prepare NRS dataset
dat_NRS <- prepare_nrs(dat,
STUDYID_control = c("ADVANCE"),
STUDYID_treat = c("AFFIRM", "DEFINE", "CONFIRM"))
```
To mimic an observational study, we consider the ADVANCE trial as 'control' placebo group and combine the remaining three trials (AFFIRM, DEFINE and CONFIRM) into an 'active' placebo group. The resulting dataset has a total of `r nrow(dat_NRS)` patients, with `r nrow(subset(dat_NRS, Trt == 0))` patients receiving control and `r nrow(subset(dat_NRS, Trt == 1))` patients receiving 'active' placebo.
* The ADVANCE trial includes 499 patients who received placebo every 2 weeks for 48 weeks followed by 125 $\mu$g peginterferon beta-1a subcutaneously every 2 or 4 weeks for 48 weeks (https://www.clinicaltrials.gov/ct2/show/NCT00906399)
* The DEFINE trial includes 408 patients who received two placebo capsules orally three times daily (https://www.clinicaltrials.gov/ct2/show/NCT00420212)
* The CONFIRM trial includes 363 patients who received two placebo capsules orally three times daily (https://www.clinicaltrials.gov/ct2/show/NCT00451451)
* The AFFIRM trial includes 312 patients who received placebo, intravenous infusion, every 4 weeks, for up to 116 weeks. (https://www.clinicaltrials.gov/ct2/show/NCT00027300)

To mimic an observational study, we consider the ADVANCE trial as ‘control’ group and combine the data from the remaining three trials (AFFIRM, DEFINE and CONFIRM) into an ‘active’ treatment group. The resulting pooled dataset has a total of 1437 patients, with 465 patients receiving control and 972 patients receiving ‘active’ treatment.

# Simulation of an observational study
```{r}
sim_NRS <- simulate_nrs(dat_NRS) #simulate_nrs(dat_NRS, load = FALSE, dir = "Data/")
load("../Data/dat_cs1.rda")
```

We generated an artificial dataset with a predefined distribution of baseline EDSS for the control and active treatment group. To this purpose, we added 500 patients to the control group and 2000 patients to the active treatment group to the pooled placebo data and imputed their baseline and outcome data using multiple imputation. Subsequently, to mimic the presence of confounding, we omitted patients from the active comparator group according to the following baseline covariates: `T25FWABL` (Timed 25 Foot Walk), `SFPCSBL` (SF-36 physical component score), `SFMCSBL` (SF-36 mental component score), `GDLESBL` (Number of Gd+ lesions), `T1LESBL` (Number of T1 weighted lesions), `AGE` (age), `EDSSBL` (Expanded Disability Status Scale), `RLPS3YR` (No. of Relapses within the previous 3 years) and `SEX`. After imputation, the original trial data were omitted, resulting in a synthetic dataset of `r nrow(sim_NRS)` patients, with `r nrow(subset(sim_NRS, Trt == 0))` patients receiving control and `r nrow(subset(sim_NRS, Trt == 1))` patients receiving 'active' placebo.
We generated an artificial dataset with a predefined distribution of baseline EDSS for the control and active treatment group. To this purpose, we augmented the pooled placebo dataset with 500 control and 2000 treated patients. Subsequently, we imputed their baseline and outcome data using multiple imputation whilst imposing a null treatment effect. After imputation we omitted patients from the active comparator group according to the following baseline covariates: `T25FWABL` (Timed 25 Foot Walk), `SFPCSBL` (SF-36 physical component score), `SFMCSBL` (SF-36 mental component score), `GDLESBL` (Number of Gd+ lesions), `T1LESBL` (Number of T1 weighted lesions), `AGE` (age), `EDSSBL` (Expanded Disability Status Scale), `RLPS3YR` (No. of Relapses within the previous 3 years) and `SEX`. After imputation, the original trial data were omitted, resulting in a synthetic dataset of `r nrow(sim_NRS)` patients, with `r nrow(subset(sim_NRS, Trt == 0))` patients receiving control and `r nrow(subset(sim_NRS, Trt == 1))` patients receiving 'active' placebo.

```{r}
sim_NRS$SEX <- factor(sim_NRS$SEX, levels = c(1,0), labels = c("Male", "Female"))
Expand Down
Loading

0 comments on commit 459f560

Please sign in to comment.