merge

yjunechoe · Jan 23, 2024 · a1002aa · a1002aa
2 parents 2d0ade9 + fb73f8d
commit a1002aa
Showing 1 changed file with 70 additions and 66 deletions.
diff --git a/vignettes/articles/eyetrackingR-comparison.Rmd b/vignettes/articles/eyetrackingR-comparison.Rmd
@@ -31,38 +31,43 @@ library("ggplot2")
 library("eyetrackingR")
 
 data("word_recognition")
-data <- make_eyetrackingr_data(word_recognition, 
-                       participant_column = "ParticipantName",
-                       trial_column = "Trial",
-                       time_column = "TimeFromTrialOnset",
-                       trackloss_column = "TrackLoss",
-                       aoi_columns = c('Animate','Inanimate'),
-                       treat_non_aoi_looks_as_missing = TRUE
-                )
+data <- make_eyetrackingr_data(word_recognition,
+  participant_column = "ParticipantName",
+  trial_column = "Trial",
+  time_column = "TimeFromTrialOnset",
+  trackloss_column = "TrackLoss",
+  aoi_columns = c("Animate", "Inanimate"),
+  treat_non_aoi_looks_as_missing = TRUE
+)
 
 # subset to response window post word-onset
-response_window <- subset_by_window(data, 
-                                    window_start_time = 15500, 
-                                    window_end_time = 21000, 
-                                    rezero = FALSE)
+response_window <- subset_by_window(data,
+  window_start_time = 15500,
+  window_end_time = 21000,
+  rezero = FALSE
+)
 
 # analyze amount of trackloss by subjects and trials
 (trackloss <- trackloss_analysis(data = response_window))
 
 # remove trials with > 25% of trackloss
-response_window_clean <- clean_by_trackloss(data = response_window,
-                                            trial_prop_thresh = .25)
+response_window_clean <- clean_by_trackloss(
+  data = response_window,
+  trial_prop_thresh = .25
+)
 
 # create Target condition column
-response_window_clean$Target <- as.factor( ifelse(test = grepl('(Spoon|Bottle)', response_window_clean$Trial), 
-                                       yes = 'Inanimate', 
-                                       no  = 'Animate') )
+response_window_clean$Target <- as.factor(ifelse(test = grepl("(Spoon|Bottle)", response_window_clean$Trial),
+  yes = "Inanimate",
+  no  = "Animate"
+))
 
 response_time <- make_time_sequence_data(response_window_clean,
-                                  time_bin_size = 100, 
-                                  predictor_columns = c("Target"),
-                                  aois = "Animate",
-                                  summarize_by = "ParticipantName" )
+  time_bin_size = 100,
+  predictor_columns = c("Target"),
+  aois = "Animate",
+  summarize_by = "ParticipantName"
+)
 ```
 
 </details>
@@ -85,19 +90,21 @@ Additionally, we keep the following columns as they are necessary metadata for t
 For simplicity, we subset `response_time` to include only the columns of interest for the cluster-based permutation analysis.
 
 ```{r}
-response_time <- response_time %>% 
-  select(Prop, Target, TimeBin, ParticipantName,
-         Time, AOI)
+response_time <- response_time %>%
+  select(
+    Prop, Target, TimeBin, ParticipantName,
+    Time, AOI
+  )
 dplyr::glimpse(response_time)
 ```
 
 Lastly, we replicate the plot from the original vignette:
 
 ```{r}
 # visualize timecourse
-plot(response_time, predictor_column = "Target") + 
+plot(response_time, predictor_column = "Target") +
   theme_light() +
-  coord_cartesian(ylim = c(0,1))
+  coord_cartesian(ylim = c(0, 1))
 ```
 
 ## CPA in `{eyetrackingR}`
@@ -110,8 +117,8 @@ Continuing with the `response_time` dataframe, we jump to the ["Bootstrapped clu
 The original vignette uses the following heuristic of choosing a threshold:
 
 ```{r}
-num_sub = length(unique((response_window_clean$ParticipantName)))
-threshold_t = qt(p = 1 - .05/2, df = num_sub-1)
+num_sub <- length(unique((response_window_clean$ParticipantName)))
+threshold_t <- qt(p = 1 - .05 / 2, df = num_sub - 1)
 threshold_t
 ```
 
@@ -124,48 +131,47 @@ In `{eyetrackingR}`, CPA is conducted in two steps:
 1) Prepare data for CPA with `make_time_cluster_data()`:
 
     ```{r}
-    df_timeclust <- make_time_cluster_data(response_time, 
-                                          test= "t.test", paired=TRUE,
-                                          predictor_column = "Target", 
-                                          threshold = threshold_t)
+df_timeclust <- make_time_cluster_data(response_time,
+  test = "t.test", paired = TRUE,
+  predictor_column = "Target",
+  threshold = threshold_t
+)
     ```
 
     This step computes the timewise statistics from the data and identifies the empirical clusters, which can be inspected with a `plot()` and `summary()` method:
 
     ```{r}
-    plot(df_timeclust)
+plot(df_timeclust)
     ```
 
     ```{r}
-    summary(df_timeclust)
+summary(df_timeclust)
     ```
 
 2) Run the permutation test on the cluster data with `analyze_time_clusters()`:
 
     ```{r}
-    system.time({
-      
-      clust_analysis <- analyze_time_clusters(
-        df_timeclust,
-        within_subj = TRUE,
-        paired = TRUE,
-        samples = 150,
-        quiet = TRUE
-      )
-      
-    })
+system.time({
+  clust_analysis <- analyze_time_clusters(
+    df_timeclust,
+    within_subj = TRUE,
+    paired = TRUE,
+    samples = 150,
+    quiet = TRUE
+  )
+})
     ```
 
     This simulates a null distribution of cluster-mass statistics. The output, when printed, is essentially the output of `summary(df_timeclust)` with p-values (the `Probability` column)
 
     ```{r}
-    clust_analysis
+clust_analysis
     ```
 
     The null distribution (and the extremety of the empirical clusters in that context) can be visualized with a `plot()` method:
 
     ```{r}
-    plot(clust_analysis)
+plot(clust_analysis)
     ```
 
 ## CPA in `{jlmerclusterperm}`
@@ -176,10 +182,8 @@ First, we set up jlmerclusterperm. For comparability, we use a single thread:
 library(jlmerclusterperm)
 options("jlmerclusterperm.nthreads" = 1)
 system.time({
-  
   # Note the overhead for initializing the Julia session
   jlmerclusterperm_setup()
-  
 })
 ```
 
@@ -190,52 +194,52 @@ In `{jlmerclusterpm}`, CPA can also be conducted in two steps:
     We first specify the formula, data, and grouping columns of the data. Instead of a paired t-test, we specify a linear model with the formula `Prop ~ Target`.
 
     ```{r}
-    spec <- make_jlmer_spec(
-      formula = Prop ~ Target,
-      data = response_time,
-      subject = "ParticipantName",
-      trial = "Target",
-      time = "TimeBin"
-    )
+spec <- make_jlmer_spec(
+  formula = Prop ~ Target,
+  data = response_time,
+  subject = "ParticipantName",
+  trial = "Target",
+  time = "TimeBin"
+)
     ```
 
     The output prepares the data for CPA by subsetting it and applying the contrast coding schemes, among other things:
 
     ```{r}
-    spec
+spec
     ```
 
 2) Run the permutation test with the specification using `clusterpermute()`
 
     ```{r, message = FALSE, warning = FALSE}
-    system.time({
-      cpa <- clusterpermute(spec, threshold = threshold_t, nsim = 150)
-    })
+system.time({
+  cpa <- clusterpermute(spec, threshold = threshold_t, nsim = 150)
+})
     ```
 
     The same kinds of information are returned:
 
     ```{r}
-    cpa
+cpa
     ```
 
     The different pieces of information are available for further inspection using `tidy()`, which returns the dataframe underlying the summary:
 
     ```{r}
-    null_distribution <- tidy(cpa$null_cluster_dists)
-    null_distribution
+null_distribution <- tidy(cpa$null_cluster_dists)
+null_distribution
     ```
 
     ```{r}
-    empirical_clusters <- tidy(cpa$empirical_clusters)
-    empirical_clusters
+empirical_clusters <- tidy(cpa$empirical_clusters)
+empirical_clusters
     ```
 
 In contrast to `{eyetrackingR}`, `{jlmerclusterperm}` does not provide a custom `plot()` method. However, the same kinds of plots can be replicated with a few lines of ggplot code:
 
 ```{r}
 # We flip the sign of the statistic here for visual equivalence
-cpa_plot <- null_distribution %>% 
+cpa_plot <- null_distribution %>%
   ggplot(aes(-sum_statistic)) +
   geom_histogram(
     aes(y = after_stat(density)),