This RMarkdown file generates tables and figures for the paper “Think-aloud interviews: A tool for exploring student statistical reasoning”.

# Load every recorded response, then keep only the 2019 semesters and the
# three course sections analysed below.
responses <- read.csv("responses.csv", stringsAsFactors = FALSE) %>%
  filter(
    semester %in% c("2019-spring", "2019-summer", "2019-fall"),
    section %in% c("cmu-200", "colby-212", "cmu-202")
  )

Summary assessment results

How many students participated in total?

# Number of distinct values in the `name` column of the retained responses.
responses$name %>%
  unique() %>%
  length()
## [1] 638

How many completed assessments? We group students by whether their response was pre or post, so that the pre-test counts separately from the post-test.

# One row per distinct (name, post) combination, so a student who has both
# pre-test and post-test responses is counted twice.
responses %>%
  distinct(name, post) %>%
  nrow()
## [1] 892

A table of assessment results from 2019 (spring, summer, and fall semesters), including pre- and post-test information:

# Per-section pre-test summary: mean of `correct`, number of distinct student
# names, and average number of responses per student.
pretest_stats <- responses %>%
  filter(!post) %>%
  group_by(section) %>%
  summarize(
    correctness = mean(correct),
    num_students = n_distinct(name),
    avg_responses = n() / num_students
  )

# The same three summaries computed on the post-test responses.
posttest_stats <- responses %>%
  filter(post) %>%
  group_by(section) %>%
  summarize(
    correctness = mean(correct),
    num_students = n_distinct(name),
    avg_responses = n() / num_students
  )

# Full join keeps sections that appear in only one of the two summaries;
# their missing side is filled with NA.
test_stats <- full_join(
  pretest_stats,
  posttest_stats,
  by = "section",
  suffix = c(".pre", ".post")
)

# Format the joined pre/post summary as a gt table and print its LaTeX source.
test_stats %>%
  gt() %>%
  cols_label(
    section = "Section",
    correctness.pre = "Mean score",
    num_students.pre = "N",
    avg_responses.pre = "Qs/student",
    correctness.post = "Mean score",
    num_students.post = "N",
    avg_responses.post = "Qs/student"
  ) %>%
  fmt_percent(
    columns = vars(correctness.pre, correctness.post),
    decimals = 1
  ) %>%
  fmt_number(
    columns = vars(avg_responses.pre, avg_responses.post),
    decimals = 1
  ) %>%
  # Spanner headers group the three pre-test and three post-test columns.
  tab_spanner(
    label = "Pre-test",
    columns = vars(correctness.pre, num_students.pre, avg_responses.pre)
  ) %>%
  tab_spanner(
    label = "Post-test",
    columns = vars(correctness.post, num_students.post, avg_responses.post)
  ) %>%
  as_latex() %>%
  as.character() %>%
  cat()
## \captionsetup[table]{labelformat=empty,skip=1pt}
## \begin{longtable}{lrcrrcr}
## \toprule
## & \multicolumn{3}{c}{Pre-test} & \multicolumn{3}{c}{Post-test} \\ 
##  \cmidrule(lr){2-4}\cmidrule(lr){5-7}
## Section & Mean score & N & Qs/student & Mean score & N & Qs/student \\ 
## \midrule
## cmu-200 & $46.5\%$ & 265 & $29.2$ & $53.0\%$ & 221 & $28.8$ \\ 
## colby-212 & $51.1\%$ & 173 & $31.9$ & $59.9\%$ & 112 & $34.0$ \\ 
## cmu-202 & NA & NA & NA & $57.0\%$ & 121 & $28.8$ \\ 
## \bottomrule
## \end{longtable}

investment-success

Before the change (fall 2018):

# Mean of the `investment.success` column in the fall 2018 Colby response
# matrix (presumably a 0/1 correctness indicator -- confirm against the CSV).
investment_success <- pull(
  read.csv("response-matrix-colby-f18.csv"),
  investment.success
)
mean(investment_success)
## [1] 0.4086957

After the change (spring/fall 2019, post-test responses only):

# Fraction of correct post-test responses to "investment-success" in the
# colby-212 section.
responses %>%
  filter(section == "colby-212", id == "investment-success", post) %>%
  pull(correct) %>%
  mean()
## [1] 0.1343284

What did they pick?

# Tally of the answer choices on the same post-test responses.
# The vector is passed to table() as a nested call rather than through a pipe
# so that the printed table keeps an unnamed dimension header.
table(
  pull(
    filter(
      responses,
      section == "colby-212", id == "investment-success", post
    ),
    student_answer
  )
)
## 
##  A  B  C  D  E 
## 38 10  4  9  6

Most students chose A, \(8 \times 0.1\) (38 of the 67 responses).

Causality questions

# Fraction of correct answers to the "books" question, split by pre-test
# (post == FALSE) and post-test (post == TRUE).
filter(responses, id == "books") %>%
  group_by(post) %>%
  summarise(correctness = mean(correct))
## # A tibble: 2 x 2
##   post  correctness
##   <lgl>       <dbl>
## 1 FALSE       0.784
## 2 TRUE        0.850
# For the "vitamin-c" question: fraction correct and fraction choosing
# answer "C", split by pre-test and post-test.
filter(responses, id == "vitamin-c") %>%
  group_by(post) %>%
  summarise(
    correctness = mean(correct),
    fraction_c = mean(student_answer == "C")
  )
## # A tibble: 2 x 3
##   post  correctness fraction_c
##   <lgl>       <dbl>      <dbl>
## 1 FALSE       0.342      0.602
## 2 TRUE        0.413      0.530

A summary of the results from all the causal questions:

# Question ids of the causal questions summarised together below.
new_qs <- c(
  "books", "coffee-headlines", "pools", "fixitol-solvix",
  "diet-pills", "candy-test", "vitamin-c"
)

# For each of those questions, the fraction correct among pre-test responses
# (post == FALSE) and among post-test responses (post == TRUE).
responses %>%
  filter(is.element(id, new_qs)) %>%
  group_by(id) %>%
  summarise(
    pre_correct = mean(correct[!post]),
    post_correct = mean(correct[post])
  )
## # A tibble: 7 x 3
##   id               pre_correct post_correct
##   <chr>                  <dbl>        <dbl>
## 1 books                  0.784        0.850
## 2 candy-test             0.52         0.518
## 3 coffee-headlines       0.781        0.8  
## 4 diet-pills             0.562        0.495
## 5 fixitol-solvix         0.710        0.662
## 6 pools                  0.701        0.780
## 7 vitamin-c              0.342        0.413

farm-areas and study-time

# Per-section fraction correct and response count for "farm-areas" on the
# pre-test.
farm_areas_pre <- responses %>%
  filter(id == "farm-areas", !post) %>%
  group_by(section) %>%
  summarize(correctness = mean(correct), responses = n())

# The same summary for the post-test.
farm_areas_post <- responses %>%
  filter(id == "farm-areas", post) %>%
  group_by(section) %>%
  summarize(correctness = mean(correct), responses = n())

# Join the two summaries side by side (sections missing from one side get NA)
# and print the resulting gt table as LaTeX.
full_join(
  farm_areas_pre,
  farm_areas_post,
  by = c("section"),
  suffix = c(".pre", ".post")
) %>%
  gt() %>%
  cols_label(
    section = "Section",
    correctness.pre = "Correct",
    correctness.post = "Correct",
    responses.pre = "N",
    responses.post = "N"
  ) %>%
  fmt_percent(
    columns = vars(correctness.pre, correctness.post),
    decimals = 1
  ) %>%
  tab_spanner(
    label = "Pre-test",
    columns = vars(correctness.pre, responses.pre)
  ) %>%
  tab_spanner(
    label = "Post-test",
    columns = vars(correctness.post, responses.post)
  ) %>%
  as_latex() %>%
  as.character() %>%
  cat()
## \captionsetup[table]{labelformat=empty,skip=1pt}
## \begin{longtable}{lrcrc}
## \toprule
## & \multicolumn{2}{c}{Pre-test} & \multicolumn{2}{c}{Post-test} \\ 
##  \cmidrule(lr){2-3}\cmidrule(lr){4-5}
## Section & Correct & N & Correct & N \\ 
## \midrule
## cmu-200 & $40.2\%$ & 132 & $22.6\%$ & 106 \\ 
## colby-212 & $37.4\%$ & 91 & $29.4\%$ & 68 \\ 
## cmu-202 & NA & NA & $35.7\%$ & 56 \\ 
## \bottomrule
## \end{longtable}
# Per-section fraction correct and response count for "study-time" on the
# pre-test.
study_time_pre <- responses %>%
  filter(id == "study-time", !post) %>%
  group_by(section) %>%
  summarize(correctness = mean(correct), responses = n())

# The same summary for the post-test.
study_time_post <- responses %>%
  filter(id == "study-time", post) %>%
  group_by(section) %>%
  summarize(correctness = mean(correct), responses = n())

# Join the two summaries side by side (sections missing from one side get NA)
# and print the resulting gt table as LaTeX.
full_join(
  study_time_pre,
  study_time_post,
  by = c("section"),
  suffix = c(".pre", ".post")
) %>%
  gt() %>%
  cols_label(
    section = "Section",
    correctness.pre = "Correct",
    correctness.post = "Correct",
    responses.pre = "N",
    responses.post = "N"
  ) %>%
  fmt_percent(
    columns = vars(correctness.pre, correctness.post),
    decimals = 1
  ) %>%
  tab_spanner(
    label = "Pre-test",
    columns = vars(correctness.pre, responses.pre)
  ) %>%
  tab_spanner(
    label = "Post-test",
    columns = vars(correctness.post, responses.post)
  ) %>%
  as_latex() %>%
  as.character() %>%
  cat()
## \captionsetup[table]{labelformat=empty,skip=1pt}
## \begin{longtable}{lrcrc}
## \toprule
## & \multicolumn{2}{c}{Pre-test} & \multicolumn{2}{c}{Post-test} \\ 
##  \cmidrule(lr){2-3}\cmidrule(lr){4-5}
## Section & Correct & N & Correct & N \\ 
## \midrule
## cmu-200 & $62.5\%$ & 120 & $50.0\%$ & 112 \\ 
## colby-212 & $73.4\%$ & 94 & $70.0\%$ & 60 \\ 
## cmu-202 & NA & NA & $57.9\%$ & 76 \\ 
## \bottomrule
## \end{longtable}

Matching answers for farm-areas and study-time:

# Post-test responses to the "farm-areas" question.
farm_areas <- responses %>%
  filter(id == "farm-areas", post) %>%
  select(id, name, student_answer, correct, confidence, section, semester, post)

# Post-test responses to the "study-time" question, with the same columns.
study_time <- responses %>%
  filter(id == "study-time", post) %>%
  select(id, name, student_answer, correct, confidence, section, semester, post)

# convert the study time answers into more readable format
# answers are their selections for Cosma, Jeri, Steve (in that order)
#
# Each raw answer is reduced to its capital letters. The letters in positions
# 1, 3, 5 are used as sort keys for the letters in positions 2, 4, 6, and the
# reordered letters are pasted into one three-character string.
#
# vapply() replaces the former `for (row in 1:nrow(study_time))` loop:
# `1:nrow()` iterates over 1 and 0 when the data frame is empty and then fails
# on assignment, whereas vapply() simply returns character(0).
study_time$student_answer <- vapply(
  study_time$student_answer,
  function(raw_answer) {
    letters_only <- strsplit(gsub("[^A-Z]", "", raw_answer), "")[[1]]
    sort_keys <- letters_only[c(1, 3, 5)]
    selections <- letters_only[c(2, 4, 6)]
    paste(selections[order(sort_keys)], collapse = "")
  },
  character(1),
  USE.NAMES = FALSE
)

# Pair each study-time response with the farm-areas response that has the
# same `name` and `post`; columns present on both sides get the suffixes
# "_study" and "_farm". The inner join drops unmatched responses.
q_pair <- study_time %>%
  select(name, correct, student_answer, post, confidence) %>%
  inner_join(
    farm_areas %>%
      select(name, correct, student_answer, post, confidence),
    by = c("name", "post"),
    suffix = c("_study", "_farm")
  )

Total number of matched response pairs (students who answered both questions on the post-test):

# Number of rows in the joined study-time / farm-areas table.
q_pair %>%
  nrow()
## [1] 139

Number who correctly answered study-time and also answered farm-areas (correctly or incorrectly):

# Pairs whose study-time answer is marked correct.
nrow(filter(q_pair, correct_study))
## [1] 76

Were any guessing?

# Pairs with a correct study-time answer given at confidence 0.
nrow(filter(q_pair, correct_study, confidence_study == 0))
## [1] 3

Number of those students who answered farm-areas incorrectly:

# Pairs with a correct study-time answer but an incorrect farm-areas answer.
nrow(filter(q_pair, correct_study, !correct_farm))
## [1] 51

Without guessing:

# As above, restricted to farm-areas answers given with confidence above 0.
nrow(filter(q_pair, correct_study, !correct_farm, confidence_farm > 0))
## [1] 44

Number of those who answered B to farm-areas:

# Pairs with a correct study-time answer whose farm-areas answer was "B".
nrow(filter(q_pair, correct_study, student_answer_farm == "B"))
## [1] 33

Matched pairs for study-time:

# cmu-200 responses to "study-time" on the pre-test and on the post-test.
# NOTE: this reassigns `study_time_pre` / `study_time_post`, which held the
# per-section summaries earlier in the file.
study_time_pre <- filter(
  responses,
  id == "study-time", !post, section == "cmu-200"
)

study_time_post <- filter(
  responses,
  id == "study-time", post, section == "cmu-200"
)

# Cross-tabulate pre-test against post-test correctness per student name.
# The full join keeps students who have only one of the two responses; their
# missing side shows up as NA in the counts.
full_join(
  study_time_pre,
  study_time_post,
  by = "name",
  suffix = c(".pre", ".post")
) %>%
  group_by(correct.pre, correct.post) %>%
  summarise(count = n())
## # A tibble: 8 x 3
## # Groups:   correct.pre [3]
##   correct.pre correct.post count
##   <lgl>       <lgl>        <int>
## 1 FALSE       FALSE            6
## 2 FALSE       TRUE             6
## 3 FALSE       NA              33
## 4 TRUE        FALSE            7
## 5 TRUE        TRUE            16
## 6 TRUE        NA              52
## 7 NA          FALSE           43
## 8 NA          TRUE            34