## NOTE: the former `rm(list = ls())` was dropped on purpose. It silently wipes
## the caller's workspace whenever this script is source()d, yet it does not
## give a clean session (attached packages and options survive). Every object
## this script reads is assigned by the script first, so nothing here depends
## on an empty environment. Restart R if you want a truly fresh session.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Point R at the folder that holds the data (machine-specific absolute path).
setwd("C:/Users/rob/Desktop/In Class Stats Exercises")
## List the folder's contents: the data files read later must show up here,
## i.e. they must sit in the same folder as this script.
list.files()
## [1] "AnalysisofCS376StatsProblems.R"
## [2] "AnalysisofCS376StatsProblems.spin.R"
## [3] "AnalysisofCS376StatsProblems.spin.Rmd"
## [4] "cs376_addition_tasktime.csv"
## [5] "feedme.csv"
## [6] "listit-notes.csv"
## [7] "twitch-unlocks.csv"
## Get the FeedMe preference data. Strings are read as factors so that the
## categorical `preference` column can be tabulated and tested directly.
fmdf <- read.csv("feedme.csv", header = TRUE, stringsAsFactors = TRUE)
## Examine the data -- look for weird stuff. The counts are printed instead of
## being stored in `temp`, because `temp` is overwritten just below and the
## table would otherwise never be shown.
table(fmdf$preference)
## Another way -- better if you want to plot the data, because
## ggplot does not play well with tables.
temp <- fmdf %>%
  group_by(preference) %>%
  summarize(Freq = n()) %>%
  as.data.frame()
names(temp)
## [1] "preference" "Freq"
## Bar chart of how often each preference was chosen.
ggplot(data = temp, aes(x = preference, y = Freq)) +
  geom_bar(stat = "identity")
## H0: every preference category is equally common. HA: at least one differs.
## The p-value is the chance of seeing counts at least this uneven if H0 were true.
chisq.test(table(fmdf$preference))
##
## Chi-squared test for given probabilities
##
## data: table(fmdf$preference)
## X-squared = 21.593, df = 2, p-value = 2.047e-05
##p-value = 2.047e-05
## Drop "neither" from consideration: keep the rows whose preference text
## does not contain "neither".
fmdfopin <- filter(fmdf, !grepl("neither", preference))
## Or, with base R. Start from the full data (fmdf), not from the
## already-filtered fmdfopin, otherwise this call has nothing left to remove.
## subset() keeps fmdf's original row labels, so the printed row numbers can
## skip values where "neither" rows were dropped.
subset(fmdf, preference != "neither")
## preference
## 1 aspen
## 2 aspen
## 3 sierra
## 4 aspen
## 5 sierra
## 6 sierra
## 7 aspen
## 8 aspen
## 9 aspen
## 10 aspen
## 11 aspen
## 12 sierra
## 13 sierra
## 14 aspen
## 15 sierra
## 16 aspen
## 17 aspen
## 18 aspen
## 19 aspen
## 20 aspen
## 21 sierra
## 22 sierra
## 23 aspen
## 24 aspen
## 25 aspen
## 26 sierra
## 27 aspen
## 28 aspen
## 29 aspen
## 30 aspen
## 31 aspen
## 32 sierra
## 33 aspen
## 34 aspen
## 35 aspen
## 36 aspen
## 37 aspen
## 38 aspen
## 39 aspen
## 40 aspen
## 41 aspen
## 42 sierra
## 43 aspen
## 44 sierra
## 45 sierra
## 46 sierra
## 47 sierra
## 48 aspen
## 49 sierra
## 50 aspen
## 51 sierra
## 52 aspen
## 53 sierra
## Yet another way: drop the "neither" row from the summary table by name.
## (Matching on its count would break as soon as the data change or another
## category happens to have the same count.)
tempopinion <- filter(temp, preference != "neither")
## call me butter, cause I'm on a roll
fmdfopin <- filter(fmdf, !(preference %in% c("neither")))
## Removing the rows does not remove the now-empty "neither" factor level:
table(fmdfopin)
## fmdfopin
## aspen neither sierra
## 35 0 18
fmdfopin$preference <- factor(fmdfopin$preference) ## rebuilding the factor gets rid of the unused "neither" level
table(fmdfopin)
## fmdfopin
## aspen sierra
## 35 18
## Is there a difference between the remaining preferences once "neither" is gone?
## H0: the remaining categories are equally common. HA: they are not.
chisq.test(table(fmdfopin$preference))
##
## Chi-squared test for given probabilities
##
## data: table(fmdfopin$preference)
## X-squared = 5.4528, df = 1, p-value = 0.01954
#yes
###############
### Twitch unlocks
## Load the unlock log; string columns are converted to factors.
twitch_unlocks <- read.csv(
  "twitch-unlocks.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
## Peek at the first few values of the per-phone unlock counter.
head(twitch_unlocks[["unlockNumberForPhone"]])
## [1] 1 2 3 4 5 6
## Count how often each unlock number occurs -- not a helpful way to look,
## it turns out.
table(twitch_unlocks[["unlockNumberForPhone"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 82 80 78 73 68 63 59 58 55 53 52 50 50 50 49 49 49 49
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 48 47 47 47 46 44 43 42 42 42 40 38 38 39 38 39 39 38
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 37 38 37 37 37 37 37 38 37 38 38 37 36 36 33 33 34 32
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 32 33 33 33 32 32 32 32 32 32 30 30 30 30 29 29 29 28
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 28 28 28 28 28 28 27 27 27 27 26 26 26 26 26 25 25 25
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 25 25 25 25 25 24 24 23 24 24 24 25 24 24 24 24 24 23
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## 23 23 23 23 23 22 22 21 21 21 21 21 21 22 22 21 21 20
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## 20 21 21 21 21 21 22 22 22 22 21 20 20 20 20 21 20 21
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 21 20 20 20 20 20 19 20 20 20 20 20 20 19 20 20 20 20
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 20 20 20 20 19 20 20 20 20 18 18 18 18 18 18 17 17 17
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 17 17 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## 16 16 16 16 16 17 17 16 16 16 16 17 17 16 16 17 17 17
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## 17 16 16 16 17 17 16 17 17 17 16 16 17 17 17 16 16 16
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## 16 16 16 16 16 16 16 16 16 16 16 16 15 15 15 15 15 15
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## 15 15 14 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## 13 13 11 11 11 12 12 11 11 11 10 10 10 10 11 10 10 10
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## 10 10 10 10 10 10 10 10 10 10 11 11 10 10 10 10 10 10
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## 10 10 10 10 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## 9 9 9 9 9 9 9 9 9 9 10 10 10 10 9 9 9 9
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## 9 10 10 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## 9 9 9 9 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504
## 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
## 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
## 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
## 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
## 8 8 8 8 8 8 8 8 8 7 7 7 7 7 7 6 6 6
## 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
## 6 6 6 6 6 6 6 6 6 6 6 6 5 5 5 6 6 6
## 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594
## 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 6 6 6
## 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
## 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
## 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
## 6 6 6 6 6 6 6 6 6 6 6 6 5 5 6 6 6 6
## 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
## 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
## 6 6 6 6 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
## 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
## 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3
## 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
## 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2
## 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
## 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Histogram of the raw durations, one panel per activity.
ggplot(twitch_unlocks, aes(x = duration)) +
  geom_histogram() +
  facet_wrap(~activity)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Same view for the transformed durations.
ggplot(twitch_unlocks, aes(x = transformedDuration)) +
  geom_histogram() +
  facet_wrap(~activity)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Mean transformed duration per activity, rounded to three decimals.
twitch_unlocks %>%
  group_by(activity) %>%
  summarize(round(mean(transformedDuration), 3))
## # A tibble: 5 x 2
## activity round(mean(transformedDuration), 3)
## <fctr> <dbl>
## 1 Census/Activity 0.151
## 2 Census/Dress 0.160
## 3 Census/Energy 0.157
## 4 Census/People 0.158
## 5 Slide to Unlock 0.160
##census activity seems fastest, while slide to unlock seems slowest
## H0: the mean (transformed) time to unlock is the same for every activity
## HA: at least one activity's mean time to unlock differs
## One-way ANOVA of transformedDuration on activity. The p-value is the chance
## of an F statistic at least this large if H0 were true.
anova <- aov(transformedDuration ~ activity, data=twitch_unlocks)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## activity 4 0.131 0.03279 38.84 <2e-16 ***
## Residuals 10702 9.036 0.00084
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## There is a difference! But what's different? From looking at the means, it's probably not the case that
## slide to unlock (M: .160) is different from Census/Dress (M: .160), but we should check formally.
## H0: There is no difference within a given pairwise comparison
## HA: There is a difference
## TukeyHSD() compares every pair of activities from the fitted model above and
## reports an adjusted p-value ("p adj") for each pair.
TukeyHSD(anova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = transformedDuration ~ activity, data = twitch_unlocks)
##
## $activity
## diff lwr upr
## Census/Dress-Census/Activity 0.0094311585 0.0071225681 0.0117397488
## Census/Energy-Census/Activity 0.0055919412 0.0032895183 0.0078943642
## Census/People-Census/Activity 0.0074696116 0.0051907583 0.0097484650
## Slide to Unlock-Census/Activity 0.0092487906 0.0064131886 0.0120843926
## Census/Energy-Census/Dress -0.0038392172 -0.0061516783 -0.0015267562
## Census/People-Census/Dress -0.0019615469 -0.0042505417 0.0003274479
## Slide to Unlock-Census/Dress -0.0001823679 -0.0030261265 0.0026613908
## Census/People-Census/Energy 0.0018776704 -0.0004051041 0.0041604448
## Slide to Unlock-Census/Energy 0.0036568494 0.0008180952 0.0064956036
## Slide to Unlock-Census/People 0.0017791790 -0.0010404924 0.0045988503
## p adj
## Census/Dress-Census/Activity 0.0000000
## Census/Energy-Census/Activity 0.0000000
## Census/People-Census/Activity 0.0000000
## Slide to Unlock-Census/Activity 0.0000000
## Census/Energy-Census/Dress 0.0000586
## Census/People-Census/Dress 0.1330408
## Slide to Unlock-Census/Dress 0.9997907
## Census/People-Census/Energy 0.1636581
## Slide to Unlock-Census/Energy 0.0040438
## Slide to Unlock-Census/People 0.4206493
## But maybe it's the number of times they've done it, not the activity. If that's the case,
## the p-value for unlock number on phone should be small,
## but the p-value for activity should not. `activity * unlockNumberForPhone`
## fits both main effects plus their interaction.
anova2 <- aov(transformedDuration ~ activity * unlockNumberForPhone, data = twitch_unlocks)
summary(anova2)
## Df Sum Sq Mean Sq F value Pr(>F)
## activity 4 0.131 0.03279 39.578 < 2e-16 ***
## unlockNumberForPhone 1 0.161 0.16063 193.876 < 2e-16 ***
## activity:unlockNumberForPhone 4 0.012 0.00311 3.755 0.00468 **
## Residuals 10697 8.863 0.00083
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## everything is significant, including the interaction. So we can say that they got faster
## at the activity as they did it more
#################
#### Information Scraps
## Get the list.it notes data; string columns are read as factors.
listit <- read.csv("listit-notes.csv", header = TRUE, stringsAsFactors = TRUE)
## Look for outliers in note length. ggplot() is used instead of the deprecated
## qplot() so this plot is built like every other plot in the script; the only
## visible change is the x-axis label ("length" rather than "listit$length").
ggplot(listit, aes(x = length)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Remove outliers: keep only notes whose length is below 501
## (cut-off presumably read off the histogram above -- confirm).
listitfiltered <- filter(listit, length < 501)
## Bartlett test to see if the variances are the same.
## H0: the variance of note length is the same in each lifetime group.
## HA: the variances are not the same.
## The formula has to be the first argument. When a data frame is passed first,
## bartlett.test() treats each column as its own sample and ignores the
## grouping, so it compares the columns with each other instead.
bartlett.test(length ~ lifetime_under_one_day, data = listitfiltered)
## NOTE: re-run this call to record its result. The output previously recorded
## here ("data: listitfiltered", Bartlett's K-squared = 172970, df = 5,
## p-value < 2.2e-16) came from the mis-specified call
## bartlett.test(listitfiltered, length ~ lifetime_under_one_day), which
## compared the data frame's columns, so it says nothing about whether the
## lifetime groups have equal variances.
## H0: notes that live less than a day have the same mean length as notes that live longer.
## HA: the mean lengths differ.
## var.equal = FALSE requests the Welch test, which does not assume that the
## two groups share a variance.
t.test(
  length ~ lifetime_under_one_day,
  data = listitfiltered,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: length by lifetime_under_one_day
## t = 8.4264, df = 268.5, p-value = 2.192e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 40.55129 65.27857
## sample estimates:
## mean in group FALSE mean in group TRUE
## 96.24305 43.32812
## p-value = 2.192e-15
## note, var.equal = F based on the bartlett test
##############
#### Micro- vs. Macro
## Load the addition-task timings; string columns are converted to factors.
mmtask <- read.csv(
  "cs376_addition_tasktime.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
## Cross-tabulate format against interruption to see how many observations
## fall in each cell of the design.
table(mmtask[["format"]], mmtask[["interrupted"]])
##
## no yes
## macro 36 36
## micro 36 36
#36 people did each task once
## Look at the data: mean and standard deviation of task time in each cell.
mmtask %>%
  group_by(format, interrupted) %>%
  summarize(
    mean = round(mean(task_time), 2),
    sd = round(sd(task_time), 2)
  )
## Source: local data frame [4 x 4]
## Groups: format [?]
##
## format interrupted mean sd
## <fctr> <fctr> <dbl> <dbl>
## 1 macro no 32.61 9.02
## 2 macro yes 47.26 18.16
## 3 micro no 87.27 54.44
## 4 micro yes 86.20 21.49
## Box plots of task time by format, one panel per interruption condition.
ggplot(mmtask, aes(x = factor(format), y = task_time)) +
  geom_boxplot() +
  facet_wrap(~interrupted)
## Another way: histograms laid out as a format-by-interrupted grid.
ggplot(mmtask, aes(x = task_time)) +
  geom_histogram() +
  facet_grid(format ~ interrupted)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Ooh, some outliers in the micro/no cell. Since they're all in one cell, they should be
## inspected individually to see if something interesting is happening there.
## List every observation with a task time above 149.
mmtask %>%
  filter(task_time > 149)
## format interrupted task_time user_id
## 1 micro no 306.999 07dc9816cffde43991aa691749a88701
## 2 micro no 254.048 7822f0f71628f8517df838a7896cb9fd
## 3 micro no 151.984 3d39466d9ca470239cd686666e7b6355
## Repeated-measures ANOVA of task time on format, interruption, and their
## interaction, with user_id as the repeated-measures unit.
anova <- aov(mmtask$task_time ~ mmtask$format * mmtask$interrupted ## the basic anova: both main effects plus their interaction
+ Error(mmtask$user_id / (mmtask$format * mmtask$interrupted))) ## the repeated measures part: format and interrupted are nested within user_id
summary(anova)
##
## Error: mmtask$user_id
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35 51544 1473
##
## Error: mmtask$user_id:mmtask$format
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$format 1 78841 78841 62.28 2.81e-09 ***
## Residuals 35 44308 1266
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: mmtask$user_id:mmtask$interrupted
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$interrupted 1 1659 1659.3 2.691 0.11
## Residuals 35 21583 616.7
##
## Error: mmtask$user_id:mmtask$format:mmtask$interrupted
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$format:mmtask$interrupted 1 2223 2222.9 4.624 0.0385 *
## Residuals 35 16824 480.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## How to interpret:
## This part of the output shows that there is a main effect of task format
## Error: mmtask$user_id:mmtask$format
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$format 1 78841 78841 62.28 2.81e-09 ***
## Residuals 35 44308 1266
## This shows that being interrupted on its own is not significant
## Error: mmtask$user_id:mmtask$interrupted
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$interrupted 1 1659 1659.3 2.691 0.11
## Residuals 35 21583 616.7
## This shows that there is an interaction effect between the format
## and being interrupted
## Error: mmtask$user_id:mmtask$format:mmtask$interrupted
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$format:mmtask$interrupted 1 2223 2222.9 4.624 0.0385 *
## Residuals 35 16824 480.7
## But maybe those outliers are driving the interaction effect, so
## drop every participant who has an outlying observation (task time above 149)
## from the analysis -- all of their rows, not just the outlying ones.
temp <- select(filter(mmtask, task_time > 149), user_id)
mmtaskno <- mmtask %>%
  filter(!(user_id %in% temp$user_id))
## Re-check the cell counts after dropping those participants.
table(mmtaskno[["format"]], mmtaskno[["interrupted"]])
##
## no yes
## macro 33 33
## micro 33 33
ggplot(mmtaskno, aes(x = task_time)) +
  geom_histogram() +
  facet_grid(format ~ interrupted)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Same repeated-measures ANOVA as before, refitted on the data without the
## participants who had outlying observations.
anova2 <- aov(mmtaskno$task_time ~ mmtaskno$format * mmtaskno$interrupted ## the basic anova: both main effects plus their interaction
+ Error(mmtaskno$user_id / (mmtaskno$format * mmtaskno$interrupted))) ## the repeated measures part: format and interrupted are nested within user_id
summary(anova2)
##
## Error: mmtaskno$user_id
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 32 23438 732.4
##
## Error: mmtaskno$user_id:mmtaskno$format
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtaskno$format 1 48275 48275 116.5 3.38e-12 ***
## Residuals 32 13259 414
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: mmtaskno$user_id:mmtaskno$interrupted
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtaskno$interrupted 1 5416 5416 37.19 8.16e-07 ***
## Residuals 32 4660 146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: mmtaskno$user_id:mmtaskno$format:mmtaskno$interrupted
## Df Sum Sq Mean Sq F value Pr(>F)
## mmtaskno$format:mmtaskno$interrupted 1 243 242.98 2.582 0.118
## Residuals 32 3012 94.12
## This makes more sense. The interpretation, once the outliers are removed, is that
## there are main effects of both task format and being interrupted. The interaction
## of the two is no longer significant (p = 0.118). This is why it's important to check for outliers first!