# AnalysisofCS376StatsProblems.R

## Start from a clean slate: remove every non-hidden object from the current
## environment (ls() does not list names that start with a dot).
## NOTE(review): this does not detach packages or reset options(); restarting
## the R session is the more thorough way to get a clean state.
rm(list = ls())  ## empty the environment

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Point R at the folder that holds the data files.
# The path below only exists on the original author's machine, so it is
# applied only when present; otherwise the script keeps the current working
# directory, which must then already contain the CSV files.
data_dir <- "C:/Users/rob/Desktop/In Class Stats Exercises"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
dir()  # make sure the data files are in the same folder as this script!

## [1] "AnalysisofCS376StatsProblems.R"       
## [2] "AnalysisofCS376StatsProblems.spin.R"  
## [3] "AnalysisofCS376StatsProblems.spin.Rmd"
## [4] "cs376_addition_tasktime.csv"          
## [5] "feedme.csv"                           
## [6] "listit-notes.csv"                     
## [7] "twitch-unlocks.csv"

# Get the data (the `preference` column is read in as a factor).
fmdf <- read.csv("feedme.csv", header = TRUE, stringsAsFactors = TRUE)

# Examine the data -- look for weird stuff.
# Printed directly so the counts are actually shown.
table(fmdf$preference)

## Another way -- better if you want to plot the data:
## ggplot does not play well with tables, so build a data frame of counts.
temp <- fmdf %>%
  group_by(preference) %>%
  summarize(Freq = n())

temp <- as.data.frame(temp)
names(temp)

## [1] "preference" "Freq"

ggplot(data = temp, aes(x = preference, y = Freq)) + geom_bar(stat = "identity")

## H0: the counts are the same for every preference.  HA: they are not.
## The p-value is the probability of seeing counts at least this uneven if H0
## were true (it is not the probability that H0 is true).
chisq.test(table(fmdf$preference))

## 
##  Chi-squared test for given probabilities
## 
## data:  table(fmdf$preference)
## X-squared = 21.593, df = 2, p-value = 2.047e-05

##p-value = 2.047e-05

# Drop "neither" from consideration.
# grepl() does a pattern match, so this removes every row whose preference
# contains the text "neither".
fmdfopin <- filter(fmdf, !grepl("neither", preference))

## Or, with base R.  Start from the full data (fmdf): fmdfopin has already had
## "neither" removed, so subsetting it again would demonstrate nothing.
## The printed row labels are fmdf's original row numbers.
subset(fmdf, preference != "neither")

##    preference
## 1       aspen
## 2       aspen
## 3      sierra
## 4       aspen
## 5      sierra
## 6      sierra
## 7       aspen
## 8       aspen
## 9       aspen
## 10      aspen
## 11      aspen
## 12     sierra
## 13     sierra
## 14      aspen
## 15     sierra
## 16      aspen
## 17      aspen
## 18      aspen
## 19      aspen
## 20      aspen
## 21     sierra
## 22     sierra
## 23      aspen
## 24      aspen
## 25      aspen
## 26     sierra
## 27      aspen
## 28      aspen
## 29      aspen
## 30      aspen
## 31      aspen
## 32     sierra
## 33      aspen
## 34      aspen
## 35      aspen
## 36      aspen
## 37      aspen
## 38      aspen
## 39      aspen
## 40      aspen
## 41      aspen
## 42     sierra
## 43      aspen
## 44     sierra
## 45     sierra
## 46     sierra
## 47     sierra
## 48      aspen
## 49     sierra
## 50      aspen
## 51     sierra
## 52      aspen
## 53     sierra

## Yet another way: drop the "neither" row from the data frame of counts.
## Filter on the label rather than on its count (the original used
## Freq != 6), so the result does not depend on how many "neither" answers
## there happen to be, or on another level sharing that count.
tempopinion <- filter(temp, preference != "neither")

## call me butter, cause I'm on a roll
## The %in% form is handy when several values must be excluded at once.
fmdfopin <- filter(fmdf, !(preference %in% c("neither")))


## The "neither" rows are gone, but "neither" is still a level of the factor,
## so it shows up in the table with a count of zero.
table(fmdfopin)

## fmdfopin
##   aspen neither  sierra 
##      35       0      18

## Drop the now-empty "neither" level from the factor.
fmdfopin$preference <- droplevels(fmdfopin$preference)
table(fmdfopin)

## fmdfopin
##  aspen sierra 
##     35     18

## Is there a difference between the two remaining preferences?
chisq.test(table(fmdfopin$preference))

## 
##  Chi-squared test for given probabilities
## 
## data:  table(fmdfopin$preference)
## X-squared = 5.4528, df = 1, p-value = 0.01954

#yes

###############
###  Twitch unlocks

## Get the data.
twitch_unlocks <- read.csv("twitch-unlocks.csv", header = TRUE,
                           stringsAsFactors = TRUE)

## Look at the data.
head(twitch_unlocks$unlockNumberForPhone)

## [1] 1 2 3 4 5 6

## Tabulating every distinct unlock number is not a helpful way to look,
## it turns out.
table(twitch_unlocks$unlockNumberForPhone)

## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##  82  80  78  73  68  63  59  58  55  53  52  50  50  50  49  49  49  49 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##  48  47  47  47  46  44  43  42  42  42  40  38  38  39  38  39  39  38 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##  37  38  37  37  37  37  37  38  37  38  38  37  36  36  33  33  34  32 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
##  32  33  33  33  32  32  32  32  32  32  30  30  30  30  29  29  29  28 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##  28  28  28  28  28  28  27  27  27  27  26  26  26  26  26  25  25  25 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
##  25  25  25  25  25  24  24  23  24  24  24  25  24  24  24  24  24  23 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 
##  23  23  23  23  23  22  22  21  21  21  21  21  21  22  22  21  21  20 
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 
##  20  21  21  21  21  21  22  22  22  22  21  20  20  20  20  21  20  21 
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 
##  21  20  20  20  20  20  19  20  20  20  20  20  20  19  20  20  20  20 
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##  20  20  20  20  19  20  20  20  20  18  18  18  18  18  18  17  17  17 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 
##  17  17  16  16  16  16  16  16  16  16  16  16  16  16  16  16  16  16 
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 
##  16  16  16  16  16  17  17  16  16  16  16  17  17  16  16  17  17  17 
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 
##  17  16  16  16  17  17  16  17  17  17  16  16  17  17  17  16  16  16 
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 
##  16  16  16  16  16  16  16  16  16  16  16  16  15  15  15  15  15  15 
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 
##  15  15  14  13  13  13  13  13  13  13  13  13  13  13  13  13  13  13 
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 
##  13  13  13  13  13  13  13  13  13  13  13  13  13  13  13  13  13  13 
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
##  13  13  11  11  11  12  12  11  11  11  10  10  10  10  11  10  10  10 
## 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 
##  10  10  10  10  10  10  10  10  10  10  11  11  10  10  10  10  10  10 
## 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 
##  10  10  10  10   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 
##   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   9   9   9   9   9   9   9   9   9   9  10  10  10  10   9   9   9   9 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
##   9  10  10   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
##   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 
##   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 
##   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 
##   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9   9 
## 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##   9   9   9   9   8   8   8   8   8   8   8   8   8   8   8   8   8   8 
## 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
##   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8 
## 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
##   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8 
## 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 
##   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8   8 
## 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 
##   8   8   8   8   8   8   8   8   8   7   7   7   7   7   7   6   6   6 
## 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 
##   6   6   6   6   6   6   6   6   6   6   6   6   5   5   5   6   6   6 
## 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 
##   6   6   6   6   6   6   6   6   6   6   6   6   6   6   5   6   6   6 
## 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 
##   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6 
## 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 
##   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6 
## 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 
##   6   6   6   6   6   6   6   6   6   6   6   6   5   5   6   6   6   6 
## 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 
##   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6   6 
## 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 
##   6   6   6   6   5   5   5   5   5   5   5   5   5   5   5   5   5   5 
## 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 
##   5   5   5   4   4   4   4   4   4   4   4   4   4   4   4   4   4   4 
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 
##   4   4   4   4   4   3   3   3   3   3   3   3   3   3   3   3   3   3 
## 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 
##   3   3   3   3   3   3   3   3   3   3   2   2   2   2   2   2   2   2 
## 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
## 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 
##   2   2   2   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1

## Distribution of the raw unlock durations, one panel per activity.
ggplot(data = twitch_unlocks, mapping = aes(x = duration)) +
  geom_histogram() +
  facet_wrap(~activity)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Same view for the transformed durations.
ggplot(data = twitch_unlocks, mapping = aes(x = transformedDuration)) +
  geom_histogram() +
  facet_wrap(~activity)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Mean transformed duration per activity, rounded to 3 decimals.
twitch_unlocks %>%
  group_by(activity) %>%
  summarize(round(mean(transformedDuration), 3))

## # A tibble: 5 x 2
##          activity round(mean(transformedDuration), 3)
##            <fctr>                               <dbl>
## 1 Census/Activity                               0.151
## 2    Census/Dress                               0.160
## 3   Census/Energy                               0.157
## 4   Census/People                               0.158
## 5 Slide to Unlock                               0.160

##census activity seems fastest, while slide to unlock seems slowest


# H0: there is no difference in mean time to unlock between the screens.
# HA: at least one screen differs from the others.
## The p-value is the probability of an F value at least this large if H0
## were true.
## The fit is stored as activity_aov (not `anova`) so that the object does not
## mask the stats::anova() function.
activity_aov <- aov(transformedDuration ~ activity, data = twitch_unlocks)

summary(activity_aov)

##                Df Sum Sq Mean Sq F value Pr(>F)    
## activity        4  0.131 0.03279   38.84 <2e-16 ***
## Residuals   10702  9.036 0.00084                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# There is a difference!  But what's different?  From looking at the means,
## it's probably not the case that Slide to Unlock (M: .160) is different from
## Census/Dress (M: .160), but we should check formally.

## H0: there is no difference within a given pair of activities.
## HA: there is a difference.
## Each adjusted p-value is the probability of a difference at least this
## large for that pair if H0 were true, at a 95% family-wise confidence level.
TukeyHSD(activity_aov)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = transformedDuration ~ activity, data = twitch_unlocks)
## 
## $activity
##                                          diff           lwr           upr
## Census/Dress-Census/Activity     0.0094311585  0.0071225681  0.0117397488
## Census/Energy-Census/Activity    0.0055919412  0.0032895183  0.0078943642
## Census/People-Census/Activity    0.0074696116  0.0051907583  0.0097484650
## Slide to Unlock-Census/Activity  0.0092487906  0.0064131886  0.0120843926
## Census/Energy-Census/Dress      -0.0038392172 -0.0061516783 -0.0015267562
## Census/People-Census/Dress      -0.0019615469 -0.0042505417  0.0003274479
## Slide to Unlock-Census/Dress    -0.0001823679 -0.0030261265  0.0026613908
## Census/People-Census/Energy      0.0018776704 -0.0004051041  0.0041604448
## Slide to Unlock-Census/Energy    0.0036568494  0.0008180952  0.0064956036
## Slide to Unlock-Census/People    0.0017791790 -0.0010404924  0.0045988503
##                                     p adj
## Census/Dress-Census/Activity    0.0000000
## Census/Energy-Census/Activity   0.0000000
## Census/People-Census/Activity   0.0000000
## Slide to Unlock-Census/Activity 0.0000000
## Census/Energy-Census/Dress      0.0000586
## Census/People-Census/Dress      0.1330408
## Slide to Unlock-Census/Dress    0.9997907
## Census/People-Census/Energy     0.1636581
## Slide to Unlock-Census/Energy   0.0040438
## Slide to Unlock-Census/People   0.4206493

## But maybe it's the number of times they've done it, not the activity.
## If that's the case, the p-value for the unlock number on the phone should
## be small, but the p-value for activity should not.
## This is a two-way model with both main effects and their interaction.
anova2 <- aov(
  transformedDuration ~ activity + unlockNumberForPhone +
    activity:unlockNumberForPhone,
  data = twitch_unlocks
)
summary(anova2)

##                                  Df Sum Sq Mean Sq F value  Pr(>F)    
## activity                          4  0.131 0.03279  39.578 < 2e-16 ***
## unlockNumberForPhone              1  0.161 0.16063 193.876 < 2e-16 ***
## activity:unlockNumberForPhone     4  0.012 0.00311   3.755 0.00468 ** 
## Residuals                     10697  8.863 0.00083                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

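## Everything is significant, including the interaction.  So the data are consistent with people
## getting faster at the activity as they did it more, at a rate that differs between activities.
## (The ANOVA table itself does not show the direction -- check the sign of the slope, e.g. with lm().)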

#################
#### Information Scraps

# Get the data.
listit <- read.csv("listit-notes.csv", header = TRUE, stringsAsFactors = TRUE)


## Look for outliers in note length.
## (ggplot() + geom_histogram() replaces the deprecated qplot().)
ggplot(listit, aes(length)) + geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Remove outliers: keep only notes with length below 501.
listitfiltered <- filter(listit, length < 501)

##  Bartlett test to see if the variances are the same.
##  H0:  the variances in each of the groups (samples) are the same.
##  HA:  they are not.
##  The p-value is the probability of a test statistic at least this large
##  if H0 were true.
##  The formula goes first and the data frame is passed as `data`; passing the
##  data frame first makes bartlett.test() treat every column as its own
##  sample and ignore the grouping.
bartlett.test(length ~ lifetime_under_one_day, data = listitfiltered)

## NOTE: the output recorded below was produced by the original call,
## bartlett.test(listitfiltered, length ~ lifetime_under_one_day), which
## compared the variances of the data frame's columns with each other
## (hence "data:  listitfiltered" and df = 5).  Re-run the corrected call
## above to get the statistic for the two lifetime groups.
## 
##  Bartlett test of homogeneity of variances
## 
## data:  listitfiltered
## Bartlett's K-squared = 172970, df = 5, p-value < 2.2e-16

##  p-value < 2.2e-16

## H0:  There is no difference in the mean length of notes that live less than
##      a day and those that do not.
## HA:  There is a difference.
## The p-value is the probability of a difference in means at least this large
## if H0 were true.
## var.equal = FALSE requests the Welch test, which does not assume the two
## groups have equal variances.
t.test(length ~ lifetime_under_one_day, data = listitfiltered,
       var.equal = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  length by lifetime_under_one_day
## t = 8.4264, df = 268.5, p-value = 2.192e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  40.55129 65.27857
## sample estimates:
## mean in group FALSE  mean in group TRUE 
##            96.24305            43.32812

## p-value = 2.192e-15
## note, var.equal = F based on the bartlett test


##############
####Micro- vs. Macro

## Get the data.
mmtask <- read.csv("cs376_addition_tasktime.csv", header = TRUE,
                   stringsAsFactors = TRUE)

## Cross-tabulate format by interruption to check how many observations
## fall in each cell of the design.
table(mmtask$format, mmtask$interrupted)

##        
##         no yes
##   macro 36  36
##   micro 36  36

#36 people did each task once

## Look at the data: mean and SD of task time in each format x interrupted
## cell, rounded to 2 decimals.
mmtask %>%
  group_by(format, interrupted) %>%
  summarize(
    mean = round(mean(task_time), 2),
    sd = round(sd(task_time), 2)
  )

## Source: local data frame [4 x 4]
## Groups: format [?]
## 
##   format interrupted  mean    sd
##   <fctr>      <fctr> <dbl> <dbl>
## 1  macro          no 32.61  9.02
## 2  macro         yes 47.26 18.16
## 3  micro          no 87.27 54.44
## 4  micro         yes 86.20 21.49

## Task time by format, one boxplot panel per interruption condition.
ggplot(data = mmtask, mapping = aes(x = factor(format), y = task_time)) +
  geom_boxplot() +
  facet_wrap(~ interrupted)

# Another way: histograms laid out on a format-by-interrupted grid.
ggplot(data = mmtask, mapping = aes(x = task_time)) +
  geom_histogram() +
  facet_grid(format ~ interrupted)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Ooh, some outliers in the micro/no cell.  Since they're all in one cell,
## they should be inspected individually to see if something interesting is
## happening there.  List the observations with a task time above 149.

mmtask %>% filter(task_time > 149)

##   format interrupted task_time                          user_id
## 1  micro          no   306.999 07dc9816cffde43991aa691749a88701
## 2  micro          no   254.048 7822f0f71628f8517df838a7896cb9fd
## 3  micro          no   151.984 3d39466d9ca470239cd686666e7b6355

## Repeated-measures ANOVA of task time on format, interruption, and their
## interaction.  Variables are looked up through `data = mmtask` rather than
## written as mmtask$... inside the formula, so the term labels in the summary
## are plain column names (no "mmtask$" prefix); the F tests are the same.
## Stored as task_aov (not `anova`) so the object does not mask stats::anova().
task_aov <- aov(
  task_time ~ format * interrupted +          ## this is the basic anova
    Error(user_id / (format * interrupted)),  ## this is the repeated measures
  data = mmtask
)

summary(task_aov)

## 
## Error: mmtask$user_id
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35  51544    1473               
## 
## Error: mmtask$user_id:mmtask$format
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## mmtask$format  1  78841   78841   62.28 2.81e-09 ***
## Residuals     35  44308    1266                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: mmtask$user_id:mmtask$interrupted
##                    Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$interrupted  1   1659  1659.3   2.691   0.11
## Residuals          35  21583   616.7               
## 
## Error: mmtask$user_id:mmtask$format:mmtask$interrupted
##                                  Df Sum Sq Mean Sq F value Pr(>F)  
## mmtask$format:mmtask$interrupted  1   2223  2222.9   4.624 0.0385 *
## Residuals                        35  16824   480.7                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## How to interpret:

## This code snippet shows that there is a main effect of task format

## Error: mmtask$user_id:mmtask$format
##                Df Sum Sq Mean Sq F value   Pr(>F)    
##  mmtask$format  1  78841   78841   62.28 2.81e-09 ***
##  Residuals     35  44308    1266  

##  This shows that being interrupted on its own is not significant

##  Error: mmtask$user_id:mmtask$interrupted
##                    Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$interrupted  1   1659  1659.3   2.691   0.11
## Residuals          35  21583   616.7  

## This shows that there is an interaction effect between the format
## and being interrupted

## Error: mmtask$user_id:mmtask$format:mmtask$interrupted
##                                   Df Sum Sq Mean Sq F value Pr(>F)  
##  mmtask$format:mmtask$interrupted  1   2223  2222.9   4.624 0.0385 *
##  Residuals                        35  16824   480.7     


## But maybe those outliers are driving the interaction effect, so remove the
## three people with outlying observations from the analysis entirely
## (all of their rows, not just the outlying ones).

## IDs of the users who have any task time above 149.
temp <- mmtask %>%
  filter(task_time > 149) %>%
  select(user_id)

## Keep every row whose user is not in that list.
mmtaskno <- mmtask %>%
  filter(!(user_id %in% temp$user_id))

## Check that the remaining cells are still balanced.
table(mmtaskno$format, mmtaskno$interrupted)

##        
##         no yes
##   macro 33  33
##   micro 33  33

## Histograms of task time per format x interrupted cell, outlier users removed.
ggplot(mmtaskno, aes(task_time)) +
  geom_histogram() +
  facet_grid(format ~ interrupted)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Same repeated-measures ANOVA as for the full data, refit without the
## outlier users.  Variables are looked up through `data = mmtaskno`, so the
## term labels in the summary are plain column names (no "mmtaskno$" prefix);
## the F tests are the same.
task_aov_no_outliers <- aov(
  task_time ~ format * interrupted +          ## this is the basic anova
    Error(user_id / (format * interrupted)),  ## this is the repeated measures
  data = mmtaskno
)

summary(task_aov_no_outliers)

## 
## Error: mmtaskno$user_id
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 32  23438   732.4               
## 
## Error: mmtaskno$user_id:mmtaskno$format
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## mmtaskno$format  1  48275   48275   116.5 3.38e-12 ***
## Residuals       32  13259     414                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: mmtaskno$user_id:mmtaskno$interrupted
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## mmtaskno$interrupted  1   5416    5416   37.19 8.16e-07 ***
## Residuals            32   4660     146                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: mmtaskno$user_id:mmtaskno$format:mmtaskno$interrupted
##                                      Df Sum Sq Mean Sq F value Pr(>F)
## mmtaskno$format:mmtaskno$interrupted  1    243  242.98   2.582  0.118
## Residuals                            32   3012   94.12

## This makes more sense.  The interpretation, once the outliers are removed, is that
## there is a main effect of task format and a main effect of being interrupted.  The interaction
## of the two is no longer significant (p = 0.118).  This is why it's important to check for outliers first!

# AnalysisofCS376StatsProblems.R

# rob

# Tue Nov 08 16:33:19 2016