In [2]:
%load_ext rpy2.ipython
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
In [3]:
import pandas
In [4]:
listit_notes = pandas.read_csv('listit-notes.csv')
In [5]:
print(listit_notes)
      user_id  length  n_words  n_urls     lifetime  lifetime_under_one_day
0           6     105       20       0  3726466.307                   False
1           6      47        9       0  3724781.323                   False
2           6      23        4       0  3724805.340                   False
3           6      40        8       0  3473260.357                   False
4           6     116        2       2  3722957.374                   False
5           6     216       35       0  3461050.390                   False
6           6      49        7       0  3046172.407                   False
7           6      41        9       0  3460890.423                   False
8           6      61       10       0  3013096.440                   False
9           6     235        3       3  3452659.456                   False
10          6      98       13       0  3461149.473                   False
11          6      80       13       0  2946004.489                   False
12          6     133       20       0  2945732.506                   False
13          6      45        6       0  2920422.523                   False
14          6     140       27       0  2910342.539                   False
15          6      42        8       0  2251758.556                   False
16          6      27        4       0  2249488.572                   False
17          6     291       56       0  2184487.589                   False
18          6     106       20       0  2159628.605                   False
19          6     222       28       1  2159130.622                   False
20          6     311       39       1  1923875.639                   False
21          6     137       22       0  1923178.656                   False
22          6      75       13       0  1835289.673                   False
23          6     170       25       0  1818411.690                   False
24          6      79        6       0  1752407.707                   False
25          6     189       33       0  1752286.723                   False
26          6      96       10       1  1749247.740                   False
27          6      46        7       0     1197.000                    True
28          6     310       36       1  1745195.775                   False
29          6     104       15       0  1740774.792                   False
...       ...     ...      ...     ...          ...                     ...
1595       93      10        1       0  1654711.428                   False
1596       93     166       31       0  1592599.442                   False
1597       93      95       16       0        8.000                    True
1598       93     237       35       0  1568138.479                   False
1599       93      86        5       1  1555017.493                   False
1600       93      28        4       0  1551173.506                   False
1601       93     179       33       0  1480346.521                   False
1602       93     730      121       0  1408806.535                   False
1603       93      89        4       1  1392273.548                   False
1604       94      10        1       0       55.000                    True
1605       94      42        8       0     7875.000                    True
1606       94      39        7       0     3576.000                    True
1607       94     156        9       3  1658022.609                   False
1608       94      41        6       0  1667098.621                   False
1609       94      61        7       0  1667182.633                   False
1610       94      52        7       0  1667148.645                   False
1611       94      47        9       0    11061.000                    True
1612       94     348       62       0  1634796.672                   False
1613       94     361       70       0  1474399.685                   False
1614       94      52        8       0  1599378.697                   False
1615       94      46        8       0  1599405.709                   False
1616       94      24        3       0  1599491.723                   False
1617       94     169       33       0  1473259.736                   False
1618       94      31        7       0  1473776.748                   False
1619       96      68       10       0  1723130.763                   False
1620       96      31        7       0  1723241.773                   False
1621       96     213       17       3  1643269.784                   False
1622       96      67        5       1  1642882.794                   False
1623       96      88        6       1  1642993.804                   False
1624       96     254       47       0  1613874.815                   False

[1625 rows x 6 columns]
In [8]:
%Rpush listit_notes
/home/geza/.local/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.
  res = PandasDataFrame.from_items(items)
In [9]:
%%R

summary(listit_notes)
    user_id          length          n_words           n_urls       
 Min.   : 6.00   Min.   :   0.0   Min.   :  0.00   Min.   : 0.0000  
 1st Qu.:38.00   1st Qu.:  24.0   1st Qu.:  4.00   1st Qu.: 0.0000  
 Median :46.00   Median :  55.0   Median :  8.00   Median : 0.0000  
 Mean   :48.31   Mean   : 103.3   Mean   : 16.01   Mean   : 0.1975  
 3rd Qu.:60.00   3rd Qu.: 132.0   3rd Qu.: 20.00   3rd Qu.: 0.0000  
 Max.   :96.00   Max.   :1924.0   Max.   :322.00   Max.   :36.0000  
    lifetime       lifetime_under_one_day
 Min.   :      2   Mode :logical         
 1st Qu.:1462458   FALSE:1433            
 Median :1757590   TRUE :192             
 Mean   :1587482                         
 3rd Qu.:2073181                         
 Max.   :7162105                         
In [10]:
twitch_unlocks = pandas.read_csv('twitch-unlocks.csv')
%Rpush twitch_unlocks
/home/geza/.local/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.
  res = PandasDataFrame.from_items(items)
In [12]:
print(twitch_unlocks)
        phoneID  unlockNumberForPhone         activity  duration  \
0             0                     1    Census/People      2947   
1             0                     2    Census/People      2543   
2             0                     3    Census/People      1431   
3             0                     4     Census/Dress      5194   
4             0                     5    Census/Energy      6418   
5             0                     6  Census/Activity      7791   
6      11487108                     1     Census/Dress      5800   
7      11487108                     2     Census/Dress      7664   
8      11487108                     3    Census/People      6421   
9      11487108                     4    Census/Energy      6770   
10     26742641                     1    Census/People      1879   
11     26742641                     2     Census/Dress      3680   
12     26742641                     3     Census/Dress       429   
13     26742641                     4  Census/Activity      4884   
14     26742641                     5    Census/People      1211   
15     26742641                     6  Census/Activity      2407   
16     26742641                     7    Census/People      1008   
17     26742641                     8  Census/Activity      3012   
18     26742641                     9     Census/Dress      2058   
19     26742641                    10    Census/People      1889   
20     26742641                    11    Census/Energy      1736   
21     26742641                    12     Census/Dress      1133   
22     26742641                    13  Census/Activity      3617   
23     26742641                    14    Census/Energy      1436   
24     26742641                    15  Census/Activity      5440   
25     26742641                    16    Census/Energy      1752   
26     26742641                    17    Census/People      1264   
27     26742641                    18     Census/Dress       791   
28     26742641                    19  Census/Activity      2523   
29     26742641                    20    Census/Energy      1509   
...         ...                   ...              ...       ...   
10677  fec9b094                   940    Census/Energy      1258   
10678  fec9b094                   941    Census/Energy      1059   
10679  fec9b094                   942    Census/People      1212   
10680  fec9b094                   943    Census/People      1056   
10681  fec9b094                   944    Census/People      1300   
10682  fec9b094                   945    Census/People      1302   
10683  fec9b094                   946    Census/Energy       932   
10684  fec9b094                   947    Census/Energy      1038   
10685  fec9b094                   948    Census/Energy      1299   
10686  fec9b094                   949     Census/Dress      1070   
10687  fec9b094                   950    Census/People      1202   
10688  fec9b094                   951    Census/People      1649   
10689  fec9b094                   952  Census/Activity      5318   
10690  fec9b094                   953  Census/Activity      2794   
10691  fec9b094                   954  Slide to Unlock      4050   
10692  fec9b094                   955  Slide to Unlock      1060   
10693  fec9b094                   956  Slide to Unlock      1073   
10694  fec9b094                   957  Slide to Unlock      1017   
10695  fec9b094                   958  Slide to Unlock      9631   
10696  fec9b094                   959  Slide to Unlock      1164   
10697  fec9b094                   960  Slide to Unlock       983   
10698  fec9b094                   961  Slide to Unlock       754   
10699  fec9b094                   962  Slide to Unlock       928   
10700  fec9b094                   963  Slide to Unlock       776   
10701  fec9b094                   964     Census/Dress      1449   
10702  fec9b094                   965    Census/Energy       988   
10703  fec9b094                   966    Census/Energy      4828   
10704  fec9b094                   967  Census/Activity      3776   
10705  fec9b094                   968    Census/People      1497   
10706  fec9b094                   969    Census/Energy      2795   

       transformedDuration  
0                 0.135723  
1                 0.140820  
2                 0.162589  
3                 0.117794  
4                 0.111725  
5                 0.106439  
6                 0.114589  
7                 0.106877  
8                 0.111712  
9                 0.110243  
10                0.151886  
11                0.128392  
12                0.219728  
13                0.119621  
14                0.169517  
15                0.142768  
16                0.177474  
17                0.134985  
18                0.148470  
19                0.151685  
20                0.154922  
21                0.172362  
22                0.128947  
23                0.162447  
24                0.116439  
25                0.154567  
26                0.167712  
27                0.188563  
28                0.141098  
29                0.160446  
...                    ...  
10677             0.167911  
10678             0.175298  
10679             0.169482  
10680             0.175422  
10681             0.166538  
10682             0.166474  
10683             0.180986  
10684             0.176178  
10685             0.166570  
10686             0.174845  
10687             0.169834  
10688             0.156926  
10689             0.117102  
10690             0.137545  
10691             0.125353  
10692             0.175256  
10693             0.174723  
10694             0.177080  
10695             0.100944  
10696             0.171203  
10697             0.178592  
10698             0.190835  
10699             0.181181  
10700             0.189467  
10701             0.162081  
10702             0.178365  
10703             0.119966  
10704             0.127568  
10705             0.160766  
10706             0.137532  

[10707 rows x 5 columns]
In [11]:
%%R

summary(twitch_unlocks)
   phoneID          unlockNumberForPhone   activity            duration   
 Length:10707       Min.   :  1          Length:10707       Min.   :   7  
 Class :character   1st Qu.: 61          Class :character   1st Qu.:1194  
 Mode  :character   Median :177          Mode  :character   Median :1614  
                    Mean   :246                             Mean   :2141  
                    3rd Qu.:390                             3rd Qu.:2444  
                    Max.   :969                             Max.   :9990  
 transformedDuration
 Min.   :0.1000     
 1st Qu.:0.1422     
 Median :0.1578     
 Mean   :0.1570     
 3rd Qu.:0.1701     
 Max.   :0.6148     
In [13]:
feedme = pandas.read_csv('feedme.csv')
%Rpush feedme
/home/geza/.local/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.
  res = PandasDataFrame.from_items(items)
In [14]:
print(feedme)
   preference
0       aspen
1       aspen
2      sierra
3       aspen
4     neither
5      sierra
6      sierra
7     neither
8     neither
9       aspen
10      aspen
11      aspen
12      aspen
13      aspen
14     sierra
15     sierra
16    neither
17      aspen
18     sierra
19      aspen
20      aspen
21      aspen
22      aspen
23      aspen
24     sierra
25     sierra
26      aspen
27    neither
28      aspen
29      aspen
30     sierra
31      aspen
32      aspen
33      aspen
34      aspen
35      aspen
36     sierra
37      aspen
38      aspen
39      aspen
40      aspen
41      aspen
42      aspen
43      aspen
44      aspen
45      aspen
46     sierra
47      aspen
48    neither
49     sierra
50     sierra
51     sierra
52     sierra
53      aspen
54     sierra
55      aspen
56     sierra
57      aspen
58     sierra
In [15]:
%%R

summary(feedme)
  preference       
 Length:59         
 Class :character  
 Mode  :character  
In [40]:
addition_tasktime = pandas.read_csv('cs376_addition_tasktime.csv')
%Rpush addition_tasktime
/home/geza/.local/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.
  res = PandasDataFrame.from_items(items)
In [32]:
print(addition_tasktime)
    format interrupted  task_time                           user_id
0    macro          no     28.340  7cfe7ac2055c834ca66b3249aec4e298
1    macro         yes     47.008  7cfe7ac2055c834ca66b3249aec4e298
2    micro         yes     85.826  7cfe7ac2055c834ca66b3249aec4e298
3    micro          no     60.230  7cfe7ac2055c834ca66b3249aec4e298
4    micro         yes     85.034  86c6f9262be1745c6734b284f39438c5
5    micro          no     53.706  86c6f9262be1745c6734b284f39438c5
6    macro          no     21.876  86c6f9262be1745c6734b284f39438c5
7    macro         yes     74.751  86c6f9262be1745c6734b284f39438c5
8    macro         yes     18.845  cb587868dac25a9ac892a05dbbebb398
9    macro          no     14.954  cb587868dac25a9ac892a05dbbebb398
10   micro         yes     59.972  cb587868dac25a9ac892a05dbbebb398
11   micro          no     47.353  cb587868dac25a9ac892a05dbbebb398
12   micro          no     59.574  05b2d505137bf545e05481cfaf7f1d65
13   micro         yes    101.354  05b2d505137bf545e05481cfaf7f1d65
14   macro          no     42.251  05b2d505137bf545e05481cfaf7f1d65
15   macro         yes     78.490  05b2d505137bf545e05481cfaf7f1d65
16   macro         yes     74.092  c1a551370024c80e898bd93b90aaef90
17   macro          no     41.945  c1a551370024c80e898bd93b90aaef90
18   micro         yes     71.894  c1a551370024c80e898bd93b90aaef90
19   micro          no     50.858  c1a551370024c80e898bd93b90aaef90
20   micro         yes     77.861  72dccdb1d4d9dff7df279946c8e9146f
21   micro          no     53.272  72dccdb1d4d9dff7df279946c8e9146f
22   macro         yes     52.518  72dccdb1d4d9dff7df279946c8e9146f
23   macro          no     30.884  72dccdb1d4d9dff7df279946c8e9146f
24   micro          no     53.726  b4951c7ff0a492113f974b046ec95447
25   micro         yes     69.894  b4951c7ff0a492113f974b046ec95447
26   macro          no     17.174  b4951c7ff0a492113f974b046ec95447
27   macro         yes     23.338  b4951c7ff0a492113f974b046ec95447
28   micro         yes     84.720  eef3009de60608e9db5a4072a1438a93
29   micro          no     79.620  eef3009de60608e9db5a4072a1438a93
..     ...         ...        ...                               ...
114  micro         yes     72.493  84c3347f152eacc3c7f1bc10f9f15800
115  micro          no     59.038  84c3347f152eacc3c7f1bc10f9f15800
116  micro         yes     80.458  1ff9f1d16a116a8f9fb81e998b8ee827
117  micro          no     72.280  1ff9f1d16a116a8f9fb81e998b8ee827
118  macro         yes     41.279  1ff9f1d16a116a8f9fb81e998b8ee827
119  macro          no     24.512  1ff9f1d16a116a8f9fb81e998b8ee827
120  micro         yes    113.558  3d39466d9ca470239cd686666e7b6355
121  micro          no    151.984  3d39466d9ca470239cd686666e7b6355
122  macro          no     25.831  3d39466d9ca470239cd686666e7b6355
123  macro         yes     27.609  3d39466d9ca470239cd686666e7b6355
124  macro         yes     46.928  aad675e05dbe43b81da44aa3e3cd88aa
125  macro          no     49.508  aad675e05dbe43b81da44aa3e3cd88aa
126  micro         yes     75.757  aad675e05dbe43b81da44aa3e3cd88aa
127  micro          no     81.492  aad675e05dbe43b81da44aa3e3cd88aa
128  micro         yes     87.234  d3f58312639d199e66a0ba1c693747b0
129  micro          no     77.171  d3f58312639d199e66a0ba1c693747b0
130  macro          no     38.722  d3f58312639d199e66a0ba1c693747b0
131  macro         yes     76.463  d3f58312639d199e66a0ba1c693747b0
132  macro         yes     42.844  d7fbbe816936352f7832f9051e157d13
133  macro          no     29.169  d7fbbe816936352f7832f9051e157d13
134  micro          no    101.316  d7fbbe816936352f7832f9051e157d13
135  micro         yes     85.799  d7fbbe816936352f7832f9051e157d13
136  macro         yes     27.372  ddbb5b6dc4abe0b713108507dee6567e
137  macro          no     28.397  ddbb5b6dc4abe0b713108507dee6567e
138  micro          no     42.069  ddbb5b6dc4abe0b713108507dee6567e
139  micro         yes     49.148  ddbb5b6dc4abe0b713108507dee6567e
140  micro         yes    118.270  4e7af0779d0c2e2cf9b6890b303fdb3c
141  micro          no     71.589  4e7af0779d0c2e2cf9b6890b303fdb3c
142  macro          no     30.619  4e7af0779d0c2e2cf9b6890b303fdb3c
143  macro         yes     35.735  4e7af0779d0c2e2cf9b6890b303fdb3c

[144 rows x 4 columns]
In [33]:
%%R

summary(addition_tasktime)
    format          interrupted          task_time        user_id         
 Length:144         Length:144         Min.   : 14.95   Length:144        
 Class :character   Class :character   1st Qu.: 35.49   Class :character  
 Mode  :character   Mode  :character   Median : 55.96   Mode  :character  
                                       Mean   : 63.34                     
                                       3rd Qu.: 79.87                     
                                       Max.   :307.00                     
In [44]:
%%R

# convert all strings into factors

addition_tasktime$format <- as.factor(addition_tasktime$format)
addition_tasktime$interrupted <- as.factor(addition_tasktime$interrupted)
addition_tasktime$user_id <- as.factor(addition_tasktime$user_id)

summary(addition_tasktime)
   format   interrupted   task_time                                  user_id   
 macro:72   no :72      Min.   : 14.95   05b2d505137bf545e05481cfaf7f1d65:  4  
 micro:72   yes:72      1st Qu.: 35.49   07dc9816cffde43991aa691749a88701:  4  
                        Median : 55.96   1371c74673d1acc76499eb02c9ec1eeb:  4  
                        Mean   : 63.34   186e70893de69c7e6f1d75ba7a2f958b:  4  
                        3rd Qu.: 79.87   1ff9f1d16a116a8f9fb81e998b8ee827:  4  
                        Max.   :307.00   2016ee2ba0c7880433730d55240a7faf:  4  
                                         (Other)                         :120  
In [46]:
%%R

# convert all strings into factors

feedme$preference <- as.factor(feedme$preference)

summary(feedme)
   preference
 aspen  :35  
 neither: 6  
 sierra :18  
In [50]:
%%R

twitch_unlocks$phoneID <- as.factor(twitch_unlocks$phoneID)
twitch_unlocks$activity <- as.factor(twitch_unlocks$activity)

summary(twitch_unlocks)
     phoneID     unlockNumberForPhone            activity       duration   
 fec9b094: 969   Min.   :  1          Census/Activity:2379   Min.   :   7  
 205c57cf: 849   1st Qu.: 61          Census/Dress   :2338   1st Qu.:1194  
 af45d2f7: 727   Median :177          Census/Energy  :2363   Median :1614  
 b928327d: 705   Mean   :246          Census/People  :2463   Mean   :2141  
 1dfebf45: 687   3rd Qu.:390          Slide to Unlock:1164   3rd Qu.:2444  
 5d9ab4f5: 669   Max.   :969                                 Max.   :9990  
 (Other) :6101                                                             
 transformedDuration
 Min.   :0.1000     
 1st Qu.:0.1422     
 Median :0.1578     
 Mean   :0.1570     
 3rd Qu.:0.1701     
 Max.   :0.6148     
                    
In [52]:
%%R

summary(listit_notes)
    user_id          length          n_words           n_urls       
 Min.   : 6.00   Min.   :   0.0   Min.   :  0.00   Min.   : 0.0000  
 1st Qu.:38.00   1st Qu.:  24.0   1st Qu.:  4.00   1st Qu.: 0.0000  
 Median :46.00   Median :  55.0   Median :  8.00   Median : 0.0000  
 Mean   :48.31   Mean   : 103.3   Mean   : 16.01   Mean   : 0.1975  
 3rd Qu.:60.00   3rd Qu.: 132.0   3rd Qu.: 20.00   3rd Qu.: 0.0000  
 Max.   :96.00   Max.   :1924.0   Max.   :322.00   Max.   :36.0000  
    lifetime       lifetime_under_one_day
 Min.   :      2   Mode :logical         
 1st Qu.:1462458   FALSE:1433            
 Median :1757590   TRUE :192             
 Mean   :1587482                         
 3rd Qu.:2073181                         
 Max.   :7162105                         
In [54]:
%%R

library(ggplot2)
library(dplyr)
/home/geza/.local/lib/python3.6/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: 
载入程辑包:‘dplyr’


  warnings.warn(x, RRuntimeWarning)
/home/geza/.local/lib/python3.6/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: The following objects are masked from ‘package:stats’:

    filter, lag


  warnings.warn(x, RRuntimeWarning)
/home/geza/.local/lib/python3.6/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


  warnings.warn(x, RRuntimeWarning)
In [55]:
%%R

temp = table(feedme$preference)
temp = summarize(group_by(feedme, preference), Freq = n() )
temp = as.data.frame(temp)
ggplot(data = temp, aes(x=preference, y= Freq)) + geom_bar(stat="identity")
In [56]:
%%R

chisq.test(table(feedme$preference))
	Chi-squared test for given probabilities

data:  table(feedme$preference)
X-squared = 21.593, df = 2, p-value = 2.047e-05

In [59]:
%%R

fmdfopin = subset(feedme, preference != "neither")
fmdfopin$preference = factor(fmdfopin$preference)  ##this gets rid of "neither" as a factor
chisq.test(table(fmdfopin$preference))
	Chi-squared test for given probabilities

data:  table(fmdfopin$preference)
X-squared = 5.4528, df = 1, p-value = 0.01954

In [60]:
%%R

ggplot(twitch_unlocks, aes(duration)) + geom_histogram() + facet_wrap(~activity)
/home/geza/.local/lib/python3.6/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

  warnings.warn(x, RRuntimeWarning)
In [61]:
%%R

ggplot(twitch_unlocks, aes(transformedDuration)) + geom_histogram() + facet_wrap(~activity)
In [62]:
%%R

summarize(group_by(twitch_unlocks, activity), round(mean(transformedDuration), 3))
# A tibble: 5 x 2
  activity        `round(mean(transformedDuration), 3)`
  <fct>                                           <dbl>
1 Census/Activity                                 0.151
2 Census/Dress                                    0.16 
3 Census/Energy                                   0.157
4 Census/People                                   0.158
5 Slide to Unlock                                 0.16 
In [63]:
%%R

anova <- aov(transformedDuration ~ activity, data=twitch_unlocks)
summary(anova)
               Df Sum Sq Mean Sq F value Pr(>F)    
activity        4  0.131 0.03279   38.84 <2e-16 ***
Residuals   10702  9.036 0.00084                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [64]:
%%R

TukeyHSD(anova)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = transformedDuration ~ activity, data = twitch_unlocks)

$activity
                                         diff           lwr           upr
Census/Dress-Census/Activity     0.0094311585  0.0071225681  0.0117397488
Census/Energy-Census/Activity    0.0055919412  0.0032895183  0.0078943642
Census/People-Census/Activity    0.0074696116  0.0051907583  0.0097484650
Slide to Unlock-Census/Activity  0.0092487906  0.0064131886  0.0120843926
Census/Energy-Census/Dress      -0.0038392172 -0.0061516783 -0.0015267562
Census/People-Census/Dress      -0.0019615469 -0.0042505417  0.0003274479
Slide to Unlock-Census/Dress    -0.0001823679 -0.0030261265  0.0026613908
Census/People-Census/Energy      0.0018776704 -0.0004051041  0.0041604448
Slide to Unlock-Census/Energy    0.0036568494  0.0008180952  0.0064956036
Slide to Unlock-Census/People    0.0017791790 -0.0010404924  0.0045988503
                                    p adj
Census/Dress-Census/Activity    0.0000000
Census/Energy-Census/Activity   0.0000000
Census/People-Census/Activity   0.0000000
Slide to Unlock-Census/Activity 0.0000000
Census/Energy-Census/Dress      0.0000586
Census/People-Census/Dress      0.1330408
Slide to Unlock-Census/Dress    0.9997907
Census/People-Census/Energy     0.1636581
Slide to Unlock-Census/Energy   0.0040438
Slide to Unlock-Census/People   0.4206493

In [65]:
%%R

anova2 <- aov(transformedDuration ~ activity * unlockNumberForPhone, data = twitch_unlocks)
summary(anova2)
                                 Df Sum Sq Mean Sq F value  Pr(>F)    
activity                          4  0.131 0.03279  39.578 < 2e-16 ***
unlockNumberForPhone              1  0.161 0.16063 193.876 < 2e-16 ***
activity:unlockNumberForPhone     4  0.012 0.00311   3.755 0.00468 ** 
Residuals                     10697  8.863 0.00083                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [67]:
%%R

qplot(listit_notes$length)
In [69]:
%%R

# remove outliers, ie notes of length greater than 500
listitfiltered = filter(listit_notes, length < 501)

##  Bartlett tests to see if the variances are the same.
##  H0:   null that the variances in each of the groups (samples) are the same.
##  HA:  they are not
##  What are the chances the true variances are the same given the data we have?
bartlett.test(listitfiltered, length ~ lifetime_under_one_day)
	Bartlett test of homogeneity of variances

data:  listitfiltered
Bartlett's K-squared = 172970, df = 5, p-value < 2.2e-16

In [70]:
%%R

##  p-value < 2.2e-16

## H0:  There is no difference in the length of notes that live less than a day than those that do not.
## HA:  There is a difference.
## What are the chances that the true length of the notes are the same given the data we have?

t.test(length ~ lifetime_under_one_day, data= listitfiltered, var.equal = F)
	Welch Two Sample t-test

data:  length by lifetime_under_one_day
t = 8.4264, df = 268.5, p-value = 2.192e-15
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 40.55129 65.27857
sample estimates:
mean in group FALSE  mean in group TRUE 
           96.24305            43.32812 

In [72]:
%%R

table(addition_tasktime$format, addition_tasktime$interrupted)
       
        no yes
  macro 36  36
  micro 36  36
In [73]:
%%R

summarize(group_by(addition_tasktime, format, interrupted), mean = round(mean(task_time), 2), sd = round(sd(task_time), 2))
# A tibble: 4 x 4
# Groups:   format [?]
  format interrupted  mean    sd
  <fct>  <fct>       <dbl> <dbl>
1 macro  no           32.6  9.02
2 macro  yes          47.3 18.2 
3 micro  no           87.3 54.4 
4 micro  yes          86.2 21.5 
In [74]:
%%R

ggplot(addition_tasktime, aes(factor(format), task_time)) + geom_boxplot() + facet_wrap(~ interrupted)
In [75]:
%%R

ggplot(addition_tasktime, aes(task_time)) + geom_histogram() + facet_grid(format~interrupted)
In [78]:
%%R

## Ooh, some outliers in the micro/no cell.  Since they're all in one cell, they should be inspected
## individually and see if something interesting is happening there.


filter(addition_tasktime, task_time > 149)
  format interrupted task_time                          user_id
1  micro          no   306.999 07dc9816cffde43991aa691749a88701
2  micro          no   254.048 7822f0f71628f8517df838a7896cb9fd
3  micro          no   151.984 3d39466d9ca470239cd686666e7b6355
In [80]:
%%R

anova <- aov(addition_tasktime$task_time ~ addition_tasktime$format * addition_tasktime$interrupted         ## this is the basic anova
             + Error(addition_tasktime$user_id / (addition_tasktime$format * addition_tasktime$interrupted)))  ## this is the repeated measures

summary(anova)
Error: addition_tasktime$user_id
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals 35  51544    1473               

Error: addition_tasktime$user_id:addition_tasktime$format
                         Df Sum Sq Mean Sq F value   Pr(>F)    
addition_tasktime$format  1  78841   78841   62.28 2.81e-09 ***
Residuals                35  44308    1266                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: addition_tasktime$user_id:addition_tasktime$interrupted
                              Df Sum Sq Mean Sq F value Pr(>F)
addition_tasktime$interrupted  1   1659  1659.3   2.691   0.11
Residuals                     35  21583   616.7               

Error: addition_tasktime$user_id:addition_tasktime$format:addition_tasktime$interrupted
                                                       Df Sum Sq Mean Sq
addition_tasktime$format:addition_tasktime$interrupted  1   2223  2222.9
Residuals                                              35  16824   480.7
                                                       F value Pr(>F)  
addition_tasktime$format:addition_tasktime$interrupted   4.624 0.0385 *
Residuals                                                              
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [81]:
%%R

## How to interpret:

## This code snippet show that there is a main effect of task format

## Error: mmtask$user_id:mmtask$format
##                Df Sum Sq Mean Sq F value   Pr(>F)    
##  mmtask$format  1  78841   78841   62.28 2.81e-09 ***
##  Residuals     35  44308    1266  

##  This shows that being interrupted on it's own is not significant

##  Error: mmtask$user_id:mmtask$interrupted
##                    Df Sum Sq Mean Sq F value Pr(>F)
## mmtask$interrupted  1   1659  1659.3   2.691   0.11
## Residuals          35  21583   616.7  

## This shows that there is an interaction effect between the format
## and being interrupted

## Error: mmtask$user_id:mmtask$format:mmtask$interrupted
##                                   Df Sum Sq Mean Sq F value Pr(>F)  
##  mmtask$format:mmtask$interrupted  1   2223  2222.9   4.624 0.0385 *
##  Residuals                        35  16824   480.7     


## But maybe those outliers are driving the interaction effect, so
## we need to remove the three people with outlying observations from the analysis

temp = filter(addition_tasktime, task_time > 149) %>%
        select(user_id)

mmtaskno = filter(addition_tasktime, !(user_id %in% temp$user_id))

table(mmtaskno$format, mmtaskno$interrupted)
       
        no yes
  macro 33  33
  micro 33  33
In [82]:
%%R

ggplot(mmtaskno, aes(task_time)) + geom_histogram() + facet_grid(format~interrupted)
In [83]:
%%R

anova2 <- aov(mmtaskno$task_time ~ mmtaskno$format * mmtaskno$interrupted         ## this is the basic anova
             + Error(mmtaskno$user_id / (mmtaskno$format * mmtaskno$interrupted))) 

summary(anova2)
Error: mmtaskno$user_id
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals 32  23438   732.4               

Error: mmtaskno$user_id:mmtaskno$format
                Df Sum Sq Mean Sq F value   Pr(>F)    
mmtaskno$format  1  48275   48275   116.5 3.38e-12 ***
Residuals       32  13259     414                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: mmtaskno$user_id:mmtaskno$interrupted
                     Df Sum Sq Mean Sq F value   Pr(>F)    
mmtaskno$interrupted  1   5416    5416   37.19 8.16e-07 ***
Residuals            32   4660     146                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: mmtaskno$user_id:mmtaskno$format:mmtaskno$interrupted
                                     Df Sum Sq Mean Sq F value Pr(>F)
mmtaskno$format:mmtaskno$interrupted  1    243  242.98   2.582  0.118
Residuals                            32   3012   94.12               
In [84]:
## this makes more sense.  The interpretation, once removing outliers, is that 
## there is a main effect of task format and being interrupted.  The interaction
## of the two is marginal.  This is why it's important to check for outliers first!