WEBSEARCH-EXERCISE.R
websearch2 = read.csv("//Users/ritaraher/Documents/Coursera/untitled folder/experiments/websearch2.csv")
View(websearch2)
websearch2$Subject = factor(websearch2$Subject) #convert to nominal factor
websearch2$Order = factor(websearch2$Order)
summary(websearch2)
# view descriptive statistics by Engine
library(plyr)
ddply(websearch2, ~ Engine, function(data) summary(data$Searches))
ddply(websearch2, ~ Engine, summarise, Searches=mean(Searches), Searches.sd=sd(Searches))
# graph histograms and boxplot
hist(websearch2[websearch2$Engine == "Bing",]$Searches)
hist(websearch2[websearch2$Engine == "Google",]$Searches)
plot(Searches ~ Engine, data=websearch2) # boxplot
# test the normality assumption (Shapiro-Wilk on each condition) before the paired t-test
shapiro.test(websearch2[websearch2$Engine == "Bing",]$Searches) # Shapiro-Wilk
shapiro.test(websearch2[websearch2$Engine == "Google",]$Searches)
# now test for an order effect -- did counterbalancing work?
library(reshape2)
# for a paired-samples t-test we must use a wide-format table; most
# R fns do not require a wide-format table, but the dcast function
# offers a quick way to translate long-format into wide-format when
# we need it.
websearch2.wide.order = dcast(websearch2, Subject ~ Order, value.var="Searches") # go wide
View(websearch2.wide.order) # verify
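# (aside, a sketch beyond the original exercise: reshape2's melt() is the
# inverse of dcast(), recovering a long-format table if we ever need to go back)
melt(websearch2.wide.order, id.vars="Subject", variable.name="Order", value.name="Searches")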
t.test(websearch2.wide.order$"1", websearch2.wide.order$"2", paired=TRUE) # var.equal is not applicable to a paired test
# finally, the paired-samples t-test
websearch2.wide.tech = dcast(websearch2, Subject ~ Engine, value.var="Searches") # go wide
View(websearch2.wide.tech)
t.test(websearch2.wide.tech$Bing, websearch2.wide.tech$Google, paired=TRUE)
plot(Searches ~ Engine, data=websearch2) # confirm
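# (aside, a sketch beyond the original exercise: a paired-samples t-test assumes
# the per-subject differences are normally distributed, so we can also check the
# differences directly and compute a simple paired-samples Cohen's d from them)
diffs = websearch2.wide.tech$Bing - websearch2.wide.tech$Google # per-subject differences
shapiro.test(diffs) # normality of the paired differences
mean(diffs) / sd(diffs) # paired-samples Cohen's d (mean difference / SD of differences)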
# explore the Effort response; these per-condition ratings are ordinal
library(plyr)
ddply(websearch2, ~ Engine, function(data) summary(data$Effort))
ddply(websearch2, ~ Engine, summarise, Effort.mean=mean(Effort), Effort.sd=sd(Effort))
hist(websearch2[websearch2$Engine == "Bing",]$Effort) # histogram
hist(websearch2[websearch2$Engine == "Google",]$Effort) # histogram
plot(Effort ~ Engine, data=websearch2) # boxplot
# the Effort response is ordinal and within-Ss, so use the nonparametric Wilcoxon signed-rank test
library(coin)
wilcoxsign_test(Effort ~ Engine | Subject, data=websearch2, distribution="exact")
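# (aside, a sketch: since Effort is ordinal, medians and IQRs are a more natural
# descriptive summary than the means reported above)
ddply(websearch2, ~ Engine, summarise, Effort.median=median(Effort), Effort.IQR=IQR(Effort))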
websearch3 = read.csv("//Users/ritaraher/Documents/Coursera/untitled folder/experiments/websearch3.csv")
View(websearch3)
websearch3$Subject = factor(websearch3$Subject) #convert to nominal factor
websearch3$Order = factor(websearch3$Order)
summary(websearch3)
# view descriptive statistics by Engine
library(plyr)
ddply(websearch3, ~ Engine, function(data) summary(data$Searches))
ddply(websearch3, ~ Engine, summarise, Searches=mean(Searches), Searches.sd=sd(Searches))
# graph histograms and boxplot
hist(websearch3[websearch3$Engine == "Bing",]$Searches)
hist(websearch3[websearch3$Engine == "Google",]$Searches)
hist(websearch3[websearch3$Engine == "Yahoo",]$Searches)
plot(Searches ~ Engine, data=websearch3) # boxplot
# first, test for an order effect with a repeated measures ANOVA -- did counterbalancing work?
library(ez)
# ez lets us specify the dependent variable (Searches), the within-Ss
# variable (here Order, to check counterbalancing), and the variable
# that identifies subjects (Subject).
m = ezANOVA(dv=Searches, within=Order, wid=Subject, data=websearch3)
# we then check the model for violations of sphericity. Sphericity is
# the situation where the variances of the differences between all
# combinations of levels of a within-Ss factor are equal. It always
# holds for within-Ss factors that have just 2 levels, but for 3+
# levels, sphericity can be tested with Mauchly's Test of Sphericity.
m$Mauchly # p<.05 indicates a violation
# if no violation, examine the uncorrected ANOVA in m$ANOVA.
# if violation, instead look at m$Sphericity and use the
# Greenhouse-Geisser correction, GGe.
m$ANOVA
# include the corrected DFs for each corrected effect
pos = match(m$`Sphericity Corrections`$Effect, m$ANOVA$Effect) # positions of within-Ss efx in m$ANOVA
m$Sphericity$GGe.DFn = m$Sphericity$GGe * m$ANOVA$DFn[pos] # Greenhouse-Geisser
m$Sphericity$GGe.DFd = m$Sphericity$GGe * m$ANOVA$DFd[pos]
m$Sphericity$HFe.DFn = m$Sphericity$HFe * m$ANOVA$DFn[pos] # Huynh-Feldt
m$Sphericity$HFe.DFd = m$Sphericity$HFe * m$ANOVA$DFd[pos]
m$Sphericity # show results
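# (note: a non-significant Order effect here would indicate that counterbalancing
# worked, i.e., presentation order did not detectably influence the number of searches)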
# now the repeated measures ANOVA of interest: Searches by Engine
library(ez)
# ez lets us specify the dependent variable (Searches), the within-Ss
# variable (Engine), and the variable that identifies
# subjects (Subject).
m = ezANOVA(dv=Searches, within=Engine, wid=Subject, data=websearch3)
# we then check the model for violations of sphericity. Sphericity is
# the situation where the variances of the differences between all
# combinations of levels of a within-Ss factor are equal. It always
# holds for within-Ss factors that have just 2 levels, but for 3+
# levels, sphericity can be tested with Mauchly's Test of Sphericity.
m$Mauchly # p<.05 indicates a violation
# if no violation, examine the uncorrected ANOVA in m$ANOVA.
# if violation, instead look at m$Sphericity and use the
# Greenhouse-Geisser correction, GGe.
m$ANOVA
# include the corrected DFs for each corrected effect
pos = match(m$`Sphericity Corrections`$Effect, m$ANOVA$Effect) # positions of within-Ss efx in m$ANOVA
m$Sphericity$GGe.DFn = m$Sphericity$GGe * m$ANOVA$DFn[pos] # Greenhouse-Geisser
m$Sphericity$GGe.DFd = m$Sphericity$GGe * m$ANOVA$DFd[pos]
m$Sphericity$HFe.DFn = m$Sphericity$HFe * m$ANOVA$DFn[pos] # Huynh-Feldt
m$Sphericity$HFe.DFd = m$Sphericity$HFe * m$ANOVA$DFd[pos]
m$Sphericity # show results
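# (aside, a sketch: if Mauchly's test indicated a violation, the Greenhouse-Geisser
# corrected p-value for the Engine effect can be pulled out directly for reporting;
# "p[GG]" is the column name ez uses, hence the backticks)
m$`Sphericity Corrections`$`p[GG]`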
# manual post hoc pairwise comparisons with paired-samples t-tests
library(reshape2)
websearch3.wide.tech = dcast(websearch3, Subject ~ Engine, value.var="Searches") # go wide
View(websearch3.wide.tech)
ya.bi = t.test(websearch3.wide.tech$Yahoo, websearch3.wide.tech$Bing, paired=TRUE)
go.ya = t.test(websearch3.wide.tech$Google, websearch3.wide.tech$Yahoo, paired=TRUE)
bi.go = t.test(websearch3.wide.tech$Bing, websearch3.wide.tech$Google, paired=TRUE)
p.adjust(c(ya.bi$p.value, go.ya$p.value, bi.go$p.value), method="holm")
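# (aside, a sketch of an alternative: pairwise.t.test can run all three paired
# comparisons with Holm correction in one call, assuming rows are ordered
# consistently by Subject within each Engine level)
pairwise.t.test(websearch3$Searches, websearch3$Engine, paired=TRUE, p.adjust.method="holm")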
# now examine Effort for the 3 engines
library(plyr)
ddply(websearch3, ~ Engine, function(data) summary(data$Effort))
ddply(websearch3, ~ Engine, summarise, Effort.mean=mean(Effort), Effort.sd=sd(Effort))
hist(websearch3[websearch3$Engine == "Bing",]$Effort)
hist(websearch3[websearch3$Engine == "Google",]$Effort)
hist(websearch3[websearch3$Engine == "Yahoo",]$Effort) # new one
plot(Effort ~ Engine, data=websearch3) # boxplot
# as an aside, do the Yahoo effort ratings plausibly follow
# a Poisson distribution?
library(fitdistrplus)
fit = fitdist(websearch3[websearch3$Engine == "Yahoo",]$Effort, "pois", discrete=TRUE)
gofstat(fit) # goodness-of-fit test
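# (note: in gofstat's output, a chi-square goodness-of-fit p-value at or above .05
# is consistent with a Poisson distribution; below .05 suggests a departure from it)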
# Friedman test on Effort
library(coin)
friedman_test(Effort ~ Engine | Subject, data=websearch3, distribution="asymptotic")
# manual post hoc Wilcoxon signed-rank tests for multiple comparisons
bi.ya = wilcox.test(websearch3[websearch3$Engine == "Bing",]$Effort, websearch3[websearch3$Engine == "Yahoo",]$Effort, paired=TRUE, exact=FALSE)
go.ya = wilcox.test(websearch3[websearch3$Engine == "Google",]$Effort, websearch3[websearch3$Engine == "Yahoo",]$Effort, paired=TRUE, exact=FALSE)
bi.go = wilcox.test(websearch3[websearch3$Engine == "Bing",]$Effort, websearch3[websearch3$Engine == "Google",]$Effort, paired=TRUE, exact=FALSE)
p.adjust(c(bi.ya$p.value, go.ya$p.value, bi.go$p.value), method="holm")