# Load the RTE annotation file and take a first look at it.
# NOTE(review): the column roles used below are inferred from how this
# session uses them (X.hypothesis = annotator answer, value = reference
# label, unit_id = item identifier) -- confirm against the CSV header.
d = read.csv("rte.anno.csv")
names(d)

# Project helpers. dfagg() is called below but is not defined in this file.
# The original session tried ~/analysis/util.R, ~/analysis/main.R and
# ~/searchrel/util.R before this path; this is the only one that is sourced
# again later on, so it is presumably the file that provides dfagg()
# -- TODO confirm and restore another source() call if one is needed.
source("~/searchrel/R/util.R")

# Number of rows (judgments) per unit. by() takes (data, INDICES, FUN), so the
# data frame must be the first argument and the grouping vector the second.
by(d, d$unit_id, nrow)

# Distribution of the per-unit row counts, via the project helper.
table(dfagg(d, d$unit_id, nrow))

# Annotator answer as a logical: TRUE when the answer string is "Yes".
d$j = d$X.hypothesis == "Yes"

# Agreement between individual judgments and d$value: correlation, then the
# raw fraction of rows where the two are equal.
cor(d$j, d$value)
mean(d$j == d$value)

# Overall counts of TRUE/FALSE judgments, and the FALSE count on its own.
table(d$j)
table(d$j)[['FALSE']]
# Per-unit vote tally: for every unit_id, the number of rows, the number of
# TRUE judgments and the number of FALSE judgments.
#
# The counts are taken from the group's own rows (x$j), not from the whole
# data set (d$j). sum() on the logical vector is used instead of
# table(x$j)[['TRUE']] because the table lookup fails with "subscript out of
# bounds" whenever a group contains no TRUE (or no FALSE) judgments.
# NOTE(review): dfagg() comes from the sourced util file; the code below
# treats its result as a data frame with one row per unit -- confirm.
u = dfagg(d, d$unit_id, function(x) {
  list(num_votes = nrow(x), num_yes = sum(x$j), num_no = sum(!x$j))
})

# Inspect the tally, then only the units that received at least three votes.
# The column is spelled out in full (num_votes) rather than relying on `$`
# partial matching of "num_vote".
head(u)
head(u[u$num_votes >= 3, ])
# Reference label per unit: the `value` of the first row of each group.
# NOTE(review): this assumes every row of a unit carries the same `value`
# -- confirm against the data.
real = dfagg(d, d$unit_id, function(x) x[1, 'value'])

# `real` and `u` are compared position by position below, so check that both
# are in the same unit order before trusting the comparison.
all(names(real) == row.names(u))
cbind(u, real)

# Majority decision per unit: TRUE when "Yes" votes outnumber "No" votes,
# FALSE when "No" votes outnumber "Yes" votes, NA when they are tied.
u$decision = NA
u$decision[u$num_yes > u$num_no] = TRUE
u$decision[u$num_yes < u$num_no] = FALSE
table(u$decision)

# Fraction of units whose majority decision equals the reference label.
# Tied units (NA decision) are dropped by na.rm. The comparison is made
# against the separate `real` vector, which is the only place the reference
# labels are held here (u$value / u$real are never assigned in this code).
mean(u$decision == real, na.rm = TRUE)

# savehistory() only makes sense in an interactive console session.
if (interactive()) savehistory()
# Second pass: reload the helpers, rebuild the per-unit tally and re-check
# the accuracy figures, including on units with at least three votes.
names(d)
source("~/searchrel/R/util.R")

# NOTE(review): excel() is a helper from the sourced util file; presumably it
# opens the data frame in a spreadsheet for manual inspection -- confirm.
excel(d)

# Per-unit tally (rows, TRUE judgments, FALSE judgments) and reference label
# (the `value` of the first row of each unit).
u = dfagg(d, d$unit_id, function(x) {
  list(num_votes = nrow(x), num_yes = sum(x$j), num_no = sum(!x$j))
})
real = dfagg(d, d$unit_id, function(x) x[1, 'value'])
cbind(u, real)

# Majority decision per unit; ties keep the initial NA.
u$decision = NA
u$decision[u$num_yes > u$num_no] = TRUE
u$decision[u$num_yes < u$num_no] = FALSE

# Accuracy of the individual judgments, for comparison with the majority.
mean(d$j == d$value)

# Sanity checks on sizes: units in the tally, reference labels, units with at
# least three votes, and units without / with a majority decision.
# Counts of non-NA entries use sum(); length() returns the total number of
# elements regardless of NA and does not accept an na.rm argument.
nrow(u)
length(real)
sum(u$num_votes >= 3)
sum(is.na(u$decision))
sum(!is.na(u$decision))
sum(!is.na(u$decision == real))

# Same data ordered by unit, to read each unit's judgments side by side.
excel(d[order(d$unit_id), ])

# Majority-decision accuracy over all units with a decision.
mean(u$decision == real, na.rm = TRUE)

# Majority-decision accuracy restricted to units with at least three votes.
gt3 = u$num_votes >= 3
mean(u$decision[gt3] == real[gt3], na.rm = TRUE)

# Distribution of votes per unit, computed with the helper and with base by()
# as a cross-check.
table(dfagg(d, d$unit_id, nrow))
table(by(d, d$unit_id, nrow))

# Counts behind the 3+ vote accuracy: correct decisions and decided units.
sum(u$decision[gt3] == real[gt3], na.rm = TRUE)
sum(!is.na(u$decision[gt3]))

# NOTE(review): hand-typed ratio kept from the original session; presumably
# correct / decided units from one of the figures above -- confirm which.
76/80
