전산통계학 9차 과제 - 싸인테스트, factorial experiments, 카테고리칼데이터
코딩 공부/R-전산 통계학 2020. 2. 13. 19:121번) "Sign Test' 의 p-value 를 구하는 함수를 만들 것인데, 단 Q* = Q- = #(the number) of 'data < M0' 으로 설정하세요,
그리고, 지난 8차 과제의 "DATA <- c(35,65,48,40,70,50,58,36,47,41,49,39,34,33,31), Mu zero=55" 에서 "Equal DATA 인 c(55,55,55,55,55)" 를 추가하세요
library(DescTools)
ADDDATA <- c(35,65,48,40,70,50,58,36,47,41,49,39,34,33,31
,55,55,55,55,55)
sm <- function(x, muzero, side) {
equal=0 ; low=0 ; high=0
for (i in 1: length(x)) {
if(x[i]==muzero) {equal=equal+1}
else if (x[i]<muzero) {low=low+1}
else {high= high+1}
}
qstar.a <- low ; qstar.c <- high
qstar.b <- low+equal ; qstar.d <- high+equal
if (side=="two.sided") {
if(low<high) {pvalue <- min((1- pbinom(qstar.b-1, length(x),0.5))*2, pbinom(qstar.b, length(x),0.5)*2)}
else {pvalue <- min((1- pbinom(qstar.a-1, length(x),0.5))*2, pbinom(qstar.a, length(x),0.5)*2)}
}
if (side=="less")
{pvalue <- 1-pbinom(qstar.a - 1, length(x), 0.5)}
else if (side=="greater")
{pvalue <- pbinom(qstar.b , length(x), 0.5)}
cat("\n",side ,"of Sign-Test","\np-value =",pvalue)
}
sm(ADDDATA, 55, "two.sided")
sm(ADDDATA, 55, "less")
sm(ADDDATA, 55, "greater")
이 식을, 이 알고리즘을 이해하기 위해서는 학부생들의 'Handling Zeros' 에 대해서 엄밀하고 자세히 알고 계셔야 하기 때문에 다음과 같은 요약사진을 사진으로 올리도록 하겠습니다
2번) 'Factorial experiments' 에 대해서 다룰 것인데, 프로그램의 결과값으로 '1-interaction 의 유무, 2-테이블(T.S & p-value), 3-테스트의 결론' 이 나오게끔 만들어 주세요 (아래 사진은 필자의 결과값 예시입니다)
data <- matrix(c(40,36,43,36,34,29,
39,36,33,32,26,25,
32,34,29,26,23,24,
33,27,25,20,22,18),6)
A <- gl(4,6)
B <- factor (rep (1:2,each=3, len=24))
C <- gl(8,3)
fac <- function (data, A, B, C, alpha) {
n <- length(data)/ length( levels(C) ) ; a <- length( levels(A) ) ; b <- length( levels(B) )
dftrt <- a*b-1 ; dfa <- a-1 ; dfb <- b-1 ; dfab <- (a-1)*(b-1) ; dferror <- a*b*(n-1)
t... <- sum(data) ; tij. <- matrix(tapply(data, C ,sum),2) ; ti.. <- tapply(data, A, sum) ; t.j. <- tapply(data, B, sum)
sstrt <- sum(tij.^2/n) - t...^2/(a*b*n) ; ssa <- sum(ti..^2/(b*n)) - t...^2/(a*b*n)
ssb <- sum(t.j.^2/(a*n)) - t...^2/(a*b*n) ; ssab <- sstrt-ssa-ssb ; sstot <- sum(data^2) -t...^2/(a*b*n)
sserror <- sstot-sstrt
mstrt <- sstrt/dftrt ; msa <- ssa/dfa ; msb <- ssb/dfb ; msab <- ssab/dfab ; mserror <- sserror/dferror
f1 <- mstrt/mserror
f2 <- msa/mserror
f3 <- msb/mserror
f4 <- msab/mserror
# interaction Test
inter.pvalue <- pf (f4, dfab, dferror, lower.tail = F)
inter.table <- matrix(c(dftrt, dfa, dfb, dfab, dferror, NA,
sstrt, ssa, ssb, ssab ,sserror, sstot,
mstrt, msa, msb, msab ,mserror, NA
,NA, NA, NA, f4, NA, NA ), nrow = 6, ncol = 4, dimnames = list(c("Treatment","A","B","A*B","Error","Total"), c("D.F", "S.S", "M.S", "F*")) )
# accept H0 = no interaction
if (inter.pvalue > alpha) {
test1p <- pf (f2, dfa, dferror, lower.tail = F)
test2p <- pf (f3, dfb, dferror, lower.tail = F)
ifelse (test1p < alpha, answer1 <- "Test 1 is reject H0", "Test 1 is accept H0")
ifelse (test2p < alpha, answer2 <- "Test 2 is reject H0", "Test 2 is accept H0")
Ans <- c(answer1, answer2)
# Table
table <- matrix(c(dftrt, dfa, dfb, dfab, dferror, NA,
sstrt, ssa, ssb, ssab ,sserror, sstot,
mstrt, msa, msb, msab ,mserror, NA
,f1, f2, f3, NA, NA, NA ), nrow = 6, ncol = 4, dimnames = list(c("Treatment","A","B","A*B","Error","Total"), c("D.F", "S.S", "M.S", "F*")) )
result <- list(table, c(f2,f3), c(test1p, test2p), Ans)
names(result) <- c("Main effect Table (Nonexistente interaction )","F*","p-value","Answer of Testing")
}
# reject H0 = exist interaction
if (inter.pvalue < alpha) {
N <- length(data)
oneway <- gl(8,3)
k <- nlevels(oneway)
odftrt <- k-1 ; odferror <- N-k
ot.. <- sum(data)
oti. <- tapply(data, oneway, sum)
osstot <- sum(data^2) - (ot..^2/N)
osstrt <- sum(oti.^2)/n -(ot..^2/N)
osserror <- osstot-osstrt
omstrt <- osstrt/odftrt ; omserror <- osserror/odferror
of <- omstrt/omserror
op <- pf(of ,k-1, N-k, lower.tail = F)
ifelse (op < alpha, answer3 <- "reject H0", "accept H0")
Ans <- c(answer3)
# Table
table <- matrix(c(k-1, N-k, N-1,
osstrt, osserror, osstot,
omstrt, omserror, NA
,of,NA ,NA ), nrow = 3, ncol = 4, dimnames = list(c("Treatment","Error","Total"), c("D.F", "S.S", "M.S", "F*")) )
result <- list(inter.table,table,of,op,Ans)
names(result) <- c("Interaction Table","One-way Table (Exist interaction)","F*","p-value","Answer of Testing")
}
result
}
fac(data,A,B,C,0.05)
3번) 주어진 데이터를 각 factor 에 대해서 차이에 대한 결론을 그래프로써 확인할 수 있도록 시각화 시켜주세요 (그러나, 팩터를 분석할 때 4방법 중에 한 가지가 아닌 Bonferroni,Duncan,Scheffe,Tukey 모든 것을 함수의 변수로써 이용할 수 있게끔 해주세요)
아래 사진은 필자의 결과값 예시입니다
library(agricolae)
data <- c(23,21,22,
16,18,15,
14,13,16,
20,22,19,
14,13,12,
12,11,10,
21,20,19,
13,12,12,
11,13,12)
A <- gl(3,3,27)
B <- factor(rep(1:3, each=9, len=27))
C <- gl(9,3)
sm <- function (data, A,B,C, alpha ,way) {
model <- aov(data~A*B)
inter.pvalue <- unlist(summary(model)[[1]][[5]][[3]])
# BONFERRONI
if (way=="B") {
# reject H0 = exist interaction
if (inter.pvalue < alpha) {
oneway.p <- unlist(summary(aov(data~C))[[1]][[5]][[1]])
if (oneway.p < alpha) {
model1 <- aov(data~C)
reason <- LSD.test(model1, "C", alpha = alpha, p.adj = "bonferroni",group = TRUE)
final1 <- plot(reason, variation = "IQR", main = "B : reject H0 = exist interaction",sub = "One-way ANOVA")
}
else {final1 <- "B : Can't Find differ from factor C"}
final2 <- "Not applicable"
}
# accept H0 = no interaction
else {A.p <- unlist(summary(model)[[1]][[5]][[1]])
B.p <- unlist(summary(model)[[1]][[5]][[2]])
if (A.p <alpha) {
reason1 <- LSD.test(model, "A", alpha = alpha, p.adj = "bonferroni",group = TRUE)
final1 <- plot(reason1, variation = "IQR", main = "B : accept H0 = no interaction", sub = "A Factor")
}
else if (A.p >alpha) {final1 <- "B : Can't Find differ from factor A"}
if (B.p <alpha) {
reason2 <- LSD.test(model, "B", alpha = alpha, p.adj = "bonferroni",group = TRUE)
final2 <- plot(reason2, variation = "IQR", main = "B : accept H0 = no interaction", sub = "B Factor")
}
else if (B.p >alpha) {final2 <- "B : Can't Find differ from factor B"}
}
}
# DUNCAN
else if (way=="D") {
# reject H0 = exist interaction
if (inter.pvalue < alpha) {
oneway.p <- unlist(summary(aov(data~C))[[1]][[5]][[1]])
if (oneway.p < alpha) {
model1 <- aov(data~C)
reason <- duncan.test(model1, "C", alpha = alpha)
final1 <- plot(reason, variation = "IQR", main = "D : reject H0 = exist interaction",sub = "One-way ANOVA")
}
else {final1 <- "D : Can't Find differ from factor C"}
final2 <- "Not applicable"
}
# accept H0 = no interaction
else {A.p <- unlist(summary(model)[[1]][[5]][[1]])
B.p <- unlist(summary(model)[[1]][[5]][[2]])
if (A.p <alpha) {
reason1 <- duncan.test(model, "A", alpha = alpha)
final1 <- plot(reason1, variation = "IQR", main = "D : accept H0 = no interaction", sub = "A Factor")
}
else if (A.p >alpha) {final1 <- "D : Can't Find differ from factor A"}
if (B.p <alpha) {
reason2 <- duncan.test(model, "B", alpha = alpha)
final2 <- plot(reason2, variation = "IQR", main = "D : accept H0 = no interaction", sub = "B Factor")
}
else if (B.p >alpha) {final2 <- "D : Can't Find differ from factor B"}
}
}
# SCHEFFE
else if (way=="S") {
# reject H0 = exist interaction
if (inter.pvalue < alpha) {
oneway.p <- unlist(summary(aov(data~C))[[1]][[5]][[1]])
if (oneway.p < alpha) {
model1 <- aov(data~C)
reason <- scheffe.test(model1, "C", alpha = alpha)
final1 <- plot(reason, variation = "IQR", main = "S : reject H0 = exist interaction",sub = "One-way ANOVA")
}
else {final1 <- "S : Can't Find differ from factor C"}
final2 <- "Not applicable"
}
# accept H0 = no interaction
else {A.p <- unlist(summary(model)[[1]][[5]][[1]])
B.p <- unlist(summary(model)[[1]][[5]][[2]])
if (A.p <alpha) {
reason1 <- scheffe.test(model, "A", alpha = alpha)
final1 <- plot(reason1, variation = "IQR", main = "S : accept H0 = no interaction", sub = "A Factor")
}
else if (A.p >alpha) {final1 <- "S : Can't Find differ from factor A"}
if (B.p <alpha) {
reason2 <- scheffe.test(model, "B", alpha = alpha)
final2 <- plot(reason2, variation = "IQR", main = "S : accept H0 = no interaction", sub = "B Factor")
}
else if (B.p >alpha) {final2 <- "S : Can't Find differ from factor B"}
}
}
# TUKEY
else if (way=="T") {
# reject H0 = exist interaction
if (inter.pvalue < alpha) {
oneway.p <- unlist(summary(aov(data~C))[[1]][[5]][[1]])
if (oneway.p < alpha) {
model1 <- aov(data~C)
reason <- HSD.test(model1, "C", alpha = alpha)
final1 <- plot(reason, variation = "IQR", main = "T : reject H0 = exist interaction",sub = "One-way ANOVA")
}
else {final1 <- "T : Can't Find differ from factor C"}
final2 <- "Not applicable"
}
# accept H0 = no interaction
else {A.p <- unlist(summary(model)[[1]][[5]][[1]])
B.p <- unlist(summary(model)[[1]][[5]][[2]])
if (A.p <alpha) {
reason1 <- HSD.test(model, "A", alpha = alpha)
final1 <- plot(reason1, variation = "IQR", main = "T : accept H0 = no interaction", sub = "A Factor")
}
else if (A.p >alpha) {final1 <- "T : Can't Find differ from factor A"}
if (B.p <alpha) {
reason2 <- HSD.test(model, "B", alpha = alpha)
final2 <- plot(reason2, variation = "IQR", main = "T : accept H0 = no interaction", sub = "B Factor")
}
else if (B.p >alpha) {final2 <- "T : Can't Find differ from factor B"}
}
}
par(mfcol=c(1,2))
final1
final2
}
sm(data,A,B,C,0.05,way="B")
sm(data,A,B,C,0.05,way="D")
sm(data,A,B,C,0.05,way="S")
sm(data,A,B,C,0.05,way="T")
4번) 'chi-squared goodness test' 에 대한 프로그램을 짜서 R 내부의 명령어인 'chisq.test' 의 결과가 서로 동일하게 나오게 하세요
data <- c(13,10,42,65,20)
prop <- c(0.1, 0.05, 0.25, 0.4, 0.2)
csg <- function(x,p) {
# df
r <- length(x)
df <- r-1
# expected
ex<-c()
for(i in 1:r)
ex[i] <- sum(x)*p[i]
# t.s
mo <- c()
for (i in 1:r)
{mo[i] <- ((x[i]-ex[i])^2/ex[i])}
chistar <- sum(mo)
cat("\n Chi-squared test for given probabilities","\n\ndata: ",substitute(x),
"\nX-squared = ",chistar,","," df = ",df,","," p-value = ",
round(pchisq(chistar,df=df,lower.tail = F),4),sep = "")
}
csg(data, prop) ; chisq.test(data, p = prop)
5번) 'independent test' 에 대한 프로그램을 짜서 R 내부의 명령어인 'chisq.test' 의 결과가 서로 동일하게 나오게 하세요
data <- matrix(c(75, 160, 100, 15, 60, 115, 65, 10, 65, 175, 135, 25), nrow = 4, byrow = F,
dimnames = list("Injury" = c("None", "Minor", "Major", "Death"),
"Restraint" = c("seat belt","both", "None")) )
indi <- function (x) {
r <-nrow(x) ; c <- ncol(x)
# df
df <- (r-1)*(c-1)
ni. <- c() ; n.j <- c()
for (i in 1:r) {ni.[i] <- sum(x[i,])}
for (j in 1:c) {n.j[j] <- sum(x[,j])}
# expected
E <- matrix(nr=r, nc=c)
for (i in 1:r) {
for (j in 1:c) {
E[i,j] <- (ni.[i]*n.j[j])/sum(x)
}
}
# t.s
su <- 0
for (i in 1:r) {
for (j in 1:c) {
su <- su+ (x[i,j]-E[i,j])^2 /E[i,j]
}
}
chistar <- sum(su)
cat("\n Pearson's Chi-squared test","\n\ndata: ",substitute(x),
"\nX-squared = ",round(chistar,3),","," df = ",df,","," p-value = ",
round(pchisq(chistar,df=df,lower.tail = F),4),sep = "")
}
indi(data) ; chisq.test(data)
<과제 총평>
1번 : 'Handling Zeros' 에 대해서 어렵다고 생각하기보다는 '어디에 equal data 가 붙어야 p-value 가 증가할까?' 를 잘 생각해보면 그렇게 복잡하지 않음을 깨달으실 수 있을거에요
2번 : 사실 단순한 'factorial experiments' 문제라기 보다는, '분석 순서에 대해서 잘 숙지하고 있는가? & 테이블을 잘 만들어낼 수 있는가?' 에 관한 문제임이라고 생각됩니다, 'interaction test' 의 다음순서를 잘 생각해보세요
3번 : 4가지의 분석 방법을 함수로 잘 짜면 되는 신종 노가다 괴롭힘 문제였습니다... (만약 I.Q.R 을 모르시겠다면, 구글검색을 추천합니다)
4번+5번 : 수리통계학의 'categorical data' 를 잘 배우셨다면, 두 문제의 test 구성에서 별 어려움을 많이 못 느끼셨을 것입니다 (특히, 5번의 'independent test' 는 후의 'homogeneity' 의 구성과 똑같음을 알고계시면 더 좋을 거에요)