Professor, Department of Statistics
2026-04-27
The type I error rate of a test is the probability of rejecting the null hypothesis when it is actually true.
In many research contexts, the acceptable type I error rate is set at 0.05, meaning that there is a 5% chance of falsely rejecting the null hypothesis.
A simulation study can help assess whether a statistical test maintains the desired type I error rate under various conditions.
The power of a statistical test is the probability of correctly rejecting a null hypothesis when it is false.
In other words, power is the ability of a test to detect a true effect or relationship between variables.
Power is affected by several factors, including sample size, effect size, and significance level.
For simple tests, analytical formulas for calculation power might exist. For complicated tests, simulations can be used to evaluate power
Simulation studies allow researchers to evaluate and compare the performance of statistical tests under controlled conditions.
They can help determine which test is most appropriate for a specific situation.
By simulating data with known properties, researchers can assess the type I error rate, power, and other performance metrics of each test.
Determine performance metrics: Decide on the metrics you will use to evaluate the performance of each test (e.g., type I error, power, prediction accuracy, confidence interval coverage).
Set up the simulation: Determine the number of repetitions.
Analyze and interpret the results: Assess the performance of each test based on the simulation results and draw conclusions.
generate_data <- function(n, mu, sd) {
rnorm(n, mean = mu, sd = sd)}
generate_data_out <- function(n, mu, sd, sd_out, p) {
n1=round(n*(1-p)); n2=n-n1
c(rnorm(n1, mean = mu, sd = sd), rnorm(n2, mean=mu, sd=sd_out))
}
run_tests <- function(data1, data2) {
t_test <- t.test(data1, data2)$p.value
t_e_test <- t.test(data1, data2, var.equal=TRUE)$p.value
wilcox_test <- wilcox.test(data1, data2)$p.value
c(t = t_test, t_e_test=t_e_test, wilcox_test = wilcox_test)}#without outliers
pvalues <- function(n, mu1, mu2,sd, n_rep=1000){
results=matrix(0, n_rep, 3)
for(b in 1:n_rep){
data1 <- generate_data(n, mu1, sd)
data2 <- generate_data(n, mu2, sd)
results[b, ]=run_tests(data1, data2)
}
return(colMeans(results<0.05))}
#with outliers
pvalues_out <- function(n, mu1, mu2, sd, sd_out, p, n_rep=1000){
results=matrix(0, n_rep, 3)
for(b in 1:n_rep){
data1 <- generate_data_out(n, mu1, sd, sd_out, p)
data2 <- generate_data_out(n, mu2, sd, sd_out, p)
results[b, ]=run_tests(data1, data2)
}
return(colMeans(results<0.05))}power_n5=rbind(pvalues(n=5, mu1=0, mu2=0.2, sd=1, n_rep=1000),
pvalues(n=5, mu1=0, mu2=0.5, sd=1,n_rep=1000),
pvalues(n=5, mu1=0, mu2=1, sd=1, n_rep=1000))
power_n10=rbind(pvalues(n=10, mu1=0, mu2=0.2, sd=1, n_rep=1000),
pvalues(n=10, mu1=0, mu2=0.5, sd=1, n_rep=1000),
pvalues(n=10, mu1=0, mu2=1, sd=1, n_rep=1000))
power_n50=rbind(pvalues(n=50, mu1=0, mu2=0.2, sd=1, n_rep=1000),
pvalues(n=50, mu1=0, mu2=0.5, sd=1, n_rep=1000),
pvalues(n=50, mu1=0, mu2=1, sd=1, n_rep=1000))
colnames(power_n5)=c("t", "t_e", "r")
colnames(power_n10)=c("t", "t_e", "r")
colnames(power_n50)=c("t", "t_e", "r")
power_normal_wide=data.frame(cbind(rbind(power_n5, power_n10, power_n50),
diff=rep(c(0.2, 0.5, 1.0),3),
n=rep(c(5, 10, 50), each=3)))
power_normal=gather(power_normal_wide, test, power, 1:3, factor_key=TRUE)
ggplot(power_normal, aes(x=diff, y=power,
group=interaction(test, n), color=factor(n), shape=test)) +
geom_point(size=3) +geom_line() + ggtitle("Power: no outlier")type1=rbind(
pvalues(n=5, mu1=0, mu2=0, sd=1, n_rep=1000),
pvalues(n=10, mu1=0, mu2=0, sd=1, n_rep=1000),
pvalues(n=50, mu1=0, mu2=0, sd=1, n_rep=1000))
colnames(type1)=c("t", "t_e", "r")
type1_wide=data.frame(type1, n=c(5,10,50))
type1=gather(type1_wide, test, power, 1:3, factor_key=TRUE)
ggplot(type1, aes(x=n, y=power, group=test, shape=test, color=test))+
geom_point(size=3) + geom_line()+
geom_hline(yintercept=0.05, linetype="dashed", color = "red") +
ggtitle("Type I Error: no outlier")#1% outliers
type1_out_mat=rbind(
pvalues_out(n=5, mu1=0, mu2=0, sd=1, sd_out=5, p=0.01, n_rep=1000),
pvalues_out(n=10, mu1=0, mu2=0, sd=1, sd_out=5, p=0.01, n_rep=1000),
pvalues_out(n=20, mu1=0, mu2=0, sd=1, sd_out=5, p=0.01, n_rep=1000),
pvalues_out(n=50, mu1=0, mu2=0, sd=1, sd_out=5, p=0.01, n_rep=1000))
colnames(type1_out_mat)=c("t", "t_e", "r")
type1_wide_out=data.frame(type1_out_mat, n=c(5,10,20,50))
type1_out=gather(type1_wide_out, test, power, 1:3, factor_key=TRUE)
ggplot(type1_out, aes(x=n, y=power, group=test, shape=test, color=test))+
geom_point(size=3) +
geom_line()+
geom_hline(yintercept=0.05, linetype="dashed", color = "red")+
ggtitle("Type I Error: 1% outliers")#5% outliers
type1_out_mat=rbind(
pvalues_out(n=5, mu1=0, mu2=0, sd=1, sd_out=5, p=0.05, n_rep=1000),
pvalues_out(n=10, mu1=0, mu2=0, sd=1, sd_out=5, p=0.05, n_rep=1000),
pvalues_out(n=20, mu1=0, mu2=0, sd=1, sd_out=5, p=0.05, n_rep=1000),
pvalues_out(n=50, mu1=0, mu2=0, sd=1, sd_out=5, p=0.05, n_rep=1000))
colnames(type1_out_mat)=c("t", "t_e", "r")
type1_wide_out=data.frame(type1_out_mat, n=c(5,10,20,50))
type1_out=gather(type1_wide_out, test, power, 1:3, factor_key=TRUE)
ggplot(type1_out, aes(x=n, y=power, group=test, shape=test, color=test))+
geom_point(size=3) +
geom_line()+
geom_hline(yintercept=0.05, linetype="dashed", color = "red")+
ggtitle("Type I Error: 5% outliers")