##########################################################################
## An Introduction to Empirical Legal Studies
## 	Chapter 7 Replication File
##
## Revised on 3/21/2014 KEC	  
##
## This work is licensed under a Creative Commons Attribution 4.0
## International License. See http://creativecommons.org/licenses/by/4.0
## for more information.
##
## Authors
##	Lee Epstein, Washington University in St. Louis
##  and Andrew D. Martin, University of Michigan
##########################################################################

## Chapter 7

##load packages
library(ggplot2)
library(foreign)
source("ggplotTemplate.R")

## Figure 7.3 Confidence intervals for the population mean from 50 random samples
## of size n = 50. The population mean mu = 240 is shown with the vertical line.
## Section 7.2, p. 153

## simulate datasets and compute 95% confidence intervals
## Each of the `sims` replications draws N observations from Normal(mu, sd)
## and records the sample mean together with the one-sample t-test 95%
## confidence interval for the mean.
set.seed(12345)
sims <- 50   # number of simulated samples
N <- 50      # observations per simulated sample
mu <- 240    # population mean
sd <- 50     # population standard deviation (a variable named like stats::sd)
lows <- rep(NA_real_, sims)
highs <- rep(NA_real_, sims)
middles <- rep(NA_real_, sims)
for (i in seq_len(sims)) {
  ## draw N observations so the sample size follows the parameter above
  data <- rnorm(N, mu, sd)
  test <- t.test(data)
  lows[i] <- test$conf.int[1]
  middles[i] <- mean(data)
  highs[i] <- test$conf.int[2]
}
## one row per simulated sample: index, lower bound, sample mean, upper bound
ciSimData <- data.frame(sim = seq_len(sims), lows = lows, middles = middles,
                        highs = highs)

## plot the confidence intervals
## Each simulated sample is drawn as a point (its sample mean) with a range
## (its 95% confidence interval). The reference line marks the population
## mean `mu` rather than a repeated literal, so it tracks the simulation
## parameter.
ciSimFig <- ggplot(data = ciSimData,
                   aes(sim, middles, ymin = lows, ymax = highs)) +
  geom_pointrange() +
  geom_hline(yintercept = mu) +
  xlab("Simulation Number") +
  ylab("Confidence Intervals") +
  coord_flip()  # flip axes so the simulations run along the vertical axis

## show plot and save it
print(ciSimFig)
ggsave("ciSimFig.pdf", ciSimFig, height = 6, width = 5)


## Figure 7.4 An illustration of computing p-value with t = 1.6 for a one-sample 
## t-test (p = 0.132, df = 14). The dark regions are those that contribute to the p-value
## Section 7.3, p. 158

## degrees of freedom and the test statistic whose tails are shaded
df <- 14
t.star <- 1.6

## evaluation grid for the Student-t density
ruler <- seq(from = -3.5, to = 3.5, by = 0.002)

## label every grid point: "a" = lower tail (below -t.star),
## "c" = upper tail (above t.star), "b" = everything in between
typeHolder <- ifelse(
  ruler >= -3.5 & ruler < -t.star,
  "a",
  ifelse(ruler > t.star & ruler <= 3.5, "c", "b")
)

## density curve plus the region label, ready for plotting
tDensity <- data.frame(
  x = ruler,
  y = dt(ruler, df = df),
  type = factor(typeHolder, levels = c("a", "b", "c"), ordered = TRUE)
)

## create the plot
## Draws the t density as a line, shades the two tails beyond +/- t.star and
## labels each tail with its area. `lineSize`, `fillColor` and `myFont` are
## not defined in this file; presumably they come from ggplotTemplate.R.
pvalue <- ggplot(data = tDensity, aes(x, y, fill = type)) +
  geom_line(size = lineSize)
## tick marks at +/- t.star are relabelled "-t" and "t"
pvalue <- pvalue +
  scale_x_continuous(breaks = c(-3, -2, -t.star, -1, 0, 1, t.star, 2, 3),
                     labels = c("-3", "-2", "-t", "-1", "0", "1",
                                "t", "2", "3"))
pvalue <- pvalue + ylab("Density") + xlab("Test Statistic (t=1.6, df=14)")
pvalue <- pvalue + geom_area()
## hide the fill legend ("none" replaces the deprecated FALSE) and redraw the
## curve on top of the shaded areas, using the same line width as the other
## p-value figures
pvalue <- pvalue + guides(fill = "none") + geom_line(size = lineSize)
## fill values follow the level order a, b, c: both tails shaded, centre empty
pvalue <- pvalue + scale_fill_manual(values = c(fillColor, NA, fillColor))
## tail-area labels, in the same font as the other p-value figures
pvalue <- pvalue + annotate("text", -3, 0.03, label = "0.066", family = myFont)
pvalue <- pvalue + annotate("text", 3, 0.03, label = "0.066", family = myFont)

## two-sided p-value for the statistic defined above
cat("pvalue1\n")
print(2 * pt(-t.star, df))

## show plot and save it
print(pvalue)
ggsave("pvalue1.pdf", pvalue, height = 5, width = 6)

## Figure 7.5 An illustration of computing p-value with t = -0.3 for a one-sample
## t-test (p = 0.769, df = 14). The dark regions are those that contribute to the
## p-value
## Section 7.3, p. 158

## degrees of freedom and the magnitude of the test statistic whose tails
## are shaded
df <- 14
t.star <- .3

## evaluation grid for the Student-t density
ruler <- seq(from = -3.5, to = 3.5, by = 0.002)

## label every grid point: "a" = lower tail (below -t.star),
## "c" = upper tail (above t.star), "b" = everything in between
typeHolder <- ifelse(
  ruler >= -3.5 & ruler < -t.star,
  "a",
  ifelse(ruler > t.star & ruler <= 3.5, "c", "b")
)

## density curve plus the region label, ready for plotting
tDensity <- data.frame(
  x = ruler,
  y = dt(ruler, df = df),
  type = factor(typeHolder, levels = c("a", "b", "c"), ordered = TRUE)
)

## create the plot
## Draws the t density as a line, shades the two tails beyond +/- t.star and
## labels each tail with its area. `lineSize`, `fillColor` and `myFont` are
## not defined in this file; presumably they come from ggplotTemplate.R.
pvalue <- ggplot(data = tDensity, aes(x, y, fill = type)) +
  geom_line(size = lineSize)
## the x-axis title gives t = -0.3, so the tick at -t.star is labelled "t"
## and the tick at +t.star is labelled "-t"
pvalue <- pvalue +
  scale_x_continuous(breaks = c(-3, -2, -1, -t.star, 0, t.star, 1, 2, 3),
                     labels = c("-3", "-2", "-1", "t", "0", "-t",
                                "1", "2", "3"))
pvalue <- pvalue + ylab("Density") + xlab("Test Statistic (t=-0.3, df=14)")
pvalue <- pvalue + geom_area()
## hide the fill legend ("none" replaces the deprecated FALSE) and redraw the
## curve on top of the shaded areas
pvalue <- pvalue + guides(fill = "none") + geom_line(size = lineSize)
## fill values follow the level order a, b, c: both tails shaded, centre empty
pvalue <- pvalue + scale_fill_manual(values = c(fillColor, NA, fillColor))
## tail-area labels
pvalue <- pvalue + annotate("text", -3, 0.03, label = "0.384", family = myFont)
pvalue <- pvalue + annotate("text", 3, 0.03, label = "0.384", family = myFont)

## two-sided p-value for the statistic defined above
cat("pvalue2\n")
print(2 * pt(-t.star, df))

## show plot and save it
print(pvalue)
ggsave("pvalue2.pdf", pvalue, height = 5, width = 6)


## Figure 7.6 An illustration of computing p-value with t = 2.2 for a one-sample 
## t-test (p = 0.045, df = 14). The dark regions are those that contribute to 
## the p-value
## Section 7.3, p. 159

## degrees of freedom and the test statistic whose tails are shaded
df <- 14
t.star <- 2.2

## evaluation grid for the Student-t density
ruler <- seq(from = -3.5, to = 3.5, by = 0.002)

## label every grid point: "a" = lower tail (below -t.star),
## "c" = upper tail (above t.star), "b" = everything in between
typeHolder <- ifelse(
  ruler >= -3.5 & ruler < -t.star,
  "a",
  ifelse(ruler > t.star & ruler <= 3.5, "c", "b")
)

## density curve plus the region label, ready for plotting
tDensity <- data.frame(
  x = ruler,
  y = dt(ruler, df = df),
  type = factor(typeHolder, levels = c("a", "b", "c"), ordered = TRUE)
)

## create the plot
## Draws the t density as a line, shades the two tails beyond +/- t.star and
## labels each tail with its area. `lineSize`, `fillColor` and `myFont` are
## not defined in this file; presumably they come from ggplotTemplate.R.
pvalue <- ggplot(data = tDensity, aes(x, y, fill = type)) +
  geom_line(size = lineSize)
## tick marks at +/- t.star are relabelled "-t" and "t"
pvalue <- pvalue +
  scale_x_continuous(breaks = c(-3, -t.star, -2, -1, 0, 1, 2, t.star, 3),
                     labels = c("-3", "-t", "-2", "-1", "0",
                                "1", "2", "t", "3"))
pvalue <- pvalue + ylab("Density") + xlab("Test Statistic (t=2.2, df=14)")
pvalue <- pvalue + geom_area()
## hide the fill legend ("none" replaces the deprecated FALSE) and redraw the
## curve on top of the shaded areas
pvalue <- pvalue + guides(fill = "none") + geom_line(size = lineSize)
## fill values follow the level order a, b, c: both tails shaded, centre empty
pvalue <- pvalue + scale_fill_manual(values = c(fillColor, NA, fillColor))
## tail-area labels
pvalue <- pvalue + annotate("text", -3, 0.03, label = "0.023", family = myFont)
pvalue <- pvalue + annotate("text", 3, 0.03, label = "0.023", family = myFont)

## two-sided p-value for the statistic defined above
cat("pvalue3\n")
print(2 * pt(-t.star, df))

## show plot and save it
print(pvalue)
ggsave("pvalue3.pdf", pvalue, height = 5, width = 6)