##########################################################################
## An Introduction to Empirical Legal Studies
## 	Chapter 11 Replication File
##
## Revised on 3/24/2014 KEC	  
##
## This work is licensed under a Creative Commons Attribution 4.0
## International License. See http://creativecommons.org/licenses/by/4.0
## for more information.
##
## Authors
##	Lee Epstein, Washington University in St. Louis
##  and Andrew D. Martin, University of Michigan
##########################################################################

## Chapter 11

## load packages and set up multiplot 
library(foreign)
library(ggplot2)
library(reshape)
library(gridExtra)
library(mvtnorm)
library(faraway)
source("ggplotTemplate.R")

## Figure 11.1 Descriptive statistics on the number of procedures to register a 
## firm so that it can begin formally operating, by whether or not the judiciary 
## is fully independent. If the goal is to provide summary information about the 
##composition of the variables of interest through descriptive statistics, the 
## precise values in the top panel do not serve it. The box plot in the middle 
##panel visually display the distribution of the procedures variable and do a
## better job of drawing attention to the median, the interquartile range, and 
##any outliers. The violin plot provides similar information while conveying an
## even clearer picture of the shape of the variable's distribution
## Section 11.1, p. 263

## read in data
laPorte <- read.dta("laPorte.dta")
laPorte <- laPorte[is.na(laPorte$howard_carey_recode)==FALSE,]
boxPlot <- ggplot(laPorte, aes(howard_carey_recode,world_bank_procedures)) + 
  geom_boxplot(outlier.size=0, fill=fillColor, na.rm=TRUE)
boxPlot <- boxPlot + xlab("") + ylab("Number of Procedures") + 
  stat_boxplot(geom='errorbar', na.rm=TRUE)

## show plot and save it
print(boxPlot)
ggsave("boxPlot.pdf", boxPlot, height=5, width=9)

## make violin plot
vPlot <- ggplot(laPorte[!is.na(laPorte$world_bank_procedures),],
  aes(howard_carey_recode,world_bank_procedures)) + geom_violin(na.rm=TRUE)
vPlot <- vPlot + xlab("") + ylab("Number of Procedures") 
vPlot <- vPlot + geom_boxplot(width=.1, outlier.size=0, fill=fillColor, fatten=0, na.rm=TRUE) 
vPlot <- vPlot + stat_summary(fun.y=median, geom="point", fill="white", shape=21, 
                              size=3, na.rm=TRUE)

## show plot and save it
print(vPlot)
ggsave("vPlot.pdf", vPlot, height=5, width=9)

## Figure 11.2 A histogram and a kernel density plot of the number of procedures 
## to register a firm so that it can begin formally operating for 67 countries. 
##The histogram provides the reader with a richer understanding of the variable 
##and its distribution than a table of descriptive statistics (see the top panel
##of Figure 11.1). Arguably the kernel density plot does an even better job
## because the existence and location of the positive skew are more apparent
## Section 11.1, p. 267

## make histogram
histogram <- ggplot(laPorte, aes(world_bank_procedures))  + 
  geom_histogram(binwidth=2, fill=fillColor,colour="black")
histogram <- histogram + ylab("Frequency") + xlab("Number of Procedures") +
  xlim(0,20) + ggtitle("Histogram")

## show plot and save it
print(histogram)
ggsave("histogram.pdf", histogram, height=5, width=5)

## make kernel density plot
kernelDensity <- ggplot(laPorte, aes(world_bank_procedures)) + 
  geom_line(stat="density",size=lineSize,fill=fillColor,na.rm=TRUE)
kernelDensity <- kernelDensity + ylab("Density") + xlab("Number of Procedures")  +
  xlim(0,20) + ggtitle("Kernel Density")

## show kernel density plot and save it
print(kernelDensity)
ggsave("kernelDensity.pdf", kernelDensity, height=5, width=5)

## Figure 11.3 Compared to the descriptive statistics in Table 11.2, the 
## individual dot plots provide a more visually and cognitively appealing 
## solution to the problem of providing readers with information about the 
## composition of individual variables in a dataset
## Section 11.1, p. 269

## set up multiplot
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  require(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

## make first figure (top left)
approveConstAmdt <- read.table(header=T,text='
type pct
"Absolute Majority" 7.14
"3/5s Majority" 8.04
"2/3s Majority" 75.89
"3/4s Majority" 8.93
')
approveConstAmdt$type <- factor(approveConstAmdt$type, levels=c(
"3/4s Majority","2/3s Majority","3/5s Majority","Absolute Majority"), 
labels=c("3/4s Majority","2/3s Majority","3/5s Majority","Absolute Majority"))

approveAmdt <- ggplot(approveConstAmdt, aes(x=pct, y=type)) + geom_point(size=3) +
  geom_hline(yintercept=1, linetype=3) +
  geom_hline(yintercept=2, linetype=3) +
  geom_hline(yintercept=3, linetype=3) +
  geom_hline(yintercept=4, linetype=3) +
  xlab("Percent of Constitutions") + ylab("") + 
  ggtitle("Percentage of Vote Needed to Approve \n a Constitutional Amendment") + 
  xlim(0,100)

## make second figure (middle left)
HOSSelection <- read.table(header=T,text='
type pct
"Elected by Elite Group" 25.86
"Elected by Citizens" 57.47
"Heredity/Royal Selection" 16.67
')
HOSSelection$type <- factor(HOSSelection$type, levels=c(
"Elected by Elite Group","Elected by Citizens","Heredity/Royal Selection"), 
labels=c("Elected by \n Elite Group","Elected by
         \n Citizens","Heredity/Royal \n Selection"))

HOSSelectionG <- ggplot(HOSSelection, aes(x=pct, y=type)) + geom_point(size=3) +
  geom_hline(yintercept=1, linetype=3) +
  geom_hline(yintercept=2, linetype=3) +
  geom_hline(yintercept=3, linetype=3) +
  xlab("Percent of Constitutions") + ylab("") + 
  ggtitle("Selection of Head of State") + xlim(0,100)

## make third figure (bottom left)
numChambers <- read.table(header=T,text='
type pct
"Two Chambers" 42.39
"One Chamber" 57.61
')
numChambers$type <- factor(numChambers$type, levels=c("Two Chambers",
                                                      "One Chamber"), 
                           labels=c("Two Chambers","One Chamber"))

numChambersG <- ggplot(numChambers, aes(x=pct, y=type)) + geom_point(size=3) +
  geom_hline(yintercept=1, linetype=3) +
  geom_hline(yintercept=2, linetype=3) +
  xlab("Percent of Constitutions") + ylab("") + 
  ggtitle("Number of Chambers or \n Houses in Legislature") + 
  xlim(0,100)

## make fourth figure (top right)
judInd <- read.table(header=T,text='
type pct
"No" 22.65
"Yes" 77.35
')
judInd$type <- factor(judInd$type, levels=c("No","Yes"), labels=c("No","Yes"))

judIndG <- ggplot(judInd, aes(x=pct, y=type)) + geom_point(size=3) +
  geom_hline(yintercept=1, linetype=3) +
  geom_hline(yintercept=2, linetype=3) +
  xlab("Percent of Constitutions") + ylab("") + 
  ggtitle("Constitution Contains an Explicit Declaration
          Regarding Judicial Independence") + xlim(0,100)

## make fifth figure (middle right)
stateDesc <- read.table(header=T,text='
type pct
Unitary 70.83
Confederal 1.39
Federal 27.78
')
stateDesc$type <- factor(stateDesc$type, levels=c("Unitary","Confederal",
                                                  "Federal"), 
                         labels=c("Unitary","Confederal","Federal"))

stateDescG <- ggplot(stateDesc, aes(x=pct, y=type)) + geom_point(size=3) +
  geom_hline(yintercept=1, linetype=3) +
  geom_hline(yintercept=2, linetype=3) +
  geom_hline(yintercept=3, linetype=3) +
  xlab("Percent of Constitutions") + ylab("") + 
  ggtitle("Description of the State") + xlim(0,100)

## make sixth figure (bottom right)
natLang <- read.table(header=T,text='
type pct
"No Languages Mentioned" 26.23
"No Except for Government Business" 7.65
"Both Official and National" 14.21
"National Only" 6.01
"Official Only" 45.90
')
natLang$type <- factor(natLang$type, levels=c("No Languages Mentioned",
                                              "No Except for Government Business",
                                              "Both Official and National",
                                              "National Only","Official Only"), 
                       labels=c("No Languages \n Mentioned","No Except for \n 
                                Government \n Business","Both Official \n and 
                                National","National Only","Official Only"))

natLangG <- ggplot(natLang, aes(x=pct, y=type)) + geom_point(size=3) +
  geom_hline(yintercept=1, linetype=3) +
  geom_hline(yintercept=2, linetype=3) +
  geom_hline(yintercept=3, linetype=3) +
  geom_hline(yintercept=4, linetype=3) +
  geom_hline(yintercept=5, linetype=3) +
  xlab("Percent of Constitutions") + ylab("") + 
  ggtitle("Constitution Specifies an Official \n 
  or National Language") + xlim(0,100)

## make combined figure and save it
indDotPlots<-multiplot(approveAmdt,HOSSelectionG,numChambersG,judIndG,
                      stateDescG,natLangG, cols=2)

## show multiplot and save it
print(indDotPlots)
pdf("indDotPlots.pdf", indDotPlots)

## Figure 11.4 The table on the top is the same as Table 11.3, which shows
## data on a racial profiling. The mosaic plot on the bottom presents the
## same data in a more consice and appealing fashion. the width of the bars
## depicts the number of searches per year while the height of each tile conveys
##the relative number of searches that are conducted on drivers of each race
## during each year. With this plot, it is much easier to see, for example, the
## large percentage of searches in 1995 and 1996 that were of black drivers and how
## that percentage declines beginning in 1997
## Section 11.1, p. 274

## read in data and define it
gross <- read.dta("grossTable.dta")
gross <- gross[gross$location=="I95",c(1,4:6)]
tab <- as.table(t(gross[,2:4]))
dimnames(tab)[[2]] <- c("1995", "1996", "1997", "1998", "1999", "2000")
dimnames(tab)[[1]] <- c("White", "Black", "Hispanic")

## show plot and save it
par(mar=c(3.1, 2.1, 2.1, 0.1))
raceProfile <- plot(t(tab), main="", las=1, xlab="Year", ylab="Race of Person Searched")
print(raceProfile)

## Figure 11.5 The top panel reproduces the raw data table in Table 11.4. The 
## bottom panel is a bivariate scatterplot of the full sample. The solid line 
## is a smooth loess curve that summarizes the relationship between the payments
## and hours. We've also noted serveral outlier economies--those where the number
## of payments does not provide an especially good prediction of the number of hours 
## Section 11.1, p. 276

## read in data
taxData <- read.dta("taxSampleWorldBank.dta")
realScatter <- ggplot(data=taxData, aes(x=paymentsnumberperyear, 
                                        y=timehoursperyear)) + 
  geom_point(size=2.5, colour=fillColor)
realScatter <- realScatter + xlab("Payments (number per year)") + 
  ylab("Time (hours per year)")
realScatter <- realScatter + stat_smooth(se = FALSE,size=2,colour="black", method="loess")
realScatter <- realScatter + annotate("text",x=32, y=852,label="Vietnam",
                                      family="Times",size=3) 
realScatter <- realScatter + annotate("text",x=65, y=795,label="Venezuela",
                                      family="Times",size=3) 
realScatter <- realScatter + annotate("text",x=44, y=675,label="Cameroon",
                                      family="Times",size=3) 
realScatter <- realScatter + annotate("text",x=28, y=513,label="Ukraine",
                                      family="Times",size=3) 
#realScatter <- realScatter + ylim(0,875) + xlim(0,80)

## show plot and save it
print(realScatter)
ggsave("realScatter.pdf", realScatter, height=5, width=5)

## Figure 11.6 Life expectancy in years for three countires. From the raw data,
## partially reproduced above from Table 11.5, it is difficult to decipher trends 
## across time. Below the table, we provide a time series plot of the same data.
## Data points for each year are represented by a hollow circle. The time series 
## plot draws attention to the upswing in life expectancy in all three countries, 
## as well as the distance between China versus Germany in the UK
## Section 11.1, p. 277

## read in data
lifeData <- read.dta("lifeExpect.dta")

## make the plot
linePlot <- ggplot(lifeData, aes(x=year, y=LifeExpectacy,group=countryname,
                                 shape=countryname,size=countryname)) + 
  geom_line(aes(size = countryname)) + scale_size_manual(values=c(.5,1,.25))
linePlot <- linePlot + geom_point(size=2)+scale_shape_manual(values=c(1,1,1)) +
  xlim(1991,2010)
linePlot <- linePlot + xlab("Year") + ylab("Life Expectancy (years)")+ 
  theme(legend.position = "none") 
linePlot <- linePlot + annotate("text",x=1991, y=70,label="China",
                                family="Times",size=4) 
linePlot <- linePlot + annotate("text",x=1991, y=75,label="Germany",
                                family="Times",size=4) 
linePlot <- linePlot + annotate("text",x=1991.5, y=76.8,label="United Kingdom",
                                family="Times",size=4) 

## show plot and save it
print(linePlot)
ggsave("linePlot.pdf", linePlot, height=5, width=9)

## Figure 11.7 Nomograms depicting the results from Table 11.7's logistic 
## regression analysis of whether individual justices vote to strike or uphold 
## federal laws. Nomograms allow the reader to discern visually the estimated 
## coefficients and uncertainty around the estimates and to quickly spot whether 
## the effects are statistically significant by looking at whether the confidence intervals cross zero
## Section 11.1, p. 284

## read in Conservative Law data and Liberal Law data
consCoef <- c(-.524,-.282,-.299,.843,-.006,.010)
consSE <- c(.048,.095,.110,.144,.003,.005)
libCoef <- c(.409,.313,-.221,.070,-.007,.008)
libSE <- c(.117,.140,.131,.125,.003,.006)

## build data frames for Conservative Laws
consData <- data.frame(coef=consCoef,se=consSE)
consData$lows <- consData$coef - 1.96 * consData$se
consData$highs <- consData$coef + 1.96 * consData$se
consData$names <- ordered(c("Justice's Ideology","Lower Court's Decision",
                            "Discretionary Review or Not","Civil Liberties Case 
                            or Not","N of Cases Decided During Term","Term of 
                            Court"),
                          levels= rev(c("Justice's Ideology",
                                                  "Lower Court's Decision",
                                                  "Discretionary Review or Not",
                                                  "Civil Liberties Case or Not",
                                                  "N of Cases Decided During Term",
                                                  "Term of Court")))

## build data frames for Liberal Laws
libData <- data.frame(coef=libCoef,se=libSE)
libData$lows <- libData$coef - 1.96 * libData$se
libData$highs <- libData$coef + 1.96 * libData$se
libData$names <- ordered(c("Justice's Ideology","Lower Court's Decision",
                           "Discretionary Review or Not",
                           "Civil Liberties Case or Not",
                           "N of Cases Decided During Term",
                           "Term of Court"),
                         levels= rev(c("Justice's Ideology",
                                      "Lower Court's Decision",
                                      "Discretionary Review or Not",
                                      "Civil Liberties Case or Not",
                                      "N of Cases Decided During Term",
                                      "Term of Court")))

## make Conservative Law figure
consNomogram <- ggplot(consData, aes(y = coef, x=names, ymin=consData$lows, 
                                     ymax=consData$highs))
consNomogram <- consNomogram + geom_pointrange(size=.7) + coord_flip() + 
  geom_hline(yintercept=0) +
  ylab("Logistic Regression Coefficient") + xlab("") + 
  ggtitle("Conservative Law") + ylim(-0.7,1.2)

## make Liberal Law Figure
libNomogram <- ggplot(libData, aes(y = coef, x=names, ymin= libData$lows, 
                                   ymax= libData$highs))
libNomogram <- libNomogram + geom_pointrange(size=.7) + coord_flip() + 
  geom_hline(yintercept=0) +
  ylab("Logistic Regression Coefficient") + xlab("") + 
  ggtitle("Liberal Law") + ylim(-0.7,1.2)

## show plots and save them
print(consNomogram)
ggsave("consNomogram.pdf", consNomogram, height=5, width=6)
print(libNomogram)
ggsave("libNomogram.pdf", libNomogram, height=5, width=6)

## Figure 11.8 An illustration of moving from suboptimal to optimal communication
## of research results. Adapting this scheme to their own projects and needs 
## should  help researchers better relay their results. To generate the predicted 
## probabilities and confidence intervals for varying levels of ideology, 
## we hold all other variables at their mean or mode
## Section 11.1, p. 286

## set up function
clFunc <- function(dat,fm, cluster){
           require(sandwich, quietly = TRUE)
           require(lmtest, quietly = TRUE)
           M <- length(unique(cluster))
           N <- length(cluster)
           K <- fm$rank
           dfc <- (M/(M-1))*((N-1)/(N-K))
           uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
           vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
           coeftest(fm, vcovCL) 
           return(vcovCL)}

## read in data
jrData <- read.dta("judicialRestraint.dta",convert.factors = FALSE)
jrData <- jrData[,c("vote_jud_rev","MQ_mean","lct_dir","cert_jur","civ_lib",
                    "N_cases","term","law_ideology","justice")]
jrData <- jrData[is.na(jrData$lct_dir)==FALSE,]

## make Conservative Laws figure
consData <- jrData[jrData$law_ideology==0,]
consModel <- glm(vote_jud_rev~MQ_mean+lct_dir+cert_jur+civ_lib+N_cases+term,
                 consData,family="binomial")
consVcov <- clFunc(consData,consModel, consData$justice)
n <- 1000
alpha <- .05
ruler <- seq(min(consData$MQ_mean),max(consData$MQ_mean),length.out=100)  
consSims <- rmvnorm(n,coef(consModel), consVcov)
mq <- consSims[1:n,'MQ_mean'] 
lct <- consSims[1:n,'lct_dir']
cert <- consSims[1:n,'cert_jur']
cl <- consSims[1:n,'civ_lib']
cases <- consSims[1:n,'N_cases']
term <- consSims[1:n,'term']
int <- consSims[1:n,'(Intercept)']

ctops <- rep(NA,length(ruler))
cprobs <- rep(NA,length(ruler))
cbottoms <- rep(NA,length(ruler))
csimMat <- matrix(data=NA,n,length(ruler))

for (i in 1:length(ruler))  {
csimMat[,i] <- ilogit(int+mq*ruler[i]+lct*1+cert*1+cl*1+cases*mean(consData$N_cases)
                      +term*mean(consData$term))
}

for (i in 1:length(ruler)) {
ctops[i] <- quantile(csimMat[,i], alpha/2)
cprobs[i] <- quantile(csimMat[,i], .5)
cbottoms[i] <- quantile(csimMat[,i], 1-(alpha/2))
}

consGraphData <- data.frame(ruler=ruler,probs=cprobs,tops=ctops,bottoms=cbottoms)
consPred <- ggplot(consGraphData, aes(x=ruler, y=probs)) + 
  geom_line(size=lineSize) + ylim(0,0.9)
consPred <- consPred + geom_ribbon(aes(x=ruler, ymin=bottoms, ymax=tops), 
                                   alpha=0.5, fill="grey60")
consPred <- consPred + xlab("Justice's Ideology") + 
  ylab("Probability Justice Votes to Invalidate the Law")+
  ggtitle("Conservative Laws")

## show plot and save it
print(consPred)
ggsave("consPred.pdf", consPred, height=5, width=5)

## make Liberal Laws figure
libData <- jrData[jrData$law_ideology==1,]
libModel <- glm(vote_jud_rev~MQ_mean+lct_dir+cert_jur+civ_lib+N_cases+term,
                libData,family="binomial")
libVcov <- clFunc(libData,libModel, libData$justice)
n <- 1000
alpha <- .05
ruler <- seq(min(libData$MQ_mean),max(libData$MQ_mean),length.out=100)  
libSims <- rmvnorm(n,coef(libModel), libVcov)
mq <- libSims[1:n,'MQ_mean'] 
lct <- libSims[1:n,'lct_dir']
cert <- libSims[1:n,'cert_jur']
cl <- libSims[1:n,'civ_lib']
cases <- libSims[1:n,'N_cases']
term <- libSims[1:n,'term']
int <- libSims[1:n,'(Intercept)']

ltops <- rep(NA,length(ruler))
lprobs <- rep(NA,length(ruler))
lbottoms <- rep(NA,length(ruler))
lsimMat <- matrix(data=NA,n,length(ruler))

for (i in 1:length(ruler))  {
lsimMat[,i] <- ilogit(int+mq*ruler[i]+lct*1+cert*1+cl*1+cases*mean(libData$N_cases)
                      +term*mean(libData$term))
}

for (i in 1:length(ruler)) {
ltops[i] <- quantile(lsimMat[,i], alpha/2)
lprobs[i] <- quantile(lsimMat[,i], .5)
lbottoms[i] <- quantile(lsimMat[,i], 1-(alpha/2))
}

libGraphData <- data.frame(ruler=ruler,probs=lprobs,tops=ltops,bottoms=lbottoms)
libPred <- ggplot(libGraphData, aes(x=ruler, y=probs)) + geom_line(size=lineSize)
libPred <- libPred + geom_ribbon(aes(x=ruler, ymin=bottoms, ymax=tops), alpha=0.5, 
                                 fill="grey60") + ylim(0,0.9)
libPred <- libPred + xlab("Justice's Ideology") + 
  ylab("Probability Justice Votes to Invalidate the Law")+
  ggtitle("Liberal Laws")

## show plot and save it
print(libPred)
ggsave("libJRPred.pdf", libPred, height=5, width=5)

## make All Laws figure
sameGraphData <- data.frame(ruler=ruler,lib=libGraphData$probs,
                            cons=consGraphData$probs)
samePred <- ggplot(sameGraphData, aes(x=ruler, y=lib)) + 
  geom_line(size=lineSize) + ylim(0,0.9)
samePred <- samePred + geom_line(data=sameGraphData,aes(x=ruler,y=cons),
                                 size=1,linetype=2)
samePred <- samePred + xlab("Justice's Ideology") + 
  ylab("Probability Justice Votes to Invalidate the Law")+
  ggtitle("All Laws")

## show plot and save it
print(samePred)
ggsave("samePred.pdf", samePred, height=5, width=5)

cat("## NOTE: these are different from the next because the standard errors are
    computed differently,\n")
cat("## the method for generating the confidence intervals is different, and 
    the points at which\n")
cat("## the function is evaluated is different. The output from Stata should 
    be used.\n\n")
cat("## Conservative Laws\n")
cat("## Far Left\n")
cat(ctops[1], " ", cprobs[1], " ", cbottoms[1], "\n")
cat("## Far Right\n")
cat(ctops[100], " ", cprobs[100], " ", cbottoms[100], "\n")

cat("## Liberal Laws\n")
cat("## Far Left\n")
cat(ltops[1], " ", lprobs[1], " ", lbottoms[1], "\n")
cat("## Far Right\n")
cat(ltops[100], " ", lprobs[100], " ", lbottoms[100], "\n")

