*************************************************************************
** An Introduction to Empirical Legal Studies
** 	Chapter 11 Replication File
**
** Revised on 3/21/2014 KEC	  
**
** This work is licensed under a Creative Commons Attribution 4.0
** International License. See http://creativecommons.org/licenses/by/4.0
** for more information.
**
** Authors
**	Lee Epstein, Washington University in St. Louis
**  and Andrew D. Martin, University of Michigan
*************************************************************************

** Chapter 11

** Figure 11.1 Descriptive statistics on the number of procedures to
**   register a firm so that it can begin formally operating, by whether
**   or not the judiciary is fully independent. If the goal is to provide 
**   summary information about the composition of the variables of interest 
**   through descriptive statistics, the precise values in the top panel do 
**   not well serve it. The box plots in the middle panel visually display 
**   the distribution of the procedures variable and do a better job of drawing 
**   attention to the median, the interquartile range, and any outliers.
**   The violin plot provides similar information while conveying an even 
**   clearer picture of the shape of the variable's distribution
** Section 11.1, p. 263

** Table in top panel of Figure 11.1
** read in data and drop observations with no value of howard_carey_recode,
**   so the table and both plots below are built from the same rows
use laPorte.dta, clear
drop if missing(howard_carey_recode)

** make the table: mean, sd, min, and max of world_bank_procedures plus the
**   group frequency, for each value of howard_carey_recode
** NOTE(review): contents() is the older table syntax; newer Stata releases
**   may require running this command under version control -- confirm
table howard_carey_recode, contents(mean world_bank_procedures sd ///
   world_bank_procedures min world_bank_procedures max world_bank_procedures ///
   freq) cw

** make the box plot (saved to fig1111.gph for the combined figure)
graph box world_bank_procedures, over(howard_carey_recode) ///
   ytitle(Number of Procedures) ylabel(0(5)18, angle(horizontal)) ///
   saving(fig1111, replace)

** install violin package (user-written, from SSC; needs network access)
ssc install vioplot

** make violin plot with the same axis title and labels as the box plot
**   (saved to fig1112.gph for the combined figure)
vioplot world_bank_procedures, over(howard_carey_recode) ///
   ytitle(Number of Procedures) ylabel(0(5)18, angle(horizontal)) ///
   saving(fig1112, replace)

** combine plots: stack the two saved graphs in a single column
graph combine fig1111.gph fig1112.gph, co(1) title(Descriptive Statistics ///
   on the Number of Procedures to Register a Firm, size(medium)) ///
   fysize(120) fxsize(100)

** Figure 11.2 A histogram and a kernel density plot of the number of 
**   procedures to register a firm so that it can begin formally operating 
**   for 67 countries. The histogram provides the reader with a richer 
**   understanding of the variable and its distribution than a table of
**   descriptive statistics (see the top panel of Figure 11.1).
**   Arguably, the kernel density plot does an even better job because 
**   the existence and location of the positive skew are more apparent.
** Section 11.1, p. 267

** histogram of the procedures variable (frequencies, bins of width 2),
**   saved to fig1121.gph
histogram world_bank_procedures, ///
   discrete width(2) frequency ///
   fcolor(black) ///
   ylabel(0(5)17, angle(horizontal)) ///
   xtitle(Number of Procedures) ///
   xlabel(0(5)20, grid glcolor(bluishgray)) ///
   title(Histogram) ///
   saving(fig1121, replace)

** gaussian kernel density of the same variable, drawn as a thick black
**   line with the default note suppressed, saved to fig1122.gph
kdensity world_bank_procedures, ///
   kernel(gaussian) recast(line) ///
   lcolor(black) lwidth(thick) ///
   ytitle(Density) ///
   ylabel(0.00(0.03)0.10, angle(horizontal)) ///
   xtitle(Number of Procedures) ///
   xlabel(0(5)20, grid glcolor(bluishgray)) ///
   note("") title(Kernel Density) ///
   saving(fig1122, replace)

** put the two saved panels into one figure
graph combine fig1121.gph fig1122.gph, ///
   title(Number of Procedures to ///
   Register a Firm, size(medium))

** Table 11.2 Descriptive statistics table created from Comparative 
**   Constitutions Project Database on provisions in 184 constitutions. 
**   Although the frequencies in the table provide details on the 
**   individual variables, it is unlikely that readers can quickly 
**   process the information; it is also unlikely that they need 
**   precise values. See Figure 11.3 for dot plots of the same data
** Section 11.1, p. 268

** load the constitutions data and print a one-way frequency table for
**   each of the six variables reported in Table 11.2
use ccpCharacteristics.dta, clear
foreach v in amndapct hoselect housenum judind fedunit lang {
   tabulate `v'
}

** Figure 11.3 Compared to the descriptive statistics in Table 11.2, 
**   the individual dot plots provide a more visually and cognitively
**   appealing solution to the problem of providing readers with information 
**   about the composition of individual variables in a dataset
** Section 11.1, p. 269

** for each variable, create <name>Prop holding the percentage that belongs
**   to the observation's category (missing when the category is not listed)
** each map is a flat list of pairs: category code, then percentage
local amndapct_map 1 7.14 2 8.04 3 75.89 4 8.93
local hoselect_map 1 16.67 2 57.47 3 25.86
local housenum_map 2 57.61 3 42.39
local judind_map   1 77.35 2 22.65
local fedunit_map  1 27.78 2 1.39 3 70.83
local lang_map     1 45.90 2 6.01 3 14.21 4 7.65 5 26.23

foreach v in amndapct hoselect housenum judind fedunit lang {
   gen `v'Prop = .
   local pairs ``v'_map'
   ** peel off one code/percentage pair per pass until the list is empty
   while "`pairs'" != "" {
      gettoken code pairs : pairs
      gettoken pct pairs : pairs
      replace `v'Prop = `pct' if (`v'==`code')
   }
}

** make the dot plots
**   one panel per variable; each plots the *Prop variable over the categories
**   of its source variable and is saved to its own .gph file for combining.
**   The over() category labels are turned off (label(nolabel)) and the
**   category names are written by hand with text() at fixed coordinates.
**   The panel title is placed at position(3) with rvertical orientation.

** amendment vote threshold (axis hidden with yscale(off)) -> fig1137
graph dot amndapctProp, over(amndapct, label(nolabel)) marker(1, ///
   msize(large)) yscale(range(0 100)) title("Procedure of Vote Needed to" ///
   "Approve a Constitutional Amendment", bexpand position(3) ///
   orientation(rvertical) box fcolor(bluishgray) size(vsmall)) yscale(off) ///
   ylabel(, grid glcolor(bluishgray)) ytitle("") xsize(7) ysize(8) text(80 92 ///
   "Absolute Majority" 67 80 "3/5s Majority" 44 80 "2/3s Majority" 17 80 ///
   "3/4s Majority", size(small)) saving(fig1137, replace)
** selection of head of state (axis hidden) -> fig1138
graph dot hoselectProp, over(hoselect, label(nolabel)) marker(1, ///
   msize(large)) yscale(range(0 100)) title("Selection of" "Head of State", ///
   bexpand position(3) orientation(rvertical) box fcolor(bluishgray) ///
   size(vsmall)) yscale(off) ylabel(, grid glcolor(bluishgray)) ytitle("") ///
   xsize(7) ysize(8) text(80 90 "Heredity/Royal Selection" 55 80 ///
   "Elected by Citizens" 22 80 "Elected by Elite Group", size(small)) ///
   saving(fig1138, replace)
** number of legislative chambers; this panel keeps its axis (no yscale(off))
**   and carries the axis title -> fig1139
graph dot housenumProp, over(housenum, label(nolabel)) marker(1, msize(large)) ///
   yscale(range(0 100)) title("Number of Chamber" "or House in Legislature", ///
   bexpand position(3) orientation(rvertical) box fcolor(bluishgray) ///
   size(vsmall)) ylabel(, grid glcolor(bluishgray)) ///
   ytitle("Percent of Constitutions") xsize(7) ysize(8) text(80 82 ///
   "One Chamber" 37 80 "Two Chambers", size(small)) saving(fig1139, replace)
** explicit declaration of judicial independence (axis hidden) -> fig11310
graph dot judindProp, over(judind, label(nolabel)) marker(1, msize(large)) ///
   yscale(range(0 100)) title("Constitution Contains an Explicit Declaration" ///
   "Regarding Judicial independence", bexpand position(3) ///
   orientation(rvertical) box fcolor(bluishgray) size(vsmall)) ///
   yscale(off) ylabel(, grid glcolor(bluishgray)) ytitle("") xsize(7) ///
   ysize(8) text(80 84 "Yes" 30 80 "No", size(small)) saving(fig11310, replace)
** description of the state (axis hidden) -> fig11311
graph dot fedunitProp, over(fedunit, label(nolabel)) marker(1, msize(large)) ///
   yscale(range(0 100)) title("Description" "of the State", bexpand ///
   position(3) orientation(rvertical) box fcolor(bluishgray) size(vsmall)) ///
   yscale(off) ylabel(, grid glcolor(bluishgray)) ytitle("") xsize(7) ///
   ysize(8) text(80 88 "Federal" 55 80 "Confederal" 22 80 "Unitary", ///
   size(small)) saving(fig11311, replace)
** official or national language; keeps its axis and axis title, and uses
**   vsmall text for the five category names -> fig11312
graph dot langProp, over(lang, label(nolabel)) marker(1, msize(large)) ///
   yscale(range(0 100)) title("Official or" "National Language", bexpand ///
   position(3) orientation(rvertical) box fcolor(bluishgray) size(vsmall)) ///
   ylabel(, grid glcolor(bluishgray)) ytitle("Percent of Constitutions") ///
   xsize(7) ysize(8) text(80 95 "Official Only" 75 80 "National Only" 55 80 ///
   "Both Official and National" 37 80 "No Except for Govt. Business" 15 80 ///
   "No Languages Mentioned", size(vsmall)) saving(fig11312, replace)


** combine plots
**   six panels in a 3 x 2 layout with a common x scale; the two panels that
**   keep their axes (fig1139, fig11312) are listed last
graph combine fig1137.gph fig11310.gph fig1138.gph fig11311.gph ///
   fig1139.gph fig11312.gph, imargin(tiny) row(3) col(2) xcommon ///
   title(Dot Plots from the CCP Database, size(medium)) 

** Figure 11.4 The table on top is the same as Table 11.3, which 
**   shows data on a racial profiling. The mosaic plot on the bottom 
**   presents the same data in a more concise and appealing fashion. 
**   The width of the bars depicts the number of searches per year 
**   while the height of each tile conveys the relative number of 
**   searches that are conducted on drivers of each race during each year.
**   With this plot, it is much easier to see, for example, the large 
**   percentage of searches in 1995 and 1996 that were of black drivers 
**   and how that percentage declines beginning in 1997
** Section 11.1, p. 274

** load the search data, then fetch the user-written spineplot command
**   from SSC (needs network access)
use gross.dta, clear
ssc install spineplot

** all six bar styles use the same gray fill; build that option list once
local grayBars
forvalues b = 1/6 {
   local grayBars `grayBars' bar`b'(color(gray))
}

** mosaic plot of race by year; race names are written on the second
**   y axis at hand-picked heights and the legend is turned off
spineplot race year, `grayBars' ///
   xtitle("Year", axis(2)) ///
   xscale(off) ///
   ytitle("Race of Person Searched", axis(2)) ///
   ylabel(0.90 "White" 0.45 "Black" 0.03 "Hispanic",noticks axis(2)) ///
   plotregion(margin(zero)) ///
   legend(off) ///
   title("Fig. 11.4 Data on a Racial Profiling", size(medium))

** Figure 11.5 The top panel reproduces the raw data table in Table 11.4. 
**   The bottom panel is a bivariate scatterplot of the full sample. 
**   The solid line is a smooth loess curve that summarizes the relationship 
**   between the payments and hours. We've also noted several outlier 
**   economies--those where the number of payments does not provide an 
**   especially good prediction of the number of hours
** Section 11.1, p. 276

** load the tax sample
use taxSampleWorldBank.dta, clear

** scatter of hours on payments with a lowess curve (bandwidth 0.6) drawn
**   as a very thick solid black line; four economies are labelled by hand
**   at fixed (y x) coordinates
lowess timehoursperyear paymentsnumberperyear, ///
   bwidth(0.6) ///
   mcolor(gray) msymbol(circle) ///
   lineopts(lcolor(black) lwidth(vthick) lpattern(solid)) ///
   ylabel(0(250)770, grid glcolor(bluishgray)) ///
   xscale(range(5 70)) ///
   xlabel(20(20)60, grid glcolor(bluishgray)) ///
   note("") ///
   title("Loess Smoother:" "Relationship between Payments and Hours", ///
      size(medium)) ///
   text(513 28 "Ukraine" ///
        852 32 "Vietnam" ///
        795 65 "Venezuela" ///
        675 44 "Cameroon", size(small))

** Figure 11.6 Life expectancy in years for three countries. From the raw 
**   data, partially reproduced above from Table 11.5, it is difficult to 
**   decipher trends across time. Below the table, we provide a time series 
**   plot of the same data. Data points for each year are represented by a 
**   hollow circle. The time series plot draws attention to the upswing 
**   in life expectancy in all three countries, as well as the distance 
**   between China versus Germany and the UK
** Section 11.1, p. 277

** load the life expectancy series
use lifeExpect.dta, clear

** one connected line per country against year, all with hollow black
**   circles; germany is drawn thick, china and uk thin, and only the china
**   series sets cmissing(n). Country names are placed by hand near the left
**   edge and the legend is turned off.
twoway ///
   (connected china year, ///
      mcolor(black) msize(medlarge) msymbol(circle_hollow) ///
      lcolor(black) lwidth(thin) cmissing(n)) ///
   (connected germany year, ///
      mcolor(black) msize(medlarge) msymbol(circle_hollow) ///
      lcolor(black) lwidth(thick)) ///
   (connected uk year, ///
      mcolor(black) msize(medlarge) msymbol(circle_hollow) ///
      lcolor(black) lwidth(thin)), ///
   ytitle(Life Expectancy (years)) ///
   yscale(range(70 80)) ///
   ylabel(72(3)79, angle(horizontal)) ///
   xtitle(Year) ///
   xscale(range(1990 2011)) ///
   xlabel(1995(5)2010, grid glcolor(bluishgray)) ///
   text(70.5 1992 "China" 75 1992 "Germany" 77.3 1992 "United Kingdom") ///
   legend(off)

**Table 11.6 Logistic regression of the analysis of the votes of 44 
**   individual U.S. Supreme Court justices on whether to uphold or 
**   invalidate a federal law, 1937-2009. Tables of this sort are common 
**   in empirical legal research but have their share of problems. One is 
**   that the variable names are not clear. It isn't obvious, for example,
**   that "Lowct" means the "ideological direction of the lower court's 
**   decision." We offer some correctives in Table 11.7. Another problem 
**   is that most readers won't be able to interpret the coefficients; not 
**   even the authors can, in their heads, translate them into interesting 
**   quantities. To remedy this pervasive problem, we provide 
**   suggestions on how to move from meaningless to meaningful 
**   communication. See Figure 11.8
** Section 11.2, p. 280

** outcome and covariates used by every model in this table
local restraintModel vote_jud_rev MQ_mean lct_dir cert_jur civ_lib N_cases term

** conservative law (law_ideology==0): reload the data, drop rows with
**   missing lct_dir, and fit the logit
use judicialRestraint.dta, clear
drop if missing(lct_dir) 
keep if law_ideology==0
logit `restraintModel'

** install pre package and compute proportional reduction in error 
**   for conservative law
ssc install pre
pre logit `restraintModel'

** liberal law (law_ideology==1): same steps on the other subsample
use judicialRestraint.dta, clear
drop if missing(lct_dir) 
keep if law_ideology==1
logit `restraintModel'

** proportional reduction in error for liberal law
pre logit `restraintModel'

** Figure 11.7 Nomograms depicting the results from Table 11.7's logistic
**   regression analysis of whether individual justices vote to strike or
**   uphold federal laws. Nomograms allow the reader to discern 
**   visually the estimated coefficients and the uncertainty around 
**   the estimates and to quickly spot whether the effects are statistically 
**   significant by looking at whether the confidence intervals cross zero
** Section 11.2, p. 284

** install necessary packages (user-written; needs network access):
**   parmest turns estimation results into a dataset, egenmore supplies the
**   axis() egen function, and clarify is used for the simulations further
**   below in this file
ssc install parmest
ssc install egenmore
net from http://gking.harvard.edu/clarify
net install clarify 

** make top panel figure for conservative law
** read in data
use judicialRestraint.dta, clear
drop if missing(lct_dir) 
keep if law_ideology==0

** run logistic regression, then replace the data in memory with one row
**   per estimated parameter (parmest, norestore)
logit vote_jud_rev MQ_mean lct_dir cert_jur civ_lib N_cases term
parmest, norestore
** drop the seventh row -- presumably the constant, which follows the six
**   predictors; confirm against the parmest output
drop in 7
** axis gives each remaining row a plotting position derived from estimate
egen axis= axis(estimate)

** generate figure: point estimates with capped spikes from min95 to max95
**   and a vertical reference line at zero
** NOTE(review): the ylabel positions below are hard-coded to the ordering
**   of the estimates in this subsample; re-check them if the data change
twoway scatter axis estimate, xline(0, lcolor(black)) ///
   ylab(, angle(horizontal)) ||  rcap min95 max95 axis, horizontal ///
   xscale(range(-0.7 1.2)) xlabel(-0.5(0.5)1, grid glcolor(bluishgray)) ///
   ylabel(1 "Justice's Ideology" 3 "Lower Court's Decision" 2 ///
   "Discretionary Review or Not" 6 "Civil Liberties Case or Not" ///
   4 "N of Cases Decided During Term" 5 "Term of Court", labels) ///
   xtitle(Logistic Regression Coefficient) ytitle("") title(Conservative ///
   Law, size(medium)) legend(off) saving(fig1171, replace)

** make bottom panel for liberal law: same steps on law_ideology==1, with
**   its own hard-coded ylabel positions
use judicialRestraint.dta, clear
drop if missing(lct_dir) 
keep if law_ideology==1
logit vote_jud_rev MQ_mean lct_dir cert_jur civ_lib N_cases term
parmest, norestore
drop in 7
egen axis= axis(estimate)
twoway scatter axis estimate, xline(0, lcolor(black)) ///
   ylab(, angle(horizontal)) ||  rcap min95 max95 axis, ///
   horizontal xscale(range(-0.7 1.2)) xlabel(-0.5(0.5)1, ///
   grid glcolor(bluishgray)) ylabel(6 "Justice's Ideology" 5 ///
   "Lower Court's Decision" 1 "Discretionary Review or Not" 4 ///
   "Civil Liberties Case or Not" 2 "N of Cases Decided During Term" 3 ///
   "Term of Court", labels)  xtitle(Logistic Regression Coefficient) ///
   ytitle("") title(Liberal Law, size(medium)) legend(off) ///
   saving(fig1172, replace)

** combine into one figure: the two saved panels stacked in one column
graph combine fig1171.gph fig1172.gph, col(1) ///
   title("Fig. 11.7 Nomogram of Logistic Regression Analysis of" ///
   "whether Individual Justices Vote to Strike or Uphold Federal Laws", ///
   size(small))

** Figure 11.9 Reproduction of the final panel in Figure 11.8. Here, we 
**   use Epstein and Landes's models (in Table 11.7) to show probability 
**   of a justice voting to invalidate a conservative law (panel 1) and 
**   a liberal law (panel 2) across a range of Justice's Ideology. 
**   In both panels we depict our uncertainty, in the form of 95% 
**   confidence intervals, with gray shading. To avoid cluttering in 
**   the third panel, we eliminate the confidence intervals and simply 
**   show the two sets of probabilities
** Section 11.2, p. 289

** Panel 1 (conservative laws, law_ideology==0): simulated 95% bounds and
**   the point prediction of the probability across a grid of MQ_mean values
** read in data
use judicialRestraint.dta, clear
drop if missing(lct_dir) 
keep if law_ideology==0

** run model and set parameters
**   estsimp fits the logit and stores simulated parameters; setx fixes
**   lct_dir, cert_jur, and civ_lib at 1 and N_cases, term at their means
estsimp logit vote_jud_rev MQ_mean lct_dir cert_jur civ_lib N_cases term
setx (lct_dir cert_jur civ_lib) 1 (N_cases term) mean
set more off
** ctop / cbottom will hold the upper and lower bounds for each grid point
gen ctop = .
gen cbottom = .
** ruler is the x-axis grid: step 0.0025, filled in rows 1 to 3143 only
gen ruler = _n*0.0025-4.1075 in 1/3143
** step mq from -4.11 to 3.83 by 0.0025; at each step set MQ_mean, simulate
**   the probability of outcome 1, and store its 2.5th and 97.5th percentiles
**   in the row whose ruler value equals float(mq). Rows are matched by exact
**   equality, so steps with no equal ruler value store nothing.
local mq = -4.11
while `mq' <=3.83 {
display "Simulating for MQ_mean = `mq'"
setx MQ_mean `mq'
simqi, prval(1) genpr(probs)
_pctile probs, p(2.5, 97.5)
replace cbottom = r(r1) if ruler==float(`mq')
replace ctop = r(r2) if ruler==float(`mq')
drop probs
local mq = `mq'+0.0025
}
** copy each coefficient into a constant variable so the prediction can be
**   computed as a variable expression over the ruler grid
gen mq = _b[MQ_mean]
gen lct = _b[lct_dir]
gen cert = _b[cert_jur]
gen cl = _b[civ_lib]
gen cases = _b[N_cases]
gen Term = _b[term]
gen cons = _b[_cons]
** covariate values for the prediction: 1 for the three indicators and
**   hard-coded constants for N_cases and term
** NOTE(review): presumably these constants are sample means -- confirm they
**   agree with the means setx used for the simulated bounds above
gen vec1 = 1
gen vecCases = 119.2724
gen vecTerm = 1975.503
gen logitPred = invlogit(mq*ruler+lct*vec1+cert*vec1+cl*vec1+cases*vecCases+Term*vecTerm+cons)

** make figure for conservative laws: light gray spikes between the bounds
**   with the predicted probability drawn on top as a black line
twoway (rspike ctop cbottom ruler, lcolor(gs13)) ///
   (line logitPred ruler, lcolor(black)), ytitle(Probability ///
   Justice Votes to Invalidate the Law) yscale(range(0 0.8)) ///
   ylabel(0.0(0.25)0.75, angle(horizontal) grid glcolor(bluishgray)) ///
   xtitle(Justice's Ideology) xlabel(, grid glcolor(bluishgray)) ///
   title(Conservative Laws) legend(off) saving(fig1191, replace) 

** make second panel for liberal laws (law_ideology==1): same procedure as
**   the conservative panel but with a grid step of 0.0034
use judicialRestraint.dta, clear
drop if missing(lct_dir) 
keep if law_ideology==1

** run model 
**   estsimp fits the logit and stores simulated parameters; setx fixes
**   lct_dir, cert_jur, and civ_lib at 1 and N_cases, term at their means
estsimp logit vote_jud_rev MQ_mean lct_dir cert_jur civ_lib N_cases term
setx (lct_dir cert_jur civ_lib) 1 (N_cases term) mean
set more off
** ctop / cbottom will hold the upper and lower bounds for each grid point
gen ctop = .
gen cbottom = .
** ruler is the x-axis grid: step 0.0034, filled in rows 1 to 2334 only
gen ruler = _n*0.0034-4.1066 in 1/2334
** step mq from -4.11 to 3.83 by 0.0034; at each step set MQ_mean, simulate
**   the probability of outcome 1, and store its 2.5th and 97.5th percentiles
**   in the row whose ruler value equals float(mq). Rows are matched by exact
**   equality, so steps with no equal ruler value store nothing.
local mq = -4.11
while `mq' <=3.83 {
display "Simulating for MQ_mean = `mq'"
setx MQ_mean `mq'
simqi, prval(1) genpr(probs)
_pctile probs, p(2.5, 97.5)
replace cbottom = r(r1) if ruler==float(`mq')
replace ctop = r(r2) if ruler==float(`mq')
drop probs
local mq = `mq'+0.0034
}
** copy each coefficient into a constant variable so the prediction can be
**   computed as a variable expression over the ruler grid
gen mq = _b[MQ_mean]
gen lct = _b[lct_dir]
gen cert = _b[cert_jur]
gen cl = _b[civ_lib]
gen cases = _b[N_cases]
gen Term = _b[term]
gen cons = _b[_cons]
** covariate values for the prediction: 1 for the three indicators and
**   hard-coded constants for N_cases and term (the same constants as in the
**   conservative panel)
** NOTE(review): presumably these constants are sample means -- confirm they
**   agree with the means setx used for this subsample above
gen vec1 = 1
gen vecCases = 119.2724
gen vecTerm = 1975.503
gen logitPred = invlogit(mq*ruler+lct*vec1+cert*vec1+cl*vec1+cases*vecCases+Term*vecTerm+cons)

** make the figure: light gray spikes between the bounds with the predicted
**   probability drawn on top as a black line
twoway (rspike ctop cbottom ruler, lcolor(gs13)) (line ///
   logitPred ruler, lcolor(black)), ytitle(Probability Justice ///
   Votes to Invalidate the Law) yscale(range(0 0.8)) ///
   ylabel(0.0(0.25)0.75, angle(horizontal) grid glcolor(bluishgray)) ///
   xtitle(Justice's Ideology) xlabel(, grid glcolor(bluishgray)) ///
   title(Liberal Laws) legend(off) saving(fig1192, replace)

** third panel: both predicted-probability curves on one plot, without
**   confidence bounds. Both subsamples stay in memory; each model is fit on
**   copies of the variables that are missing outside its subsample.
use judicialRestraint.dta, clear
drop if missing(lct_dir) 

** outcome first, then the six covariates
local modelVars vote_jud_rev MQ_mean lct_dir cert_jur civ_lib N_cases term

** conservative law: copies defined only where law_ideology==0
foreach v of local modelVars {
   gen `v'_cons=`v' if law_ideology==0
}
logit vote_jud_rev_cons MQ_mean_cons lct_dir_cons cert_jur_cons ///
   civ_lib_cons N_cases_cons term_cons
** keep the coefficients as constant variables for the prediction below
gen mq_cons = _b[MQ_mean_cons]
gen lct_cons = _b[lct_dir_cons]
gen cert_cons = _b[cert_jur_cons]
gen cl_cons = _b[civ_lib_cons]
gen cases_cons = _b[N_cases_cons]
gen Term_cons = _b[term_cons]
gen int_cons = _b[_cons]
** x-axis grid (step 0.00145, rows 1 to 5477) and the fixed covariate
**   values shared by both predictions
gen ruler = _n*0.00145-4.10855 in 1/5477
gen vec1 = 1
gen vecCases = 119.2724
gen vecTerm = 1975.503
gen logitPred_cons = invlogit(mq_cons*ruler+lct_cons*vec1+cert_cons*vec1 ///
   +cl_cons*vec1+cases_cons*vecCases+Term_cons*vecTerm+int_cons)

** liberal law: copies defined only where law_ideology==1
foreach v of local modelVars {
   gen `v'_lib=`v' if law_ideology==1
}
logit vote_jud_rev_lib MQ_mean_lib lct_dir_lib cert_jur_lib ///
   civ_lib_lib N_cases_lib term_lib
gen mq_lib = _b[MQ_mean_lib]
gen lct_lib = _b[lct_dir_lib]
gen cert_lib = _b[cert_jur_lib]
gen cl_lib = _b[civ_lib_lib]
gen cases_lib = _b[N_cases_lib]
gen Term_lib = _b[term_lib]
gen int_lib = _b[_cons]
gen logitPred_lib = invlogit(mq_lib*ruler+lct_lib*vec1+cert_lib*vec1 ///
   +cl_lib*vec1+cases_lib*vecCases+Term_lib*vecTerm+int_lib)

** conservative curve long-dashed, liberal curve thick and solid
twoway ///
   (line logitPred_cons ruler, lpattern(longdash) lcolor(black)) ///
   (line logitPred_lib ruler, lcolor(black) lwidth(thick)), ///
   ytitle(Probability Justice Votes to Invalidate the Law) ///
   yscale(range(0.0 0.8)) ///
   ylabel(0.0(0.25)0.75, angle(horizontal) grid glcolor(bluishgray)) ///
   xtitle(Justice's Ideology) ///
   xlabel(, grid glcolor(bluishgray)) ///
   title(All Laws) legend(off) ///
   saving(fig1193, replace)

** place the three saved panels side by side in one row
graph combine fig1191.gph fig1192.gph fig1193.gph, ///
   row(1) fxsize(150) fysize(80) ///
   title("Best Communication:" ///
   "Graph the Quantity of Interest Plus Uncertainty Across Values", ///
   size(medium))

** file cleanup: delete the intermediate .gph panels saved above
foreach panel in fig1111 fig1112 fig1121 fig1122 fig11310 fig11311 ///
   fig11312 fig1137 fig1138 fig1139 fig1171 fig1172 fig1191 fig1192 ///
   fig1193 {
   erase `panel'.gph
}
