clear all
capture log close
log using lecture5, replace text
//commands corresponding to lecture 5 of the Summer Epi Stata class
set more off
//histograms
use transplants, clear
//basic histogram (density histogram)
hist bmi
//specify that each bar of the histogram covers an interval of two units
hist bmi, width(2)
//specify age interval of two years, and start the graph at age 0
hist bmi, width(2) start(0)
//specify that we want ten bars
hist bmi, bin(10)
//specify that we want a *fraction* histogram (height of 0.1=10% of all values)
hist rec_wgt_kg, fraction width(2)
hist rec_wgt_kg, fraction width(10)
//a *percent* histogram (height of 10 = 10% of all values)
hist rec_wgt_kg, percent width(2)
hist rec_wgt_kg, percent width(10)
//a *frequency* histogram (height = # of records represented by one bar)
hist rec_wgt_kg, freq width(2)
hist rec_wgt_kg, freq width(10)
//illustrating hist, discrete discrete
hist dx //each bar is < 1 diagnosis - histogram looks weird
hist dx, discrete //1 bar per diagnosis
//scatter plots
use donors, clear
graph twoway scatter don_wgt don_hgt
//abbreviation
twoway scatter don_wgt don_hgt
tw sc don_wgt don_hgt
//line plots
use transplants, clear
bys age: egen mean_ecd = mean(don_ecd)
egen age_tag = tag(age)
//just need one observation for each age
graph twoway line mean_ecd age if age_tag==1
//The following code makes the dataset tx_yr.dta
//remove the comment braces to run the code and create tx_yr.dta
use transplants, clear
gen int yr = year(transplant_date)
gen byte n=1
rename gender female
rename don_ecd ecd
gen over70 = (age>70)
gen unknown_disease = dx==9
gen hypertensive = dx==4
gen diabetes = dx==2
gen not_working = cond(rec_work == 0,1,0)
collapse (sum) not_working n hypertensive unknown_disease diabetes ecd female rec_hcv_antibody over70 , by(yr)
gen int male=n-female
gen int scd = n-ecd
gen total= n
lab var total "Total Num. of Recipients"
lab var not_working "Num. Unemployed Recipients"
lab var yr "Transplant Year"
lab var rec_hcv "Number HCV+ Recipients"
lab var over70 "Number Recips. Over 70 Years Old"
save tx_yr, replace
use tx_yr.dta, clear
desc
//show various kinds of graph in their simplest form
graph twoway line n yr //line graph
graph twoway connected n yr //connected graph
graph twoway area n yr //area graph
graph twoway bar n yr //bar graph
graph twoway scatter ecd scd //scatter plot
graph twoway function y=x^2+2 //function
//range() option for function
graph twoway function y=x^2+2, range (1 10) //function
graph twoway function y=x^2+2, range(yr)
//graphing more than one Y variable
graph twoway line ecd scd yr
graph twoway line n ecd scd yr
graph twoway area ecd scd yr //ecd area is hidden by scd area
graph twoway area scd ecd yr //now ecd area shows, since it's drawn second
graph twoway bar scd ecd yr
//overlaying several plots
graph twoway line n yr || connected male female yr
//another way of writing the same thing, using /// to continue the same
//command on two lines
graph twoway line n yr || ///
connected male female yr
//overlay observed data with linear regression fit
regress n yr
graph twoway line n yr ///
|| function y=_b[_cons]+_b[yr]*x, range(yr)
//illustrate that you can combine lots of plots in one graph
graph twoway line female yr ///
|| line male yr ///
|| line scd yr ///
|| line ecd yr ///
|| line n yr
//of course, you could make the same graph with
//graph twoway line female male scd ecd n yr
//xscale/yscale
graph twoway line n yr, yscale(range(0)) //range of Y axis includes zero
graph twoway line n yr, yscale(range(0 400)) //Y axis range includes 0 and 400
//specify ranges for both X and Y axes
graph twoway line n yr, xscale(range(2018))
graph twoway line female yr, yscale(log) //write Y axis on a log scale
graph twoway line female yr, xscale(reverse) //reverse the X axis - mirror image
graph twoway line female yr, xscale(off) yscale(off) //suppress axes entirely
//combining several scale options
//this graph doesn't look great. For the variables that we're using for this
//exercise, the default Stata axes look pretty good. But this illustrates
//that you can combine as many scale options as you want.
graph twoway line ecd yr, xscale(off) yscale(log range(1) reverse)
//graph label options
//Pick "approximately four" nice values based on axis range
graph twoway line n yr, yscale(range(0)) ylabel(#4)
//label minimum and maximum values
graph twoway line n yr, yscale(range(0)) ylabel(minmax)
//start at 0, and go in increments of 10 up to 110
graph twoway line n yr, yscale(range(0)) ylabel(0(50)250)
//add "ticks" (small vertical line) every unlabeled year in X axis
graph twoway line n yr, yscale(range(0)) ylabel(0(50)250) xtick(2006(1)2015)
//axis titles
//DDKT = deceased donor kidney transplant
graph twoway line n yr, xtitle("Calendar year") ytitle("DDKT") ylabel(0(50)250)
//graph titles
graph twoway line n yr, title("Transplants per year") ylabel(0(50)250)
//title and subtitle
graph twoway line n yr, title("Transplants per year") ylabel(0(50)250) ///
subtitle("2006-2015")
//note and caption options
graph twoway line n yr, title("title") subtitle("subtitle") ///
note("note") caption("caption")
//legend options
//put the legend inside the graph
graph twoway line male female yr, legend(ring(0))
//put the legend inside the graph, in upper-left corner ("eleven o'clock")
graph twoway line male female yr, legend(ring(0) pos(11))
//put the legend inside the graph, in lower-right corner ("five o'clock")
graph twoway line male female yr, legend(ring(0) pos(5))
//put the legend at 5:00 - change Y range so it fits
graph twoway line male female yr, legend(ring(0) pos(5)) yscale(range(0))
//put the different legend "keys" in one column
graph twoway line male female yr, legend(ring(0) pos(5) cols(1))
//change the order of the "keys"
//in this case, you just could do "twoway line female male yr"
//but order() is really handy when you have complicated graphs containing
//multiple types of plot
graph twoway line male female yr, legend(ring(0) pos(5) order(2 1))
graph twoway line male female yr, legend(ring(0) pos(5) cols(1) order(2 1))
//print the legend for males but not females
// ("order") suppresses legend for anything that's not included)
graph twoway line male female yr, legend(ring(0) pos(5) cols(1) order(1))
//legend labels
graph twoway line male female yr, ///
legend(ring(0) pos(5) cols(1) label(1 "Men") label(2 "Women"))
//force legend to print (when it normally wouldn't)
graph twoway line n yr, legend(on)
//force legend *NOT* to print (when it normally would)
graph twoway line male female yr, legend(off)
//line options
twoway line n yr, xline(2007)
twoway line n yr, yline(200)
//text option
//add the text "policy change" at y=300 x=2007 on the graph
twoway line n yr, xline(2007) text(200 2007 "Policy change")
twoway line n yr, ylabel(0(50)250) text(225 2014 "Local peak in 2014")
//twoway line options
//sort
//first, we have to *UN*sort
sort n
list, clean noobs
//display unsorted graph
twoway line n yr, ylabel(0(100)400)
//now display the graph with the line drawn properly
twoway line n yr, ylabel(0(100)400) sort
//options for drawing the line: line color
twoway line scd ecd yr, lcolor(green yellow) ylabel(0(50)200) sort
//line thickness
twoway line scd ecd yr, lwidth(thick thick) sort
//valid thicknesses are:
//vvthin vthin thin medthin medium medthick thick vthick vvthick vvvthick
//line pattern
twoway line scd ecd yr, lpattern(solid dash) sort
//valid patterns are:
//solid dash dot dash_dot shortdash shortdash_dot longdash longdash_dot blank
//scatter plot options
use transplants.dta, clear
keep if peak_pra <= 10
graph twoway scatter peak_pra age
//add jitter (random noise)
graph twoway scatter peak_pra age, jitter(2)
graph twoway scatter bmi age if gender==0, mcolor(orange) ///
|| scatter bmi age if gender==1, mcolor(black)
//marker symbol
graph twoway scatter bmi age if gender==0, msymbol(D) ///
|| scatter bmi age if gender==1, msymbol(+)
//marker size
graph twoway scatter bmi age if gender==0, msize(small) ///
|| scatter bmi age if gender==1, msize(large)
//valid sizes:
// tiny vsmall small medsmall medium medlarge large vlarge huge
//graph bar
use transplants.dta,clear
graph bar (mean) bmi ,over(race)
graph bar (mean) bmi ,over(race, label(angle(45)))
graph bar (mean) bmi age ,over(race, label(angle(45)))
graph bar (median) bmi age ,over(race, label(angle(45)))
//graph box
graph box bmi age,over(rec_education,label(angle(45)))
graph box bmi,over(prev_ki) over(rec_hcv)
//create label so that it shows up on graph
label define prev_ki_label 0 "First time KT" 1 "Previous KT"
label values prev_ki prev_ki_label
graph box age,over(prev_ki) over(rec_hcv) b1title(Recipient HCV and Previous KT)
//saving a graph as a Stata .gph file
graph save bmi_plot.gph, replace
//exporting a graph to .PNG (for putting in a document)
graph export bmi_plot.png, replace
graph export bmi_plot.png, replace width(2400) //saves with more pixels
log close