//close any log left open by an earlier run (no error if none is open)
capture log close
log using lecture3.log, text replace
//This log collects most of the examples from Lecture 3 of
//Stata Programming and Data Management, plus extra explanations
//and examples.

//written in Stata 15, but interpreted as version 12 for students on older releases
version 12
//start from a clean session: drop data (and other objects) from memory
clear all
//drop every macro currently defined
macro drop _all
//show output all at once instead of pausing after each screenful
set more off
//wrap output at 80 characters per line
set linesize 80
//%3.2f shows two decimal places, so a very small value is displayed as 0.00
foreach denom of numlist 78 7778 {
    local p = 1/`denom'
    display %3.2f `p'
}
use transplants, clear
//conditional if: the braces run only when the expression is true
local nobs = _N
if `nobs' > 100 {
    display "Meh. That's a big dataset"
}
//if ... else: exactly one of the two branches runs
if `nobs' > 10000 {
    display "That's a huge dataset"
}
else {
    display "Bah! Not so big."
}
//cross-tabulate dx by gender; the chi2 option leaves the p-value in r(p)
tabulate dx gender, chi2
//report small p-values as a threshold, anything else with two decimals
if r(p) < 0.05 {
    display "p<0.05"
}
else {
    display %3.2f r(p)
}
//count records whose outcome is something other than 0, 1, or missing;
//the logistic regression is run only when there are none
count if !inlist(rec_hcv_antibody, 0, 1, .)
if r(N) == 0 {
    logistic rec_hcv_antibody age
}
else {
    display "Non-binary outcome. " ///
        "can't do logistic regression"
}
//note the line continuation ( ///) in the else-branch above.
//Stata reads the line after " ///" as if it were part of the same line.
//line continuations can be used to improve the readability of a do-file,
//but they should never change what the code does.
//for loops.
//first pass: Stata creates a macro i and sets it equal to 1, runs the body
//(here: display macro i), then goes back to the "forvalues" statement.
//the second pass runs with i equal to 2, the third with 3, the fourth
//with 4, and the last one with 5. Then the loop stops.
forvalues i = 1(1)5 {
    display `i'
}
//all the output of a "forvalues" loop appears together, because Stata does
//not start running the code in the loop until you type the closing brace }.
//So if you have several "disp" statements and want their output shown
//together (rather than separately, after each "disp" command in your code),
//you can put them all inside a loop that only runs once.
//another "forvalues" example: for each whole number b from 35 to 45,
//count the records with age in the interval [b, b+1)
forvalues b = 35/45 {
    local upper = `b' + 1
    quietly count if age >= `b' & age < `upper'
    display "Age of `b': " r(N) " patients"
}
//age and wait_yrs are reported as a mean; any other variable in the list
//(here: died) is reported as 100 * mean followed by a percent sign
foreach v of varlist age wait_yrs died {
    //both kinds of row need the same summary, so compute it once
    quietly summarize `v'
    if !inlist("`v'", "age", "wait_yrs") {
        display "Percent `v': " _col(20) %3.2f 100 * r(mean) "%"
    }
    else {
        display "Mean `v': " _col(20) %3.2f r(mean)
    }
}
//foreach works like forvalues, except that forvalues steps the macro
//through increasing numbers while foreach steps it through the items
//of a list.
//there are several varieties of foreach; a useful one is
//foreach... of varlist...
//on the first pass macro v equals "age", on the second "wait_yrs",
//and on the third "died".
foreach v of varlist age wait_yrs died {
    //summarize whichever variable v currently names, then show its maximum
    quietly summarize `v'
    display "Max `v': " _col(10) r(max)
}
//example of foreach.. of varlist with an if-statement inside the loop:
//the variable's name decides which kind of row is displayed
foreach v of varlist age wait_yrs died {
    quietly summarize `v'
    if inlist("`v'", "age", "wait_yrs") {
        //these two are shown as a plain mean
        display "Mean `v': " _col(20) %3.2f r(mean)
    }
    else {
        //anything else is shown as 100 * mean with a percent sign
        display "Percent `v': " _col(20) %3.2f 100 * r(mean) "%"
    }
}
//table1 program from lecture 2
capture program drop table1
program define table1
    //prints a fixed table: mean (SD) and range for age, wait_yrs, and bmi.
    //there is no syntax statement, so the program never reads its arguments
    display "Variable" _col(20) "mean (SD)" _col(40) "range"
    foreach v in age wait_yrs bmi {
        quietly summarize `v'
        display "`v'" _col(20) %3.2f r(mean) " (" %3.2f r(sd) ")" ///
            _col(40) %3.2f r(min) "-" %3.2f r(max)
    }
end
//run the program
table1
//table1 never reads its arguments, so the same three-variable table is shown
table1 age peak_pra
//add "syntax varlist" to control the list of variables
capture program drop table1_v
program define table1_v
    //the caller must supply one or more variables; syntax stores them
    //in local macro varlist
    syntax varlist
    //write out a nicely formatted table: a header, then one row per variable
    //NOTE: could also write out with commas instead of spacing
    //which would make it easier to import into Excel
    display "Variable" _col(15) "mean(SD)" _col(30) "range"
    foreach v of local varlist {
        quietly summarize `v'
        display "`v'" _col(15) %3.2f r(mean) "(" %3.2f r(sd) ")" ///
            _col(30) %3.2f r(min) "-" %3.2f r(max)
    }
end
//one table row for each of the three variables listed
table1_v age wait_yrs rec_wgt_kg
//demonstrate behavior of local macros inside programs
//drop any earlier definition first, as the other programs in this file do,
//so this section can be re-run without an "already defined" error
capture program drop varprogram
program define varprogram
    //syntax stores the variables typed by the caller in local macro varlist
    syntax varlist
    display "`varlist'" //displays the varlist macro
end
//syntax varname: exactly one variable
//drop any earlier definition first, as the other programs in this file do,
//so this section can be re-run without an "already defined" error
capture program drop varfavorite
program define varfavorite
    //the single variable is still stored in the macro called varlist
    syntax varname
    display "My favorite variable is `varlist'" //displays the varlist macro
end
//inside each program, the varlist macro holds the variable that was passed in
varfavorite fake_id
varprogram fake_id
//out here the macro is empty: it was local to the programs,
//so Stata has forgotten it and nothing is displayed
display "`varlist'"
//drop any earlier definition first, as the other programs in this file do,
//so this section can be re-run without an "already defined" error
capture program drop macroprog
program define macroprog
    local a = 4 //define local macro a
    //macro a is known inside the program; macro b is not defined in here,
    //so it expands to nothing
    display "`a' `b'"
end
//define local macro b outside the program
local b = 3
macroprog
//out here Stata knows macro b, but macro a (defined inside macroprog) is empty
display "`a' `b'"
//drop any earlier definition first, as the other programs in this file do,
//so this section can be re-run without an "already defined" error
capture program drop varprogram2
program define varprogram2
    syntax [varlist] //now, "varlist" is optional
    display "`varlist'"
end
//with a variable: the varlist macro holds fake_id
varprogram2 fake_id
//without one: no error now; an omitted optional varlist defaults to all
//variables in the dataset, so every variable name is displayed
varprogram2
//drop any earlier definition first, as the other programs in this file do,
//so this section can be re-run without an "already defined" error
capture program drop myif
program define myif
    syntax [if] //accepts an optional "if" statement
    //compound double quotes keep this working when the qualifier itself
    //contains double quotes (for example a comparison with a string)
    if `"`if'"' != "" { //if there's something in the "if" statement...
        display `"`if'"' //display it
    }
    else {
        display "No if statement" //otherwise, display this message
    }
end
myif //displays "No if statement"
myif if age > 35 //displays "if age > 35"
capture program drop countif
program define countif
    //no square brackets here, so the if qualifier is required
    syntax if
    //count the records that satisfy the qualifier and report the total
    quietly count `if'
    display "total count is " r(N)
end
capture program drop table1_if
program define table1_if
    //varlist: variables to summarize; if: optional restriction on records
    syntax varlist [if]
    //write out a nicely formatted table
    display "Variable" _col(12) "mean(SD)" _col(25) "range"
    foreach v of local varlist {
        //when no qualifier was typed the if macro is empty and all records are used
        quietly summarize `v' `if'
        display "`v'" _col(12) %3.2f r(mean) "(" %3.2f r(sd) ")" ///
            _col(25) %3.2f r(min) "-" %3.2f r(max)
    }
end
//no if qualifier: summarize all records
table1_if age peak_pra wait_yrs
table1_if age peak_pra wait_yrs if race==1 //summarize for only race==1
//non-working program - as an example
capture program drop tabyear0
program define tabyear0
    syntax varname
    //creates a regular variable called year: this stops with an error if a
    //variable named year already exists (including on a second call), and
    //the new variable is left behind in the dataset afterwards
    generate year = year(`varlist')
    tabulate year
end
//here's how you *should* do it (using a temporary variable)
capture program drop tabyear
program define tabyear
    syntax varname
    //tempvar reserves a temporary variable name and stores it in a local macro
    tempvar yr
    //refer to the temporary variable with backquote-apostrophe (like a macro)
    //NOTE: this still works even if the dataset already has a variable
    //called "year"
    generate `yr' = year(`varlist')
    tabulate `yr'
    //the temporary variable disappears automatically when the program is done
end
//tabyear can be called repeatedly, because it leaves no variable behind
tabyear transplant_date
tabyear end_date
//an example of what *not* to do when you want to temporarily alter the dataset
capture program drop savesample0
program define savesample0
    //keep a random 10% of records and save them to sample_data.
    //the other 90% are deleted from the data in memory, and nothing
    //brings them back when the program ends!
    sample 10
    save sample_data, replace
end
//here's how you should do it
capture program drop savesample
program define savesample
    //save a random 10% of records without losing the rest:
    //preserve keeps a temporary copy of the data ...
    preserve
    //... so dropping 90% of the records here is harmless
    sample 10
    save sample_data, replace
    //when the program reaches the end, the temporary copy is reloaded,
    //undoing any changes
end
//count the records before and after savesample: because the program uses
//preserve, both counts should be the same
count
savesample
count
//combining several of the above techniques into a "table1" program
capture program drop table1_nice
program define table1_nice
    syntax varlist [if]
    //varlist: variables to summarize, one table row each
    //if: specify records to include in analysis (optional)
    //preserve: the full dataset is reloaded automatically when the program ends
    preserve
    //temporarily drop any records not required, but only when a qualifier
    //was given; an invalid qualifier now stops with the error from keep
    //instead of being silently hidden by capture
    if `"`if'"' != "" {
        quietly keep `if'
    }
    display "Variable" _col(12) "mean(SD)" _col(25) "range"
    foreach v of varlist `varlist' {
        //the data are already restricted, so the qualifier is not applied a
        //second time: after keep the records are renumbered, and a
        //qualifier based on _n or _N would then select different records
        quietly summarize `v'
        display "`v'" _col(12) %3.1f r(mean) "(" %3.1f r(sd) ")" _col(25) ///
            %3.1f r(min) "-" %3.1f r(max)
    }
end
//run the new program with an if qualifier
//(wait_yr is presumably an abbreviation of wait_yrs - this relies on
//variable-name abbreviation being allowed)
table1_nice age bmi wait_yr if age>40
//extended macro functions
//"local name: variable label var" copies a variable's label into a macro
local ll: variable label race
display "`ll'" //display "Patient race"
local ll: variable label gender
display "`ll'" //nothing
//"local name: display ..." stores formatted display output in a macro
summarize wait_yrs
local m1: display %3.2f r(mean)
display "`m1'"
//text and formatted numbers can be combined in the stored string
summarize wait_yrs
local m2: display "Mean: " %3.2f r(mean)
display "`m2'"
//putexcel
version 13 //need Stata 13 or higher to use putexcel
use transplants, clear
//begin with putexcel set to specify the filename
//NOTE(review): the bare form is expected to stop with an error once
//lecture3.xlsx exists (i.e. whenever this do-file is re-run) - confirm.
//capture noisily still shows that error but lets the do-file continue
capture noisily putexcel set lecture3.xlsx
putexcel set lecture3, replace //completely overwrites excel file
putexcel set lecture3, modify //allows you to keep excel formatting
putexcel A1=("Variable") B1=("Median (IQR)")
//age bmi peak_pra wait_yrs
summarize age, detail
//extended macro for putexcel: build the "median (p25-p75)" text first
local med_iqr: display r(p50) " (" r(p25) "-" r(p75) ")"
putexcel A2=("Age at transplant") B2=("`med_iqr'") //can putexcel multiple cells
//four variables: give each one a label to use as its row title
label variable age "Age at transplant"
label variable bmi "BMI"
label variable peak_pra "Peak PRA"
label variable wait_yrs "Years on waitlist"
//write one spreadsheet row per variable, starting at row 2
local row = 2
foreach v of varlist age bmi peak_pra wait_yrs {
    summarize `v', detail
    //column A: the variable label; column B: "median (p25-p75)"
    local varlabel: variable label `v'
    local med_iqr: display %3.1f r(p50) " (" %3.1f r(p25) "-" %3.1f r(p75) ")"
    putexcel A`row'=("`varlabel'") B`row'=("`med_iqr'")
    //move on to the next spreadsheet row
    local ++row
}
log close
exit