/* 

Do-file to produce results for the keynote talk
"Understanding Variability in Multilevel Models
for Categorical Responses"

by Sophia Rabe-Hesketh

(joint work with Anders Skrondal)

At the American Educational Research Association
Annual Meeting 2012, HLM SIG

A paper on this work is in preparation.

Run the do-file sequentially for each dataset
(later analyses may rely on variables created earlier)

*/

**************** PISA data *********************************************************

*** Slide 4: Distribution of SES

* read the data
use pisaUSA2000, clear

* generate school mean SES
egen mn_ses = mean(isei), by(id_school)

* generate deviation of student SES from school mean
generate dv_ses = isei - mn_ses

egen pick = tag(id_school)  /* takes the value 1 for one obs. per school */

summarize mn_ses if pick==1   

* tabulate percentiles
tabstat mn_ses if pick==1, stat(p1 p25 p50 p75 p99)
tabstat dv_ses , stat(p1 p25 p50 p75 p99)

* create variables for graph
egen p25_isei = pctile(isei), p(25) by(id_school)
egen p75_isei = pctile(isei), p(75) by(id_school)

egen min_isei = min(isei), by(id_school)
egen max_isei = max(isei), by(id_school)
summ min_isei max_isei if pick==1

twoway ///
  (rspike min_isei max_isei mn_ses, lpatt(dash) lcol(gs10)) ///
   (rcap p25_isei p75_isei mn_ses, lcol(navy)) ///
   (function y=x-11, range(mn_ses) lcol(black)) ///
   (function y=x+11, range(mn_ses) lcol(black)) ///
   (scatteri 45 45 28 39 62 51, msym(X) mcol(red) msize(large))  ///
   (scatteri 15 20 "mn-11" 37 20 "mn+11", msym(i) mlabcol(black)), ///
   xline(39, lcol(black)) xline(51, lcol(black)) ///
   xlabel(21(6)69) legend(order(1 "Min to Max" 2 "P25 to P75")) ///
   xtitle(School mean SES (mn)) ytitle(Student SES (mn+dv))

   
*** Slide 6: Maximum likelihood estimates of model parameters

* declare cluster identifier for xtlogit
xtset id_school

* null model
xtlogit pass_read, intpoints(20)
estimates store xt0  /* stores estimates for null model under the label xt0 */

* full model
xtlogit pass_read dv_ses mn_ses, intpoints(20)
estimates store xt   /* stores estimates for full model under the label xt0 */


*** Slide 7: Predicted conditional probabilities

estimates restore xt  /* retrieves estimates so that _b[cons] refers to intercept in full model */

* predicted probabilities when random intercept is zero, stored as global macros
global cproblo = invlogit(_b[_cons]+39*_b[mn_ses]-11*_b[dv_ses])
global cprobme = invlogit(_b[_cons]+45*_b[mn_ses])
global cprobhi = invlogit(_b[_cons]+51*_b[mn_ses]+11*_b[dv_ses])

* graph on left
twoway (function y = invlogit(_b[_cons]+39*_b[mn_ses]+_b[dv_ses]*x), range(-40 40) lpatt("-")) ///
(function y = invlogit(_b[_cons]+45*_b[mn_ses]+_b[dv_ses]*x), range(-40 40) lpatt("_")) ///
(function y = invlogit(_b[_cons]+51*_b[mn_ses]+_b[dv_ses]*x), range(-40 40) lpatt(solid)) ///
(scatteri $cproblo -11 $cprobme 0 $cprobhi 11, msym(x) msize(vlarge) mcol(red)), ///
 xtitle(Deviation of SES from school mean (dv)) ytitle(Probability of being proficient) ///
 yscale(range(0 1)) ylabel(0(.1)1) xlabel(-33(11)33)   ///
 legend(order(1 "mn = 39" 2 "mn = 45" 3 "mn = 51") rows(1))

* graph on right
twoway (function y = invlogit(_b[_cons]+_b[mn_ses]*x-11*_b[dv_ses]), range(25 68) lpatt("-")) ///
(function y = invlogit(_b[_cons]+_b[mn_ses]*x), range(25 68) lpatt("_")) ///
(function y = invlogit(_b[_cons]+_b[mn_ses]*x+11*_b[dv_ses]), range(25 68) lpatt(solid)) ///
 (scatteri $cproblo 39 $cprobme 45 $cprobhi 51, msym(x) msize(vlarge) mcol(red)), ///
 xtitle(School mean SES (mn)) ytitle(Probability of being proficient) ///
 yscale(range(0 1)) ylab(0(.1)1) xlabel(21(6)69) ///
 legend(order(1 "dv = -11" 2 "dv = 0" 3 "dv = 11") rows(1))


*** Slide 9: Median odds ratio

* for null model
estimates restore xt0
display exp(sqrt(2*exp([lnsig2u]_cons))*invnormal(3/4))

* for full model
estimates restore xt
display exp(sqrt(2*exp([lnsig2u]_cons))*invnormal(3/4))


*** Slide 12: Intraclass correlation of latent responses

* for null model
estimates restore xt0
display exp([lnsig2u]_cons)/(exp([lnsig2u]_cons) + _pi^2/3)

* for full model
estimates restore xt
display exp([lnsig2u]_cons)/(exp([lnsig2u]_cons) + _pi^2/3)


*** Slide 14: Intraclass correlation of observed responses
* Install xtrho by Rodriguez & Elo (2003). The Stata Journal 3, 32-46.

net from http://www.stata-journal.com/software/sj3-1
net install st0031

* null model
estimates restore xt0
global xb = _b[_cons]  /* save fixed part in global xb */
global sigma0 = exp([lnsig2u]_cons/2) /* save random-intercept SD */
xtrhoi $xb $sigma0 logit
global icco0 = r(r)  /* ICC of observed responses */
global mprob0 = r(mp)  /* Mean probability */

* full model
estimates restore xt
global xblo = _b[_cons] + 39* _b[mn_ses] - 11*_b[dv_ses]
global xbme = _b[_cons] + 45* _b[mn_ses] 
global xbhi = _b[_cons] + 51* _b[mn_ses] + 11*_b[dv_ses]
global sigma = exp([lnsig2u]_cons/2)

xtrhoi $xblo $sigma logit
global iccolo = r(r)
global mproblo = r(mp)

xtrhoi $xbme $sigma logit
global iccome = r(r)
global mprobme = r(mp)

xtrhoi $xbhi $sigma logit
global mprobhi = r(mp)
global iccohi = r(r)

display "ICCs of observed responses are" _n "lo: " %6.4f $iccolo ///
", me: " %6.4f $iccome ", hi: " %6.4f $iccohi

/*

*Figure 2 in Rodriquez and Elo (2003)
* this code can be skipped

save temp, replace

postutil clear
postfile corr condition prob sigma eta corr or using correlations, replace
matrix probs=(.5,.2,.1,.05,.025)
forvalues i=1/5{
	global prob = probs[1,`i']
	global eta = logit($prob)
	forvalues j=0/100 {
		global sig = `j'/20
		qui xtrhoi $eta $sig logit
		post corr (`i') ($prob) ($sig) ($eta) (r(r)) (r(or))
	}
}
postclose corr



use correlations, clear
generate icc = sigma^2/(_pi^2/3 + sigma^2)
sort condition sigma
twoway (line corr sigma, connect(ascending)) ///
   (line icc sigma if condition==1, sort lpatt(dash)), ///
   legend(order(1 "Observed" 2 "Latent")) ytitle(ICC) ///
   xtitle(Random-intercept standard deviation psi)

use temp, clear

*/


*** Slide 16: Standard deviation of probabilities

** use xtrho (see numerator of ICC_obs on slide 13)

xtrhoi $xb $sigma0 logit
global S0 = sqrt(r(jp)-r(mp)^2) /* SD of probabilities */

xtrhoi $xblo $sigma logit
global Slo = sqrt(r(jp)-r(mp)^2)

xtrhoi $xbme $sigma logit
global Sme = sqrt(r(jp)-r(mp)^2)

xtrhoi $xbhi $sigma logit
global Shi = sqrt(r(jp)-r(mp)^2)

disp "Standard deviations are" _n %6.4f $S0 " " ///
 %6.4f $Slo " " %6.4f $Sme " " %6.4f $Shi
 
** use simulation
save temp, replace

set obs 1000000
generate p0 = invlogit($xb+rnormal(0,$sigma0))
generate plo = invlogit($xblo+rnormal(0,$sigma))
generate pme = invlogit($xbme+rnormal(0,$sigma))
generate phi = invlogit($xbhi+rnormal(0,$sigma))

* check:
disp "Means should be" _n "$mprob0 $mproblo $mprobme $mprobhi"
summarize p0 plo pme phi

quietly summarize p0
global SD0 = sqrt(r(Var))
quietly summarize plo
global SDlo = sqrt(r(Var))
quietly summarize pme
global SDme = sqrt(r(Var))
quietly summarize phi
global SDhi = sqrt(r(Var))

disp "Standard deviations are" _n %6.4f $SD0 " " ///
 %6.4f $SDlo " " %6.4f $SDme " " %6.4f $SDhi

use temp, clear


*** Slide 19: Density of probabilities

* null model
estimates restore xt0

global cprob0 = invlogit(_b[_cons])

global denp50_0 = normalden(ln($cprob0/(1-$cprob0)),$xb,$sigma0)/($cprob0*(1-$cprob0))
global denmn_0 = normalden(ln($mprob0/(1-$mprob0)),$xb,$sigma0)/($mprob0*(1-$mprob0))

twoway (function p= normalden(ln(x/(1-x)),$xb,$sigma0)/(x*(1-x)), range(0 1)) ///
   (scatteri $denmn_0 $mprob0, recast(dropline) msym(i) lpatt(solid) lcol(navy)) ///
   (scatteri $denp50_0 $cprob0, recast(dropline) msym(i) lpatt("_") lcol(navy)), ///
   legend(order(3 "Median" 2 "Mean")) xtitle(Conditional probability) ytitle(Density)
 
* full model  
global denp50_lo = normalden(ln($cproblo/(1-$cproblo)),$xblo,$sigma)/($cproblo*(1-$cproblo))
global denmn_lo = normalden(ln($mproblo/(1-$mproblo)),$xblo,$sigma)/($mproblo*(1-$mproblo))

global denp50_me = normalden(ln($cprobme/(1-$cprobme)),$xbme,$sigma)/($cprobme*(1-$cprobme))
global denmn_me = normalden(ln($mprobme/(1-$mprobme)),$xbme,$sigma)/($mprobme*(1-$mprobme))

global denp50_hi = normalden(ln($cprobhi/(1-$cprobhi)),$xbhi,$sigma)/($cprobhi*(1-$cprobhi))
global denmn_hi = normalden(ln($mprobhi/(1-$mprobhi)),$xbhi,$sigma)/($mprobhi*(1-$mprobhi))

twoway (function lo = normalden(ln(x/(1-x)),$xblo,$sigma)/(x*(1-x)), range(0 1) lpatt("-")) ///
 (function me = normalden(ln(x/(1-x)),$xbme,$sigma)/(x*(1-x)), range(0 1) lpatt("_")) ///
 (function hi = normalden(ln(x/(1-x)),$xbhi,$sigma)/(x*(1-x)), range(0 1) lpatt(solid)) ///
   (scatteri $denmn_lo $mproblo, recast(dropline) msym(i) lpatt(solid) lcol(navy)) ///
   (scatteri $denp50_lo $cproblo, recast(dropline) msym(i) lpatt("_") lcol(navy)) ///
   (scatteri $denmn_me $mprobme, recast(dropline) msym(i) lpatt(solid) lcol(maroon)) ///
   (scatteri $denp50_me $cprobme, recast(dropline) msym(i) lpatt("_") lcol(maroon)) ///
     (scatteri $denp50_hi $cprobhi, recast(dropline) msym(i) lpatt("_") lcol(dkgreen)) ///
   (scatteri $denmn_hi $mprobhi, recast(dropline) msym(i) lpatt(solid) lcol(dkgreen)), ///
  legend(order(1 "lo" 2 "me" 3 "hi") rows(1)) xtitle(Conditional probability) ytitle(Density)
  

*** Slide 20: Percentiles of probabilities

estimates restore xt

* left graph
generate mn = 25 + (_n-1)*(68-25)/300 if _n<302
generate me_l = invlogit(_b[_cons] + _b[mn_ses]*mn - 1.96*exp([lnsig2u]_cons/2))
generate me_u = invlogit(_b[_cons] + _b[mn_ses]*mn + 1.96*exp([lnsig2u]_cons/2))

twoway ///
(rarea me_l me_u mn, sort fintensity(inten10) color(maroon) lwidth(none)) ///
(function y = invlogit(_b[_cons]+_b[mn_ses]*x), range(25 68) lpatt("_") lcol(maroon)) ///
 (scatteri  $cprobme 45 , msym(x) msize(vlarge) mcol(red)), ///
 xtitle(School mean SES (mn)) ytitle(Probability of being proficient) ///
 yscale(range(0 1)) ylab(0(.1)1) xlabel(21(6)69) ///
 legend(order(2 "Median" 1 "2.5th %tile to 97.5th %tile") rows(1))

* right graph
generate dv = -40 + (_n-1)*(80)/300 if _n<302
generate lo_l = invlogit(_b[_cons] + _b[dv_ses]*dv + 39*_b[mn_ses] - 0.67*exp([lnsig2u]_cons/2))
generate lo_u = invlogit(_b[_cons] + _b[dv_ses]*dv + 39*_b[mn_ses] + 0.67*exp([lnsig2u]_cons/2))

generate hi_l = invlogit(_b[_cons] + _b[dv_ses]*dv +51*_b[mn_ses] - 0.67*exp([lnsig2u]_cons/2))
generate hi_u = invlogit(_b[_cons] + _b[dv_ses]*dv +51*_b[mn_ses] + 0.67*exp([lnsig2u]_cons/2))

twoway ///
(rarea lo_l lo_u dv, sort fintensity(inten10) color(navy) lwidth(none)) ///
(rarea hi_l hi_u dv, sort fintensity(inten10) color(dkgreen) lwidth(none)) ///
(function y = invlogit(_b[_cons]+39*_b[mn_ses]+_b[dv_ses]*x), range(-40 40) lpatt("-") lcol(navy)) ///
(function y = invlogit(_b[_cons]+51*_b[mn_ses]+_b[dv_ses]*x), range(-40 40) lpatt(solid) lcol(dkgreen)) ///
(scatteri $cproblo -11 $cprobhi 11, msym(x) msize(vlarge) mcol(red)), ///
 xtitle(Deviation of SES from school mean (dv)) ytitle(Probability of being proficient) ///
 yscale(range(0 1)) ylabel(0(.1)1) xlabel(-33(11)33)   ///
 legend(order(3 "mn = 39" 4 "mn = 51" 1 "25th %tile to 75th %tile" 2 "25th %tile to 75th %tile") rows(2))


*** Slide 22: Probabilities schools in the data 

** install gllamm (Rabe-Hesketh et al., 2005). Journal of Econometrics 128 (2), 301-323

ssc install gllamm, replace


gllamm pass_read, i(id_school) ///
  link(logit) family(binom) nip(20) adapt
estimates store gll0
  
gllamm pass_read dv_ses mn_ses, i(id_school) ///
  link(logit) family(binom) nip(20) adapt
estimates store gll

* create a new observation for each school (pickone=1 for new obs)
egen pickone = tag(id_school)
expand 2 if pickone==1
by id_school pickone, sort: replace pickone=0 if _n==1

* set response variable to missing for "invented data"
replace pass_read = . if pickone==1

* set dv_ses = 0 for "invented data"
* mn_ses stays as it is for the school
replace dv = dv_ses
replace dv_ses = 0 if pickone==1

* obtain ptilde (posterior mean of prob. for each school)
estimates restore gll
gllapred prob, mu fsample  /* posterior mean probabilities */

twoway ///
(line me_l  mn, sort  color(maroon) lpatt(dash)) ///
(line me_u mn, sort  color(maroon) lpatt(dash)) ///
(function y = invlogit(_b[_cons]+_b[mn_ses]*x), range(25 68) lpatt(solid) lcol(maroon)) ///
(scatter prob mn_ses if pickone==1, mlab(id_school) mlabpos(0) mlabcol(black) mlabsi(vsmall) msym(i) ), ///
 xtitle(School mean SES (mn)) ytitle(Probability of being proficient) ///
 yscale(range(0 1)) ylab(0(.1)1) xlabel(21(6)69) ///
 legend(order(3 "Median" 1 "2.5th %tile to 97.5th %tile") rows(1))

* take a random sample of abou7t 20% of schools 
set seed 231232
gsort -pickone id_school
generate select = pickone==1&uniform()<.2

* create 23 new observations per school (for sampled schools_
expand 23 if select==1

* set dv_ses  to increase from smallest to largest value for the school
* (in equal steps)

generate maxdv = max_isei - mn_ses
generate mindv = min_isei - mn_ses

by id_school select (dv_ses), sort: ///
   replace dv_ses = mindv + (_n-1)*(maxdv-mindv)/22 if select==1

* obtain ptilde (posterior mean probabilities) for these data
estimates restore gll
gllapred prob2, mu fsample

generate ses = mn_ses + dv_ses

sort mn_ses schoolid select ses

twoway (line prob2 ses, connect(ascending) lcol(black)) ///
  (scatter prob mn_ses, mlab(id_school) mlabpos(12) msym(i) ///
   mlabcol(black) mlabsi(vsmall) mlabgap(.05)) if select==1 , ///
  xtitle(Student SES (mn+dv)) ytitle(Probability of being proficient) ///
  legend(off) ylab(0(.1)1) xlabel(28(17)79) xline(28, lcol(navy) lpatt("-")) ///
  xline(45, lcol(maroon) lpatt("_")) ///
  xline(62, lcol(dkgreen) lpatt(solid))


*** Slide 23: Probabilities for schools in the data (cont'd)

* keep only one invented data point per school
egen p = tag(id_school select)
drop if select==1 & p==0
drop p

* set mn_ses to 45 for all schools
replace mn = mn_ses
replace mn_ses = 45 if pickone==1
replace dv_ses = 0 if pickone==1

gllapred probc, mu fsample  /* posterior  mean probability */

graph box prob probc if pickone==1,  ///
   legend(off)  ///
   marker(2, mlab(id_school) msym(i) mlabpos(12)) ylab(0(.1)1) ///
   showyvars yvaroptions(relabel(1 "dv=0" 2 "mn=45, dv=0"))


************ STAR (Tennessee class-size experiment) data *********************************

*** Slide 25: three-level ordinal cumulative logit model: OR_median

* read the data
use star_gr4_attn, clear

* estimate the model
gllamm ptattn , i(tchid schid) link(ologit) adapt
estimates store at0_3lv

* OR_median: different teachers from the same school
display exp(sqrt(2*[tchi1]_cons^2)*invnormal(3/4))

* OR_median different teachers from different schools
estimates restore at0_3lv
display exp(sqrt(2*([tchi1]_cons^2+[schi2]_cons^2))*invnormal(3/4))


*** Slide 26: three-level ordinal cumulative logit model: ICC_lat

* ICC_lat: same school (different teachers)

display ([schi2]_cons^2)/([tchi1]_cons^2+[schi2]_cons^2+_pi^2/3)

* ICC_lat same teacher (and school)

display ([tchi1]_cons^2+[schi2]_cons^2)/([tchi1]_cons^2+[schi2]_cons^2+_pi^2/3)


*** Slide 27: three-level ordinal cumulative logit model: ICC_obs

* make a new dataset for all combinations of responses for a pair of students

drop _all
set obs 5
generate ptattn1 = _n /* response for student 1 */
expand 5
by ptattn1, sort: generate ptattn2 = _n  /* response for student 2 */

* both students are in same school with school identifier schid
generate schid = _n

* stack responses for two students into one variable, creating new variable student
* to kee track of which stusten is which
reshape long ptattn, i(schid) j(student)

* case 1: each student is taugh by a different teacher
generate tchid = student
gllapred ll1, ll fsample  /* log-likelihood contributions from the schools */

* case 2: both students are taught by the same teacher
replace tchid = schid
gllapred ll2, ll fsample /* log-likelihood contributions from the schools */

generate pschool=exp(ll1)  /* joint probabilities bar(pi)_st: different teachers */
generate pteacher = exp(ll2) /* joint probabilities bar(pi)_st: same teachers */

* need a variable for each student within the pair: ptattn1 ptattn2 to get correlations, etc.
reshape wide ptattn, i(schid) j(student)

* create frequency weights (must be integers)
generate ns = round(pschool*10000000,1)
generate nt = round(pteacher*10000000,1)

* Pearson correlation using frequency weights
correlate ptattn1 ptattn2 [fweight=ns]
correlate ptattn1 ptattn2 [fweight=nt]

* Kendall's tau-b
tabulate ptattn1 ptattn2 [fweight=nt], taub
tabulate ptattn1 ptattn2 [fweight=ns], taub


*** Slide 28 Probabilities for teahers and schools in the data

* read the data
use star_gr4_attn, clear

* redefine tchid and schid as consecutive integers
drop if ptattn==.
drop if tchid==.
drop if schid==.
egen ntchid = group(tchid)
egen nschid = group(schid)
generate otchid = tchid
generate oschid = schid

replace tchid = ntchid
replace schid = nschid

* create one new observation per school
egen pickschool = tag(schid)
expand 2 if pickschool==1
by schid pickschool, sort: replace pickschool=0 if _n==1

* set response variable to missing for "invented observations"
replace ptattn = . if pickschool==1
replace tchid = 300 if pickschool==1 /* 300 is a new value of tchid */

egen pickteacher = tag(schid tchid)
sort schid tchid
list schid tchid if pickteacher==1 in 1/104, sepby(schid)

* posterior mean probability of response being above 3
* for tchid=300, there are no data within the school, so posterior becomes prior
gllapred prob, above(3) mu fsample

* graph on left
graph box prob if pickteacher==1&pickschool==0, ///
   marker(1, mlab(tchid) msym(i) mlabpos(12)) ylab(0(.1)1) ///
   ytitle(Probability)

* graph on right
graph box prob if pickteacher==1&pickschool==1, ///
    marker(1, mlab(schid) msym(i) mlabpos(12)) ylab(0(.1)1) ///
	ytitle(Probability)