/****************************************************
* Private Education                                 *
* Author(s): Mun Sim (Nicole) Lai & Comfort Sumida  *
****************************************************/

/******************************************************************************************************
* Product: Age profiles of private education expenditures (CFE)                                       *
* Data requirements: Individual data on student enrollment, household data on education expenditures  *
*     ws    = work status (a work status of 7 indicates student) (individual)                         *
*     tx835 = household education expenditures                                                        *
* Methodology: Regression method                                                                      *
******************************************************************************************************/

/****************************************************************************
* note(s): - do-file is for 1998 only                                       *
*          - estimated age profiles are not adjusted to aggregate controls  *
*          - do not smooth private education expenditures                   *
*          - run the do-file as a whole: the local macro "terms" defined    *
*            below is reused by the later sections                          *
****************************************************************************/

/* regression terms, in the order used throughout the file:
   everyone aged 3, 4, 5; enrolled (n) and non-enrolled (pn) aged 6 and 7;
   enrolled aged 8 to 25; enrolled aged 26 and over (n26) */
local terms p3 p4 p5 n6 pn6 n7 pn7
forvalues a=8/26 {
    local terms `terms' n`a'
}

/* use individual data (clear added so the do-file also runs when data are already in memory) */
use "c:/ind98.dta", clear

/* student age: sage is the age of individuals who are students (work status of 7);
   non-students get -1 so that a missing value can never satisfy sage>=26 */
gen sage=age if ws==7
replace sage=-1 if sage==.

/* find the number of enrolled students of each age in each household (n26 pools ages 26 and over) */
sort savehhid
forvalues a=0/25 {
    by savehhid: egen n`a'=sum(sage==`a')
}
by savehhid: egen n26=sum(sage>=26)

/* find the number of individuals (students and non-students) of each age by household */
gen AGE=age
sort savehhid
forvalues a=0/25 {
    by savehhid: egen p`a'=sum(AGE==`a')
}
by savehhid: egen p26=sum(AGE>=26)

/* generate variables for those in age groups 6 & 7 who are not enrolled */
gen pn6=p6-n6
gen pn7=p7-n7

/* merge with hh data (count only displays the number of observations as a check) */
sort savehhid
count
save "C:\indv_98.dta",replace
use "C:\hhdata98.dta", clear
sort savehhid
count
merge savehhid using "C:\indv_98.dta"
tab _merge
drop _merge

/* dropping repeated household: keep one observation per household */
sort savehhid
drop if savehhid==savehhid[_n-1]

/* regression of number enrolled of each age (and population aged 3, 4, 5, non-enrolled aged 6, 7)
   by household on household education expenditures */
reg tx835 `terms' [w=mult], robust noconstant

/* store each coefficient as a variable (bp3 ... bn26) and collect the names for the keep below */
local bvars
foreach v of local terms {
    gen b`v'=_b[`v']
    local bvars `bvars' b`v'
}

/* drop all unnecessary variables and merge with individual data */
sort savehhid
keep savehhid tx835 `bvars'
sort savehhid
count
save "C:\temp\hh_03.dta",replace

/* FIX: this step previously loaded "C:\\ind03.dta"; the do-file is for 1998, so the 1998
   individual file read at the top is reloaded instead. The intermediate file name hh_03.dta
   is kept as it was. */
use "c:/ind98.dta", clear
sort savehhid
count
merge savehhid using "C:\temp\hh_03.dta"
tab _merge
drop _merge

/* assign coefficients (estimated average educational expenditures) to all enrolled by age */
gen sage=age if ws==7
gen nsage=age if ws~=7
replace sage=-1 if sage==.

/* note that we used all individuals in age groups 3, 4, 5, due to omitted information regarding enrollment */
gen ep3=bp3*(age==3)
gen ep4=bp4*(age==4)
gen ep5=bp5*(age==5)
gen en6=bn6*(sage==6)
gen epn6=bpn6*(nsage==6)
gen en7=bn7*(sage==7)
gen epn7=bpn7*(nsage==7)
forvalues a=8/25 {
    gen en`a'=bn`a'*(sage==`a')
}
gen en26=bn26*(sage>=26)

/* each individual's own estimated expenditure (at most one term is non-zero per person) */
local evars
foreach v of local terms {
    local evars `evars' e`v'
}
egen sum=rsum(`evars')

/* total estimated average household expenditures on education */
sort savehhid
by savehhid: egen total=sum(sum)

/* proportion of estimated household expenditures on each enrolled individual;
   a missing share (for example when total is zero) is set to 0 */
foreach v of local terms {
    gen re`v'=e`v'/total
    replace re`v'=0 if re`v'==.
}

/* estimated educational expenditures: household expenditures times the individual's share */
local tvars
foreach v of local terms {
    gen t`v'=tx835*re`v'
    local tvars `tvars' t`v'
}
egen test5=rsum(`tvars')

/* tabulate mean educational expenditures by age (age profile of education expenditures) */
/* FIX: the comment above was never terminated, which disabled this tabulation and,
   with nested comment delimiters, the final save as well */
table age [w=mult], c(mean test5)

/* save final dataset */
save "c:\edu98d.dta", replace