/* ======================================================================
   Age profiles of private consumption from the 1978 household survey.
     I.   other consumption (total less health and education)
     II.  education
     III. health
     IV.  combined private consumption (done outside this script)

   Inputs : c:\ind78.dta (individuals), c:\hhitem78.dta and
            C:\hhdata78.dta (households)
   Outputs: c:\aconseh78.dta, c:\edu78d.dta, c:\health78w.dta
   Scratch: c:\temp\ind_78.dta, C:\temp\indv_78.dta, C:\temp\hh_78.dta

   Run the file from top to bottom in one go: sections II and III build
   local macros that are reused further down in the same section.

   NOTE(review): the script uses legacy syntax (merge varlist using,
   egen sum() and rsum(), table ..., c()). Confirm the Stata release it
   is meant to run under, and add a version statement if needed.
   ====================================================================== */


/* I. OTHER CONSUMPTION (consumption less health and education expenditures) */

/* use individual data set */
use "c:\ind78.dta"

/* NOTE(review): this section refers to AGE and SAVEHHID in upper case here
   but to age and mult in lower case further down. Stata variable names are
   case-sensitive, so confirm which spellings exist in the data. */

/* Ia. assign equivalence scale value to each individual */
/* adults over the age of 20 have an equivalence scale equal to 1 */
gen alpha=1
/* children below the age of 5 have an equivalence scale of 0.4 */
replace alpha=0.4 if AGE<5
/* between the ages of 5 and 20 the scale rises linearly from 0.4 to 1 */
replace alpha=0.4+(1-0.4)*((AGE-4)/(20-4)) if AGE>=5 & AGE<=20

/* Ib. calculate total number of equivalent adults in the household */
sort SAVEHHID
by SAVEHHID: egen alphah=sum(alpha)

/* each individual's share of the household's other consumption */
gen sharei=alpha/alphah
replace sharei=0 if sharei==.

/* Ic. merge individual and household data */
/* individual data contains age information, while household data contains
   household expenditure information */
sort SAVEHHID count
save "c:\temp\ind_78.dta", replace
use "c:\hhitem78.dta", clear
sort SAVEHHID count
merge SAVEHHID using "c:\temp\ind_78.dta"
tab _merge
save "c:\temp\ind_78.dta", replace

/* Id. individual other consumption = individual share times household
   consumption less education and health expenditures */
/* note: tx800 refers to total household consumption expenditure */
/* note: tx810 refers to medical care and sanitation expenditure */
/* note: tx835 refers to expenditure on education */
gen consi=sharei*(tx800-tx810-tx835)

/* tabulate consumption by age (age profile of other consumption - unsmoothed) */
table age [w=mult], c(mean consi)

/* smooth consumption data using lowess with bandwidth of 0.1 */
lowess consi age, bwidth(0.1) gen(sconsi) nograph

/* tabulate smoothed consumption by age (age profile of other consumption - smoothed) */
table age [w=mult], c(mean sconsi)

/* save final data set */
save "c:\aconseh78.dta", replace


/* II. EDUCATION */
/* simplified version */

/* use individual data */
use "c:/ind78.dta"

/* sage holds the age of students (work status of 7) and -1 for everyone else */
gen sage=age if ws==7
replace sage=-1 if sage==.

/* IIa. find the number of enrolled students of each age in each household */
sort savehhid
forvalues a = 0/25 {
    by savehhid: egen n`a'=sum(sage==`a')
}
/* ages 26 and above are pooled into a single group */
by savehhid: egen n26=sum(sage>=26)

/* find the number of individuals (students and non-students) for ages 3 to 7
   by household */
/* NOTE(review): sections I and III read AGE directly from this same file. If
   AGE already exists there, this gen fails. Confirm the variable names. */
gen AGE=age
sort savehhid
forvalues a = 3/7 {
    by savehhid: egen p`a'=sum(AGE==`a')
}

/* generate variables for those in age groups 6 & 7 who are not enrolled */
gen pn6=p6-n6
gen pn7=p7-n7

/* IIb. merge with hh data */
sort savehhid count
save "C:\temp\indv_78.dta",replace
use "C:\hhdata78.dta", clear
sort savehhid count
merge savehhid using "C:\temp\indv_78.dta"
tab _merge
drop _merge

/* IIc. dropping repeated household */
sort savehhid
drop if savehhid==savehhid[_n-1]

/* regressor list: everyone aged 3, 4, 5; enrolled and non-enrolled aged 6, 7;
   enrolled aged 8 to 25; enrolled aged 26 and above.
   The list is reused for every derived variable below (prefixes b, e, re, t). */
local eduvars p3 p4 p5 n6 pn6 n7 pn7
forvalues a = 8/26 {
    local eduvars `eduvars' n`a'
}

/* regression of household education expenditures on the household counts
   above, without an intercept */
reg tx835 `eduvars' [w=mult], robust noconstant

/* store each coefficient as a variable named b plus the regressor name */
local coefvars
foreach v of local eduvars {
    gen b`v'=_b[`v']
    local coefvars `coefvars' b`v'
}

/* IId. drop all unnecessary variables and merge with individual data */
keep savehhid tx835 `coefvars'
/* one row per household remains and count is no longer in the data, so the
   merge key alone defines the sort order */
sort savehhid
save "C:\temp\hh_78.dta",replace
use "C:\ind78.dta", clear
sort savehhid count
merge savehhid using "C:\temp\hh_78.dta"
tab _merge
drop _merge

/* assign coefficients (estimated average educational expenditures) to all
   enrolled by age */
gen sage=age if ws==7
gen nsage=age if ws~=7
replace sage=-1 if sage==.

/* note that we used all individuals in age groups 3, 4, 5, due to omitted
   information regarding enrollment */
forvalues a = 3/5 {
    gen ep`a'=bp`a'*(age==`a')
}
/* ages 6 and 7 have separate coefficients for enrolled and non-enrolled */
forvalues a = 6/7 {
    gen en`a'=bn`a'*(sage==`a')
    gen epn`a'=bpn`a'*(nsage==`a')
}
forvalues a = 8/25 {
    gen en`a'=bn`a'*(sage==`a')
}
gen en26=bn26*(sage>=26)

/* each individual's own coefficient (all other terms are zero) */
local evars
foreach v of local eduvars {
    local evars `evars' e`v'
}
egen sum=rsum(`evars')

/* total estimated average household expenditures on education */
sort savehhid
by savehhid:egen total=sum(sum)

/* proportion of estimated household expenditures on each enrolled individual;
   a missing ratio is set to zero */
foreach v of local eduvars {
    gen re`v'=e`v'/total
    replace re`v'=0 if re`v'==.
}

/* estimated educational expenditures: reported household education
   expenditure times the individual's proportion */
local tvars
foreach v of local eduvars {
    gen t`v'=tx835*re`v'
    local tvars `tvars' t`v'
}
egen test5=rsum(`tvars')

/* tabulate mean educational expenditures by age (age profile of education
   expenditures) */
table age [w=mult], c(mean test5)

/* save final dataset */
save "c:\edu78d.dta", replace


/* III. HEALTH */

/* use individual data */
use "c:\ind78.dta"

/* five-year age bands coded by their midpoint; ages 70 and over are coded 70 */
local agebands (0/4=2.5) (5/9=7.5) (10/14=12.5) (15/19=17.5) (20/24=22.5) (25/29=27.5) (30/34=32.5) (35/39=37.5) (40/44=42.5) (45/49=47.5) (50/54=52.5) (55/59=57.5) (60/64=62.5) (65/69=67.5) (70/max=70)

/* variable suffixes for the bands: the top age of each band, and 70 for the
   open-ended band */
local bandtops 4(5)69 70

/* IIIa. create age groups */
sort AGE
gen agegrp=AGE
recode agegrp `agebands'

/* calculating the number of individuals in each age group in each household */
sort SAVEHHID
local hvars
foreach u of numlist `bandtops' {
    /* band code: the midpoint, which is the top age minus 1.5; 70 for the last band */
    local mid = cond(`u'==70, 70, `u'-1.5)
    by SAVEHHID: egen p`u'=sum(agegrp==`mid')
    local hvars `hvars' p`u'
}

/* IIIb. merge with hh data */
/* household data contains information on household health expenditures, while
   individual data includes necessary data on individual ages of household
   members */
sort SAVEHHID count
save "C:\temp\indv_78.dta",replace
use "C:\hhitem78.dta", clear
sort SAVEHHID count
merge SAVEHHID using "C:\temp\indv_78.dta"
tab _merge
drop _merge

/* IIIc. Dropping repeated households */
sort SAVEHHID
drop if SAVEHHID==SAVEHHID[_n-1]

/* household health expenditures are regressed on the number of individuals in
   each age group within a household (without an intercept), and coefficients
   are stored for future use */
reg tx810 `hvars' [w=MULT], robust noconstant
local hcoefs
foreach v of local hvars {
    gen b`v'=_b[`v']
    local hcoefs `hcoefs' b`v'
}

/* keep necessary variables */
keep SAVEHHID `hcoefs' `hvars' tx800 tx810 tx814

/* IIId. merge with individual data to obtain corresponding age data of
   household members */
/* one row per household remains and count is no longer in the data, so the
   merge key alone defines the sort order */
sort SAVEHHID
save "C:\temp\hh_78.dta",replace
use "C:\ind78.dta", clear
sort SAVEHHID count
merge SAVEHHID using "C:\temp\hh_78.dta"
tab _merge
drop _merge
sort AGE

/* re-create five-year age groups */
gen agegrp=AGE
recode agegrp `agebands'

/* assign coefficients to corresponding age group members */
sort agegrp
local hpvars
foreach u of numlist `bandtops' {
    local mid = cond(`u'==70, 70, `u'-1.5)
    gen hp`u'=bp`u'*(agegrp==`mid')
    local hpvars `hpvars' hp`u'
}
egen sum=rsum(`hpvars')

/* calculating total household estimated expenditures on health */
sort SAVEHHID
by SAVEHHID:egen total=sum(sum)

/* IIIe. calculating share of estimated household health expenditures for each
   member; a missing ratio is set to zero */
foreach u of numlist `bandtops' {
    gen rhp`u'=hp`u'/total
    replace rhp`u'=0 if rhp`u'==.
}

/* multiplying share with reported household health expenditures (tx810 less
   tx814) to obtain estimated health expenditures for individuals */
local thvars
foreach u of numlist `bandtops' {
    gen th`u'=(tx810-tx814)*rhp`u'
    local thvars `thvars' th`u'
}
egen health=rsum(`thvars')

/* tabulate unsmoothed results by age (age profile of health - unsmoothed) */
sort AGE
table AGE [w=MULT], c(mean health)

/* smooth results using lowess, bandwidth of 0.1 */
lowess health AGE, bwidth(0.1) gen(shealth) nograph

/* tabulate smoothed results by age (age profile of health - smoothed) */
table AGE [w=MULT], c(mean shealth)

/* keep only necessary variables and save final data set */
keep SAVEHHID health shealth tx800 tx810 tx814 AGE MULT
save "c:\health78w.dta",replace


/* IV. PRIVATE CONSUMPTION (combined consumption) */
/* sum of health, education, and other consumption by age (this step was
   completed in a spreadsheet program from the above tabulated age profiles) */