Hello everyone. I thought i can piece the rest together on my own but it seems that i have not really understood how it works. The problem is this:
I want to estimate not only the effect of the eventtime on the income per cohort, but also for males and females separately. Then i construct the child penalties by taking the difference of the effect on males vs females and weigh this by females counterfactual. For this i introduced sex into the synthetic dataset and ran the regressions seperately for males and females. I did this for both my approaches and checked whether they produce the same outcome by taking differences at the end. they don't. Can someone explain to me how this differs from running without splitting by sex? And how do i have to change my reghdfe approach so that it mirrors the old dummy approach in this case aswell? Also, I see, that enforcing the baseline globally as shown above works, but i don't quite understand why we take the same baseline for everyone. Shouldn't every cohort get its own?
At the moment i am running this code:
Thank you for your help,
Heike
I want to estimate not only the effect of the eventtime on the income per cohort, but also for males and females separately. Then i construct the child penalties by taking the difference of the effect on males vs females and weigh this by females counterfactual. For this i introduced sex into the synthetic dataset and ran the regressions seperately for males and females. I did this for both my approaches and checked whether they produce the same outcome by taking differences at the end. they don't. Can someone explain to me how this differs from running without splitting by sex? And how do i have to change my reghdfe approach so that it mirrors the old dummy approach in this case aswell? Also, I see, that enforcing the baseline globally as shown above works, but i don't quite understand why we take the same baseline for everyone. Shouldn't every cohort get its own?
At the moment i am running this code:
Code:
*===============================================================
* Synthetic dataset (as provided) + CP via (A) dummies vs (B) reghdfe, then compare
*===============================================================
version 18
clear all
set more off
set seed 12345
*------------------------------
* 0) Build the dataset (your code)
*------------------------------
local municipality = 100
local Npersons = 50
local Tmin = 1
local Tmax = 17
local Tobs = `Tmax' - `Tmin'
local obs = `municipality' * `Npersons' * `Tobs'
display `obs'
set obs `obs'
gen mun_id = ceil(_n / (`Npersons' * `Tobs'))
gen person_in_unit = mod(ceil(_n / `Tobs') - 1, `Npersons') + 1
gen id = mun_id * 10000 + person_in_unit
bysort mun_id person_in_unit: gen t_idx = `Tmin' + _n - 1
gen str3 language = cond(mod(mun_id,3)==0, "GER", ///
cond(mod(mun_id,3)==1, "FRE", "ITA"))
bysort id: gen statyear0 = 1995 + floor(runiform()*10)
gen statyear = statyear0 + t_idx
bysort id: gen start_age = 25 + int(runiform()*20)
gen age = start_age + t_idx
* 1 = male, 2 = female (balanced-ish)
bys id: gen byte sex = 1 + (runiform()>=0.5)
gen u_intercept = rnormal(0, 5)
gen u_slope = rnormal(0, 0.5)
gen event_effect = cond(t_idx>=0, -10 + 0.5*t_idx, 0)
gen y = 100 + u_intercept + u_slope * t_idx + event_effect + rnormal(0, 10) - 40 * sex
drop u_intercept u_slope start_age statyear0 event_effect
label var id "Individual identifier"
label var mun_id "Municipality / cohort id"
label var person_in_unit "Person within municipality"
label var language "language region"
label var t_idx "Event time index (years relative)"
label var age "Age of individual (synthetic)"
label var statyear "Calendar year of observation"
label var y "Outcome (synthetic)"
* Define cohort handle
egen lang = group(language)
local cohort lang
local coh3 = substr("`cohort'", 1, 3)
local coh9 = substr("`cohort'", 1, 9)
*--------------------------------------------------------------
* Add sex & create an outcome with sex-specific event-time drop
*--------------------------------------------------------------
rename y mrevcot
gen dacot = statyear
tempfile SYN
save "`SYN'", replace
*===============================================================
* (A) DUMMY-INTERACTIONS CP (benchmark)
*===============================================================
use "`SYN'", clear
/* interactions; names will include the short `coh3'/`coh7` prefixes */
xi i.t_idx*i.`cohort', noomit
drop _It_iX`coh3'_5_* _It_idx* _I`coh9'_*
local vardrop
foreach var of varlist _I* {
quietly summarize `var', meanonly
if r(mean) == 0 {
di "`var' --> delete"
local vardrop `vardrop' `var'
}
}
capture drop `vardrop'
tempfile female_dummy male_dummy
preserve
keep if sex == 2
* FEMALES: alpha^w_t and E[~Y^w | t]
reg mrevcot _It_iX`coh3'_* i.age i.dacot, r
gen double alpha_w_dummy = .
replace alpha_w_dummy = 0 if t_idx==5
/* create the levels local named after the 3-letter handle */
levelsof `cohort', local(`coh3')
forvalues k = 1/16 {
if `k'!=5 {
foreach l of local `coh3' {
cap replace alpha_w_dummy = _b[_It_iX`coh3'_`k'_`l'] ///
if `cohort'==`l' & t_idx==`k'
}
}
}
* Counterfactual
predict double ytilde_w if e(sample), xb
gen double mu_ytilde_w_dummy = .
replace mu_ytilde_w_dummy = ytilde_w - alpha_w_dummy if e(sample)
save "`female_dummy'", replace
restore
preserve
fvset base 5 t_idx
keep if sex == 1
* MALES: alpha^m_t
reg mrevcot _It_iX`coh3'_* i.age i.dacot, r
gen double alpha_m_dummy = .
replace alpha_m_dummy = 0 if t_idx==5
levelsof `cohort', local(`coh3'_m)
forvalues k = 1/16 {
if `k'!=5 {
foreach l of local `coh3'_m {
cap replace alpha_m_dummy = _b[_It_iX`coh3'_`k'_`l'] ///
if `cohort'==`l' & t_idx==`k'
}
}
}
save "`male_dummy'", replace
restore
use "`female_dummy'", clear
append using "`male_dummy'"
* collapse to event-time series and compute P_t
collapse (mean) alpha_w_dummy alpha_m_dummy mu_ytilde_w_dummy, by(t_idx lang)
gen double cp_dummy = (alpha_m_dummy - alpha_w_dummy) / mu_ytilde_w_dummy
gen double cp_pct_dummy = 100*cp
tab cp_dummy
tempfile CP_DUMMY
save "`CP_DUMMY'", replace
*===============================================================
* (B) REGHDFE + SAVED FE CP
*===============================================================
use "`SYN'", clear
tempfile female male
* --- FEMALES (absorb t FE; predict non-FE xb; rebuild baseline)
preserve
keep if sex==2
reghdfe mrevcot i.age i.dacot, absorb(i.t_idx#i.lang, savefe) vce(robust)
egen double fe_base_w = mean(cond(t_idx==5, __hdfe1__, .))
gen double alpha_w = __hdfe1__ - fe_base_w
predict double xb_noFE_w, xb
gen double mu_ytilde_w = xb_noFE_w + fe_base_w
save "`female'", replace
restore
* --- MALES
preserve
keep if sex==1
reghdfe mrevcot i.age i.dacot, absorb(i.t_idx#i.lang, savefe) vce(robust)
egen double fe_base_m = mean(cond(t_idx==5, __hdfe1__, .))
gen double alpha_m = __hdfe1__ - fe_base_m
save "`male'", replace
restore
* -------------------------
* Append and collapse
* -------------------------
use "`female'", clear
append using "`male'"
* collapse to event-time series and compute P_t
collapse (mean) alpha_w alpha_m mu_ytilde_w, by(t_idx lang)
gen double cp_hdfe = (alpha_m - alpha_w) / mu_ytilde_w
gen double cp_pct_hdfe = 100*cp_hdfe
tab cp_hdfe
tempfile CP_HDFE
save "`CP_HDFE'", replace
*===============================================================
* Compare the two approaches
*===============================================================
use "`CP_DUMMY'", clear
merge 1:1 t_idx lang using "`CP_HDFE'", nogen
gen double diff_cp = cp_hdfe - cp_dummy
gen double diff_cp_pct = cp_pct_hdfe - cp_pct_dummy
gen diff_alpha_w = alpha_w - alpha_w_dummy
gen diff_alpha_m = alpha_m - alpha_m_dummy
gen diff_mu_ytilde_w = mu_ytilde_w - mu_ytilde_w_dummy
tab diff_alpha_m
tab diff_alpha_w
tab diff_mu_ytilde_w
tab diff_cp
Heike

Comment