Recently we used the code below. It is written to do a yearly mcap sort at the median break-point and an idiovol sort at the 33.33 and 66.67 percentile break-points. I need the necessary modifications to it so that it now does a first sort to create quintiles of size (mcap) and then a second sort to create quintiles of idiovol. After this, it should generate a data set that contains one observation for each month and each of the (up to) 25 groups in that month, and that gives the weighted and unweighted means of rt. The other parameters of this code are all right. However, the part that creates six portfolios and then SMB and HML is to be discarded.
Code:
// Calendar month-of-year and calendar year of each observation.
// dofm() is applied to mdate, so mdate is handled as a Stata monthly date.
gen moy = month(dofm(mdate))
gen year = year(dofm(mdate))
// CREATE A "FISCAL YEAR" RUNNING FROM JULY THROUGH SUBSEQUENT JUNE
// Months 7-12 keep their calendar year; months 1-6 are assigned the prior year.
gen fyear = cond(moy > 6, year, year-1)
// Copy the sorting variables to a scratch frame so they can be reduced to one
// row per stock and fiscal year without touching the data in the default frame.
frame put stock_id fyear mcap idiovol, into(mcap_idiovol_work)
frame change mcap_idiovol_work
// Per stock-fyear: count the non-missing values of each variable and keep the
// first non-missing one.
collapse (count) n_mcap = mcap n_idiovol = idiovol (firstnm) mcap idiovol, by(stock_id fyear)
// Stops the script if any stock-fyear has more than one non-missing value.
assert n_mcap <= 1 & n_idiovol <= 1 // VERIFY UNIQUE VALUE OF MCAP AND idiovol
// Shift forward one year: a value recorded in fyear t is linked to fyear t+1 below.
replace fyear = fyear + 1 // CHANGE THE FYEAR TO WHICH THEY WILL APPLY
frame change default
// Keep the original values under an "orig" prefix (origmcap, origidiovol) so the
// plain names are free for the shifted values fetched next.
rename (mcap idiovol) orig=
// Link every observation to its stock-fyear row in the scratch frame and copy
// the shifted mcap and idiovol across.
frlink m:1 stock_id fyear, frame(mcap_idiovol_work)
frget mcap idiovol, from(mcap_idiovol_work)
// Remove the scratch frame and the link variable frlink created (same name).
frame drop mcap_idiovol_work
drop mcap_idiovol_work
// Flag exactly one observation per stock-fyear; used later so each stock counts
// once per year when break-points are computed.
egen byte representative = tag(stock_id fyear)
// Size split at the median of mcap (the June value, per the original note).
// Works on whatever observations are currently in memory and adds
// june_mcap_group, coded 1 (at or below the median) or 2 (above it).
capture program drop one_year_median_split
program one_year_median_split
    // Two quantile categories == a split at the median.
    xtile june_mcap_group = mcap, nquantiles(2)
end
// Build a frame holding one row per stock-fyear (the tagged representative)
// with non-missing mcap, so every stock enters the median computation once.
frame put stock_id fyear mcap if representative & !missing(mcap), into(median_split) // ***
frame change median_split
// Run the median split separately within each fiscal year.
runby one_year_median_split, by(fyear)
frame change default
// Link all observations of a stock-fyear to its row in median_split and copy
// the group code back; observations with no match get a missing group.
frlink m:1 stock_id fyear, frame(median_split stock_id fyear) // ***
frget june_mcap_group, from(median_split)
// Remove the scratch frame and the link variable frlink created (same name).
frame drop median_split
drop median_split
// NOW SPLIT AT THE 33.33RD AND 66.67TH PERCENTILES OF idiovol
// Works on whatever observations are currently in memory and adds
// march_idiovol_group, coded 1, 2 or 3 from low to high idiovol.
// When fewer than 3 observations are in memory the group is set to missing.
// The helper variable cut is left in the data, as before.
capture program drop one_year_three_groups
program define one_year_three_groups
    if _N >= 3 {
        _pctile idiovol, percentiles(33.33 66.67)
        // xtile's cutpoints() option reads the break-points from a variable, so
        // they are stored in the first two observations of cut. Stored as double
        // so the break-points are not rounded to float precision before being
        // compared with idiovol (rounding can misplace values that sit exactly
        // on a break-point).
        gen double cut = `r(r1)' in 1
        replace cut = `r(r2)' in 2
        xtile march_idiovol_group = idiovol, cutpoints(cut)
    }
    else {
        gen march_idiovol_group = .
    }
    exit
end
// Build a frame holding one row per stock-fyear (the tagged representative)
// with non-missing idiovol, so every stock enters the percentiles once.
frame put stock_id fyear idiovol if representative & !missing(idiovol), into(three_groups) // ***
frame change three_groups
// Run the three-way split separately within each fiscal year.
runby one_year_three_groups, by(fyear) verbose
frame change default
// Link all observations of a stock-fyear to its row in three_groups and copy
// the group code back; observations with no match get a missing group.
frlink m:1 stock_id fyear, frame(three_groups stock_id fyear) // ***
frget march_idiovol_group, from(three_groups)
// Remove the scratch frame and the link variable frlink created (same name).
frame drop three_groups
drop three_groups
// Value-weighted mean of rt, weighted by mcap, over the observations currently
// in memory. Adds numerator, denominator and vw_mean_rt (the same value on
// every row). vw_mean_rt is missing when no row has both mcap and rt, or when
// the usable weights sum to zero.
capture program drop one_weighted_return
program define one_weighted_return
    // The if command is evaluated on the first observation only. The script
    // calls this program by mdate and both group variables, so they are
    // constant within each call.
    if !missing(june_mcap_group, march_idiovol_group) {
        // total() skips rows where mcap*rt is missing ...
        egen numerator = total(mcap*rt)
        // ... so the weights must skip the same rows: a row with missing rt
        // contributes 0 to the denominator instead of its full mcap. Otherwise
        // the mean is pulled toward zero (and is exactly 0 when every rt in the
        // cell is missing).
        egen denominator = total(mcap * !missing(rt))
        gen vw_mean_rt = numerator/denominator
    }
    exit
end
// Keep only observations assigned to both a size group and an idiovol group.
drop if missing(june_mcap_group, march_idiovol_group)
// Compute the value-weighted return within every month x size x idiovol cell.
runby one_weighted_return, by(mdate june_mcap_group march_idiovol_group)
// Reduce to one row per cell. vw_mean_rt is constant within a cell, so taking
// the first value loses nothing.
collapse (first) vw_mean_rt, by(mdate june_mcap_group march_idiovol_group)
drop if missing(vw_mean_rt)
keep mdate june_mcap_group march_idiovol_group vw_mean_rt
// Verify one row per cell and leave the data sorted by these variables.
isid june_mcap_group march_idiovol_group mdate, sort
// SMB: within each month, average the cell returns of each size group, then
// take lowest-coded size group minus highest-coded size group.
// NOTE(review): if a month contains only one size group, temp[1] and temp[_N]
// are the same value and SMB is 0 for that month - confirm this is acceptable.
by mdate june_mcap_group, sort: egen temp = mean(vw_mean_rt)
by mdate (june_mcap_group), sort: gen SMB = temp[1] - temp[_N]
drop temp
// LMH: same construction across idiovol groups, lowest-coded group minus
// highest-coded group. The second by relies on the sort order left by the
// preceding "by ..., sort:" line.
by mdate march_idiovol_group, sort: egen temp = mean(vw_mean_rt)
by mdate (march_idiovol_group): gen LMH = temp[1] - temp[_N]
drop temp
// AND IF YOU WANT TO REDUCE TO ONE OBSERVATION PER MONTH
// Turn the numeric group codes into letters so each cell gets a two-letter
// name (size letter followed by idiovol letter, e.g. SL, BH).
label define june_mcap_group 1 "S" 2 "B"
label define march_idiovol_group 1 "L" 2 "M" 3 "H"
label values june_mcap_group june_mcap_group
label values march_idiovol_group march_idiovol_group
decode june_mcap_group, gen (mcap_group)
decode march_idiovol_group, gen(idiovol_group)
drop june_mcap_group march_idiovol_group
egen groups = concat(mcap_group idiovol_group)
keep mdate groups SMB LMH vw_mean_rt
// Add a trailing underscore so the reshaped variables are named
// vw_mean_rt_<cell>.
rename vw_mean_rt =_
// One row per month, one return column per cell; SMB and LMH are constant
// within a month and are carried along unchanged.
reshape wide vw_mean_rt_, i(mdate) j(groups) string
// High-minus-low idiovol return from the four corner cells.
// NOTE(review): this line needs all four corner variables to exist after the
// reshape; a cell that never occurs in the data would make it fail - verify.
gen HML = (vw_mean_rt_SH + vw_mean_rt_BH)/2 - (vw_mean_rt_SL + vw_mean_rt_BL)/2

Comment