Ben Jann, thanks! could you make a example with mm_lsfit() where the coefficients are saved as a Stata matrix?
-
Login or Register
- Log in with
mata: r = 1000 // replications k = 10 // number of predictors n = 1000 // sample size b = J(r, k+1, .) // container for results for (i=1;i<=1000;i++) { y = rnormal(n,1,0,1) X = rnormal(n,10,0,1) b[i,] = mm_lsfit(y, X)' } mean(b)', sqrt(mm_colvar(b))' // means and standard deviations end
mata: r = 1000 // replications k = 10 // number of predictors n = 1000 // sample size b = se = J(r, k+1, .) // containers for results for (i=1;i<=1000;i++) { y = rnormal(n,1,0,1) X = rnormal(n,10,0,1) S = mm_ls(y, X) // initialize estimation problem b[i,] = mm_ls_b(S)' // obtain coefficients se[i,] = mm_ls_se(S)' // obtain standard errors } sqrt(mm_colvar(b))', mean(se)' // standard deviation and avg. standard error end
mata: r = 1000 // replications k = 10 // number of predictors n = 1000 // sample size b = se = J(r, k+1, .) // containers for results for (i=1;i<=1000;i++) { y = rnormal(n,1,0,1) X = rnormal(n,10,0,1) w = runiform(n,1) S = mm_ls(y, X, w) b[i,] = mm_ls_b(S)' K = k + 1 - mm_ls_k_omit(S) s = quadcross(X,1, (w:*(y-mm_ls_xb(S))):^2, X,1) V = (n/(n-K)) * (mm_ls_XXinv(S) * s * mm_ls_XXinv(S)) se[i,] = sqrt(diagonal(V))' // robust standard errors } mean(b)', mean(se)' end
timer clear
// regress
forv i=1/1000 {
qui drawnorm y x1-x10, double clear n(1000)
timer on 1
qui regress y x1-x10
timer off 1
timer on 3
qui {
mat accum YX = y x1-x10
mat b = invsym(YX[2...,2...])*(YX[2...,1])
}
timer off 3
}
// mata mm_ls()
mata:
n = 1000
for (i=1;i<=1000;i++) {
y = rnormal(n,1,0,1)
X = rnormal(n,10,0,1)
timer_on(2)
b = mm_lsfit(y, X)
timer_off(2)
}
end
timer list
. timer list
1: 4.88 / 1000 = 0.0049
2: 0.32 / 1000 = 0.0003
3: 0.40 / 1000 = 0.0004
prog define betas, rclass syntax varlist gettoken yvar 0 : 0 qui mat accum YX = `yvar' `0' matrix betas = invsym(YX[2...,2...])*(YX[2...,1]) return matrix betas = betas end
Stata-17 Born: 19 Jul 2022 Processors: 8 Compile number 170121 obs:1000 reps :25 processors:8 1: 0.02 / 25 = 0.0008 Stata matrix 2: 0.04 / 25 = 0.0016 Mata matrix 3: 0.13 / 25 = 0.0053 mm_lsfit() 10: 0.56 / 25 = 0.0224 regress 20: 0.63 / 25 = 0.0253 asreg obs:1000000 reps :25 processors:8 1: 1.45 / 25 = 0.0580 Stata matrix 2: 4.46 / 25 = 0.1782 Mata matrix 3: 4.45 / 25 = 0.1778 mm_lsfit() 10: 3.54 / 25 = 0.1415 regress 20: 41.60 / 25 = 1.6640 asreg
* betas.ado * prog define betas, rclass *! version 0.0.1 2022-07-21 return matrix r(betas) or e(b) syntax varlist , [mata] [moremata] [asreg] gettoken yvar xvars : varlist if ( "`mata'" != "" & "`moremata'" != "" & "`asreg'" != "" ) { di as error "options `mata', `moremata' and `asreg' " error 184 } if ( "`asreg'"=="asreg" ) { /* asreg */ qui asreg `varlist' tempname tomatrix frame put _b_* if _n == 1 , into(`tomatrix') frame `tomatrix' : rename (_b_*)(*) frame `tomatrix' : mkmat * , matrix(betas) matrix rownames betas = "`yvar'" } else if ("`mata'"=="`moremata'" ) { /* Stata matrix only */ qui mat accum YX = `varlist' matrix betas = (invsym(YX[2...,2...])*(YX[2...,1]))' } else { /* mata or moremata */ local implementation = cond("`mata'"=="mata", "mata", "moremata") mata : betas("`yvar'", "`xvars'", "`implementation'") } return matrix betas = betas /* r(betas) with col/row stripes */ end mata: void betas( string scalar yvar, string scalar xvars, string scalar implementation ){ real matrix y, X real colvector b string colvector cstripe xvars = tokens(xvars) y = st_data(., yvar) X = st_data(., xvars) if ( implementation == "mata" ) { X = X,J(rows(X),1,1) b = invsym(quadcross(X, X))*quadcross(X, y) } else if ( implementation == "moremata" ) { b = mm_lsfit(y, X) } rstripe = J(1, 1, ""), yvar cstripe = J(cols(b'), 1, ""), ( xvars, "_cons")' st_matrix("betas", b') st_matrixrowstripe("betas", rstripe) st_matrixcolstripe("betas", cstripe) } end exit // test follows clear all which betas capt log close timings log using timings , replace name(timings) qui { cls noi di "Stata-" c(stata_version) " Born: " c(born_date) " Processors: " c(processors) noi query compilenumber mata : mata set matafavor speed postfile timings reps obs t1 t2 t3 t10 t20 using timings, every(1) replace local nobs 1000 1000000 local nreps 25 local nonames nonames // _assert_mreldif cast error on stripes! qui foreach obs of numlist `nobs' { timer clear qui forvalues reps = 1/`nreps' { matrix drop _all qui drawnorm y x1-x10, double clear n(`obs') local yvar y unab xvars : x1-x10 timer on 1 betas `yvar' `xvars' timer off 1 matrix rename r(betas) copy timer on 2 betas `yvar' `xvars', mata timer off 2 _assert_mreldif r(betas) copy , `nonames' timer on 3 betas `yvar' `xvars', moremata timer off 3 _assert_mreldif r(betas) copy , `nonames' timer on 10 qui _regress `yvar' `xvars' timer off 10 _assert_mreldif e(b) copy , `nonames' timer on 20 betas `yvar' `xvars', asreg timer off 20 _assert_mreldif r(betas) copy , `nonames' } noi { di as res _n "obs:" `obs' di as res "reps :" `nreps' di as res "processors:" c(processors) _n timer list post timings (r(nt1)) (`obs') (r(t1)) (r(t2)) (r(t3)) (r(t10)) (r(t20)) } } postclose timings } // qui log close timings
*Example clear set seed 999 set obs 400 gen y = rnormal() gen x1 = ln(_n + 1000) gen x2 = x1^2 gen x3 = x1^3 *REGRESS CAN regress y x1-x3 *GLM CAN'T glm y x1-x3 *MM_LSFIT() CAN putmata y=y X=(x1 x2 x3), replace mata b = mm_lsfit(y, X) b end
. . regress y x1-x3 Source | SS df MS Number of obs = 400 -------------+---------------------------------- F(3, 396) = 1.44 Model | 4.67519272 3 1.55839757 Prob > F = 0.2315 Residual | 429.449514 396 1.08446847 R-squared = 0.0108 -------------+---------------------------------- Adj R-squared = 0.0033 Total | 434.124707 399 1.08803185 Root MSE = 1.0414 ------------------------------------------------------------------------------ y | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- x1 | -1298.321 10929.67 -0.12 0.906 -22785.75 20189.1 x2 | 179.992 1544.121 0.12 0.907 -2855.707 3215.691 x3 | -8.320985 72.70972 -0.11 0.909 -151.2663 134.6243 _cons | 3122.863 25785.13 0.12 0.904 -47569.99 53815.72 ------------------------------------------------------------------------------ . . glm y x1-x3 note: x3 omitted because of collinearity Iteration 0: log likelihood = -581.78996 Generalized linear models Number of obs = 400 Optimization : ML Residual df = 397 Scale parameter = 1.081773 Deviance = 429.4637175 (1/df) Deviance = 1.081773 Pearson = 429.4637175 (1/df) Pearson = 1.081773 Variance function: V(u) = 1 [Gaussian] Link function : g(u) = u [Identity] AIC = 2.92395 Log likelihood = -581.7899562 BIC = -1949.148 ------------------------------------------------------------------------------ | OIM y | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- x1 | -47.55762 87.55635 -0.54 0.587 -219.1649 124.0497 x2 | 3.282378 6.183074 0.53 0.596 -8.836224 15.40098 x3 | 0 (omitted) _cons | 172.1966 309.9173 0.56 0.578 -435.23 779.6233 ------------------------------------------------------------------------------ ------------------------------------------------- mata (type end to exit) -------------------------------------------------------------- : : b = mm_lsfit(y, X) : : b 1 +----------------+ 1 | -1298.323697 | 2 | 179.9923073 | 3 | -8.321001467 | 4 | 3122.868871 | +----------------+ : : end ---------------------------------------------------------------------------------------------------------------------------------------- .
. mat accum YX = price i.rep##c.headroom mpg (obs=69)
. correlate x1-x3 (obs=400) | x1 x2 x3 -------------+--------------------------- x1 | 1.0000 x2 | 1.0000 1.0000 x3 | 0.9999 1.0000 1.0000
. drop _all . set seed 342432 . set obs 1000 Number of observations (_N) was 0, now 1,000. . local offset 1e5 . gen double x1 = rnormal() + `offset' . gen double x2 = rnormal() . gen double y = x1 + x2 + rnormal() - `offset' . corr x1 x2 (obs=1,000) | x1 x2 -------------+------------------ x1 | 1.0000 x2 | 0.0533 1.0000 . . // mm_lsfit . mata: st_matrix("b", mm_lsfit(st_data(.,"y"), st_data(.,"x1 x2"))) . // regress . qui regress y x1 x2 . mat b = e(b)', b . // mat accum . mat accum YX = y x1 x2 (obs=1,000) . mat b = b, (invsym(YX[2...,2...]) * (YX[2...,1])) . // results . mat coln b = regress mm_lsfit "mat accum" . mat list b b[3,3] regress mm_lsfit mat accum x1 1.018271 1.018271 1.402e-07 x2 1.0036801 1.0036801 1.0587929 _cons -101827.07 -101827.07 0
Comment