Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Manual replication of spivregress

    Dear All,

    in my research I need to replicate the first stage of the spivregress command. Unfortunately the routines do not offer this option. Following user manual, I came out with the following code in Mata:

    Code:
    ********************************************************************************
    * Replicate spivregress first-stage (e(delta_2sls)) and final GS2SLS (e(b))
    * Manual first stage (2SLS) and manual second stage (GS2SLS) for SARAR(1,1)
    ********************************************************************************
    clear all
    version 18.0
    set matsize 11000
    
    mata:
    
    mata clear
    mata set matastrict on
    
    end
    
    use dataset.dta, clear
    
    replace literacy1880 = 46.77 if NUTS_ID=="UKN" /* Add the literacy rate for Norther Ireland */
    replace literacy1880 = 24.33 if NUTS_ID=="ES53" /* Add the literacy rate for Baleares */
    
    spset, modify coordsys(latlong, kilometers)
    spmatrix create idistance dW1500, vtruncate(1/1500) replace
    spmatrix summarize dW1500
    
    ********************************************************************************
    * 0) USER SETTINGS
    ********************************************************************************
    local yvar  yp9500
    local endog pc_culture                      // <-- confirm spelling matches dataset
    local ziv   pc_institutions literacy1880
    local Xlist urb_rate1850 school country1 country2 country3 country4 country5 country7 country8 country15
    
    local Wname dW1500                         // spatial matrix name
    local IMP   2                              // impower used in spivregress
    local TOL   1e-10                          // tolerance for collinearity filtering
    
    ********************************************************************************
    * 1) Spatial setup & spivregress
    ********************************************************************************
    spset
    spmatrix dir
    
    * Bring W into Mata
    spmatrix matafromsp W id = `Wname'
    
    * Run spivregress (heteroskedastic SARAR(1,1), no constant)
    spivregress `yvar' (`endog' = `ziv') `Xlist', ///
        heteroskedastic dvarlag(`Wname') errorlag(`Wname') ///
        noconstant impower(`IMP')
    
    * Store results
    matrix SPIV_delta2sls = e(delta_2sls)
    matrix SPIV_b         = e(b)
    scalar SPIV_rho       = _b[`Wname':e.`yvar']       // SAR error parameter
    
    * Detect Wy coefficient name for comparison
    local ebnames : colfullnames e(b)
    local wyname
    foreach nm of local ebnames {
        if (strpos("`nm'", "`Wname'") & strpos("`nm'", "`yvar'")) local wyname `nm'
    }
    if "`wyname'" == "" {
        foreach nm of local ebnames {
            if (strpos("`nm'", " W ") & strpos("`nm'", "`yvar'")) local wyname `nm'
        }
    }
    if "`wyname'" != "" {
        scalar BLag = _b["`Wname':`yvar'"]
        di as res "Detected Wy coefficient name: `wyname' | value: " %9.6f BLag
    }
    di as res "rho_hat (SAR error): " %9.6f SPIV_rho
    
    ********************************************************************************
    * 2-3) MATA — Definitions and Execution
    ********************************************************************************
    
    mata:
    
    
    struct colfilter {
        real matrix X
        real colvector keep
        }
    
    struct firststage {
        real matrix Z
        real matrix H1
        real colvector theta2sls
        real scalar rankH1
        real scalar rankZ
        }
    
    struct gs2sls {
        real colvector beta_gs2sls
        real scalar rankH2
        }
        
    struct colfilter drop_dep_cols(real matrix X, real scalar tol) {
         struct colfilter CF;
         CF.keep = J(cols(X), 1, 0);
        
         real matrix kept;
         kept  = J(rows(X), 0, .);
         real matrix basis;
         basis = J(0, 0, .);
         real scalar cur_rank;
         cur_rank = 0;
    
         real scalar j;
         real matrix trial;
         real matrix U, V;
         real colvector s;
         real scalar r2;
    
         for (j = 1; j <= cols(X); j++) {
             trial = (basis, X[, j]);
             svd(trial, U, s, V);
             r2 = sum(s :> tol);
             if (r2 > cur_rank) {
                 kept       = kept , X[, j];
                 CF.keep[j] = 1;
                 basis      = trial;
                 cur_rank   = r2;
             }
         }
         CF.X = kept;
         return(CF);
    }
    
    struct firststage spiv_first_stage_singleW(real colvector y, real matrix X, real matrix Yend, real matrix Ziv, real matrix W, real scalar impower, real scalar tol) {
        real matrix X53;
        real matrix H;
        real matrix curW;
        real scalar r;
        real matrix H1;
        real colvector Wy;
        real matrix HtH;
        real matrix invHtH;
        real matrix PH1;
        real matrix Ztil;
        real matrix A;
        real colvector b;  
        if (impower <= 0) impower = 2;
        if (tol <= 0) tol = 1e-10;
        X53 = X , Ziv;
        H    = X53;
        curW = W;
        for (r = 1; r <= impower; r++) {
            H    = H , (curW * X53);
            curW = curW * W;
        }
        
        struct colfilter CF2;
        CF2 = drop_dep_cols(H, tol);
        H1      = CF2.X;
    
        Wy = W * y;
        real matrix Z;
        Z  = Yend , X , Wy;
    
        HtH    = quadcross(H1, H1);
        invHtH = invsym(HtH);
        PH1    = H1 * invHtH * H1';
        Ztil = PH1 * Z;
        A    = quadcross(Ztil, Z);
        b    = Ztil' * y;
    
        struct firststage FS;
        FS.theta2sls = invsym(A) * b;
        FS.Z         = Z;
        FS.H1        = H1;
        FS.rankH1    = rank(H1);
        FS.rankZ     = rank(Z);
        return(FS);
    }
    
    struct gs2sls spiv_second_stage_gs2sls_singleW(real colvector y, real matrix Z, real matrix H1, real matrix W, real scalar rho_hat, real scalar tol) {
        if (tol <= 0) tol = 1e-10;
        
        real matrix H2full;
        H2full  = H1 , (W * H1);
        struct colfilter CF3;
        CF3 = drop_dep_cols(H2full, tol);
        real matrix H2;
        H2      = CF3.X;
    
        real colvector Wy;
        Wy    = W * y;
        real colvector ystar;
        ystar = y - rho_hat * Wy;
        real matrix Zstar;
        Zstar    = Z - rho_hat * (W * Z);
        
        real matrix HtH;
        HtH    = quadcross(H2, H2);
        real matrix invHtH;
        invHtH = invsym(HtH);
        real matrix PH2;
        PH2    = H2 * invHtH * H2';
    
        real matrix Ztil;
        Ztil = PH2 * Zstar;
        real matrix A1;
        A1    = quadcross(Ztil, Zstar);
        real matrix b1;
        b1    = Ztil' * ystar;
    
        struct gs2sls OUT;
        OUT.beta_gs2sls = invsym(A1) * b1;
        OUT.rankH2      = rank(H2);
        return(OUT);
    }
    
    struct manual{
        real colvector y1
        real matrix X1
        real matrix Yend1
        real matrix Ziv1
        real matrix W
        real scalar imp
        real scalar tlr
    }
    
    y1    = st_data(., tokens("`yvar'"));
    X1    = st_data(., tokens("`Xlist'"));
    Yend1 = st_data(., tokens("`endog'"));
    Ziv1  = st_data(., tokens("`ziv'"));
    imp = strtoreal("`IMP'");
    tlr = strtoreal("`TOL'");
    
    FS = spiv_first_stage_singleW(y1, X1, Yend1, Ziv1, W, imp, tlr);
    st_matrix("MAN_delta2sls", FS.theta2sls);
    
    real scalar rho = st_numscalar("SPIV_rho");
    struct gs2sls G = spiv_second_stage_gs2sls_singleW(y1, FS.Z, FS.H1, W, rho, tlr);
    st_matrix("MAN_beta_gs2sls", G.beta_gs2sls);
     
    end
    
    ********************************************************************************
    * 4) Compare results
    ********************************************************************************
    di as txt "==> Comparing first-stage (Stata vs Manual):"
    mat list SPIV_delta2sls, format(%9.6f)
    mat list MAN_delta2sls , format(%9.6f)
    
    di as txt "==> Comparing final-stage (GS2SLS) coefficients:"
    tempname Bsubset
    matrix `Bsubset' = J(colsof(MAN_beta_gs2sls), 1, .)
    local pos 1
    matrix `Bsubset'[`pos',1] = _b[`endog']
    local ++pos
    foreach v of local Xlist {
        matrix `Bsubset'[`pos',1] = _b[`v']
        local ++pos
    }
    if "`wyname'" != "" {
        matrix `Bsubset'[`pos',1] = BLag
    }
    mat list `Bsubset', format(%9.6f) title("Stata e(b) subset [endog, X, W*y]")
    mat list MAN_beta_gs2sls, format(%9.6f) title("Manual GS2SLS [endog, X, W*y]")
    It seems to run smoothly (I hope it is correct: I am new in Mata) until the very end. When I reach the lines in red I get the following error message:

    Code:
    : FS = spiv_first_stage_singleW(y1, X1, Yend1, Ziv1, W, imp, tlr);
             drop_dep_cols():  3259  nonstruct found where struct required
    spiv_first_stage_singleW():     -  function returned error
                     <istmt>:     -  function returned error
    (6 lines skipped)
    --------------------------------------------------------------------------------------------
    r(3259);
    
    end of do-file
    
    r(3259);
    I am running out of ideas. Do you have any suggestion? I would be very grateful for any suggestion you may want to provide.

    Best regards,

    Dario

  • #2
    Try the following minor changes in function drop_dep_cols():

    Code:
    struct colfilter scalar drop_dep_cols(real matrix X, real scalar tol) {
         struct colfilter scalar CF;
         CF.keep = J(cols(X), 1, 0);
        
         real matrix kept;
         kept  = J(rows(X), 0, .);
         real matrix basis;requires 
         basis = J(rows(X), 0, .);
         real scalar cur_rank;
         cur_rank = 0;
    
         real scalar j;
         real matrix trial;
         real matrix U, V;
         real colvector s;
         real scalar r2;
    
         for (j = 1; j <= cols(X); j++) {
             trial = (basis, X[, j]);
             svd(trial, U, s, V);
             r2 = sum(s :> tol);
             if (r2 > cur_rank) {
                 kept       = kept , X[, j];
                 CF.keep[j] = 1;
                 basis      = trial;
                 cur_rank   = r2;
             }
         }
         CF.X = kept;
         return(CF);
    }
    Without "scalar", the variable is default to a NULL matrix (orgtype) of struct colfilter, which requires initialization before usage. See help mf_orgtype for detailed discussion.
    Last edited by Hua Peng (StataCorp); 17 Dec 2025, 12:46.

    Comment


    • #3
      Hua Peng (StataCorp) thanks for your suggestion. Apparently it goes through. Another matter is to understand if I did correctly. I will check closely results.

      Comment


      • #4
        Dear All,

        I am really get crazy! I have tried to replicate spivregress command, because I need to estimate the first stage. My final code is:

        Code:
        ********************************************************************************
        * Replicate spivregress first-stage (e(delta_2sls)) and final GS2SLS (e(b))
        * Manual first stage (2SLS) and manual second stage (GS2SLS) for SARAR(1,1)
        ********************************************************************************
        clear all
        version 18.0
        set matsize 11000
        
        mata: 
        
        mata clear
        mata set matastrict on
        
        end
        
        cd "C:\Users\dario\OneDrive - unime.it\Pubblicazioni 2020\Culture and institutions\Analysis and Paper_17Oct25\Analysis\Data"
        use tabellini2.dta, clear
        
        replace literacy1880 = 46.77 if NUTS_ID=="UKN" /* Add the literacy rate for Norther Ireland */
        replace literacy1880 = 24.33 if NUTS_ID=="ES53" /* Add the literacy rate for Baleares */
        
        spset, modify coordsys(latlong, kilometers)
        spmatrix create idistance dW1500, vtruncate(1/1500) replace
        spmatrix summarize dW1500
        
        ********************************************************************************
        * 0) USER SETTINGS
        ********************************************************************************
        local yvar  yp9500
        local endog pc_culture                      // <-- confirm spelling matches dataset
        local ziv   pc_institutions literacy1880
        local Xlist urb_rate1850 school country1 country2 country3 country4 country5 country7 country8 country15
        
        local Wname dW1500                         // spatial matrix name
        local IMP   2                              // impower used in spivregress
        local TOL   1e-10                          // tolerance for collinearity filtering
        
        ********************************************************************************
        * 1) Spatial setup & spivregress
        ********************************************************************************
        spset
        spmatrix dir
        
        * Bring W into Mata
        spmatrix matafromsp W id = `Wname'
        
        * Run spivregress (heteroskedastic SARAR(1,1), no constant)
        spivregress `yvar' (`endog' = `ziv') `Xlist', ///
            heteroskedastic dvarlag(`Wname') errorlag(`Wname') ///
            noconstant impower(`IMP')
        
        * Store results
        matrix SPIV_delta2sls = e(delta_2sls)
        matrix SPIV_b         = e(b)
        scalar SPIV_rho       = _b[`Wname':e.`yvar']       // SAR error parameter
        
        * Detect Wy coefficient name for comparison
        local ebnames : colfullnames e(b)
        local wyname
        foreach nm of local ebnames {
            if (strpos("`nm'", "`Wname'") & strpos("`nm'", "`yvar'")) local wyname `nm'
        }
        if "`wyname'" == "" {
            foreach nm of local ebnames {
                if (strpos("`nm'", " W ") & strpos("`nm'", "`yvar'")) local wyname `nm'
            }
        }
        if "`wyname'" != "" {
            scalar BLag = _b["`Wname':`yvar'"] 
            di as res "Detected Wy coefficient name: `wyname' | value: " %9.6f BLag
        }
        di as res "rho_hat (SAR error): " %9.6f SPIV_rho
        
        ********************************************************************************
        * 2-3) MATA — Definitions and Execution
        ********************************************************************************
        
        mata:
        
        
        struct colfilter {
            real matrix X 
            real colvector keep 
            }
        
        struct firststage { 
            real matrix Z 
            real matrix H1 
            real colvector theta2sls
            real scalar rankH1 
            real scalar rankZ 
            }
        
        struct gs2sls {
            real colvector beta_gs2sls 
            real scalar rankH2 
            }
            
        
        struct colfilter scalar drop_dep_cols(real matrix X, real scalar tol)
        {
            struct colfilter scalar CF;
            CF.keep = J(cols(X), 1, 0);
            real matrix kept;
            real matrix basis;
        
            kept = J(rows(X), 0, .);
            basis = J(rows(X), 0, .);   // ensure conformability for column-join
            
            real scalar cur_rank;
            cur_rank = 0;
            real scalar j;
            real matrix trial;
            real matrix U, V;
            real colvector s;
            real scalar r2;
        
            for (j = 1; j <= cols(X); j++) {
                trial = (basis, X[, j]);
                svd(trial, U, s, V);
                r2 = sum(s :> tol);
                if (r2 > cur_rank) {
                    kept = kept , X[, j];
                    CF.keep[j] = 1;
                    basis = trial;
                    cur_rank = r2;
                }
            }
        
            CF.X = kept;
            return(CF);
        
        }
        
        
        struct firststage scalar spiv_first_stage_singleW(
            real colvector y,
            real matrix    X,
            real matrix    Yend,
            real matrix    Ziv,
            real matrix    W,
            real scalar    impower,
            real scalar    tol)
        {
            if (impower <= 0) impower = 2;
            if (tol    <= 0) tol = 1e-10;
            
            real matrix X53
            real matrix H
            real matrix curW
        
            X53 = X , Ziv;
            H    = X53;
            curW = W;
        
            real scalar r;
            for (r = 1; r <= impower; r++) {
                H    = H , (curW * X53);
                curW = curW * W;
            }
            
            struct colfilter scalar CF2;
        
            CF2 = drop_dep_cols(H, tol);
            
            real matrix H1;
            H1 = CF2.X;
            
            real colvector Wy;
            Wy = W * y;
        
            real matrix Z;
            Z    = Yend , X , Wy;
            
            real matrix HtH;
            HtH  = quadcross(H1, H1);
            
            real matrix invHtH;
            invHtH = invsym(HtH);
            
            real matrix PH1;
            PH1  = H1 * invHtH * H1';
            
            real matrix Ztil; 
            Ztil = PH1 * Z;
            
            real matrix A;
            A    = quadcross(Ztil, Z);
            
            real colvector b;
            b = Ztil' * y;
        
            struct firststage scalar FiS;
            FiS.theta2sls = invsym(A) * b;
            FiS.Z        = Z;
            FiS.H1       = H1;
            FiS.rankH1   = rank(H1);
            FiS.rankZ    = rank(Z);
        
            return(FiS);
        }
        
        
        struct gs2sls scalar spiv_second_stage_gs2sls_singleW(
            real colvector y,
            real matrix    Z,
            real matrix    H1,
            real matrix    W,
            real scalar    rho_hat,
            real scalar    tol)
        {
            if (tol <= 0) tol = 1e-10;
        
            real matrix H2full;
            H2full = H1 , (W * H1);
            struct colfilter scalar CF3;
            CF3 = drop_dep_cols(H2full, tol);
            real matrix H2;
            H2 = CF3.X;
        
            real colvector Wy;
            Wy    = W * y;
            real colvector ystar;
            ystar = y - rho_hat * Wy;
            real matrix Zstar;
            Zstar  = Z - rho_hat * (W * Z);
        
            real matrix HtH;
            HtH     = quadcross(H2, H2);
            real matrix invHtH;
            invHtH  = invsym(HtH);
            real matrix PH2;
            PH2     = H2 * invHtH * H2';
            real matrix Ztil;
            Ztil    = PH2 * Zstar;
            real matrix A1;
            A1      = quadcross(Ztil, Zstar);
            real matrix b1;
            b1      = Ztil' * ystar;
        
            struct gs2sls scalar OUT;
            OUT.beta_gs2sls = invsym(A1) * b1;
            OUT.rankH2      = rank(H2);
        
            return(OUT);
        }
        
        struct manual{
            real colvector y1
            real matrix X1
            real matrix Yend1
            real matrix Ziv1
            real matrix W
            real scalar imp
            real scalar tlr
        }
        
        
        // --- pull data & options from Stata ---
        y1    = st_data(., tokens("`yvar'"));
        X1    = st_data(., tokens("`Xlist'"));
        Yend1 = st_data(., tokens("`endog'"));
        Ziv1  = st_data(., tokens("`ziv'"));
        imp   = strtoreal("`IMP'");
        tlr   = strtoreal("`TOL'");
        
        // FIRST STAGE (declare, then assign)
        //struct firststage scalar FiS(y1, X1, Yend1, Ziv1, W, imp, tlr);
        FiS = spiv_first_stage_singleW(y1, X1, Yend1, Ziv1, W, imp, tlr);
        st_matrix("MAN_delta2sls", FiS.theta2sls);
        
        // bring rho from Stata to Mata
        
        rho = st_numscalar("SPIV_rho");
        
        // SECOND STAGE (declare, then assign)
        // struct gs2sls scalar G;
        G = spiv_second_stage_gs2sls_singleW(y1, FiS.Z, FiS.H1, W, rho, tlr);
        
        st_matrix("MAN_beta_gs2sls", G.beta_gs2sls);
        
         
        end
        
        ********************************************************************************
        * 4) Compare results
        ********************************************************************************
        
        
        local ebnames : colfullnames e(b)
        di as txt "e(b) column names:"
        di "`ebnames'"
        
        * And test one reference explicitly:
        display _b["`yvar':`endog'"]
        
        
        
        di as txt "==> Comparing final-stage (GS2SLS) coefficients:"
        tempname Bsubset
        
        * Count rows we need: 1 for endog + #Xlist + 1 if Wy is present
        local kX : word count `Xlist'
        local addWy = cond("`wyname'"!="", 1, 0)
        local rows = 1 + `kX' + `addWy'
        
        matrix `Bsubset' = J(`rows', 1, .)
        local pos = 1
        
        * 1) Endogenous regressor
        local nm "`yvar':`endog'"
        capture matrix `Bsubset'[`pos',1] = _b["`nm'"]
        if _rc {
            di as err "Missing coef in e(b): `nm' -> set to missing."
            matrix `Bsubset'[`pos',1] = .
        }
        local ++pos
        
        * 2) All X's
        foreach v of local Xlist {
            local nm "`yvar':`v'"
            capture matrix `Bsubset'[`pos',1] = _b["`nm'"]
            if _rc {
                di as err "Missing coef in e(b): `nm' -> set to missing."
                matrix `Bsubset'[`pos',1] = .
            }
            local ++pos
        }
        
        * 3) Wy (if detected earlier)
        if "`wyname'" != "" {
            matrix `Bsubset'[`pos',1] = BLag
        }
        
        mat list `Bsubset', format(%9.6f) title("Stata e(b) subset [endog, X, W*y]")
        mat list MAN_beta_gs2sls, format(%9.6f) title("Manual GS2SLS [endog, X, W*y]")
        Comparison of the results indicates a slight but still sizeable difference:

        Code:
        . mat list `Bsubset', format(%9.6f) title("Stata e(b) subset [endog, X, W*y]")
        
        __000000[12,1]:  Stata e(b) subset [endog, X, W*y]
                    c1
         r1   0.532636
         r2   0.577093
         r3   0.339420
         r4  56.813335
         r5  19.323841
         r6  40.001986
         r7  68.503835
         r8  16.941797
         r9  58.507620
        r10  33.519943
        r11  47.884192
        r12   0.263805
        
        . mat list MAN_beta_gs2sls, format(%9.6f)
        
        MAN_beta_gs2sls[12,1]
                    c1
         r1   0.529632
         r2   0.579944
         r3   0.339817
         r4  55.994329
         r5  18.424055
         r6  39.249499
         r7  67.798004
         r8  15.741989
         r9  57.117423
        r10  32.788604
        r11  47.330402
        r12   0.271898
        I cannot figure out why. I have followed closely user manual, but still I cannot get exactly the same results. I would be very grateful for any suggestion you may want to provide.

        Best

        Dario

        Comment

        Working...
        X