Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Linear Regression and Prediction in Mata

    Consider the following in ado-code
    Code:
    * Example generated by -dataex-. For more info, type help dataex
    clear
    input int year long(gdp7 gdp1 gdp2 gdp3 gdp4 gdp5 gdp6 gdp8 gdp9 gdp10 gdp12 gdp14 gdp16 gdp18 gdp19 gdp20 gdp21)
    1960  2284  2879  2158  1796  1782  2329  1858  1620  2113  1713  3387  1010   707   728  1074  2373  2545
    1961  2388  2929  2216  1899  1883  2485  1959  1763  2111  1823  3625  1143  1028   783  1201  2346  2659
    1962  2527  3103  2276  1977  2018  2665  2099  1900  2275  1905  3788  1262  1049   848  1335  2539  2713
    1963  2610  3227  2386  2074  2122  2700  2205  2021  2360  1991  3946  1374  1188   905  1467  2717  2866
    1964  2806  3420  2534  2224  2288  2980  2367  2093  2566  2112  4158  1546  1316   979  1554  2873  3001
    1965  3005  3667  2660  2336  2414  3177  2526  2205  2739  2268  4360  1647  1492  1084  1678  2973  3224
    1966  3168  3974  2794  2537  2559  3377  2728  2400  2874  2417  4575  1889  1634  1170  1841  3230  3481
    1967  3241  4154  2923  2668  2719  3580  2915  2624  3078  2620  4790  2133  1754  1293  1954  3404  3360
    1968  3571  4494  3173  2908  2958  3892  3163  2910  3399  2784  5138  2496  1967  1475  2160  3788  3489
    1969  3998  4805  3391  3239  3308  4352  3531  3227  3951  3035  5645  2917  2298  1592  2447  4162  3969
    1970  4367  4999  3615  3732  3797  4541  3857  3602  4342  3213  6382  3293  2631  1916  2715  4490  4136
    1971  4686  5362  3854  4100  4129  4858  4203  3834  4705  3523  6938  3575  2988  2153  2951  4677  4438
    1972  5055  5838  4149  4513  4512  5262  4535  4101  4984  3814  7406  3988  3383  2420  3293  5013  4780
    1973  5553  6464  4683  4969  5039  5719  5008  4594  5475  4138  7998  4491  3844  2845  3710  5595  5335
    1974  6074  6951  5036  5622  5707  6121  5594  5240  6165  4728  8800  4778  3907  3048  4227  6024  6041
    1975  6603  7519  5481  6147  6146  6565  6077  5601  6699  5458  8934  5300  4505  3045  4600  6683  6382
    1976  7367  8300  5953  6810  6854  7371  6673  6288  7345  6116  9368  5768  5024  3407  4965  7140  6748
    1977  8090  9146  6489  7588  7326  7901  7292  6821  7959  6742 10240  6344  5417  3784  5364  7482  6867
    1978  8928 10229  7176  8117  8056  8585  8031  7542  8676  7412 11003  7084  6138  4120  5761  8272  7301
    1979 10067 11306  7973  9288  8921  9566  8948  8594  9504  8354 12189  8027  6781  4663  6188  9174  7915
    1980 11083 12186  8502 10312 10156 10362  9891  9643 10458  9557 13853  8931  7374  5263  6856 10203  8656
    1981 12115 13533  9161 11242 11079 11106 10929 10593 11304 10548 15338  9986  7870  5812  7447 11513  9736
    1982 12761 13940  9917 12148 11827 12115 11869 11275 11784 11205 15963 10813  8204  6263  7957 11537 10687
    1983 13519 15008 10669 13048 12334 12822 12518 11812 12419 12022 16611 11346  8388  6479  8378 12300 11262
    1984 14481 16549 11336 13533 13113 13777 13145 12543 13234 13220 17710 12064  8834  6570  8812 13120 12141
    1985 15291 17600 12068 14296 13735 14698 13746 13285 13938 14354 18812 12978  9297  6959  9259 14019 12556
    1986 15998 18439 12795 14921 14292 15608 14308 13896 14613 15184 19458 13590  9525  7414  9744 14537 13085
    1987 16679 19407 13717 15549 15013 16024 14940 14688 15197 15823 20120 14425  9548  8126 10542 15554 13402
    1988 17786 20711 14864 16595 16209 16766 16040 15784 16082 16299 21334 15862 10277  9057 11434 16524 13569
    1989 18994 22047 15716 17768 17345 17418 17193 16875 17387 17043 22965 17269 11036 10042 12417 17255 14124
    1990 20465 23064 16397 19070 18526 18237 18244 17946 18665 18004 24518 18815 11405 10894 13365 17322 14420
    1991 21602 23507 16681 20172 19453 19070 19021 18845 19626 19212 24840 20055 11971 11783 14152 17652 14252
    1992 22154 24509 17069 20960 20123 19829 19746 19367 20224 20185 25141 20648 12198 12219 14564 18496 14679
    1993 21878 25409 17845 21220 20308 20199 19920 19778 20679 21088 25427 21122 12166 12236 14699 19432 15605
    1994 22371 26670 18975 22139 21340 21691 20695 20577 21588 22534 26031 21757 12566 12614 15325 20583 16686
    1995 23035 27574 19860 22976 22248 22693 21545 21532 22585 23874 26485 22551 12991 13413 16032 21773 17402
    1996 23742 28814 20923 24008 22687 23781 22288 22235 23531 26263 26394 23714 13418 13974 16720 22562 17879
    1997 24156 30262 22280 24472 23416 24878 23370 22810 24692 27776 27850 24478 14127 14804 17420 23649 18510
    1998 24931 31519 23206 25314 24164 25669 24412 23840 25811 27294 28835 24429 14665 15401 18479 24853 18669
    1999 25755 33028 23959 26558 24792 27113 25111 24402 26654 30011 28887 24709 15213 16363 19817 26283 19937
    2000 26943 34603 25583 28359 26631 28798 26690 25759 28467 36273 30461 26015 16272 17353 21074 27403 20789
    2001 27449 35341 27026 28855 28001 29837 28043 26586 30359 37078 30806 26619 17332 18071 22257 28492 21825
    2002 28348 36180 28969 29942 29330 30318 28829 27320 31284 36617 32751 27196 19119 18799 23756 29819 22662
    2003 28855 37548 29609 30796 30082 30853 29210 27537 31792 37245 33516 28071 20479 17603 24812 31273 23728
    end
    format %ty year
    label var gdp7 "West Germany"
    label var gdp1 "1 GDP per Capita"
    label var gdp2 "2 GDP per Capita"
    label var gdp3 "3 GDP per Capita"
    label var gdp4 "4 GDP per Capita"
    label var gdp5 "5 GDP per Capita"
    label var gdp6 "6 GDP per Capita"
    label var gdp8 "8 GDP per Capita"
    label var gdp9 "9 GDP per Capita"
    label var gdp10 "10 GDP per Capita"
    label var gdp12 "12 GDP per Capita"
    label var gdp14 "14 GDP per Capita"
    label var gdp16 "16 GDP per Capita"
    label var gdp18 "18 GDP per Capita"
    label var gdp19 "19 GDP per Capita"
    label var gdp20 "20 GDP per Capita"
    label var gdp21 "21 GDP per Capita"
    cls
    qui ds
    
    loc int_time= 1987
    
    loc temp: word 1 of `r(varlist)'
    
    loc time: disp "`temp'"
    
    loc t: word 2 of `r(varlist)'
    
    loc treated_unit: disp "`t'"
    
    loc a: word 3 of `r(varlist)'
    
    loc donor_one: disp "`a'" // First donor unit...
    
    local nwords :  word count `r(varlist)'
    
    loc b: word `nwords' of `r(varlist)'
    
    loc last_donor: disp "`b'" // Last donor...
    
    egen ym = rowmean(`donor_one'-`last_donor')
    
    lab var ym "ymean"
    
    drop `donor_one'-`last_donor'
    cls
    
    reg gdp7 ym ///
        if year < `int_time' //
        
    predict cf, xb
    
        
    lab var cf "Counterfactual West Germany"
    
    cls
        
    line gdp cf year, xline(`int_time') ///
        legend(ring(0) pos(11)) lcol(black red) ///
        yti("GDP per Capita") xti("Year")
    This estimates the causal impact of West German Reunification on GDP per capita, using a new difference-in-differences technique. The steps are fairly simple: Take the row-wise average of the control group's units, then use bivariate linear regression to predict the outcome of the treated unit (the first column) in the preintervention period (pre-1990). Then, use those OLS estimates from the pre-intervention period, and boom, we have ourselves a counterfactual.

    Consider the problem now in Mata

    Code:
    * Example generated by -dataex-. For more info, type help dataex
    clear
    input int year long(gdp7 gdp1 gdp2 gdp3 gdp4 gdp5 gdp6 gdp8 gdp9 gdp10 gdp12 gdp14 gdp16 gdp18 gdp19 gdp20 gdp21)
    1960  2284  2879  2158  1796  1782  2329  1858  1620  2113  1713  3387  1010   707   728  1074  2373  2545
    1961  2388  2929  2216  1899  1883  2485  1959  1763  2111  1823  3625  1143  1028   783  1201  2346  2659
    1962  2527  3103  2276  1977  2018  2665  2099  1900  2275  1905  3788  1262  1049   848  1335  2539  2713
    1963  2610  3227  2386  2074  2122  2700  2205  2021  2360  1991  3946  1374  1188   905  1467  2717  2866
    1964  2806  3420  2534  2224  2288  2980  2367  2093  2566  2112  4158  1546  1316   979  1554  2873  3001
    1965  3005  3667  2660  2336  2414  3177  2526  2205  2739  2268  4360  1647  1492  1084  1678  2973  3224
    1966  3168  3974  2794  2537  2559  3377  2728  2400  2874  2417  4575  1889  1634  1170  1841  3230  3481
    1967  3241  4154  2923  2668  2719  3580  2915  2624  3078  2620  4790  2133  1754  1293  1954  3404  3360
    1968  3571  4494  3173  2908  2958  3892  3163  2910  3399  2784  5138  2496  1967  1475  2160  3788  3489
    1969  3998  4805  3391  3239  3308  4352  3531  3227  3951  3035  5645  2917  2298  1592  2447  4162  3969
    1970  4367  4999  3615  3732  3797  4541  3857  3602  4342  3213  6382  3293  2631  1916  2715  4490  4136
    1971  4686  5362  3854  4100  4129  4858  4203  3834  4705  3523  6938  3575  2988  2153  2951  4677  4438
    1972  5055  5838  4149  4513  4512  5262  4535  4101  4984  3814  7406  3988  3383  2420  3293  5013  4780
    1973  5553  6464  4683  4969  5039  5719  5008  4594  5475  4138  7998  4491  3844  2845  3710  5595  5335
    1974  6074  6951  5036  5622  5707  6121  5594  5240  6165  4728  8800  4778  3907  3048  4227  6024  6041
    1975  6603  7519  5481  6147  6146  6565  6077  5601  6699  5458  8934  5300  4505  3045  4600  6683  6382
    1976  7367  8300  5953  6810  6854  7371  6673  6288  7345  6116  9368  5768  5024  3407  4965  7140  6748
    1977  8090  9146  6489  7588  7326  7901  7292  6821  7959  6742 10240  6344  5417  3784  5364  7482  6867
    1978  8928 10229  7176  8117  8056  8585  8031  7542  8676  7412 11003  7084  6138  4120  5761  8272  7301
    1979 10067 11306  7973  9288  8921  9566  8948  8594  9504  8354 12189  8027  6781  4663  6188  9174  7915
    1980 11083 12186  8502 10312 10156 10362  9891  9643 10458  9557 13853  8931  7374  5263  6856 10203  8656
    1981 12115 13533  9161 11242 11079 11106 10929 10593 11304 10548 15338  9986  7870  5812  7447 11513  9736
    1982 12761 13940  9917 12148 11827 12115 11869 11275 11784 11205 15963 10813  8204  6263  7957 11537 10687
    1983 13519 15008 10669 13048 12334 12822 12518 11812 12419 12022 16611 11346  8388  6479  8378 12300 11262
    1984 14481 16549 11336 13533 13113 13777 13145 12543 13234 13220 17710 12064  8834  6570  8812 13120 12141
    1985 15291 17600 12068 14296 13735 14698 13746 13285 13938 14354 18812 12978  9297  6959  9259 14019 12556
    1986 15998 18439 12795 14921 14292 15608 14308 13896 14613 15184 19458 13590  9525  7414  9744 14537 13085
    1987 16679 19407 13717 15549 15013 16024 14940 14688 15197 15823 20120 14425  9548  8126 10542 15554 13402
    1988 17786 20711 14864 16595 16209 16766 16040 15784 16082 16299 21334 15862 10277  9057 11434 16524 13569
    1989 18994 22047 15716 17768 17345 17418 17193 16875 17387 17043 22965 17269 11036 10042 12417 17255 14124
    1990 20465 23064 16397 19070 18526 18237 18244 17946 18665 18004 24518 18815 11405 10894 13365 17322 14420
    1991 21602 23507 16681 20172 19453 19070 19021 18845 19626 19212 24840 20055 11971 11783 14152 17652 14252
    1992 22154 24509 17069 20960 20123 19829 19746 19367 20224 20185 25141 20648 12198 12219 14564 18496 14679
    1993 21878 25409 17845 21220 20308 20199 19920 19778 20679 21088 25427 21122 12166 12236 14699 19432 15605
    1994 22371 26670 18975 22139 21340 21691 20695 20577 21588 22534 26031 21757 12566 12614 15325 20583 16686
    1995 23035 27574 19860 22976 22248 22693 21545 21532 22585 23874 26485 22551 12991 13413 16032 21773 17402
    1996 23742 28814 20923 24008 22687 23781 22288 22235 23531 26263 26394 23714 13418 13974 16720 22562 17879
    1997 24156 30262 22280 24472 23416 24878 23370 22810 24692 27776 27850 24478 14127 14804 17420 23649 18510
    1998 24931 31519 23206 25314 24164 25669 24412 23840 25811 27294 28835 24429 14665 15401 18479 24853 18669
    1999 25755 33028 23959 26558 24792 27113 25111 24402 26654 30011 28887 24709 15213 16363 19817 26283 19937
    2000 26943 34603 25583 28359 26631 28798 26690 25759 28467 36273 30461 26015 16272 17353 21074 27403 20789
    2001 27449 35341 27026 28855 28001 29837 28043 26586 30359 37078 30806 26619 17332 18071 22257 28492 21825
    2002 28348 36180 28969 29942 29330 30318 28829 27320 31284 36617 32751 27196 19119 18799 23756 29819 22662
    2003 28855 37548 29609 30796 30082 30853 29210 27537 31792 37245 33516 28071 20479 17603 24812 31273 23728
    end
    format %ty year
    label var gdp7 "West Germany"
    label var gdp1 "1 GDP per Capita"
    label var gdp2 "2 GDP per Capita"
    label var gdp3 "3 GDP per Capita"
    label var gdp4 "4 GDP per Capita"
    label var gdp5 "5 GDP per Capita"
    label var gdp6 "6 GDP per Capita"
    label var gdp8 "8 GDP per Capita"
    label var gdp9 "9 GDP per Capita"
    label var gdp10 "10 GDP per Capita"
    label var gdp12 "12 GDP per Capita"
    label var gdp14 "14 GDP per Capita"
    label var gdp16 "16 GDP per Capita"
    label var gdp18 "18 GDP per Capita"
    label var gdp19 "19 GDP per Capita"
    label var gdp20 "20 GDP per Capita"
    label var gdp21 "21 GDP per Capita"
    cls
    qui ds
    
    loc int_time= 1987
    
    loc temp: word 1 of `r(varlist)'
    
    loc time: disp "`temp'"
    
    loc t: word 2 of `r(varlist)'
    
    loc treated_unit: disp "`t'"
    
    loc a: word 3 of `r(varlist)'
    
    loc donor_one: disp "`a'" // First donor unit...
    
    local nwords :  word count `r(varlist)'
    
    loc b: word `nwords' of `r(varlist)'
    
    loc last_donor: disp "`b'" // Last donor...
    
    /*
    egen ym = rowmean(`donor_one'-`last_donor')
    
    lab var ym "ymean"
    
    drop `donor_one'-`last_donor'
    cls
    
    reg gdp7 ym ///
        if year < `int_time' //  */
    
     mata
     y    = st_data(., "gdp7")
     
     t = st_data(., "year")
    
     X    = st_data(., "`donor_one'-`last_donor'")
    
     n    = rows(X)
    
     X    = X,J(n,1,1)
    
     XpX  = quadcross(X, X) if t < 1990
    
     XpXi = invsym(XpX)
    
     b    = XpXi*quadcross(X, y)
    
    end
    The error you'll get is
    'if' found where almost anything else expected
    r(3000);


    My question is this: how do I get Mata to know that I'd like to restrict my estimation sample to being before 1990, and then use the OLS coefficient to predict the values for West Germany after 1990? In Stata's ado, this is very simple. But I've never used Mata before, so I was curious on how we might do this.

    EDIT: For the curious, this is code for this paper and the Mata code comes from here
    Last edited by Jared Greathouse; 22 Aug 2022, 13:17.

  • #2
    Also posted, with subsequent discussion, in the General forum at

    https://www.statalist.org/forums/for...-and-using-ols

    Comment

    Working...
    X