Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • one to one match not using psmatch2

    Hi all, I need to perform one to one match but not with psm, I want to match firms with priva ==1 with firms with priva ==0 using "IR" as my measure either with or without replacement, I write code to do this but it's too slow(take around 15 minutes for one version and I have 64 of them), so could any one here know an efficient way doing this, below is data.

    thanks in advance

    Code:
    * Example generated by -dataex-. To install: ssc install dataex
    clear
    input float year str9 firmid float(provinceid industrycode2) double(sales totalasset IR) float(Export Leverage ROA priva)
    2002 "731480669" 440000 3100  20952   41538   .1740685999393463           0 1.8839825    -.08137128 1
    2003 "731754236" 320000 2600  22130   14410  .07273736596107483           0  .7900621     .10062457 1
    2004 "731797949" 650000 1700 147286  316933   .4121880531311035           0  .8761314   -.031309456 1
    2004 "731840978" 350000 2000  51174   49218    .249519482254982           0  3.524129     .05292779 1
    2004 "732110555" 330000 3600  21895   24692   .9157142043113709           0 2.3558033      .1003159 1
    2002 "732846263" 430000 2600 153720  115150  .09424319863319397           0  .8668936     .04585323 1
    2004 "733240474" 150000 2600  36766   54266  .37224307656288147    .6126385   1.49098    .009213872 1
    2002 "733897759" 360000 1500  12628   16373   .7729251384735107           0  .4318321    -.03450803 1
    2003 "734718142" 370000 1300  61452    9747  .08469082415103912           0  13.00933     .04780958 1
    2003 "735380788" 610000 1500  11024   52997    .967302143573761           0 .58859146    -.05911655 1
    2004 "737219103" 370000 3700 152859  105293  .30998149514198303 .0029393365  3.601967     .13094887 1
    2004 "737611922" 450000 2600  10321    5638   .4301075339317322           0 1.7184185    -.05054984 1
    2004 "737619131" 450000 3500  19153   29159  .09326812624931335           0  .4666767   -.026441235 1
    2003 "738710949" 130000 3100  11542   23450   .2850426137447357           0  9.151515    .006226013 1
    2003 "738984560" 430000 3500   5565   10803  .33863499760627747           0  6.978582    -.09188988 1
    2003 "740869166" 340000 1500 170005  360980  .13403639197349548           0   1.66581    -.03021497 1
    2004 "740989678" 370000 3100 200509  211844  .08894023299217224           0  6.735485     .04525028 1
    2004 "741286333" 610000 1300  10601    8365   .2943892478942871           0 11.131484    -.10627615 1
    2003 "74227449X" 510000 1300  25658   29581   1.038927435874939           0 10.408022    .000980359 1
    2003 "743670996" 440000 2700  46775  101265  .23514454066753387           0 .32488585     .01904903 1
    2004 "744034481" 120000 2600  26440   17770  .22697417438030243           0  .4168394       .207372 1
    2004 "744338602" 210000 3600  81704  114299   .4226192235946655    .3451689  2.398216    .031968784 1
    2003 "744675529" 510000 2600  38225   32888  .05744858831167221           0 .26273757     .09067137 1
    2004 "745148851" 450000 1300  20536   78985  1.0425113439559937           0  6.828048    -.09792998 1
    2004 "746100796" 110000 3100  95449  141444   .6907730102539063           0 1.3750944     .10032946 1
    2004 "747037565" 330000 4000 252705  105626   .2071758657693863    .4763993 2.3320506     .07603242 1
    2004 "747166551" 330000 3000  12822    9244  .08823002874851227           0  6.094398     .03191259 1
    2004 "747808358" 370000 3900  33138   35417   .7609244585037231           0   10.2793    .022757433 1
    2004 "748550344" 140000 2700   5151   15362   .5189773440361023           0 .56260806   .0019528707 1
    2004 "749531612" 230000 1500   7363    3689   .3461538553237915           0  2.412581     .02981838 1
    2004 "749865583" 370000 4100  69969   43132   .2713746726512909           0  2.282996      .1315033 1
    2004 "753379537" 430000 3500  51691  105368  .25050172209739685           0  .7424839     .02668742 1
    2004 "755810343" 430000 2600  45583   34401  .09624548256397247           0    4.7335    .008836952 1
    1999 "801109348" 110000 2900 172754 1099424   .3347841203212738           0 15.586817    -.10828269 1
    1999 "802590751" 110000 3400  13600    4956  .29372549057006836           0  .1843955      .0968523 1
    2001 "804415121" 130000 1700  11169    2969  .13001464307308197           0  .4131366     .14584035 1
    2000 "813811126" 410000 1700  11391    5210   .0409548319876194           0 3.5701754      .1771619 1
    2004 "839987808" 320000 1300   8878    4494  .21053239703178406           0 11.131484    -.02269693 1
    2002 "843847488" 330000 2600   6808    4840  .10150288790464401           0  3.033333    -.02128099 1
    1999 "868530305" 410000 1700  73085   34388   .0820949524641037           0  21.05773    .005437944 1
    2004 "877687275" 420000 1400   6075   12210   1.119218349456787           0  9.901786   -.009090909 1
    1999 "878375740" 420000 1700  22689   36279   .2773377299308777           0  3.467307   -.007662835 1
    2000 "919660637" 540000 3100   3185    5717   .4512743651866913           0 .18585356    .067692846 1
    2002 "AB2244175" 110000 3100  72196   75481  .07764967530965805           0   12.3288     .01005551 1
    1999 "C14501401" 410000 1900   9200    7080 .014059754088521004           0  .1843955     .15880966 1
    2004 "G33692492" 440000 3000  25558    8334 .004409305285662413           0  1.352908     .27498284 1
    1999 "HN4510902" 430000 3600 105112  287640  1.2592281103134155           0  3.767619    -.01864831 1
    1999 "HU0815595" 420000 3000  29220   17780   .2023460417985916           0 2.2327273     .08998875 1
    2001 "JL272201X" 220000 3600  91924   18338  .02176063321530819           0  .3706555     .10039262 1
    1999 "K25061390" 410000 3500  12410    3440 .013750618323683739           0  .1843955     .15880966 1
    1999 "SA2200647" 610000 3100  12850   35250  .14339622855186462           0  .2661638    .018723404 1
    1999 "VS3828986" 510000 1300   6250    7400  .24529844522476196           0 2.5238094  -.0006756757 1
    1999 "X07023133" 230000 3600   9078   13450    .920914351940155           0  2.072179    -.04624535 1
    1998 "004188880" 370000 3500 154721   91000 .053614456206560135           0 1.4594594     .10017583 0
    2002 "016016226" 610000 3100   1998   74278   .6454432010650635           0 10.397575   .0006596839 0
    2003 "016016226" 610000 3100   2995   74358  1.1269488334655762           0 10.444975  .00043035045 0
    2004 "016016226" 610000 3100   2902   74000   .6887009739875794           0  11.03252   .0004054054 0
    2004 "100003401" 110000 3300   3551  103362   .4313957989215851           0  .6328136  -.0027959985 0
    1998 "101100481" 110000 2300   6525   31312 .019155049696564674           0 1.1064245    -.06074348 0
    1999 "101100481" 110000 2300   7984   31242 .013750618323683739           0 1.0387627   -.015844056 0
    2000 "101100481" 110000 2300   7654   35461 .011776781640946865           0 1.3741965   -.010941598 0
    2001 "101100481" 110000 2300   7432   42755 .012429831549525261           0 1.8471066   .0018945152 0
    2002 "101100481" 110000 2300   7596   47784 .005261091515421867           0 3.5512905   -.016993135 0
    2003 "101100481" 110000 2300   8410   43798 .009993675164878368           0  4.527259    -.06790265 0
    1998 "101100588" 110000 2300  10355   30114  .21880383789539337           0  .2101266  .00023245002 0
    1999 "101100588" 110000 2300   9498   31164    .201830193400383           0  .2346579   .0004813246 0
    2000 "101100588" 110000 2300   8967   31347  .31930750608444214           0 .24264647   .0003509108 0
    2001 "101100588" 110000 2300  11551   31526  .39241379499435425           0 .25152838  .00009515955 0
    2002 "101100588" 110000 2300  13647   32244  .39520585536956787           0 .28160897  .00012405409 0
    2003 "101100588" 110000 2300  14008   61842   .3599514067173004           0 1.4574608  .00012936193 0
    2004 "101100588" 110000 2300  20647   87170  .32129964232444763           0  2.464351 .000022943674 0
    1998 "101100676" 110000 2600  14158  108975   .6065140962600708   .17411263  .4446595     .00414774 0
    1998 "101100684" 110000 2300  34466  123681  .14982537925243378           0  .4563556    .012192657 0
    1999 "101100684" 110000 2300  32321  125280   .2320360541343689           0  .3863314    .016762452 0
    2000 "101100684" 110000 2300  29496  126360  .24967902898788452           0  .4108977  .00016619184 0
    2001 "101100684" 110000 2300  31240  128776  .08750544488430023           0 .52043164    -.02637914 0
    2002 "101100684" 110000 2300  37801  129944  .09606187045574188           0 .58491486   .0031936835 0
    2003 "101100684" 110000 2300  32172  122236   .1621129810810089           0  .4564909     .02380641 0
    2004 "101100684" 110000 2300  32790  123016  .25414934754371643           0  .4583995     .01032386 0
    1998 "101101513" 110000 2300 152459  542279   .9517787098884583           0 .25503144    .025811436 0
    1999 "101101513" 110000 2300 172754  705663  2.0398201942443848           0  .6175248    .005250382 0
    2000 "101101513" 110000 2300 206120  420767  .12487445771694183           0 .50896376      .1771619 0
    2001 "101101513" 110000 2300 230481  381721   .2381468117237091           0  .3474329     .02311112 0
    2002 "101101513" 110000 2300 187916  444208   .6213416457176209           0  .4448232   .0033993085 0
    2003 "101101513" 110000 2300 193863  479320   .7966523170471191           0 .52471477     .02015355 0
    2004 "101101513" 110000 2300 241201  497970   .6527663469314575           0  .4364008    .036909852 0
    1998 "10110153X" 110000 3500   6494   61855   3.059612989425659           0  2.923814    .005044055 0
    1999 "10110153X" 110000 3500  10767   91064   3.164313316345215           0 1.9853134  .00010981288 0
    1998 "10110313X" 110000 2700   3251    2129  .17903225123882294           0  1.904502     .07703147 0
    1999 "10110313X" 110000 2700   5294    3368   .0978676900267601           0  2.295499     .15880966 0
    2000 "10110313X" 110000 2700   5565    4500   .0850767120718956           0 2.3783784     .08311111 0
    2001 "10110313X" 110000 2700   5514    4452  .11073094606399536           0 1.9601064      .1745283 0
    2002 "10110313X" 110000 2700   6722    5880  .09602906554937363           0 1.1514819      .1947279 0
    2003 "10110313X" 110000 2700   6908    7671   .1169472485780716           0 1.6543252    .030243775 0
    2004 "10110313X" 110000 2700   8520    7984  .10785061120986938           0  1.476427  -.0006262525 0
    1998 "101103930" 110000 3000  13467   21648   1.207482933998108           0 1.4931475     .09275684 0
    1999 "101103930" 110000 3000  12050   22110   1.460150957107544           0  1.254972     .09059249 0
    2000 "101103930" 110000 3000  13008   24326   1.371999979019165           0  1.251365     .07403601 0
    2001 "101103930" 110000 3000  11566   25394   1.614600658416748           0 1.1422305       .074112 0
    2002 "101103930" 110000 3000   8080   25526   2.095536470413208           0  1.143781   .0039175744 0
    end

  • #2
    I assume that "using "IR" as my measure" means you want to pair each firm having priva = 1 with the firm having priva = 0 that has the closest value of IR.

    Code:
    // IDENTIFY MAX DIFFERENCE BETWEEN NEAREST IR VALUES
    sort IR
    gen delta = IR-IR[_n-1]
    summ delta
    local gague = r(max)
    drop delta
    
    // SEPARATE DATA SET FOR PRIVA==0 OBSERVATIONS
    preserve
    keep if priva == 0
    tempfile priva0
    save `priva0'
    restore
    keep if priva == 1
    
    // JOIN THE TWO DATA SETS KEEPING ONLY
    // PAIRS WITH IR DIFFERING BY AT MOST `mesh'
    rangejoin IR  -`gague' `gague' using `priva0'
    
    // NOW SORT ON IR DIFFERENCE & KEEP LEAST
    gen delta = abs(IR-IR_U)
    by firmid (delta), sort: keep if _n == 1
    Note: You need the -rangejoin- command, by Robert Picard. If you don't already have it, -ssc install rangejoin-.

    You didn't say what you already tried that you found unsatisfactory. I'm guessing you didn't have -rangejoin- available. The use of -rangejoin- greatly speeds up this kind of process, and also reduces memory requirements, because you do not join both subsets of the data in their entirety. It enables you to join each observation with only a limited number of observations that are at least "within striking distance" of being a closest match on IR. Then you simply pick the closest match from among those that pass -rangejoin-'s filter.

    If you already have -rangejoin- and tried doing it this way, I don't think there is any more rapid way to do this in Stata and I fear you will just have to accept the long run-times. These are computation-intensive processes and you can only speed them up so much.




    Comment


    • #3
      Originally posted by Clyde Schechter View Post
      I assume that "using "IR" as my measure" means you want to pair each firm having priva = 1 with the firm having priva = 0 that has the closest value of IR.

      Code:
      // IDENTIFY MAX DIFFERENCE BETWEEN NEAREST IR VALUES
      sort IR
      gen delta = IR-IR[_n-1]
      summ delta
      local gague = r(max)
      drop delta
      
      // SEPARATE DATA SET FOR PRIVA==0 OBSERVATIONS
      preserve
      keep if priva == 0
      tempfile priva0
      save `priva0'
      restore
      keep if priva == 1
      
      // JOIN THE TWO DATA SETS KEEPING ONLY
      // PAIRS WITH IR DIFFERING BY AT MOST `mesh'
      rangejoin IR -`gague' `gague' using `priva0'
      
      // NOW SORT ON IR DIFFERENCE & KEEP LEAST
      gen delta = abs(IR-IR_U)
      by firmid (delta), sort: keep if _n == 1
      Note: You need the -rangejoin- command, by Robert Picard. If you don't already have it, -ssc install rangejoin-.

      You didn't say what you already tried that you found unsatisfactory. I'm guessing you didn't have -rangejoin- available. The use of -rangejoin- greatly speeds up this kind of process, and also reduces memory requirements, because you do not join both subsets of the data in their entirety. It enables you to join each observation with only a limited number of observations that are at least "within striking distance" of being a closest match on IR. Then you simply pick the closest match from among those that pass -rangejoin-'s filter.

      If you already have -rangejoin- and tried doing it this way, I don't think there is any more rapid way to do this in Stata and I fear you will just have to accept the long run-times. These are computation-intensive processes and you can only speed them up so much.



      Thanks Clyde , it does works, what I did do use range but with a for loop, so it's really slow, but your solution here is way fast than mine.

      but there are duplicates among the matched firms with "priva ==0", I want to match without replacement, how to fix this?

      Comment


      • #4
        Strictly as stated, what you are asking cannot be done. Let's consider some observation with priva == 0 that appears more than once as a match in the results above. If you disqualify that observation from serving as the match to one of those priva == 1 partners, then that priva == 1 partner will have to match with some other observation that is not its closest match on IR (unless there happens to be another priva == 0 observation with exactly the same value of IR, which occasionally happens but can't be relied on.). Of course you can resolve that by leaving the other priva == 1 partner unmatched altogether. But you cannot have all three: 1) every priva == 1 firm gets matched to something, 2) matching without replacement and 3) matching each firm to the firm with the closest IR. (In some data sets, it will work out that way coincidentally, but in general, you cannot simultaneously impose these requirements.)

        If you want to dispense with one of the three requirements, it is possible.

        You need to decide what you want to accomplish. I should point out that from the perspective of almost any statistical analysis comparing priva == 1 and priva == 0 firms, there is no reason to prefer sampling without replacement. I will also remind you that in your original post you did specifically say "either with or without replacement."
        Last edited by Clyde Schechter; 19 Sep 2016, 21:12.

        Comment


        • #5
          You are right Clyde, given if we want to match each firm(priva ==1) with firm(priva==0) which have the closest IR in the sample, we may,in general, loss some firms(priva ==1).
          if I don't want to take the risk of loosing some of my precious observations and make a concession that second or third or some like that could be used if the first best have been matched by another firm(priva==1).

          How to do this without looping through a very long time.

          Code:
          // create an empty data set to containe the matched result
          clear
          set more off
          use "E:\Research\privatization\privatize-sample1215-pscore-1.dta"
          local max_obs = [_N]
          drop in 1/`max_obs'
          save "E:\Research\privatization\privatize-sample1215-121-pscore-1.dta",replace
          // potential control firms(priva==0) pool
          clear
          set more off
          use "E:\Research\privatization\privatize-control1009-pscore-1.dta"
          save "E:\Research\privatization\privatize-control1009-pscore-121-1.dta",replace
          // treatment firms(priva==1)pool
          clear
          set more off
          use "E:\Research\privatization\privatize-sample1215-pscore-1.dta"
          
          
          levelsof firmid, local(levels)
          qui foreach i of local levels {
              clear
              set more off
              use "E:\Research\privatization\privatize-sample1215-pscore-1.dta"
              // keep one firm from the treatment pool
              keep if firmid == "`i'"
              keep if year == priva_year
              rangejoin IR lower_IR upper_IR using "E:\Research\privatization\privatize-control1009-pscore-121-1.dta", /// 
              by(industrycode2 year) all // industry and IR match,checked
              gen abs_IR = abs(IR-IR_U)
              drop  if  abs_IR ==.
              qui sum abs_IR,detail
              gen  min_abs_IR = r(min)
              keep if abs_IR == min_abs_IR
             // add the matching observation to the data set created before
              append using "E:\Research\privatization\privatize-sample1215-pscore-121-1.dta"
              save "E:\Research\privatization\privatize-sample1215-pscore-121-1.dta",replace
              keep firmid_U
              rename firmid_U firmid
             // kick the matched control firm out from the potential control pool
              merge 1:m  firmid using "E:\Research\privatization\privatize-control1009-pscore-121-1.dta"
              drop if _merge==3 | _merge ==1
              drop _merge
              save "E:\Research\privatization\privatize-control1009-pscore-121-1.dta",replace
          }
          order of firms in treatment pool will matter in this procedure. and it's really slow

          Comment


          • #6
            just as a follow up, psmatch2 has an option called pscore(), if you just want to match on some variable other than propensity score, just write like:

            Code:
            psmatch2 treatment   ,  pscore(variable name you want to match on)
            but be aware that order of the treatment samples matter in this case,

            Comment

            Working...
            X