Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Speeding up Stata

    Dear Stata Community!

    I need your help! I am working with very big data files (~20Gb) containing around 1 million observations and 15 to 20 000 variables. I'm trying to reduce the size of the data file to around 100 variables max. However, when I loop through my data and try to subsequently reduce the size, Stata takes way too long to perform the tasks. Maybe some of you have an idea of how to speed up the code and get to the same outcome! All help is much appreciated

    Here is the code I'm currently running...

    // US 10Y 5BP SHORT
    clear all
    quietly {
    use TF_Short_10Y_5BP_US_end.dta

    gen Treasury_coupon_1 = Treasury
    gen Treasury_coupon_2 = Treasury
    gen Treasury_coupon_3 = Treasury
    gen Treasury_coupon_4 = Treasury
    gen Treasury_coupon_5 = Treasury
    gen Treasury_coupon_6 = Treasury
    gen Treasury_coupon_7 = Treasury
    gen Treasury_coupon_8 = Treasury
    gen Treasury_coupon_9 = Treasury
    gen Treasury_coupon_10 = Treasury
    gen Treasury_coupon_11 = Treasury
    gen Treasury_coupon_12 = Treasury
    gen Treasury_coupon_13 = Treasury
    gen Treasury_coupon_14 = Treasury
    gen Treasury_coupon_15 = Treasury
    gen Treasury_coupon_16 = Treasury
    gen Treasury_coupon_17 = Treasury
    gen Treasury_coupon_18 = Treasury
    gen Treasury_coupon_19 = Treasury
    gen Treasury_coupon_20 = Treasury + 100

    gen Swap_coupon_1 = Swap
    gen Swap_coupon_2 = Swap
    gen Swap_coupon_3 = Swap
    gen Swap_coupon_4 = Swap
    gen Swap_coupon_5 = Swap
    gen Swap_coupon_6 = Swap
    gen Swap_coupon_7 = Swap
    gen Swap_coupon_8 = Swap
    gen Swap_coupon_9 = Swap
    gen Swap_coupon_10 = Swap
    gen Swap_coupon_11 = Swap
    gen Swap_coupon_12 = Swap
    gen Swap_coupon_13 = Swap
    gen Swap_coupon_14 = Swap
    gen Swap_coupon_15 = Swap
    gen Swap_coupon_16 = Swap
    gen Swap_coupon_17 = Swap
    gen Swap_coupon_18 = Swap
    gen Swap_coupon_19 = Swap
    gen Swap_coupon_20 = Swap + 100

    gen LIBOR_coupon = LIBOR + 100


    gen Treasury_disc_1 = 1
    gen Treasury_disc_2 = 1
    gen Treasury_disc_3 = 1
    gen Treasury_disc_4 = 1
    gen Treasury_disc_5 = 1
    gen Treasury_disc_6 = 1
    gen Treasury_disc_7 = 1
    gen Treasury_disc_8 = 1
    gen Treasury_disc_9 = 1
    gen Treasury_disc_10 = 1
    gen Treasury_disc_11 = 1
    gen Treasury_disc_12 = 1
    gen Treasury_disc_13 = 1
    gen Treasury_disc_14 = 1
    gen Treasury_disc_15 = 1
    gen Treasury_disc_16 = 1
    gen Treasury_disc_17 = 1
    gen Treasury_disc_18 = 1
    gen Treasury_disc_19 = 1
    gen Treasury_disc_20 = 1

    gen Swap_disc_1 = 1
    gen Swap_disc_2 = 1
    gen Swap_disc_3 = 1
    gen Swap_disc_4 = 1
    gen Swap_disc_5 = 1
    gen Swap_disc_6 = 1
    gen Swap_disc_7 = 1
    gen Swap_disc_8 = 1
    gen Swap_disc_9 = 1
    gen Swap_disc_10 = 1
    gen Swap_disc_11 = 1
    gen Swap_disc_12 = 1
    gen Swap_disc_13 = 1
    gen Swap_disc_14 = 1
    gen Swap_disc_15 = 1
    gen Swap_disc_16 = 1
    gen Swap_disc_17 = 1
    gen Swap_disc_18 = 1
    gen Swap_disc_19 = 1
    gen Swap_disc_20 = 1

    gen Treasury_coupon_1_date =.
    gen Treasury_coupon_2_date =.
    gen Treasury_coupon_3_date =.
    gen Treasury_coupon_4_date =.
    gen Treasury_coupon_5_date =.
    gen Treasury_coupon_6_date =.
    gen Treasury_coupon_7_date =.
    gen Treasury_coupon_8_date =.
    gen Treasury_coupon_9_date =.
    gen Treasury_coupon_10_date =.
    gen Treasury_coupon_11_date =.
    gen Treasury_coupon_12_date =.
    gen Treasury_coupon_13_date =.
    gen Treasury_coupon_14_date =.
    gen Treasury_coupon_15_date =.
    gen Treasury_coupon_16_date =.
    gen Treasury_coupon_17_date =.
    gen Treasury_coupon_18_date =.
    gen Treasury_coupon_19_date =.
    gen Treasury_coupon_20_date =.

    gen Swap_coupon_1_date =.
    gen Swap_coupon_2_date =.
    gen Swap_coupon_3_date =.
    gen Swap_coupon_4_date =.
    gen Swap_coupon_5_date =.
    gen Swap_coupon_6_date =.
    gen Swap_coupon_7_date =.
    gen Swap_coupon_8_date =.
    gen Swap_coupon_9_date =.
    gen Swap_coupon_10_date =.
    gen Swap_coupon_11_date =.
    gen Swap_coupon_12_date =.
    gen Swap_coupon_13_date =.
    gen Swap_coupon_14_date =.
    gen Swap_coupon_15_date =.
    gen Swap_coupon_16_date =.
    gen Swap_coupon_17_date =.
    gen Swap_coupon_18_date =.
    gen Swap_coupon_19_date =.
    gen Swap_coupon_20_date =.

    gen LIBOR_coupon_date=.

    gen REPO_reset_date =.


    quietly {
    forvalues i = 1/2119 {
    egen trade_start_date = min(Date) if Tradenumb == `i'
    egen trade_end_date = max(Date) if Tradenumb == `i'
    replace Treasury_coupon_1_date = trade_start_date + 180 if Tradenumb == `i'
    replace Treasury_coupon_2_date = trade_start_date + 360 if Tradenumb == `i'
    replace Treasury_coupon_3_date = trade_start_date + 540 if Tradenumb == `i'
    replace Treasury_coupon_4_date = trade_start_date + 720 if Tradenumb == `i'
    replace Treasury_coupon_5_date = trade_start_date + 900 if Tradenumb == `i'
    replace Treasury_coupon_6_date = trade_start_date + 1080 if Tradenumb == `i'
    replace Treasury_coupon_7_date = trade_start_date + 1260 if Tradenumb == `i'
    replace Treasury_coupon_8_date = trade_start_date + 1440 if Tradenumb == `i'
    replace Treasury_coupon_9_date = trade_start_date + 1620 if Tradenumb == `i'
    replace Treasury_coupon_10_date = trade_start_date + 1800 if Tradenumb == `i'
    replace Treasury_coupon_11_date = trade_start_date + 1960 if Tradenumb == `i'
    replace Treasury_coupon_12_date = trade_start_date + 2160 if Tradenumb == `i'
    replace Treasury_coupon_13_date = trade_start_date + 2340 if Tradenumb == `i'
    replace Treasury_coupon_14_date = trade_start_date + 2520 if Tradenumb == `i'
    replace Treasury_coupon_15_date = trade_start_date + 2700 if Tradenumb == `i'
    replace Treasury_coupon_16_date = trade_start_date + 2880 if Tradenumb == `i'
    replace Treasury_coupon_17_date = trade_start_date + 3060 if Tradenumb == `i'
    replace Treasury_coupon_18_date = trade_start_date + 3240 if Tradenumb == `i'
    replace Treasury_coupon_19_date = trade_start_date + 3420 if Tradenumb == `i'
    replace Treasury_coupon_20_date = trade_start_date + 3600 if Tradenumb == `i'

    replace Swap_coupon_1_date = trade_start_date + 180 if Tradenumb == `i'
    replace Swap_coupon_2_date = trade_start_date + 360 if Tradenumb == `i'
    replace Swap_coupon_3_date = trade_start_date + 540 if Tradenumb == `i'
    replace Swap_coupon_4_date = trade_start_date + 720 if Tradenumb == `i'
    replace Swap_coupon_5_date = trade_start_date + 900 if Tradenumb == `i'
    replace Swap_coupon_6_date = trade_start_date + 1080 if Tradenumb == `i'
    replace Swap_coupon_7_date = trade_start_date + 1260 if Tradenumb == `i'
    replace Swap_coupon_8_date = trade_start_date + 1440 if Tradenumb == `i'
    replace Swap_coupon_9_date = trade_start_date + 1620 if Tradenumb == `i'
    replace Swap_coupon_10_date = trade_start_date + 1800 if Tradenumb == `i'
    replace Swap_coupon_11_date = trade_start_date + 1960 if Tradenumb == `i'
    replace Swap_coupon_12_date = trade_start_date + 2160 if Tradenumb == `i'
    replace Swap_coupon_13_date = trade_start_date + 2340 if Tradenumb == `i'
    replace Swap_coupon_14_date = trade_start_date + 2520 if Tradenumb == `i'
    replace Swap_coupon_15_date = trade_start_date + 2700 if Tradenumb == `i'
    replace Swap_coupon_16_date = trade_start_date + 2880 if Tradenumb == `i'
    replace Swap_coupon_17_date = trade_start_date + 3060 if Tradenumb == `i'
    replace Swap_coupon_18_date = trade_start_date + 3240 if Tradenumb == `i'
    replace Swap_coupon_19_date = trade_start_date + 3420 if Tradenumb == `i'
    replace Swap_coupon_20_date = trade_start_date + 3600 if Tradenumb == `i'

    //Schleife?
    replace LIBOR_coupon_date = trade_start_date + 90 if Tradenumb == `i' & Date <= trade_start_date+90
    replace LIBOR_coupon_date = trade_start_date + 180 if Tradenumb == `i' & Date > trade_start_date+90 & Date <= trade_start_date+180
    replace LIBOR_coupon_date = trade_start_date + 270 if Tradenumb == `i' & Date > trade_start_date+180 & Date <= trade_start_date+270
    replace LIBOR_coupon_date = trade_start_date + 360 if Tradenumb == `i' & Date > trade_start_date+270 & Date <= trade_start_date+360
    replace LIBOR_coupon_date = trade_start_date + 450 if Tradenumb == `i' & Date > trade_start_date+360 & Date <= trade_start_date+450
    replace LIBOR_coupon_date = trade_start_date + 540 if Tradenumb == `i' & Date > trade_start_date+450 & Date <= trade_start_date+540
    replace LIBOR_coupon_date = trade_start_date + 630 if Tradenumb == `i' & Date > trade_start_date+540 & Date <= trade_start_date+630
    replace LIBOR_coupon_date = trade_start_date + 720 if Tradenumb == `i' & Date > trade_start_date+630 & Date <= trade_start_date+720
    replace LIBOR_coupon_date = trade_start_date + 810 if Tradenumb == `i' & Date > trade_start_date+720 & Date <= trade_start_date+810
    replace LIBOR_coupon_date = trade_start_date + 900 if Tradenumb == `i' & Date > trade_start_date+810 & Date <= trade_start_date+900
    replace LIBOR_coupon_date = trade_start_date + 990 if Tradenumb == `i' & Date > trade_start_date+900 & Date <= trade_start_date+990
    replace LIBOR_coupon_date = trade_start_date + 1080 if Tradenumb == `i' & Date > trade_start_date+990 & Date <= trade_start_date+1080
    replace LIBOR_coupon_date = trade_start_date + 1170 if Tradenumb == `i' & Date > trade_start_date+1080 & Date <= trade_start_date+1170
    replace LIBOR_coupon_date = trade_start_date + 1260 if Tradenumb == `i' & Date > trade_start_date+1170 & Date <= trade_start_date+1260
    replace LIBOR_coupon_date = trade_start_date + 1350 if Tradenumb == `i' & Date > trade_start_date+1260 & Date <= trade_start_date+1350
    replace LIBOR_coupon_date = trade_start_date + 1440 if Tradenumb == `i' & Date > trade_start_date+1350 & Date <= trade_start_date+1440
    replace LIBOR_coupon_date = trade_start_date + 1530 if Tradenumb == `i' & Date > trade_start_date+1440 & Date <= trade_start_date+1530
    replace LIBOR_coupon_date = trade_start_date + 1620 if Tradenumb == `i' & Date > trade_start_date+1530 & Date <= trade_start_date+1620
    replace LIBOR_coupon_date = trade_start_date + 1710 if Tradenumb == `i' & Date > trade_start_date+1620 & Date <= trade_start_date+1710
    replace LIBOR_coupon_date = trade_start_date + 1800 if Tradenumb == `i' & Date > trade_start_date+1710 & Date <= trade_start_date+1800
    replace LIBOR_coupon_date = trade_start_date + 1890 if Tradenumb == `i' & Date > trade_start_date+1800 & Date <= trade_start_date+1890
    replace LIBOR_coupon_date = trade_start_date + 1980 if Tradenumb == `i' & Date > trade_start_date+1890 & Date <= trade_start_date+1980
    replace LIBOR_coupon_date = trade_start_date + 2070 if Tradenumb == `i' & Date > trade_start_date+1980 & Date <= trade_start_date+2070
    replace LIBOR_coupon_date = trade_start_date + 2160 if Tradenumb == `i' & Date > trade_start_date+2070 & Date <= trade_start_date+2160
    replace LIBOR_coupon_date = trade_start_date + 2250 if Tradenumb == `i' & Date > trade_start_date+2160 & Date <= trade_start_date+2250
    replace LIBOR_coupon_date = trade_start_date + 2340 if Tradenumb == `i' & Date > trade_start_date+2250 & Date <= trade_start_date+2340
    replace LIBOR_coupon_date = trade_start_date + 2430 if Tradenumb == `i' & Date > trade_start_date+2340 & Date <= trade_start_date+2430
    replace LIBOR_coupon_date = trade_start_date + 2520 if Tradenumb == `i' & Date > trade_start_date+2430 & Date <= trade_start_date+2520
    replace LIBOR_coupon_date = trade_start_date + 2610 if Tradenumb == `i' & Date > trade_start_date+2520 & Date <= trade_start_date+2610
    replace LIBOR_coupon_date = trade_start_date + 2700 if Tradenumb == `i' & Date > trade_start_date+2610 & Date <= trade_start_date+2700
    replace LIBOR_coupon_date = trade_start_date + 2790 if Tradenumb == `i' & Date > trade_start_date+2700 & Date <= trade_start_date+2790
    replace LIBOR_coupon_date = trade_start_date + 2880 if Tradenumb == `i' & Date > trade_start_date+2790 & Date <= trade_start_date+2880
    replace LIBOR_coupon_date = trade_start_date + 2970 if Tradenumb == `i' & Date > trade_start_date+2880 & Date <= trade_start_date+2970
    replace LIBOR_coupon_date = trade_start_date + 3060 if Tradenumb == `i' & Date > trade_start_date+2970 & Date <= trade_start_date+3060
    replace LIBOR_coupon_date = trade_start_date + 3150 if Tradenumb == `i' & Date > trade_start_date+3060 & Date <= trade_start_date+3150
    replace LIBOR_coupon_date = trade_start_date + 3240 if Tradenumb == `i' & Date > trade_start_date+3150 & Date <= trade_start_date+3240
    replace LIBOR_coupon_date = trade_start_date + 3330 if Tradenumb == `i' & Date > trade_start_date+3240 & Date <= trade_start_date+3330
    replace LIBOR_coupon_date = trade_start_date + 3420 if Tradenumb == `i' & Date > trade_start_date+3330 & Date <= trade_start_date+3420
    replace LIBOR_coupon_date = trade_start_date + 3510 if Tradenumb == `i' & Date > trade_start_date+3420 & Date <= trade_start_date+3510
    replace LIBOR_coupon_date = trade_start_date + 3600 if Tradenumb == `i' & Date > trade_start_date+3510 & Date <= trade_start_date+3600

    replace REPO_reset_date = Date[_n-1] if Tradenumb == `i' & Date <= trade_end_date
    replace REPO_reset_date =. if Tradenumb ==`i' & Date == trade_start_date
    drop trade_start_date trade_end_date
    }

    }
    quietly {
    forvalues j = 0/3600 {
    replace Treasury_disc_1 = DT__`j' if Treasury_coupon_1_date - Date ==`j'
    replace Swap_disc_1 = DS__`j' if Swap_coupon_1_date - Date == `j'

    replace Treasury_disc_2 = DT__`j' if Treasury_coupon_2_date - Date == `j'
    replace Swap_disc_2 = DS__`j' if Swap_coupon_2_date - Date == `j'

    replace Treasury_disc_3 = DT__`j' if Treasury_coupon_3_date - Date == `j'
    replace Swap_disc_3 = DS__`j' if Swap_coupon_3_date - Date == `j'

    replace Treasury_disc_4 = DT__`j' if Treasury_coupon_4_date - Date == `j' //& `j' <= 720
    replace Swap_disc_4 = DS__`j' if Swap_coupon_4_date - Date == `j' //& `j' <= 720

    replace Treasury_disc_5 = DT__`j' if Treasury_coupon_5_date - Date ==`j'
    replace Swap_disc_5 = DS__`j' if Swap_coupon_5_date - Date == `j'

    replace Treasury_disc_6 = DT__`j' if Treasury_coupon_6_date - Date ==`j'
    replace Swap_disc_6 = DS__`j' if Swap_coupon_6_date - Date == `j'

    replace Treasury_disc_7 = DT__`j' if Treasury_coupon_7_date - Date ==`j'
    replace Swap_disc_7 = DS__`j' if Swap_coupon_7_date - Date == `j'

    replace Treasury_disc_8 = DT__`j' if Treasury_coupon_8_date - Date ==`j'
    replace Swap_disc_8 = DS__`j' if Swap_coupon_8_date - Date == `j'

    replace Treasury_disc_9 = DT__`j' if Treasury_coupon_9_date - Date ==`j'
    replace Swap_disc_9 = DS__`j' if Swap_coupon_9_date - Date == `j'

    replace Treasury_disc_10 = DT__`j' if Treasury_coupon_10_date - Date ==`j'
    replace Swap_disc_10 = DS__`j' if Swap_coupon_10_date - Date == `j'

    replace Treasury_disc_11 = DT__`j' if Treasury_coupon_11_date - Date ==`j'
    replace Swap_disc_11 = DS__`j' if Swap_coupon_11_date - Date == `j'

    replace Treasury_disc_12 = DT__`j' if Treasury_coupon_12_date - Date ==`j'
    replace Swap_disc_12 = DS__`j' if Swap_coupon_12_date - Date == `j'

    replace Treasury_disc_13 = DT__`j' if Treasury_coupon_13_date - Date ==`j'
    replace Swap_disc_13 = DS__`j' if Swap_coupon_13_date - Date == `j'

    replace Treasury_disc_14 = DT__`j' if Treasury_coupon_14_date - Date ==`j'
    replace Swap_disc_14 = DS__`j' if Swap_coupon_14_date - Date == `j'

    replace Treasury_disc_15 = DT__`j' if Treasury_coupon_15_date - Date ==`j'
    replace Swap_disc_15 = DS__`j' if Swap_coupon_15_date - Date == `j'

    replace Treasury_disc_16 = DT__`j' if Treasury_coupon_16_date - Date ==`j'
    replace Swap_disc_16 = DS__`j' if Swap_coupon_16_date - Date == `j'

    replace Treasury_disc_17 = DT__`j' if Treasury_coupon_17_date - Date ==`j'
    replace Swap_disc_17 = DS__`j' if Swap_coupon_17_date - Date == `j'

    replace Treasury_disc_18 = DT__`j' if Treasury_coupon_18_date - Date ==`j'
    replace Swap_disc_18 = DS__`j' if Swap_coupon_18_date - Date == `j'

    replace Treasury_disc_19 = DT__`j' if Treasury_coupon_19_date - Date ==`j'
    replace Swap_disc_19 = DS__`j' if Swap_coupon_19_date - Date == `j'

    replace Treasury_disc_20 = DT__`j' if Treasury_coupon_20_date - Date ==`j'
    replace Swap_disc_20 = DS__`j' if Swap_coupon_20_date - Date == `j'

    }
    }

    quietly {
    forvalues i = 0/3600 {
    drop DT__`i' DS__`i'
    }
    }
    }
    save TF_Short_10Y_5BP_US_last.dta



  • #2
    Hi Raoul,

    please read the FAQ (also linked from the top of each page here), especially section 12, and learn how to post sample data using dataex in order to help us answer your question.

    We don't see your original data, nor the data layout you want to create. Without both, it is guesswork to give an answer to your question.

    From what you presented, it seems obvious that there is room for increasing your code efficiency. You repeat the same commands thousands of times; no wonder that this needs quite some runtime on larger data.

    Regards
    Bela

    PS: When posting code, please use CODE delimiters; writing [CODE]example code[/CODE] will produce:
    Code:
    example code

    Comment


    • #3
      I had a quick hack at your code. I understand nothing here really and can test nothing either, but I see both unnecessary loops and scope for loops to simplify the code even if they won't speed it up. I imagine that this could be taken further.

      Code:
      // US 10Y 5BP SHORT
      clear all
      use TF_Short_10Y_5BP_US_end.dta
      
      forval j = 1/19 { 
          gen Treasury_coupon_`j' = Treasury
      }
      gen Treasury_coupon_20 = Treasury + 100
      
      forval j = 1/19 { 
          gen Swap_coupon_`j' = Swap
      }
      gen Swap_coupon_20 = Swap + 100
      
      gen LIBOR_coupon = LIBOR + 100
      
      forval j = 1/20 { 
          gen Treasury_disc_`j' = 1
          gen Swap_disc_`j' = 1
          gen Treasury_coupon_`j'_date =.
          gen Swap_coupon_`j'_date =.
      }
      
      gen LIBOR_coupon_date=.
      
      gen REPO_reset_date =.
      
      quietly bysort Tradenumb (Date) : gen trade_start_date = Date[1]
      quietly bysort Tradenumb (Date) : gen trade_start_date = Date[_N]
      
      forval j = 1/19 { 
          local J = 180 * `j' 
          replace Treasury_coupon_`j'_date = trade_start_date + `J'
          replace Swap_coupon_`j'_date = trade_start_date + `J'
      }
      
      replace LIBOR_coupon_date = trade_start_date + 90 * ceil(trade_start_date/90) 
      replace REPO_reset_date = Date[_n-1] if  Date <= trade_end_date
      replace REPO_reset_date =. if Date == trade_start_date
      
      forvalues j = 0/3600 {
          forval k = 1/20 {
              replace Treasury_disc_`k' = DT__`j' if Treasury_coupon_`k'_date - Date ==`j'
              replace Swap_disc_`k' = DS__`j' if Swap_coupon_`k'_date - Date == `j'
              local todrop `todrop' DT__`j' DS__`j'
          }
      }
      
      drop `todrop' 
      
      save TF_Short_10Y_5BP_US_last.dta

      Comment


      • #4
        Dear Daniel & Nick!

        Thank you very much for the responses. I tried to include as much as possible of my data, however Stata almost always crashed when using dataex. Accordingly, I had to omit a lot of variables. Specifically, in my dataset there are variables DT__0 until DT__3600 as well as DS__0 until DS__3600 (total of 7200 extra variables).

        Code:
        * Example generated by -dataex-. To install: ssc install dataex
        clear
        input int Date float(DT__0 DS__0 Tradenumb Treasury Swap LIBOR LIBOR_discount Repo)
        14304 1 1 1 2.6425 2.895  1.256875 .9998603 5.08
        14305 1 1 1 2.6425 2.895  1.256875 .9998602 4.79
        14306 1 1 1 2.6425 2.895  1.256875 .9998603 4.71
        14307 1 1 1 2.6425 2.895  1.256875 .9998603 4.71
        14308 1 1 1 2.6425 2.895  1.256875 .9998603 4.82
        14311 1 1 1 2.6425 2.895  1.256875 .9998611 4.82
        14312 1 1 1 2.6425 2.895  1.256875 .9998611 4.76
        14313 1 1 1 2.6425 2.895  1.256875 .9998611 4.81
        14314 1 1 1 2.6425 2.895  1.256875 .9998611 4.79
        14315 1 1 1 2.6425 2.895  1.256875 .9998611 4.76
        14318 1 1 1 2.6425 2.895  1.256875 .9998611 4.93
        14319 1 1 1 2.6425 2.895  1.256875 .9998611 4.75
        14320 1 1 1 2.6425 2.895  1.256875 .9998611 4.72
        14321 1 1 1 2.6425 2.895  1.256875 .9998611 4.68
        14322 1 1 1 2.6425 2.895  1.256875 .9998611 4.66
        14325 1 1 1 2.6425 2.895  1.256875  .999862 4.76
        14326 1 1 1 2.6425 2.895  1.256875 .9998611 4.73
        14327 1 1 1 2.6425 2.895  1.256875 .9998611 4.85
        14328 1 1 1 2.6425 2.895  1.256875 .9998611 4.91
        14329 1 1 1 2.6425 2.895  1.256875 .9998611  4.8
        14332 1 1 1 2.6425 2.895  1.256875 .9998611 4.86
        14333 1 1 1 2.6425 2.895  1.256875 .9998611 4.79
        14334 1 1 1 2.6425 2.895  1.256875 .9998611 5.03
        14335 1 1 1 2.6425 2.895  1.256875 .9998611  5.2
        14336 1 1 1 2.6425 2.895  1.256875 .9998611  5.2
        14339 1 1 1 2.6425 2.895  1.256875 .9998611 4.89
        14340 1 1 1 2.6425 2.895  1.256875 .9998611 4.75
        14341 1 1 1 2.6425 2.895  1.256875 .9998611 4.77
        14342 1 1 1 2.6425 2.895  1.256875 .9998611 4.76
        14343 1 1 1 2.6425 2.895  1.256875 .9998611  4.7
        14346 1 1 1 2.6425 2.895  1.256875 .9998611 4.75
        14347 1 1 1 2.6425 2.895  1.256875 .9998611 4.71
        14348 1 1 1 2.6425 2.895  1.256875 .9998611 4.79
        14349 1 1 1 2.6425 2.895  1.256875 .9998611 4.87
        14350 1 1 1 2.6425 2.895  1.256875 .9998611 4.69
        14353 1 1 1 2.6425 2.895  1.256875 .9998611 4.63
        14354 1 1 1 2.6425 2.895  1.256875 .9998611 4.57
        14355 1 1 1 2.6425 2.895  1.256875 .9998611 4.58
        14356 1 1 1 2.6425 2.895  1.256875 .9998612 4.47
        14305 1 1 2 2.6225  2.87 1.2584774 .9998602 4.79
        14306 1 1 2 2.6225  2.87 1.2584774 .9998603 4.71
        14307 1 1 2 2.6225  2.87 1.2584774 .9998603 4.71
        14308 1 1 2 2.6225  2.87 1.2584774 .9998603 4.82
        14311 1 1 2 2.6225  2.87 1.2584774 .9998611 4.82
        14312 1 1 2 2.6225  2.87 1.2584774 .9998611 4.76
        14313 1 1 2 2.6225  2.87 1.2584774 .9998611 4.81
        14314 1 1 2 2.6225  2.87 1.2584774 .9998611 4.79
        14315 1 1 2 2.6225  2.87 1.2584774 .9998611 4.76
        14318 1 1 2 2.6225  2.87 1.2584774 .9998611 4.93
        14319 1 1 2 2.6225  2.87 1.2584774 .9998611 4.75
        14320 1 1 2 2.6225  2.87 1.2584774 .9998611 4.72
        14321 1 1 2 2.6225  2.87 1.2584774 .9998611 4.68
        14322 1 1 2 2.6225  2.87 1.2584774 .9998611 4.66
        14325 1 1 2 2.6225  2.87 1.2584774  .999862 4.76
        14326 1 1 2 2.6225  2.87 1.2584774 .9998611 4.73
        14327 1 1 2 2.6225  2.87 1.2584774 .9998611 4.85
        14328 1 1 2 2.6225  2.87 1.2584774 .9998611 4.91
        14329 1 1 2 2.6225  2.87 1.2584774 .9998611  4.8
        14332 1 1 2 2.6225  2.87 1.2584774 .9998611 4.86
        14333 1 1 2 2.6225  2.87 1.2584774 .9998611 4.79
        14334 1 1 2 2.6225  2.87 1.2584774 .9998611 5.03
        14335 1 1 2 2.6225  2.87 1.2584774 .9998611  5.2
        14336 1 1 2 2.6225  2.87 1.2584774 .9998611  5.2
        14339 1 1 2 2.6225  2.87 1.2584774 .9998611 4.89
        14340 1 1 2 2.6225  2.87 1.2584774 .9998611 4.75
        14341 1 1 2 2.6225  2.87 1.2584774 .9998611 4.77
        14342 1 1 2 2.6225  2.87 1.2584774 .9998611 4.76
        14343 1 1 2 2.6225  2.87 1.2584774 .9998611  4.7
        14346 1 1 2 2.6225  2.87 1.2584774 .9998611 4.75
        14347 1 1 2 2.6225  2.87 1.2584774 .9998611 4.71
        14348 1 1 2 2.6225  2.87 1.2584774 .9998611 4.79
        14349 1 1 2 2.6225  2.87 1.2584774 .9998611 4.87
        14350 1 1 2 2.6225  2.87 1.2584774 .9998611 4.69
        14353 1 1 2 2.6225  2.87 1.2584774 .9998611 4.63
        14354 1 1 2 2.6225  2.87 1.2584774 .9998611 4.57
        14355 1 1 2 2.6225  2.87 1.2584774 .9998611 4.58
        14356 1 1 2 2.6225  2.87 1.2584774 .9998612 4.47
        end
        format %td Date

        I do hope that makes things more clear. What is more, the Tradenumb variable is the main loop from my previous post (from 1 up until 2119).

        Thanks so much for all of your help!

        Comment


        • #5
          Although generally we ask for data examples, and are grateful for them, this one doesn't enable me to add to my answer, as I can't see that you've added to your question or are asking questions about my code.

          I don't understand what the calculations do. But, codewise, I can't see that there is any need to loop over distinct values of -Tradenumb-, You need to get
          minimum and maximum dates for each distinct value, but that really doesn't need a loop. Conversely I should warn that

          Code:
           
           replace REPO_reset_date = Date[_n-1] if  Date <= trade_end_date
          looks dangerous if it involves looking at a previous -Tradenumb-. But it really isn't false modesty or understatement to say that I don't understand these calculations. I've seen some of the words before but it's not my field at all. I don't expect you to explain everything or even anything, but it means that extra help needs to be sought from other people, unless it's specifically about my translation of your code into something more concise.

          Comment


          • #6
            Nick's code is certainly much tidier. If you decide to change something, it should be much easier working off of his code.

            I don't actually know what will speed things up. It seems you have to do 10s of 1000s of calculations no matter what else you do. Some things you could try:
            • Use clonevar instead of gen when making copies of variables.
            • For the final drop, maybe -drop DT* DS*- would be quicker.
            Incidentally, what is your definition of "way too long?" 30 minutes or 30 days? I sometimes just let things run overnight or over the weekend. Or I take advantage of these monster unix machines I have access to. I don't like them for most purposes but sometimes they come in handy for data manipulation tasks.



            -------------------------------------------
            Richard Williams, Notre Dame Dept of Sociology
            StataNow Version: 19.5 MP (2 processor)

            EMAIL: [email protected]
            WWW: https://www3.nd.edu/~rwilliam

            Comment


            • #7
              Thanks for the data example. It helps us understand your data layout.

              However, you did not tell us yet what is is you want to achieve. What would the data look like after all calculations have been processed?

              Regards
              Bela

              PS: Note that dataex supports restricting the varlist to export as well as Stata's if and in modifiers; this helps in keeping a data example small.

              Comment


              • #8
                Originally posted by Nick Cox View Post
                Although generally we ask for data examples, and are grateful for them, this one doesn't enable me to add to my answer, as I can't see that you've added to your question or are asking questions about my code.

                I don't understand what the calculations do. But, codewise, I can't see that there is any need to loop over distinct values of -Tradenumb-, You need to get
                minimum and maximum dates for each distinct value, but that really doesn't need a loop. Conversely I should warn that

                Code:
                replace REPO_reset_date = Date[_n-1] if Date &lt;= trade_end_date
                looks dangerous if it involves looking at a previous -Tradenumb-. But it really isn't false modesty or understatement to say that I don't understand these calculations. I've seen some of the words before but it's not my field at all. I don't expect you to explain everything or even anything, but it means that extra help needs to be sought from other people, unless it's specifically about my translation of your code into something more concise.
                Dear Nick!

                Thanks for all the help so far, I can totally understand that it makes not very much sense. So I will try to explain it a bit.

                In specific I’m trying to calculate the daily returns of Swap Spread Strategies. My data fundamentally spans about 19 years (1999-2018). The variable Tradenum indicates individual positions and helps to identify when they are entered (start_date/=earliest date) and when they are terminated (end_date/=latest date). The positions can & do exist at times next to each other.

                What I need to do in order to get to the strategy’s daily returns is to calculate the daily value of all the positions by multiplying the cash flows (Treasury_coupon_1, Swap_coupon_1, Treasury_coupon_2, etc...) with the respective discount factors (Treasury_disc_1, Swap_disc_1, Treasury_disc_2). It is these discount factor variables which I try to generate, so that they include the specific discount factors (formerly included in DT__ & DS__ variables). Note however that as the time in a specific position progresses also the needed discount factors so value the cash flows change. Try to imagine i.e. that the discount factor to value the first cash flow on initiation date is DS__180 (because cash flows happens in 180 days from now), on the following & next business day the same cash flow would subsequently be valued with only DS__179 and so on. This happens until DS__0 is reached (coupon is received) and stays like that afterwards. For the other coupon payments the same principle is being used, only with different multiples of 180. Ultimately, I want discount variables with I can just multiply with the coupon payment variables to get the present value.

                The most problems however, I think, makes the LIBOR_coupon_date part of my code. Here I try to generate a variable which gives me the coupon payment date, so that I can in the end subtract the current date from it with the variable LIBOR-discount (which is the discount factor to price the LIBOR), like LIBOR_discount ^ (LIBOR_coupon_date - current_date).

                I will try to implement all of the proposed changes and will let you know how it works out. Thanks so much again!!

                Comment


                • #9
                  Originally posted by Richard Williams View Post
                  Nick's code is certainly much tidier. If you decide to change something, it should be much easier working off of his code.

                  I don't actually know what will speed things up. It seems you have to do 10s of 1000s of calculations no matter what else you do. Some things you could try:
                  • Use clonevar instead of gen when making copies of variables.
                  • For the final drop, maybe -drop DT* DS*- would be quicker.
                  Incidentally, what is your definition of "way too long?" 30 minutes or 30 days? I sometimes just let things run overnight or over the weekend. Or I take advantage of these monster unix machines I have access to. I don't like them for most purposes but sometimes they come in handy for data manipulation tasks.


                  Dear Richard!

                  You're totally right! Nick's code is much tidier than my initial attempt. Thanks for the – drop DT* DS* – command, will also implement this one too.

                  In terms of "way too long", the code would run about 3/4 days (admittedly it is also the most computationally intense one), but I have to ultimately run the same code for almost 60 different files. So a total runtime of more than two weeks, for all of those files, is not really possible. At the moment I don't know if my university owns such a unix machine and if students like me even have access to it, but I will definitely inquire about such a device and the possibility of using it.

                  Thanks to you too for all the helpful & immediate advice!

                  Comment


                  • #10
                    Can you get by with sub sampling, or choosing fewer dates, or whatever? This just seems like an incredibly monstrous problem. Once you have the new data, are you sure you know what to do with it? If you haven’t already, I would suggest starting with much smaller practice runs to be sure you are restructuring the data correctly for what you want to do.
                    -------------------------------------------
                    Richard Williams, Notre Dame Dept of Sociology
                    StataNow Version: 19.5 MP (2 processor)

                    EMAIL: [email protected]
                    WWW: https://www3.nd.edu/~rwilliam

                    Comment


                    • #11
                      – Update –

                      I implemented all of Nick's changes now, and it works almost perfectly! The runtime is only a fraction of what it used to be before!!

                      The one thing where things still go a bit wrong is in that line of code:
                      Code:
                       
                       replace LIBOR_coupon_date = trade_start_date + 90 * ceil(trade_start_date/90)
                      Honestly I'm not very familiar with the command ceil() and even with some slight variations of the code above I can't make it work properly. I would need to somehow increase the date by 90 for each quarter. So the first payment would be exactly 90 days after trade_start_date. But if the position is held for 91 days the LIBOR_coupon_date should include the next quarter payment date (i.e. trade_start_date + 180 ) and so on. Unfortunately, I couldn't find an answer myself by reading the descriptions to ceil(), round() & floor().

                      I mean I can't say it too often but thank you so much! Really glad that the STATA community is always so helpful & understanding with people like me

                      Comment


                      • #12
                        Originally posted by Richard Williams View Post
                        Can you get by with sub sampling, or choosing fewer dates, or whatever? This just seems like an incredibly monstrous problem. Once you have the new data, are you sure you know what to do with it? If you haven’t already, I would suggest starting with much smaller practice runs to be sure you are restructuring the data correctly for what you want to do.
                        Dear Richard,

                        actually it goes pretty smoothly now with all of Nick's improvements. Except for the LIBOR_coupon_date variable I have everything I want & need. Once I have the new data, it is actually very straightforward and I need just to do basic things like multiplying two variables to generate a new variable, etc. Ultimately, I would also need to calculate the average of all strategy returns from the same day but this should be also possible with by Date: or bysort Date: , I think, right?

                        Comment


                        • #13
                          The following example may make clearer what the fragment with the ceil() function (not command) is doing:

                          Code:
                          . mata
                          ------------------------------------------------- mata (type end to exit) -----------
                          : t = 45 :* (1..10)
                          
                          : t \ 90 :* ceil(t :/ 90)
                                   1     2     3     4     5     6     7     8     9    10
                              +-------------------------------------------------------------+
                            1 |   45    90   135   180   225   270   315   360   405   450  |
                            2 |   90    90   180   180   270   270   360   360   450   450  |
                              +-------------------------------------------------------------+
                          The first row of the matrix is sample input 45(45)450.

                          The second row is the result of the operation. It rounds upwards to the nearest multiple of 90.

                          Is that wrong?

                          As it happens, my next column in the Stata Journal is a general piece on rounding and binning.
                          Last edited by Nick Cox; 25 May 2018, 17:51.

                          Comment


                          • #14
                            Originally posted by Nick Cox View Post
                            The following example may make clearer what the fragment with the ceil() function (not command) is doing:

                            Code:
                            . mata
                            ------------------------------------------------- mata (type end to exit) -----------
                            : t = 45 :* (1..10)
                            
                            : t \ 90 :* ceil(t :/ 90)
                            1 2 3 4 5 6 7 8 9 10
                            +-------------------------------------------------------------+
                            1 | 45 90 135 180 225 270 315 360 405 450 |
                            2 | 90 90 180 180 270 270 360 360 450 450 |
                            +-------------------------------------------------------------+
                            The first row of the matrix is sample input 45(45)450.

                            The second row is the result of the operation. It rounds upwards to the nearest multiple of 90.

                            Is that wrong?

                            As it happens, my next column in the Stata Journal is a general piece on rounding and binning.
                            Thanks Nick for the explanation! I made it work now for my purposes as well

                            However, I noticed a different problem showing up when running the code for several files in a sequence, as shown below. Then, I get an "too many variables specified" r(103) error when it should start with the second file and STATA subsequently stops altogether. Do you maybe have an idea, how I could solve that issue too? The odd thing is that if I manually run the code for the second file after STATA stopped, it works perfectly fine, just when running the long sequence the problem shows up... And I certainly don't understand why?!


                            Code:
                             // US 10Y 5BP SHORT
                            clear all
                            quietly {
                            use TF_Short_10Y_5BP_US_end.dta
                            
                            forval j = 1/19 {
                             gen Treasury_coupon_`j' = Treasury
                            }
                            gen Treasury_coupon_20 = Treasury + 100
                            
                            forval j = 1/19 {
                             gen Swap_coupon_`j' = Swap
                            }
                            gen Swap_coupon_20 = Swap + 100
                            
                            gen LIBOR_coupon = LIBOR + 100
                            
                            forval j = 1/20 {
                             gen Treasury_disc_`j' = 1
                             gen Swap_disc_`j' = 1
                             gen Treasury_coupon_`j'_date =.
                             gen Swap_coupon_`j'_date =.
                            }
                            
                            gen LIBOR_coupon_date = .
                            
                            gen REPO_reset_date =.
                            
                            quietly bysort Tradenumb (Date) : gen trade_start_date = Date[1]
                            quietly bysort Tradenumb (Date) : gen trade_end_date = Date[_N]
                            
                            forval j = 1/20 {
                             local J = 180 * `j'
                             replace Treasury_coupon_`j'_date = trade_start_date + `J'
                             replace Swap_coupon_`j'_date = trade_start_date + `J'
                            }
                            
                            gen y = Date - trade_start_date
                            gen z= 90*ceil(y / 90)
                            replace z = 90 if z == 0
                            drop z y
                            replace REPO_reset_date = Date[_n-1] if Date <= trade_end_date
                            replace REPO_reset_date =. if Date == trade_start_date
                            
                            forvalues j= 0/3600 {
                             forval k = 1/20 {
                              replace Treasury_disc_`k' = DT__`j' if Treasury_coupon_`k'_date - Date == `j'
                              replace Swap_disc_`k' = DS__`j' if Swap_coupon_`k'_date - Date == `j'
                              local todrop `todrop' DT__`j' DS__`j'
                             }
                            }
                            
                            drop `todrop'
                            }
                            
                            save TF_Short_10Y_5BP_US_last.dta, replace
                            
                            
                            // US 10Y 5BP LONG
                            clear all
                            quietly {
                            use TF_Long_10Y_5BP_US_end.dta
                            
                            forval j = 1/19 {
                             gen Treasury_coupon_`j' = Treasury
                            }
                            gen Treasury_coupon_20 = Treasury + 100
                            
                            forval j = 1/19 {
                             gen Swap_coupon_`j' = Swap
                            }
                            gen Swap_coupon_20 = Swap + 100
                            
                            gen LIBOR_coupon = LIBOR + 100
                            
                            forval j = 1/20 {
                             gen Treasury_disc_`j' = 1
                             gen Swap_disc_`j' = 1
                             gen Treasury_coupon_`j'_date =.
                             gen Swap_coupon_`j'_date =.
                            }
                            
                            gen LIBOR_coupon_date = .
                            
                            gen REPO_reset_date =.
                            
                            quietly bysort Tradenumb (Date) : gen trade_start_date = Date[1]
                            quietly bysort Tradenumb (Date) : gen trade_end_date = Date[_N]
                            
                            forval j = 1/20 {
                             local J = 180 * `j'
                             replace Treasury_coupon_`j'_date = trade_start_date + `J'
                             replace Swap_coupon_`j'_date = trade_start_date + `J'
                            }
                            
                            gen y = Date - trade_start_date
                            gen z= 90*ceil(y / 90)
                            replace z = 90 if z == 0
                            drop z y
                            replace REPO_reset_date = Date[_n-1] if Date <= trade_end_date
                            replace REPO_reset_date =. if Date == trade_start_date
                            
                            forvalues j= 0/3600 {
                             forval k = 1/20 {
                              replace Treasury_disc_`k' = DT__`j' if Treasury_coupon_`k'_date - Date == `j'
                              replace Swap_disc_`k' = DS__`j' if Swap_coupon_`k'_date - Date == `j'
                              local todrop `todrop' DT__`j' DS__`j'
                             }
                            }
                            
                            drop `todrop'
                            }
                            
                            save TF_Long_10Y_5BP_US_last.dta, replace

                            Comment


                            • #15
                              I have to correct my earlier post slightly. As it turns out even for the first file the code stops after some runtime and shows the same error... Any idea what might cause this? And how to circumvent it?
                              Last edited by Raoul Eireiner; 26 May 2018, 04:11.

                              Comment

                              Working...
                              X