merge with specific conditions

Michael Duarte Goncalves

Join Date: Oct 2022
Posts: 500

merge with specific conditions

20 Oct 2023, 04:31

Hi everyone,

I would like to merge two datasets.
One has the Barrio's names. This dataset contains aggregated data by neighbourhood (by Barrio).

The other dataset contains the same Barrio's names, but into two separate variables: origin_barrio and destination_barrio.

Basically, if the barrio is called, say, palacio, I want the variables for the number of car parks (total, green, blue) to be added at the end of the dataset 2 (merging the data in bulk). However, regardless of whether the barrio is the origin or the destination, as long as the name of the barrio matches, I want the variables to be added just once, without having to create several variables.

parkingSER_2018_clean_collapsed.dta dataset:

Code:

* Example generated by -dataex-. For more info, type help dataex
clear
input str20(origin_barrio destination_barrio) double(parking_slots green_slots blue_slots orange_slots)
"acacias"              "acacias"              3771 3068  703    .
"adelfas"              "adelfas"              2465 2167  298    .
"almagro"              "almagro"              2982 2306  676    .
"almenara"             "almenara"             3830 3309  521    .
"arapiles"             "arapiles"             2104 1650  454    .
"argüelles"           "argüelles"           2476 2104  372    .
"atocha"               "atocha"               1479  694  785    .
"bellas vistas"        "bellas vistas"        2906 2585  321    .
"berruguete"           "berruguete"           2011 1639  372    .
"casa de campo"        "casa de campo"        5001 2923  872 1206
"castellana"           "castellana"           2489 2057  432    .
"castilla"             "castilla"             3807 2867  940    .
"castillejos"          "castillejos"          3111 2670  441    .
"chopera"              "chopera"              1910 1618  292    .
"ciudad jardín"       "ciudad jardín"       3364 2670  694    .
"ciudad universitaria" "ciudad universitaria" 2355 1774  581    .
"cortes"               "cortes"                646  646    .    .
"cuatro caminos"       "cuatro caminos"       4361 3490  871    .
"delicias"             "delicias"             3310 2493  817    .
"el pilar"             "el pilar"             7265 6634  631    .
"el viso"              "el viso"              5887 4435 1452    .
"embajadores"          "embajadores"          2409 2409    .    .
"estrella"             "estrella"             3639 3120  519    .
"fuente del berro"     "fuente del berro"     2220 1988  232    .
"gaztambide"           "gaztambide"           2217 1680  537    .
"goya"                 "goya"                 2934 2520  414    .
"guindalera"           "guindalera"           6727 5169 1558    .
"hispanoamérica"      "hispanoamérica"      6922 5335 1587    .
"ibiza"                "ibiza"                2035 1665  370    .
"imperial"             "imperial"             3668 2984  684    .
"jerónimos"           "jerónimos"           1925 1509  416    .
"justicia"             "justicia"             1589 1589    .    .
"la paz"               "la paz"               5925 4972  571    .
"legazpi"              "legazpi"              3183 2564  619    .
"lista"                "lista"                1860 1509  351    .
"niño jesús"         "niño jesús"         2607 2189  418    .
"nueva españa"        "nueva españa"        6738 5247 1491    .
"pacífico"            "pacífico"            2841 2366  475    .
"palacio"              "palacio"              2231 2040    .  191
"palos de moguer"      "palos de moguer"      3000 2541  459    .
"prosperidad"          "prosperidad"          4553 3747  806    .
"recoletos"            "recoletos"            2438 1870  568    .
"rios rosas"           "rios rosas"           3238 2707  531    .
"sol"                  "sol"                   357  357    .    .
"trafalgar"            "trafalgar"            2530 2177  353    .
"universidad"          "universidad"          1951 1951    .    .
"valdeacederas"        "valdeacederas"        2348 2144  204    .
"vallehermoso"         "vallehermoso"         3467 2674  793    .
end

This is the code tried relative with the dataset above:

Code:

cd ${data}/clean_edm
use parkingSER_2018_clean.dta, clear

//put everything in lower case, and spanish accents also (ñ, etc.)
gen Barrio_without_numbers = ustrlower(regexs(1),"es") if regexm(Barrio, "([A-Z][A-ZÁÉÍÓÚÜÑ]*[ ]?[A-ZÁÉÍÓÚÜÑ]*[ ]?[A-ZÁÉÍÓÚÜÑ]*)([A-ZÁÉÍÓÚÜÑ]+)*([A-ZÁÉÍÓÚÜÑ]+)*")
replace Barrio_without_numbers = "rios rosas" if Barrio_without_numbers == "ríos rosas"


// for further merging
collapse parking_slots green_slots blue_slots orange_slots, by(Barrio_without_numbers)
rename Barrio_without_numbers origin_barrio
gen destination_barrio = origin_barrio

order origin_barrio destination_barrio

save parkingSER_2018_clean_collapsed.dta, replace

tempfile parking_by_barrio
save `parking_by_barrio', replace

This is the second dataset clean_edm_trips_costs_pt_taxis_privatefuelcosts_to gether.dta:

Code:

* Example generated by -dataex-. For more info, type help dataex
clear
input str9 individ_ID long ID_HOGAR byte ID_VIAJE str47(origin_barrio destination_barrio) double(parking_slots green_slots blue_slots orange_slots)
"189_1"   189 1 ""            ""               .    .   . .
"189_1"   189 2 ""            ""               .    .   . .
"244_1"   244 1 ""            ""               .    .   . .
"244_1"   244 2 ""            ""               .    .   . .
"244_2"   244 1 ""            ""               .    .   . .
"244_2"   244 2 ""            ""               .    .   . .
"324_1"   324 1 ""            ""               .    .   . .
"324_1"   324 2 ""            ""               .    .   . .
"414_1"   414 1 ""            ""               .    .   . .
"414_1"   414 2 ""            ""               .    .   . .
"414_2"   414 1 ""            ""               .    .   . .
"414_2"   414 2 ""            ""               .    .   . .
"414_3"   414 1 ""            ""               .    .   . .
"414_3"   414 2 ""            ""               .    .   . .
"414_3"   414 3 ""            ""               .    .   . .
"581_1"   581 1 ""            ""               .    .   . .
"581_2"   581 1 ""            ""               .    .   . .
"581_2"   581 2 ""            ""               .    .   . .
"581_3"   581 1 ""            ""               .    .   . .
"581_3"   581 2 ""            ""               .    .   . .
"581_4"   581 1 ""            ""               .    .   . .
"581_4"   581 2 ""            ""               .    .   . .
"742_1"   742 1 ""            "adelfas"     2465 2167 298 .
"742_1"   742 2 "adelfas"     ""               .    .   . .
"742_2"   742 1 ""            "simancas"       .    .   . .
"742_2"   742 2 "simancas"    ""               .    .   . .
"759_1"   759 1 ""            ""               .    .   . .
"759_1"   759 2 ""            ""               .    .   . .
"759_1"   759 3 ""            ""               .    .   . .
"759_1"   759 4 ""            ""               .    .   . .
"759_2"   759 1 ""            ""               .    .   . .
"759_2"   759 2 ""            ""               .    .   . .
"759_3"   759 1 ""            ""               .    .   . .
"759_3"   759 2 ""            ""               .    .   . .
"759_4"   759 1 ""            ""               .    .   . .
"759_4"   759 2 ""            ""               .    .   . .
"914_1"   914 1 ""            ""               .    .   . .
"914_1"   914 2 ""            ""               .    .   . .
"914_2"   914 1 ""            ""               .    .   . .
"914_2"   914 2 ""            ""               .    .   . .
"1096_1" 1096 1 ""            "concepción"    .    .   . .
"1096_1" 1096 2 "concepción" ""               .    .   . .
"1096_2" 1096 1 ""            ""               .    .   . .
"1096_2" 1096 2 ""            ""               .    .   . .
"1096_2" 1096 3 ""            ""               .    .   . .
"1096_2" 1096 4 ""            ""               .    .   . .
"1096_2" 1096 5 ""            ""               .    .   . .
"1096_3" 1096 1 ""            ""               .    .   . .
"1096_3" 1096 2 ""            ""               .    .   . .
"1479_1" 1479 1 ""            ""               .    .   . .
"1479_1" 1479 2 ""            "pilar"          .    .   . .
"1479_1" 1479 3 "pilar"       ""               .    .   . .
"1479_1" 1479 4 ""            ""               .    .   . .
"1479_2" 1479 1 ""            ""               .    .   . .
"1479_2" 1479 2 ""            ""               .    .   . .
"1479_3" 1479 1 ""            ""               .    .   . .
"1479_3" 1479 2 ""            ""               .    .   . .
"1508_1" 1508 1 ""            "simancas"       .    .   . .
"1508_1" 1508 2 "simancas"    ""               .    .   . .
"1508_2" 1508 1 ""            ""               .    .   . .
"1508_2" 1508 2 ""            ""               .    .   . .
"1710_1" 1710 1 ""            ""               .    .   . .
"1710_1" 1710 2 ""            ""               .    .   . .
"1710_1" 1710 3 ""            ""               .    .   . .
"1710_2" 1710 1 ""            ""               .    .   . .
"1710_2" 1710 2 ""            ""               .    .   . .
"1710_3" 1710 1 ""            ""               .    .   . .
"1710_3" 1710 2 ""            ""               .    .   . .
"1710_3" 1710 3 ""            ""               .    .   . .
"1710_4" 1710 1 ""            ""               .    .   . .
"1710_4" 1710 2 ""            ""               .    .   . .
"2275_2" 2275 1 ""            ""               .    .   . .
"2275_3" 2275 1 ""            ""               .    .   . .
"2322_1" 2322 1 ""            ""               .    .   . .
"2322_3" 2322 1 ""            ""               .    .   . .
"2322_5" 2322 1 ""            ""               .    .   . .
"2332_1" 2332 1 ""            "imperial"    3668 2984 684 .
"2332_1" 2332 2 "imperial"    ""               .    .   . .
"2332_1" 2332 4 ""            ""               .    .   . .
"2332_1" 2332 5 ""            ""               .    .   . .
"2502_2" 2502 1 ""            ""               .    .   . .
"2502_2" 2502 2 ""            ""               .    .   . .
"2502_3" 2502 1 ""            ""               .    .   . .
"2502_3" 2502 2 ""            ""               .    .   . .
"2882_2" 2882 1 ""            ""               .    .   . .
"3216_1" 3216 1 ""            "goya"        2934 2520 414 .
"3216_1" 3216 2 "goya"        ""               .    .   . .
"3216_1" 3216 3 ""            ""               .    .   . .
"3216_2" 3216 1 ""            "prosperidad" 4553 3747 806 .
"3216_2" 3216 2 "prosperidad" ""               .    .   . .
"3216_3" 3216 1 ""            "prosperidad" 4553 3747 806 .
"3216_3" 3216 2 "prosperidad" ""               .    .   . .
"3216_3" 3216 3 ""            ""               .    .   . .
"3216_4" 3216 1 ""            "prosperidad" 4553 3747 806 .
"3216_4" 3216 2 "prosperidad" ""               .    .   . .
"3216_5" 3216 1 ""            ""               .    .   . .
"3216_5" 3216 2 ""            ""               .    .   . .
"3230_1" 3230 1 ""            ""               .    .   . .
"3230_1" 3230 2 ""            ""               .    .   . .
"3230_1" 3230 3 ""            ""               .    .   . .
end

This is the code that I tried, but not at all efficient, concerning merging both datasets:

Code:

cd ${maindir}/04_pt_modes_cleaned/data/clean_edm
use clean_edm_trips_costs_pt_taxis_privatefuelcosts_together.dta, clear


// merge EESTACIONA and EMODO variables. EESTACIONA tells us where people parked. EMODO tells us the mode used. EMODO should be similar to travel_mode.
merge m:m individ_ID ID_HOGAR ID_IND ID_VIAJE using ${data}/Microdata/etapas_edm_2018.dta, nogenerate keep(3) keepusing(EESTACIONA EMODO)

replace origin_barrio = ustrlower(origin_barrio,"es")
replace destination_barrio = ustrlower(destination_barrio,"es")



// merge parking_slots green_slots blue_slots orange_slots
merge m:1 destination_barrio using ${data}/clean_edm/parkingSER_2018_clean_collapsed.dta, generate(dest_zona_ser) keepusing(parking_slots green_slots blue_slots orange_slots)
merge m:1 origin_barrio using ${data}/clean_edm/parkingSER_2018_clean_collapsed.dta, generate(orig_zona_ser) keepusing(parking_slots green_slots blue_slots orange_slots) // THAT IS NOT WORKING, ASK statalist EXPERTS!

Thank you so much.
Best regards,

Michael

Last edited by Michael Duarte Goncalves; 20 Oct 2023, 04:36.

Tags: None

Announcement

merge with specific conditions