Code:
* Install the jregex package from the StataRegex site.
net install jregex, from("http://wbuchanan.github.io/StataRegex/")
Code:
* Usage sketch only: "..." stands for arguments that the post does not show.
jregex replace ...
* Install the jregex package from the StataRegex site.
net inst jregex, from("http://wbuchanan.github.io/StataRegex/")
* Usage sketch only: "..." stands for arguments that the post does not show.
jregex replace ...
* Example generated by -dataex-. To install: ssc install dataex
clear
input str17 phone1 str14 phone2 str16 phone3
"(706)332-9739" "735 578 - 674 " "765 60-6789"
"605 801 8928" "(227) 385 3769" "(944)5239383"
"(647) 425120" "(98 ) 246674" "(522)829-8615"
"12 841 - 4162" "(656) 919 8420" "(803) 436 - 483 "
"(795)450-6874" "1108218003" "1308484948"
"(795) 450 - 68745" "1108218003" "1308484948"
"1108218003" "(795) 450 - " "1308484948"
end

* Approach 1: search the three phone strings glued together (no separator)
* for an already well-formed "(ddd) ddd - dddd" number. The pattern is not
* anchored, so a match may start in one variable and finish in the next.
generate clnphone = regexs(1) if regexm(phone1 + phone2 + phone3, ///
    "(\([0-9][0-9][0-9]\) [0-9][0-9][0-9] - [0-9][0-9][0-9][0-9])")

* Approach 2: test each variable on its own with an anchored pattern that
* tolerates any number of blanks around the pieces, and keep the first
* variable that matches, rewritten as "(ddd) ddd - dddd".
local ddd "[0-9][0-9][0-9]"
generate firstgood = ""
foreach phonevar of varlist phone1 phone2 phone3 {
    * Only fill rows that are still empty, so earlier variables win.
    replace firstgood = regexs(1) + " " + regexs(2) + " - " + regexs(3) ///
        if regexm(`phonevar', "^(\(`ddd'\)) *(`ddd') *- *(`ddd'[0-9])$") & missing(firstgood)
}

list
. list

     +---------------------------------------------------------------------------------------------+
     |            phone1           phone2             phone3           clnphone          firstgood |
     |---------------------------------------------------------------------------------------------|
  1. |     (706)332-9739   735 578 - 674         765 60-6789                      (706) 332 - 9739 |
  2. |      605 801 8928   (227) 385 3769       (944)5239383                                       |
  3. |      (647) 425120     (98 ) 246674      (522)829-8615                      (522) 829 - 8615 |
  4. |     12 841 - 4162   (656) 919 8420   (803) 436 - 483                                        |
  5. |     (795)450-6874       1108218003         1308484948                      (795) 450 - 6874 |
     |---------------------------------------------------------------------------------------------|
  6. | (795) 450 - 68745       1108218003         1308484948   (795) 450 - 6874                    |
  7. |        1108218003     (795) 450 -          1308484948   (795) 450 - 1308                    |
     +---------------------------------------------------------------------------------------------+
* Unicode regex functions: the digit runs are written with {n} repetition counts.
generate clnphone = ustrregexs(1) if ///
    ustrregexm(phone1 + phone2 + phone3, "(\([0-9]{3}\) [0-9]{3} - [0-9]{4})")

* Same search with regexm()/regexs(), each digit class spelled out individually.
generate clnphone2 = regexs(1) if ///
    regexm(phone1 + phone2 + phone3, "(\([0-9][0-9][0-9]\) [0-9][0-9][0-9] - [0-9][0-9][0-9][0-9])")
* Example generated by -dataex-. To install: ssc install dataex
clear
input str16(phone1 phone2 phone3)
"(706)332-9739" "735 578 - 674 " "765 60-6789"
"605 801 8928" "(227) 385 3769" "(944)5239383"
"(647) 425120" "(98 ) 246674" "(522)829-8615"
"12 841 - 4162" "(656) 919 8420" "(803) 436 - 483 "
"(795)450-6874" "1108218003" "1308484948"
end

* For every phone variable, build <name>_clean holding only its digits:
* each character that is not 0-9 is replaced with the empty string.
foreach phonevar of varlist phone1 phone2 phone3 {
    generate `phonevar'_clean = ustrregexra(`phonevar', "[^0-9]", "")
}
. list

     +------------------------------------------------------------------------------------------+
     |        phone1           phone2             phone3   phone1_c~n   phone2_c~n   phone3_c~n |
     |------------------------------------------------------------------------------------------|
  1. | (706)332-9739   735 578 - 674         765 60-6789   7063329739    735578674    765606789 |
  2. |  605 801 8928   (227) 385 3769       (944)5239383   6058018928   2273853769   9445239383 |
  3. |  (647) 425120     (98 ) 246674      (522)829-8615    647425120     98246674   5228298615 |
  4. | 12 841 - 4162   (656) 919 8420   (803) 436 - 483     128414162   6569198420    803436483 |
  5. | (795)450-6874       1108218003         1308484948   7954506874   1108218003   1308484948 |
     +------------------------------------------------------------------------------------------+
* Simulate 1,000 observations of three messy phone-number strings
* (phone1-phone3) that randomly vary in parentheses, spacing and hyphenation.
clear
set obs 1000
set seed 7779311

tempvar hasparen hasspace hashyphen area exchange extension areas exchanges extensions

* Numeric scratch variables: three format flags and the three number parts.
foreach numvar in `hasparen' `hasspace' `hashyphen' `area' `exchange' `extension' {
    generate `numvar' = .
}

* String scratch variables holding the padded text form of each part.
foreach strvar in `areas' `exchanges' `extensions' {
    generate `strvar' = ""
}

forvalues i = 1/3 {
    quietly {
        * Fresh coin flips for this variable's formatting.
        replace `hasparen' = rbinomial(1, .5)
        replace `hasspace' = rbinomial(1, .5)
        replace `hashyphen' = rbinomial(1, .5)

        * Fresh random number parts.
        * NOTE(review): confirm whether int(runiform(1, 999)) can ever produce
        * the upper bound; runiformint() may be what was intended.
        replace `area' = int(runiform(1, 999))
        replace `exchange' = int(runiform(1, 999))
        replace `extension' = int(runiform(1, 9999))

        * Area code as text; short values are padded with blanks, and the
        * result is wrapped in parentheses when the flag is set.
        replace `areas' = ///
            cond(inrange(`area', 1, 9), " " + strofreal(`area'), ///
            cond(inrange(`area', 10, 99), strofreal(`area') + " ", ///
            strofreal(`area')))
        replace `areas' = "(" + `areas' + ")" if `hasparen' == 1

        * Exchange as text, padded with blanks when it has fewer than three digits.
        replace `exchanges' = ///
            cond(inrange(`exchange', 1, 9), " " + strofreal(`exchange') + " ", ///
            cond(inrange(`exchange', 10, 99), " " + strofreal(`exchange'), ///
            strofreal(`exchange')))

        * Extension as text, padded with blanks when it has fewer than four digits.
        replace `extensions' = ///
            cond(inrange(`extension', 1, 9), " " + strofreal(`extension') + " ", ///
            cond(inrange(`extension', 10, 99), " " + strofreal(`extension'), ///
            cond(inrange(`extension', 100, 999), strofreal(`extension') + " ", ///
            strofreal(`extension'))))

        * Assemble the phone string according to the spacing/hyphen flags.
        generate phone`i' = ///
            cond(`hasspace' == 1 & `hashyphen' == 1, ///
                `areas' + " " + `exchanges' + " - " + `extensions', ///
            cond(`hasspace' == 0 & `hashyphen' == 1, ///
                `areas' + `exchanges' + "-" + `extensions', ///
            cond(`hasspace' == 1 & `hashyphen' == 0, ///
                `areas' + " " + `exchanges' + " " + `extensions', ///
                `areas' + `exchanges' + `extensions')))
    }
}
Leave a comment: