Code:
net inst jregex, from("http://wbuchanan.github.io/StataRegex/")
Code:
jregex replace ...
net inst jregex, from("http://wbuchanan.github.io/StataRegex/")
jregex replace ...
* Example generated by -dataex-. To install: ssc install dataex
* Sample data: three string phone fields per row, entered in inconsistent
* formats (parentheses, spaces and hyphens come and go; some entries have too
* few digits and one has an extra trailing digit).
* Storage widths are str17 / str14 / str16, so trailing blanks inside the
* quoted values are part of the data.
clear
input str17 phone1 str14 phone2 str16 phone3
"(706)332-9739" "735 578 - 674 " "765 60-6789"
"605 801 8928" "(227) 385 3769" "(944)5239383"
"(647) 425120" "(98 ) 246674" "(522)829-8615"
"12 841 - 4162" "(656) 919 8420" "(803) 436 - 483 "
"(795)450-6874" "1108218003" "1308484948"
"(795) 450 - 68745" "1108218003" "1308484948"
"1108218003" "(795) 450 - " "1308484948"
end
* Look for the first "(ddd) ddd - dddd" substring in the three phone fields
* glued together in variable order, and store it in clnphone (empty string
* where nothing matches).
* NOTE(review): the fields are joined with no separator and the pattern is not
* anchored, so a match can straddle two variables or stop inside a longer run
* of digits.
generate clnphone = ""
replace clnphone = regexs(1) if regexm(phone1 + phone2 + phone3, "(\([0-9][0-9][0-9]\) [0-9][0-9][0-9] - [0-9][0-9][0-9][0-9])")
* Scan phone1, phone2, phone3 in that order and keep the first field that, as
* a whole, reads "(ddd)" + optional spaces + "ddd" + optional spaces + "-" +
* optional spaces + "dddd". The kept value is rewritten as "(ddd) ddd - dddd".
local d3 "[0-9][0-9][0-9]"
generate firstgood = ""
forvalues j = 1/3 {
    * Only rows still empty are eligible, so an earlier field wins over a later one.
    replace firstgood = regexs(1) + " " + regexs(2) + " - " + regexs(3) if regexm(phone`j', "^(\(`d3'\)) *(`d3') *- *(`d3'[0-9])$") & missing(firstgood)
}
list
. list
+---------------------------------------------------------------------------------------------+
| phone1 phone2 phone3 clnphone firstgood |
|---------------------------------------------------------------------------------------------|
1. | (706)332-9739 735 578 - 674 765 60-6789 (706) 332 - 9739 |
2. | 605 801 8928 (227) 385 3769 (944)5239383 |
3. | (647) 425120 (98 ) 246674 (522)829-8615 (522) 829 - 8615 |
4. | 12 841 - 4162 (656) 919 8420 (803) 436 - 483 |
5. | (795)450-6874 1108218003 1308484948 (795) 450 - 6874 |
|---------------------------------------------------------------------------------------------|
6. | (795) 450 - 68745 1108218003 1308484948 (795) 450 - 6874 |
7. | 1108218003 (795) 450 - 1308484948 (795) 450 - 1308 |
+---------------------------------------------------------------------------------------------+
* Same extraction written twice for comparison: once with the Unicode regex
* functions (which accept {n} repetition counts) and once with the legacy
* regexm()/regexs() pair (digit classes spelled out).
* Both search the three phone fields concatenated in variable order.
local allphones phone1 + phone2 + phone3
generate clnphone = ustrregexs(1) if ustrregexm(`allphones', "(\([0-9]{3}\) [0-9]{3} - [0-9]{4})")
generate clnphone2 = regexs(1) if regexm(`allphones', "(\([0-9][0-9][0-9]\) [0-9][0-9][0-9] - [0-9][0-9][0-9][0-9])")
* Example generated by -dataex-. To install: ssc install dataex
* Sample data: three string phone fields per row in inconsistent formats.
clear
input str16(phone1 phone2 phone3)
"(706)332-9739" "735 578 - 674 " "765 60-6789"
"605 801 8928" "(227) 385 3769" "(944)5239383"
"(647) 425120" "(98 ) 246674" "(522)829-8615"
"12 841 - 4162" "(656) 919 8420" "(803) 436 - 483 "
"(795)450-6874" "1108218003" "1308484948"
end

* For each phone field, create <name>_clean holding only its digits: every
* character that is not 0-9 (parentheses, spaces, hyphens) is removed.
foreach v of varlist phone1 phone2 phone3 {
    gen `v'_clean = ustrregexra(`v', "[^0-9]", "")
}
. list
+-------------------------------------------------------------------------------------------+
| phone1 phone2 phone3 phone1_c~n phone2_c~n phone3_c~n |
|-------------------------------------------------------------------------------------------|
1. | (706)332-9739 735 578 - 674 765 60-6789 7063329739 735578674 765606789 |
2. | 605 801 8928 (227) 385 3769 (944)5239383 6058018928 2273853769 9445239383 |
3. | (647) 425120 (98 ) 246674 (522)829-8615 647425120 98246674 5228298615 |
4. | 12 841 - 4162 (656) 919 8420 (803) 436 - 483 128414162 6569198420 803436483 |
5. | (795)450-6874 1108218003 1308484948 7954506874 1108218003 1308484948 |
+-------------------------------------------------------------------------------------------+
* Simulate 1000 rows of messy phone numbers in three string variables
* phone1, phone2, phone3. Each number is assembled from a random area code,
* exchange and extension, with randomly chosen parentheses, spaces and hyphen.
clear
set obs 1000
set seed 7779311

* Scratch variables, reused on every pass of the loop:
*   hasparen/hasspace/hashyphen : 0/1 formatting switches
*   area/exchange/extension     : numeric parts
*   areas/exchanges/extensions  : the same parts as padded strings
tempvar hasparen hasspace hashyphen area exchange extension areas exchanges extensions
g `hasparen' = .
g `hasspace' = .
g `hashyphen' = .
g `area' = .
g `exchange' = .
g `extension' = .
g `areas' = ""
g `exchanges' = ""
g `extensions' = ""

forv i = 1/3 {

    * Fresh formatting switches for this phone variable (each a fair coin flip).
    qui: replace `hasparen' = rbinomial(1, .5)
    qui: replace `hasspace' = rbinomial(1, .5)
    qui: replace `hashyphen' = rbinomial(1, .5)

    * Random integer parts. runiformint() includes both end points, whereas
    * int(runiform(a, b)) can never return b because runiform(a, b) draws from
    * the open interval (a, b).
    qui: replace `area' = runiformint(1, 999)
    qui: replace `exchange' = runiformint(1, 999)
    qui: replace `extension' = runiformint(1, 9999)

    * Area code as text: 1-9 gets a leading blank, 10-99 a trailing blank,
    * 100-999 is used as is.
    qui: replace `areas' = cond(inrange(`area', 1, 9), " " + strofreal(`area'), ///
        cond(inrange(`area', 10, 99), strofreal(`area') + " ", ///
        strofreal(`area')))

    * Wrap the area code in parentheses when the switch is on.
    qui: replace `areas' = "(" + `areas' + ")" if `hasparen' == 1

    * Exchange as text: 1-9 is blank-padded on both sides, 10-99 gets a
    * leading blank, 100-999 is used as is.
    qui: replace `exchanges' = cond(inrange(`exchange', 1, 9), " " + strofreal(`exchange') + " ", ///
        cond(inrange(`exchange', 10, 99), " " + strofreal(`exchange'), ///
        strofreal(`exchange')))

    * Extension as text: 1-9 is blank-padded on both sides, 10-99 gets a
    * leading blank, 100-999 a trailing blank, 1000-9999 is used as is.
    qui: replace `extensions' = cond(inrange(`extension', 1, 9), " " + strofreal(`extension') + " ", ///
        cond(inrange(`extension', 10, 99), " " + strofreal(`extension'), ///
        cond(inrange(`extension', 100, 999), strofreal(`extension') + " ", ///
        strofreal(`extension'))))

    * Join the three parts using one of four layouts picked by the
    * space/hyphen switches:
    *   space & hyphen : "area exch - ext"
    *   hyphen only    : "areaexch-ext"
    *   space only     : "area exch ext"
    *   neither        : "areaexchext"
    qui: g phone`i' = cond(`hasspace' == 1 & `hashyphen' == 1, ///
        `areas' + " " + `exchanges' + " - " + `extensions', ///
        cond(`hasspace' == 0 & `hashyphen' == 1, ///
        `areas' + `exchanges' + "-" + `extensions', ///
        cond(`hasspace' == 1 & `hashyphen' == 0, ///
        `areas' + " " + `exchanges' + " " + `extensions', ///
        `areas' + `exchanges' + `extensions')))

}
Leave a comment: