I have a .do file that imports an excel file, then runs some data cleaning, and drops observations that don't meet our requirements. Something strange seems to be happening. When I run the file, the last command will sometimes drop 4 observations, sometimes 5, and sometimes 6. I don't change the code or anything about the imported dataset. I just highlight the code and run the script, and it randomly drops between 4 and 6 in the final step. I tried adding sleep, thinking that perhaps the processor was getting bogged down and missing something. That didn't fix it. Any help would be appreciated.
clear
import excel "/Users/RO584734/Downloads/Gen+Pop+Phase+1++PBSBE+Measure+Validation_February +12,+2025_11.41.xlsx", sheet("Sheet0") firstrow
sleep 100
drop if Q_RecaptchaScore < .49
sleep 100
gen val1 = .
replace val1 = 0 if V1 !=1
replace val1 = 1 if V1 == 1
replace val1 =. if V1==.
gen val2 = .
replace val2 = 0 if V2 !=1 & V2 !=.
replace val2 = 1 if V2 == 1
replace val2 =. if V2 ==.
gen val3 = .
replace val3 = 0 if V3 !=3 & V3 !=.
replace val3 = 1 if V3 == 1
replace val3 =. if V3 ==.
gen val4 = .
replace val4 = 0 if V4 !=3 & V4!=.
replace val4 = 1 if V4 == 1
replace val4 =. if V4 ==.
egen vmiss = rowmiss(val1 val2 val3 val4)
gen stopped =1 if val1!=. & val2==. & val3==. & val4==.
replace stopped =1 if val2 !=. & val3==. & val4==.
replace stopped =1 if val3 !=. & val4 ==.
replace stopped =1 if val4==.
gen valid = .
replace valid = 0 if (val1 + val2 + val3 + val4) < 4
replace valid = 1 if (val1 + val2 + val3 + val4) == 4
sleep 100
drop if valid==.
sleep 100
drop if valid==0
sleep 100
drop if Demo_age < 18
drop if Demo_age > 65
drop if YPARQ_1 ==2
drop if YPARQ_2 ==2
sort IPAddress
sleep 100
drop if IPAddress== IPAddress[_n-1] & Demo_age== Demo_age[_n-1] & Demo_sex==Demo_sex[_n-1] & Demo_gender== Demo_gender[_n-1]
clear
import excel "/Users/RO584734/Downloads/Gen+Pop+Phase+1++PBSBE+Measure+Validation_February +12,+2025_11.41.xlsx", sheet("Sheet0") firstrow
sleep 100
drop if Q_RecaptchaScore < .49
sleep 100
gen val1 = .
replace val1 = 0 if V1 !=1
replace val1 = 1 if V1 == 1
replace val1 =. if V1==.
gen val2 = .
replace val2 = 0 if V2 !=1 & V2 !=.
replace val2 = 1 if V2 == 1
replace val2 =. if V2 ==.
gen val3 = .
replace val3 = 0 if V3 !=3 & V3 !=.
replace val3 = 1 if V3 == 1
replace val3 =. if V3 ==.
gen val4 = .
replace val4 = 0 if V4 !=3 & V4!=.
replace val4 = 1 if V4 == 1
replace val4 =. if V4 ==.
egen vmiss = rowmiss(val1 val2 val3 val4)
gen stopped =1 if val1!=. & val2==. & val3==. & val4==.
replace stopped =1 if val2 !=. & val3==. & val4==.
replace stopped =1 if val3 !=. & val4 ==.
replace stopped =1 if val4==.
gen valid = .
replace valid = 0 if (val1 + val2 + val3 + val4) < 4
replace valid = 1 if (val1 + val2 + val3 + val4) == 4
sleep 100
drop if valid==.
sleep 100
drop if valid==0
sleep 100
drop if Demo_age < 18
drop if Demo_age > 65
drop if YPARQ_1 ==2
drop if YPARQ_2 ==2
sort IPAddress
sleep 100
drop if IPAddress== IPAddress[_n-1] & Demo_age== Demo_age[_n-1] & Demo_sex==Demo_sex[_n-1] & Demo_gender== Demo_gender[_n-1]
Comment