-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
59 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Consecutive digit swap | ||
|
||
---- | ||
## Overview | ||
|
||
This project provides a solution for detecting consecutive digit swaps in phone numbers. | ||
For example: 070 1234 5678 vs. 070 2134 5678 (the adjacent digits 1 and 2 are swapped). | ||
|
||
---- | ||
## Implementation | ||
1. Modify this code so that it runs in a Python custom script container. (https://docs.treasuredata.com/articles/#!pd/python-custom-scripting-example) | ||
2. Copy and paste the code into a custom script in Treasure Workflows. | ||
|
||
---- | ||
## Considerations | ||
|
||
This project can be used to detect consecutive character swaps in any string field, e.g., email addresses or usernames. | ||
|
||
---- | ||
## Questions | ||
|
||
Please feel free to reach out to apac-se@treasure-data.com with any questions you have about using this code. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import pandas as pd | ||
|
||
def _is_consecutive_swap(first, second):
    """Return True if *second* is *first* with exactly one adjacent pair swapped."""
    # Non-string cells (e.g. NaN from a short row) can never be a swap.
    if not isinstance(first, str) or not isinstance(second, str):
        return False
    if len(first) != len(second):
        return False

    mismatches = [
        pos for pos, (a, b) in enumerate(zip(first, second)) if a != b
    ]
    # A single swap changes exactly two positions.
    if len(mismatches) != 2:
        return False

    i, j = mismatches
    return j == i + 1 and first[i] == second[j] and first[j] == second[i]


def check_consecutive_digit_swap(input_path='data.csv', output_path='res.csv'):
    """Find row pairs that differ only by one swap of adjacent characters.

    Reads a CSV file that has the columns ``ph1`` and ``ph2``, compares the
    two values of every row, and writes each pair in which ``ph2`` equals
    ``ph1`` with two neighbouring characters exchanged to a result CSV with
    the header ``phone1,phone2``. The number of matching pairs is printed
    to standard output.

    Args:
        input_path: Path of the CSV file to read. Defaults to ``data.csv``.
        output_path: Path of the CSV file to write. Defaults to ``res.csv``.
            An existing file is overwritten.

    Raises:
        KeyError: If the input file lacks a ``ph1`` or ``ph2`` column.
    """
    # keep_default_na=False keeps empty cells as '' and literal values such
    # as "NA" or "null" as text instead of converting them to float NaN,
    # which would break the length comparison.
    df = pd.read_csv(input_path, dtype=str, keep_default_na=False)
    first_column = df['ph1']
    second_column = df['ph2']

    cnt = 0
    # Open the result file once: reopening it in 'w' mode after writing the
    # header would truncate the file and drop the header again.
    with open(output_path, 'w', encoding='utf-8') as out:
        out.write('phone1,phone2\n')
        for phone1, phone2 in zip(first_column, second_column):
            if _is_consecutive_swap(phone1, phone2):
                cnt += 1
                out.write(phone1 + ',' + phone2 + '\n')

    print(str(cnt))
|
||
# Run the check at module top level, i.e. whenever this file is executed or imported.
check_consecutive_digit_swap()