-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl_profile.py
executable file
·38 lines (28 loc) · 1.09 KB
/
crawl_profile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python3.5
"""Goes through all usernames and collects their information"""
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from util.cli_helper import get_all_user_names
from util.extractor import extract_information
# Script body: start a headless Chrome session, run extract_information() for
# every name returned by get_all_user_names(), and write each result to
# ./profiles/<username>.json.

# Headless Chrome that requests en-US as its UI / accept language.
chrome_options = Options()
chrome_options.add_argument('--dns-prefetch-disable')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--lang=en-US')
chrome_options.add_argument('--headless')
chrome_options.add_experimental_option('prefs', {'intl.accept_languages': 'en-US'})
browser = webdriver.Chrome('./assets/chromedriver', chrome_options=chrome_options)

# makes sure slower connections work as well
browser.implicitly_wait(25)

try:
    usernames = get_all_user_names()

    for username in usernames:
        print('Extracting information from ' + username)
        information = extract_information(browser, username)

        # One JSON file per profile, named after the username.
        with open('./profiles/' + username + '.json', 'w') as fp:
            json.dump(information, fp)
except KeyboardInterrupt:
    # Ctrl-C stops the crawl; files already written are kept.
    print('Aborted...')
finally:
    try:
        browser.delete_all_cookies()
    finally:
        # quit() instead of close(): close() only closes the current window,
        # while quit() ends the whole WebDriver session and shuts down the
        # chromedriver process. The nested try/finally guarantees the
        # shutdown still happens when clearing cookies fails (e.g. because
        # the browser session is already gone).
        browser.quit()