deviantart.py
import concurrent.futures

import requests
from bs4 import BeautifulSoup

# List of usernames to look up
usernames = ["twinstar", "theposearchives", "Tomasz-Mro"]

# Scrape account information for a single username
def scrape_account_info(username):
    # Profile URL for the username
    url = f"https://www.deviantart.com/{username}"

    # Send a GET request; a timeout keeps a stalled request from hanging a worker thread
    response = requests.get(url, timeout=10)

    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    # Note: the class names below ("_2UI2c", "_33syq", "_1thFP", "_1yr7V") are
    # build-generated identifiers; they are likely to change whenever DeviantArt
    # redeploys its front end, so expect to re-inspect the page and update them.

    try:
        # Scrape the display name
        name_element = soup.find("span", class_="_2UI2c")
        name = name_element.get_text(strip=True)
    except AttributeError:
        name = "Not found"

    try:
        # Scrape the "more info" section
        more_info_element = soup.find("div", class_="_33syq")
        more_info = more_info_element.get_text(strip=True)
    except AttributeError:
        more_info = "Not found"

    # Watchers and deviations share the same class, so collect every match and
    # pick by position (this assumes watchers are listed before deviations)
    stat_elements = soup.find_all("span", class_="_1thFP")

    try:
        # Scrape the number of watchers
        watchers = stat_elements[0].get_text(strip=True)
    except IndexError:
        watchers = "Not found"

    try:
        # Scrape the number of deviations
        deviations = stat_elements[1].get_text(strip=True)
    except IndexError:
        deviations = "Not found"

    try:
        # Scrape the pageviews
        pageviews_element = soup.find("div", class_="_1yr7V")
        pageviews = pageviews_element.get_text(strip=True)
    except AttributeError:
        pageviews = "Not found"

    # Return the scraped information
    return {
        "Username": username,
        "Name": name,
        "More info": more_info,
        "Watchers": watchers,
        "Deviations": deviations,
        "Pageviews": pageviews,
    }
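
# For debugging a single profile before running the threaded batch below,
# the function can also be called directly, e.g.:
#     print(scrape_account_info("twinstar"))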
# Run the lookups concurrently; the context manager waits for all workers
# to finish and shuts the pool down cleanly
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Submit the scraping tasks and collect results as they complete
    futures = [executor.submit(scrape_account_info, username) for username in usernames]
    results = [future.result() for future in concurrent.futures.as_completed(futures)]

# Print the scraped information
for result in results:
    for key, value in result.items():
        print(f"{key}: {value}")
    print("-----------------------------------")