-
Notifications
You must be signed in to change notification settings - Fork 1
/
database.py
152 lines (112 loc) · 5.72 KB
/
database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import main
import requests
# Artist names exposed by the `main` module; the functions below use this value as their
# default list of artists to look up on Spotify.
scrapper_output = main.artist_name
def api_fetch(client_id='f0affaf409354cc89102c9ff41044fe4', client_secret='f61d4df574404456ace1ba73551ce432'):
    """
    Request an access token from Spotify (client-credentials flow) and build the authorization header
    that the other Spotify API endpoints expect.

    Param:
    -----
    - client_id: a string containing the id of the app created in Spotify for Developers
    - client_secret: a string containing the secret of the app created in Spotify for Developers

    Output:
    ------
    - headers: dict with a single 'Authorization' entry of the form 'Bearer <access token>'

    Raises:
    ------
    - requests.HTTPError: if the token endpoint answers with an error status (e.g. bad credentials)
    - KeyError: if a successful response does not contain 'access_token'
    """
    # SECURITY NOTE(review): the default credentials above are committed in source. They should be
    # rotated and loaded from the environment or a secrets store; the defaults are only kept here so
    # that existing callers relying on `api_fetch()` keep working.

    # URL for token resource
    auth_url = 'https://accounts.spotify.com/api/token'
    # Request body (sent form-encoded)
    params = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    }
    # POST the request; the timeout keeps a stalled connection from hanging the caller forever
    auth_response = requests.post(auth_url, data=params, timeout=30)
    # Fail with the real HTTP error instead of a confusing KeyError on the error payload
    auth_response.raise_for_status()
    # Retrieve the access token
    access_token = auth_response.json()['access_token']
    # Build the header that authenticates subsequent API calls
    headers = {'Authorization': 'Bearer {token}'.format(token=access_token)}
    return headers
# Audio features collected for every track, in the order they are returned and stored
_AUDIO_FEATURES = ('danceability', 'energy', 'loudness', 'mode', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo')


def _get_json(url, headers, params=None):
    """GET `url` with the given headers and return the decoded JSON body; raises requests.HTTPError on error statuses."""
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    return response.json()


# Use the Spotify API to find the n most popular songs of each artist
def top_n_tracks(n=5, scrapper_output=scrapper_output):
    """
    Query the Spotify API for the top N tracks of each artist in `scrapper_output` and for the
    audio features of every one of those tracks.

    Param:
    -----
    - n: int. Number of top tracks to keep per artist; must be at least 1. Default value is 5.
         If Spotify returns fewer than n tracks for an artist, all of them are kept.
    - scrapper_output: iterable of artist names (strings) to look up. Defaults to the module-level list.

    Output:
    ------
    A tuple of 13 lists, all of the same length (one entry per track), in this order:
    - track_ids, track_titles, artist_names
    - danceability, energy, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo
    A feature value is None when Spotify reports no audio features for the track.

    Raises:
    ------
    - ValueError: if n is not a positive integer
    - requests.HTTPError: if any Spotify request answers with an error status
    """
    from urllib.parse import quote

    # Reject bad values early; this also catches callers passing the artist list positionally as `n`
    if not isinstance(n, int) or n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))

    # Call the function to save the header in a new variable
    headers = api_fetch()

    ### Get above artists' IDs to be able to get tracks of each artist
    # Define base url
    base_url = 'https://api.spotify.com/v1/'
    # Initialize track id list
    track_ids, track_titles, artist_names = [], [], []
    # Create loop to get artist id and then get the top n tracks of the corresponding artist
    for artist_name in scrapper_output:
        # Percent-encode the whole name so characters such as '&', '#' or '/' cannot break the query
        artist_search_endpoint = base_url + 'search?q=' + quote(artist_name, safe='') + '&type=artist'
        matches = _get_json(artist_search_endpoint, headers)['artists']['items']
        if not matches:
            # No artist found under this name: skip it instead of crashing the whole run
            continue
        artist_id = matches[0]['id']
        artist_top_tracks_endpoint = base_url + 'artists/' + artist_id + '/top-tracks?market=ES'
        track_info = _get_json(artist_top_tracks_endpoint, headers)['tracks']
        # Slicing keeps at most n tracks and copes with artists that have fewer
        for track in track_info[:n]:
            track_ids.append(track['id'])
            track_titles.append(track['name'])
            artist_names.append(track['artists'][0]['name'])

    ### Get all other features of each track
    # Initialize one list per feature
    features = {name: [] for name in _AUDIO_FEATURES}
    # Set the audio features endpoint
    audio_features_endpoint = base_url + 'audio-features'
    # Use the audio features endpoint to fetch the above features and append them to the lists
    for track_id in track_ids:
        track_info = _get_json(audio_features_endpoint, headers, params={'ids': track_id})
        # The entry is null when Spotify has no features for the track; fall back to an empty
        # dict so every feature list stays aligned with track_ids
        track_info_features = track_info['audio_features'][0] or {}
        for name in _AUDIO_FEATURES:
            features[name].append(track_info_features.get(name))

    return (track_ids, track_titles, artist_names) + tuple(features[name] for name in _AUDIO_FEATURES)


## 4 ## Create a df with these columns: song id, title, artist, and all features (['danceability', 'energy', 'loudness', 'mode', 'speechiness',
# 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo'])
# The df should contain about 245 songs (49 artists x 5)
# Importing pandas library
import pandas as pd


def create_database(scrapper_output=scrapper_output):
    """
    Fetch the top tracks and their audio features for the given artists and return them as a dataframe.

    Param:
    -----
    - scrapper_output: iterable of artist names (strings) to look up. Defaults to the module-level list.

    Output:
    ------
    - df: pandas DataFrame with one row per track and the columns 'id', 'title', 'artist',
          followed by one column per audio feature
    """
    # Pass the artists by keyword: the first positional parameter of top_n_tracks is `n`, not the artist list
    track_features = top_n_tracks(scrapper_output=scrapper_output)
    # Pair each returned list with its column name (same order as the tuple returned by top_n_tracks)
    columns = ('id', 'title', 'artist') + _AUDIO_FEATURES
    track_dict = dict(zip(columns, track_features))
    # Create and return the dataframe
    df = pd.DataFrame(track_dict)
    return df