-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistribution_of_users.py
56 lines (46 loc) · 1.88 KB
/
distribution_of_users.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
# Read the three CSV inputs (paths are relative to the current working
# directory) into module-level DataFrames, in the order movies, ratings, users.
movies_df, ratings_df, users_df = (
    pd.read_csv(csv_name)
    for csv_name in ("movies.csv", "ratings.csv", "users.csv")
)
# Function to group ages into 10-year intervals
def age_group(age):
    """Return the 10-year age-band label for ``age``.

    Args:
        age: A numeric age (int or float).

    Returns:
        One of ``"10-19"``, ``"20-29"``, ``"30-39"``, ``"40-49"``,
        ``"50-59"`` or ``"60+"``. Ages below 10, and NaN, yield
        ``"Unknown"``.
    """
    if age >= 60:
        return "60+"

    # (inclusive lower bound, label) for each closed 10-year band.
    bands = (
        (10, "10-19"),
        (20, "20-29"),
        (30, "30-39"),
        (40, "40-49"),
        (50, "50-59"),
    )
    for lower, label in bands:
        if lower <= age < lower + 10:
            return label

    # Reached for ages below 10 and for NaN, which fails every comparison.
    return "Unknown"
# Derive an 'age_group' column by labelling each user's 'age' with its
# 10-year band.
users_df["age_group"] = users_df["age"].map(age_group)
# Define a function to generate sunburst chart
def generate_sunburst_chart(selected_movie_id, layer1, layer2, layer3):
    """Build a sunburst chart of the users who rated one movie.

    Users that have a row in ``ratings_df`` for ``selected_movie_id`` are
    looked up in ``users_df``, counted per combination of the chosen layer
    columns, and drawn as a sunburst whose hierarchy runs from ``layer1``
    (root) to ``layer3`` (leaves).

    Args:
        selected_movie_id: Value matched against ``ratings_df['movieId']``.
        layer1: Name of the ``users_df`` column used for the first layer.
        layer2: Name of the ``users_df`` column used for the second layer.
        layer3: Name of the ``users_df`` column used for the third layer.

    Returns:
        An empty dict when any argument is ``None``; otherwise the figure
        returned by ``px.sunburst``.
    """
    if any(arg is None for arg in (selected_movie_id, layer1, layer2, layer3)):
        return {}

    # The same column picked for more than one layer would make
    # reset_index() fail ("cannot insert ..., already exists"), so keep only
    # the first occurrence of each column, preserving the chosen order.
    group_cols = []
    for column in (layer1, layer2, layer3):
        if column not in group_cols:
            group_cols.append(column)

    # Ids of the users who rated the selected movie.
    is_selected_movie = ratings_df['movieId'] == selected_movie_id
    user_ids_for_movie = ratings_df.loc[is_selected_movie, 'userId'].unique()

    # Profile rows of those users.
    users_for_movie = users_df[users_df['userId'].isin(user_ids_for_movie)]

    # Number of users in each combination of the selected layers.
    user_counts = (
        users_for_movie.groupby(group_cols)
        .size()
        .reset_index(name='user_count')
    )

    fig = px.sunburst(user_counts, path=group_cols, values='user_count')
    # Show the segment label and its user count on hover.
    fig.update_traces(hovertemplate='<b>%{label}</b><br>user_count: %{value}')
    return fig