This project involves analyzing purchasing patterns for a dummy video game company called "Heroes of Pymoli". Analysis is done about:
- Player count
- Purchasing analysis (total)
- Gender demographics
- Purchasing analysis (by gender)
- Age demographics
- Top spenders
- Most popular items
- Most profitable items
Observed Trends:
- In both datasets, there are more men than women or other/non-disclosed genders purchasing items in the game
- In both datasets, the most items were purchased within the 20-24 age group
- In both datasets, kids under 10 were purchasing more items than people over 40
#Dependencies
import pandas as pd
import numpy as np
import os
filename = input("file name:")
file name:purchase_data.json
# Set path for file
json_path = os.path.join("Resources", str(filename))
pymoli_data = pd.read_json(json_path)
pymoli_data.head()
Age | Gender | Item ID | Item Name | Price | SN | |
---|---|---|---|---|---|---|
0 | 38 | Male | 165 | Bone Crushing Silver Skewer | 3.37 | Aelalis34 |
1 | 21 | Male | 119 | Stormbringer, Dark Blade of Ending Misery | 2.32 | Eolo46 |
2 | 34 | Male | 174 | Primitive Blade | 2.46 | Assastnya25 |
3 | 21 | Male | 92 | Final Critic | 1.36 | Pheusrical25 |
4 | 23 | Male | 63 | Stormfury Mace | 1.27 | Aela59 |
# Player Count
player_count = pymoli_data["SN"].nunique()
player_frame = pd.DataFrame({"Total Players": [player_count]})
player_frame
Total Players | |
---|---|
0 | 573 |
#Purchasing Analysis (Total)
#Number of Unique Items
unique_items= pymoli_data["Item ID"].nunique()
#Average Purchase Price
avg_price = round(pymoli_data["Price"].mean(),2)
#Total Number of Purchases
tot_purch = len(pymoli_data)
#Total Revenue
revenue = round(pymoli_data["Price"].sum(),2)
purch_analysis = pd.DataFrame(
{"Number of Unique Items": [unique_items],
"Average Price": [avg_price],
"Number of Purchases": [tot_purch],
"Total Revenue": [revenue]})
purch_analysis= purch_analysis[["Number of Unique Items","Average Price","Number of Purchases","Total Revenue"]]
purch_analysis
Number of Unique Items | Average Price | Number of Purchases | Total Revenue | |
---|---|---|---|---|
0 | 183 | 2.93 | 780 | 2286.33 |
#Gender Demographics
grouped_gender=pymoli_data.groupby(["Gender"])
count_gender = grouped_gender["SN"].nunique()
perc_gender = round((count_gender/player_count)*100,2)
gender = pd.DataFrame({"Total Count":count_gender,"Percentage of Players":perc_gender})
gender
Percentage of Players | Total Count | |
---|---|---|
Gender | ||
Female | 17.45 | 100 |
Male | 81.15 | 465 |
Other / Non-Disclosed | 1.40 | 8 |
#Purchasing Analysis (Gender)
purchase_count = grouped_gender["SN"].count()
avg_price = round(grouped_gender["Price"].mean(),2)
tot_price = grouped_gender["Price"].sum()
norm_totals = round(tot_price/count_gender,2)
purch_analysis = pd.DataFrame({"Purchase Count": purchase_count,
"Average Purchase Price": avg_price,
"Total Purchase Value": tot_price,
"Normalized Totals": norm_totals})
purch_analysis = purch_analysis[["Purchase Count","Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
purch_analysis
Purchase Count | Average Purchase Price | Total Purchase Value | Normalized Totals | |
---|---|---|---|---|
Gender | ||||
Female | 136 | 2.82 | 382.91 | 3.83 |
Male | 633 | 2.95 | 1867.68 | 4.02 |
Other / Non-Disclosed | 11 | 3.25 | 35.74 | 4.47 |
#Bins for Age Demographics
bins = [0]
bin_value = ["<10"]
max_age = pymoli_data["Age"].max()
for x in range(2,int(max_age/5)+2):
bins = bins + [(x*5)-1]
for x in range(1,len(bins)-2):
bin_value = bin_value + [str(bins[x]+1)+"-"+str(bins[x+1])]
bin_value = bin_value + [str(max_age) + "+"]
#print(bins)
#print(bin_value)
#Age Demographics - need to group by SN.. add back in age, gender.. groupby age group (count and percentage)
pymoli_data["Age Group"] = pd.cut(pymoli_data["Age"],bins,labels = bin_value)
#Purchasing Analysis (Age)
age_group = pymoli_data.groupby(["Age Group"])
age_count = age_group.size()
age_avg_price = round(age_group["Price"].mean(),2)
age_tot_value = age_group["Price"].sum()
age_norm_tot = round(age_tot_value/age_count,2)
age_dataframe = pd.DataFrame({"Purchase Count":age_count,
"Average Purchase Price": age_avg_price,
"Total Purchase Value": age_tot_value,
"Normalized Totals": age_norm_tot})
age_dataframe = age_dataframe[["Purchase Count","Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
age_dataframe
Purchase Count | Average Purchase Price | Total Purchase Value | Normalized Totals | |
---|---|---|---|---|
Age Group | ||||
<10 | 28 | 2.98 | 83.46 | 2.98 |
10-14 | 35 | 2.77 | 96.95 | 2.77 |
15-19 | 133 | 2.91 | 386.42 | 2.91 |
20-24 | 336 | 2.91 | 978.77 | 2.91 |
25-29 | 125 | 2.96 | 370.33 | 2.96 |
30-34 | 64 | 3.08 | 197.25 | 3.08 |
35-39 | 42 | 2.84 | 119.40 | 2.84 |
40-44 | 16 | 3.19 | 51.03 | 3.19 |
45+ | 1 | 2.72 | 2.72 | 2.72 |
top_spend_group = pymoli_data.groupby("SN")
top_spend_count = top_spend_group.size()
top_spend_avg = round(top_spend_group["Price"].mean(),2)
top_spenders = top_spend_group["Price"].sum()
top_spenders_frame = pd.DataFrame({"Total Purchase Value": top_spenders})
#Identify top 5 spenders
top_spenders_sort = top_spenders_frame.sort_values(by="Total Purchase Value",
ascending = False)
top_spenders_filter = top_spenders_sort.iloc[0:5].reset_index()
#join spenders with other calculations
top_spend_func_frame = pd.DataFrame({"Purchase Count": top_spend_count,
"Average Purchase Price": top_spend_avg}).reset_index()
top_spenders_final = pd.merge(top_spenders_filter, top_spend_func_frame, on="SN")
top_spenders_final
SN | Total Purchase Value | Average Purchase Price | Purchase Count | |
---|---|---|---|---|
0 | Undirrala66 | 17.06 | 3.41 | 5 |
1 | Saedue76 | 13.56 | 3.39 | 4 |
2 | Mindimnya67 | 12.74 | 3.18 | 4 |
3 | Haellysu29 | 12.73 | 4.24 | 3 |
4 | Eoda93 | 11.58 | 3.86 | 3 |
top_items = pymoli_data.groupby("Item ID")
top_items_group2 = pymoli_data.groupby(["Item ID","Item Name","Price"])
top_items_count = top_items.size()
top_items_value = top_items_group2["Price"].sum()
top_items_frame = pd.DataFrame({"Purchase Count": top_items_count})
#Identify most popular items
top_items_sort = top_items_frame.sort_values(by="Purchase Count",
ascending = False).reset_index()
end = top_items_sort["Purchase Count"].size
lastrow = 0
for row in range(4, end):
if(top_items_sort.get_value(row,"Purchase Count") != top_items_sort.get_value(row + 1,"Purchase Count")):
lastrow = row
break
if lastrow != 4:
print("Note: As a result of ties, more than 5 items are the most popular. The tied values are printed")
top_items_filter = top_items_sort.iloc[0:lastrow+1].reset_index()
top_items_func_frame = pd.DataFrame({"Total Purchase Value": top_items_value}).reset_index()
#merge tables
top_items_final = pd.merge(top_items_filter, top_items_func_frame, on="Item ID")
#fix column order and price name
top_items_final = top_items_final[["Item ID","Item Name","Purchase Count","Price","Total Purchase Value"]]
top_items_final = top_items_final.rename(columns={"Price":"Item Price"})
top_items_final
Note: As a result of ties, more than 5 items are the most popular. The tied values are printed
Item ID | Item Name | Purchase Count | Item Price | Total Purchase Value | |
---|---|---|---|---|---|
0 | 39 | Betrayal, Whisper of Grieving Widows | 11 | 2.35 | 25.85 |
1 | 84 | Arcane Gem | 11 | 2.23 | 24.53 |
2 | 31 | Trickster | 9 | 2.07 | 18.63 |
3 | 175 | Woeful Adamantite Claymore | 9 | 1.24 | 11.16 |
4 | 13 | Serenity | 9 | 1.49 | 13.41 |
5 | 34 | Retribution Axe | 9 | 4.14 | 37.26 |
#Most Profitable Items
top_values_frame = pd.DataFrame({"Total Purchase Value": top_items_value})
top_values_sort = top_values_frame.sort_values(by="Total Purchase Value",
ascending = False)
top_values_filter = top_values_sort.iloc[0:5].reset_index()
top_values_final = pd.merge(top_values_filter,top_items_sort, on="Item ID")
#rearrange columns and rename price column
top_values_final = top_values_final[["Item ID","Item Name","Purchase Count","Price","Total Purchase Value"]]
top_values_final = top_values_final.rename(columns={"Price":"Item Price"})
print()
top_values_final
Item ID | Item Name | Purchase Count | Item Price | Total Purchase Value | |
---|---|---|---|---|---|
0 | 34 | Retribution Axe | 9 | 4.14 | 37.26 |
1 | 115 | Spectral Diamond Doomblade | 7 | 4.25 | 29.75 |
2 | 32 | Orenmir | 6 | 4.95 | 29.70 |
3 | 103 | Singed Scalpel | 6 | 4.87 | 29.22 |
4 | 107 | Splitter, Foe Of Subtlety | 8 | 3.61 | 28.88 |