-
Notifications
You must be signed in to change notification settings - Fork 0
/
coding_for_visual_RStudio.R
125 lines (102 loc) · 4.54 KB
/
coding_for_visual_RStudio.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Setting up environment
# Load necessary packages
library(tidyverse)
library(geosphere)
library(ggplot2)
library(dplyr)
library(lubridate)
# Load the data
# The CSV is read through an explicit path rather than after a setwd() call,
# so running the script does not change the session's working directory.
# Adjust `data_dir` to wherever cleaned_bike_data.csv is stored.
data_dir <- "C:/Users/Shahmi Aiman/Downloads"
bike_data <- read_csv(file.path(data_dir, "cleaned_bike_data.csv"))
## Creating Pie Chart (Total) ##
# Count rides per rider type and express each count as a percentage
# (rounded to two decimals) of all rides.
summary_table <- table(bike_data$member_casual)
percentage <- round(summary_table / sum(summary_table) * 100, 2)
# Create a data frame for plotting.
# `percentage` is still a table object here. Handing a table directly to
# data.frame() expands it into `percentage.Var1` / `percentage.Freq` columns,
# which would leave no `percentage` column for the plot to use, so it is
# flattened to a plain numeric vector first.
plot_data <- data.frame(
  member_casual = names(summary_table),
  percentage = as.numeric(percentage)
)
# Create the pie chart: a single stacked bar wrapped into polar coordinates,
# with each slice labelled by its percentage at the middle of the slice.
total_pie <- ggplot(plot_data, aes(x = "", y = percentage, fill = member_casual)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "right") +
  labs(
    title = "Percentage of Member vs. Casual",
    fill = "",
    x = NULL,
    y = NULL
  ) +
  geom_text(
    aes(label = paste0(percentage, "%")),
    position = position_stack(vjust = 0.5)
  )
# Display the pie chart
print(total_pie)
## Creating the average duration bike used each month between member and casual user ##
# Ride length in minutes, stored as a difftime column.
bike_data$duration <- difftime(bike_data$ended_at, bike_data$started_at, units = "mins")
# Parse the ride start into a date-time column.
bike_data$start_time <- ymd_hms(bike_data$started_at)
# "YYYY-MM" label of the start month (also used by the per-month usage chart).
bike_data$month_year <- format(bike_data$start_time, "%Y-%m")
# Mean ride duration for every (month, rider type) pair.
summary_data <- bike_data %>%
  mutate(month = floor_date(start_time, "month")) %>%
  group_by(month, member_casual) %>%
  summarize(
    # Ask for minutes explicitly so the number does not depend on whatever
    # unit the difftime happens to carry.
    avg_duration = mean(as.numeric(duration, units = "mins"), na.rm = TRUE),
    # Return an ungrouped result; otherwise the output stays grouped by
    # `month` and summarize() prints a regrouping message.
    .groups = "drop"
  )
# Create the chart: side-by-side bars per month, one bar per rider type.
ggplot(summary_data, aes(x = month, y = avg_duration, fill = member_casual)) +
  geom_col(position = "dodge") +
  scale_x_datetime(date_labels = "%b %Y", date_breaks = "1 month") +
  labs(
    title = "Average Duration by Month for Member and Casual",
    x = "Month",
    y = "Average Duration (minutes)",
    fill = "Member/Casual"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Creating the average bike used each month between member and casual user ##
# For each month and rider type, `avg_users` is the number of rides in that
# group divided by the number of distinct start dates in that group, i.e. the
# average number of rides per day on which at least one ride started.
average_users <- bike_data %>%
  group_by(month_year, member_casual) %>%
  summarize(
    avg_users = n() / length(unique(date(start_time))),
    # Return an ungrouped result; otherwise the output stays grouped by
    # `month_year` and summarize() prints a regrouping message.
    .groups = "drop"
  )
# Create the chart: side-by-side bars per month, one bar per rider type.
ggplot(average_users, aes(x = month_year, y = avg_users, fill = member_casual)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Average Number of Users per Month",
    x = "Month",
    y = "Average Number of Users",
    fill = "User Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Average distance travel by member and casual user ##
# Haversine distance between each ride's start and end coordinates.
# distHaversine() takes (longitude, latitude) pairs; the result is divided
# by 1000 to express it in kilometers.
bike_data <- mutate(
  bike_data,
  distance_km = distHaversine(
    cbind(start_lng, start_lat),
    cbind(end_lng, end_lat)
  ) / 1000
)
# Mean trip distance per rider type, ignoring missing distances.
distance_table <- summarise(
  group_by(bike_data, member_casual),
  avg_distance = mean(as.numeric(distance_km), na.rm = TRUE)
)
# Bar chart of the two averages, with the value printed above each bar.
ggplot(
  distance_table,
  aes(x = member_casual, y = avg_distance, fill = member_casual)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = sprintf("%.2f km", avg_distance)),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 4
  ) +
  ggtitle("Average Distance Traveled by User Type") +
  xlab("User Type") +
  ylab("Average Distance (km)") +
  theme_minimal()
## Average duration bike used by member and casual user ##
# Mean ride duration per rider type.
# `duration` is converted to a plain number of minutes before averaging so
# the plotted column is numeric rather than a difftime.
average_duration_by_user <- bike_data %>%
  group_by(member_casual) %>%
  summarize(
    average_duration = mean(as.numeric(duration, units = "mins"), na.rm = TRUE)
  )
# Bar chart of the two averages, with the value printed above each bar.
ggplot(
  average_duration_by_user,
  aes(x = member_casual, y = average_duration, fill = member_casual)
) +
  geom_bar(stat = "identity", position = "dodge") +
  # The values are durations, so the label unit is minutes (it previously
  # read "km", which contradicted the axis label).
  geom_text(
    aes(label = sprintf("%.2f mins", average_duration)),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 4
  ) +
  labs(
    x = "User Type",
    y = "Average Duration (mins)",
    title = "Average Duration by User Type"
  ) +
  theme_minimal()