# reactor_decision_tree.R
# Load required libraries
library(ggplot2)
library(dplyr)
library(MASS)
library(class) # For k-NN algorithm
# Step 1: Data setup
# One row per candidate element: unit cost, energy density, and 0/1 flags
# saying whether the element is recommended for each use case.
data <- data.frame(
  Element       = c("Thorium", "Americium", "Curium", "Neptunium"),
  Cost          = c(7.5e3, 1.5e6, 3e6, 1e4),     # Cost in USD/kg
  EnergyDensity = c(7.94e4, 1.1e5, 3e6, 1.1e5),  # Energy Density in MJ/kg
  RTG           = c(0, 1, 1, 1),                 # Recommended for RTG (1/0)
  Reactor       = c(1, 0, 0, 1)                  # Recommended for Reactor (1/0)
)

# Echo the assembled table to the console
print(data)
# Binomial GLMs: one per use case, with the 0/1 recommendation flag as the
# response and Cost + EnergyDensity as predictors.
# NOTE(review): `data` holds only four rows for three coefficients, so
# separation / convergence warnings are plausible here -- confirm intended.
rtg_model <- glm(
  formula = RTG ~ Cost + EnergyDensity,
  family = binomial,
  data = data
)
reactor_model <- glm(
  formula = Reactor ~ Cost + EnergyDensity,
  family = binomial,
  data = data
)

# AIC-driven stepwise term selection, allowed to move in both directions,
# applied to each fitted model in turn
stepwise_rtg <- stepAIC(object = rtg_model, direction = "both")
stepwise_reactor <- stepAIC(object = reactor_model, direction = "both")
# Step 2: Scatter plot of samples
# One point per element in the Cost / Energy Density plane, coloured by the
# RTG recommendation flag.
ggplot(data = data, mapping = aes(x = Cost, y = EnergyDensity)) +
  geom_point(mapping = aes(color = factor(RTG)), size = 4) +
  theme_minimal() +
  labs(
    title = "Scatter Plot: Cost vs Energy Density for RTG Use Case",
    x = "Cost (USD/kg)",
    y = "Energy Density (MJ/kg)",
    color = "RTG Recommendation"
  )
# Step 3: k-NN classification on the dataset
# Classify every cell of a regular Cost x EnergyDensity grid with k-NN trained
# on the standardized samples, then draw the predicted class as a translucent
# background behind the observed points (RTG use case).

# Feature matrix and target vector for RTG
features <- data[, c("Cost", "EnergyDensity")]
target_rtg <- data$RTG

# Standardize the features. scale() records the centering and scaling
# constants it used as attributes; keep them so the grid can be mapped back to
# original units with exactly the same constants.
features_std <- scale(features)
feature_center <- attr(features_std, "scaled:center")
feature_spread <- attr(features_std, "scaled:scale")
features_scaled <- as.data.frame(features_std)

# Regular 100 x 100 grid spanning the standardized feature ranges. This is the
# k-NN input and is kept separate from the plotting grid so that the two unit
# systems are never mixed. `length.out` is spelled out rather than relying on
# partial argument matching.
grid_scaled <- expand.grid(
  Cost = seq(
    min(features_scaled$Cost),
    max(features_scaled$Cost),
    length.out = 100
  ),
  EnergyDensity = seq(
    min(features_scaled$EnergyDensity),
    max(features_scaled$EnergyDensity),
    length.out = 100
  )
)

# Apply k-NN with k = 3
k <- 3
knn_predictions <- knn(
  train = features_scaled,
  test = grid_scaled,
  cl = target_rtg,
  k = k
)

# Plotting grid: the same points converted back to original units
# (row order is unchanged, so it stays aligned with `knn_predictions`)
grid <- grid_scaled
grid$Cost <- grid_scaled$Cost * feature_spread[["Cost"]] +
  feature_center[["Cost"]]
grid$EnergyDensity <-
  grid_scaled$EnergyDensity * feature_spread[["EnergyDensity"]] +
  feature_center[["EnergyDensity"]]

# Plot k-NN decision boundary. The predictions are attached as a column of the
# plot data (a temporary copy; `grid` itself is left with its two feature
# columns) instead of being mapped from a free-floating vector.
ggplot(
  transform(grid, Prediction = knn_predictions),
  aes(x = Cost, y = EnergyDensity)
) +
  geom_tile(aes(fill = Prediction), alpha = 0.3) +
  geom_point(
    data = data,
    aes(x = Cost, y = EnergyDensity, color = as.factor(RTG)),
    size = 4
  ) +
  labs(
    title = "k-NN Decision Boundary with k = 3 for RTG Use Case",
    x = "Cost (USD/kg)",
    y = "Energy Density (MJ/kg)",
    fill = "k-NN Prediction",
    color = "RTG Recommendation"
  ) +
  theme_minimal()
# Repeat the k-NN classification for the Reactor use case: same features,
# different target vector.
target_reactor <- data$Reactor

# `grid` holds original units (USD/kg, MJ/kg) at this point, while the
# training features are standardized. Passing `grid` to knn() directly would
# compare distances across two different unit systems, so the grid is first
# re-standardized with the training means and standard deviations.
feature_cols <- names(features)
grid_std <- as.data.frame(scale(
  grid[, feature_cols],
  center = colMeans(features),
  scale = vapply(features, sd, numeric(1))
))
knn_predictions_reactor <- knn(
  train = features_scaled,
  test = grid_std,
  cl = target_reactor,
  k = k
)

# Plot k-NN decision boundary for Reactor. Tiles are positioned in original
# units; the predictions are attached as a column of a temporary copy of
# `grid` so the fill mapping refers to the plot data.
ggplot(
  transform(grid, Prediction = knn_predictions_reactor),
  aes(x = Cost, y = EnergyDensity)
) +
  geom_tile(aes(fill = Prediction), alpha = 0.3) +
  geom_point(
    data = data,
    aes(x = Cost, y = EnergyDensity, color = as.factor(Reactor)),
    size = 4
  ) +
  labs(
    title = "k-NN Decision Boundary with k = 3 for Reactor Use Case",
    x = "Cost (USD/kg)",
    y = "Energy Density (MJ/kg)",
    fill = "k-NN Prediction",
    color = "Reactor Recommendation"
  ) +
  theme_minimal()