-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFeatureImportance.py
31 lines (23 loc) · 1.02 KB
/
FeatureImportance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# Rank the model inputs by how heavily a fitted decision tree relies on each
# of them when predicting Exam_Score, then print the ranking.

# The CSV is resolved relative to the current working directory.
path = 'StudentPerformanceFactors.csv'
student_performance = pd.read_csv(path)

# Prediction target.
y = student_performance['Exam_Score']

# Candidate predictors. The order is kept as-is because it determines the
# column order of the design matrix handed to the tree.
features = [
    'Hours_Studied',
    'Previous_Scores',
    'Attendance',
    'Sleep_Hours',
    'Tutoring_Sessions',
    'Physical_Activity',
    'Parental_Involvement',
    'Gender',
    'Access_to_Resources',
    'Extracurricular_Activities',
    'Motivation_Level',
    'Internet_Access',
    'Family_Income',
    'Teacher_Quality',
    'School_Type',
    'Peer_Influence',
    'Learning_Disabilities',
    'Parental_Education_Level',
    'Distance_from_Home',
]

# get_dummies one-hot encodes whichever of the selected columns are
# non-numeric, so the tree receives a purely numeric matrix. Any column it
# expands is reported below per indicator column, not per original feature.
X = pd.get_dummies(student_performance[features])

# A fixed random_state keeps the fitted tree (and thus the ranking)
# reproducible between runs; fit() returns the estimator itself.
dt_model = DecisionTreeRegressor(random_state=1).fit(X, y)

# One importance value per column of X, in the same order as X.columns.
importances = dt_model.feature_importances_
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

print("\nFeatures sorted by importance (highest to lowest):")
print(feature_importance_df)