# -*- coding: utf-8 -*-
"""Salary-prediction.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1l22jFuwbNijASJOnbx3dE2q-8LSlvGaM

Trains a random-forest regressor on the placement training CSV, reports the
mean squared error on a held-out validation split, and writes the predicted
annual salary for every row of the placement test CSV to two CSV files.

Requires pandas and scikit-learn. Install them from a shell
(``pip install pandas scikit-learn``); the notebook-only ``!pip`` magic is
not valid Python and has been removed from this script.
"""

import pandas as pd

# Input data locations (Colab's default upload directory).
TRAIN_PATH = "/content/Placement_Train.csv"
TEST_PATH = "/content/Placement_Test.csv"

# Output files written to the current working directory.
SUBMISSION_PATH = "placement_submission.csv"
RESULTS_PATH = "predicted_salary_results.csv"

TARGET_COL = "Annual_salary"
ID_COL = "SR_no"
CATEGORICAL_COLS = [
    "gender",
    "ssc_b",
    "hsc_b",
    "hsc_s",
    "degree_t",
    "workex",
    "specialisation",
]


def encode_categoricals(train_df, test_df, columns):
    """Label-encode *columns* of both frames and return encoded copies.

    A separate encoder is fitted on each training column and then applied
    to the same column of the test frame, so both frames share one mapping
    per column.

    Args:
        train_df: Training frame; must contain every name in *columns*.
        test_df: Test frame; must contain every name in *columns*.
        columns: Names of the categorical columns to encode.

    Returns:
        A ``(train, test)`` tuple of new DataFrames; the inputs are not
        modified.

    Raises:
        ValueError: Raised by ``LabelEncoder.transform`` when the test
            column holds a value that never occurs in the training column.
    """
    # Imported here so the pandas-only helpers in this module stay
    # importable when scikit-learn is not installed.
    from sklearn.preprocessing import LabelEncoder

    train_encoded = train_df.copy()
    test_encoded = test_df.copy()
    for col in columns:
        encoder = LabelEncoder()
        train_encoded[col] = encoder.fit_transform(train_encoded[col])
        test_encoded[col] = encoder.transform(test_encoded[col])
    return train_encoded, test_encoded


def align_features(frame, feature_columns):
    """Return *frame* restricted to *feature_columns*, in that order.

    The model must be given exactly the columns it was trained on; this
    keeps prediction working when the test CSV has extra columns or a
    different column order.

    Args:
        frame: DataFrame to select from.
        feature_columns: Iterable of column names used to train the model.

    Returns:
        A DataFrame with exactly *feature_columns* as its columns.

    Raises:
        KeyError: If any feature column is absent from *frame*.
    """
    wanted = list(feature_columns)
    missing = [col for col in wanted if col not in frame.columns]
    if missing:
        raise KeyError(f"Missing feature columns: {missing}")
    return frame[wanted]


def build_results(ids, predictions):
    """Pair row identifiers with their predicted salaries.

    Args:
        ids: Row identifiers (the ``SR_no`` values of the test data).
        predictions: Predicted annual salaries, one per identifier.

    Returns:
        A DataFrame with the columns ``SR_no`` and ``Annual_salary``.
    """
    return pd.DataFrame({ID_COL: ids, TARGET_COL: predictions})


def main():
    """Train the model, report validation MSE, and write the prediction CSVs."""
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    # Load the datasets.
    placement_train = pd.read_csv(TRAIN_PATH)
    placement_test = pd.read_csv(TEST_PATH)

    # Display basic information about the training dataset.
    # DataFrame.info() prints its report itself and returns None, so it is
    # not wrapped in print().
    placement_train.info()

    # Data preprocessing: encode categorical variables.
    placement_train, placement_test = encode_categoricals(
        placement_train, placement_test, CATEGORICAL_COLS
    )

    # Separate features and target variable.
    # NOTE(review): SR_no stays in the feature set, as in the original
    # notebook; it looks like a row identifier - confirm whether it should
    # be excluded from training.
    X = placement_train.drop(columns=[TARGET_COL])
    y = placement_train[TARGET_COL]

    # Split the data into training and validation sets.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Build the model (Random Forest Regressor).
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model on the validation set.
    mse = mean_squared_error(y_valid, model.predict(X_valid))
    print(f'Mean Squared Error on Validation Set: {mse}')

    # Predict annual salary on the test set (computed once, reused below).
    test_features = align_features(placement_test, X.columns)
    test_predictions = model.predict(test_features)

    # Predictions only, one column.
    submission = pd.DataFrame({TARGET_COL: test_predictions})
    submission.to_csv(SUBMISSION_PATH, index=False)

    # Predictions keyed by SR_no.
    results = build_results(placement_test[ID_COL], test_predictions)
    print(results)
    results.to_csv(RESULTS_PATH, index=False)


if __name__ == "__main__":
    main()