Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Janiishaan authored Jul 2, 2024
1 parent f4a5426 commit 738e358
Showing 1 changed file with 86 additions and 0 deletions.
86 changes: 86 additions & 0 deletions Salary Prediction/salary_prediction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
"""Salary-prediction.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1l22jFuwbNijASJOnbx3dE2q-8LSlvGaM
"""

# NOTE: the Colab shell magic `!pip install pandas` is not valid Python in a
# plain .py file (it is a SyntaxError outside IPython). Install pandas with
# `pip install pandas` in the environment before running this script.
import pandas as pd

# Load the datasets.
# Each variable now reads the CSV that matches its name; previously the
# train and test paths were swapped. The same files are read again further
# down into `placement_train` / `placement_test`.
Placement_Train = pd.read_csv("/content/Placement_Train.csv")
placement_test = pd.read_csv("/content/Placement_Test.csv")
placement_sample_submission = pd.read_csv("/content/Plcement_Sample_Submission.csv")

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Load the training data, the test data and the sample submission file.
placement_train = pd.read_csv("/content/Placement_Train.csv")
placement_test = pd.read_csv("/content/Placement_Test.csv")
# NOTE(review): the file name is spelled "Plcement" - confirm it matches the
# name of the uploaded file.
placement_sample_submission = pd.read_csv("/content/Plcement_Sample_Submission.csv")

# Display basic information about the training dataset.
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it is called directly; wrapping it in print() emitted a stray "None" line.
placement_train.info()

# Data Preprocessing
# Encode each categorical column as integer codes. The encoder is re-fitted
# on the training column on every iteration, and the test column is then
# mapped with that same fit so both frames share one code per category.
label_encoder = LabelEncoder()
categorical_cols = ['gender', 'ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'workex', 'specialisation']
for col in categorical_cols:
    placement_train[col] = label_encoder.fit_transform(placement_train[col])
    # NOTE: transform() raises ValueError if the test column contains a
    # category that never appears in the training column.
    placement_test[col] = label_encoder.transform(placement_test[col])

# Pull out the prediction target, then keep every remaining column as a feature
y = placement_train['Annual_salary']
X = placement_train.drop(columns=['Annual_salary'])

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest regressor on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out validation rows
y_pred = model.predict(X_valid)

# Report the validation error as mean squared error
mse = mean_squared_error(y_true=y_valid, y_pred=y_pred)
print(f'Mean Squared Error on Validation Set: {mse}')

# Run the fitted model over the test frame
test_predictions = model.predict(placement_test)

# Wrap the predictions in a single-column frame named after the target
submission = pd.DataFrame(data={'Annual_salary': test_predictions})

# Write the submission to disk without the index column
submission.to_csv(path_or_buf='placement_submission.csv', index=False)

# Build the SR_no -> predicted salary table.
# `test_predictions` computed above from `model.predict(placement_test)` is
# reused here; repeating the identical prediction call was redundant work.
results = pd.DataFrame({
    'SR_no': placement_test['SR_no'],
    'Predicted_Annual_salary': test_predictions,
})

# Display the results
print(results)

# Rename the prediction column to the target's name for the output file
results = results.rename(columns={'Predicted_Annual_salary': 'Annual_salary'})

# Display the updated DataFrame
print(results)

# Write the file once, with the final column names. An earlier write of the
# un-renamed frame to this same path was always overwritten by this one, so
# the file left on disk is unchanged.
results.to_csv('predicted_salary_results.csv', index=False)

0 comments on commit 738e358

Please sign in to comment.