-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_cgpa.py
146 lines (113 loc) · 4.3 KB
/
get_cgpa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import time
from openpyxl import load_workbook
import fitz # PyMuPDF
def scan_result_dir(base_folder):
    """
    Traverse the directory structure and collect all semester data for each roll number.

    Expected layout::

        Base_Folder/
            1/
                12345.pdf
                67890.pdf
            2/
                12345.pdf
                67890.pdf
            3/
                12345.pdf

    Only sub-directories whose name parses as an integer are treated as
    semesters; they are visited in ascending numeric order. Every other entry
    in ``base_folder`` (stray files, non-numeric folders) is ignored instead of
    aborting the scan.

    The roll number is derived from the PDF file name: the text before the
    first ".", then the part after the last "-".

    Args:
        base_folder: Path of the folder that contains the semester folders.

    Returns:
        A dict mapping roll number (str, as found in the file name) to a list
        of dicts, one per successfully parsed PDF, with the keys "semester",
        "sgpa", "cgpa", "back_papers", "result", "division" and "pdf_path".
        A roll number whose PDFs all yielded no data maps to an empty list.
    """
    all_data = {}
    start_time = time.time()

    # Keep only the entries whose name is an integer, so one stray file or
    # folder in the base directory cannot crash the numeric sort.
    semesters = []
    for entry in os.listdir(base_folder):
        try:
            semesters.append((int(entry), entry))
        except ValueError:
            continue
    # Sort on the numeric value only, so "10" comes after "9".
    semesters.sort(key=lambda pair: pair[0])

    for _, semester in semesters:
        sem_path = os.path.join(base_folder, semester)
        if not os.path.isdir(sem_path):
            continue

        for pdf_file in os.listdir(sem_path):
            # Case-insensitive so "12345.PDF" is picked up as well.
            if not pdf_file.lower().endswith(".pdf"):
                continue

            filename = pdf_file.split(".")[0]  # Extract roll number from file name
            roll_number = filename.split('-')[-1]
            roll_results = all_data.setdefault(roll_number, [])

            pdf_path = os.path.join(sem_path, pdf_file)
            pdf_data = read_csjmu_result(pdf_path)
            if not pdf_data:
                continue

            # pdf_data is positional:
            # [path, roll number, sgpa, cgpa, back papers, result, division]
            roll_results.append({
                "semester": semester,
                "sgpa": pdf_data[2],
                "cgpa": pdf_data[3],
                "back_papers": pdf_data[4],
                "result": pdf_data[5],
                "division": pdf_data[6],
                "pdf_path": pdf_data[0],
            })

    end_time = time.time()
    elapsed = end_time - start_time
    print(f"Elapsed time: {elapsed:.2f} seconds - Scanning Done!")
    return all_data
def read_csjmu_result(pdf_path):
    """
    Extract data from result pdf
    Supported: CSJMU RESULT ONLY

    Every page is read as plain text and split into lines. Each field is taken
    from the line that immediately follows its label ("ROLL NO.", "SGPA",
    "CGPA", "CARRY OVER PAPER(S)", "RESULT", "DIVISION"). When a label occurs
    more than once, the last occurrence wins.

    Args:
        pdf_path: Path of the result PDF to open.

    Returns:
        ``[pdf_path (str), roll_number (int), sgpa (float), cgpa (float),
        back_papers (list of str or None), result, division]``, or ``None``
        when the roll number, SGPA or CGPA label was not found in the PDF.
        ``division`` is only filled in when the result read so far is
        "PASSED".

    Raises:
        ValueError: If the text following the roll number, SGPA or CGPA label
            is not numeric.
    """
    # Start every field as None so a PDF that lacks a label is reported as
    # "no data" instead of failing on an unassigned local.
    roll_number = None
    sgpa = None
    cgpa = None
    back_papers = None
    result = None
    division = None

    with fitz.open(pdf_path) as pdf:
        # Iterate through all pages, starting from the first one.
        for page_number in range(len(pdf)):
            page = pdf.load_page(page_number)
            lines = page.get_text("text").split('\n')

            # Pair each line with the one after it. The final line has no
            # successor and therefore can never act as a label.
            for line, next_line in zip(lines, lines[1:]):
                if "ROLL NO." in line:
                    roll_number = next_line
                if "CARRY OVER PAPER(S)" in line:
                    # An "SGPA" label directly after the heading means the
                    # carry-over list is empty.
                    if next_line == "SGPA":
                        back_papers = None
                    else:
                        back_papers = next_line.split(",")
                if "RESULT" == line:
                    result = next_line
                if "DIVISION" == line and result == "PASSED":
                    division = next_line
                if "SGPA" in line:
                    sgpa = next_line
                if "CGPA" in line:
                    cgpa = next_line

    if roll_number is None or sgpa is None or cgpa is None:
        return None

    return [
        str(pdf_path),
        int(roll_number),
        float(sgpa),
        float(cgpa),
        back_papers if back_papers else None,
        result,
        division,
    ]
def fix_coloumn(excel_file):
    """
    Fix coloumn width automatically
    Pass excel file path as argument

    Each column of the active sheet is resized to the length of its longest
    cell value (as text) plus two characters of padding. The workbook is then
    saved back to the same path.

    Args:
        excel_file: Path of the Excel workbook to adjust in place.
    """
    # Local import keeps this block self-contained; openpyxl is already a
    # dependency of this file.
    from openpyxl.utils import get_column_letter

    wb = load_workbook(excel_file)
    ws = wb.active

    for column in ws.columns:
        # Only truly empty cells are skipped; values such as 0 or False still
        # take up space when displayed.
        max_length = max(
            (len(str(cell.value)) for cell in column if cell.value is not None),
            default=0,
        )
        # Derive the letter from the numeric column index rather than
        # ``column_letter``, which merged cells do not provide.
        column_letter = get_column_letter(column[0].column)
        ws.column_dimensions[column_letter].width = max_length + 2  # Add some padding

    # Save the adjusted Excel file
    wb.save(excel_file)
if __name__ == "__main__":
    import sys

    # The results folder can be given as the first command-line argument;
    # without one, the original hard-coded location is used.
    default_results_dir = r"C:\Users\sanjay\Downloads\Results\2BCA-A"
    results_dir = sys.argv[1] if len(sys.argv) > 1 else default_results_dir

    data = scan_result_dir(results_dir)
    print(data)