forked from Micky-12/Question_Paper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquestion_selection_without_pdf_lekha
96 lines (86 loc) · 4.08 KB
/
question_selection_without_pdf_lekha
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
# Simulate training data
def simulate_data(n_samples=1000, seed=42):
    """Generate synthetic training pairs mapping a mark to a difficulty mix.

    Each sample is a random integer mark in the range 0..40 together with a
    fixed three-element proportion vector (one entry per difficulty level
    0, 1, 2) chosen by the band the mark falls into:

    * mark <= 13        -> [0.7, 0.25, 0.05]
    * 14 <= mark <= 26  -> [0.3, 0.5, 0.2]
    * mark >= 27        -> [0.1, 0.4, 0.5]

    Args:
        n_samples: Number of samples to generate.
        seed: Seed for NumPy's *global* random generator. Pass ``None`` to
            leave the global generator untouched.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 1, 1)``
        (one timestep, one feature, as the RNN expects) and ``y`` has shape
        ``(n_samples, 3)``.
    """
    if seed is not None:
        # Deliberately seeds the global generator (not a private one) so any
        # later code drawing from np.random stays reproducible as well.
        np.random.seed(seed)
    marks = np.random.randint(0, 41, size=n_samples)  # Marks out of 40

    # Row i holds the proportions for band i.
    band_proportions = np.array([
        [0.7, 0.25, 0.05],
        [0.3, 0.5, 0.2],
        [0.1, 0.4, 0.5],
    ])
    # digitize maps marks < 14 to band 0, 14..26 to band 1, >= 27 to band 2.
    bands = np.digitize(marks, [14, 27])
    difficulty_proportions = band_proportions[bands]

    return marks.reshape(-1, 1, 1), difficulty_proportions  # Reshape for RNN
# Create and train RNN model
def create_rnn_model(X, y, epochs=100, batch_size=32, verbose=1):
    """Build, compile and fit a small SimpleRNN that outputs three proportions.

    The network is an ``Input`` of shape ``(1, 1)`` followed by a 10-unit
    ``SimpleRNN`` (ReLU) and a 3-unit softmax ``Dense`` layer, trained with
    Adam and categorical cross-entropy.

    Args:
        X: Training inputs; must be compatible with an input shape of
            ``(1, 1)`` per sample, i.e. ``(n_samples, 1, 1)``.
        y: Training targets with three values per sample.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        verbose: Verbosity level forwarded to ``fit``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object rather than
        # the `input_shape=` layer argument, which current Keras deprecates.
        tf.keras.Input(shape=(1, 1)),
        SimpleRNN(10, activation='relu'),  # RNN layer
        Dense(3, activation='softmax'),  # Output proportions as softmax
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model
# Function to predict difficulty proportions using the trained RNN model
def predict_difficulty_proportions(model, mark):
    """Predict the difficulty proportions for a single mark.

    Args:
        model: Object exposing ``predict(array)``; in this file, the model
            returned by ``create_rnn_model``.
        mark: A single numeric mark.

    Returns:
        The first (and only) row of the model's prediction for ``mark``.
    """
    # The network takes (batch, timesteps, features) == (1, 1, 1). Building a
    # 2-D (1, 1) array would depend on the framework silently adding the
    # missing axis, so the full 3-D shape is constructed explicitly.
    mark_array = np.asarray(mark, dtype=np.float32).reshape(1, 1, 1)
    proportions = model.predict(mark_array)
    return proportions[0]
# Select questions based on predicted proportions
def select_questions(questions_df, predicted_proportions, target_marks=100, random_state=None):
    """Greedily pick questions whose difficulty mix follows the prediction.

    The questions are shuffled, then repeatedly scanned in that order. The
    first question is taken whose difficulty is currently under-represented
    (its share of the picks so far is strictly below the predicted share) and
    whose marks still fit within ``target_marks``. Selection stops when the
    target is reached, the pool is exhausted, or a full scan finds nothing.

    Args:
        questions_df: DataFrame with the columns ``'Question'``, ``'Marks'``
            and ``'Difficulty level'``. Difficulty values must be 0, 1 or 2;
            any other value raises ``KeyError``.
        predicted_proportions: Sequence indexable by difficulty level giving
            the desired share of questions for that level.
        target_marks: Upper bound on the total marks of the selection.
        random_state: Optional seed/state forwarded to ``DataFrame.sample``
            to make the shuffle reproducible.

    Returns:
        A list of dicts with the keys ``"Question"``, ``"Marks"`` and
        ``"Difficulty level"``, in selection order. The input DataFrame is
        not modified.
    """
    print("Starting question selection...")
    shuffled = questions_df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Materialise the shuffled rows once; scanning a plain list is far cheaper
    # than re-running DataFrame.iterrows() and DataFrame.drop() for each pick.
    remaining = list(zip(
        shuffled["Question"],
        shuffled["Marks"],
        shuffled["Difficulty level"],
    ))

    selected_questions = []
    total_marks = 0
    difficulty_counts = {0: 0, 1: 0, 2: 0}

    while total_marks < target_marks and remaining:
        picked_so_far = len(selected_questions)
        chosen_position = None
        for position, (_, marks, difficulty) in enumerate(remaining):
            if picked_so_far > 0:
                current_proportion = difficulty_counts[difficulty] / picked_so_far
            else:
                current_proportion = 0
            under_represented = current_proportion < predicted_proportions[difficulty]
            if under_represented and total_marks + marks <= target_marks:
                chosen_position = position
                break

        if chosen_position is None:
            # Nothing changed during this scan, so scanning again would give
            # the same result; stop now instead of retrying.
            print("No suitable question found in this iteration.")
            print("Breaking out of loop - no further suitable questions.")
            break

        text, marks, difficulty = remaining.pop(chosen_position)
        selected_questions.append({
            "Question": text,
            "Marks": marks,
            "Difficulty level": difficulty,
        })
        total_marks += marks
        difficulty_counts[difficulty] += 1
        print(f"Selected {len(selected_questions)} questions. Total marks: {total_marks}")

    return selected_questions
# Main execution flow
if __name__ == "__main__":
    import sys  # Needed only by the script entry point.

    # The question bank path may be given as the first command-line argument;
    # otherwise the original hard-coded location is used.
    csv_path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "C:/Users/Dell/OneDrive/Desktop/MINI PROJECT WEB/updated_data.csv"
    )

    # Load and validate the question bank first, so a missing or malformed
    # file is reported before the time-consuming model training starts.
    try:
        questions_df = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; pandas' parser and
        # empty-data errors, and decoding errors, derive from ValueError.
        print(f"Failed to load data: {e}")
        sys.exit(1)
    print(questions_df.head())  # Print to verify contents are loaded

    # Ensure DataFrame has the necessary columns
    expected_columns = {'Difficulty level', 'Marks', 'Question'}
    if not expected_columns <= set(questions_df.columns):
        print(f"DataFrame missing necessary columns. Expected columns: {expected_columns}")
        sys.exit(1)

    # Train the model on simulated data.
    X, y = simulate_data()
    model = create_rnn_model(X, y)

    marks = 30  # Example internal marks
    predicted_proportions = predict_difficulty_proportions(model, marks)
    selected_questions = select_questions(questions_df, predicted_proportions)

    print("Final selected questions:")
    for question in selected_questions:
        print(question)