Advertisement
JagadeeshUndavalli

Untitled

Sep 25th, 2022
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.90 KB | None | 0 0
  1. from html.entities import html5
  2. from pydoc import html
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. import seaborn as sns
  7. from sklearn.preprocessing import LabelEncoder
  8. from sklearn.ensemble import RandomForestClassifier
  9. from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
  10. from sklearn.model_selection import train_test_split
  11. import warnings
  12. warnings.filterwarnings("ignore")
  13.  
  14. from code import interact
  15. import streamlit as st
  16. import streamlit.components.v1 as components
  17. st.set_page_config(page_title="Student Dropout Predictor", layout="wide")
  18. # components.html('<html><body><div class="header">Student Dropout Prediction</div></body></html>')
  19. with st.container():
  20. st.title("Student Dropout Predictor")
  21. # st.subheader("By team CODE BUDDIES")
  22. st.write("Prediction of a student whether he/she drops out from the education based on various factors")
  23.  
  24. with open('app.css') as f:
  25. st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
  26.  
  27. # input_dit = {'school' : 'st.selectbox("Select School type",("MS","GP"))', 'gender' : st.selectbox("Gender",("M","F")), 'age' : st.slider("Age",15,22), 'address' : st.selectbox("Address",("R","U")),'famsize':st.selectbox("Family Size",("GT3","LE3")), 'Pstatus' : st.selectbox("Pstatus",("T","A")), 'Medu' : st.selectbox("Mother Education",(0,1,2,3,4)),'Fedu' : st.selectbox("Father Education",(0,1,2,3,4)),'Mjob' : st.selectbox("Mother JOb",("Teacher","at home","health","services","other")),'Fjob' : st.selectbox("Father JOb",("Teacher","at home","health","services","other")),'reason' : st.selectbox("Reason",("Reputation","Course","Home","other")),'gaurdian' : st.selectbox("Gaurdian",("Father","Mother","Other")),'traveltime' : st.selectbox("Travel Time(hrs)",(1,2,3,4)),'studytime' : st.selectbox("Study Time(hrs)",(1,2,3,4)),'failures' : st.selectbox("failures",(0,1,2,3)),'schoolsup' : st.selectbox("School Support",("Yes","No")),'famsup' : st.selectbox("Family Support",("Yes","No")),'paid' : st.selectbox("Fee paid",("Yes","No")),'activities' : st.selectbox("Activities",("Yes","No")),'nursery' : st.selectbox("Nursery",("Yes","No")),'higher' : st.selectbox("Higher Education??",("Yes","No")),'internet' : st.selectbox("Internet",("Yes","No")),'romantic' : st.selectbox("Romantic",("Yes","No")),'famrel' : st.selectbox("Family relatives",(1,2,3,4)),'freetime' : st.selectbox("Free Time(hrs)",(1,2,3,4)),'goout' : st.selectbox("Vacation/Go out Time(hrs)",(1,2,3,4)),'Dalc' : st.selectbox("Dalc",(1,2,3,4)),'Walc' : st.selectbox("Walc",(1,2,3,4)),'health' : st.selectbox("Health",(1,2,3,4)),'absences' : st.slider("Days absent",0,100)}
  28.  
  29.  
  30. data = pd.read_csv('dropout.csv')
  31.  
  32. le = LabelEncoder()
  33. feature_names = data.columns.values
  34.  
  35. for name in feature_names:
  36. if data[name].dtype =='object':
  37. data[name] = le.fit_transform(data[name])
  38.  
  39. X = data.drop('dropout', axis=1)
  40. y= data.dropout
  41.  
  42. # selecting the most important features
  43.  
  44. from sklearn.tree import DecisionTreeClassifier
  45. from sklearn.feature_selection import SelectFromModel
  46.  
  47. from sklearn.model_selection import train_test_split
  48. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
  49.  
  50. model = SelectFromModel(DecisionTreeClassifier())
  51. model.fit(X_train, y_train)
  52.  
  53. model.get_support()
  54. selected_feat= X_train.columns[(model.get_support())]
  55.  
  56. # st.write(selected_feat.values)
  57.  
  58.  
  59. input_names = {'school' : 'Select School type', 'gender' : 'Gender', 'age' : "Age", 'address' : "Address",'famsize':"Family Size", 'Pstatus' : "Pstatus", 'Medu' : "Mother Education",'Fedu' :"Father Education",'Mjob' : "Mother Job", 'Fjob' : "Father Job",'reason' : "Reason",'guardian' :"Guardian",'traveltime' : "Travel Time(hrs)",'studytime' : "Study Time(hrs)",'failures' :"Failures",'schoolsup' : "School Support",'famsup' : "Family Support",'paid' : "Fee paid",'activities' : "Activities",'nursery':"Nursery",'higher' : "Higher Education??",'internet' : "Internet",'romantic' : "Romantic",'famrel' : "Family relatives",'freetime' :"Free Time(hrs)",'goout' : "Vacation/Go out Time(hrs)",'Dalc' : "Dalc",'Walc' : "Walc",'health' : "Health",'absences' : "Days absent"}
  60. input_type = {'school' : ['',"MS","GP"], 'gender' :['',"M","F"], 'address' : ['',"R","U"],'famsize':['',"GT3","LE3"], 'Pstatus' : ['',"T","A"], 'Medu' : ['',0,1,2,3,4],'Fedu' : ['',0,1,2,3,4],'Mjob' : ['',"Teacher","at home","health","services","other"],'Fjob' : ['',"Teacher","at home","health","services","other"],'reason' : ['',"Reputation","Course","Home","other"],'guardian' : ['',"Father","Mother","Other"],'traveltime' : ['',1,2,3,4],'studytime' : ['',1,2,3,4],'failures' : ['',0,1,2,3],'schoolsup' : ['',"Yes","No"],'famsup' : ['',"Yes","No"],'paid' : ['',"Yes","No"],'activities' : ['',"Yes","No"],'nursery' : ['',"Yes","No"],'higher' : ['',"Yes","No"],'internet' : ['',"Yes","No"],'romantic' : ['',"Yes","No"],'famrel' : ['',1,2,3,4],'freetime' : ['',1,2,3,4],'goout' : ['',1,2,3,4],'Dalc' : ['',1,2,3,4],'Walc' : ['',1,2,3,4],'health' : ['',1,2,3,4]}
  61. input_lst=[]
  62.  
  63. with st.container():
  64. for i in selected_feat:
  65. st.write(input_lst)
  66. if i=='age':
  67. ele = st.slider("Age",15,22)
  68. elif i!='age' and i!='absences':
  69. ele = st.selectbox(input_names[i], input_type[i])
  70. elif i=="absences":
  71. ele = st.slider("Days absent",0,100)
  72. if ele!='':
  73. input_lst.append(ele)
  74.  
  75.  
  76. # input_lst[0] = st.selectbox(input_names[selected_feat[0]],input_type[selected_feat[0]]) if selected_feat[0]!='absences' else st.slider('absences',15,22)
  77. # input_lst[1] = st.selectbox(input_names[selected_feat[1]],input_type[selected_feat[1]]) if selected_feat[1]!='absences' else st.slider('absences',15,22)
  78. # input_lst[2] = st.selectbox(input_names[selected_feat[2]],input_type[selected_feat[2]]) if selected_feat[2]!='absences' else st.slider('absences',15,22)
  79. # input_lst[3] = st.selectbox(input_names[selected_feat[3]],input_type[selected_feat[3]]) if selected_feat[3]!='absences' else st.slider('absences',15,22)
  80. # input_lst[4] = st.selectbox(input_names[selected_feat[4]],input_type[selected_feat[4]]) if selected_feat[4]!='absences' else st.slider('absences',15,22)
  81. # input_lst[5] = st.selectbox(input_names[selected_feat[5]],input_type[selected_feat[5]]) if selected_feat[5]!='absences' else st.slider('absences',15,22)
  82. # input_lst[6] = st.selectbox(input_names[selected_feat[6]],input_type[selected_feat[6]]) if selected_feat[6]!='absences' else st.slider('absences',15,22)
  83. # input_lst[7] = st.selectbox(input_names[selected_feat[7]],input_type[selected_feat[7]]) if selected_feat[7]!='absences' else st.slider('absences',15,22)
  84. # input_lst[8] = st.selectbox(input_names[selected_feat[8]],input_type[selected_feat[8]]) if selected_feat[8]!='absences' else st.slider('absences',15,22)
  85. # input_lst[9] = st.selectbox(input_names[selected_feat[9]],input_type[selected_feat[9]]) if selected_feat[9]!='absences' else st.slider('absences',15,22)
  86. # input_lst[10]= st.selectbox(input_names[selected_feat[10]],input_type[selected_feat[10]]) if selected_feat[10]!='absences' else st.slider('absences',15,22)
  87.  
  88. # st.write(input_lst)
  89.  
  90. df = pd.DataFrame(data=data, columns=selected_feat)
  91. df_target = data['dropout']
  92. # df1 = pd.concat([df, df_target], ignore_index=True, sort=False)
  93. df = df.join(df_target, lsuffix='_caller', rsuffix='_other')
  94.  
  95. X = df.drop('dropout',axis=1)
  96. y = df.dropout
  97.  
  98. #dealing with unbalanced data
  99.  
  100. # from imblearn.over_sampling import RandomOverSampler
  101.  
  102. # ros = RandomOverSampler()
  103. # X, y = ros.fit_resample(X, y)
  104.  
  105.  
  106. # #splitting the dataset
  107.  
  108.  
  109. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)
  110.  
  111. # """> Training the models and Evaluating their performance
  112.  
  113. # *`Random Forest` is a classifier that contains a number of decision trees on various subsets of the given dataset and takes the average to improve the predictive accuracy of that dataset.*
  114. # """
  115.  
  116. model = RandomForestClassifier(n_estimators=13, criterion='gini',max_depth=10, max_features='auto')
  117. model.fit(X_train,y_train)
  118.  
  119. y_pred = model.predict(X_test)
  120.  
  121.  
  122. # st.write("Random Forest Classifier\'s Accuracy :",round(accuracy_score(y_test, y_pred),4))
  123. # st.write("Random Forest Classifier\'s F1 Score :",round(f1_score(y_test, y_pred),4))
  124.  
  125. X_test_input_cols = list(X.columns)
  126. default_dict = {}
  127. for i in range(len(X_test_input_cols)):
  128. default_dict[X_test_input_cols[i]] = input_lst[i]
  129.  
  130. X_input_test = pd.DataFrame(default_dict,index=[0])
  131.  
  132. for name in X_test_input_cols:
  133. if X_input_test[name].dtype =='object':
  134. X_input_test[name] = le.fit_transform(X_input_test[name])
  135.  
  136. y_input_pred = model.predict(X_input_test)
  137. if y_input_pred[0]==0:
  138. st.success('The Student will not dropout 😆😆😆')
  139. else:
  140. st.error('The Student will dropout 😭😭😭')
  141.  
  142.  
  143. st.write('')
  144. st.write('')
  145. st.write("The source code can be found here 👉[🔗](https://www.github.com/UndavalliJagadeesh/ADS_HACKATHON)")
  146.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement