Advertisement
JagadeeshUndavalli

Untitled

Sep 24th, 2022
174
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.96 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from sklearn.preprocessing import LabelEncoder
  6. from sklearn.ensemble import RandomForestClassifier
  7. from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
  8. from sklearn.model_selection import train_test_split
  9. import warnings
  10. warnings.filterwarnings("ignore")
  11.  
  12. from code import interact
  13. import streamlit as st
  14. st.set_page_config(page_title="Student Dropout Predictor", layout="wide")
  15.  
  16. def load_css(file_name):
  17. with open(file_name) as f:
  18. st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
  19. load_css('app.css')
  20. with st.container():
  21.  
  22. st.title("Student Dropout Predictor")
  23. st.subheader("By team CODE BUDDIES")
  24. st.write("Prediction of a student whether he/she drops out from the education based on various factors")
  25. st.write("The source code can be found [here](https://www.github.com/UndavalliJagadeesh/ADS_HACKATHON)")
  26.  
  27.  
  28. with st.container():
  29. school = st.selectbox("Select School type",("MS","GP"))
  30. gender = st.selectbox("Gender",("M","F"))
  31. age = st.slider("Age",15,22)
  32. address = st.selectbox("Address",("R","U"))
  33. famsize = st.selectbox("Family Size",("GT3","LE3"))
  34. Pstatus = st.selectbox("Pstatus",("T","A"))
  35. Medu = st.selectbox("Mother Education",(0,1,2,3,4))
  36. Fedu = st.selectbox("Father Education",(0,1,2,3,4))
  37. Mjob = st.selectbox("Mother JOb",("Teacher","at home","health","services","other"))
  38. Fjob = st.selectbox("Father JOb",("Teacher","at home","health","services","other"))
  39. reason = st.selectbox("Reason",("Reputation","Course","Home","other"))
  40. gaurdian = st.selectbox("Gaurdian",("Father","Mother","Other"))
  41. traveltime = st.selectbox("Travel Time(hrs)",(1,2,3,4))
  42. studytime = st.selectbox("Study Time(hrs)",(1,2,3,4))
  43. failures = st.selectbox("failures",(0,1,2,3))
  44. schoolsup = st.selectbox("School Support",("Yes","No"))
  45. famsup = st.selectbox("Family Support",("Yes","No"))
  46. paid = st.selectbox("Fee paid",("Yes","No"))
  47. activities = st.selectbox("Activities",("Yes","No"))
  48. nursery = st.selectbox("Nursery",("Yes","No"))
  49. higher = st.selectbox("Higher Education??",("Yes","No"))
  50. internet = st.selectbox("Internet",("Yes","No"))
  51. romantic = st.selectbox("Romantic",("Yes","No"))
  52. famrel = st.selectbox("Family relatives",(1,2,3,4))
  53. freetime = st.selectbox("Free Time(hrs)",(1,2,3,4))
  54. goout = st.selectbox("Vacation/Go out Time(hrs)",(1,2,3,4))
  55. Dalc = st.selectbox("Dalc",(1,2,3,4))
  56. Walc = st.selectbox("Walc",(1,2,3,4))
  57. health = st.selectbox("Health",(1,2,3,4))
  58. absences = st.slider("Days absent",0,100)
  59.  
  60. input_lst=[school,gender,age,address,famsize,Pstatus, Medu,Fedu,Mjob, Fjob, reason, gaurdian, traveltime, studytime, failures, schoolsup,famsup,paid, activities, nursery, higher, internet, romantic,famrel,freetime, goout,Dalc,Walc,health,absences]
  61.  
  62. data = pd.read_csv('student-data.csv')
  63.  
  64. le = LabelEncoder()
  65. feature_names = data.columns.values
  66.  
  67. for name in feature_names:
  68. if data[name].dtype =='object':
  69. data[name] = le.fit_transform(data[name])
  70.  
  71. data.hist(edgecolor='black',bins = 25, figsize= (20,20))
  72. plt.show()
  73.  
  74. # """*The `hist()` method of pandas is used to represent the histograms of each feature specified in the data frame.*"""
  75.  
  76. # plt.figure(figsize=(10,10))
  77. # sns.heatmap(data.corr(), cmap="YlGnBu", annot=True)
  78.  
  79.  
  80. X = data.drop('passed', axis=1)
  81. y = data.passed
  82.  
  83.  
  84. # """> Splitting data for Training and Testing"""
  85.  
  86. # #splitting the dataset
  87.  
  88.  
  89. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)
  90.  
  91. # """> Training the models and Evaluating their performance
  92.  
  93. # *Since the target feature is categorical, the Machine learning models that are used to train and predict on this dataset should be of type Classification.*
  94.  
  95. # ### Random Forest Classifier
  96.  
  97. # *`Random Forest` is a classifier that contains a number of decision trees on various subsets of the given dataset and takes the average to improve the predictive accuracy of that dataset.*
  98. # """
  99.  
  100. model = RandomForestClassifier(n_estimators=13, criterion='gini',max_depth=10, max_features='auto')
  101. model.fit(X_train,y_train)
  102.  
  103. y_pred = model.predict(X_test)
  104.  
  105.  
  106. st.write("Random Forest Classifier\'s Accuracy :",round(accuracy_score(y_test, y_pred),4))
  107. st.write("Random Forest Classifier\'s F1 Score :",round(f1_score(y_test, y_pred),4))
  108.  
  109. X_test_input_cols = list(X.columns)
  110. default_dict = {}
  111. for i in range(len(X_test_input_cols)):
  112. default_dict[X_test_input_cols[i]] = input_lst[i]
  113.  
  114. X_input_test = pd.DataFrame(default_dict,index=[0])
  115.  
  116. for name in X_test_input_cols:
  117. if X_input_test[name].dtype =='object':
  118. X_input_test[name] = le.fit_transform(X_input_test[name])
  119.  
  120. y_input_pred = model.predict(X_input_test)
  121. if y_input_pred[0]==1:
  122. st.success('The Student will not dropout 😆😆😆')
  123. else:
  124. st.error('The Student will dropout 😭😭😭')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement