import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
import streamlit as st
st.set_page_config(page_title="Student Dropout Predictor", layout="wide")

# Inject custom CSS into the Streamlit page
def load_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

load_css('app.css')
with st.container():
    st.title("Student Dropout Predictor")
    st.subheader("By team CODE BUDDIES")
    st.write("Predicts whether a student will drop out of education, based on various factors.")
    st.write("The source code can be found [here](https://www.github.com/UndavalliJagadeesh/ADS_HACKATHON)")
- with st.container():
- school = st.selectbox("Select School type",("MS","GP"))
- gender = st.selectbox("Gender",("M","F"))
- age = st.slider("Age",15,22)
- address = st.selectbox("Address",("R","U"))
- famsize = st.selectbox("Family Size",("GT3","LE3"))
- Pstatus = st.selectbox("Pstatus",("T","A"))
- Medu = st.selectbox("Mother Education",(0,1,2,3,4))
- Fedu = st.selectbox("Father Education",(0,1,2,3,4))
- Mjob = st.selectbox("Mother JOb",("Teacher","at home","health","services","other"))
- Fjob = st.selectbox("Father JOb",("Teacher","at home","health","services","other"))
- reason = st.selectbox("Reason",("Reputation","Course","Home","other"))
- gaurdian = st.selectbox("Gaurdian",("Father","Mother","Other"))
- traveltime = st.selectbox("Travel Time(hrs)",(1,2,3,4))
- studytime = st.selectbox("Study Time(hrs)",(1,2,3,4))
- failures = st.selectbox("failures",(0,1,2,3))
- schoolsup = st.selectbox("School Support",("Yes","No"))
- famsup = st.selectbox("Family Support",("Yes","No"))
- paid = st.selectbox("Fee paid",("Yes","No"))
- activities = st.selectbox("Activities",("Yes","No"))
- nursery = st.selectbox("Nursery",("Yes","No"))
- higher = st.selectbox("Higher Education??",("Yes","No"))
- internet = st.selectbox("Internet",("Yes","No"))
- romantic = st.selectbox("Romantic",("Yes","No"))
- famrel = st.selectbox("Family relatives",(1,2,3,4))
- freetime = st.selectbox("Free Time(hrs)",(1,2,3,4))
- goout = st.selectbox("Vacation/Go out Time(hrs)",(1,2,3,4))
- Dalc = st.selectbox("Dalc",(1,2,3,4))
- Walc = st.selectbox("Walc",(1,2,3,4))
- health = st.selectbox("Health",(1,2,3,4))
- absences = st.slider("Days absent",0,100)
- input_lst=[school,gender,age,address,famsize,Pstatus, Medu,Fedu,Mjob, Fjob, reason, gaurdian, traveltime, studytime, failures, schoolsup,famsup,paid, activities, nursery, higher, internet, romantic,famrel,freetime, goout,Dalc,Walc,health,absences]
data = pd.read_csv('student-data.csv')

# Label-encode every categorical (object-dtype) column so the model can use it
le = LabelEncoder()
feature_names = data.columns.values
for name in feature_names:
    if data[name].dtype == 'object':
        data[name] = le.fit_transform(data[name])

# Histograms of each feature; rendered in the app, since plt.show() has no
# effect in a headless Streamlit run
data.hist(edgecolor='black', bins=25, figsize=(20, 20))
st.pyplot(plt.gcf())
- # """*The `hist()` method of pandas is used to represent the histograms of each feature specified in the data frame.*"""
- # plt.figure(figsize=(10,10))
- # sns.heatmap(data.corr(), cmap="YlGnBu", annot=True)
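# A minimal sketch of how the disabled heatmap above could be rendered inside the
# app, if desired (assumes st.pyplot is given a Matplotlib figure; kept commented
# out here to match the original choice of not showing it):
# fig, ax = plt.subplots(figsize=(10, 10))
# sns.heatmap(data.corr(), cmap="YlGnBu", annot=True, ax=ax)
# st.pyplot(fig)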
# Separate the features from the target column ('passed')
X = data.drop('passed', axis=1)
y = data.passed

# Splitting the dataset for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)
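# Optional sketch (not used above): a stratified split keeps the pass/fail ratio
# similar in the train and test sets, which can matter if the target is imbalanced:
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=40, stratify=y)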
- # """> Training the models and Evaluating their performance
- # *Since the target feature is categorical, the Machine learning models that are used to train and predict on this dataset should be of type Classification.*
- # ### Random Forest Classifier
- # *`Random Forest` is a classifier that contains a number of decision trees on various subsets of the given dataset and takes the average to improve the predictive accuracy of that dataset.*
- # """
# max_features='auto' was removed in recent scikit-learn releases; 'sqrt' is the
# equivalent setting for classifiers
model = RandomForestClassifier(n_estimators=13, criterion='gini', max_depth=10, max_features='sqrt')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

st.write("Random Forest Classifier's Accuracy :", round(accuracy_score(y_test, y_pred), 4))
st.write("Random Forest Classifier's F1 Score :", round(f1_score(y_test, y_pred), 4))
# Build a single-row DataFrame from the user's inputs, in the same column order as X
X_test_input_cols = list(X.columns)
default_dict = {}
for i in range(len(X_test_input_cols)):
    default_dict[X_test_input_cols[i]] = input_lst[i]

X_input_test = pd.DataFrame(default_dict, index=[0])

# Note: re-fitting the LabelEncoder on a single row maps every category to 0;
# reusing the per-column encoders fitted on the training data would be more faithful
for name in X_test_input_cols:
    if X_input_test[name].dtype == 'object':
        X_input_test[name] = le.fit_transform(X_input_test[name])
y_input_pred = model.predict(X_input_test)
if y_input_pred[0] == 1:
    st.success('The student will not drop out 😆😆😆')
else:
    st.error('The student will drop out 😭😭😭')
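# Optionally, the model's confidence could be shown as well (a sketch;
# predict_proba returns class probabilities in the order of model.classes_):
proba = model.predict_proba(X_input_test)[0]
st.write("Predicted probability that the student will not drop out:",
         round(float(proba[list(model.classes_).index(1)]), 4))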