Untitled

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

from code import interact
import streamlit as st
st.set_page_config(page_title="Student Dropout Predictor", layout="wide")

def load_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
load_css('app.css')
with st.container():

    st.title("Student Dropout Predictor")
    st.subheader("By team CODE BUDDIES")
    st.write("Prediction of a student whether he/she drops out from the education based on various factors")
    st.write("The source code can be found [here](https://www.github.com/UndavalliJagadeesh/ADS_HACKATHON)")


with st.container():
    school = st.selectbox("Select School type",("MS","GP"))
    gender = st.selectbox("Gender",("M","F"))
    age = st.slider("Age",15,22)
    address = st.selectbox("Address",("R","U"))
    famsize = st.selectbox("Family Size",("GT3","LE3"))
    Pstatus = st.selectbox("Pstatus",("T","A"))
    Medu = st.selectbox("Mother Education",(0,1,2,3,4))
    Fedu = st.selectbox("Father Education",(0,1,2,3,4))
    Mjob = st.selectbox("Mother JOb",("Teacher","at home","health","services","other"))
    Fjob = st.selectbox("Father JOb",("Teacher","at home","health","services","other"))
    reason = st.selectbox("Reason",("Reputation","Course","Home","other"))
    gaurdian = st.selectbox("Gaurdian",("Father","Mother","Other"))
    traveltime = st.selectbox("Travel Time(hrs)",(1,2,3,4))
    studytime = st.selectbox("Study Time(hrs)",(1,2,3,4))
    failures = st.selectbox("failures",(0,1,2,3))
    schoolsup = st.selectbox("School Support",("Yes","No"))
    famsup = st.selectbox("Family Support",("Yes","No"))
    paid = st.selectbox("Fee paid",("Yes","No"))
    activities = st.selectbox("Activities",("Yes","No"))
    nursery = st.selectbox("Nursery",("Yes","No"))
    higher = st.selectbox("Higher Education??",("Yes","No"))
    internet = st.selectbox("Internet",("Yes","No"))
    romantic = st.selectbox("Romantic",("Yes","No"))
    famrel = st.selectbox("Family relatives",(1,2,3,4))
    freetime = st.selectbox("Free Time(hrs)",(1,2,3,4))
    goout = st.selectbox("Vacation/Go out Time(hrs)",(1,2,3,4))
    Dalc = st.selectbox("Dalc",(1,2,3,4))
    Walc = st.selectbox("Walc",(1,2,3,4))
    health = st.selectbox("Health",(1,2,3,4))
    absences = st.slider("Days absent",0,100)

input_lst=[school,gender,age,address,famsize,Pstatus, Medu,Fedu,Mjob, Fjob, reason, gaurdian, traveltime, studytime, failures, schoolsup,famsup,paid, activities, nursery, higher, internet, romantic,famrel,freetime, goout,Dalc,Walc,health,absences]

data = pd.read_csv('student-data.csv')

le = LabelEncoder()
feature_names = data.columns.values

for name in feature_names:
  if data[name].dtype =='object':
    data[name] = le.fit_transform(data[name])

data.hist(edgecolor='black',bins = 25, figsize= (20,20))
plt.show()

# """*The `hist()` method of pandas is used to represent the histograms of each feature specified in the data frame.*"""

# plt.figure(figsize=(10,10))
# sns.heatmap(data.corr(), cmap="YlGnBu", annot=True)


X = data.drop('passed', axis=1)
y =  data.passed


# """> Splitting data for Training and Testing"""

# #splitting the dataset


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)

# """> Training the models and Evaluating their performance

# *Since the target feature is categorical, the Machine learning models that are used to train and predict on this dataset should be of type Classification.*

# ### Random Forest Classifier

# *`Random Forest` is a classifier that contains a number of decision trees on various subsets of the given dataset and takes the average to improve the predictive accuracy of that dataset.*
# """

model = RandomForestClassifier(n_estimators=13, criterion='gini',max_depth=10, max_features='auto')
model.fit(X_train,y_train)

y_pred = model.predict(X_test)


st.write("Random Forest Classifier\'s Accuracy :",round(accuracy_score(y_test, y_pred),4))
st.write("Random Forest Classifier\'s F1 Score :",round(f1_score(y_test, y_pred),4))

X_test_input_cols = list(X.columns)
default_dict = {}
for i in range(len(X_test_input_cols)):
    default_dict[X_test_input_cols[i]] = input_lst[i]

X_input_test = pd.DataFrame(default_dict,index=[0])

for name in X_test_input_cols:
  if X_input_test[name].dtype =='object':
    X_input_test[name] = le.fit_transform(X_input_test[name])

y_input_pred = model.predict(X_input_test)
if y_input_pred[0]==1:
    st.success('The Student will not dropout 😆😆😆')
else:
    st.error('The Student will dropout 😭😭😭')