Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
## Importing packages
# This R environment comes with all of CRAN and many other helpful packages
# preinstalled. The installed packages are listed in the kaggle/rstats docker
# image: https://github.com/kaggle/docker-rstats
library(tidyverse) # metapackage with lots of helpful functions

## Running code
# In a notebook, run a single cell by clicking in it and pressing Shift+Enter
# (or the blue arrow to its left). In a script, highlight the code to run and
# click the blue arrow at the bottom of the window.

## Reading in files
# Datasets added to this kernel are available under the "../input/" directory.
#
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, while the classification code later in
# this script uses STATUS as a class label (randomForest needs a factor
# response to fit a classifier). On older R versions this is the default, so
# the result is unchanged there.
empset <- read.csv(
  "../input/MFG10YearTerminationData.csv",
  stringsAsFactors = TRUE
)

## Saving data
# Any files or images saved by the script are put in the "output" directory,
# which is visible in the compiled version of the kernel after Commit & Run.

# Local (non-Kaggle) alternative, kept for reference:
# setwd("D:/Kriti/practice/employee-attrition-data")
# empset <- read.csv(
#   "D:/Kriti/practice/employee-attrition-data/MFG10YearTerminationData.csv",
#   stringsAsFactors = TRUE
# )

## First look at the data: column types, then per-column summaries
str(empset)
# View(empset)
summary(empset)
library(tidyr)
library(ggplot2)
library(dplyr)
# NOTE(review): plyr is attached after dplyr, so plyr's versions of same-named
# verbs (e.g. summarise, mutate, rename) mask dplyr's. None of those verbs is
# called unqualified in the visible part of this script, but any that are added
# later should be written as dplyr::verb().
library(plyr)

# Cross-tabulate head-count by year (rows) and employment status (columns).
# The two columns are selected explicitly instead of relying on select()
# silently re-adding the group_by() column, which also printed an
# "Adding missing grouping variables" message.
statusCount <- as.data.frame.matrix(
  table(empset[, c("STATUS_YEAR", "STATUS")])
)

# Yearly totals and the share of terminated employees, in percent.
statusCount$Total <- statusCount$ACTIVE + statusCount$TERMINATED
statusCount$PercentTerminate <-
  statusCount$TERMINATED / statusCount$Total * 100
statusCount

# Average of the yearly termination percentages.
mean(statusCount$PercentTerminate)
# Head-count per business unit, coloured by employment status.
ggplot(empset, aes(x = BUSINESS_UNIT, fill = STATUS)) +
  geom_bar()

# Rows whose STATUS is "TERMINATED"; all termination plots below use this.
TerminateData <- dplyr::filter(empset, STATUS == "TERMINATED")

# Terminations per year, split by termination type.
ggplot(TerminateData, aes(x = STATUS_YEAR, fill = termtype_desc)) +
  geom_bar()

# Terminations per year, split by termination reason.
ggplot(TerminateData, aes(x = STATUS_YEAR, fill = termreason_desc)) +
  geom_bar()

# Terminations per department, split by termination reason. The x labels are
# rotated by 90 degrees so the department names do not overlap.
ggplot(
  TerminateData,
  aes(x = as.factor(department_name), fill = as.factor(termreason_desc))
) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

library(caret)

# Distribution of columns 6 and 7 of empset for each STATUS class, first as
# density curves and then as box plots.
# NOTE(review): columns are picked by position; presumably these are age and
# length_of_service -- confirm against str(empset).
featurePlot(
  x = empset[, 6:7],
  y = empset$STATUS,
  plot = "density",
  auto.key = list(columns = 2)
)
featurePlot(
  x = empset[, 6:7],
  y = empset$STATUS,
  plot = "box",
  auto.key = list(columns = 2)
)
library(rattle)
library(magrittr)
library(randomForest)

# Fix the random seed so the model fits below are reproducible.
# NOTE(review): crv is not created in this script; presumably it is the
# configuration environment exported by rattle -- confirm.
crv$seed <- 42
set.seed(crv$seed)

# Number of records in the full data set.
empNum <- nrow(empset)

# Split by year: everything before 2015 is used for training, the 2015
# records are held out for testing. which() drops rows whose comparison is NA,
# matching what subset() does.
empTrain <- empset[which(empset$STATUS_YEAR < 2015), , drop = FALSE]
empTest <- empset[which(empset$STATUS_YEAR == 2015), , drop = FALSE]

# Variable roles used by the models.
MYinput <- c(
  "age", "length_of_service", "gender_full", "STATUS_YEAR", "BUSINESS_UNIT"
)
MYnumeric <- c("age", "length_of_service", "STATUS_YEAR")
MYcategoric <- c("gender_full", "BUSINESS_UNIT")
MYtarget <- "STATUS"
MYident <- "EmployeeID"

# Training and test frames reduced to the input columns plus the target.
MYTrainingData <- empTrain[, c(MYinput, MYtarget), drop = FALSE]
MYTestData <- empTest[, c(MYinput, MYtarget), drop = FALSE]
library(rpart)

# Classification tree for STATUS on the training years, using the input
# columns listed in MYinput. Splits are chosen by information gain and
# surrogate splits are switched off.
myrpart <- rpart(
  STATUS ~ .,
  data = empTrain[, c(MYinput, MYtarget)],
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(usesurrogate = 0, maxsurrogate = 0)
)

# Tree structure, complexity-parameter table, and a plot of the tree.
print(myrpart)
printcp(myrpart)
fancyRpartPlot(
  myrpart,
  main = "Decision Tree MFG10YearTerminationData $ STATUS"
)

# Predict the class of the held-out test records.
# predict() for rpart models takes the new observations through `newdata`.
# The previous `data =` was swallowed by `...`, so the call returned the
# predictions for the training rows instead of the test rows.
mydt <- predict(
  myrpart,
  newdata = empTest[, c(MYinput, MYtarget)],
  type = "class"
)

# Confusion table: actual test status against the predicted status.
table(empTest$STATUS, mydt, dnn = c("Actual", "Predicted"))

# More detailed metrics via caret (both arguments must be factors with the
# same levels; the positive class is one of the STATUS values):
# confusionMatrix(data = mydt, reference = empTest$STATUS,
#                 positive = "TERMINATED", mode = "prec_recall")
# Re-seed so the forest does not depend on how much randomness was consumed
# by the code above.
set.seed(crv$seed)

# Random forest for STATUS on the training years: 500 trees, 2 candidate
# variables per split, sampling without replacement, variable importance
# recorded, and missing values imputed with na.roughfix.
myrf <- randomForest(
  STATUS ~ .,
  data = empTrain[, c(MYinput, MYtarget)],
  ntree = 500,
  mtry = 2,
  importance = TRUE,
  na.action = randomForest::na.roughfix,
  replace = FALSE
)
myrf

# ROC curve and confidence interval of the AUC, computed from the class
# labels stored in myrf$predicted (converted to their numeric codes) against
# the training response myrf$y.
# NOTE(review): hard class labels give a single-threshold ROC; the class
# vote fractions in myrf$votes would give a finer curve -- consider switching.
pROC::roc(myrf$y, as.numeric(myrf$predicted))
pROC::ci.auc(myrf$y, as.numeric(myrf$predicted))

# Importance table rounded to two decimals, sorted by its third column in
# descending order.
# NOTE(review): with two STATUS classes the third column is presumably
# MeanDecreaseAccuracy -- confirm with colnames(rn).
rn <- round(randomForest::importance(myrf), 2)
rn[order(rn[, 3], decreasing = TRUE), ]

# Plot of the type-1 importance measure.
varImpPlot(
  myrf,
  type = 1,
  main = "Variable importance",
  sub = "random forest model"
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement