Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
library(tidyr)
library(ggplot2)
library(dplyr)
library(tidyverse) # umbrella package; also attaches tidyr, ggplot2 and dplyr
# Import the data ----
# read.csv() already defaults to header = TRUE and sep = ",", so only the
# file path has to be supplied.
employee <- read.csv(
  file = "c:\\Users\\Asus\\OneDrive\\Desktop\\employee_attrition.csv"
)
employee
View(employee)

# Data exploration ----
head(employee, n = 10)   # first 10 rows
tail(employee, n = 10)   # last 10 rows
str(employee)            # internal structure of the dataset
class(employee)          # class of the dataset
dim(employee)            # number of rows and columns
names(employee)          # column titles
summary(employee)        # per-column summary printed to the console
View(summary(employee))  # the same summary shown in the table viewer
# Data cleaning ----

# Drop the two columns that are removed before any analysis.
employee$gender_full <- NULL
employee$birthdate_key <- NULL

# Recode every "Not Applicable" cell in the dataset as NA.
# The recode and the copy are two separate statements on purpose: a chained
# `Employee_Dataset <- employee[...] <- NA` stores the assigned value (a bare
# NA) in Employee_Dataset instead of the recoded data frame.
employee[employee == "Not Applicable"] <- NA
Employee_Dataset <- employee
View(Employee_Dataset)

# Remove duplicated records by EmployeeID, keeping the last row of each ID.
is_last_record <- !duplicated(employee$EmployeeID, fromLast = TRUE)
Employee_Dataset <- employee[is_last_record, ]

# Remove every row that still contains an NA value.
Employee_Attrition <- na.omit(Employee_Dataset)
# Renumber the rows from 1; assigning NULL also works for an empty data
# frame, where 1:nrow() would produce c(1, 0) and fail.
rownames(Employee_Attrition) <- NULL
View(Employee_Attrition)
# Data processing ----
# Give the columns of Employee_Attrition descriptive names. The names are
# applied by position, so stop if the column count is not the expected one
# rather than silently mislabelling columns.
column_names <- c(
  "Employee_ID(PK)", "Record_Date", "Hire_Date", "Termination_Date", "Age",
  "Length_of_Service", "City", "Department", "Job_Title", "Store_Number",
  "Gender", "Term_desc", "Type_term", "Status_Year", "Status", "Business_unit"
)
stopifnot(ncol(Employee_Attrition) == length(column_names))
names(Employee_Attrition) <- column_names
names(Employee_Attrition)
# Show the renamed data. The script previously opened Employee_Dataset here,
# which is never renamed and still contains the rows with NA values.
View(Employee_Attrition)
# How the records are distributed ----
# Cross-tabulate Status_Year (rows) against Status (columns) and keep the
# counts as a data frame. Both columns are selected explicitly instead of
# relying on group_by() + select(Status), where dplyr re-adds the grouping
# column implicitly and prints a message about it.
statusCount <- Employee_Attrition %>%
  select(Status_Year, Status) %>%
  table() %>%
  as.data.frame.matrix()
View(statusCount)

# Yearly TERMINATED count expressed as a percentage of a fixed total.
# NOTE(review): 1485 is presumably the overall number of terminated
# employees in this dataset -- confirm, or derive it from the data.
terminated_total <- 1485
statusCount$Terminated <- statusCount$TERMINATED / terminated_total * 100
statusCount
View(statusCount)

# Average number of TERMINATED records per Status_Year
mean(statusCount$TERMINATED)
# Analysis part ----
# Keep only the records whose Status is TERMINATED.
TerminateData <- filter(Employee_Attrition, Status == "TERMINATED")

# Status year and termination description
ggplot(TerminateData, aes(x = Status_Year, fill = Term_desc)) +
  geom_bar() +
  labs(
    title = "Status of termination",
    x = "Status of year",
    y = "Number of employee"
  )

# Length of service, coloured by status
ggplot(TerminateData, aes(x = Length_of_Service, fill = Status)) +
  geom_bar()
# Analysis 1-1 ----
# Layoff records with a Status_Year of 2014 or later.
TerminateData_Department <- filter(
  Employee_Attrition,
  Status_Year >= 2014 & Term_desc == "Layoff"
)

# Department and termination description
ggplot(
  TerminateData_Department,
  aes(x = as.factor(Department), fill = as.factor(Term_desc))
) +
  geom_bar() +
  labs(
    title = "Department Statistic",
    x = "Department",
    y = "Number of employee"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Apply the same layoff / year condition once more with base subsetting.
is_recent_layoff <- with(
  TerminateData_Department,
  (Term_desc == "Layoff") & (Status_Year >= 2014)
)
Layoff <- TerminateData_Department[is_recent_layoff, ]

# Number of layoffs with Status_Year >= 2014 (the author's note describes
# this period as 2014-2015)
nrow(Layoff)
# Analysis 1-2 ----
# Age and length of service of the layoffs with Status_Year >= 2014.
# The subset is built before it is plotted: the script previously called
# ggplot() on TerminateData_Services several lines before creating it, which
# fails with "object not found" when the file is run from top to bottom.
# Filtering happens on the data frame's own columns, then the two columns of
# interest are selected.
TerminateData_Services <- Employee_Attrition %>%
  filter(Status_Year >= 2014, Term_desc == "Layoff") %>%
  select(Age, Length_of_Service)

# Age against length of service
ggplot(data = TerminateData_Services, aes(x = Age, y = Length_of_Service)) +
  geom_point() +
  xlim(20, 67) +
  ylim(1, 28)

# Average length of service and age of that subset (values in the trailing
# comments are the ones recorded by the author)
mean(TerminateData_Services$Length_of_Service) # 11.94
mean(TerminateData_Services$Age) # 40.80
View(Employee_Attrition)
# Analysis 1-3 ----
# Gender and length of service of the layoffs with Status_Year >= 2014.
TerminateData_Gender <- Employee_Attrition %>%
  filter(Status_Year >= 2014, Term_desc == "Layoff") %>%
  select(Gender, Length_of_Service)

# Length of service per gender
ggplot(TerminateData_Gender, aes(x = Gender, y = Length_of_Service)) +
  geom_boxplot()
### Analysis 1.3 (last part) ----

# Return the rows of `data` whose Gender column equals `gender`, with the
# row names renumbered from 1.
# rownames(x) <- NULL is used rather than 1:nrow(x) because 1:0 evaluates to
# c(1, 0) and would raise an error for an empty subset.
rows_for_gender <- function(data, gender) {
  rows <- data[data$Gender == gender, ]
  rownames(rows) <- NULL
  rows
}

# Female rows: count, then max / min / mean length of service (values in the
# trailing comments are the ones recorded by the author)
Female <- rows_for_gender(TerminateData_Gender, "F")
nrow(Female)
max(Female$Length_of_Service) # 25
min(Female$Length_of_Service) # 1
mean(Female$Length_of_Service) # 11.522

# Total length of service summed over all female rows. The column is
# addressed by name instead of by position 2.
Total_Female <- sum(Female$Length_of_Service)
# Same rows as Female; kept under its own name as in the original script.
Female_Dataset <- Female
# One-row table: the Gender value plus the summed length of service.
Female_Length_of_Service <- head(Female_Dataset, 1)
Female_Length_of_Service$Length_of_Service <- NULL
Total_Female_Leng_Of_Service <- mutate(
  Female_Length_of_Service,
  Total_Length_of_Service = Total_Female
)
View(Total_Female_Leng_Of_Service)

# Male rows: the same statistics
Male <- rows_for_gender(TerminateData_Gender, "M")
nrow(Male)
max(Male$Length_of_Service) # 25
min(Male$Length_of_Service) # 2
mean(Male$Length_of_Service) # 12.42

# Total length of service summed over all male rows
Total_Male <- sum(Male$Length_of_Service)
# Same rows as Male; kept under its own name as in the original script.
Male_Dataset <- Male
Male_Length_of_Service <- head(Male_Dataset, 1)
Male_Length_of_Service$Length_of_Service <- NULL
Total_Male_Leng_Of_Service <- mutate(
  Male_Length_of_Service,
  Total_Length_of_Service = Total_Male
)
View(Total_Male_Leng_Of_Service)
# Department and termination description for the TERMINATED records
ggplot(
  TerminateData,
  aes(x = as.factor(Department), fill = as.factor(Term_desc))
) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# TERMINATED records with Age below 40
AgeData <- Employee_Attrition %>%
  filter(Age < 40 & Status == "TERMINATED")

# Age and termination description
ggplot(AgeData, aes(x = Age, fill = Term_desc)) +
  geom_bar()

# Age and length of service
ggplot(data = Employee_Attrition, aes(x = Age, y = Length_of_Service)) +
  geom_line()

# Scatter plot: how length of service relates to age.
# Points outside the x / y limits below are not drawn.
ggplot(Employee_Attrition, aes(x = Age, y = Length_of_Service, color = Age)) +
  geom_point() +
  xlim(10, 60) +
  ylim(0, 25)

# Density of length of service for records with Age above 20.
# Reads Employee_Attrition: Age and Length_of_Service are names this script
# assigns to that data frame only, while Employee_Dataset is never renamed.
# NOTE(review): Employee_Attrition also excludes the rows that contained NA,
# so confirm this is the population intended for the density plot.
Employee_Attrition %>%
  filter(Age > 20) %>%
  ggplot(aes(x = Length_of_Service)) +
  geom_density(fill = "#69b3a2", color = "#e9ecef", alpha = 0.8)
# All TERMINATED records.
# NOTE(review): the name Female_Terminated and the plot title below mention
# "Female" / "Active", but the filter keeps every TERMINATED row regardless
# of gender -- confirm the intended subset and title.
Female_Terminated <- Employee_Attrition %>%
  filter(Status == "TERMINATED")

# Gender and length of service
p <- ggplot(Female_Terminated, aes(x = Length_of_Service, y = Gender)) +
  geom_boxplot() +
  theme_classic() +
  coord_flip() +
  labs(title = "Female Active Statistic")
p

# A bare `bp` used to be evaluated here; no object of that name is created
# anywhere in this script, so the line only raised an error and was removed.

# Total number of rows
nrow(Employee_Attrition)
# Number of rows whose Gender is "F". nrow() has no filtering argument
# (passing Gender = "F" is an "unused argument" error), so the matching rows
# are counted directly.
sum(Employee_Attrition$Gender == "F")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement