Advertisement
backlight0815

Untitled

Aug 14th, 2022
204
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.48 KB | None | 0 0
  1. library(tidyr)
  2. library(ggplot2)
  3. library(dplyr)
  4. library(tidyverse) #
  5.  
  6.  
  # Import the data ----
  # Read the attrition CSV (comma-separated, first row holds column names).
  employee <- read.csv(
    file = "c:\\Users\\Asus\\OneDrive\\Desktop\\employee_attrition.csv",
    header = TRUE,
    sep = ","
  )
  # Echo the data frame at top level, then open it in the data viewer.
  employee
  View(employee)
  11.  
  12.  
  # Data exploration ----
  head(employee, n = 10)   # first 10 rows
  tail(employee, n = 10)   # last 10 rows
  str(employee)            # internal structure of the data set
  class(employee)          # class of the object
  dim(employee)            # number of rows and columns
  names(employee)          # column names
  summary(employee)        # per-column summary, printed to the console
  View(summary(employee))  # the same summary, shown as a table in the viewer
  22.  
  23.  
  # Data cleaning ----
  # Drop two columns that the rest of the script does not use.
  employee$gender_full <- NULL
  employee$birthdate_key <- NULL

  # Recode every "Not Applicable" cell in the data set as NA.
  # This has to be its own statement: chaining it as
  # `Employee_Dataset <- employee[...] <- NA` stores the scalar NA in
  # Employee_Dataset instead of the recoded data frame.
  employee[employee == "Not Applicable"] <- NA
  Employee_Dataset <- employee
  View(Employee_Dataset)

  # Remove duplicate records by EmployeeID, keeping the LAST row of each ID.
  Employee_Dataset <- employee[!duplicated(employee$EmployeeID, fromLast = TRUE), ]

  # Remove every row that still contains at least one NA.
  # NOTE(review): this also drops rows whose only NA came from the
  # "Not Applicable" recode above -- confirm that is intended.
  Employee_Attrition <- na.omit(Employee_Dataset)

  # Renumber rows 1..n; seq_len() stays valid when no rows are left.
  rownames(Employee_Attrition) <- seq_len(nrow(Employee_Attrition))
  View(Employee_Attrition)
  # Data processing ----
  # Positional rename: the i-th name below replaces the i-th existing column
  # of Employee_Attrition.
  new_column_names <- c(
    "Employee_ID(PK)", "Record_Date", "Hire_Date", "Termination_Date", "Age",
    "Length_of_Service", "City", "Department", "Job_Title", "Store_Number", "Gender",
    "Term_desc", "Type_term", "Status_Year", "Status", "Business_unit"
  )
  # Fail loudly if the column count ever changes; a shorter name vector would
  # otherwise leave trailing columns with NA names.
  stopifnot(length(new_column_names) == ncol(Employee_Attrition))
  names(Employee_Attrition) <- new_column_names
  names(Employee_Attrition)

  # Employee_Dataset is the de-duplicated data before NA removal; it keeps the
  # original (un-renamed) column names.
  View(Employee_Dataset)
  45. #how data stores
  46.  
  47.  
  48.  
  49.  
  # Cross-tabulate Status by Status_Year: one row per year, one column per
  # status value. Both columns are selected explicitly instead of relying on
  # group_by() to re-add Status_Year behind a select(Status).
  statusCount <- Employee_Attrition %>%
    select(Status_Year, Status) %>%
    table() %>%
    as.data.frame.matrix()
  View(statusCount)

  # Yearly TERMINATED count expressed as a percentage of a fixed total.
  # NOTE(review): 1485 is presumably the overall number of terminated
  # records -- confirm, and consider deriving it from the data.
  terminated_total <- 1485
  statusCount$Terminated <- statusCount$TERMINATED / terminated_total * 100

  statusCount
  View(statusCount)

  # Average number of TERMINATED records per year.
  mean(statusCount$TERMINATED)
  64. #Analysis part
  65.  
  66.  
  # Keep only the records whose Status is TERMINATED.
  TerminateData <- filter(Employee_Attrition, Status == "TERMINATED")

  # Status year and termination description: bar per year, stacked by reason.
  ggplot(TerminateData, aes(x = Status_Year, fill = Term_desc)) +
    geom_bar() +
    labs(title = "Status of termination", x = "Status of year", y = "Number of employee")

  # Record count per length of service, filled by Status.
  ggplot(TerminateData, aes(x = Length_of_Service, fill = Status)) +
    geom_bar()
  # Analysis 1-1 ----
  # Records with Status_Year >= 2014 whose termination description is "Layoff".
  TerminateData_Department <- filter(
    Employee_Attrition,
    Status_Year >= 2014 & Term_desc == "Layoff"
  )

  # Department and termination description.
  ggplot(
    TerminateData_Department,
    aes(x = as.factor(Department), fill = as.factor(Term_desc))
  ) +
    geom_bar() +
    labs(title = "Department Statistic", x = "Department", y = "Number of employee") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

  # Same two conditions as above, applied again to the already-filtered data.
  is_recent_layoff <- (TerminateData_Department$Term_desc == "Layoff") &
    (TerminateData_Department$Status_Year >= 2014)
  Layoff <- TerminateData_Department[is_recent_layoff, ]

  # Number of layoffs from 2014 onwards.
  nrow(Layoff)
  85.  
  # Analysis 1-2 ----
  # Age and length of service of employees laid off from 2014 onwards.
  # The subset is built BEFORE it is plotted (the plot used to reference
  # TerminateData_Services ahead of its definition), and rows are filtered on
  # the data's own columns before narrowing to the two columns of interest.
  TerminateData_Services <- Employee_Attrition %>%
    filter(Status_Year >= 2014, Term_desc == "Layoff") %>%
    select(Age, Length_of_Service)

  ggplot(TerminateData_Services, aes(x = Age, y = Length_of_Service)) +
    geom_point() +
    xlim(20, 67) +
    ylim(1, 28)

  # Average length of service and age in this subset.
  mean(TerminateData_Services$Length_of_Service) # 11.94
  mean(TerminateData_Services$Age) # 40.80
  View(Employee_Attrition)
  # Analysis 1-3 ----
  # Gender and length of service of employees laid off from 2014 onwards.
  # Filter on the data's own columns first, then keep the two columns in the
  # order Gender, Length_of_Service.
  TerminateData_Gender <- Employee_Attrition %>%
    filter(Status_Year >= 2014, Term_desc == "Layoff") %>%
    select(Gender, Length_of_Service)

  ggplot(TerminateData_Gender, aes(x = Gender, y = Length_of_Service)) +
    geom_boxplot()
  109.  
  # Analysis 1-3 (last part) ----
  # Laid-off female employees: count and length-of-service statistics.
  Female <- TerminateData_Gender[TerminateData_Gender$Gender == "F", ]
  # seq_len() stays valid when the subset is empty, unlike 1:nrow().
  rownames(Female) <- seq_len(nrow(Female))
  nrow(Female)
  max(Female$Length_of_Service) # 25
  min(Female$Length_of_Service) # 1
  mean(Female$Length_of_Service) # 11.522

  # Total length of service contributed by all female rows. The column is
  # addressed by name rather than by position.
  Total_Female <- with(
    TerminateData_Gender,
    sum(Length_of_Service[which(Gender == "F")])
  )

  # One-row table: Gender "F" plus the total computed above.
  Female_Dataset <- subset(TerminateData_Gender, Gender == "F")
  rownames(Female_Dataset) <- seq_len(nrow(Female_Dataset))
  Female_Length_of_Service <- head(Female_Dataset, 1)
  Female_Length_of_Service$Length_of_Service <- NULL
  Total_Female_Leng_Of_Service <- mutate(
    Female_Length_of_Service,
    Total_Length_of_Service = Total_Female
  )
  View(Total_Female_Leng_Of_Service)
  127.  
  128.  
  # Laid-off male employees: count and length-of-service statistics.
  Male <- TerminateData_Gender[TerminateData_Gender$Gender == "M", ]
  # seq_len() stays valid when the subset is empty, unlike 1:nrow().
  rownames(Male) <- seq_len(nrow(Male))
  nrow(Male)
  max(Male$Length_of_Service) # 25
  min(Male$Length_of_Service) # 2
  mean(Male$Length_of_Service) # 12.42

  # Total length of service contributed by all male rows. The column is
  # addressed by name rather than by position.
  Total_Male <- with(
    TerminateData_Gender,
    sum(Length_of_Service[which(Gender == "M")])
  )

  # One-row table: Gender "M" plus the total computed above.
  Male_Dataset <- subset(TerminateData_Gender, Gender == "M")
  rownames(Male_Dataset) <- seq_len(nrow(Male_Dataset))
  Male_Length_of_Service <- head(Male_Dataset, 1)
  Male_Length_of_Service$Length_of_Service <- NULL
  Total_Male_Leng_Of_Service <- mutate(
    Male_Length_of_Service,
    Total_Length_of_Service = Total_Male
  )
  View(Total_Male_Leng_Of_Service)
  145.  
  146.  
  # Terminated records per department, stacked by termination description;
  # x-axis labels are rotated 90 degrees.
  ggplot(
    TerminateData,
    aes(x = as.factor(Department), fill = as.factor(Term_desc))
  ) +
    geom_bar() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
  149.  
  150.  
  151.  
  152.  
  # Terminated employees younger than 40.
  AgeData <- Employee_Attrition %>%
    filter(Age < 40 & Status == "TERMINATED")

  # Age and termination description.
  ggplot(AgeData) +
    geom_bar(aes(x = Age, fill = Term_desc))

  # Age and length of service, drawn as a line over all cleaned records.
  ggplot(data = Employee_Attrition, aes(x = Age, y = Length_of_Service)) +
    geom_line()

  # Scatter plot: how length of service varies with age.
  # Points outside the x/y limits are dropped by xlim()/ylim().
  ggplot(Employee_Attrition, aes(x = Age, y = Length_of_Service, color = Age)) +
    geom_point() +
    xlim(10, 60) +
    ylim(0, 25)

  # Density of length of service for employees older than 20.
  # Uses Employee_Attrition: the renamed columns Age and Length_of_Service
  # were only assigned to that data frame, not to Employee_Dataset.
  Employee_Attrition %>%
    filter(Age > 20) %>%
    ggplot(aes(x = Length_of_Service)) +
    geom_density(fill = "#69b3a2", color = "#e9ecef", alpha = 0.8)
  167.  
  # All records whose Status is TERMINATED.
  # NOTE(review): despite its name, this subset is not restricted to female
  # employees, and the plot title below says "Active" -- confirm both.
  Female_Terminated <- filter(Employee_Attrition, Status == "TERMINATED")

  # Gender and length of service: box plot with flipped coordinates.
  p <- ggplot(Female_Terminated, aes(x = Length_of_Service, y = Gender)) +
    geom_boxplot() +
    theme_classic() +
    coord_flip() +
    labs(title = "Female Active Statistic")

  p
  180.  
  181.  
  182.  
  # A bare reference to `bp` used to stand here. No object of that name is
  # created anywhere in the visible script, so evaluating it stopped the run
  # with "object 'bp' not found"; it has been removed.

  # Total number of records after cleaning.
  nrow(Employee_Attrition)
  # Number of those records whose Gender is "F". nrow() accepts no filter
  # argument, so matching rows are counted with a logical sum instead.
  sum(Employee_Attrition$Gender == "F")
  186.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement