Advertisement
korenizla

Untitled

Feb 5th, 2023 (edited)
2,281
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.42 KB | None | 0 0
  # K-means clustering of the apartment listings in Kvartiry_Ufa.csv.
  # Steps: load and clean the data, explore a few price relationships,
  # standardise the columns, inspect the elbow (WSS) plot, fit and plot k-means.

  # Packages ----
  # factoextra supplies fviz_nbclust() and fviz_cluster(); cluster and ggpubr
  # are attached exactly as before.
  library(factoextra)
  library(cluster)
  library(ggpubr)

  # Data import ----
  # NOTE(review): absolute, machine-specific path; adjust for your environment.
  data <- read.csv("C:\\Users\\ramta\\Desktop\\Kvartiry_Ufa.csv")

  # Look at the first rows of the raw data.
  head(data)

  # Drop column X, which only repeats the row index. Selecting by name avoids
  # subset()'s non-standard evaluation and does not fail if X is absent.
  data <- data[setdiff(names(data), "X")]

  # Check the first rows again after dropping X.
  head(data)

  # Remove rows with missing values before clustering.
  data <- na.omit(data)

  # Exploratory plots ----
  # Scatterplot matrix of every column to see spread and correlation.
  plot(data)

  # The full matrix is too large to read, so plot selected columns against
  # price. points() draws onto the axes set up by the first plot() call, so the
  # x-axis limits are those of total_area; values outside them are not shown.
  plot(data$total_area, data$last_price)
  points(data$citycenter_near, data$last_price, col = "red")
  points(data$parks_around3, data$last_price, col = "green")
  points(data$living_area, data$last_price, col = "blue")

  # Closer look at citycenter_near on its own axes.
  plot(data$citycenter_near, data$last_price, col = "red")

  # Standardisation ----
  # Scale each variable to mean 0 and standard deviation 1. scale() returns a
  # numeric matrix, so `data` is a matrix (not a data frame) from here on.
  data <- scale(data)

  # First rows of the scaled data.
  head(data)

  # Clustering ----
  # Make the random starts reproducible.
  set.seed(1)

  # Elbow plot (within-cluster sum of squares) to choose the number of clusters.
  fviz_nbclust(data, kmeans, nstart = 10, method = "wss")

  # Number of clusters for the final fit, defined once so the model and the
  # centre colours below cannot drift apart.
  # NOTE(review): the earlier comments said the elbow was at k = 8, but the code
  # has always fitted 4 centres; 4 is kept to preserve current results. Confirm
  # against the WSS plot and change only this value if 8 was intended.
  n_clusters <- 4L

  # Fit k-means with n_clusters centres and 25 random starts.
  km <- kmeans(data, centers = n_clusters, nstart = 25)

  # Print the fitted model.
  print(km)

  # Plot the clusters of the final k-means model.
  fviz_cluster(km, data = data)

  # Cluster centres in scaled units.
  km$centers

  # plot() on a matrix uses its first two columns; colour the observations by
  # cluster and mark each centre (same two columns) with a star.
  plot(data, col = km$cluster)
  points(km$centers, col = seq_len(n_clusters), pch = 8)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement