Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# K-means clustering of apartment listings.
#
# Steps: load the CSV, drop the redundant index column and incomplete rows,
# eyeball a few price relationships, standardise the variables, pick a
# cluster count from the elbow plot, then fit and plot the k-means model.

# Libraries ----
library(factoextra)
library(cluster)
library("ggpubr")

# Load data ----
# NOTE(review): absolute, machine-specific path; adjust before running on
# another machine.
csv_path <- "C:\\Users\\ramta\\Desktop\\Kvartiry_Ufa.csv"
data <- read.csv(csv_path)

# Peek at the first rows of the raw data.
head(data)

# Drop the X column: it only repeats the row index.
data <- subset(data, select = -c(X))

# Confirm the column is gone.
head(data)

# k-means cannot handle missing values, so drop incomplete rows.
data <- na.omit(data)

# Exploratory plots ----
# Scatterplot matrix of every column to see spread and correlation.
plot(data)

# The full matrix is hard to read, so plot selected columns against price.
# The overlays reuse the axes set up by the first call, so their x values
# are drawn on the total_area scale.
plot(data$total_area, data$last_price)
points(data$citycenter_near, data$last_price, col = "red")
points(data$parks_around3, data$last_price, col = "green")
points(data$living_area, data$last_price, col = "blue")

# Closer look at citycenter_near on its own axes.
plot(data$citycenter_near, data$last_price, col = "red")

# Standardise ----
# Centre each variable to mean 0 and scale to sd 1 so that no single column
# dominates the distance computation. scale() returns a matrix.
# NOTE(review): assumes every remaining column is numeric - confirm.
data <- scale(data)

# Peek at the scaled data.
head(data)

# Clustering ----
# Fix the RNG seed so the random starts are reproducible.
set.seed(1)

# Elbow plot (total within-cluster sum of squares vs. number of clusters).
fviz_nbclust(data, kmeans, nstart = 10, method = "wss")

# Number of clusters, defined once so the model fit and the colours of the
# plotted centres cannot drift apart.
# NOTE(review): an earlier note in this script placed the elbow at k = 8,
# but the model was always fitted with 4 centres. 4 is kept here so results
# are unchanged - confirm the intended value against the elbow plot.
n_clusters <- 4

# Fit k-means with 25 random starts.
km <- kmeans(data, centers = n_clusters, nstart = 25)

# Show the fitted model.
print(km)

# Plot the final k-means model.
fviz_cluster(km, data = data)

# Cluster centres (in standardised units).
km$centers

# Base-graphics view: for a matrix, plot() and points() use the first two
# columns as x and y. Observations are coloured by cluster and the centres
# are marked with stars.
plot(data, col = km$cluster)
points(km$centers, col = seq_len(n_clusters), pch = 8)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement