Advertisement
amu2002

salescsv

Nov 20th, 2023
40
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.93 KB | None | 0 0
  # -*- coding: utf-8 -*-
"""ML_P6.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1jcUxUUvU0j7DFv3weJK1HZxEMyspw3pn

K-means clustering of the QUANTITYORDERED / ORDERLINENUMBER columns of
``sales_data_sample.csv``.  The within-cluster sum of squares (WCSS) is
computed for k = 1..10 on the raw and on the standardised data, an elbow is
picked from each curve, and the resulting cluster assignments are plotted
side by side.
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Columns used as clustering features.
FEATURES = ['QUANTITYORDERED', 'ORDERLINENUMBER']

# Candidate cluster counts; the single source of truth for both the fitting
# loop and the x axis of the elbow plots.
ks = list(range(1, 11))


def _fit_kmeans(data, n_clusters):
    """Fit and return a KMeans model with the settings used throughout."""
    model = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10,
                   random_state=42)
    return model.fit(data)


def _inertia_curve(data, candidate_ks):
    """Return the WCSS (``inertia_``) of a fitted model for each candidate k."""
    return [_fit_kmeans(data, k).inertia_ for k in candidate_ks]


def _elbow_k(candidate_ks, inertias):
    """Return the k at the elbow of an inertia curve.

    Heuristic: rescale both axes to [0, 1] and take the point lying furthest
    below the straight line that joins the first and last points of the
    curve.  Falls back to the first candidate when the curve is flat or has
    a single point.
    """
    x = np.asarray(candidate_ks, dtype=float)
    y = np.asarray(inertias, dtype=float)
    x_span = x[-1] - x[0]
    y_span = y[0] - y[-1]
    if x_span == 0 or y_span == 0:
        return candidate_ks[0]
    # Normalise so the large inertia values do not swamp the k axis.
    x_norm = (x - x[0]) / x_span
    y_norm = (y - y[-1]) / y_span
    # The chord runs from (0, 1) to (1, 0); the gap below it is 1 - x - y.
    gap = 1.0 - x_norm - y_norm
    return candidate_ks[int(np.argmax(gap))]


def _plot_elbow(candidate_ks, inertias):
    """Draw an inertia-vs-k line plot on its own new figure."""
    # A dedicated figure keeps the curve from being drawn onto whichever
    # axes happen to be current (e.g. a panel of an earlier scatter figure).
    _, ax = plt.subplots()
    sns.lineplot(ax=ax, x=candidate_ks, y=inertias)


df = pd.read_csv("./sales_data_sample.csv", encoding='latin')

# Notebook-style inspection; these bare expressions only render output when
# run as the last statement of a notebook cell.
df.head()

df.describe()

df.shape

df = df[FEATURES]
df = df.dropna(axis=0)

# --- Raw (unscaled) features -------------------------------------------------
wcss = _inertia_curve(df, ks)
_plot_elbow(ks, wcss)

# Refit with the elbow-selected k.  Reusing the model left over from the
# sweep would always show k=10 regardless of where the elbow is.
clustering = _fit_kmeans(df, _elbow_k(ks, wcss))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
sns.scatterplot(ax=axes[0], data=df, x='QUANTITYORDERED',
                y='ORDERLINENUMBER').set_title('Without clustering')
sns.scatterplot(ax=axes[1], data=df, x='QUANTITYORDERED',
                y='ORDERLINENUMBER',
                hue=clustering.labels_).set_title('Using the elbow method')

df.describe().T

# --- Standardised features ---------------------------------------------------
ss = StandardScaler()
scaled = ss.fit_transform(df)

wcss_sc = _inertia_curve(scaled, ks)
_plot_elbow(ks, wcss_sc)
clustering_sc = _fit_kmeans(scaled, _elbow_k(ks, wcss_sc))

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
sns.scatterplot(ax=axes[0], data=df, x='QUANTITYORDERED',
                y='ORDERLINENUMBER').set_title('Without clustering')
sns.scatterplot(ax=axes[1], data=df, x='QUANTITYORDERED',
                y='ORDERLINENUMBER',
                hue=clustering.labels_).set_title('With the Elbow method')
sns.scatterplot(ax=axes[2], data=df, x='QUANTITYORDERED',
                y='ORDERLINENUMBER',
                hue=clustering_sc.labels_).set_title(
                    'With the Elbow method and scaled data')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement