Advertisement
makispaiktis

ML - my_kmeans (Epsilon for K=3 only)

Oct 12th, 2022 (edited)
828
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MatLab 6.02 KB | None | 0 0
  % Start from a clean session: drop workspace variables, close open figures
  % and clear the command window. 'clearvars' is enough here; unlike
  % 'clear all' it does not also flush compiled functions and breakpoints.
  clearvars
  close all
  clc


  % *************************************************************************
  % *************************************************************************

  % Data - Generate random 2-D points
  % Two data sources are available. Previously both were executed in sequence
  % and the second silently overwrote the first; the choice is now explicit.
  %   false (default) -> N = 500 points drawn uniformly from the unit square
  %   true            -> one group of points per entry of Ns/means/stds,
  %                      produced by generatePoints
  USE_CLUSTERED_DATA = false;
  if USE_CLUSTERED_DATA
      Ns = [2, 3, 5] * 10;          % Number of points in each group
      means = [0.5, 1, 1.5];        % Location parameter of each group
      stds = [0.3, 0.4, 0.5];       % Spread parameter of each group
      [x, y] = generatePoints(Ns, means, stds);
      N = numel(x);                 % Total # of points actually generated
  else
      N = 500;                      % Total # of points
      x = rand(1, N);
      y = rand(1, N);
  end
  % x's = 1st row, y's = 2nd row; column 'i' holds the i-th point
  points = [x; y];
  % One plotting colour per cluster; indexed by cluster number when the
  % clusters are drawn, so the number of clusters must not exceed 5.
  colors = ["green", "blue", "yellow", "magenta", "red"];
  21.  
  22.  
  23.  
  24.  
  25. % *************************************************************************
  26. % *************************************************************************
  27.  
  % Algorithm's Initialization
  K = 3;                              % Number of clusters / centroids
  % The centroids are seeded with K of the data points, spread evenly over
  % the point list. For K = 3 this selects points 1, floor(N/2) and N.
  % linspace(0, N, K) starts at 0, so the first index is clamped to 1.
  % (A bounding-box random initialisation used to be computed here as well,
  % but it was always overwritten by this seeding, so it has been removed.)
  seed_idx = max(1, floor(linspace(0, N, K)));
  % x's = 1st row       y's = 2nd row
  % Column 'k' contains the coordinates of the k-th centroid
  centroids = points(:, seed_idx);
  centroids_x = centroids(1, :);
  centroids_y = centroids(2, :);
  % Show the raw data together with the starting centroids
  figure();
  plot(x, y, 'bo');
  title("Iteration 0 - Random centroids");
  xlabel("x");
  ylabel("y");
  hold on
  plot(centroids_x, centroids_y, 'r+', 'MarkerSize', 10);
  49.  
  50.  
  51.  
  52. % *************************************************************************
  53. % *************************************************************************
  54.  
  % Generate 2 useful matrices:
  %   'distances' = K x N, where K = # of centroids and N = # of points.
  %                 Column 'j' holds the Euclidean distance of the j-th point
  %                 to each of the K centroids.
  %   'groups'    = 1 x N, element 'j' is the index (1..K) of the centroid
  %                 closest to the j-th point.
  distances = zeros(K, N);
  for k = 1 : K
      % One whole row at a time: distance of every point to centroid k
      distances(k, :) = sqrt((centroids(1, k) - points(1, :)).^2 + ...
                             (centroids(2, k) - points(2, :)).^2);
  end
  % Row index of the column-wise minimum = nearest centroid of each point
  % (ties resolve to the lowest centroid index)
  [~, groups] = min(distances, [], 1);

  disp('***********************************************************');
  % Named 'iteration' rather than 'round' so that the builtin round()
  % function is not shadowed by a workspace variable.
  iteration = 0;
  disp("        Iteration " + num2str(iteration));
  centroids                           % No semicolon: print the centroids
  disp(' ');
  disp(' ');
  85.  
  86.  
  87.  
  88.  
  89.  
  90.  
  91. % *************************************************************************
  92. % *************************************************************************
  93.  
  % Iterations with for-loop
  % Each pass performs one k-means step: move every centroid to the mean of
  % the points currently assigned to it, reassign every point to its nearest
  % centroid, then draw the resulting clustering in a new figure.
  MAX_ROUND = 10;
  for iter = 1 : MAX_ROUND            % 'iter', not 'round': keep builtin round() usable

      disp('***********************************************************');
      disp("        Iteration " + num2str(iter));

      % Update step - new centroid = mean of the points in its group
      for k = 1 : K
          members = (groups == k);
          if any(members)
              centroids(:, k) = mean(points(:, members), 2);
          else
              % A centroid with no points around it would become 0/0 = NaN.
              % Re-seed it on a randomly chosen data point; using one fixed
              % point for all empty clusters would make them coincide and
              % leave all but one of them empty again.
              disp("NaN for centroid " + num2str(k));
              centroids(:, k) = points(:, randi(N));
          end
      end
      centroids                       % No semicolon: print the new centroids

      % Assignment step - update 'distances' (K x N) and 'groups' (1 x N)
      for k = 1 : K
          distances(k, :) = sqrt((centroids(1, k) - points(1, :)).^2 + ...
                                 (centroids(2, k) - points(2, :)).^2);
      end
      [~, groups] = min(distances, [], 1);

      disp('***********************************************************');
      disp(' ');
      disp(' ');

      % Draw the clustering: one plot call per cluster instead of one per
      % point. 'colors' must provide at least K entries.
      figure();
      for k = 1 : K
          members = (groups == k);
          plot(points(1, members), points(2, members), 'o', 'Color', colors(k));
          hold on
      end
      plot(centroids(1, :), centroids(2, :),  'r+', 'MarkerSize', 10);
      title("Iteration " + num2str(iter));
      xlabel("x");
      ylabel("y");

  end
  168.  
  169.  
  170.  
  171.  
  172.  
  173.  
  174.  
  175.  
  176. % *************************************************************************
  177. % *************************************************************************
  178.  
  179. % Auxiliary Functions
  function plot_spots(x, y, TITLE)
      % PLOT_SPOTS  Scatter the given points in a new figure.
      %   x, y  - coordinates of the points (plotted as blue circles)
      %   TITLE - text placed above the axes
      fig = figure();
      ax = axes('Parent', fig);
      plot(ax, x, y, 'bo');
      xlabel(ax, "x");
      ylabel(ax, "y");
      title(ax, TITLE);
  end
  187.  
  function [x, y] = generatePoints(Ns, means, stds)
      % GENERATEPOINTS  Random 2-D points arranged in groups.
      %   Ns    - vector, Ns(i) is the number of points in group i
      %   means - vector, means(i) is the mean of both coordinates in group i
      %   stds  - vector, stds(i) is the standard deviation of both
      %           coordinates in group i
      %   x, y  - 1 x sum(Ns) row vectors with the coordinates, group after
      %           group in the order given by Ns
      %
      %   If the three inputs do not have the same length, 'Error' is printed
      %   and the fallback x = y = [0 1 2 3] is returned.

      % The two comparisons below already imply length(means) == length(stds)
      if length(Ns) ~= length(means) || length(Ns) ~= length(stds)
          x = linspace(0, 3, 4);
          y = linspace(0, 3, 4);
          disp('Error');
          % Stop here; otherwise the fallback would be overwritten and the
          % loop below could index past the end of 'means' or 'stds'.
          return
      end

      total = sum(Ns);
      x = zeros(1, total);
      y = zeros(1, total);
      filled = 0;                     % Number of points written so far
      for i = 1 : length(Ns)
          idx = filled + (1 : Ns(i));
          % randn (standard normal), scaled and shifted, gives samples with
          % the requested mean and standard deviation. rand would instead
          % give uniform values in [means(i), means(i) + stds(i)].
          x(idx) = randn(1, Ns(i)) * stds(i) + means(i);
          y(idx) = randn(1, Ns(i)) * stds(i) + means(i);
          filled = filled + Ns(i);
      end
  end
  203.  
  204.  
  function d = distance(p1, p2)
      % DISTANCE  Euclidean distance between two points.
      %   p1, p2 - coordinate vectors with the same number of elements
      %            (row or column; any dimensionality)
      %   d      - scalar distance
      %   Example: p1 = [2 3]', p2 = [1 4]'  ->  d = sqrt(2)
      delta = p1(:) - p2(:);          % (:) makes row/column orientation irrelevant
      d = sqrt(sum(delta .^ 2));
  end
  209.  
  210.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement