% Start from a clean session: clear the command window, close every open
% figure, wipe the workspace variables and bring up the Workspace panel.
clc();
close('all');
clearvars();
workspace();
% Numeric display settings for the values echoed to the command window.
format('long', 'g');
format('compact');
% ---- Configuration -------------------------------------------------------
fontSize = 18;            % Font size used for every label, title and legend.
pointsPerCluster = 150;   % Number of samples drawn for each cluster.
spread = 0.03;            % Standard deviation of each cluster along x and y.
offsets = [0.3, 0.5, 0.7, 0.9];  % Center coordinate (same for x and y) of each cluster.

% ---- Synthetic data ------------------------------------------------------
% Helper drawing one column of normally distributed coordinates around
% "center". The calls below keep the x-then-y, cluster-by-cluster order so
% the sequence of random draws is unchanged.
drawCoordinates = @(center) spread * randn(pointsPerCluster, 1) + center;

xa = drawCoordinates(offsets(1));
ya = drawCoordinates(offsets(1));
xb = drawCoordinates(offsets(2));
yb = drawCoordinates(offsets(2));
xc = drawCoordinates(offsets(3));
yc = drawCoordinates(offsets(3));
xd = drawCoordinates(offsets(4));
yd = drawCoordinates(offsets(4));

% Stack the four clusters into single column vectors, then pair them up as
% an N-by-2 matrix of (x, y) observations.
x = vertcat(xa, xb, xc, xd);
y = vertcat(ya, yb, yc, yd);
xy = horzcat(x, y);
% Let evalclusters() score k-means solutions for each candidate cluster
% count using the Davies-Bouldin criterion. The semicolon is intentionally
% omitted so the evaluation object is echoed to the command window.
candidateClusterCounts = 3:10;
evaluationObject = evalclusters(xy, 'kmeans', 'DaviesBouldin', 'klist', candidateClusterCounts)

% Cluster the data with the count the evaluation reported as optimal.
optimalK = evaluationObject.OptimalK;
[assignedClass, clusterCenters] = kmeans(xy, optimalK);

% Echo the cluster centers (one row per cluster, columns are x and y).
clusterCenters
% Left panel: the points colored by the class numbers kmeans() returned.
hfig = figure;
subplot(1, 2, 1);
gscatter(x, y, assignedClass);
legend('FontSize', fontSize, 'Location', 'northwest');
grid('on');

% The same font-size name/value pair is applied to both axis labels and the title.
textOptions = {'fontSize', fontSize};
xlabel('x', textOptions{:});
ylabel('y', textOptions{:});
title('Original Class Numbers Assigned by kmeans()', textOptions{:});

% Enlarge the figure window to fill the screen.
set(hfig, 'WindowState', 'maximized');
% Relabel the clusters so that class numbers increase with the distance of
% the cluster center from the origin (closest center becomes class 1).
%
% Reads:  clusterCenters (K-by-2, columns x and y), assignedClass, x
% Writes: distancesFromOrigin, sortedDistances, sortOrder, newClassNumbers
%
% The first two statements deliberately have no trailing semicolon so the
% distances and the sort result are echoed to the command window.
distancesFromOrigin = sqrt(clusterCenters(:, 1) .^ 2 + clusterCenters(:, 2) .^2)
[sortedDistances, sortOrder] = sort(distancesFromOrigin, 'ascend')
newClassNumbers = zeros(length(x), 1);
for k = 1 : size(clusterCenters, 1)
	% Logical mask of the points kmeans() put into original cluster k.
	currentClassLocations = assignedClass == k;
	% sortOrder(n) is the original label of the n-th closest center, so the
	% position of k within sortOrder is the new label for cluster k.
	newClassNumber = find(k == sortOrder);
	% Index row k explicitly by column: a single linear index
	% clusterCenters(k) would return the x coordinate for both values.
	fprintf('Initially the center of cluster %d is (%.2f, %.2f), %.2f from the origin.\n', ...
		k, clusterCenters(k, 1), clusterCenters(k, 2), distancesFromOrigin(k));
	fprintf(' Relabeling all points in initial cluster #%d to cluster #%d.\n', k, newClassNumber);
	newClassNumbers(currentClassLocations) = newClassNumber;
end
% Right panel: the same points colored by the relabeled class numbers.
subplot(1, 2, 2);
gscatter(x, y, newClassNumbers);
grid('on');

% The same font-size name/value pair is applied to both axis labels and the title.
textOptions = {'fontSize', fontSize};
xlabel('x', textOptions{:});
ylabel('y', textOptions{:});
title('New Class Numbers', textOptions{:});
legend('FontSize', fontSize, 'Location', 'northwest');
% Report the center (mean x, mean y) of every cluster under the new labels.
%
% Reads:  clusterCenters (only for the cluster count), newClassNumbers, x, y
% Writes: meanx, meany (1-by-K row vectors, one entry per new class number)
fprintf('Now, after relabeling:\n');
numClusters = size(clusterCenters, 1);
% Preallocate instead of growing the vectors inside the loop; this also
% guarantees no stale elements survive from an earlier, larger meanx/meany.
meanx = zeros(1, numClusters);
meany = zeros(1, numClusters);
for k = 1 : numClusters
	% Logical mask of the points carrying new class number k.
	currentClassLocations = newClassNumbers == k;
	meanx(k) = mean(x(currentClassLocations));
	meany(k) = mean(y(currentClassLocations));
	fprintf('The center of cluster %d is (%.2f, %.2f).\n', k, meanx(k), meany(k));
end