fuzzylloyd/fuzzykmeans.c · CS-PROJECTS

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stdbool.h>
#include <mpi.h>

#include "fuzzykmeans.h"

// "v0" and "v1" are pointers, each representing an array with 2 elements.
//
// Returns the SQUARED Euclidean distance
// between the points ( v0[0] , v0[1] ) and ( v1[0] , v1[1] ).
double dist2(double *v0, double *v1)
{
    return (pow(v0[0] - v1[0], 2) + pow(v0[1] - v1[1], 2) * 1.0);
}

// Calculate the weight between data point i and center j (wij)
// "vi" is an pointer to a single data point (number i).
// "centers" is an array of length 2*k (coordinates of ALL cluster centers).
// "j" is the index of the cluster center whose weight should be calculated.
// "k" is the number of cluster centers.
//
// Returns the weight of data point vi respect to cluster j.
double weight(double *vi, double *centers, int j, int k)
{
    double sum = 0.0,
           m = 2.0,
           power = (1 / (m - 1)),
           arr[2] = {centers[2 * j], centers[(2 * j) + 1]},
           numerator = dist2(vi, arr);

    for (int l = 0; l < 2 * k; l += 2)
    {
        double c_arr[2] = {centers[l], centers[l + 1]};
        sum += pow(numerator / dist2(vi, c_arr), power);
    }

    return 1. / sum;
}

// "data" is an array of length 2*n (coordinates of data points).
// "centers" is an array of length 2*k (coordinates of cluster centers).
//
// Instead of returning anything, this function overwrites memory in the "centers" array.
// The cluster centers are updated with one iteration of fuzzy Lloyd's algorithm.
void fuzzykmeans(double *data, int n, double *centers, int k)
{
    double new_centers_num[2], new_centers[2 * k];
    double new_centers_den = 0.0, max_dist = 0.0;

    for (;;)
    {
        for (int i = 0; i < 2 * k; i += 2)
        {
            double numerator_sum[2] = {0.0, 0.0},
                   denominator_sum = 0.0;

            for (int x = 0; x < 2 * n; x += 2)
            {
                double arr[2] = {data[x], data[x + 1]},
                       w = weight(arr, centers, i / 2, k),
                       p = pow(w, 2);
                numerator_sum[0] += arr[0] * p;
                numerator_sum[1] += arr[1] * p;
                denominator_sum += p;
            }

            MPI_Allreduce(&numerator_sum[0], &new_centers_num[0], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            MPI_Allreduce(&numerator_sum[1], &new_centers_num[1], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            MPI_Allreduce(&denominator_sum, &new_centers_den, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

            new_centers[i] = new_centers_num[0] / new_centers_den;
            new_centers[i + 1] = new_centers_num[1] / new_centers_den;

            double old[2] = {centers[i], centers[i + 1]},
                   new[2] = {new_centers[i], new_centers[i + 1]},
                   distance = dist2(new, old);

            max_dist = (max_dist < distance) ? distance : max_dist;
        }

        if (max_dist < 1e-3)
            break;
        else
            max_dist = 0.0;

        for (int j = 0; j < 2 * k; j++)
            centers[j] = new_centers[j];
    }
}