#include <stdio.h> #include <math.h> #include <stdlib.h> #include <stdbool.h> #include <mpi.h> #include "fuzzykmeans.h" // "v0" and "v1" are pointers, each representing an array with 2 elements. // // Returns the SQUARED Euclidean distance // between the points ( v0[0] , v0[1] ) and ( v1[0] , v1[1] ). double dist2(double *v0, double *v1) { return (pow(v0[0] - v1[0], 2) + pow(v0[1] - v1[1], 2) * 1.0); } // Calculate the weight between data point i and center j (wij) // "vi" is an pointer to a single data point (number i). // "centers" is an array of length 2*k (coordinates of ALL cluster centers). // "j" is the index of the cluster center whose weight should be calculated. // "k" is the number of cluster centers. // // Returns the weight of data point vi respect to cluster j. double weight(double *vi, double *centers, int j, int k) { double sum = 0.0, m = 2.0, power = (1 / (m - 1)), arr[2] = {centers[2 * j], centers[(2 * j) + 1]}, numerator = dist2(vi, arr); for (int l = 0; l < 2 * k; l += 2) { double c_arr[2] = {centers[l], centers[l + 1]}; sum += pow(numerator / dist2(vi, c_arr), power); } return 1. / sum; } // "data" is an array of length 2*n (coordinates of data points). // "centers" is an array of length 2*k (coordinates of cluster centers). // // Instead of returning anything, this function overwrites memory in the "centers" array. // The cluster centers are updated with one iteration of fuzzy Lloyd's algorithm. void fuzzykmeans(double *data, int n, double *centers, int k) { double new_centers_num[2], new_centers[2 * k]; double new_centers_den = 0.0, max_dist = 0.0; for (;;) { for (int i = 0; i < 2 * k; i += 2) { double numerator_sum[2] = {0.0, 0.0}, denominator_sum = 0.0; for (int x = 0; x < 2 * n; x += 2) { double arr[2] = {data[x], data[x + 1]}, w = weight(arr, centers, i / 2, k), p = pow(w, 2); numerator_sum[0] += arr[0] * p; numerator_sum[1] += arr[1] * p; denominator_sum += p; } MPI_Allreduce(&numerator_sum[0], &new_centers_num[0], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&numerator_sum[1], &new_centers_num[1], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&denominator_sum, &new_centers_den, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); new_centers[i] = new_centers_num[0] / new_centers_den; new_centers[i + 1] = new_centers_num[1] / new_centers_den; double old[2] = {centers[i], centers[i + 1]}, new[2] = {new_centers[i], new_centers[i + 1]}, distance = dist2(new, old); max_dist = (max_dist < distance) ? distance : max_dist; } if (max_dist < 1e-3) break; else max_dist = 0.0; for (int j = 0; j < 2 * k; j++) centers[j] = new_centers[j]; } }