// computing-systems-212 / Lab 4: Optimizing Caches / task5 / task5.c
#include <stdlib.h>

// WARNING: this file MUST NOT contain any definitions of main()
// WARNING: otherwise the file may fail the autograder and result in 0 marks.

// Extents of the six loop indices that func2 iterates over. Each extent also
// fixes a stride in the flat arrays func2 indexes (see the index expressions
// in func2's innermost statement).

// j: last (unit-stride) index of a and c.
#define DIM_J 30
// l: leading index of out, last index of b.
#define DIM_L 50
// n: last (unit-stride) index of out and d.
#define DIM_N 40
// i: leading index of a and b.
#define DIM_I 25
// k: middle index of a, leading index of d.
#define DIM_K 32
// m: middle index of out, leading index of c.
#define DIM_M 20

// Tile edge length: func2 advances every loop index in chunks of T.
// Several extents above (30, 50, 25, 20) are not multiples of 8, so the last
// tile along those indices is partial and func2 bounds-checks against DIM_*.
#define T 8

// func2: tiled evaluation of
//
//     out[l][m][n] = a[i][k][j] * b[i][l] * c[m][j] * d[k][n]
//
// for every (m, k, i, n, l, j) within the DIM_* extents.
//
// All arrays are flat and indexed with the last subscript at unit stride:
//   out : DIM_L x DIM_M x DIM_N  (written)
//   a   : DIM_I x DIM_K x DIM_J  (read)
//   b   : DIM_I x DIM_L          (read)
//   c   : DIM_M x DIM_J          (read)
//   d   : DIM_K x DIM_N          (read)
//
// The iteration space is cut into tiles of at most T points per index; tiles
// are visited in the order m, k, i, n, l, j (outermost first) and the points
// inside a tile are visited in that same order. The traversal order is
// presumably chosen for cache locality (the original comment only says
// "tiling + loop swapping").
//
// Every pointer is volatile-qualified, so each iteration performs its four
// loads and one store.
//
// NOTE(review): the store is a plain assignment, not an accumulation, so each
// out element ends up holding the product from the last (i, k, j) combination
// visited for it. Confirm that this matches the reference implementation.
void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) {
    // Tile walk: *_lo is the first index of the tile, *_hi is one past its
    // last index, clamped to the extent so a trailing partial tile is safe.
    for (size_t m_lo = 0; m_lo < DIM_M; m_lo += T) {
        const size_t m_hi = (m_lo + T < DIM_M) ? m_lo + T : DIM_M;
        for (size_t k_lo = 0; k_lo < DIM_K; k_lo += T) {
            const size_t k_hi = (k_lo + T < DIM_K) ? k_lo + T : DIM_K;
            for (size_t i_lo = 0; i_lo < DIM_I; i_lo += T) {
                const size_t i_hi = (i_lo + T < DIM_I) ? i_lo + T : DIM_I;
                for (size_t n_lo = 0; n_lo < DIM_N; n_lo += T) {
                    const size_t n_hi = (n_lo + T < DIM_N) ? n_lo + T : DIM_N;
                    for (size_t l_lo = 0; l_lo < DIM_L; l_lo += T) {
                        const size_t l_hi = (l_lo + T < DIM_L) ? l_lo + T : DIM_L;
                        for (size_t j_lo = 0; j_lo < DIM_J; j_lo += T) {
                            const size_t j_hi = (j_lo + T < DIM_J) ? j_lo + T : DIM_J;

                            // Point walk inside the current tile, same index order as the tile walk.
                            for (size_t m = m_lo; m < m_hi; ++m) {
                                const size_t c_row = DIM_J * m;
                                for (size_t k = k_lo; k < k_hi; ++k) {
                                    const size_t d_row = DIM_N * k;
                                    for (size_t i = i_lo; i < i_hi; ++i) {
                                        const size_t a_row = DIM_K * DIM_J * i + DIM_J * k;
                                        const size_t b_row = DIM_L * i;
                                        for (size_t n = n_lo; n < n_hi; ++n) {
                                            const size_t out_mn = DIM_N * m + n;
                                            for (size_t l = l_lo; l < l_hi; ++l) {
                                                const size_t out_idx = DIM_M * DIM_N * l + out_mn;
                                                for (size_t j = j_lo; j < j_hi; ++j) {
                                                    out[out_idx] = a[a_row + j] * b[b_row + l] * c[c_row + j] * d[d_row + n];
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}