/* computing-systems-212 / Lab 4: Optimizing Caches / task2 / task2.c */
#include <stdlib.h>

// WARNING: this file MUST NOT contain any definitions of main()
// WARNING: otherwise the file may fail the autograder and result in 0 marks.

/* Extents of the three index ranges that func1 iterates over. */
#define DIM_I 370
#define DIM_K 100
#define DIM_J 200

/* Edge length of the cubic tiles that func1 cuts the iteration space into. */
#define T 8

/*
 * For every i in [0, DIM_I), j in [0, DIM_J) and k in [0, DIM_K), performs
 *
 *     out[DIM_I*j + i] += a[DIM_I*j + i] * b[DIM_K*i + k] * c[DIM_K*j + k]
 *
 * The (i, j, k) space is visited tile by tile (T x T x T, clipped at the upper
 * edges because DIM_I and DIM_K are not multiples of T).  Inside a tile the
 * loop nest runs k outermost, then j, then i, so consecutive inner iterations
 * touch consecutive elements of out and a.
 *
 * For any fixed (i, j) the k contributions are still added in ascending k
 * order, because k tiles are visited in ascending order.
 *
 * out : DIM_I * DIM_J doubles, accumulated into in place (not cleared here).
 * a   : DIM_I * DIM_J doubles, read only.
 * b   : DIM_K * DIM_I doubles, read only.
 * c   : DIM_K * DIM_J doubles, read only.
 */
void func1(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c) {
    for (size_t ti = 0; ti < DIM_I; ti += T) {
        /* One past the last i index of this tile, clipped to the array edge. */
        const size_t i_stop = (ti + T < DIM_I) ? ti + T : DIM_I;

        for (size_t tj = 0; tj < DIM_J; tj += T) {
            const size_t j_stop = (tj + T < DIM_J) ? tj + T : DIM_J;

            for (size_t tk = 0; tk < DIM_K; tk += T) {
                const size_t k_stop = (tk + T < DIM_K) ? tk + T : DIM_K;

                for (size_t k = tk; k < k_stop; ++k) {
                    for (size_t j = tj; j < j_stop; ++j) {
                        const size_t row_base = DIM_I * j;  /* shared by out and a */
                        const size_t c_index = DIM_K * j + k;

                        for (size_t i = ti; i < i_stop; ++i) {
                            const size_t cell = row_base + i;

                            /* The pointers are volatile, so c is deliberately
                             * re-read on every iteration rather than cached. */
                            out[cell] += a[cell] * b[DIM_K * i + k] * c[c_index];
                        }
                    }
                }
            }
        }
    }
}