#include <stddef.h> /* size_t */

// WARNING: this file MUST NOT contain any definitions of main()
// WARNING: otherwise the file may fail the autograder and result in 0 marks.

#define DIM_I 370
#define DIM_K 100
#define DIM_J 200
#define T 8

/*
 * For every (j, i) pair with j < DIM_J and i < DIM_I, adds
 *
 *     sum over k < DIM_K of  a[DIM_I * j + i] * b[DIM_K * i + k] * c[DIM_K * j + k]
 *
 * to out[DIM_I * j + i].
 *
 * out: DIM_J * DIM_I elements; accumulated into (+=), never cleared here, so
 *      the caller decides the starting values.
 * a:   DIM_J * DIM_I elements, read only.
 * b:   DIM_I * DIM_K elements, read only.
 * c:   DIM_J * DIM_K elements, read only.
 *
 * The iteration space is walked in T x T x T tiles (tiling + loop swapping).
 * DIM_I and DIM_K are not multiples of T, so the end of each tile is clamped
 * to the array dimension.
 */
void func1(volatile double *out, volatile const double *a,
           volatile const double *b, volatile const double *c)
{
    for (size_t i = 0; i < DIM_I; i += T) {
        /* Tile ends are computed once per tile instead of being re-tested
         * with two comparisons on every inner iteration. */
        const size_t i_end = (i + T < DIM_I) ? i + T : DIM_I;

        for (size_t j = 0; j < DIM_J; j += T) {
            const size_t j_end = (j + T < DIM_J) ? j + T : DIM_J;

            for (size_t k = 0; k < DIM_K; k += T) {
                const size_t k_end = (k + T < DIM_K) ? k + T : DIM_K;

                /* Within a tile kk is the outermost index and k tiles are
                 * visited in ascending order, so each out element receives
                 * its k terms in increasing k order. */
                for (size_t kk = k; kk < k_end; ++kk) {
                    for (size_t jj = j; jj < j_end; ++jj) {
                        for (size_t ii = i; ii < i_end; ++ii) {
                            out[DIM_I * jj + ii] += a[DIM_I * jj + ii]
                                                  * b[DIM_K * ii + kk]
                                                  * c[DIM_K * jj + kk];
                        }
                    }
                }
            }
        }
    }
}