#include <stddef.h>

// WARNING: this file MUST NOT contain any definitions of main()
// WARNING: otherwise the file may fail the autograder and result in 0 marks.

/* Extents of the six iteration dimensions. */
#define DIM_J 30
#define DIM_L 50
#define DIM_N 40
#define DIM_I 25
#define DIM_K 32
#define DIM_M 20

/* Edge length of one tile, in elements, for every dimension. */
#define T 8

/* Run-time copy of the six extents, so the kernel is not tied to the macros. */
struct func2_dims {
    size_t j;
    size_t l;
    size_t n;
    size_t i;
    size_t k;
    size_t m;
};

/* Exclusive end of the tile that begins at `start`, clipped to `extent`. */
static size_t func2_tile_end(size_t start, size_t extent)
{
    const size_t end = start + T;

    return end < extent ? end : extent;
}

/*
 * Tiled kernel behind func2().
 *
 * Index layout, as used by the subscripts below (row-major, flat):
 *   out : [l][m][n]   dims->l * dims->m * dims->n elements
 *   a   : [i][k][j]   dims->i * dims->k * dims->j elements
 *   b   : [i][l]      dims->i * dims->l elements
 *   c   : [m][j]      dims->m * dims->j elements
 *   d   : [k][n]      dims->k * dims->n elements
 *
 * Tiles are visited in the order m, k, i, n, l, j (outermost first), and the
 * elements inside a tile in the same order.
 *
 * The store is a plain assignment, so for a given (l, m, n) every (i, k, j)
 * combination overwrites the previous one; what remains afterwards is the
 * product for the last combination visited, i.e. i, k and j at their maximum.
 * NOTE(review): if a sum over i, k, j was intended this would have to be
 * `+=` -- confirm against the reference implementation before changing it.
 *
 * All pointers are volatile-qualified, so the four loads and the store are
 * deliberately kept inside the innermost loop; only index arithmetic and the
 * tile bounds are hoisted.
 */
static void func2_tiled(volatile double *out,
                        volatile const double *a,
                        volatile const double *b,
                        volatile const double *c,
                        volatile const double *d,
                        const struct func2_dims *dims)
{
    const size_t dim_j = dims->j;
    const size_t dim_l = dims->l;
    const size_t dim_n = dims->n;
    const size_t dim_i = dims->i;
    const size_t dim_k = dims->k;
    const size_t dim_m = dims->m;

    // tiling + loop swapping
    for (size_t m = 0; m < dim_m; m += T) {
        const size_t m_end = func2_tile_end(m, dim_m);
        for (size_t k = 0; k < dim_k; k += T) {
            const size_t k_end = func2_tile_end(k, dim_k);
            for (size_t i = 0; i < dim_i; i += T) {
                const size_t i_end = func2_tile_end(i, dim_i);
                for (size_t n = 0; n < dim_n; n += T) {
                    const size_t n_end = func2_tile_end(n, dim_n);
                    for (size_t l = 0; l < dim_l; l += T) {
                        const size_t l_end = func2_tile_end(l, dim_l);
                        for (size_t j = 0; j < dim_j; j += T) {
                            const size_t j_end = func2_tile_end(j, dim_j);

                            /* One tile: same nesting order as the tile loops. */
                            for (size_t mm = m; mm < m_end; ++mm) {
                                const size_t c_row = dim_j * mm;
                                for (size_t kk = k; kk < k_end; ++kk) {
                                    for (size_t ii = i; ii < i_end; ++ii) {
                                        const size_t a_row = dim_k * dim_j * ii + dim_j * kk;
                                        for (size_t nn = n; nn < n_end; ++nn) {
                                            const size_t d_idx = dim_n * kk + nn;
                                            for (size_t ll = l; ll < l_end; ++ll) {
                                                const size_t b_idx = dim_l * ii + ll;
                                                const size_t out_idx = dim_m * dim_n * ll + dim_n * mm + nn;
                                                for (size_t jj = j; jj < j_end; ++jj) {
                                                    out[out_idx] = a[a_row + jj] * b[b_idx] * c[c_row + jj] * d[d_idx];
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

/*
 * Fills `out` from `a`, `b`, `c` and `d` using the fixed DIM_* extents.
 *
 * out : at least DIM_L * DIM_M * DIM_N doubles, written.
 * a   : at least DIM_I * DIM_K * DIM_J doubles, read.
 * b   : at least DIM_I * DIM_L doubles, read.
 * c   : at least DIM_M * DIM_J doubles, read.
 * d   : at least DIM_K * DIM_N doubles, read.
 *
 * See func2_tiled() for the index layout and the traversal order.
 */
void func2(volatile double *out,
           volatile const double *a,
           volatile const double *b,
           volatile const double *c,
           volatile const double *d)
{
    static const struct func2_dims dims = {
        .j = DIM_J,
        .l = DIM_L,
        .n = DIM_N,
        .i = DIM_I,
        .k = DIM_K,
        .m = DIM_M,
    };

    func2_tiled(out, a, b, c, d, &dims);
}