#include <stdlib.h>
// WARNING: this file MUST NOT contain any definitions of main()
// WARNING: otherwise the file may fail the autograder and result in 0 marks.
// Extents of the six loop indices used by func2. Each DIM_X is the upper bound
// of the loop over index x and also appears as a stride in the flattened
// (row-major style) index expressions of the arrays noted below.
// j: fastest-varying index of a and c.
#define DIM_J 30
// l: fastest-varying index of b, slowest-varying index of out.
#define DIM_L 50
// n: fastest-varying index of d and out.
#define DIM_N 40
// i: slowest-varying index of a and b.
#define DIM_I 25
// k: middle index of a, slowest-varying index of d.
#define DIM_K 32
// m: slowest-varying index of c, middle index of out.
#define DIM_M 20
// Tile edge length: the outer loops in func2 advance by T, the inner loops
// walk one tile. Several extents above are not multiples of T, which is why
// the inner loops also clamp against the full extent.
#define T 8
/*
 * func2 - tiled sweep over the full (m, k, i, n, l, j) index space.
 *
 * For every index combination the product
 *     a[i][k][j] * b[i][l] * c[m][j] * d[k][n]
 * is stored to out[l][m][n], with all arrays addressed as flat buffers.
 *
 * The indexing implies these minimum element counts:
 *   out: DIM_L * DIM_M * DIM_N
 *   a:   DIM_I * DIM_K * DIM_J
 *   b:   DIM_I * DIM_L
 *   c:   DIM_M * DIM_J
 *   d:   DIM_K * DIM_N
 *
 * The store is a plain assignment, not an accumulation, so each out element
 * is overwritten on every (i, k, j) iteration and ends up holding the value
 * from the last one that touches it.
 * NOTE(review): if a sum over i, k, j was intended this would need "+=" and a
 * zeroed out buffer - confirm against the reference implementation.
 *
 * Traversal: the six outer loops pick a tile origin (stepping by T) in the
 * order m, k, i, n, l, j; the six inner loops then visit the elements of that
 * tile in the same order, clamped to the array extent for partial tiles.
 */
void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) {
  for (size_t m0 = 0; m0 < DIM_M; m0 += T) {
    const size_t m_stop = (m0 + T < DIM_M) ? m0 + T : DIM_M;
    for (size_t k0 = 0; k0 < DIM_K; k0 += T) {
      const size_t k_stop = (k0 + T < DIM_K) ? k0 + T : DIM_K;
      for (size_t i0 = 0; i0 < DIM_I; i0 += T) {
        const size_t i_stop = (i0 + T < DIM_I) ? i0 + T : DIM_I;
        for (size_t n0 = 0; n0 < DIM_N; n0 += T) {
          const size_t n_stop = (n0 + T < DIM_N) ? n0 + T : DIM_N;
          for (size_t l0 = 0; l0 < DIM_L; l0 += T) {
            const size_t l_stop = (l0 + T < DIM_L) ? l0 + T : DIM_L;
            for (size_t j0 = 0; j0 < DIM_J; j0 += T) {
              const size_t j_stop = (j0 + T < DIM_J) ? j0 + T : DIM_J;

              // Walk the elements of the current tile.
              for (size_t mi = m0; mi < m_stop; ++mi) {
                const size_t c_row = DIM_J * mi;
                const size_t out_m = DIM_N * mi;
                for (size_t ki = k0; ki < k_stop; ++ki) {
                  const size_t d_row = DIM_N * ki;
                  for (size_t ix = i0; ix < i_stop; ++ix) {
                    const size_t a_row = DIM_K * DIM_J * ix + DIM_J * ki;
                    const size_t b_row = DIM_L * ix;
                    for (size_t ni = n0; ni < n_stop; ++ni) {
                      const size_t d_pos = d_row + ni;
                      const size_t out_mn = out_m + ni;
                      for (size_t li = l0; li < l_stop; ++li) {
                        const size_t out_pos = DIM_M * DIM_N * li + out_mn;
                        const size_t b_pos = b_row + li;
                        for (size_t ji = j0; ji < j_stop; ++ji) {
                          // Single expression: one access per operand and
                          // left-to-right multiplication, as in the original.
                          out[out_pos] = a[a_row + ji] * b[b_pos] * c[c_row + ji] * d[d_pos];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}