// optimized versions of matrix diagonal summing #include "matvec.h" // You can write several different versions of your optimized function // in this file and call one of them in the last function. int sumdiag_VER1(matrix_t *mat, vector_t *vec) { // OPTIMIZED CODE HERE // Initialize Variables long mrow = mat->rows; long lenV = vec->len; int minus = mrow-1; vector_t vecX = *vec; //Checking for bad length of Vector if(lenV != (mrow + minus)){ printf("sumdiag_base: bad sizes\n"); return 1; } for(int i=0; i<(lenV); i++){ // initialize vector of diagonal sums VSET(vecX,i,0); // to all 0s } int dtemp; //keeps track of current diagonal for(int i =0; i<mrow; i++){ //go through the 2D array sequentially, adds to correct diagnoal each time dtemp = minus-i; //begining diagonal decreases each time for(int j = 0; j<mrow; j++){ VSET(vecX,dtemp, VGET(vecX, dtemp) + MGET(*mat,i,j)); //gets the previous value in the sum for a diagonal and add the current num to it dtemp++; //continue to next diagonal } } return 0; } int sumdiag_VER2(matrix_t *mat, vector_t *vec) { // OPTIMIZED CODE HERE // Initialize Variables long mrow = mat->rows; long lenV = vec->len; int minus = mrow-1; matrix_t matM = *mat; vector_t vecX = *vec; //Checking for bad length of Vector if(lenV != (mrow + minus)){ printf("sumdiag_base: bad sizes\n"); return 1; } for(int i=0; i<(lenV); i++){ // initialize vector of diagonal sums VSET(vecX,i,0); // to all 0s } int dtemp; //keeps track of current diagonal int j; for(int i =0; i<mrow; i++){ //go through the 2D array sequentially, adds to correct diagnoal each time dtemp = minus-i; //begining diagonal decreases each time for(j = 0; j<mrow-4; j+=4){ VSET(vecX,dtemp, VGET(vecX, dtemp) + MGET(matM,i,j)); //gets the previous value in the sum for a diagonal and add the current num to it dtemp++; //continue to next diagonal VSET(vecX,dtemp,VGET(vecX,dtemp) + MGET(matM,i,j+1)); //unrolled to increase pipeline efficency dtemp++; VSET(vecX, dtemp,VGET(vecX,dtemp) + MGET(matM,i,j+2)); dtemp++; VSET(vecX,dtemp,VGET(vecX,dtemp)+ MGET(matM,i,j+3)); dtemp++; } for(;j<mrow;j++){ //clean up if the rows/cols are not divisible by 4 VSET(vecX,dtemp,VGET(vecX,dtemp) + MGET(matM,i,j)); dtemp++; } } return 0; } int sumdiag_OPTM(matrix_t *mat, vector_t *vec) { // call your preferred version of the function // OPTIMIZED CODE HERE // Initialize Variables long mrow = mat->rows; long lenV = vec->len; int minus = mrow-1; matrix_t matM = *mat; vector_t vecX = *vec; //Checking for bad length of Vector if(lenV != (mrow + minus)){ printf("sumdiag_base: bad sizes\n"); return 1; } for(int i=0; i<(lenV); i++){ // initialize vector of diagonal sums VSET(vecX,i,0); // to all 0s } int dtemp; //keeps track of current diagonal int j; for(int i =0; i<mrow; i++){ //go through the 2D array sequentially, adds to correct diagnoal each time dtemp = minus-i; //begining diagonal decreases each time for(j = 0; j<mrow-4; j+=4){ VSET(vecX,dtemp, VGET(vecX, dtemp) + MGET(matM,i,j)); //gets the previous value in the sum for a diagonal and add the current num to it dtemp++; //continue to next diagonal VSET(vecX,dtemp,VGET(vecX,dtemp) + MGET(matM,i,j+1)); //unrolled to increase pipeline efficency dtemp++; VSET(vecX, dtemp,VGET(vecX,dtemp) + MGET(matM,i,j+2)); dtemp++; VSET(vecX,dtemp,VGET(vecX,dtemp)+ MGET(matM,i,j+3)); dtemp++; } for(;j<mrow;j++){ //clean up if the rows/cols are not divisible by 4 VSET(vecX,dtemp,VGET(vecX,dtemp) + MGET(matM,i,j)); dtemp++; } } return 0; }