danial27@castor:~/lab-4-valentino-jaber/task4$ make detailed cc -g -c -O2 -o task4.o task4.c cc -g -static -o task4 task4.o main.o valgrind -v --tool=cachegrind --D1=4096,4,64 --LL=16384,8,64 --cachegrind-out-file=stats.cgout ./task4 ==3382762== Cachegrind, a cache and branch-prediction profiler ==3382762== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al. ==3382762== Using Valgrind-3.15.0-608cb11914-20190413 and LibVEX; rerun with -h for copyright info ==3382762== Command: ./task4 ==3382762== --3382762-- Valgrind options: --3382762-- -v --3382762-- --tool=cachegrind --3382762-- --D1=4096,4,64 --3382762-- --LL=16384,8,64 --3382762-- --cachegrind-out-file=stats.cgout --3382762-- Contents of /proc/version: --3382762-- Linux version 5.4.0-128-generic (buildd@bos02-arm64-058) (gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)) #144-Ubuntu SMP Tue Sep 20 11:03:09 UTC 2022 --3382762-- --3382762-- Arch and hwcaps: ARM64, LittleEndian, baseline --3382762-- Page sizes: currently 4096, max supported 65536 --3382762-- Valgrind library directory: /usr/lib/aarch64-linux-gnu/valgrind --3382762-- Warning: Cannot auto-detect cache config, using defaults. --3382762-- Run with -v to see. ==3382762== Cache configuration used: ==3382762== I1: 16,384 B, 4-way, 64 B lines ==3382762== D1: 4,096 B, 4-way, 64 B lines ==3382762== LL: 16,384 B, 8-way, 64 B lines --3382762-- Reading syms from /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4 --3382762-- object doesn't have a dynamic symbol table --3382762-- Reading syms from /usr/lib/aarch64-linux-gnu/valgrind/cachegrind-arm64-linux --3382762-- object doesn't have a symbol table --3382762-- object doesn't have a dynamic symbol table --3382762-- Scheduler: using generic scheduler lock implementation. ==3382762== embedded gdbserver: reading from /tmp/vgdb-pipe-from-vgdb-to-3382762-by-danial27-on-??? ==3382762== embedded gdbserver: writing to /tmp/vgdb-pipe-to-vgdb-from-3382762-by-danial27-on-??? ==3382762== embedded gdbserver: shared mem /tmp/vgdb-pipe-shared-mem-vgdb-3382762-by-danial27-on-??? ==3382762== ==3382762== TO CONTROL THIS PROCESS USING vgdb (which you probably ==3382762== don't want to do, unless you know exactly what you're doing, ==3382762== or are doing some strange experiment): ==3382762== /usr/lib/aarch64-linux-gnu/valgrind/../../bin/vgdb --pid=3382762 ...command... ==3382762== ==3382762== TO DEBUG THIS PROCESS USING GDB: start GDB like this ==3382762== /path/to/gdb ./task4 ==3382762== and then give GDB the following command ==3382762== target remote | /usr/lib/aarch64-linux-gnu/valgrind/../../bin/vgdb --pid=3382762 ==3382762== --pid is optional if only one valgrind process is running ==3382762== ==3382762== ==3382762== I refs: 10,858,954,108 ==3382762== I1 misses: 352 ==3382762== LLi misses: 349 ==3382762== I1 miss rate: 0.00% ==3382762== LLi miss rate: 0.00% ==3382762== ==3382762== D refs: 4,800,006,348 (3,840,004,772 rd + 960,001,576 wr) ==3382762== D1 misses: 174,912,500 ( 136,175,391 rd + 38,737,109 wr) ==3382762== LLd misses: 51,161,905 ( 50,398,064 rd + 763,841 wr) ==3382762== D1 miss rate: 3.6% ( 3.5% + 4.0% ) ==3382762== LLd miss rate: 1.1% ( 1.3% + 0.1% ) ==3382762== ==3382762== LL refs: 174,912,852 ( 136,175,743 rd + 38,737,109 wr) ==3382762== LL misses: 51,162,254 ( 50,398,413 rd + 763,841 wr) ==3382762== LL miss rate: 0.3% ( 0.3% + 0.1% ) cg_annotate --auto=yes --show-percs=no stats.cgout | tee stats.rep -------------------------------------------------------------------------------- I1 cache: 16384 B, 64 B, 4-way associative D1 cache: 4096 B, 64 B, 4-way associative LL cache: 16384 B, 64 B, 8-way associative Command: ./task4 Data file: stats.cgout Events recorded: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw Events shown: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw Event sort order: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw Thresholds: 0.1 100 100 100 100 100 100 100 100 Include dirs: User annotated: Auto-annotation: on -------------------------------------------------------------------------------- Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw -------------------------------------------------------------------------------- 10,858,954,108 352 349 3,840,004,772 136,175,391 50,398,064 960,001,576 38,737,109 763,841 PROGRAM TOTALS -------------------------------------------------------------------------------- Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw file:function -------------------------------------------------------------------------------- 10,858,932,379 3 3 3,840,000,005 136,175,101 50,397,857 960,000,005 38,736,980 763,728 /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4.c:func2 -------------------------------------------------------------------------------- -- Auto-annotated source: /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4.c -------------------------------------------------------------------------------- Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw -- line 7 ---------------------------------------- . . . . . . . . . #define DIM_L 50 . . . . . . . . . #define DIM_N 40 . . . . . . . . . #define DIM_I 25 . . . . . . . . . #define DIM_K 32 . . . . . . . . . #define DIM_M 20 . . . . . . . . . . . . . . . . . . #define B 8 . . . . . . . . . 14 1 1 0 0 0 5 0 0 void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) { . . . . . . . . . register size_t j; . . . . . . . . . register size_t l; . . . . . . . . . register size_t n; . . . . . . . . . register size_t i; . . . . . . . . . register size_t k; . . . . . . . . . register size_t m; 151 0 0 0 0 0 0 0 0 for (j = 0; j < DIM_J; ++j) { 7,710 0 0 0 0 0 0 0 0 for (l = 0; l < DIM_L; ++l) { 303,000 1 1 0 0 0 0 0 0 for (n = 0; n < DIM_N; ++n) { 6,000,000 0 0 0 0 0 0 0 0 for (i = 0; i < DIM_I; ++i) { 193,500,000 0 0 0 0 0 0 0 0 for (k = 0; k < DIM_K; ++k) { 2,928,000,000 0 0 0 0 0 0 0 0 for (m = 0; m < DIM_M; ++m) { 7,731,121,500 1 1 3,840,000,000 136,175,099 50,397,855 960,000,000 38,736,980 763,728 out[DIM_M * DIM_N * l + DIM_N * m + n] = a[DIM_K * DIM_J * i + DIM_J * k + j] * b[DIM_L * i + l] * c[DIM_J * m + j] * d[DIM_N * k + n]; . . . . . . . . . } . . . . . . . . . } . . . . . . . . . } . . . . . . . . . } . . . . . . . . . } . . . . . . . . . } 4 0 0 5 2 2 0 0 0 } . . . . . . . . . . . . . . . . . . // void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) { . . . . . . . . . // register size_t j; . . . . . . . . . // register size_t l; . . . . . . . . . // register size_t n; . . . . . . . . . // register size_t i; . . . . . . . . . // register size_t k; . . . . . . . . . // register size_t m; -- line 43 ---------------------------------------- -------------------------------------------------------------------------------- Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw -------------------------------------------------------------------------------- 10,858,932,379 3 3 3,840,000,005 136,175,101 50,397,857 960,000,005 38,736,980 763,728 events annotated