danial27@castor:~/lab-4-valentino-jaber/task4$ make detailed
cc -g -c -O2 -o task4.o task4.c
cc -g -static -o task4 task4.o main.o
valgrind -v --tool=cachegrind --D1=4096,4,64 --LL=16384,8,64 --cachegrind-out-file=stats.cgout ./task4
==3382762== Cachegrind, a cache and branch-prediction profiler
==3382762== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.
==3382762== Using Valgrind-3.15.0-608cb11914-20190413 and LibVEX; rerun with -h for copyright info
==3382762== Command: ./task4
==3382762==
--3382762-- Valgrind options:
--3382762-- -v
--3382762-- --tool=cachegrind
--3382762-- --D1=4096,4,64
--3382762-- --LL=16384,8,64
--3382762-- --cachegrind-out-file=stats.cgout
--3382762-- Contents of /proc/version:
--3382762-- Linux version 5.4.0-128-generic (buildd@bos02-arm64-058) (gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)) #144-Ubuntu SMP Tue Sep 20 11:03:09 UTC 2022
--3382762--
--3382762-- Arch and hwcaps: ARM64, LittleEndian, baseline
--3382762-- Page sizes: currently 4096, max supported 65536
--3382762-- Valgrind library directory: /usr/lib/aarch64-linux-gnu/valgrind
--3382762-- Warning: Cannot auto-detect cache config, using defaults.
--3382762-- Run with -v to see.
==3382762== Cache configuration used:
==3382762== I1: 16,384 B, 4-way, 64 B lines
==3382762== D1: 4,096 B, 4-way, 64 B lines
==3382762== LL: 16,384 B, 8-way, 64 B lines
--3382762-- Reading syms from /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4
--3382762-- object doesn't have a dynamic symbol table
--3382762-- Reading syms from /usr/lib/aarch64-linux-gnu/valgrind/cachegrind-arm64-linux
--3382762-- object doesn't have a symbol table
--3382762-- object doesn't have a dynamic symbol table
--3382762-- Scheduler: using generic scheduler lock implementation.
==3382762== embedded gdbserver: reading from /tmp/vgdb-pipe-from-vgdb-to-3382762-by-danial27-on-???
==3382762== embedded gdbserver: writing to /tmp/vgdb-pipe-to-vgdb-from-3382762-by-danial27-on-???
==3382762== embedded gdbserver: shared mem /tmp/vgdb-pipe-shared-mem-vgdb-3382762-by-danial27-on-???
==3382762==
==3382762== TO CONTROL THIS PROCESS USING vgdb (which you probably
==3382762== don't want to do, unless you know exactly what you're doing,
==3382762== or are doing some strange experiment):
==3382762== /usr/lib/aarch64-linux-gnu/valgrind/../../bin/vgdb --pid=3382762 ...command...
==3382762==
==3382762== TO DEBUG THIS PROCESS USING GDB: start GDB like this
==3382762== /path/to/gdb ./task4
==3382762== and then give GDB the following command
==3382762== target remote | /usr/lib/aarch64-linux-gnu/valgrind/../../bin/vgdb --pid=3382762
==3382762== --pid is optional if only one valgrind process is running
==3382762==
==3382762==
==3382762== I refs: 10,858,954,108
==3382762== I1 misses: 352
==3382762== LLi misses: 349
==3382762== I1 miss rate: 0.00%
==3382762== LLi miss rate: 0.00%
==3382762==
==3382762== D refs: 4,800,006,348 (3,840,004,772 rd + 960,001,576 wr)
==3382762== D1 misses: 174,912,500 ( 136,175,391 rd + 38,737,109 wr)
==3382762== LLd misses: 51,161,905 ( 50,398,064 rd + 763,841 wr)
==3382762== D1 miss rate: 3.6% ( 3.5% + 4.0% )
==3382762== LLd miss rate: 1.1% ( 1.3% + 0.1% )
==3382762==
==3382762== LL refs: 174,912,852 ( 136,175,743 rd + 38,737,109 wr)
==3382762== LL misses: 51,162,254 ( 50,398,413 rd + 763,841 wr)
==3382762== LL miss rate: 0.3% ( 0.3% + 0.1% )
cg_annotate --auto=yes --show-percs=no stats.cgout | tee stats.rep
--------------------------------------------------------------------------------
I1 cache: 16384 B, 64 B, 4-way associative
D1 cache: 4096 B, 64 B, 4-way associative
LL cache: 16384 B, 64 B, 8-way associative
Command: ./task4
Data file: stats.cgout
Events recorded: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
Events shown: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
Event sort order: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
Thresholds: 0.1 100 100 100 100 100 100 100 100
Include dirs:
User annotated:
Auto-annotation: on
--------------------------------------------------------------------------------
Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
--------------------------------------------------------------------------------
10,858,954,108 352 349 3,840,004,772 136,175,391 50,398,064 960,001,576 38,737,109 763,841 PROGRAM TOTALS
--------------------------------------------------------------------------------
Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw file:function
--------------------------------------------------------------------------------
10,858,932,379 3 3 3,840,000,005 136,175,101 50,397,857 960,000,005 38,736,980 763,728 /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4.c:func2
--------------------------------------------------------------------------------
-- Auto-annotated source: /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4.c
--------------------------------------------------------------------------------
Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
-- line 7 ----------------------------------------
. . . . . . . . . #define DIM_L 50
. . . . . . . . . #define DIM_N 40
. . . . . . . . . #define DIM_I 25
. . . . . . . . . #define DIM_K 32
. . . . . . . . . #define DIM_M 20
. . . . . . . . .
. . . . . . . . . #define B 8
. . . . . . . . .
14 1 1 0 0 0 5 0 0 void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) {
. . . . . . . . . register size_t j;
. . . . . . . . . register size_t l;
. . . . . . . . . register size_t n;
. . . . . . . . . register size_t i;
. . . . . . . . . register size_t k;
. . . . . . . . . register size_t m;
151 0 0 0 0 0 0 0 0 for (j = 0; j < DIM_J; ++j) {
7,710 0 0 0 0 0 0 0 0 for (l = 0; l < DIM_L; ++l) {
303,000 1 1 0 0 0 0 0 0 for (n = 0; n < DIM_N; ++n) {
6,000,000 0 0 0 0 0 0 0 0 for (i = 0; i < DIM_I; ++i) {
193,500,000 0 0 0 0 0 0 0 0 for (k = 0; k < DIM_K; ++k) {
2,928,000,000 0 0 0 0 0 0 0 0 for (m = 0; m < DIM_M; ++m) {
7,731,121,500 1 1 3,840,000,000 136,175,099 50,397,855 960,000,000 38,736,980 763,728 out[DIM_M * DIM_N * l + DIM_N * m + n] = a[DIM_K * DIM_J * i + DIM_J * k + j] * b[DIM_L * i + l] * c[DIM_J * m + j] * d[DIM_N * k + n];
. . . . . . . . . }
. . . . . . . . . }
. . . . . . . . . }
. . . . . . . . . }
. . . . . . . . . }
. . . . . . . . . }
4 0 0 5 2 2 0 0 0 }
. . . . . . . . .
. . . . . . . . . // void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) {
. . . . . . . . . // register size_t j;
. . . . . . . . . // register size_t l;
. . . . . . . . . // register size_t n;
. . . . . . . . . // register size_t i;
. . . . . . . . . // register size_t k;
. . . . . . . . . // register size_t m;
-- line 43 ----------------------------------------
--------------------------------------------------------------------------------
Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
--------------------------------------------------------------------------------
10,858,932,379 3 3 3,840,000,005 136,175,101 50,397,857 960,000,005 38,736,980 763,728 events annotated