computing-systems-212 / Lab 4: Optimizing Caches / task4 / cachegrind_original.txt
cachegrind_original.txt
Raw
danial27@castor:~/lab-4-valentino-jaber/task4$ make detailed
cc -g -c -O2 -o task4.o task4.c
cc -g -static -o task4 task4.o main.o
valgrind -v --tool=cachegrind --D1=4096,4,64 --LL=16384,8,64 --cachegrind-out-file=stats.cgout ./task4
==3382762== Cachegrind, a cache and branch-prediction profiler
==3382762== Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.
==3382762== Using Valgrind-3.15.0-608cb11914-20190413 and LibVEX; rerun with -h for copyright info
==3382762== Command: ./task4
==3382762==
--3382762-- Valgrind options:
--3382762--    -v
--3382762--    --tool=cachegrind
--3382762--    --D1=4096,4,64
--3382762--    --LL=16384,8,64
--3382762--    --cachegrind-out-file=stats.cgout
--3382762-- Contents of /proc/version:
--3382762--   Linux version 5.4.0-128-generic (buildd@bos02-arm64-058) (gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)) #144-Ubuntu SMP Tue Sep 20 11:03:09 UTC 2022
--3382762--
--3382762-- Arch and hwcaps: ARM64, LittleEndian, baseline
--3382762-- Page sizes: currently 4096, max supported 65536
--3382762-- Valgrind library directory: /usr/lib/aarch64-linux-gnu/valgrind
--3382762-- Warning: Cannot auto-detect cache config, using defaults.
--3382762--          Run with -v to see.
==3382762== Cache configuration used:
==3382762==   I1: 16,384 B, 4-way, 64 B lines
==3382762==   D1: 4,096 B, 4-way, 64 B lines
==3382762==   LL: 16,384 B, 8-way, 64 B lines
--3382762-- Reading syms from /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4
--3382762--    object doesn't have a dynamic symbol table
--3382762-- Reading syms from /usr/lib/aarch64-linux-gnu/valgrind/cachegrind-arm64-linux
--3382762--    object doesn't have a symbol table
--3382762--    object doesn't have a dynamic symbol table
--3382762-- Scheduler: using generic scheduler lock implementation.
==3382762== embedded gdbserver: reading from /tmp/vgdb-pipe-from-vgdb-to-3382762-by-danial27-on-???
==3382762== embedded gdbserver: writing to   /tmp/vgdb-pipe-to-vgdb-from-3382762-by-danial27-on-???
==3382762== embedded gdbserver: shared mem   /tmp/vgdb-pipe-shared-mem-vgdb-3382762-by-danial27-on-???
==3382762==
==3382762== TO CONTROL THIS PROCESS USING vgdb (which you probably
==3382762== don't want to do, unless you know exactly what you're doing,
==3382762== or are doing some strange experiment):
==3382762==   /usr/lib/aarch64-linux-gnu/valgrind/../../bin/vgdb --pid=3382762 ...command...
==3382762==
==3382762== TO DEBUG THIS PROCESS USING GDB: start GDB like this
==3382762==   /path/to/gdb ./task4
==3382762== and then give GDB the following command
==3382762==   target remote | /usr/lib/aarch64-linux-gnu/valgrind/../../bin/vgdb --pid=3382762
==3382762== --pid is optional if only one valgrind process is running
==3382762==
==3382762==
==3382762== I   refs:      10,858,954,108
==3382762== I1  misses:               352
==3382762== LLi misses:               349
==3382762== I1  miss rate:           0.00%
==3382762== LLi miss rate:           0.00%
==3382762==
==3382762== D   refs:       4,800,006,348  (3,840,004,772 rd   + 960,001,576 wr)
==3382762== D1  misses:       174,912,500  (  136,175,391 rd   +  38,737,109 wr)
==3382762== LLd misses:        51,161,905  (   50,398,064 rd   +     763,841 wr)
==3382762== D1  miss rate:            3.6% (          3.5%     +         4.0%  )
==3382762== LLd miss rate:            1.1% (          1.3%     +         0.1%  )
==3382762==
==3382762== LL refs:          174,912,852  (  136,175,743 rd   +  38,737,109 wr)
==3382762== LL misses:         51,162,254  (   50,398,413 rd   +     763,841 wr)
==3382762== LL miss rate:             0.3% (          0.3%     +         0.1%  )
cg_annotate --auto=yes --show-percs=no stats.cgout | tee stats.rep
--------------------------------------------------------------------------------
I1 cache:         16384 B, 64 B, 4-way associative
D1 cache:         4096 B, 64 B, 4-way associative
LL cache:         16384 B, 64 B, 8-way associative
Command:          ./task4
Data file:        stats.cgout
Events recorded:  Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
Events shown:     Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
Event sort order: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
Thresholds:       0.1 100 100 100 100 100 100 100 100
Include dirs:
User annotated:
Auto-annotation:  on

--------------------------------------------------------------------------------
Ir             I1mr ILmr Dr            D1mr        DLmr       Dw          D1mw       DLmw
--------------------------------------------------------------------------------
10,858,954,108  352  349 3,840,004,772 136,175,391 50,398,064 960,001,576 38,737,109 763,841  PROGRAM TOTALS

--------------------------------------------------------------------------------
Ir             I1mr ILmr Dr            D1mr        DLmr       Dw          D1mw       DLmw     file:function
--------------------------------------------------------------------------------
10,858,932,379    3    3 3,840,000,005 136,175,101 50,397,857 960,000,005 38,736,980 763,728  /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4.c:func2

--------------------------------------------------------------------------------
-- Auto-annotated source: /ubc/ece/home/ugrads/d/danial27/lab-4-valentino-jaber/task4/task4.c
--------------------------------------------------------------------------------
Ir            I1mr ILmr Dr            D1mr        DLmr       Dw          D1mw       DLmw

-- line 7 ----------------------------------------
            .    .    .             .           .          .           .          .       .  #define DIM_L 50
            .    .    .             .           .          .           .          .       .  #define DIM_N 40
            .    .    .             .           .          .           .          .       .  #define DIM_I 25
            .    .    .             .           .          .           .          .       .  #define DIM_K 32
            .    .    .             .           .          .           .          .       .  #define DIM_M 20
            .    .    .             .           .          .           .          .       .
            .    .    .             .           .          .           .          .       .  #define B 8
            .    .    .             .           .          .           .          .       .
           14    1    1             0           0          0           5          0       0  void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) {
            .    .    .             .           .          .           .          .       .      register size_t j;
            .    .    .             .           .          .           .          .       .      register size_t l;
            .    .    .             .           .          .           .          .       .      register size_t n;
            .    .    .             .           .          .           .          .       .      register size_t i;
            .    .    .             .           .          .           .          .       .      register size_t k;
            .    .    .             .           .          .           .          .       .      register size_t m;
          151    0    0             0           0          0           0          0       0      for (j = 0; j < DIM_J; ++j) {
        7,710    0    0             0           0          0           0          0       0          for (l = 0; l < DIM_L; ++l) {
      303,000    1    1             0           0          0           0          0       0              for (n = 0; n < DIM_N; ++n) {
    6,000,000    0    0             0           0          0           0          0       0                  for (i = 0; i < DIM_I; ++i) {
  193,500,000    0    0             0           0          0           0          0       0                      for (k = 0; k < DIM_K; ++k) {
2,928,000,000    0    0             0           0          0           0          0       0                          for (m = 0; m < DIM_M; ++m) {
7,731,121,500    1    1 3,840,000,000 136,175,099 50,397,855 960,000,000 38,736,980 763,728                              out[DIM_M * DIM_N * l + DIM_N * m + n] = a[DIM_K * DIM_J * i + DIM_J * k + j] * b[DIM_L * i + l] * c[DIM_J * m + j] * d[DIM_N * k + n];
            .    .    .             .           .          .           .          .       .                          }
            .    .    .             .           .          .           .          .       .                      }
            .    .    .             .           .          .           .          .       .                  }
            .    .    .             .           .          .           .          .       .              }
            .    .    .             .           .          .           .          .       .          }
            .    .    .             .           .          .           .          .       .      }
            4    0    0             5           2          2           0          0       0  }
            .    .    .             .           .          .           .          .       .
            .    .    .             .           .          .           .          .       .  // void func2(volatile double *out, volatile const double *a, volatile const double *b, volatile const double *c, volatile const double *d) {
            .    .    .             .           .          .           .          .       .  //     register size_t j;
            .    .    .             .           .          .           .          .       .  //     register size_t l;
            .    .    .             .           .          .           .          .       .  //     register size_t n;
            .    .    .             .           .          .           .          .       .  //     register size_t i;
            .    .    .             .           .          .           .          .       .  //     register size_t k;
            .    .    .             .           .          .           .          .       .  //     register size_t m;
-- line 43 ----------------------------------------

--------------------------------------------------------------------------------
Ir             I1mr ILmr Dr            D1mr        DLmr       Dw          D1mw       DLmw
--------------------------------------------------------------------------------
10,858,932,379    3    3 3,840,000,005 136,175,101 50,397,857 960,000,005 38,736,980 763,728  events annotated