#include <immintrin.h>
#include "unistd.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Step, in bytes, used by flush_clflushopt() when walking a range.
 * Must be a power of two: the range start is rounded down by masking with
 * (FLUSH_ALIGN - 1).
 */
#define FLUSH_ALIGN 64

/*
 * Hand-encoded fallbacks for when the compiler intrinsics are not available.
 *
 *   _mm_clflushopt(addr)  emits a 0x66 prefix byte followed by `clflush`
 *   _mm_clwb(addr)        emits a 0x66 prefix byte followed by `xsaveopt`
 *   _mm_pcommit()         emits the raw bytes 66 0f ae f8
 *
 * `addr` may be any pointer expression; the byte it points to is passed to
 * the instruction as a read-write memory operand ("+m").
 *
 * Each macro expands to a single statement WITHOUT a trailing semicolon, so
 * it can be used like a function call, including as the body of an
 * unbraced if/else. `__asm__ __volatile__` is spelled that way (rather than
 * `asm volatile`) so the file also builds in strict ISO modes such as
 * -std=c11, where the plain `asm` keyword is not recognised.
 *
 * NOTE(review): these definitions take precedence over any function of the
 * same name declared by <immintrin.h> from this point on.
 * NOTE(review): PCOMMIT appears to have been deprecated by Intel -- confirm
 * whether any caller still needs _mm_pcommit().
 */
#define _mm_clflushopt(addr) \
    __asm__ __volatile__(".byte 0x66; clflush %0" \
                         : "+m"(*(volatile char *)(addr)))

#define _mm_clwb(addr) \
    __asm__ __volatile__(".byte 0x66; xsaveopt %0" \
                         : "+m"(*(volatile char *)(addr)))

#define _mm_pcommit() \
    __asm__ __volatile__(".byte 0x66, 0x0f, 0xae, 0xf8")

/*
 * flush_clflushopt -- issue _mm_clflushopt() on every FLUSH_ALIGN-sized,
 * FLUSH_ALIGN-aligned chunk that overlaps the byte range [addr, addr + len).
 *
 * addr: first byte of the range; it does not need to be aligned.
 * len:  number of bytes in the range; zero means there is nothing to flush.
 *
 * Only the flush instructions are issued here; this function emits no fence
 * instruction of its own.
 */
static void flush_clflushopt(void *addr, size_t len)
{
    if (len == 0) {
        return; /* empty range: no chunk overlaps it */
    }

    const uintptr_t start = (uintptr_t)addr;
    const uintptr_t end = start + len;

    /*
     * Build the mask in uintptr_t so that rounding down does not rely on
     * sign extension of a negative int.
     */
    const uintptr_t mask = ~((uintptr_t)FLUSH_ALIGN - 1);

    for (uintptr_t line = start & mask; line < end; line += FLUSH_ALIGN) {
        _mm_clflushopt((char *)line);
    }
}