WiscSort / pmem_benchmark / hugeMmap.c
hugeMmap.c
Raw
/*
  huge-mmap is a benchmark program that compares the performance of
  accessing huge pages with that of accessing small pages.
  

  Here is a brief instruction on preparing hugetlb'ed page

  ex.

  0. boot a 2.6 Linux kernel configured with HUGETLB support

  1. check hugepages
  # grep Huge /proc/meminfo
  HugePages_Total:     0
  HugePages_Free:      0
  Hugepagesize:     4096 kB

  2. set hugepages
  # echo 2 > /proc/sys/vm/nr_hugepages

  3. mount hugetlbfs
  # [ -d /mnt/huge ] || mkdir -p /mnt/huge
  # mount none /mnt/huge -t hugetlbfs -o rw,mode=0777

  4. run benchmark
  # make
  # ./huge-mmap


  note: 
  - hugetlb code portions come from linux/Documentation/vm/hugetlbpage.txt  
  - support x86 only now

  Kazutomo Yoshii <kazutomo@mcs.anl.gov>
*/
 

#if defined(__i386__) || defined(__x86_64__)

/*
 * Read the CPU time-stamp counter with the RDTSC instruction.
 *
 * RDTSC leaves the low 32 bits of the counter in EAX and the high 32 bits
 * in EDX.  Both halves are captured in separate outputs and combined by
 * hand: the "=A" constraint only denotes the EDX:EAX pair on i386, while on
 * x86_64 it selects a single 64-bit register and drops half of the counter.
 *
 * Returns the full 64-bit counter value.
 */
static __inline__ unsigned long long int rdtsc(void)
{
  unsigned int lo, hi;

  __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
  return ((unsigned long long int)hi << 32) | lo;
}
#else

#error "No tick counter is available!"

#endif  // architecture


/*
 * Declarations for the random number generator implemented in twister.c
 * (not part of this file).
 *
 * NOTE(review): `unsigned long` is 64 bits wide on LP64 targets such as
 * x86_64 Linux, so despite its name `uint32` is not guaranteed to be 32 bits.
 * It presumably has to match the definition used inside twister.c -- confirm
 * before changing it.
 */
typedef unsigned long uint32;
/* Seeds the generator; main() passes the current time. */
void seedMT(uint32 seed);
/* Returns the next value from the generator. */
uint32 randomMT(void);


/*
 *
 * Example of using hugepage memory in a user application using the mmap
 * system call.  Before running this application, make sure that the
 * administrator has mounted the hugetlbfs filesystem (on some directory
 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
 * example, the app requests LENGTH bytes (4MB by default) of memory backed by
 * huge pages.
 *
 * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
 * That means the addresses starting with 0x800000... will need to be
 * specified.  Specifying a fixed address is not required on ppc64, i386
 * or x86_64.
 */
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>


/*
 * Size in bytes of the buffer handed to each benchmark.  Defaults to 4 MB;
 * main() overrides it from the optional second command-line argument.
 */
static unsigned long  LENGTH = (4UL*1024*1024);

/* Backing file that bench_hugepage() creates, maps and finally unlinks. */
#define FILE_NAME "/mnt/huge/page"

/* The mapping is both readable and writable. */
#define PROTECTION (PROT_READ | PROT_WRITE)

/*
 * Address and flags passed to mmap().  Only ia64 requires a fixed address;
 * every other architecture passes a null address and omits MAP_FIXED.
 */
#ifdef __ia64__
#define ADDR (void *)(0x8000000000000000UL)
#define FLAGS (MAP_SHARED | MAP_FIXED)
#else
#define ADDR (void *)(0x0UL)
#define FLAGS (MAP_SHARED)
#endif



/*
 * Run a benchmark routine on a buffer backed by the hugetlbfs file FILE_NAME.
 *
 * The file is opened (created if necessary) and LENGTH bytes of it are
 * mapped using ADDR, PROTECTION and FLAGS.  The mapping is handed to `bench`;
 * afterwards it is unmapped, the descriptor is closed and the file is
 * unlinked.
 *
 * bench: routine to run; it receives the start of the mapped region.
 *
 * Returns the value produced by `bench`.  If open() or mmap() fails, the
 * error is reported with perror() and the process exits with status 1.
 */
static uint64_t bench_hugepage( uint64_t (*bench)( double* ptr))
{
  int huge_fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
  if (huge_fd < 0) {
    perror("open");
    exit(1);
  }

  void *region = mmap(ADDR, LENGTH, PROTECTION, FLAGS, huge_fd, 0);
  if (region == MAP_FAILED) {
    perror("mmap");
    /* do not leave the backing file behind on failure */
    unlink(FILE_NAME);
    exit(1);
  }

  const uint64_t elapsed = bench(region);

  /* tear down in reverse order of acquisition */
  munmap(region, LENGTH);
  close(huge_fd);
  unlink(FILE_NAME);

  return elapsed;
}


/*
 * Run a benchmark routine on an ordinary heap buffer of LENGTH bytes.
 *
 * bench: routine to run; it receives the start of the malloc()ed buffer.
 *
 * Returns the value produced by `bench`.  If the allocation fails, the error
 * is reported with perror() and the process exits with status 1.
 */
static uint64_t bench_smallpage( uint64_t (*bench)( double* ptr))
{
  uint64_t t = 0;
  void* addr;

  addr = malloc( LENGTH );
  if( ! addr ) {
    /* label the message with the failing call; addr is NULL here and must
       not be used as the perror() prefix */
    perror( "malloc" );
    exit(1);
  }
  
  t = bench(addr);

  free(addr);

  return t;
}

/* Iteration count used by memtest_bench(); main() sets it from argv[1]. */
static int BENCH_LOOP_CNT;

/*
 * qsort() comparison callback for arrays of double.
 *
 * a, b: pointers to the two doubles being compared.
 *
 * Returns 0 when the values compare equal, 1 when *a is greater than *b,
 * and -1 in every other case.
 */
static int dcomp( const void *a, const void *b )
{
  const double lhs = *(const double *)a;
  const double rhs = *(const double *)b;

  if (lhs == rhs) {
    return 0;
  }
  return (lhs > rhs) ? 1 : -1;
}


/*
 * Benchmark: time qsort() over the whole buffer.
 *
 * The buffer is treated as LENGTH/sizeof(double) doubles and filled with
 * values from randomMT() before timing starts, so only the sort itself is
 * measured.
 *
 * array: buffer of at least LENGTH bytes.
 *
 * Returns the difference between the rdtsc() readings taken immediately
 * after and before the qsort() call.
 */
static uint64_t qsort_bench( double* array )
{
  const int count = LENGTH/sizeof(double);
  int idx = 0;

  /* populate the buffer with random values (not timed) */
  while (idx < count) {
    array[idx] = (double)randomMT();
    idx++;
  }

  const uint64_t begin = rdtsc();
  qsort(array, count, sizeof(double), dcomp );
  const uint64_t end = rdtsc();

  return end - begin;
}

/*
 * Benchmark: time BENCH_LOOP_CNT read-modify-write operations at random
 * positions of the buffer.
 *
 * Each iteration draws three indices from randomMT() (reduced modulo the
 * number of doubles in LENGTH bytes), multiplies the elements at the second
 * and third index, adds the first index and stores the result at the first
 * index.
 *
 * array: buffer of at least LENGTH bytes.
 *
 * Returns the difference between the rdtsc() readings taken after and
 * before the loop.
 */
static uint64_t memtest_bench( double* array )
{
  const int count = LENGTH/sizeof(double);
  const uint64_t begin = rdtsc();
  int iter;

  for (iter = 0; iter < BENCH_LOOP_CNT; iter++) {
    /* the three draws happen in this fixed order: destination, then sources */
    const int dst = randomMT() % count;
    const int src_a = randomMT() % count;
    const int src_b = randomMT() % count;

    array[dst] = array[src_a] * array[src_b] + (double)dst;
  }

  return rdtsc() - begin;
}

/*
 * Determine the CPU clock frequency in Hz from /proc/cpuinfo.
 *
 * The first line that starts with "cpu MHz" is located, the first decimal
 * number on that line is read as a value in MHz and converted to Hz.
 *
 * Returns the frequency in Hz.  The process exits with status 1 when
 * /proc/cpuinfo cannot be opened or when it contains no usable "cpu MHz"
 * entry, because every timing result is divided by this value.
 */
uint64_t get_cpufreq_HZ(void)
{
  char buf[1024];
  double freq = 0.0;  /* remains 0 when no "cpu MHz" line is present */
  FILE* fp = fopen("/proc/cpuinfo", "r");

  if( !fp ) {
    perror("fopen");
    exit(1);
  }

  while( fgets(buf, sizeof(buf), fp) != NULL ) {
    if( strncmp( buf, "cpu MHz", 7 ) == 0 ) {
      const char* p = buf;

      /* skip the label; <ctype.h> functions need an unsigned char value */
      while( *p && !isdigit((unsigned char)*p) ) p++;
      freq = atof(p);
      break;
    }
  }

  fclose(fp);

  /* written as a negated comparison so that a NaN is rejected as well */
  if( !(freq > 0.0) ) {
    fprintf(stderr,
            "get_cpufreq_HZ: no usable \"cpu MHz\" entry in /proc/cpuinfo\n");
    exit(1);
  }

  return (uint64_t)(freq*1000*1000);
}


/*
 * Parse `text` as a strictly positive decimal integer that fits in an int.
 *
 * Returns 1 and stores the value in *out on success.  Returns 0, leaving
 * *out untouched, when the text is empty, has trailing characters, is out of
 * range or is not greater than zero.
 */
static int parse_positive_int(const char *text, int *out)
{
  char *end;
  long value;

  errno = 0;
  value = strtol(text, &end, 10);
  if( end == text || *end != '\0' || errno == ERANGE ||
      value <= 0 || value > INT_MAX ) {
    return 0;
  }

  *out = (int)value;
  return 1;
}

/*
 * Entry point: huge-mmap niter [allocsize_MB]
 *
 * niter        positive iteration count stored in BENCH_LOOP_CNT
 * allocsize_MB optional buffer size in megabytes (LENGTH, 4MB by default)
 *
 * Runs qsort_bench ten times on a heap buffer and on a hugetlbfs mapping and
 * prints, per try, both tick counts divided by the CPU frequency.
 *
 * Returns 0 on success and 1 when the arguments are missing or invalid.
 */
int main(int argc, char* argv[])
{
  int i;
  int alloc_mb;
  uint64_t t1,t2;
  uint64_t cpufreq;

  if( argc < 2 ) {
    printf("Usage: %s niter [allocsize_MB]\n", argv[0]);
    printf("allocsize is 4MB by default\n");
    return 1;
  }

  if( !parse_positive_int( argv[1], &BENCH_LOOP_CNT ) ) {
    printf("please enter valid value [0, 2^32)\n");
    return 1;
  }

  if( argc > 2 ) {
    /* The size is validated as an int number of megabytes and scaled in
       unsigned long, so neither a negative value nor a request of 2048MB
       or more can overflow int arithmetic. */
    if( !parse_positive_int( argv[2], &alloc_mb ) ||
        (unsigned long)alloc_mb > (ULONG_MAX >> 20) ) {
      printf("please enter valid value [0, PHYCONT_MB)\n");
      return 1;
    }
    LENGTH = (unsigned long)alloc_mb << 20;
  }

  /* queried only after the arguments are known to be valid */
  cpufreq = get_cpufreq_HZ();

  seedMT( (uint32)time(NULL) );

  printf("# BENCH_LOOP_CNT=%d\n", BENCH_LOOP_CNT);
  printf("# LENGTH=%lu MB\n", LENGTH>>20);
  printf("# small_page=4KB in x86\n");
  printf("# huge_page=4MB in x86\n");

  printf("# try small_page huge_page\n");


  for( i=0; i<10; i++ ) {
    t1 = bench_smallpage( qsort_bench );
    t2 = bench_hugepage( qsort_bench );
    /* convert tick counts to seconds using the CPU frequency */
    printf("%d %f %f\n",
       i, 
       (double)t1/(double)cpufreq,
       (double)t2/(double)cpufreq );
  }

  return 0;
}