#pragma once

// Utility macros and helpers for CUDA timing, allocation via a CUB caching
// allocator, and error checking. Requires <cstdio>/<cstdlib> for
// printf/exit and CUB's CubDebugExit / cub::CachingDeviceAllocator; callers
// are expected to have a `cub::CachingDeviceAllocator g_allocator` in scope
// for CLEANUP/ALLOCATE.

// Declares and creates the cudaEvent_t pair (start, stop) used by TIME_FUNC.
// Must appear once in a scope before any TIME_FUNC use in that scope.
#define SETUP_TIMING()            \
    cudaEvent_t start, stop;      \
    cudaEventCreate(&start);      \
    cudaEventCreate(&stop);

// Times the expression/call `f` on the default stream and stores the elapsed
// milliseconds (float) into `t`. Wrapped in do/while(0) so the macro behaves
// as a single statement inside unbraced if/else.
// NOTE: cudaEventSynchronize blocks the host until `f`'s work completes.
#define TIME_FUNC(f, t)                        \
    do {                                       \
        cudaEventRecord(start, 0);             \
        f;                                     \
        cudaEventRecord(stop, 0);              \
        cudaEventSynchronize(stop);            \
        cudaEventElapsedTime(&t, start, stop); \
    } while (0)

// Frees `vec` through the caching allocator if it is non-null.
// do/while(0) guards against the dangling-else hazard of the original
// bare-if form when used inside an outer if/else.
#define CLEANUP(vec)                                       \
    do {                                                   \
        if (vec) CubDebugExit(g_allocator.DeviceFree(vec)); \
    } while (0)

// Allocates `size` bytes of device memory into pointer `vec` via the
// caching allocator, aborting (via CubDebugExit) on failure.
#define ALLOCATE(vec, size) \
    CubDebugExit(g_allocator.DeviceAllocate((void**)&vec, size))

// Copies `numEntries` elements of type T from host array `src` to a freshly
// allocated device buffer obtained from `g_allocator`, and returns the
// device pointer. Aborts (via CubDebugExit) on allocation or copy failure.
// The caller owns the returned buffer and should release it with
// g_allocator.DeviceFree (e.g. via CLEANUP). The size math is performed in
// size_t (sizeof(T) promotes the product), so large positive counts are
// safe; numEntries must be non-negative.
template <typename T>
T* loadToGPU(T* src, int numEntries, cub::CachingDeviceAllocator& g_allocator) {
    T* dest;
    CubDebugExit(g_allocator.DeviceAllocate((void**)&dest, sizeof(T) * numEntries));
    CubDebugExit(cudaMemcpy(dest, src, sizeof(T) * numEntries, cudaMemcpyHostToDevice));
    return dest;
}

// Elements processed per thread block; BLOCK_THREADS and ITEMS_PER_THREAD
// must be defined by the including translation unit before use.
#define TILE_SIZE (BLOCK_THREADS * ITEMS_PER_THREAD)

// Synchronizes the device, then checks for any pending CUDA error (both
// launch-config errors and asynchronous in-kernel faults surface here).
// Prints the error string and exits on failure. do/while(0) wrapping makes
// it safe as a single statement in unbraced if/else. Note: the contained
// cudaDeviceSynchronize() blocks the host — intended for debugging /
// coarse-grained checkpoints, not hot loops.
#define CHECK_ERROR()                                                  \
    do {                                                               \
        cudaDeviceSynchronize();                                       \
        cudaError_t error = cudaGetLastError();                        \
        if (error != cudaSuccess) {                                    \
            printf("CUDA error: %s\n", cudaGetErrorString(error));     \
            exit(-1);                                                  \
        }                                                              \
    } while (0)