#pragma once
#define CUB_STDERR

// Error-checking, timing, and device-allocation helpers for CUDA/CUB code.
// Assumes the includer provides <cuda_runtime.h>, <cstdio>, <cstdlib>, and
// CUB (cub::CachingDeviceAllocator, CubDebugExit) — TODO confirm include order.

// Print a CUDA error with source location; exits the process when abort is true.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

// Check a CUDA API return code, reporting the call site's file/line.
// do/while(0) makes the macro a single statement, safe in unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Synchronize the whole device, then abort with a diagnostic if any
// previously-launched work (or the sync itself) raised an error.
#define CHECK_ERROR() do { \
    cudaDeviceSynchronize(); \
    cudaError_t error = cudaGetLastError(); \
    if(error != cudaSuccess) \
    { \
        gpuErrchk(error); \
        exit(-1); /* defensive: unreachable when gpuAssert aborts */ \
    } \
} while (0)

// Same as CHECK_ERROR, but only synchronizes the given stream.
#define CHECK_ERROR_STREAM(stream) do { \
    cudaStreamSynchronize(stream); \
    cudaError_t error = cudaGetLastError(); \
    if(error != cudaSuccess) \
    { \
        gpuErrchk(error); \
        exit(-1); /* defensive: unreachable when gpuAssert aborts */ \
    } \
} while (0)

// Declare and create the `start`/`stop` events used by TIME_FUNC.
// NOTE: deliberately NOT wrapped in do/while(0) — it must introduce the
// identifiers `start` and `stop` into the caller's scope.
#define SETUP_TIMING() cudaEvent_t start, stop; \
    cudaEventCreate(&start); \
    cudaEventCreate(&stop);

// Time expression f on the default stream; elapsed milliseconds -> t (float).
#define TIME_FUNC(f,t) do { \
    cudaEventRecord(start, 0); \
    f; \
    cudaEventRecord(stop, 0); \
    cudaEventSynchronize(stop); \
    cudaEventElapsedTime(&t, start, stop); \
} while (0)

// Free a device buffer obtained from the caching allocator (no-op on null).
#define CLEANUP(vec) do { if(vec) CubDebugExit(g_allocator.DeviceFree(vec)); } while (0)

// Allocate `size` bytes of device memory into pointer `vec` via the allocator.
#define ALLOCATE(vec,size) CubDebugExit(g_allocator.DeviceAllocate((void**)&vec, size))

// Copy numEntries elements of host array `src` into a freshly allocated
// device buffer and return the device pointer. The caller owns the buffer
// and releases it via CLEANUP / g_allocator.DeviceFree.
template<typename T>
T* loadToGPU(T* src, int numEntries, cub::CachingDeviceAllocator& g_allocator) {
    T* dest;
    // Widen to size_t before multiplying so large transfers don't overflow int.
    size_t bytes = sizeof(T) * static_cast<size_t>(numEntries);
    CubDebugExit(g_allocator.DeviceAllocate((void**)&dest, bytes));
    // The original ignored the memcpy status; check it so a failed H2D copy
    // is reported here instead of surfacing as a later, unrelated error.
    gpuErrchk(cudaMemcpy(dest, src, bytes, cudaMemcpyHostToDevice));
    return dest;
}

// Elements processed per thread block in tiled kernels (BLOCK_THREADS and
// ITEMS_PER_THREAD must be defined by the including translation unit).
#define TILE_SIZE (BLOCK_THREADS * ITEMS_PER_THREAD)