Lancelot / src / gpudb / test / main3.cu
main3.cu
Raw
// #include "QueryProcessing.h"
// #include "QueryOptimizer.h"
// #include "CPUGPUProcessing.h"
// #include "CacheManager.h"
// #include "CPUProcessing.h"
// #include "CostModel.h"
// #include "pcm-cache.cpp"

#include "common.h"

#define PROFILE 1

#define CHECK_CU_ERROR(err, cufunc)                                     \
    if (err != CUDA_SUCCESS) { printf ("Error %d for CUDA Driver API function '%s'\n", err, cufunc); return -1; }

int main() {

	cudaSetDevice(0);
	CUdevice device;
	cuDeviceGet(&device, 0);

	cout << endl;

// #ifdef PROFILE
	PAPI_library_init( PAPI_VER_CURRENT );
	// InitMonitor();
// #endif

// #ifdef PROFILE
	cout << endl;
	cout << "We are profiling" << endl;

	int retval;
	char const *EventName[] = { "cuda:::dram__bytes_read.sum:device=0", "cuda:::dram__bytes_write.sum:device=0"}; // CUPTI_11 event.
	int* events = new int [NUM_EVENTS];
	int EventSet = PAPI_NULL;

	for(int i = 0; i < NUM_EVENTS; i++ ){
	    retval = PAPI_event_name_to_code( (char *)EventName[i], &events[i] );
	    assert(retval == PAPI_OK);
	}

	retval = PAPI_create_eventset( &EventSet ); assert(retval == PAPI_OK);
	retval = PAPI_add_events( EventSet, events, NUM_EVENTS ); assert(retval == PAPI_OK);
// #endif

	CUcontext sessionCtx = NULL;
	CUcontext poppedCtx, curCtx;
	CHECK_CU_ERROR( cuCtxCreate(&sessionCtx, 0, device), "cuCtxCreate");
	cuCtxGetCurrent(&curCtx); cout << curCtx << endl;
	CHECK_CU_ERROR( cuCtxPopCurrent(&poppedCtx), "cuCtxPopCurrent" );
	cuCtxGetCurrent(&curCtx); cout << curCtx << endl;

	long long p_values[NUM_EVENTS];
	long long read = 0, write = 0;
	
	for(int i = 0; i < NUM_EVENTS; i++ ){
		p_values[i] = 0;
	}

	CHECK_CU_ERROR(cuCtxPushCurrent(sessionCtx), "cuCtxPushCurrent");
	retval = PAPI_start( EventSet ); assert(retval == PAPI_OK);
	CHECK_CU_ERROR( cuCtxPopCurrent(&sessionCtx), "cuCtxPopCurrent" );

	parallel_for(int(0), 64, [=](int j){

		CUcontext poppedCtx;
		cuCtxPushCurrent(sessionCtx);

		// cuCtxSetCurrent(sessionCtx);

		cudaStream_t stream;
		cudaStreamCreate(&stream);



		cudaStreamSynchronize(stream);

		cudaStreamDestroy(stream);

		cuCtxPopCurrent(&poppedCtx);

	});

	cout << endl;

    CHECK_CU_ERROR(cuCtxPushCurrent(sessionCtx), "cuCtxPushCurrent");
    CHECK_CU_ERROR( cuCtxSynchronize( ), "cuCtxSynchronize" );
    CHECK_CU_ERROR( cuCtxPopCurrent(&sessionCtx), "cuCtxPopCurrent" );
	retval = PAPI_stop( EventSet, p_values ); assert(retval == PAPI_OK);
	// retval = PAPI_reset(EventSet); assert(retval == PAPI_OK);
	for(int i = 0; i < NUM_EVENTS; i++ ) {
		printf( "stop: %12lld \t=0X%016llX \t\t --> %s \n", p_values[i], p_values[i], EventName[i] );
	}

	CHECK_CU_ERROR(cuCtxPushCurrent(sessionCtx), "cuCtxPushCurrent");
	retval = PAPI_start( EventSet ); assert(retval == PAPI_OK);
	CHECK_CU_ERROR( cuCtxPopCurrent(&sessionCtx), "cuCtxPopCurrent" );

	parallel_for(int(0), 64, [=](int j){

		CUcontext poppedCtx;
		cuCtxPushCurrent(sessionCtx);

		// cuCtxSetCurrent(sessionCtx);

		cudaStream_t stream;
		cudaStreamCreate(&stream);

		// cudaStreamSynchronize(stream);

		cudaStreamDestroy(stream);

		cuCtxPopCurrent(&poppedCtx);

	});

	cout << endl;

    CHECK_CU_ERROR(cuCtxPushCurrent(sessionCtx), "cuCtxPushCurrent");
    CHECK_CU_ERROR( cuCtxSynchronize( ), "cuCtxSynchronize" );
    CHECK_CU_ERROR( cuCtxPopCurrent(&sessionCtx), "cuCtxPopCurrent" );
	retval = PAPI_stop( EventSet, p_values ); assert(retval == PAPI_OK);
	// retval = PAPI_reset(EventSet); assert(retval == PAPI_OK);
	for(int i = 0; i < NUM_EVENTS; i++ ) {
		printf( "stop: %12lld \t=0X%016llX \t\t --> %s \n", p_values[i], p_values[i], EventName[i] );
	}


    if (sessionCtx != NULL) {
        cuCtxDestroy(sessionCtx);
    }


// #ifdef PROFILE
		retval = PAPI_cleanup_eventset(EventSet); assert(retval == PAPI_OK);
		retval = PAPI_destroy_eventset(&EventSet); assert(retval == PAPI_OK);
// #endif

// #ifdef PROFILE
		PAPI_shutdown();
// #endif

}