// cuda-100-days / day1 / vecadd.cu
#include <stdio.h>
#include <stdlib.h>

// Fills the first N elements of `a` with `val`.
//   val - value stored into every element
//   a   - destination buffer; it is written at indices 0 .. N-1
//   N   - number of elements to fill; nothing is written when N <= 0
void initwith(float val, float *a, int N)
{
    float *dst = a;

    // Count down the remaining elements while walking the buffer front to back.
    for(int remaining = N; remaining > 0; --remaining)
    {
        *dst = val;
        ++dst;
    }
}



// Verifies that the first N elements of `array` all compare equal to `target`.
//   target - expected value of every element
//   array  - buffer to inspect; it is read at indices 0 .. N-1
//   N      - number of elements to inspect
// On the first mismatch it prints the offending index together with the
// actual and expected values, then terminates the process with exit status 1.
// When no element differs (including N <= 0) it prints a success message.
void checkElementsAre(float target, float *array, int N)
{
  // Advance past the leading run of matching elements.
  int firstBad = 0;
  while(firstBad < N && !(array[firstBad] != target))
  {
    ++firstBad;
  }

  // Stopping short of N means the element at firstBad did not match.
  if(firstBad < N)
  {
    printf("FAIL: array[%d] - %0.0f does not equal %0.0f\n", firstBad, array[firstBad], target);
    exit(1);
  }

  printf("SUCCESS! All values added correctly.\n");
}

// Element-wise vector addition on the GPU: c[i] = a[i] + b[i] for 0 <= i < N.
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its global index
// and then advances by the total number of launched threads, so every element
// is processed for any grid/block size -- including a grid with fewer threads
// than N. Uses no shared memory.
//
//   a, b - input vectors, read at indices 0 .. N-1
//   c    - output vector, written at indices 0 .. N-1
//   N    - number of elements; the kernel does nothing when N <= 0
//
// All three pointers must be dereferenceable from device code.
__global__ void addvec_gpu(float *a, float *b, float *c, int N)
{
    // 64-bit index math so neither blockIdx.x * blockDim.x nor i + stride can
    // wrap around for large launches or N close to INT_MAX.
    const long long stride = (long long)gridDim.x * blockDim.x;

    for(long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride)
    {
        c[i] = a[i] + b[i];
    }
}
// Terminates the program with a diagnostic when a CUDA runtime call failed.
//   status - return code of the CUDA call being checked
//   what   - short description of the call, included in the error message
static void checkCuda(cudaError_t status, const char *what)
{
    if(status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

// Adds two vectors of N floats on the GPU and verifies the result on the host.
// The buffers are allocated with cudaMallocManaged, so the same pointers are
// filled by host code, passed to the kernel, and read back by host code.
// Returns 0 on success; exits with status 1 on any CUDA error or wrong result.
int main()
{
    const int N = 2<<20;                 // 2 * 2^20 = 2,097,152 elements
    size_t size = N * sizeof(float);

    float *a = nullptr;
    float *b = nullptr;
    float *c = nullptr;

    // Check every allocation: on failure the pointer is not usable and the
    // host-side fill below would otherwise dereference garbage.
    checkCuda(cudaMallocManaged(&a, size), "cudaMallocManaged(a)");
    checkCuda(cudaMallocManaged(&b, size), "cudaMallocManaged(b)");
    checkCuda(cudaMallocManaged(&c, size), "cudaMallocManaged(c)");

    initwith(3,a,N);
    initwith(4,b,N);
    initwith(0,c,N);

    // Ceil-division so the grid covers all N elements even when N is not a
    // multiple of the block size.
    int numOfThreads = 256;
    int numOfBlocks = (N + numOfThreads - 1) / numOfThreads;

    addvec_gpu<<<numOfBlocks, numOfThreads>>>(a,b,c,N);

    // A kernel launch returns no status itself: configuration errors are
    // reported by cudaGetLastError, execution errors by the next synchronize.
    checkCuda(cudaGetLastError(), "addvec_gpu launch");

    // Wait for the kernel to finish before the host reads c.
    checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    checkElementsAre(7, c, N);

    checkCuda(cudaFree(a), "cudaFree(a)");
    checkCuda(cudaFree(b), "cudaFree(b)");
    checkCuda(cudaFree(c), "cudaFree(c)");

    return 0;
}