Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

After run gdr_copy_to_mapping, how does the kernel function utilize the data transferred from host? In other words, can cudamalloc and cudaMemcpy be replaced? #307

Open
LiShuang-codes opened this issue Oct 22, 2024 · 0 comments

Comments

@LiShuang-codes
Copy link

A sample code is here, plz :

#define DTYPE float

__global__ void sample(const float* A, const float* B, int64_t n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    printf("%f at %p\n", A[0], A);
    if (i < N)
    {
       B[i] = A[i] +1;
    }
}

int main(){
  gdr_t gdr_handle;
  void *gpu_buffer;
  gpu_mem_handle_t mhandle;
  void *gpu_map_addr = NULL;
  gdr_mh_t mh;
  cudaError_t cuda_status;
  CUdevice dev;
  CUcontext dev_ctx;
  ASSERTDRV(cuInit(0));
  ASSERTDRV(cuDeviceGet(&dev, 0));
  ASSERTDRV(cuDevicePrimaryCtxRetain(&dev_ctx, dev));
  ASSERTDRV(cuCtxSetCurrent(dev_ctx));
  ASSERTDRV(gpu_mem_alloc(&mhandle, SIZE, 1, 1));
  gdr_handle = gdr_open();
  if (!gdr_handle)
  {
      printf("RDMA initialize failed.\n");
      goto close_and_clear;
  }
  printf("RDMA initialize succeed.\n");
  if (gdr_pin_buffer(gdr_handle, mhandle.ptr, 2*size_bytes, 0, 0, &mh) != 0)
  {
      printf("*** ERROR: gdr_pin_buffer failed\n");
      goto close_and_clear;
  }


  if (gdr_map(gdr_handle, mh, &gpu_map_addr, 2*size_bytes) != 0)
  {
      printf("*** ERROR: gdr_map failed\n");
      gdr_unpin_buffer(gdr_handle, mh);
      goto close_and_clear;
  }
  DTYPE *a = (DTYPE *)malloc(size_bytes);
  DTYPE *b = (DTYPE *)malloc(size_bytes);
gdr_copy_to_mapping(mh, gpu_map_addr, a, size_bytes);
//***HOW TO CALL KERNAL FUNCTION?
//***cudaDeviceSynchronize() CAN BE CALLED TO WAITE?
gdr_copy_from_mapping(mh, gpu_map_addr+size_bytes, b, size_bytes);
close_and_clear:
    gdr_unmap(gdr_handle, mh, gpu_map_addr, SIZE);
    gdr_unpin_buffer(gdr_handle, mh);
    cudaFree(gpu_buffer);
    gdr_close(gdr_handle);
    free(buffer);
    return 0;
}


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant