Skip to content

Commit

Permalink
Fix a few PyTorch annotations (#176)
Browse files Browse the repository at this point in the history
* Re-enable dynamic handling of the cudaMemcpyAsync() memory copy kind in pt_opt.cpp
* Fix the annotations of cudnnGetConvolutionForwardAlgorithm_v7, cudnnGetConvolutionBackwardFilterAlgorithm_v7, cudnnGetConvolutionBackwardDataAlgorithm_v7, and cudaMemsetAsync

Co-authored-by: Element Green <[email protected]>
  • Loading branch information
yuhc and elementgreen authored Jun 17, 2021
1 parent dc88460 commit 48131fb
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions cava/samples/pytorch/pt_opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1152,12 +1152,8 @@ cudaMemcpyToSymbol(const void *symbol, const void *src, size_t count, size_t off
__host__ __cudart_builtin__ cudaError_t CUDARTAPI
cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream)
{
/* TensorFlow always copies data between device memories */
ava_async;
ava_argument(dst) ava_opaque;
ava_argument(src) ava_opaque;

/*
ava_argument(dst) {
if (kind == cudaMemcpyHostToDevice) {
ava_opaque;
Expand All @@ -1175,7 +1171,6 @@ cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind ki
ava_opaque;
}
}
*/

ava_argument(stream) ava_handle;

Expand Down Expand Up @@ -7409,7 +7404,17 @@ cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
int *returnedAlgoCount,
cudnnConvolutionBwdFilterAlgoPerf_t *perfResults)
{
ava_unsupported;
ava_argument(handle) ava_handle;
ava_argument(srcDesc) ava_handle;
ava_argument(diffDesc) ava_handle;
ava_argument(convDesc) ava_handle;
ava_argument(gradDesc) ava_handle;
ava_argument(returnedAlgoCount) {
ava_out; ava_buffer(1);
}
ava_argument(perfResults) {
ava_out; cu_in_out_buffer(requestedAlgoCount, returnedAlgoCount);
}
}

/*
Expand Down Expand Up @@ -7533,7 +7538,17 @@ cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
int *returnedAlgoCount,
cudnnConvolutionBwdDataAlgoPerf_t *perfResults)
{
ava_unsupported;
ava_argument(handle) ava_handle;
ava_argument(filterDesc) ava_handle;
ava_argument(diffDesc) ava_handle;
ava_argument(convDesc) ava_handle;
ava_argument(gradDesc) ava_handle;
ava_argument(returnedAlgoCount) {
ava_out; ava_buffer(1);
}
ava_argument(perfResults) {
ava_out; cu_in_out_buffer(requestedAlgoCount, returnedAlgoCount);
}
}

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
Expand Down Expand Up @@ -22397,7 +22412,8 @@ __host__ cudaError_t CUDARTAPI cudaMemset3D(struct cudaPitchedPtr pitchedDevPtr,

/*
 * Annotation spec for cudaMemsetAsync.
 *
 * Only the two pointer-like arguments carry explicit annotations:
 *   devPtr - annotated ava_opaque.
 *   stream - annotated ava_handle.
 * value and count have no annotation of their own.
 *
 * No ava_unsupported marker appears here on purpose: it would contradict the
 * argument annotations below.
 * NOTE(review): presumably ava_opaque forwards the pointer value as-is
 * (devPtr being a device address) rather than marshalling the memory it
 * points to -- confirm against the annotation language documentation.
 * NOTE(review): unlike cudaMemcpyAsync in this file, no ava_async marker is
 * set here -- confirm whether that is intentional.
 */
__host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream __dv(0))
{
    ava_argument(devPtr) ava_opaque;
    ava_argument(stream) ava_handle;
}

__host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream __dv(0))
Expand Down

0 comments on commit 48131fb

Please sign in to comment.