-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #294 from DrTimothyAldenDavis/dev2
9.2.0
- Loading branch information
Showing
97 changed files
with
15,326 additions
and
14,046 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
//------------------------------------------------------------------------------ | ||
// GB_cuda_apply.hpp: CPU definitions for CUDA apply operations | ||
//------------------------------------------------------------------------------ | ||
|
||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//------------------------------------------------------------------------------ | ||
|
||
// The guard name matches this file (GB_cuda_apply.hpp), so that it cannot
// collide with the include guard of a different header.
#ifndef GB_CUDA_APPLY_H
#define GB_CUDA_APPLY_H

#include "GB_cuda.hpp"

//------------------------------------------------------------------------------
// GB_cuda_apply_unop_jit: apply a unary / index-unary style operator via the JIT
//------------------------------------------------------------------------------

// Only the prototype is given here.  ythunk is optional scalar data handed to
// the kernel; NOTE(review): callers appear to pass NULL when the operator has
// no ytype -- confirm against the definition.

GrB_Info GB_cuda_apply_unop_jit
(
    // output:
    GB_void *Cx,
    // input:
    const GrB_Type ctype,
    const GB_Operator op,
    const bool flipij,
    const GrB_Matrix A,
    const GB_void *ythunk,
    // CUDA stream and launch parameters:
    cudaStream_t stream,
    int32_t gridsz,
    int32_t blocksz
) ;

//------------------------------------------------------------------------------
// GB_cuda_apply_bind1st_jit: apply a binary operator with a bound scalar
//------------------------------------------------------------------------------

// Loads (or compiles) the "cuda_apply_bind1st" JIT kernel and calls it with
// (Cx, scalar, A, stream, gridsz, blocksz).  Returns the GrB_Info reported by
// the JIT loader if loading fails, otherwise the value returned by the kernel.

GrB_Info GB_cuda_apply_bind1st_jit
(
    // output:
    GB_void *Cx,
    // input:
    const GrB_Type ctype,
    const GrB_BinaryOp op,
    const GrB_Matrix A,
    const GB_void *xscalar,
    // CUDA stream and launch parameters:
    cudaStream_t stream,
    int32_t gridsz,
    int32_t blocksz
) ;

//------------------------------------------------------------------------------
// GB_cuda_apply_bind2nd_jit: apply a binary operator with a bound scalar
//------------------------------------------------------------------------------

// Loads (or compiles) the "cuda_apply_bind2nd" JIT kernel and calls it with
// (Cx, A, scalar, stream, gridsz, blocksz).  The scalar parameter is named
// xscalar here for symmetry with bind1st; presumably it is the operator's
// second operand -- confirm against the kernel.

GrB_Info GB_cuda_apply_bind2nd_jit
(
    // output:
    GB_void *Cx,
    // input:
    const GrB_Type ctype,
    const GrB_BinaryOp op,
    const GrB_Matrix A,
    const GB_void *xscalar,
    // CUDA stream and launch parameters:
    cudaStream_t stream,
    int32_t gridsz,
    int32_t blocksz
) ;

#endif
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#include "GB_cuda_apply.hpp" | ||
|
||
extern "C" | ||
{ | ||
typedef GB_JIT_CUDA_KERNEL_APPLY_BIND1ST_PROTO ((*GB_jit_dl_function)) ; | ||
} | ||
|
||
|
||
GrB_Info GB_cuda_apply_bind1st_jit | ||
( | ||
// output: | ||
GB_void *Cx, | ||
// input: | ||
const GrB_Type ctype, | ||
const GrB_BinaryOp op, | ||
const GrB_Matrix A, | ||
const GB_void *scalarx, | ||
// CUDA stream and launch parameters: | ||
cudaStream_t stream, | ||
int32_t gridsz, | ||
int32_t blocksz | ||
) | ||
{ | ||
//-------------------------------------------------------------------------- | ||
// encodify the problem | ||
//-------------------------------------------------------------------------- | ||
|
||
GB_jit_encoding encoding ; | ||
char *suffix ; | ||
uint64_t hash = GB_encodify_ewise (&encoding, &suffix, | ||
GB_JIT_CUDA_KERNEL_APPLYBIND1, false, false, false, GxB_FULL, ctype, | ||
NULL, false, false, op, false, NULL, A) ; | ||
|
||
//-------------------------------------------------------------------------- | ||
// get the kernel function pointer, loading or compiling it if needed | ||
//-------------------------------------------------------------------------- | ||
|
||
void *dl_function ; | ||
GrB_Info info = GB_jitifyer_load (&dl_function, | ||
GB_jit_ewise_family, "cuda_apply_bind1st", | ||
hash, &encoding, suffix, NULL, NULL, | ||
(GB_Operator) op, ctype, NULL, A->type) ; | ||
if (info != GrB_SUCCESS) { | ||
return (info) ; | ||
} | ||
|
||
//-------------------------------------------------------------------------- | ||
// call the jit kernel and return result | ||
//-------------------------------------------------------------------------- | ||
|
||
GB_jit_dl_function GB_jit_kernel = (GB_jit_dl_function) dl_function ; | ||
return (GB_jit_kernel (Cx, scalarx, A, stream, gridsz, blocksz)) ; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#include "GB_cuda_apply.hpp" | ||
|
||
extern "C" | ||
{ | ||
typedef GB_JIT_CUDA_KERNEL_APPLY_BIND2ND_PROTO ((*GB_jit_dl_function)) ; | ||
} | ||
|
||
|
||
GrB_Info GB_cuda_apply_bind2nd_jit | ||
( | ||
// output: | ||
GB_void *Cx, | ||
// input: | ||
const GrB_Type ctype, | ||
const GrB_BinaryOp op, | ||
const GrB_Matrix A, | ||
const GB_void *scalarx, | ||
// CUDA stream and launch parameters: | ||
cudaStream_t stream, | ||
int32_t gridsz, | ||
int32_t blocksz | ||
) | ||
{ | ||
//-------------------------------------------------------------------------- | ||
// encodify the problem | ||
//-------------------------------------------------------------------------- | ||
|
||
GB_jit_encoding encoding ; | ||
char *suffix ; | ||
uint64_t hash = GB_encodify_ewise (&encoding, &suffix, | ||
GB_JIT_CUDA_KERNEL_APPLYBIND2, false, false, false, GxB_FULL, ctype, | ||
NULL, false, false, op, false, A, NULL) ; | ||
|
||
//-------------------------------------------------------------------------- | ||
// get the kernel function pointer, loading or compiling it if needed | ||
//-------------------------------------------------------------------------- | ||
|
||
void *dl_function ; | ||
GrB_Info info = GB_jitifyer_load (&dl_function, | ||
GB_jit_ewise_family, "cuda_apply_bind2nd", | ||
hash, &encoding, suffix, NULL, NULL, | ||
(GB_Operator) op, ctype, A->type, NULL) ; | ||
if (info != GrB_SUCCESS){ | ||
return (info) ; | ||
} | ||
|
||
//-------------------------------------------------------------------------- | ||
// call the jit kernel and return result | ||
//-------------------------------------------------------------------------- | ||
|
||
GB_jit_dl_function GB_jit_kernel = (GB_jit_dl_function) dl_function ; | ||
return (GB_jit_kernel (Cx, A, scalarx, stream, gridsz, blocksz)) ; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#include "GB_cuda_apply.hpp" | ||
|
||
#undef GB_FREE_WORKSPACE | ||
#define GB_FREE_WORKSPACE \ | ||
{ \ | ||
GB_FREE_WORK (&scalarx_cuda, scalarx_cuda_size) ; \ | ||
} | ||
|
||
#undef GB_FREE_ALL | ||
#define GB_FREE_ALL ; | ||
|
||
#define BLOCK_SIZE 512 | ||
#define LOG2_BLOCK_SIZE 9 | ||
|
||
GrB_Info GB_cuda_apply_binop | ||
( | ||
GB_void *Cx, | ||
const GrB_Type ctype, | ||
const GrB_BinaryOp op, | ||
const GrB_Matrix A, | ||
const GB_void *scalarx, | ||
const bool bind1st | ||
) | ||
{ | ||
ASSERT (scalarx != NULL) ; | ||
// make a copy of scalarx to ensure it's not on the CPU stack | ||
GB_void *scalarx_cuda = NULL ; | ||
size_t scalarx_cuda_size = 0 ; | ||
if (bind1st) | ||
{ | ||
scalarx_cuda = GB_MALLOC_WORK (op->xtype->size, GB_void, &scalarx_cuda_size) ; | ||
} | ||
else | ||
{ | ||
scalarx_cuda = GB_MALLOC_WORK (op->ytype->size, GB_void, &scalarx_cuda_size) ; | ||
} | ||
if (scalarx_cuda == NULL) | ||
{ | ||
return (GrB_OUT_OF_MEMORY) ; | ||
} | ||
memcpy (scalarx_cuda, scalarx, scalarx_cuda_size) ; | ||
|
||
// FIXME: use the stream pool | ||
cudaStream_t stream ; | ||
CUDA_OK (cudaStreamCreate (&stream)) ; | ||
|
||
GrB_Index anz = GB_nnz_held (A) ; | ||
|
||
int32_t gridsz = GB_ICEIL (anz, BLOCK_SIZE) ; | ||
|
||
GrB_Info info ; | ||
if (bind1st) { | ||
info = GB_cuda_apply_bind1st_jit (Cx, ctype, op, A, | ||
scalarx_cuda, stream, gridsz, BLOCK_SIZE) ; | ||
} else { | ||
info = GB_cuda_apply_bind2nd_jit (Cx, ctype, op, A, | ||
scalarx_cuda, stream, gridsz, BLOCK_SIZE) ; | ||
} | ||
|
||
if (info == GrB_NO_VALUE) info = GrB_PANIC ; | ||
GB_OK (info) ; | ||
|
||
CUDA_OK (cudaStreamSynchronize (stream)) ; | ||
CUDA_OK (cudaStreamDestroy (stream)) ; | ||
|
||
GB_FREE_WORKSPACE ; | ||
return GrB_SUCCESS ; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#include "GraphBLAS_cuda.hpp" | ||
#include "GB_cuda.hpp" | ||
|
||
bool GB_cuda_apply_binop_branch | ||
( | ||
const GrB_Type ctype, | ||
const GrB_BinaryOp op, | ||
const GrB_Matrix A | ||
) | ||
{ | ||
bool ok = GB_cuda_type_branch (ctype) && GB_cuda_type_branch (A->type) ; | ||
|
||
if (op->xtype != NULL) | ||
{ | ||
ok = ok && GB_cuda_type_branch (op->xtype) ; | ||
} | ||
if (op->ytype != NULL) | ||
{ | ||
ok = ok && GB_cuda_type_branch (op->ytype) ; | ||
} | ||
if (op->ztype != NULL) | ||
{ | ||
ok = ok && GB_cuda_type_branch (op->ztype) ; | ||
} | ||
|
||
ok = ok && (op != NULL && op->hash != UINT64_MAX) ; | ||
|
||
return (ok) ; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#include "GB_cuda_apply.hpp" | ||
|
||
#undef GB_FREE_WORKSPACE | ||
#define GB_FREE_WORKSPACE \ | ||
{ \ | ||
GB_FREE_WORK (&ythunk_cuda, ythunk_cuda_size) ; \ | ||
} | ||
|
||
#undef GB_FREE_ALL | ||
#define GB_FREE_ALL ; | ||
|
||
#define BLOCK_SIZE 512 | ||
#define LOG2_BLOCK_SIZE 9 | ||
|
||
GrB_Info GB_cuda_apply_unop | ||
( | ||
GB_void *Cx, | ||
const GrB_Type ctype, | ||
const GB_Operator op, | ||
const bool flipij, | ||
const GrB_Matrix A, | ||
const GB_void *ythunk | ||
) | ||
{ | ||
|
||
GB_void *ythunk_cuda = NULL ; | ||
size_t ythunk_cuda_size = 0 ; | ||
if (ythunk != NULL && op != NULL && op->ytype != NULL) | ||
{ | ||
// make a copy of ythunk, since ythunk might be allocated on | ||
// the CPU stack and thus not accessible to the CUDA kernel. | ||
ythunk_cuda = GB_MALLOC_WORK (op->ytype->size, GB_void, &ythunk_cuda_size) ; | ||
if (ythunk_cuda == NULL) | ||
{ | ||
return (GrB_OUT_OF_MEMORY) ; | ||
} | ||
memcpy (ythunk_cuda, ythunk, op->ytype->size) ; | ||
} | ||
|
||
// FIXME: use the stream pool | ||
cudaStream_t stream ; | ||
CUDA_OK (cudaStreamCreate (&stream)) ; | ||
|
||
GrB_Index anz = GB_nnz_held (A) ; | ||
|
||
int32_t gridsz = GB_ICEIL (anz, BLOCK_SIZE) ; | ||
|
||
GrB_Info info = GB_cuda_apply_unop_jit (Cx, ctype, op, flipij, A, | ||
ythunk_cuda, stream, gridsz, BLOCK_SIZE) ; | ||
|
||
if (info == GrB_NO_VALUE) info = GrB_PANIC ; | ||
GB_OK (info) ; | ||
|
||
CUDA_OK (cudaStreamSynchronize (stream)) ; | ||
CUDA_OK (cudaStreamDestroy (stream)) ; | ||
|
||
GB_FREE_WORKSPACE ; | ||
return GrB_SUCCESS ; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#include "GraphBLAS_cuda.hpp" | ||
#include "GB_cuda.hpp" | ||
|
||
bool GB_cuda_apply_unop_branch | ||
( | ||
const GrB_Type ctype, | ||
const GrB_Matrix A, | ||
const GB_Operator op | ||
) | ||
{ | ||
bool ok = (GB_cuda_type_branch (ctype) && GB_cuda_type_branch (A->type)) | ||
&& (op != NULL && op->hash != UINT64_MAX); | ||
|
||
if (!ok) | ||
{ | ||
return false ; | ||
} | ||
return true ; | ||
} |
Oops, something went wrong.