diff --git a/src/Makefile b/src/Makefile index 9408053..bf75e0e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -21,7 +21,14 @@ CL_LIB=cl-crypt CHACHA_IF_DIR:=crypt-if -CL_HEADER_DIR:=opencl-platform +# for Windows NVIDIA cygwin sample +#CL_HEADER_DIR:=/cygdrive/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.0/include/ +#CL_LIBRARY_DIR:=/cygdrive/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.0/lib/x64 +# for Ubuntu Linux AMD sample +#CL_HEADER_DIR:=/opt/amdgpu-pro/include/ +#CL_LIBRARY_DIR:=/opt/amdgpu-pro/lib/x86_64-linux-gnu/ + +CL_PLATFORM_DIR:=opencl-platform CUDA_HEADER_DIR:=cuda-headers CUDA_SHA256_DIR:=cuda-sha256 @@ -29,14 +36,20 @@ CUDA_SHA256_DIR:=cuda-sha256 CUDA_DIR ?= /usr/local/cuda CXX ?= g++ +CFLAGS+=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR) + +# some instances require optimisations disabled (e.g. AMD on Linux) +# remove "-cl-opt-disable" for default Optimisations to take place +CL_DEVICE_CFLAGS=-cl-opt-disable CFLAGS_COMMON:=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CHACHA_IF_DIR) CFLAGS+=$(CFLAGS_COMMON) -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR) #use -DUSE_RDTSC for Windows compilation CL_CFLAGS_common:=-fPIC -std=c++11 $(CFLAGS_COMMON) -DOPENCL_VARIANT \ - -I$(CL_HEADER_DIR) -Icommon/ \ - -I$(CUDA_DIR)/targets/x86_64-linux/include $(HOST_CFLAGS) + -DCL_DEVICE_CFLAGS="\"$(CL_DEVICE_CFLAGS)\"" \ + -I$(CL_PLATFORM_DIR) -Icommon/ \ + -I$(CUDA_DIR)/targets/x86_64-linux/include CL_CFLAGS_release:=$(CL_CFLAGS_common) -O3 CL_CFLAGS_debug:=$(CL_CFLAGS_common) -O0 -g CL_CFLAGS:=$(CL_CFLAGS_$V) @@ -127,14 +140,20 @@ $V/poh_verify.o: $(POH_SRCS) CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o cl_chacha.o cl_poh_verify.o) -$V/lib$(CL_LIB).so: $(CL_CPU_GPU_OBJS) - $(CXX) -shared $^ -lOpenCL -o $@ +$V/cl_crypt-dlink.o: $(CL_CPU_GPU_OBJS) + ar rvs $@ $^ + +$V/lib$(CL_LIB).a: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS) + ar rcs $@ $^ + +$V/lib$(CL_LIB).so: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS) + $(CXX) -shared --shared $^ -o $@ $V/cl_ecc_main.o: $(CL_ECC_DIR)/main.cpp $(ECC_DIR)/ed25519.h @mkdir -p $(@D) $(CXX) $(CL_CFLAGS) -pthread -I$(ECC_DIR) -c $< -o $@ -$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).so +$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).a $(CXX) $(CL_CFLAGS) -L$(CUDA_DIR)/lib64 -L$V -pthread $< -l$(CL_LIB) -lOpenCL -o $@ CPU_GPU_OBJS=$(addprefix $V/,chacha_cbc.o aes_cbc.o verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o) diff --git a/src/opencl-ecc-ed25519/main.cpp b/src/opencl-ecc-ed25519/main.cpp index 776b4e3..bcd0847 100644 --- a/src/opencl-ecc-ed25519/main.cpp +++ b/src/opencl-ecc-ed25519/main.cpp @@ -10,7 +10,6 @@ #include "gpu_common.h" #include "gpu_ctx.h" -#define USE_CLOCK_GETTIME #include "perftime.h" #define PACKET_SIZE 512 @@ -100,14 +99,12 @@ int main(int argc, const char* argv[]) { } } - if ((argc - arg) != 6) { - printf("usage: %s [-v] \n", argv[0]); + if ((argc - arg) < 6 || (argc - arg) > 8) { + printf("usage: %s [-v] \n", argv[0]); return 1; } ed25519_set_verbose(verbose); - - DIE(cl_check_init(CL_DEVICE_TYPE_GPU) == false, "OpenCL could not be init"); int num_signatures_per_elem = strtol(argv[arg++], NULL, 10); if (num_signatures_per_elem <= 0) { @@ -145,6 +142,17 @@ int main(int argc, const char* argv[]) { return 1; } + if(argc >= 8) { + query_platform_id = strtol(argv[arg++], NULL, 10); + } + + if(argc >= 9) { + query_device_id = strtol(argv[arg++], NULL, 10); + } + + DIE(cl_check_init() == false, "OpenCL could not be init"); + + LOG("OpenCL init has finished\n"); LOG("streamer size: %zu elems size: %zu\n", sizeof(streamer_Packet), sizeof(gpu_Elems)); std::vector vctx = std::vector(num_threads); @@ -331,14 +339,17 @@ int main(int argc, const char* argv[]) { for (int thread = 0; thread < num_threads; thread++) { LOG("ret:\n"); + int verify_ok = out_size / (int)sizeof(uint8_t); bool verify_failed = false; for (int i = 0; i < out_size / (int)sizeof(uint8_t); i++) { if (vctx[thread].out_h[i] != 1) { verify_failed = true; + verify_ok--; } } LOG("\n"); fflush(stdout); + printf("Verify OK: %d / %d\n", verify_ok, out_size / (int)sizeof(uint8_t)); assert(verify_failed == false); } diff --git a/src/opencl-ecc-ed25519/verify.cpp b/src/opencl-ecc-ed25519/verify.cpp index 457fddd..8faa8f4 100644 --- a/src/opencl-ecc-ed25519/verify.cpp +++ b/src/opencl-ecc-ed25519/verify.cpp @@ -16,7 +16,6 @@ #include "gpu_common.h" #include "gpu_ctx.h" -#define USE_CLOCK_GETTIME #include "perftime.h" static int consttime_equal(const unsigned char *x, const unsigned char *y) { @@ -101,8 +100,6 @@ int ed25519_verify(const unsigned char *signature, return ed25519_verify_device(signature, message, message_len, public_key); } -bool g_verbose = true; - void ed25519_set_verbose(bool val) { g_verbose = val; } diff --git a/src/opencl-platform/cl_common.h b/src/opencl-platform/cl_common.h index 9ca8db2..e9e6cbc 100644 --- a/src/opencl-platform/cl_common.h +++ b/src/opencl-platform/cl_common.h @@ -36,6 +36,9 @@ extern bool g_verbose; #include #endif +extern cl_uint query_device_id; +extern cl_uint query_platform_id; + // runs at the start of any OpenCL entry point crypto function bool cl_check_init(cl_uint sel_device_type); bool cl_check_init(void); diff --git a/src/opencl-platform/cl_init_platform.cpp b/src/opencl-platform/cl_init_platform.cpp index d10b469..7e71df8 100644 --- a/src/opencl-platform/cl_init_platform.cpp +++ b/src/opencl-platform/cl_init_platform.cpp @@ -9,6 +9,10 @@ #include "cl_common.h" cl_uint query_device_type = CL_DEVICE_TYPE_ALL; + +cl_uint query_platform_id = 0; +cl_uint query_device_id = 0; + bool cl_is_init = false; cl_context context; @@ -230,6 +234,8 @@ bool cl_check_init(void) { CL_ERR( clGetPlatformIDs(platform_num, platform_list, NULL)); cout << "Platforms found: " << platform_num << endl; + bool dev_selected = false; + /* list all platforms and VENDOR/VERSION properties */ for (cl_uint platf = 0; platf < platform_num; platf++) { /* get attribute CL_PLATFORM_VENDOR */ @@ -290,15 +296,21 @@ bool cl_check_init(void) { /* select device based on cli arguments */ string tmpAttrData = attr_data; - - // always select last device of type GPU - device = device_list[dev]; + + if((dev == query_device_id) && + (platf == query_platform_id)) { + device = device_list[dev]; + cout << "<----- SELECTED"; + dev_selected = true; + } delete[] attr_data; cout << endl; } } + DIE(dev_selected == false, "no platform or device selected"); + // clean delete[] platform_list; delete[] device_list; @@ -318,7 +330,7 @@ bool cl_check_init(void) { *************************************************/ #ifdef KERNELS_SHA256 - cout << "Compiling sha256 kernels" << endl; + cout << "Compiling sha256 kernels, FLAGS: " << CL_DEVICE_CFLAGS << endl; /* retrieve kernel source */ kernel_src = kernels_sha256_src; @@ -331,7 +343,7 @@ bool cl_check_init(void) { CL_ERR( ret ); /* compile the program for the given set of devices */ - ret = clBuildProgram(program, 1, &device, "-DENDIAN_NEUTRAL -DLTC_NO_ASM", NULL, NULL); + ret = clBuildProgram(program, 1, &device, CL_DEVICE_CFLAGS, NULL, NULL); CL_COMPILE_ERR( ret, program, device ); init_sha256_state_kernel = clCreateKernel(program, "init_sha256_state_kernel", &ret); @@ -346,7 +358,7 @@ bool cl_check_init(void) { *************************************************/ #ifdef KERNELS_PRECOMP_DATA - cout << "Compiling verify kernels" << endl; + cout << "Compiling verify kernels, FLAGS: " << CL_DEVICE_CFLAGS << endl; /* retrieve kernel source */ kernel_src = kernels_precomp_data_src; @@ -360,7 +372,7 @@ bool cl_check_init(void) { CL_ERR( ret ); /* compile the program for the given set of devices */ - ret = clBuildProgram(program, 1, &device, "-DENDIAN_NEUTRAL -DLTC_NO_ASM", NULL, NULL); + ret = clBuildProgram(program, 1, &device, CL_DEVICE_CFLAGS, NULL, NULL); CL_COMPILE_ERR( ret, program, device ); ed25519_sign_kernel = clCreateKernel(program, "ed25519_sign_kernel", &ret); diff --git a/src/opencl-platform/kernels_sha256.h b/src/opencl-platform/kernels_sha256.h index 374a2bd..04c32c0 100644 --- a/src/opencl-platform/kernels_sha256.h +++ b/src/opencl-platform/kernels_sha256.h @@ -1,5 +1,8 @@ const char *kernels_sha256_src = R""""( +#define ENDIAN_NEUTRAL +#define LTC_NO_ASM + #define uint64_t ulong #define uint32_t uint #define uint16_t ushort diff --git a/src/opencl-platform/kernels_verify.h b/src/opencl-platform/kernels_verify.h index c35b748..430df9d 100644 --- a/src/opencl-platform/kernels_verify.h +++ b/src/opencl-platform/kernels_verify.h @@ -1,5 +1,8 @@ const char *kernels_verify_src = R""""( +#define ENDIAN_NEUTRAL +#define LTC_NO_ASM + //////////////////////////////////////////////////////////////////////////////////// int sha512_init(sha512_context * md); @@ -1275,13 +1278,6 @@ void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned void ED25519_DECLSPEC ed25519_sign(__global unsigned char *signature, __global const unsigned char *message, uint32_t message_len, __global const unsigned char *public_key, __global const unsigned char *private_key); int ED25519_DECLSPEC ed25519_verify(__global const unsigned char *signature, __global const unsigned char *message, uint32_t message_len, __global const unsigned char *public_key); void ED25519_DECLSPEC ed25519_verify_many(const gpu_Elems* elems, uint32_t num_elems, uint32_t message_size, uint32_t total_packets, uint32_t total_signatures, const uint32_t* message_lens, const uint32_t* public_key_offset, const uint32_t* signature_offset, const uint32_t* message_start_offset, uint8_t* out, uint8_t use_non_default_stream); -void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar); -void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key); -void ED25519_DECLSPEC ed25519_free_gpu_mem(); -void ED25519_DECLSPEC ed25519_set_verbose(bool val); - -const char* ED25519_DECLSPEC ed25519_license(); -bool ED25519_DECLSPEC ed25519_init(); #ifdef __cplusplus }