diff --git a/src/Makefile b/src/Makefile
index 9408053..bf75e0e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -21,7 +21,14 @@ CL_LIB=cl-crypt
 
 CHACHA_IF_DIR:=crypt-if
 
-CL_HEADER_DIR:=opencl-platform
+# Sample paths for an NVIDIA OpenCL toolkit on Windows (Cygwin):
+#CL_HEADER_DIR:=/cygdrive/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.0/include/
+#CL_LIBRARY_DIR:=/cygdrive/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.0/lib/x64
+# Sample paths for an AMD OpenCL driver on Ubuntu Linux:
+#CL_HEADER_DIR:=/opt/amdgpu-pro/include/
+#CL_LIBRARY_DIR:=/opt/amdgpu-pro/lib/x86_64-linux-gnu/
+
+CL_PLATFORM_DIR:=opencl-platform
 
 CUDA_HEADER_DIR:=cuda-headers
 CUDA_SHA256_DIR:=cuda-sha256
@@ -29,14 +36,20 @@ CUDA_SHA256_DIR:=cuda-sha256
 CUDA_DIR ?= /usr/local/cuda
 
 CXX ?= g++
+# NOTE: CFLAGS is extended further down, right after CFLAGS_COMMON is defined; do not repeat those flags here.
+
+# Some platforms require the OpenCL kernel optimisations to be disabled (e.g. AMD on Linux).
+# Remove "-cl-opt-disable" to build the kernels with the default optimisations.
+CL_DEVICE_CFLAGS=-cl-opt-disable
 
 CFLAGS_COMMON:=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CHACHA_IF_DIR)
 CFLAGS+=$(CFLAGS_COMMON) -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
 
 #use -DUSE_RDTSC for Windows compilation
 CL_CFLAGS_common:=-fPIC -std=c++11 $(CFLAGS_COMMON) -DOPENCL_VARIANT \
-		  -I$(CL_HEADER_DIR) -Icommon/ \
-		  -I$(CUDA_DIR)/targets/x86_64-linux/include $(HOST_CFLAGS)
+		  -DCL_DEVICE_CFLAGS="\"$(CL_DEVICE_CFLAGS)\"" \
+		  -I$(CL_PLATFORM_DIR) -Icommon/ \
+		  -I$(CUDA_DIR)/targets/x86_64-linux/include
 CL_CFLAGS_release:=$(CL_CFLAGS_common) -O3
 CL_CFLAGS_debug:=$(CL_CFLAGS_common) -O0 -g
 CL_CFLAGS:=$(CL_CFLAGS_$V)
@@ -127,14 +140,20 @@ $V/poh_verify.o: $(POH_SRCS)
 
 CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o cl_chacha.o cl_poh_verify.o)
 
-$V/lib$(CL_LIB).so: $(CL_CPU_GPU_OBJS)
-	$(CXX) -shared $^ -lOpenCL -o $@
+$V/cl_crypt-dlink.o: $(CL_CPU_GPU_OBJS)
+	ar rvs $@ $^
+
+$V/lib$(CL_LIB).a: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
+	ar rcs $@ $^
+
+$V/lib$(CL_LIB).so: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS) # shared-library variant of the OpenCL crypto library
+	$(CXX) -shared $^ -o $@
 
 $V/cl_ecc_main.o: $(CL_ECC_DIR)/main.cpp $(ECC_DIR)/ed25519.h
 	@mkdir -p $(@D)
 	$(CXX) $(CL_CFLAGS) -pthread -I$(ECC_DIR) -c $< -o $@
 
-$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).so
+$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).a
 	$(CXX) $(CL_CFLAGS) -L$(CUDA_DIR)/lib64 -L$V -pthread $< -l$(CL_LIB) -lOpenCL -o $@
 
 CPU_GPU_OBJS=$(addprefix $V/,chacha_cbc.o aes_cbc.o verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o)
diff --git a/src/opencl-ecc-ed25519/main.cpp b/src/opencl-ecc-ed25519/main.cpp
index 776b4e3..bcd0847 100644
--- a/src/opencl-ecc-ed25519/main.cpp
+++ b/src/opencl-ecc-ed25519/main.cpp
@@ -10,7 +10,6 @@
 #include "gpu_common.h"
 #include "gpu_ctx.h"
 
-#define USE_CLOCK_GETTIME
 #include "perftime.h"
 
 #define PACKET_SIZE 512
@@ -100,14 +99,12 @@ int main(int argc, const char* argv[]) {
         }
     }
 
-    if ((argc - arg) != 6) {
-        printf("usage: %s [-v] <num_signatures> <num_elems> <num_sigs_per_packet> <num_threads> <num_iterations> <use_non_default_stream>\n", argv[0]);
+    if ((argc - arg) < 6 || (argc - arg) > 8) {  // six mandatory arguments plus up to two optional OpenCL selectors
+        printf("usage: %s [-v] <num_signatures> <num_elems> <num_sigs_per_packet> <num_threads> <num_iterations> <use_non_default_stream> [<cl_platform_id> [<cl_device_id>]]\n", argv[0]);
         return 1;
     }
 
     ed25519_set_verbose(verbose);
-	
-	DIE(cl_check_init(CL_DEVICE_TYPE_GPU) == false, "OpenCL could not be init");
 
     int num_signatures_per_elem = strtol(argv[arg++], NULL, 10);
     if (num_signatures_per_elem <= 0) {
@@ -145,6 +142,17 @@ int main(int argc, const char* argv[]) {
         return 1;
     }
 
+    if (arg < argc) {  // optional <cl_platform_id>: test the remaining arguments, not raw argc, so a leading -v cannot shift the check
+        query_platform_id = strtol(argv[arg++], NULL, 10);
+    }
+
+    if (arg < argc) {  // optional <cl_device_id>: only parsed when an argument is still left; otherwise the default of 0 is kept
+        query_device_id = strtol(argv[arg++], NULL, 10);
+    }
+
+	DIE(cl_check_init() == false, "OpenCL could not be init");
+
+	LOG("OpenCL init has finished\n");
     LOG("streamer size: %zu elems size: %zu\n", sizeof(streamer_Packet), sizeof(gpu_Elems));
 
     std::vector<verify_cpu_ctx_t> vctx = std::vector<verify_cpu_ctx_t>(num_threads);
@@ -331,14 +339,17 @@ int main(int argc, const char* argv[]) {
 
     for (int thread = 0; thread < num_threads; thread++) {
         LOG("ret:\n");
+        int verify_ok = out_size / (int)sizeof(uint8_t);
         bool verify_failed = false;
         for (int i = 0; i < out_size / (int)sizeof(uint8_t); i++) {
             if (vctx[thread].out_h[i] != 1) {
                 verify_failed = true;
+                verify_ok--;
             }
         }
         LOG("\n");
         fflush(stdout);
+        printf("Verify OK: %d / %d\n", verify_ok, out_size / (int)sizeof(uint8_t));
         assert(verify_failed == false);
     }
 
diff --git a/src/opencl-ecc-ed25519/verify.cpp b/src/opencl-ecc-ed25519/verify.cpp
index 457fddd..8faa8f4 100644
--- a/src/opencl-ecc-ed25519/verify.cpp
+++ b/src/opencl-ecc-ed25519/verify.cpp
@@ -16,7 +16,6 @@
 #include "gpu_common.h"
 #include "gpu_ctx.h"
 
-#define USE_CLOCK_GETTIME
 #include "perftime.h"
 
 static int consttime_equal(const unsigned char *x, const unsigned char *y) {
@@ -101,8 +100,6 @@ int ed25519_verify(const unsigned char *signature,
     return ed25519_verify_device(signature, message, message_len, public_key);
 }
 
-bool g_verbose = true;
-
 void ed25519_set_verbose(bool val) {
     g_verbose = val;
 }
diff --git a/src/opencl-platform/cl_common.h b/src/opencl-platform/cl_common.h
index 9ca8db2..e9e6cbc 100644
--- a/src/opencl-platform/cl_common.h
+++ b/src/opencl-platform/cl_common.h
@@ -36,6 +36,9 @@ extern bool g_verbose;
    #include <CL/cl.h>
 #endif
 
+extern cl_uint query_device_id;
+extern cl_uint query_platform_id;
+
 // runs at the start of any OpenCL entry point crypto function
 bool cl_check_init(cl_uint sel_device_type);
 bool cl_check_init(void);
diff --git a/src/opencl-platform/cl_init_platform.cpp b/src/opencl-platform/cl_init_platform.cpp
index d10b469..7e71df8 100644
--- a/src/opencl-platform/cl_init_platform.cpp
+++ b/src/opencl-platform/cl_init_platform.cpp
@@ -9,6 +9,10 @@
 #include "cl_common.h"
 
 cl_uint query_device_type = CL_DEVICE_TYPE_ALL;
+
+cl_uint query_platform_id = 0;
+cl_uint query_device_id = 0;
+
 bool cl_is_init = false;
 
 cl_context context;
@@ -230,6 +234,8 @@ bool cl_check_init(void) {
     CL_ERR( clGetPlatformIDs(platform_num, platform_list, NULL));
     cout << "Platforms found: " << platform_num << endl;
 
+    bool dev_selected = false;
+
     /* list all platforms and VENDOR/VERSION properties */
     for (cl_uint platf = 0; platf < platform_num; platf++) {
         /* get attribute CL_PLATFORM_VENDOR */
@@ -290,15 +296,21 @@ bool cl_check_init(void) {
 
             /* select device based on cli arguments */
             string tmpAttrData = attr_data;
-            
-            // always select last device of type GPU
-            device = device_list[dev];
+
+            if((dev == query_device_id) && 
+                (platf == query_platform_id)) {
+                device = device_list[dev];
+                cout << "<----- SELECTED";
+                dev_selected = true;
+            }
 
             delete[] attr_data;
             cout << endl;
         }
     }
 
+    DIE(dev_selected == false, "no platform or device selected");
+
     // clean
     delete[] platform_list;
     delete[] device_list;
@@ -318,7 +330,7 @@ bool cl_check_init(void) {
     *************************************************/
     
 #ifdef KERNELS_SHA256
-    cout << "Compiling sha256 kernels" << endl;
+    cout << "Compiling sha256 kernels, FLAGS: " << CL_DEVICE_CFLAGS << endl;
 
     /* retrieve kernel source */
     kernel_src = kernels_sha256_src;
@@ -331,7 +343,7 @@ bool cl_check_init(void) {
     CL_ERR( ret );
 
     /* compile the program for the given set of devices */
-    ret = clBuildProgram(program, 1, &device, "-DENDIAN_NEUTRAL -DLTC_NO_ASM", NULL, NULL);
+    ret = clBuildProgram(program, 1, &device, CL_DEVICE_CFLAGS, NULL, NULL);
     CL_COMPILE_ERR( ret, program, device );
     
     init_sha256_state_kernel = clCreateKernel(program, "init_sha256_state_kernel", &ret);
@@ -346,7 +358,7 @@ bool cl_check_init(void) {
     *************************************************/
     
 #ifdef KERNELS_PRECOMP_DATA
-    cout << "Compiling verify kernels" << endl;
+    cout << "Compiling verify kernels, FLAGS: " << CL_DEVICE_CFLAGS << endl;
     
     /* retrieve kernel source */
     kernel_src = kernels_precomp_data_src;
@@ -360,7 +372,7 @@ bool cl_check_init(void) {
     CL_ERR( ret );
 
     /* compile the program for the given set of devices */
-    ret = clBuildProgram(program, 1, &device, "-DENDIAN_NEUTRAL -DLTC_NO_ASM", NULL, NULL);
+    ret = clBuildProgram(program, 1, &device, CL_DEVICE_CFLAGS, NULL, NULL);
     CL_COMPILE_ERR( ret, program, device );
     
 	ed25519_sign_kernel = clCreateKernel(program, "ed25519_sign_kernel", &ret);
diff --git a/src/opencl-platform/kernels_sha256.h b/src/opencl-platform/kernels_sha256.h
index 374a2bd..04c32c0 100644
--- a/src/opencl-platform/kernels_sha256.h
+++ b/src/opencl-platform/kernels_sha256.h
@@ -1,5 +1,8 @@
 const char *kernels_sha256_src = R""""(
 
+#define ENDIAN_NEUTRAL
+#define LTC_NO_ASM
+
 #define uint64_t    ulong
 #define uint32_t    uint
 #define uint16_t    ushort
diff --git a/src/opencl-platform/kernels_verify.h b/src/opencl-platform/kernels_verify.h
index c35b748..430df9d 100644
--- a/src/opencl-platform/kernels_verify.h
+++ b/src/opencl-platform/kernels_verify.h
@@ -1,5 +1,8 @@
 const char *kernels_verify_src = R""""(
 
+#define ENDIAN_NEUTRAL
+#define LTC_NO_ASM
+
 ////////////////////////////////////////////////////////////////////////////////////
 
 int sha512_init(sha512_context * md);
@@ -1275,13 +1278,6 @@ void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned
 void ED25519_DECLSPEC ed25519_sign(__global unsigned char *signature, __global const unsigned char *message, uint32_t message_len, __global const unsigned char *public_key, __global const unsigned char *private_key);
 int ED25519_DECLSPEC ed25519_verify(__global const unsigned char *signature, __global const unsigned char *message, uint32_t message_len, __global const unsigned char *public_key);
 void ED25519_DECLSPEC ed25519_verify_many(const gpu_Elems* elems, uint32_t num_elems, uint32_t message_size, uint32_t total_packets, uint32_t total_signatures, const uint32_t* message_lens, const uint32_t* public_key_offset, const uint32_t* signature_offset, const uint32_t* message_start_offset, uint8_t* out, uint8_t use_non_default_stream);
-void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar);
-void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key);
-void ED25519_DECLSPEC ed25519_free_gpu_mem();
-void ED25519_DECLSPEC ed25519_set_verbose(bool val);
-
-const char* ED25519_DECLSPEC ed25519_license();
-bool ED25519_DECLSPEC ed25519_init();
 
 #ifdef __cplusplus
 }