diff --git a/driver/others/memory.c b/driver/others/memory.c index 72d3e173cf..2e185593e8 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1603,9 +1603,11 @@ void gotoblas_dummy_for_PGI(void) { #endif #else +/* USE_TLS / COMPILE_TLS not set */ + #include -#ifdef OS_WINDOWS +#if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT) #define ALLOC_WINDOWS #ifndef MEM_LARGE_PAGES #define MEM_LARGE_PAGES 0x20000000 @@ -1619,7 +1621,7 @@ void gotoblas_dummy_for_PGI(void) { #include #include -#ifndef OS_WINDOWS +#if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) #include #ifndef NO_SYSV_IPC #include @@ -1639,7 +1641,7 @@ void gotoblas_dummy_for_PGI(void) { #include #endif -#if defined(OS_FREEBSD) || defined(OS_DARWIN) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) #include #include #endif @@ -1678,9 +1680,12 @@ void gotoblas_dummy_for_PGI(void) { #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) #define CONSTRUCTOR __attribute__ ((constructor)) #define DESTRUCTOR __attribute__ ((destructor)) -#else +#elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) #define CONSTRUCTOR __attribute__ ((constructor(101))) #define DESTRUCTOR __attribute__ ((destructor(101))) +#else +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) #endif #ifdef DYNAMIC_ARCH @@ -1740,7 +1745,8 @@ int i,n; size = CPU_ALLOC_SIZE(nums); ret = sched_getaffinity(0,size,cpusetp); if (ret!=0) return nums; - nums = CPU_COUNT_S(size,cpusetp); + ret = CPU_COUNT_S(size,cpusetp); + if (ret > 0 && ret < nums) nums = ret; CPU_FREE(cpusetp); return nums; #endif @@ -1756,7 +1762,7 @@ int get_num_procs(void) { return nums; } #endif - + #ifdef OS_HAIKU int get_num_procs(void) { static int nums = 0; @@ -1793,7 +1799,7 @@ int get_num_procs(void) { #endif -#if defined(OS_FREEBSD) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) int get_num_procs(void) { @@ -1870,7 +1876,7 @@ void openblas_fork_handler() // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 // In the mean time build with USE_OPENMP=0 or link against another // implementation of OpenMP. -#if !(defined(OS_WINDOWS) || defined(OS_ANDROID)) && defined(SMP_SERVER) +#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) int err; err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); if(err != 0) @@ -1883,7 +1889,7 @@ extern int openblas_goto_num_threads_env(); extern int openblas_omp_num_threads_env(); int blas_get_cpu_number(void){ -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) int max_num; #endif int blas_goto_num = 0; @@ -1891,11 +1897,11 @@ int blas_get_cpu_number(void){ if (blas_num_threads) return blas_num_threads; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) max_num = get_num_procs(); #endif - blas_goto_num = 0; + // blas_goto_num = 0; #ifndef USE_OPENMP blas_goto_num=openblas_num_threads_env(); if (blas_goto_num < 0) blas_goto_num = 0; @@ -1907,7 +1913,7 @@ int blas_get_cpu_number(void){ #endif - blas_omp_num = 0; + // blas_omp_num = 0; blas_omp_num=openblas_omp_num_threads_env(); if (blas_omp_num < 0) blas_omp_num = 0; @@ -1915,7 +1921,7 @@ int blas_get_cpu_number(void){ else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else blas_num_threads = MAX_CPU_NUMBER; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) if (blas_num_threads > max_num) blas_num_threads = max_num; #endif @@ -2002,11 +2008,15 @@ static void *alloc_mmap(void *address){ } if (map_address != (void *)-1) { +#if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); +#endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; +#if defined(SMP) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); +#endif } #ifdef OS_LINUX @@ -2148,14 +2158,18 @@ static void *alloc_mmap(void *address){ #if defined(OS_LINUX) && !defined(NO_WARMUP) } #endif - LOCK_COMMAND(&alloc_lock); if (map_address != (void *)-1) { +#if defined(SMP) && !defined(USE_OPENMP) + LOCK_COMMAND(&alloc_lock); +#endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; +#if defined(SMP) && !defined(USE_OPENMP) + UNLOCK_COMMAND(&alloc_lock); +#endif } - UNLOCK_COMMAND(&alloc_lock); return map_address; } @@ -2554,6 +2568,11 @@ void *blas_memory_alloc(int procpos){ NULL, }; void *(**func)(void *address); + +#if defined(USE_OPENMP) + if (!memory_initialized) { +#endif + LOCK_COMMAND(&alloc_lock); if (!memory_initialized) { @@ -2589,6 +2608,9 @@ void *blas_memory_alloc(int procpos){ } UNLOCK_COMMAND(&alloc_lock); +#if defined(USE_OPENMP) + } +#endif #ifdef DEBUG printf("Alloc Start ...\n"); @@ -2603,13 +2625,17 @@ void *blas_memory_alloc(int procpos){ do { if (!memory[position].used && (memory[position].pos == mypos)) { +#if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -// blas_lock(&memory[position].lock); - +#else + blas_lock(&memory[position].lock); +#endif if (!memory[position].used) goto allocation; - +#if defined(SMP) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -// blas_unlock(&memory[position].lock); +#else + blas_unlock(&memory[position].lock); +#endif } position ++; @@ -2647,7 +2673,6 @@ void *blas_memory_alloc(int procpos){ memory[position].used = 1; UNLOCK_COMMAND(&alloc_lock); -/* blas_unlock(&memory[position].lock);*/ if (!memory[position].addr) { do { @@ -2693,9 +2718,13 @@ void *blas_memory_alloc(int procpos){ } while ((BLASLONG)map_address == -1); +#if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); +#endif memory[position].addr = map_address; +#if defined(SMP) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); +#endif #ifdef DEBUG printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); @@ -2749,8 +2778,9 @@ void blas_memory_free(void *free_area){ #endif position = 0; +#if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); - +#endif while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) position++; @@ -2764,7 +2794,9 @@ void blas_memory_free(void *free_area){ WMB; memory[position].used = 0; +#if defined(SMP) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); +#endif #ifdef DEBUG printf("Unmap Succeeded.\n\n"); @@ -2779,8 +2811,9 @@ void blas_memory_free(void *free_area){ for (position = 0; position < NUM_BUFFERS; position++) printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); #endif +#if defined(SMP) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); - +#endif return; }