Skip to content
This repository has been archived by the owner on Apr 24, 2022. It is now read-only.

Cuda11 #2254

Open
wants to merge 30 commits into
base: cuda11
Choose a base branch
from
Open

Cuda11 #2254

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
ecd7559
OpenCL: reduce GenerateDAG() time using shared __local memory
hackmod Sep 5, 2020
1536abb
config option to use system OpenCL (for ROCm users)
ianzur Sep 10, 2020
515df7a
fix epoch zero bug
hackmod Sep 11, 2020
a355fea
Merge pull request #2033 from hackmod/epoch-zero-bug
jean-m-cyr Sep 11, 2020
c118e83
Merge pull request #2032 from ianzur/sys-openCL-config-option
jean-m-cyr Sep 11, 2020
861087f
MESA define cl_amd_media_ops but no amd_bitalign() defined.
hackmod Sep 12, 2020
c934fdf
Merge pull request #2035 from hackmod/amd_bitalign-fix
jean-m-cyr Sep 13, 2020
6a8ed69
set PLATFORM as intended
hackmod Sep 17, 2020
e7369aa
Added rejected and failed share counts to the API's HTTP site
jclapis Sep 20, 2020
34f891b
Cleaned up the reported power numbers in the API's HTTP site
jclapis Sep 20, 2020
29fcb8b
Cleaned the reported power in the CLI display when HWMON is enabled
jclapis Sep 20, 2020
433ec1b
Fixed API HTTP power report so it properly truncates to 2 decimal places
jclapis Sep 20, 2020
c23dae1
Update POOL_EXAMPLES_ETH.md
OberstK Sep 29, 2020
66186f1
Merge pull request #2048 from OberstK/patch-1
jean-m-cyr Sep 29, 2020
cd75c13
Merge pull request #2030 from ethereum-mining/cuda11
ddobreff Oct 9, 2020
d6d4197
Update POOL_EXAMPLES_ETH.md
ibmua Oct 9, 2020
91d74ee
Merge pull request #2054 from ibmua/patch-1
jean-m-cyr Oct 9, 2020
9c6dc31
Merge pull request #2041 from jclapis/master
jean-m-cyr Oct 9, 2020
752100a
Readme.md: removed my ETH address
MariusVanDerWijden Oct 13, 2020
d5f3a97
Merge pull request #2057 from ethereum-mining/MariusVanDerWijden-patch-1
chfast Oct 13, 2020
8f48e43
Support Ampere graphic cards using Cuda 11.0/11.1
aloisklink Nov 23, 2020
beaeb00
Merge pull request #2082 from aloisklink/support-ampere
AndreaLanfranchi Nov 24, 2020
aaec223
Merge pull request #2023 from hackmod/opencl-dag
jean-m-cyr Jan 1, 2021
3a81f1d
Merge pull request #2038 from hackmod/mesa-clover
jean-m-cyr Jan 1, 2021
c41fcef
Update README.md
cryptomine Jan 8, 2021
18995fa
Merge pull request #2107 from cryptomine/patch-1
jean-m-cyr Jan 10, 2021
608f4de
Fix fastexit not working on some AMD GPUs
jean-m-cyr Jan 11, 2021
47ae149
Merge pull request #2111 from jean-m-cyr/master
ddobreff Jan 11, 2021
9cb460b
Avoiding buffer overflow in SHA3
Feb 8, 2021
ce52c74
Merge pull request #2147 from ekuznetsov139/master
jean-m-cyr Feb 10, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ option(ETHDBUS "Build with D-Bus support" OFF)
option(APICORE "Build with API Server support" ON)
option(BINKERN "Install AMD binary kernels" ON)
option(DEVBUILD "Log developer metrics" OFF)
option(USE_SYS_OPENCL "Build with system OpenCL" OFF)

# propagates CMake configuration options to the compiler
function(configureProject)
Expand All @@ -55,6 +56,9 @@ function(configureProject)
if (DEVBUILD)
add_definitions(-DDEV_BUILD)
endif()
if (USE_SYS_OPENCL)
add_definitions(-DUSE_SYS_OPENCL)
endif()
endfunction()

hunter_add_package(Boost COMPONENTS system filesystem thread)
Expand All @@ -68,6 +72,10 @@ find_package(ethash CONFIG REQUIRED)

# macOS uses the system OpenCL library, so force the option on there.
# This has to run before configureProject(): that function reads
# USE_SYS_OPENCL to decide whether to add the -DUSE_SYS_OPENCL compile
# definition, so overriding the option afterwards would leave the
# definition missing on macOS even though the option reports ON.
if(APPLE)
    set(USE_SYS_OPENCL ON)
endif()

# Propagate the final CMake option values to the compiler as definitions.
configureProject()

message("----------------------------------------------------------------------------")
message("-- CMake ${CMAKE_VERSION}")
message("-- Build ${CMAKE_BUILD_TYPE} / ${CMAKE_SYSTEM_NAME}")
Expand All @@ -79,6 +87,7 @@ message("-- ETHDBUS Build D-Bus components ${ETHD
message("-- APICORE Build API Server components ${APICORE}")
message("-- BINKERN Install AMD binary kernels ${BINKERN}")
message("-- DEVBUILD Build with dev logging ${DEVBUILD}")
message("-- USE_SYS_OPENCL Build with system OpenCL ${USE_SYS_OPENCL}")
message("----------------------------------------------------------------------------")
message("")

Expand All @@ -87,7 +96,6 @@ if(UNIX AND NOT APPLE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++")
endif()


cable_add_buildinfo_library(PROJECT_NAME ${PROJECT_NAME})

add_subdirectory(libdevcore)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Ordered alphabetically. [Contributors statistics since 2015-08-20].
| EoD | [@EoD](https://github.com/EoD) | |
| Genoil | [@Genoil](https://github.com/Genoil) | |
| goobur | [@goobur](https://github.com/goobur) | |
| Marius van der Wijden | [@MariusVanDerWijden](https://github.com/MariusVanDerWijden) | ETH: 0x57d22b967c9dc64e5577f37edf1514c2d8985099 |
| Marius van der Wijden | [@MariusVanDerWijden](https://github.com/MariusVanDerWijden) | |
| Paweł Bylica | [@chfast](https://github.com/chfast) | |
| Philipp Andreas | [@smurfy](https://github.com/smurfy) | |
| Stefan Oberhumer | [@StefanOberhumer](https://github.com/StefanOberhumer) | |
Expand Down Expand Up @@ -131,9 +131,9 @@ Because of the GDDR5X memory, which can't be fully utilized for ETH mining (yet)

Only GCN 1.0 GPUs (78x0, 79x0, 270, 280), but in a different way. You'll see that on each new epoch (30K blocks), the hashrate will go down a little bit.

### Can I still mine ETH with my 2GB GPU?
### Can I still mine ETH with my 4GB GPU?

Not really, your VRAM must be above the DAG size (Currently about 2.15 GB.) to get best performance. Without it severe hash loss will occur.
Not really. Your VRAM must be larger than the DAG size (currently about 4.023 GB) to get the best performance; without it, severe hash loss will occur.

### What are the optimal launch parameters?

Expand Down
3 changes: 3 additions & 0 deletions docs/BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ you have to install the OpenGL libraries. E.g. on Ubuntu run:
sudo apt-get install mesa-common-dev
```

If you want to use a locally installed [ROCm-OpenCL](https://rocmdocs.amd.com/en/latest/) package, pass the build flag `-DUSE_SYS_OPENCL=ON` when configuring with CMake.

### macOS

1. GCC version >= TBF
Expand Down Expand Up @@ -143,6 +145,7 @@ cmake .. -DETHASHCUDA=ON -DETHASHCL=OFF
* `-DAPICORE=ON` - enable API Server, `ON` by default.
* `-DBINKERN=ON` - install AMD binary kernels, `ON` by default.
* `-DETHDBUS=ON` - enable D-Bus support, `OFF` by default.
* `-DUSE_SYS_OPENCL=ON` - use the system OpenCL library, `OFF` by default (forced `ON` on macOS). Enable it to use a locally installed **ROCm-OpenCL** package.

## Disable Hunter

Expand Down
12 changes: 7 additions & 5 deletions docs/POOL_EXAMPLES_ETH.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ The above samples produce the very same result.
-P stratum://account%%2e1234.Worker:[email protected]:4444
```

## Secure socket comunications for stratum only
## Secure socket communications for stratum only

Ethminer supports secure socket communications (where the pool implements and offers it) to avoid the risk of a [man-in-the-middle attack](https://en.wikipedia.org/wiki/Man-in-the-middle_attack).
To enable it simply replace tcp with either:
Expand Down Expand Up @@ -104,7 +104,7 @@ Here you can find a collection of samples to connect to most commonly used ethas
* A stratum connection is **always to be preferred** over **getwork** when the pool offers it, due to its better network latency.
* Where the pool supports it, the samples use a protocol that allows reporting of the hashrate (`--report-hashrate`).

**Check for updates in the pool connection settings visiting the pools homepage.**
**Check for updates in the pool connection settings visiting the pool's homepage.**

## Variables

Expand Down Expand Up @@ -252,6 +252,11 @@ HINTS:

### nanopool.org

Notice ⚠

* Use "%40" for the @-sign in your email address
* Use "\%2e" for the .-sign on Linux in ETH_WALLET.WORKERNAME

With email:

```
Expand All @@ -272,9 +277,6 @@ Without email:
-P stratum1+tcp://[email protected]:9999
```

HINTS:

* Use "%40" for the @-sign in your email address

### nicehash.com

Expand Down
11 changes: 9 additions & 2 deletions libapicore/ApiServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,10 +1169,17 @@ std::string ApiConnection::getHttpMinerStatDetail()

_ret << "<td class=right>" << dev::getFormattedHashes(hashrate) << "</td>";

_ret << "<td class=right>" << device["mining"]["shares"][0].asString() << "</td>";

string solString = "A" + device["mining"]["shares"][0].asString() +
":R" + device["mining"]["shares"][1].asString() +
":F" + device["mining"]["shares"][2].asString();
_ret << "<td class=right>" << solString << "</td>";
_ret << "<td class=right>" << device["hardware"]["sensors"][0].asString() << "</td>";
_ret << "<td class=right>" << device["hardware"]["sensors"][1].asString() << "</td>";
_ret << "<td class=right>" << device["hardware"]["sensors"][2].asString() << "</td>";

stringstream powerStream; // Round the power to 2 decimal places to remove floating point garbage
powerStream << fixed << setprecision(2) << device["hardware"]["sensors"][2].asDouble();
_ret << "<td class=right>" << powerStream.str() << "</td>";

_ret << "</tr>"; // Close row
}
Expand Down
2 changes: 1 addition & 1 deletion libethash-cl/CLMiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ bool CLMiner::initEpoch_internal()
addDefinition(code, "WORKSIZE", m_settings.localWorkSize);
addDefinition(code, "ACCESSES", 64);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
addDefinition(code, "PLATFORM", m_deviceDescriptor.clPlatformId);
addDefinition(code, "PLATFORM", static_cast<unsigned>(m_deviceDescriptor.clPlatformType));
addDefinition(code, "COMPUTE", computeCapability);

if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover)
Expand Down
4 changes: 2 additions & 2 deletions libethash-cl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ set(SOURCES
${CMAKE_CURRENT_BINARY_DIR}/ethash.h
)

if(APPLE)
# On macOS use system OpenCL library.
if(USE_SYS_OPENCL)
# On macOS or using ROCm-OpenCL, use system OpenCL library.
find_package(OpenCL REQUIRED)
else()
hunter_add_package(OpenCL)
Expand Down
52 changes: 43 additions & 9 deletions libethash-cl/kernels/cl/ethash.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
// You should have received a copy of the GNU General Public License
// along with Gateless Gate Sharp. If not, see <http://www.gnu.org/licenses/>.


#define OPENCL_PLATFORM_UNKNOWN 0
#define OPENCL_PLATFORM_AMD 1
#define OPENCL_PLATFORM_CLOVER 2
#define OPENCL_PLATFORM_NVIDIA 3
#define OPENCL_PLATFORM_INTEL 4

#if (defined(__Tahiti__) || defined(__Pitcairn__) || defined(__Capeverde__) || defined(__Oland__) || defined(__Hainan__))
#define LEGACY
Expand All @@ -26,6 +30,22 @@
#endif

#if defined(cl_amd_media_ops)
#if PLATFORM == OPENCL_PLATFORM_CLOVER
/*
* Mesa (the Clover platform) defines the cl_amd_media_ops macro but does not
* provide the amd_bitalign() built-in, so a replacement is defined here.
* https://github.com/openwall/john/issues/3454#issuecomment-436899959
*
* The two components are computed independently, one v_alignbit_b32
* instruction each:
*   dst.x <- v_alignbit_b32(src0.x, src1.x, src2.x)
*   dst.y <- v_alignbit_b32(src0.y, src1.y, src2.y)
*/
uint2 amd_bitalign(uint2 src0, uint2 src1, uint2 src2)
{
uint2 dst;
// Operand numbering: %0/%1 are the outputs (dst.x, dst.y); %2-%4 are the
// .x inputs and %5-%7 are the .y inputs, in src0, src1, src2 order.
__asm("v_alignbit_b32 %0, %2, %3, %4\n"
"v_alignbit_b32 %1, %5, %6, %7"
: "=v" (dst.x), "=v" (dst.y)
: "v" (src0.x), "v" (src1.x), "v" (src2.x),
"v" (src0.y), "v" (src1.y), "v" (src2.y));
return dst;
}
#endif
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#elif defined(cl_nv_pragma_unroll)
uint amd_bitalign(uint src0, uint src1, uint src2)
Expand Down Expand Up @@ -246,7 +266,7 @@ struct SearchResults {

__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(
__global struct SearchResults* restrict g_output,
__global volatile struct SearchResults* restrict g_output,
__constant uint2 const* g_header,
__global ulong8 const* _g_dag0,
__global ulong8 const* _g_dag1,
Expand Down Expand Up @@ -424,23 +444,37 @@ static void SHA3_512(uint2 *s)
__kernel void GenerateDAG(uint start, __global const uint16 *_Cache, __global uint16 *_DAG0, __global uint16 *_DAG1, uint light_size)
{
__global const Node *Cache = (__global const Node *) _Cache;
uint NodeIdx = start + get_global_id(0);
const uint gid = get_global_id(0);
uint NodeIdx = start + gid;
const uint thread_id = gid & 3;

__local Node sharebuf[WORKSIZE];
__local uint indexbuf[WORKSIZE];
__local Node *dagNode = sharebuf + (get_local_id(0) / 4) * 4;
__local uint *indexes = indexbuf + (get_local_id(0) / 4) * 4;
__global const Node *parentNode;

Node DAGNode = Cache[NodeIdx % light_size];

DAGNode.dwords[0] ^= NodeIdx;
SHA3_512(DAGNode.qwords);

dagNode[thread_id] = DAGNode;
barrier(CLK_LOCAL_MEM_FENCE);
for (uint i = 0; i < 256; ++i) {
uint ParentIdx = fnv(NodeIdx ^ i, DAGNode.dwords[i & 15]) % light_size;
__global const Node *ParentNode = Cache + ParentIdx;
uint ParentIdx = fnv(NodeIdx ^ i, dagNode[thread_id].dwords[i & 15]) % light_size;
indexes[thread_id] = ParentIdx;
barrier(CLK_LOCAL_MEM_FENCE);

#pragma unroll
for (uint x = 0; x < 4; ++x) {
DAGNode.dqwords[x] *= (uint4)(FNV_PRIME);
DAGNode.dqwords[x] ^= ParentNode->dqwords[x];
for (uint t = 0; t < 4; ++t) {
uint parentIndex = indexes[t];
parentNode = Cache + parentIndex;

dagNode[t].dqwords[thread_id] = fnv(dagNode[t].dqwords[thread_id], parentNode->dqwords[thread_id]);
barrier(CLK_LOCAL_MEM_FENCE);
}
}
DAGNode = dagNode[thread_id];

SHA3_512(DAGNode.qwords);

Expand Down
8 changes: 8 additions & 0 deletions libethash-cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ else()
if(NOT CUDA_VERSION VERSION_LESS 10.0)
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_75,code=sm_75")
endif()
if(NOT CUDA_VERSION VERSION_LESS 11.0)
# NVIDIA A100 and NVIDIA DGX-A100
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_80,code=sm_80")
endif()
if(NOT CUDA_VERSION VERSION_LESS 11.1)
# Tesla GA10x cards, RTX Ampere – RTX 3080/3090, RTX A6000, RTX A40
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_86,code=sm_86")
endif()
endif()

file(GLOB sources "*.cpp" "*.cu")
Expand Down
10 changes: 6 additions & 4 deletions libethash-cuda/ethash_cuda_miner_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@ __global__ void ethash_calculate_dag_item(uint32_t start)
uint32_t const node_index = start + blockIdx.x * blockDim.x + threadIdx.x;
if (((node_index >> 1) & (~1)) >= d_dag_size)
return;

hash128_t dag_node;
union {
hash128_t dag_node;
uint2 dag_node_mem[25];
};
copy(dag_node.uint4s, d_light[node_index % d_light_size].uint4s, 4);
dag_node.words[0] ^= node_index;
SHA3_512(dag_node.uint2s);
SHA3_512(dag_node_mem);

const int thread_id = threadIdx.x & 3;

Expand All @@ -78,7 +80,7 @@ __global__ void ethash_calculate_dag_item(uint32_t start)
}
}
}
SHA3_512(dag_node.uint2s);
SHA3_512(dag_node_mem);
hash64_t* dag_nodes = (hash64_t*)d_dag;
copy(dag_nodes[node_index].uint4s, dag_node.uint4s, 4);
}
Expand Down
2 changes: 1 addition & 1 deletion libethcore/Miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ struct HwSensorsType
{
string _ret = to_string(tempC) + "C " + to_string(fanP) + "%";
if (powerW)
_ret.append(boost::str(boost::format("%f") % powerW));
_ret.append(" " + boost::str(boost::format("%0.2f") % powerW) + "W");
return _ret;
};
};
Expand Down
2 changes: 1 addition & 1 deletion libpoolprotocols/PoolManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ void PoolManager::setClientHandlers()
// If epoch is valued in workpackage take it
if (wp.epoch == -1)
{
if (m_currentWp.block > 0)
if (m_currentWp.block >= 0)
m_currentWp.epoch = m_currentWp.block / 30000;
else
m_currentWp.epoch = ethash::find_epoch_number(
Expand Down