Skip to content

Commit

Permalink
throw a message if tf runtime is incompatible (#797)
Browse files Browse the repository at this point in the history
* throw a message if tf runtime is incompatible

fix #557 and #796.

* still raise if tf version is correct

* detect TF_CXX11_ABI_FLAG

* format codes

* fix lint

* move messages into the function

* fix lint

* fix lints
  • Loading branch information
njzjz authored Jun 26, 2021
1 parent 7d145c5 commit b145ca3
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 15 deletions.
82 changes: 68 additions & 14 deletions deepmd/env.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""Module that sets tensorflow working environment and exports inportant constants."""

import os
from pathlib import Path
import logging
import os
import platform
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Any
import numpy as np
from imp import reload
from configparser import ConfigParser
from imp import reload
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import numpy as np

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -37,6 +38,7 @@

SHARED_LIB_MODULE = "op"


def set_env_if_empty(key: str, value: str, verbose: bool = True):
"""Set environment variable only if it is empty.
Expand Down Expand Up @@ -74,7 +76,8 @@ def set_mkl():
"""
if "mkl_rt" in np.__config__.get_info("blas_mkl_info").get("libraries", []):
set_env_if_empty("KMP_BLOCKTIME", "0")
set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0")
set_env_if_empty(
"KMP_AFFINITY", "granularity=fine,verbose,compact,1,0")
reload(np)


Expand Down Expand Up @@ -118,8 +121,10 @@ def get_tf_session_config() -> Any:
intra_op_parallelism_threads=intra, inter_op_parallelism_threads=inter
)


default_tf_session_config = get_tf_session_config()


def get_module(module_name: str) -> "ModuleType":
"""Load force module.
Expand Down Expand Up @@ -149,14 +154,59 @@ def get_module(module_name: str) -> "ModuleType":
if not module_file.is_file():
raise FileNotFoundError(f"module {module_name} does not exist")
else:
module = tf.load_op_library(str(module_file))
try:
    module = tf.load_op_library(str(module_file))
except tf.errors.NotFoundError as e:
    # Loading the op library failed. Diagnose the most likely causes, in
    # order, and re-raise as a RuntimeError with an actionable message
    # (the original NotFoundError is kept as the chained cause).

    # 1. Check whether the CXX11_ABI_FLAG values are compatible.
    # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
    # The ABI of this package and of the TensorFlow runtime should be the same.
    if 'CXX11_ABI_FLAG' in tf.__dict__:
        tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG
    else:
        tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG
    if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag:
        # NOTE: adjacent string literals are concatenated verbatim, so
        # every fragment must carry its own trailing space.
        raise RuntimeError(
            "This deepmd-kit package was compiled with "
            "CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled "
            "with CXX11_ABI_FLAG=%d. These two library ABIs are "
            "incompatible and thus an error is raised when loading %s. "
            "You need to rebuild deepmd-kit against this TensorFlow "
            "runtime." % (
                TF_CXX11_ABI_FLAG,
                tf_cxx11_abi_flag,
                module_name,
            )) from e

    # 2. Different versions may cause incompatibility
    # see #406, #447, #557, #774, and #796 for example
    # throw a message if versions are different
    if TF_VERSION != tf.version.VERSION:
        raise RuntimeError(
            "The version of TensorFlow used to compile this "
            "deepmd-kit package is %s, but the version of TensorFlow "
            "runtime you are using is %s. These two versions are "
            "incompatible and thus an error is raised when loading %s. "
            "You need to install TensorFlow %s, or rebuild deepmd-kit "
            "against TensorFlow %s.\nIf you are using a wheel from "
            "pypi, you may consider to install deepmd-kit executing "
            "`pip install deepmd-kit --no-binary deepmd-kit` "
            "instead." % (
                TF_VERSION,
                tf.version.VERSION,
                module_name,
                TF_VERSION,
                tf.version.VERSION,
            )) from e

    # 3. ABI flag and version both match: still raise, with a generic hint.
    raise RuntimeError(
        "This deepmd-kit package is inconsistent with TensorFlow "
        "Runtime, thus an error is raised when loading %s. "
        "You need to rebuild deepmd-kit against this TensorFlow "
        "runtime." % (
            module_name,
        )) from e
return module


op_module = get_module("libop_abi")
op_grads_module = get_module("libop_grads")


def _get_package_constants(
config_file: Path = Path(__file__).parent / "pkg_config/run_config.ini",
) -> Dict[str, str]:
Expand All @@ -165,7 +215,7 @@ def _get_package_constants(
Parameters
----------
config_file : Path, optional
path to CONFIG file, by default "config/run_config.ini"
path to CONFIG file, by default "pkg_config/run_config.ini"
Returns
-------
Expand All @@ -176,8 +226,14 @@ def _get_package_constants(
config.read(config_file)
return dict(config.items("CONFIG"))


GLOBAL_CONFIG = _get_package_constants()
MODEL_VERSION = GLOBAL_CONFIG["model_version"]
TF_VERSION = GLOBAL_CONFIG["tf_version"]
TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"])

op_module = get_module("libop_abi")
op_grads_module = get_module("libop_grads")

if GLOBAL_CONFIG["precision"] == "-DHIGH_PREC":
GLOBAL_TF_FLOAT_PRECISION = tf.float64
Expand Down Expand Up @@ -221,5 +277,3 @@ def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor:
output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION`
"""
return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION)


19 changes: 18 additions & 1 deletion source/cmake/Findtensorflow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,27 @@ else (BUILD_CPP_IF)
endif ()
endif (BUILD_CPP_IF)

# detect TensorFlow version
try_run(
TENSORFLOW_VERSION_RUN_RESULT_VAR TENSORFLOW_VERSION_COMPILE_RESULT_VAR
${CMAKE_CURRENT_BINARY_DIR}/tf_version
"${CMAKE_CURRENT_LIST_DIR}/tf_version.cpp"
LINK_LIBRARIES ${TensorFlowFramework_LIBRARY}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}"
RUN_OUTPUT_VARIABLE TENSORFLOW_VERSION
COMPILE_OUTPUT_VARIABLE TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR
)
# Abort configuration when the version probe (tf_version.cpp) could not be
# compiled, and show the captured compiler output.
if (NOT ${TENSORFLOW_VERSION_COMPILE_RESULT_VAR})
  message(FATAL_ERROR "Failed to compile: \n ${TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR}" )
endif()
# Abort when the probe did not exit with status 0. Report the run result
# variable (the exit status), not TENSORFLOW_VERSION, which holds the
# program's captured output.
if (NOT ${TENSORFLOW_VERSION_RUN_RESULT_VAR} EQUAL "0")
  message(FATAL_ERROR "Failed to run, return code: ${TENSORFLOW_VERSION_RUN_RESULT_VAR}" )
endif()

# print message
if (NOT TensorFlow_FIND_QUIETLY)
message(STATUS "Found TensorFlow: ${TensorFlow_INCLUDE_DIRS}, ${TensorFlow_LIBRARY}, ${TensorFlowFramework_LIBRARY} "
" in ${TensorFlow_search_PATHS}")
" in ${TensorFlow_search_PATHS} (found version \"${TENSORFLOW_VERSION}\")")
endif ()

unset(TensorFlow_search_PATHS)
10 changes: 10 additions & 0 deletions source/cmake/tf_version.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include <iostream>
#include "tensorflow/core/public/version.h"

// Print the version string of the TensorFlow headers this program is
// compiled against. Only the version itself is written to stdout (no
// trailing newline); Findtensorflow.cmake runs this file through try_run
// and captures the output as TENSORFLOW_VERSION.
int main(int argc, char * argv[])
{
// See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h
// TF_VERSION_STRING has been available since TensorFlow v0.6
std::cout << TF_VERSION_STRING;
return 0;
}
2 changes: 2 additions & 0 deletions source/config/run_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ GIT_DATE = @GIT_DATE@
GIT_BRANCH = @GIT_BRANCH@
TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@
TF_LIBS = @TensorFlow_LIBRARY@
TF_VERSION = @TENSORFLOW_VERSION@
TF_CXX11_ABI_FLAG = @OP_CXX_ABI@
PRECISION = @PREC_DEF@
MODEL_VERSION=@MODEL_VERSION@

0 comments on commit b145ca3

Please sign in to comment.