From 6316cc81c434cbd89af97e05e4ca8c19aa8009f6 Mon Sep 17 00:00:00 2001
From: jacobkahn
Date: Mon, 13 Feb 2023 00:42:54 -0500
Subject: [PATCH] Add standalone lib build with Python install

Add a BUILD_PYTHON_STANDALONE CMake option. When it is ON, the top-level
CMakeLists.txt includes python/BuildStandalone.cmake, which builds one
shared "kenlm" library from the util/, lm/, util/double-conversion/ and
python/ sources, and then returns before any regular target is configured.

setup.py gains a build_ext command that configures and builds that library
with CMake into the extension output directory and then runs the stock
setuptools build_ext. KENLM_MAX_ORDER moves from lm/CMakeLists.txt to the
top-level CMakeLists.txt so that it is defined before the standalone build
is included.

---
Reviewer notes (ignored by git-am):
* Line structure, indentation and blank context lines were reconstructed
  from a whitespace-collapsed copy of this patch; check the context lines
  against the target tree before applying.
* Amended relative to the original commit, so the blob ids on the "index"
  lines below no longer describe the resulting files:
  * BuildStandalone.cmake requires CMake 3.6, the first release that
    provides list(FILTER).
  * The main/test exclusion regexes are anchored and escape the dot, so a
    checkout path such as ".../test/cc/..." cannot filter out every source.
  * The LZMA branch defines HAVE_XZLIB, the macro setup.py already passes.
  * build_ext.run drops an unused variable, chains the OSError, and uses
    os.makedirs(..., exist_ok=True).

 CMakeLists.txt               | 12 +++++++
 lm/CMakeLists.txt            |  1 -
 pyproject.toml               |  2 ++
 python/BuildStandalone.cmake | 41 +++++++++++++++++++++++
 setup.py                     | 71 ++++++++++++++++++++++++++++++++++++---
 5 files changed, 121 insertions(+), 6 deletions(-)
 create mode 100644 pyproject.toml
 create mode 100644 python/BuildStandalone.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4cd44762..3c6c8639 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,11 +16,23 @@ project(kenlm)
 option(FORCE_STATIC "Build static executables" OFF)
 option(COMPILE_TESTS "Compile tests" OFF)
 option(ENABLE_PYTHON "Build Python bindings" OFF)
+option(BUILD_PYTHON_STANDALONE "Build standalone C++ lib with Python install" OFF)
 
 # Eigen3 less than 3.1.0 has a race condition: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=466
 find_package(Eigen3 3.1.0 CONFIG)
 include(CMakeDependentOption)
 cmake_dependent_option(ENABLE_INTERPOLATE "Build interpolation program (depends on Eigen3)" ON "EIGEN3_FOUND AND NOT WIN32" OFF)
+set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
+
+if (BUILD_PYTHON_STANDALONE)
+  # Build a shared lib artifact in the same way the cpython setup.py builds
+  # and return. That lib can be dlopened by other Python packages that need
+  # cpp artifacts and can't use the bits compiled by setuptools Extension
+  include(${CMAKE_CURRENT_LIST_DIR}/python/BuildStandalone.cmake)
+  return()
+endif()
+
+
 if (FORCE_STATIC)
   #presumably overkill, is there a better way?
   #http://cmake.3232098.n2.nabble.com/Howto-compile-static-executable-td5580269.html
diff --git a/lm/CMakeLists.txt b/lm/CMakeLists.txt
index 209f1dac..6dcd2a43 100644
--- a/lm/CMakeLists.txt
+++ b/lm/CMakeLists.txt
@@ -36,7 +36,6 @@ target_link_libraries(kenlm PUBLIC kenlm_util Threads::Threads)
 # Since headers are relative to `include/kenlm` at install time, not just `include`
 target_include_directories(kenlm PUBLIC $<INSTALL_INTERFACE:include/kenlm>)
 
-set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
 target_compile_definitions(kenlm PUBLIC -DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
 
 # This directory has children that need to be processed
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..cb4fab07
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[build-system]
+requires = ["setuptools", "wheel", "cmake"]
diff --git a/python/BuildStandalone.cmake b/python/BuildStandalone.cmake
new file mode 100644
index 00000000..47924a60
--- /dev/null
+++ b/python/BuildStandalone.cmake
@@ -0,0 +1,41 @@
+cmake_minimum_required(VERSION 3.6) # list(FILTER) below needs CMake >= 3.6
+
+file(GLOB
+  KENLM_PYTHON_STANDALONE_SRCS
+  "util/*.cc"
+  "lm/*.cc"
+  "util/double-conversion/*.cc"
+  "python/*.cc"
+  )
+
+list(FILTER KENLM_PYTHON_STANDALONE_SRCS EXCLUDE REGEX "main\\.cc$")
+list(FILTER KENLM_PYTHON_STANDALONE_SRCS EXCLUDE REGEX "test\\.cc$")
+
+add_library(
+  kenlm
+  SHARED
+  ${KENLM_PYTHON_STANDALONE_SRCS}
+  )
+
+target_include_directories(kenlm PRIVATE ${PROJECT_SOURCE_DIR})
+target_compile_definitions(kenlm PRIVATE KENLM_MAX_ORDER=${KENLM_MAX_ORDER})
+
+find_package(ZLIB)
+find_package(BZip2)
+find_package(LibLZMA)
+
+if (ZLIB_FOUND)
+  target_link_libraries(kenlm PRIVATE ${ZLIB_LIBRARIES})
+  target_include_directories(kenlm PRIVATE ${ZLIB_INCLUDE_DIRS})
+  target_compile_definitions(kenlm PRIVATE HAVE_ZLIB)
+endif()
+if(BZIP2_FOUND)
+  target_link_libraries(kenlm PRIVATE ${BZIP2_LIBRARIES})
+  target_include_directories(kenlm PRIVATE ${BZIP2_INCLUDE_DIR})
+  target_compile_definitions(kenlm PRIVATE HAVE_BZLIB)
+endif()
+if(LIBLZMA_FOUND)
+  target_link_libraries(kenlm PRIVATE ${LIBLZMA_LIBRARIES})
+  target_include_directories(kenlm PRIVATE ${LIBLZMA_INCLUDE_DIRS})
+  target_compile_definitions(kenlm PRIVATE HAVE_XZLIB)
+endif()
diff --git a/setup.py b/setup.py
index 7d634264..59ccce6a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,12 @@
 from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext as _build_ext
 import glob
 import platform
+import subprocess
 import os
 import sys
 import re
+from pathlib import Path
 
 #Does gcc compile with this header and library?
 def compile_test(header, library):
@@ -29,10 +32,6 @@ def compile_test(header, library):
     ARGS.append('-DHAVE_CLOCKGETTIME')
 elif platform.system() == 'Darwin':
     LIBS = ['c++']
-    # Build a dylib on macOS
-    import sysconfig
-    vars = sysconfig.get_config_vars()
-    vars['LDSHARED'] = vars['LDSHARED'].replace('-bundle', '-dynamiclib')
 else:
     LIBS = []
 
@@ -53,17 +52,79 @@ def compile_test(header, library):
     ARGS.append('-DHAVE_XZLIB')
     LIBS.append('lzma')
 
+
+class build_ext(_build_ext):
+    """Build the standalone kenlm library with CMake before the extension."""
+
+    def run(self):
+        """Configure and build the CMake project, then run the stock build_ext.
+
+        Raises:
+            RuntimeError: If the ``cmake`` executable cannot be launched.
+            subprocess.CalledProcessError: If a CMake step exits non-zero.
+        """
+        try:
+            subprocess.check_output(["cmake", "--version"])
+        except OSError as err:
+            raise RuntimeError(
+                "CMake must be installed to build the following extensions: "
+                + ", ".join(e.name for e in self.extensions)
+            ) from err
+
+        ext_dir = str(Path(self.get_ext_fullpath('libkenlm')).absolute().parent)
+        source_dir = str(Path(__file__).absolute().parent)
+
+        cmake_args = [
+            "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + ext_dir,
+            "-DBUILD_SHARED_LIBS=ON",
+            "-DBUILD_PYTHON_STANDALONE=ON",
+        ]
+        cfg = "Debug" if self.debug else "Release"
+        build_args = ["--config", cfg]
+
+        if platform.system() == "Windows":
+            cmake_args += [
+                "-DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=ON",
+                "-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir),
+                "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir),
+                "-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), ext_dir),
+            ]
+            if sys.maxsize > 2**32:
+                cmake_args += ["-A", "x64"]
+            build_args += ["--", "/m"]
+        else:
+            cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg]
+            build_args += ["--", "-j4"]
+
+        env = os.environ.copy()
+        env["CXXFLAGS"] = '{} -fPIC -DVERSION_INFO=\\"{}\\"'.format(
+            env.get("CXXFLAGS", ""), self.distribution.get_version()
+        )
+
+        os.makedirs(self.build_temp, exist_ok=True)
+        subprocess.check_call(
+            ["cmake", source_dir] + cmake_args, cwd=self.build_temp, env=env
+        )
+        subprocess.check_call(
+            ["cmake", "--build", "."] + build_args, cwd=self.build_temp
+        )
+
+        return _build_ext.run(self)
+
+
 ext_modules = [
     Extension(name='kenlm',
         sources=FILES + ['python/kenlm.cpp'],
         language='C++',
         include_dirs=['.'] + INCLUDE_PATHS,
         libraries=LIBS,
-        extra_compile_args=ARGS)
+        extra_compile_args=ARGS),
 ]
 
 setup(
     name='kenlm',
     ext_modules=ext_modules,
+    cmdclass={"build_ext": build_ext},
     include_package_data=True,
 )