From be17707ecd9dfbcedfd54ddfae2c7dd0665d7b2b Mon Sep 17 00:00:00 2001 From: "alberto.ferreira" Date: Sun, 23 Feb 2020 14:54:38 +0000 Subject: [PATCH 1/7] [swig] Fix SWIG methods that return char** with StringArray. + [new] Add StringArray class to manage and manipulate arrays of fixed-length strings: This class is now used to wrap any char** parameters, manage memory and manipulate the strings. Such class is defined at swig/StringArray.hpp and wrapped in StringArray.i. + [API+fix] Wrap LGBM_BoosterGetFeatureNames it resulted in segfault before: Added wrapper LGBM_BoosterGetFeatureNamesSWIG(BoosterHandle) that only receives the booster handle and figures how much memory to allocate for strings and returns a StringArray which can be easily converted to String[]. + [API+safety] For consistency, LGBM_BoosterGetEvalNamesSWIG was wrapped as well: * Refactor to detect any kind of errors and removed all the parameters besides the BoosterHandle (much simpler API to use in Java). * No assumptions are made about the required string space necessary (128 before). * The amount of required string memory is computed internally + [safety] No possibility of undefined behaviour The two methods wrapped above now compute the necessary string storage space prior to allocation, as the low-level C API calls would crash the process irreversibly if they write more memory than which is passed to them. --- include/LightGBM/c_api.h | 18 ++++ src/c_api.cpp | 32 +++++++ swig/StringArray.hpp | 146 ++++++++++++++++++++++++++++++ swig/StringArray.i | 91 +++++++++++++++++++ swig/StringArray_API_extensions.i | 98 ++++++++++++++++++++ swig/lightgbmlib.i | 20 +--- 6 files changed, 388 insertions(+), 17 deletions(-) create mode 100644 swig/StringArray.hpp create mode 100644 swig/StringArray.i create mode 100644 swig/StringArray_API_extensions.i diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 9e2607071979..8174639bdf34 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -574,6 +574,24 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle, int* out_len, char** out_strs); +/*! + * \brief Get the size of the largest evaluation dataset name. + * \param handle Handle of booster + * \param[out] out_len Number of characters of the largest name (excluding null-terminator). + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterGetLargestEvalNameSize(BoosterHandle handle, + size_t* out_len); + +/*! + * \brief Get the size of the largest feature name. + * \param handle Handle of booster + * \param[out] out_len Number of characters in largest name (excluding null-terminator). + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterGetLargestFeatureNameSize(BoosterHandle handle, + size_t* out_len); + /*! * \brief Get number of features. * \param handle Handle of booster diff --git a/src/c_api.cpp b/src/c_api.cpp index 224c74d6c730..48498c977b75 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -521,6 +521,24 @@ class Booster { return idx; } + size_t GetLargestEvalNameSize() const { + size_t max_len = 0; + for (const auto& metric : train_metric_) { + for (const auto& name : metric->GetName()) { + max_len = std::max(name.size(), max_len); + } + } + return max_len; + } + + size_t GetLargestFeatureNameSize() const { + size_t max_len = 0; + for (const auto& name : boosting_->FeatureNames()) { + max_len = std::max(name.size(), max_len); + } + return max_len; + } + const Boosting* GetBoosting() const { return boosting_.get(); } private: @@ -1370,6 +1388,20 @@ int LGBM_BoosterGetFeatureNames(BoosterHandle handle, int* out_len, char** out_s API_END(); } +int LGBM_BoosterGetLargestEvalNameSize(BoosterHandle handle, size_t* out_len) { + API_BEGIN(); + Booster* ref_booster = reinterpret_cast(handle); + *out_len = ref_booster->GetLargestEvalNameSize(); + API_END(); +} + +int LGBM_BoosterGetLargestFeatureNameSize(BoosterHandle handle, size_t* out_len) { + API_BEGIN(); + Booster* ref_booster = reinterpret_cast(handle); + *out_len = ref_booster->GetLargestFeatureNameSize(); + API_END(); +} + int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len) { API_BEGIN(); Booster* ref_booster = reinterpret_cast(handle); diff --git a/swig/StringArray.hpp b/swig/StringArray.hpp new file mode 100644 index 000000000000..ef3a163e0d89 --- /dev/null +++ b/swig/StringArray.hpp @@ -0,0 +1,146 @@ +#ifndef __STRING_ARRAY_H__ +#define __STRING_ARRAY_H__ + +#include +#include +#include + +/** + * Container that manages an array of fixed-length strings. + * + * To be compatible with SWIG's `various.i` extension module, + * the array of pointers to char* must be NULL-terminated: + * [char*, char*, char*, ..., NULL] + * This implies that the length of this array is bigger + * by 1 element than the number of char* it stores. + * I.e., _num_elements == _array.size()-1 + * + * The class also takes care of allocation of the underlying + * char* memory. + */ +class StringArray +{ + public: + StringArray(size_t num_elements, size_t string_size) + : _string_size(string_size), + _array(num_elements + 1, nullptr) + { + _allocate_strings(num_elements, string_size); + } + + ~StringArray() + { + _release_strings(); + } + + /** + * Returns the pointer to the raw array. + * Notice its size is greater than the number of stored strings by 1. + * + * @return char** pointer to raw data (null-terminated). + */ + char **data() noexcept + { + return _array.data(); + } + + /** + * Return char* from the array of size _string_size+1. + * Notice the last element in _array is already + * considered out of bounds. + * + * @param index Index of the element to retrieve. + * @return pointer or nullptr if index is out of bounds. + */ + char *getitem(size_t index) noexcept + { + if (_in_bounds(index)) + return _array[index]; + else + return nullptr; + } + + /** + * Safely copies the full content data + * into one of the strings in the array. + * If that is not possible, returns error (-1). + * + * @param index index of the string in the array. + * @param content content to store + * + * @return In case index results in out of bounds access, + * or content + 1 (null-terminator byte) doesn't fit + * into the target string (_string_size), it errors out + * and returns -1. + */ + int setitem(size_t index, std::string content) noexcept + { + if (_in_bounds(index) && content.size() < _string_size) + { + std::strcpy(_array[index], content.c_str()); + return 0; + } else { + return -1; + } + } + + /** + * @return number of stored strings. + */ + size_t get_num_elements() noexcept + { + return _array.size() - 1; + } + + private: + + /** + * Returns true if and only if within bounds. + * Notice that it excludes the last element of _array (NULL). + * + * @param index index of the element + * @return bool true if within bounds + */ + bool _in_bounds(size_t index) noexcept + { + return index < get_num_elements(); + } + + /** + * Allocate an array of fixed-length strings. + * + * Since a NULL-terminated array is required by SWIG's `various.i`, + * the size of the array is actually `num_elements + 1` but only + * num_elements are filled. + * + * @param num_elements Number of strings to store in the array. + * @param string_size The size of each string in the array. + */ + void _allocate_strings(int num_elements, int string_size) + { + for (int i = 0; i < num_elements; ++i) + { + // Leave space for \0 terminator: + _array[i] = new (std::nothrow) char[string_size + 1]; + + // Check memory allocation: + if (! _array[i]) { + _release_strings(); + throw std::bad_alloc(); + } + } + } + + /** + * Deletes the allocated strings. + */ + void _release_strings() noexcept + { + std::for_each(_array.begin(), _array.end(), [](char* c) { delete[] c; }); + } + + const size_t _string_size; + std::vector _array; +}; + +#endif // __STRING_ARRAY_H__ diff --git a/swig/StringArray.i b/swig/StringArray.i new file mode 100644 index 000000000000..9df22249ae36 --- /dev/null +++ b/swig/StringArray.i @@ -0,0 +1,91 @@ +/** + * This wraps the StringArray.hpp class for SWIG usage, + * adding the basic C-style wrappers needed to make it + * usable for the users of the low-level lightgbmJNI API. + */ + +%{ +#include "../swig/StringArray.hpp" +%} + +// Use SWIG's `various.i` to get a String[] directly in one call: +%apply char **STRING_ARRAY {char **StringArrayHandle_get_strings}; + +%inline %{ + + typedef void* StringArrayHandle; + + /** + * @brief Creates a new StringArray and returns its handle. + * + * @param num_strings number of strings to store. + * @param string_size the maximum number of characters that can be stored in each string. + * @return StringArrayHandle or nullptr in case of allocation failure. + */ + StringArrayHandle StringArrayHandle_create(size_t num_strings, size_t string_size) { + try { + return new StringArray(num_strings, string_size); + } catch (std::bad_alloc &e) { + return nullptr; + } + } + + /** + * @brief Free the StringArray object. + * + * @param handle StringArray handle. + */ + void StringArrayHandle_free(StringArrayHandle handle) + { + delete reinterpret_cast(handle); + } + + /** + * @brief Return the raw pointer to the array of strings. + * Wrapped in Java into String[] automatically. + * + * @param handle StringArray handle. + * @return Raw pointer to the string array which `various.i` maps to String[]. + */ + char **StringArrayHandle_get_strings(StringArrayHandle handle) + { + return reinterpret_cast(handle)->data(); + } + + /** + * For the end user to extract a specific string from the StringArray object. + * + * @param handle StringArray handle. + * @param index index of the string to retrieve from the array. + * @return raw pointer to string at index, or nullptr if out of bounds. + */ + char *StringArrayHandle_get_string(StringArrayHandle handle, int index) + { + return reinterpret_cast(handle)->getitem(index); + } + + /** + * @brief Replaces one string of the array at index with the new content. + * + * @param handle StringArray handle. + * @param index Index of the string to replace + * @param new_content The content to replace + * @return 0 (success) or -1 (error) in case of out of bounds index or too large content. + */ + int StringArrayHandle_set_string(StringArrayHandle handle, size_t index, const char* new_content) + { + return reinterpret_cast(handle)->setitem(index, std::string(new_content)); + } + + /** + * @brief Retrieve the number of strings in the StringArray. + * + * @param handle StringArray handle. + * @return number of strings that the array stores. + */ + size_t StringArrayHandle_get_num_elements(StringArrayHandle handle) + { + return reinterpret_cast(handle)->get_num_elements(); + } + +%} diff --git a/swig/StringArray_API_extensions.i b/swig/StringArray_API_extensions.i new file mode 100644 index 000000000000..3db18059b77e --- /dev/null +++ b/swig/StringArray_API_extensions.i @@ -0,0 +1,98 @@ +/** + * This SWIG interface extension provides support to + * allocate, return and manage arrays of strings through + * the class StringArray. + * + * This is then used to generate wrappers that return newly-allocated + * arrays of strings, so the user can them access them easily as a String[] + * on the Java side by a single call to StringArray::data(), and even manipulate + * them. + * + * It also implements working wrappers to: + * - LGBM_BoosterGetEvalNames (re-implemented with new API) + * - LGBM_BoosterGetFeatureNames (original non-wrapped version didn't work). + * where the wrappers names end with "SWIG". + */ + + +#include + +%include "./StringArray.i" + +%inline %{ + + #define API_OK_OR_VALUE(api_return, return_value) if (api_return == -1) return return_value + #define API_OK_OR_NULL(api_return) API_OK_OR_VALUE(api_return, nullptr) + + /** + * @brief Wraps LGBM_BoosterGetEvalNames. + * + * In case of success a new StringArray is created and returned, + * which you're responsible for freeing, + * @see StringArrayHandle_free(). + * In case of failure such resource is freed and nullptr is returned. + * Check for that case with null (lightgbmlib) or 0 (lightgbmlibJNI). + * + * @param handle Booster handle + * @return StringArrayHandle with the eval names (or nullptr in case of error) + */ + StringArrayHandle LGBM_BoosterGetEvalNamesSWIG(BoosterHandle handle) + { + std::unique_ptr strings(nullptr); + + // 1) Figure out the necessary allocation size: + int eval_counts; + API_OK_OR_NULL(LGBM_BoosterGetEvalCounts(handle, &eval_counts)); + + size_t largest_eval_name_size; + API_OK_OR_NULL(LGBM_BoosterGetLargestEvalNameSize(handle, &largest_eval_name_size)); + + // 2) Allocate the strings container: + try { + strings.reset(new StringArray(eval_counts, largest_eval_name_size)); + } catch (std::bad_alloc &e) { + LGBM_SetLastError("Failure to allocate memory."); + return nullptr; + } + + // 3) Extract the names: + API_OK_OR_NULL(LGBM_BoosterGetEvalNames(handle, &eval_counts, strings->data())); + return strings.release(); + } + + /** + * @brief Wraps LGBM_BoosterGetFeatureNames. + * + * Allocates a new StringArray. You must free it yourself if it succeeds. + * @see StringArrayHandle_free(). + * In case of failure such resource is freed and nullptr is returned. + * Check for that case with null (lightgbmlib) or 0 (lightgbmlibJNI). + * + * @param handle Booster handle + * @return StringArrayHandle with the feature names (or nullptr in case of error) + */ + StringArrayHandle LGBM_BoosterGetFeatureNamesSWIG(BoosterHandle handle) + { + std::unique_ptr strings(nullptr); + + // 1) To preallocate memory extract number of features & required size first: + int num_features; + API_OK_OR_NULL(LGBM_BoosterGetNumFeature(handle, &num_features)); + + size_t max_feature_name_size; + API_OK_OR_NULL(LGBM_BoosterGetLargestFeatureNameSize(handle, &max_feature_name_size)); + + // 2) Allocate an array of strings: + try { + strings.reset(new StringArray(num_features, max_feature_name_size)); + } catch (std::bad_alloc &e) { + LGBM_SetLastError("Failure to allocate memory."); + return nullptr; + } + + // 3) Extract feature names: + API_OK_OR_NULL(LGBM_BoosterGetFeatureNames(handle, &num_features, strings->data())); + + return strings.release(); + } +%} diff --git a/swig/lightgbmlib.i b/swig/lightgbmlib.i index 34468dd805d1..5fba17e12adf 100644 --- a/swig/lightgbmlib.i +++ b/swig/lightgbmlib.i @@ -6,6 +6,7 @@ %module lightgbmlib %ignore LGBM_BoosterSaveModelToString; %ignore LGBM_BoosterGetEvalNames; +%ignore LGBM_BoosterGetFeatureNames; %{ /* Includes the header in the wrapper code */ #include "../include/LightGBM/export.h" @@ -73,19 +74,6 @@ return dst; } - char ** LGBM_BoosterGetEvalNamesSWIG(BoosterHandle handle, - int eval_counts) { - char** dst = new char*[eval_counts]; - for (int i = 0; i < eval_counts; ++i) { - dst[i] = new char[128]; - } - int result = LGBM_BoosterGetEvalNames(handle, &eval_counts, dst); - if (result != 0) { - return nullptr; - } - return dst; - } - int LGBM_BoosterPredictForMatSingle(JNIEnv *jenv, jdoubleArray data, BoosterHandle handle, @@ -249,10 +237,6 @@ %array_functions(float, floatArray) %array_functions(int, intArray) %array_functions(long, longArray) -/* Note: there is a bug in the SWIG generated string arrays when creating - a new array with strings where the strings are prematurely deallocated -*/ -%array_functions(char *, stringArray) /* Custom pointer manipulation template */ %define %pointer_manipulation(TYPE, NAME) @@ -301,3 +285,5 @@ TYPE *NAME##_handle(); /* Allow retrieving handle to void** */ %pointer_handle(void*, voidpp) + +%include "StringArray_API_extensions.i" From 2746c72661944f1f2039810866208cf6218f931e Mon Sep 17 00:00:00 2001 From: "alberto.ferreira" Date: Sat, 14 Mar 2020 15:24:22 +0000 Subject: [PATCH 2/7] Changes to C API and wrappers support char** To support the latest SWIG changes that enable proper char** return support that is safe, the C API was changed. The respecive wrappers in R and Python were changed too. --- include/LightGBM/c_api.h | 34 +++++++-------- python-package/lightgbm/basic.py | 26 ++++++++++- src/c_api.cpp | 72 ++++++++++++++----------------- src/lightgbm_R.cpp | 12 +++++- swig/StringArray_API_extensions.i | 43 ++++++++++-------- 5 files changed, 106 insertions(+), 81 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 8174639bdf34..1c8dced5d96f 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -555,43 +555,41 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, /*! * \brief Get names of evaluation datasets. * \param handle Handle of booster + * \param len Number of char* pointers stored at char** out_strs. + * If smaller than the max size, only this many strings are copied. * \param[out] out_len Total number of evaluation datasets + * \param buffer_len Size of pre-allocated strings. + * Content is copied up to buffer_len-1 and null-terminated. + * \param[out] out_buffer_len String sizes required to do the full string copies * \param[out] out_strs Names of evaluation datasets, should pre-allocate memory * \return 0 when succeed, -1 when failure happens */ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, + const int len, int* out_len, + const size_t buffer_len, + size_t* out_buffer_len, char** out_strs); /*! * \brief Get names of features. * \param handle Handle of booster + * \param len Number of char* pointers stored at char** out_strs. + * If smaller than the max size, only this many strings are copied. * \param[out] out_len Total number of features + * \param buffer_len Size of pre-allocated strings. + * Content is copied up to buffer_len-1 and null-terminated. + * \param[out] out_buffer_len String sizes required to do the full string copies * \param[out] out_strs Names of features, should pre-allocate memory * \return 0 when succeed, -1 when failure happens */ LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle, + const int len, int* out_len, + const size_t buffer_len, + size_t* out_buffer_len, char** out_strs); -/*! - * \brief Get the size of the largest evaluation dataset name. - * \param handle Handle of booster - * \param[out] out_len Number of characters of the largest name (excluding null-terminator). - * \return 0 when succeed, -1 when failure happens - */ -LIGHTGBM_C_EXPORT int LGBM_BoosterGetLargestEvalNameSize(BoosterHandle handle, - size_t* out_len); - -/*! - * \brief Get the size of the largest feature name. - * \param handle Handle of booster - * \param[out] out_len Number of characters in largest name (excluding null-terminator). - * \return 0 when succeed, -1 when failure happens - */ -LIGHTGBM_C_EXPORT int LGBM_BoosterGetLargestFeatureNameSize(BoosterHandle handle, - size_t* out_len); - /*! * \brief Get number of features. * \param handle Handle of booster diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 6fcb37ff3c89..41081c8ea17e 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -2711,14 +2711,24 @@ def feature_name(self): num_feature = self.num_feature() # Get name of features tmp_out_len = ctypes.c_int(0) - string_buffers = [ctypes.create_string_buffer(255) for i in range_(num_feature)] + reserved_string_buffer_size = 255 + required_string_buffer_size = ctypes.c_size_t(0) + string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(num_feature)] ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetFeatureNames( self.handle, + num_feature, ctypes.byref(tmp_out_len), + reserved_string_buffer_size, + ctypes.byref(required_string_buffer_size), ptr_string_buffers)) if num_feature != tmp_out_len.value: raise ValueError("Length of feature names doesn't equal with num_feature") + if reserved_string_buffer_size < required_string_buffer_size.value: + raise BufferError( + "Allocated feature name buffer size ({}) was inferior to the needed size ({})." + .format(reserved_string_buffer_size, required_string_buffer_size.value) + ) return [string_buffers[i].value.decode() for i in range_(num_feature)] def feature_importance(self, importance_type='split', iteration=None): @@ -2898,14 +2908,26 @@ def __get_eval_info(self): if self.__num_inner_eval > 0: # Get name of evals tmp_out_len = ctypes.c_int(0) - string_buffers = [ctypes.create_string_buffer(255) for i in range_(self.__num_inner_eval)] + reserved_string_buffer_size = 255 + required_string_buffer_size = ctypes.c_size_t(0) + string_buffers = [ + ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(self.__num_inner_eval) + ] ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetEvalNames( self.handle, + self.__num_inner_eval, ctypes.byref(tmp_out_len), + reserved_string_buffer_size, + ctypes.byref(required_string_buffer_size), ptr_string_buffers)) if self.__num_inner_eval != tmp_out_len.value: raise ValueError("Length of eval names doesn't equal with num_evals") + if reserved_string_buffer_size < required_string_buffer_size.value: + raise BufferError( + "Allocated eval buffer size ({}) was inferior to the needed size ({})." + .format(reserved_string_buffer_size, required_string_buffer_size.value) + ) self.__name_inner_eval = \ [string_buffers[i].value.decode() for i in range_(self.__num_inner_eval)] self.__higher_better_inner_eval = \ diff --git a/src/c_api.cpp b/src/c_api.cpp index 48498c977b75..94363e3ddd29 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -501,44 +501,36 @@ class Booster { return ret; } - int GetEvalNames(char** out_strs) const { + int GetEvalNames(char** out_strs, const int len, const size_t buffer_len, size_t *out_buffer_len) const { + *out_buffer_len = 0; int idx = 0; for (const auto& metric : train_metric_) { for (const auto& name : metric->GetName()) { - std::memcpy(out_strs[idx], name.c_str(), name.size() + 1); + if (idx < len) { + std::memcpy(out_strs[idx], name.c_str(), std::min(name.size() + 1, buffer_len)); + out_strs[idx][buffer_len-1] = '\0'; + } + *out_buffer_len = std::max(name.size() + 1, *out_buffer_len); ++idx; } } return idx; } - int GetFeatureNames(char** out_strs) const { + int GetFeatureNames(char** out_strs, const int len, const size_t buffer_len, size_t *out_buffer_len) const { + *out_buffer_len = 0; int idx = 0; for (const auto& name : boosting_->FeatureNames()) { - std::memcpy(out_strs[idx], name.c_str(), name.size() + 1); + if (idx < len) { + std::memcpy(out_strs[idx], name.c_str(), std::min(name.size() + 1, buffer_len)); + out_strs[idx][buffer_len-1] = '\0'; + } + *out_buffer_len = std::max(name.size() + 1, *out_buffer_len); ++idx; } return idx; } - size_t GetLargestEvalNameSize() const { - size_t max_len = 0; - for (const auto& metric : train_metric_) { - for (const auto& name : metric->GetName()) { - max_len = std::max(name.size(), max_len); - } - } - return max_len; - } - - size_t GetLargestFeatureNameSize() const { - size_t max_len = 0; - for (const auto& name : boosting_->FeatureNames()) { - max_len = std::max(name.size(), max_len); - } - return max_len; - } - const Boosting* GetBoosting() const { return boosting_.get(); } private: @@ -1374,31 +1366,31 @@ int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int* out_len) { API_END(); } -int LGBM_BoosterGetEvalNames(BoosterHandle handle, int* out_len, char** out_strs) { - API_BEGIN(); - Booster* ref_booster = reinterpret_cast(handle); - *out_len = ref_booster->GetEvalNames(out_strs); - API_END(); -} - -int LGBM_BoosterGetFeatureNames(BoosterHandle handle, int* out_len, char** out_strs) { - API_BEGIN(); - Booster* ref_booster = reinterpret_cast(handle); - *out_len = ref_booster->GetFeatureNames(out_strs); - API_END(); -} - -int LGBM_BoosterGetLargestEvalNameSize(BoosterHandle handle, size_t* out_len) { +int LGBM_BoosterGetEvalNames( + BoosterHandle handle, + const int len, + int* out_len, + const size_t buffer_len, + size_t* out_buffer_len, + char** out_strs) +{ API_BEGIN(); Booster* ref_booster = reinterpret_cast(handle); - *out_len = ref_booster->GetLargestEvalNameSize(); + *out_len = ref_booster->GetEvalNames(out_strs, len, buffer_len, out_buffer_len); API_END(); } -int LGBM_BoosterGetLargestFeatureNameSize(BoosterHandle handle, size_t* out_len) { +int LGBM_BoosterGetFeatureNames( + BoosterHandle handle, + const int len, + int* out_len, + const size_t buffer_len, + size_t* out_buffer_len, + char** out_strs) +{ API_BEGIN(); Booster* ref_booster = reinterpret_cast(handle); - *out_len = ref_booster->GetLargestFeatureNameSize(); + *out_len = ref_booster->GetFeatureNames(out_strs, len, buffer_len, out_buffer_len); API_END(); } diff --git a/src/lightgbm_R.cpp b/src/lightgbm_R.cpp index 4ba70a5f1c22..d2bd976e64d7 100644 --- a/src/lightgbm_R.cpp +++ b/src/lightgbm_R.cpp @@ -449,15 +449,23 @@ LGBM_SE LGBM_BoosterGetEvalNames_R(LGBM_SE handle, R_API_BEGIN(); int len; CHECK_CALL(LGBM_BoosterGetEvalCounts(R_GET_PTR(handle), &len)); + + const size_t reserved_string_size = 128; std::vector> names(len); std::vector ptr_names(len); for (int i = 0; i < len; ++i) { - names[i].resize(128); + names[i].resize(reserved_string_size); ptr_names[i] = names[i].data(); } + int out_len; - CHECK_CALL(LGBM_BoosterGetEvalNames(R_GET_PTR(handle), &out_len, ptr_names.data())); + size_t required_string_size; + CHECK_CALL(LGBM_BoosterGetEvalNames(R_GET_PTR(handle), + len, &out_len, + reserved_string_size, &required_string_size, + ptr_names.data())); CHECK_EQ(out_len, len); + CHECK_GE(reserved_string_size, required_string_size); auto merge_names = Common::Join(ptr_names, "\t"); EncodeChar(eval_names, merge_names.c_str(), buf_len, actual_len, merge_names.size() + 1); R_API_END(); diff --git a/swig/StringArray_API_extensions.i b/swig/StringArray_API_extensions.i index 3db18059b77e..ac3eda2962a1 100644 --- a/swig/StringArray_API_extensions.i +++ b/swig/StringArray_API_extensions.i @@ -38,25 +38,28 @@ */ StringArrayHandle LGBM_BoosterGetEvalNamesSWIG(BoosterHandle handle) { - std::unique_ptr strings(nullptr); - - // 1) Figure out the necessary allocation size: int eval_counts; - API_OK_OR_NULL(LGBM_BoosterGetEvalCounts(handle, &eval_counts)); + size_t string_size; + std::unique_ptr strings(nullptr); - size_t largest_eval_name_size; - API_OK_OR_NULL(LGBM_BoosterGetLargestEvalNameSize(handle, &largest_eval_name_size)); + // Retrieve required allocation space: + API_OK_OR_NULL(LGBM_BoosterGetEvalNames(handle, + 0, &eval_counts, + 0, &string_size, + strings->data())); - // 2) Allocate the strings container: try { - strings.reset(new StringArray(eval_counts, largest_eval_name_size)); + strings.reset(new StringArray(eval_counts, string_size)); } catch (std::bad_alloc &e) { LGBM_SetLastError("Failure to allocate memory."); return nullptr; } - // 3) Extract the names: - API_OK_OR_NULL(LGBM_BoosterGetEvalNames(handle, &eval_counts, strings->data())); + API_OK_OR_NULL(LGBM_BoosterGetEvalNames(handle, + eval_counts, &eval_counts, + string_size, &string_size, + strings->data())); + return strings.release(); } @@ -73,16 +76,16 @@ */ StringArrayHandle LGBM_BoosterGetFeatureNamesSWIG(BoosterHandle handle) { - std::unique_ptr strings(nullptr); - - // 1) To preallocate memory extract number of features & required size first: int num_features; - API_OK_OR_NULL(LGBM_BoosterGetNumFeature(handle, &num_features)); - size_t max_feature_name_size; - API_OK_OR_NULL(LGBM_BoosterGetLargestFeatureNameSize(handle, &max_feature_name_size)); + std::unique_ptr strings(nullptr); + + // Retrieve required allocation space: + API_OK_OR_NULL(LGBM_BoosterGetFeatureNames(handle, + 0, &num_features, + 0, &max_feature_name_size, + nullptr)); - // 2) Allocate an array of strings: try { strings.reset(new StringArray(num_features, max_feature_name_size)); } catch (std::bad_alloc &e) { @@ -90,8 +93,10 @@ return nullptr; } - // 3) Extract feature names: - API_OK_OR_NULL(LGBM_BoosterGetFeatureNames(handle, &num_features, strings->data())); + API_OK_OR_NULL(LGBM_BoosterGetFeatureNames(handle, + num_features, &num_features, + max_feature_name_size, &max_feature_name_size, + strings->data())); return strings.release(); } From b46188132a49cdb96c2578244427049de2c7f976 Mon Sep 17 00:00:00 2001 From: "alberto.ferreira" Date: Sat, 14 Mar 2020 16:34:01 +0000 Subject: [PATCH 3/7] Cleanup indentation in new lightgbm_R.cpp code --- src/lightgbm_R.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lightgbm_R.cpp b/src/lightgbm_R.cpp index d2bd976e64d7..3223fc09e1ec 100644 --- a/src/lightgbm_R.cpp +++ b/src/lightgbm_R.cpp @@ -460,10 +460,14 @@ LGBM_SE LGBM_BoosterGetEvalNames_R(LGBM_SE handle, int out_len; size_t required_string_size; - CHECK_CALL(LGBM_BoosterGetEvalNames(R_GET_PTR(handle), - len, &out_len, - reserved_string_size, &required_string_size, - ptr_names.data())); + CHECK_CALL( + LGBM_BoosterGetEvalNames( + R_GET_PTR(handle), + len, &out_len, + reserved_string_size, &required_string_size, + ptr_names.data() + ) + ); CHECK_EQ(out_len, len); CHECK_GE(reserved_string_size, required_string_size); auto merge_names = Common::Join(ptr_names, "\t"); From 935947b1258e9a89d6c5413ba0b158860a7ef965 Mon Sep 17 00:00:00 2001 From: Alberto Ferreira Date: Wed, 18 Mar 2020 22:57:24 +0000 Subject: [PATCH 4/7] Adress review code-style comments. --- include/LightGBM/c_api.h | 12 ++++++------ src/c_api.cpp | 32 ++++++++++++++----------------- src/lightgbm_R.cpp | 3 +-- swig/StringArray.i | 4 ++++ swig/StringArray_API_extensions.i | 4 ++++ 5 files changed, 29 insertions(+), 26 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 1c8dced5d96f..8e9a6fc2500c 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -555,11 +555,11 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, /*! * \brief Get names of evaluation datasets. * \param handle Handle of booster - * \param len Number of char* pointers stored at char** out_strs. - * If smaller than the max size, only this many strings are copied. + * \param len Number of ``char*`` pointers stored at ``out_strs``. + * If smaller than the max size, only this many strings are copied * \param[out] out_len Total number of evaluation datasets * \param buffer_len Size of pre-allocated strings. - * Content is copied up to buffer_len-1 and null-terminated. + * Content is copied up to ``buffer_len - 1`` and null-terminated * \param[out] out_buffer_len String sizes required to do the full string copies * \param[out] out_strs Names of evaluation datasets, should pre-allocate memory * \return 0 when succeed, -1 when failure happens @@ -574,11 +574,11 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, /*! * \brief Get names of features. * \param handle Handle of booster - * \param len Number of char* pointers stored at char** out_strs. - * If smaller than the max size, only this many strings are copied. + * \param len Number of ``char*`` pointers stored at ``out_strs``. + * If smaller than the max size, only this many strings are copied * \param[out] out_len Total number of features * \param buffer_len Size of pre-allocated strings. - * Content is copied up to buffer_len-1 and null-terminated. + * Content is copied up to ``buffer_len - 1`` and null-terminated * \param[out] out_buffer_len String sizes required to do the full string copies * \param[out] out_strs Names of features, should pre-allocate memory * \return 0 when succeed, -1 when failure happens diff --git a/src/c_api.cpp b/src/c_api.cpp index 94363e3ddd29..22d53937e337 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -508,7 +508,7 @@ class Booster { for (const auto& name : metric->GetName()) { if (idx < len) { std::memcpy(out_strs[idx], name.c_str(), std::min(name.size() + 1, buffer_len)); - out_strs[idx][buffer_len-1] = '\0'; + out_strs[idx][buffer_len - 1] = '\0'; } *out_buffer_len = std::max(name.size() + 1, *out_buffer_len); ++idx; @@ -523,7 +523,7 @@ class Booster { for (const auto& name : boosting_->FeatureNames()) { if (idx < len) { std::memcpy(out_strs[idx], name.c_str(), std::min(name.size() + 1, buffer_len)); - out_strs[idx][buffer_len-1] = '\0'; + out_strs[idx][buffer_len - 1] = '\0'; } *out_buffer_len = std::max(name.size() + 1, *out_buffer_len); ++idx; @@ -1366,28 +1366,24 @@ int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int* out_len) { API_END(); } -int LGBM_BoosterGetEvalNames( - BoosterHandle handle, - const int len, - int* out_len, - const size_t buffer_len, - size_t* out_buffer_len, - char** out_strs) -{ +int LGBM_BoosterGetEvalNames(BoosterHandle handle, + const int len, + int* out_len, + const size_t buffer_len, + size_t* out_buffer_len, + char** out_strs) { API_BEGIN(); Booster* ref_booster = reinterpret_cast(handle); *out_len = ref_booster->GetEvalNames(out_strs, len, buffer_len, out_buffer_len); API_END(); } -int LGBM_BoosterGetFeatureNames( - BoosterHandle handle, - const int len, - int* out_len, - const size_t buffer_len, - size_t* out_buffer_len, - char** out_strs) -{ +int LGBM_BoosterGetFeatureNames(BoosterHandle handle, + const int len, + int* out_len, + const size_t buffer_len, + size_t* out_buffer_len, + char** out_strs) { API_BEGIN(); Booster* ref_booster = reinterpret_cast(handle); *out_len = ref_booster->GetFeatureNames(out_strs, len, buffer_len, out_buffer_len); diff --git a/src/lightgbm_R.cpp b/src/lightgbm_R.cpp index 3223fc09e1ec..ba417838fb74 100644 --- a/src/lightgbm_R.cpp +++ b/src/lightgbm_R.cpp @@ -466,8 +466,7 @@ LGBM_SE LGBM_BoosterGetEvalNames_R(LGBM_SE handle, len, &out_len, reserved_string_size, &required_string_size, ptr_names.data() - ) - ); + )); CHECK_EQ(out_len, len); CHECK_GE(reserved_string_size, required_string_size); auto merge_names = Common::Join(ptr_names, "\t"); diff --git a/swig/StringArray.i b/swig/StringArray.i index 9df22249ae36..12f1a5d2b21c 100644 --- a/swig/StringArray.i +++ b/swig/StringArray.i @@ -1,3 +1,7 @@ +/*! + * Copyright (c) 2020 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. + */ /** * This wraps the StringArray.hpp class for SWIG usage, * adding the basic C-style wrappers needed to make it diff --git a/swig/StringArray_API_extensions.i b/swig/StringArray_API_extensions.i index ac3eda2962a1..b44f8f263793 100644 --- a/swig/StringArray_API_extensions.i +++ b/swig/StringArray_API_extensions.i @@ -1,3 +1,7 @@ +/*! + * Copyright (c) 2020 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. + */ /** * This SWIG interface extension provides support to * allocate, return and manage arrays of strings through From 0c4f1fe605cf68f477fac398ad76e964803c42e4 Mon Sep 17 00:00:00 2001 From: Alberto Ferreira Date: Wed, 18 Mar 2020 23:12:09 +0000 Subject: [PATCH 5/7] Update swig/StringArray.hpp Co-Authored-By: Nikita Titov --- swig/StringArray.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/swig/StringArray.hpp b/swig/StringArray.hpp index ef3a163e0d89..4fef62a125e2 100644 --- a/swig/StringArray.hpp +++ b/swig/StringArray.hpp @@ -1,3 +1,7 @@ +/*! + * Copyright (c) 2020 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. + */ #ifndef __STRING_ARRAY_H__ #define __STRING_ARRAY_H__ From 449608006c828b0efaee1cea938c705e024608de Mon Sep 17 00:00:00 2001 From: Alberto Ferreira Date: Thu, 19 Mar 2020 15:58:46 +0000 Subject: [PATCH 6/7] Update python-package/lightgbm/basic.py Co-Authored-By: Nikita Titov --- python-package/lightgbm/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 41081c8ea17e..cd3288126474 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -2925,7 +2925,7 @@ def __get_eval_info(self): raise ValueError("Length of eval names doesn't equal with num_evals") if reserved_string_buffer_size < required_string_buffer_size.value: raise BufferError( - "Allocated eval buffer size ({}) was inferior to the needed size ({})." + "Allocated eval name buffer size ({}) was inferior to the needed size ({})." .format(reserved_string_buffer_size, required_string_buffer_size.value) ) self.__name_inner_eval = \ From 9873427842bf5b37760ad275f575e271294898f9 Mon Sep 17 00:00:00 2001 From: Alberto Ferreira Date: Thu, 19 Mar 2020 16:00:06 +0000 Subject: [PATCH 7/7] Update src/lightgbm_R.cpp Co-Authored-By: Nikita Titov --- src/lightgbm_R.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lightgbm_R.cpp b/src/lightgbm_R.cpp index ba417838fb74..9cb7a62d6947 100644 --- a/src/lightgbm_R.cpp +++ b/src/lightgbm_R.cpp @@ -465,8 +465,7 @@ LGBM_SE LGBM_BoosterGetEvalNames_R(LGBM_SE handle, R_GET_PTR(handle), len, &out_len, reserved_string_size, &required_string_size, - ptr_names.data() - )); + ptr_names.data())); CHECK_EQ(out_len, len); CHECK_GE(reserved_string_size, required_string_size); auto merge_names = Common::Join(ptr_names, "\t");