Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Distinguish missing and empty categorical values #1034

Merged
merged 6 commits into from
Mar 4, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
The null character silently converts to 0. This seems undesirable, but I don't
want to change this behaviour in this PR. Also, fix some formatting broken when
we were first experimenting with auto formatting.
tveasey committed Mar 3, 2020
commit 5d376373d7fc7c0c635307e2effd3bb3d40add1b
3 changes: 2 additions & 1 deletion lib/api/CDataFrameAnalysisSpecification.cc
Original file line number Diff line number Diff line change
@@ -141,7 +141,8 @@ CDataFrameAnalysisSpecification::CDataFrameAnalysisSpecification(
m_DiskUsageAllowed = parameters[DISK_USAGE_ALLOWED].fallback(DEFAULT_DISK_USAGE_ALLOWED);

double missing;
if (core::CStringUtils::stringToTypeSilent(m_MissingFieldValue, missing)) {
if (m_MissingFieldValue != core::CDataFrame::DEFAULT_MISSING_STRING &&
core::CStringUtils::stringToTypeSilent(m_MissingFieldValue, missing)) {
HANDLE_FATAL(<< "Input error: you can't use a number (" << missing
<< ") to denote a missing field value.")
}
71 changes: 18 additions & 53 deletions lib/core/CStringUtils.cc
Original file line number Diff line number Diff line change
@@ -452,9 +452,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned l
if (ret == 0 && errno == EINVAL) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned long long: "
<< ::strerror(errno));
<< "' to unsigned long long: " << ::strerror(errno));
}
return false;
}
@@ -463,19 +461,15 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned l
{
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned long long: "
<< ::strerror(errno));
<< "' to unsigned long long: " << ::strerror(errno));
}
return false;
}

if (endPtr != nullptr && *endPtr != '\0') {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned long long: first invalid character "
<< endPtr);
<< "' to unsigned long long: first invalid character " << endPtr);
}
return false;
}
@@ -500,9 +494,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned l
if (ret == 0 && errno == EINVAL) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned long: "
<< ::strerror(errno));
<< "' to unsigned long: " << ::strerror(errno));
}
return false;
}
@@ -511,19 +503,15 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned l
{
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned long: "
<< ::strerror(errno));
<< "' to unsigned long: " << ::strerror(errno));
}
return false;
}

if (endPtr != nullptr && *endPtr != '\0') {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned long: first invalid character "
<< endPtr);
<< "' to unsigned long: first invalid character " << endPtr);
}
return false;
}
@@ -544,9 +532,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned i
// Now check if the result is in range for unsigned int
if (ret > std::numeric_limits<unsigned int>::max()) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned int - out of range");
LOG_ERROR(<< "Unable to convert string '" << str << "' to unsigned int - out of range");
}
return false;
}
@@ -568,8 +554,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, unsigned s
if (ret > std::numeric_limits<unsigned short>::max()) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to unsigned short - out of range");
<< "' to unsigned short - out of range");
}
return false;
}
@@ -594,9 +579,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, long long&
if (ret == 0 && errno == EINVAL) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to long long: "
<< ::strerror(errno));
<< "' to long long: " << ::strerror(errno));
}
return false;
}
@@ -605,19 +588,15 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, long long&
{
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to long long: "
<< ::strerror(errno));
<< "' to long long: " << ::strerror(errno));
}
return false;
}

if (endPtr != nullptr && *endPtr != '\0') {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to long long: first invalid character "
<< endPtr);
<< "' to long long: first invalid character " << endPtr);
}
return false;
}
@@ -642,9 +621,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, long& i) {
if (ret == 0 && errno == EINVAL) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to long: "
<< ::strerror(errno));
<< "' to long: " << ::strerror(errno));
}
return false;
}
@@ -653,19 +630,15 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, long& i) {
{
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to long: "
<< ::strerror(errno));
<< "' to long: " << ::strerror(errno));
}
return false;
}

if (endPtr != nullptr && *endPtr != '\0') {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to long: first invalid character "
<< endPtr);
<< "' to long: first invalid character " << endPtr);
}
return false;
}
@@ -708,9 +681,7 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, short& i)
if (ret < std::numeric_limits<short>::min() ||
ret > std::numeric_limits<short>::max()) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to short - out of range");
LOG_ERROR(<< "Unable to convert string '" << str << "' to short - out of range");
}
return false;
}
@@ -805,29 +776,23 @@ bool CStringUtils::_stringToType(bool silent, const std::string& str, double& d)
if (ret == 0 && errno == EINVAL) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to double: "
<< ::strerror(errno));
<< "' to double: " << ::strerror(errno));
}
return false;
}

if ((ret == HUGE_VAL || ret == -HUGE_VAL) && errno == ERANGE) {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to double: "
<< ::strerror(errno));
<< "' to double: " << ::strerror(errno));
}
return false;
}

if (endPtr != nullptr && *endPtr != '\0') {
if (!silent) {
LOG_ERROR(<< "Unable to convert string '" << str
<< "'"
" to double: first invalid character "
<< endPtr);
<< "' to double: first invalid character " << endPtr);
}
return false;
}