Skip to content

Commit

Permalink
coda-oss 2022-08-02
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan Smith committed Aug 2, 2022
1 parent 03f1654 commit 2cd9bca
Show file tree
Hide file tree
Showing 36 changed files with 2,347 additions and 1,204 deletions.
6 changes: 4 additions & 2 deletions externals/coda-oss/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
```
# coda-oss Release Notes

## Release 2022-??-??
## [Release 2022-08-02](https://github.com/mdaus/coda-oss/releases/tag/2022-08-02)
* remove *Expat* and *libXML* modules and support in **xml.lite**; only *Xerces* was actively used.
* **xml.lite** now uses UTF-8 internally and no longer tries to preserve incorrect behavior.
* fix `waf` to work-around FIPS warning because of insecure *md5* hashing.
* tweak `str::EncodedStringView` and `str::EncodedString` for
[future XML changes](https://github.com/mdaus/coda-oss/tree/feature/always-write-xml-as-utf8).

## [Release 2022-06-29](https://github.com/mdaus/coda-oss/releases/tag/2022-06-29)
* remove **modules/drivers/boost** as it was empty (and unused);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ static_assert(CODA_OSS_MAKE_VERSION_MMPB(9999, 9999, 9999, 9999) <= UINT64_MAX,

// Do this ala C++ ... we don't currently have major/minor/patch
//#define CODA_OSS_VERSION_ 20210910L // c.f. __cplusplus
#define CODA_OSS_VERSION_ 2022 ## 0006 ## 0029 ## 0000 ## L
#define CODA_OSS_VERSION_ 2022 ## 0008 ## 0002 ## 0000 ## L

// Use the same macros other projects might want to use; overkill for us.
#define CODA_OSS_VERSION_MAJOR 2022
#define CODA_OSS_VERSION_MINOR 6
#define CODA_OSS_VERSION_PATCH 29
#define CODA_OSS_VERSION_MINOR 8
#define CODA_OSS_VERSION_PATCH 2
#define CODA_OSS_VERSION_BUILD 0
#define CODA_OSS_VERSION CODA_OSS_MAKE_VERSION_MMPB(CODA_OSS_VERSION_MAJOR, CODA_OSS_VERSION_MINOR, CODA_OSS_VERSION_PATCH, CODA_OSS_VERSION_BUILD)

Expand Down
2 changes: 1 addition & 1 deletion externals/coda-oss/modules/c++/io/include/io/ReadUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace io
*/
void readFileContents(const std::string& pathname,
std::vector<sys::byte>& buffer);
void readFileContents(const sys::filesystem::path& pathname, std::vector<coda_oss::byte>& buffer);
void readFileContents(const coda_oss::filesystem::path& pathname, std::vector<coda_oss::byte>& buffer);

/*!
* Reads the contents of a file into a string. The file is assumed to be a
Expand Down
2 changes: 1 addition & 1 deletion externals/coda-oss/modules/c++/io/source/ReadUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void readFileContents(const std::string& pathname,
{
readFileContents_(pathname, buffer);
}
void readFileContents(const sys::filesystem::path& pathname, std::vector<coda_oss::byte>& buffer)
void readFileContents(const coda_oss::filesystem::path& pathname, std::vector<coda_oss::byte>& buffer)
{
readFileContents_(pathname, buffer);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,13 @@ class CODA_OSS_API EncodedString final
// We can do most everything through the view, so keep one around.
EncodedStringView v_;

const std::string& string() const
{
return s_;
}

// No "public" operator=() for these; this class is mostly for storage and/or conversion,
// not extensive manipulation. Create a new instance and assign/move that.
void assign(coda_oss::u8string::const_pointer);
void assign(str::W1252string::const_pointer);

public:
EncodedString() = default;
EncodedString();
~EncodedString() = default;
EncodedString(const EncodedString&);
EncodedString& operator=(const EncodedString&);
Expand Down Expand Up @@ -134,7 +129,7 @@ class CODA_OSS_API EncodedString final
{
static const std::string& string(const EncodedString& es) // for unit-testing
{
return es.string();
return es.s_;
}
};
};
Expand Down
33 changes: 18 additions & 15 deletions externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ class CODA_OSS_API EncodedStringView final
// Since we only support two encodings--UTF-8 (native on Linux) and Windows-1252
// (native on Windows)--both of which are 8-bits, a simple "bool" flag will do.
coda_oss::span<const char> mString;
static constexpr bool mNativeIsUtf8 = details::Platform == details::PlatformType::Linux ? true : false;
#if _WIN32
static constexpr bool mNativeIsUtf8 = false; // Windows-1252
#else
static constexpr bool mNativeIsUtf8 = true; // !_WIN32, assume Linux
#endif
bool mIsUtf8 = mNativeIsUtf8;

// Want to create an EncodedString from EncodedStringView. The public interface
Expand Down Expand Up @@ -89,7 +93,6 @@ class CODA_OSS_API EncodedStringView final

// Convert (perhaps) whatever we're looking at to UTF-8
coda_oss::u8string u8string() const; // c.f. std::filesystem::path::u8string()
std::string& toUtf8(std::string&) const; // std::string is encoded as UTF-8, always.

// Convert whatever we're looking at to UTF-16 or UTF-32
std::u16string u16string() const; // c.f. std::filesystem::path::u16string()
Expand All @@ -111,34 +114,34 @@ class CODA_OSS_API EncodedStringView final
{
return mIsUtf8 ? cast<coda_oss::u8string::const_pointer>(c_str()) : nullptr;
}
// Pointer to the viewed characters typed as Windows-1252, or nullptr when
// this view is flagged as UTF-8 (mIsUtf8).
str::W1252string::const_pointer c_w1252str() const
{
    if (mIsUtf8)
    {
        return nullptr; // the data is UTF-8, not Windows-1252
    }
    return cast<str::W1252string::const_pointer>(c_str());
}
// Number of `char` elements in the viewed span (mString).
size_t size() const
{
return mString.size();
}

// Input is encoded as specified on all platforms.
static EncodedStringView fromUtf8(const std::string& s)
static EncodedStringView fromUtf8(const std::string& utf8)
{
return EncodedStringView(str::c_str<coda_oss::u8string>(s));
return EncodedStringView(str::c_str<coda_oss::u8string>(utf8));
}
static EncodedStringView fromUtf8(std::string::const_pointer p)
static EncodedStringView fromUtf8(std::string::const_pointer pUtf8)
{
return EncodedStringView(str::cast<coda_oss::u8string::const_pointer>(p));
return EncodedStringView(str::cast<coda_oss::u8string::const_pointer>(pUtf8));
}
static EncodedStringView fromWindows1252(const std::string& s)
static EncodedStringView fromWindows1252(const std::string& w1252)
{
return EncodedStringView(str::c_str<str::W1252string>(s));
return EncodedStringView(str::c_str<str::W1252string>(w1252));
}
static EncodedStringView fromWindows1252(std::string::const_pointer p)
static EncodedStringView fromWindows1252(std::string::const_pointer pW1252)
{
return EncodedStringView(str::cast<str::W1252string::const_pointer>(p));
return EncodedStringView(str::cast<str::W1252string::const_pointer>(pW1252));
}

std::string asUtf8() const
{
std::string retval;
return toUtf8(retval);
}
std::string asUtf8() const;
std::string asWindows1252() const;

bool operator_eq(const EncodedStringView&) const;
Expand Down
91 changes: 11 additions & 80 deletions externals/coda-oss/modules/c++/str/include/str/Encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,27 +37,6 @@
#include "gsl/gsl.h"
#include "config/Exports.h"

// This can be useful for code that will compile on all platforms, but needs
// different platform-specific behavior. This avoids the use of more #ifdefs
// (no preprocessor) and also squelches compiler-warnings about unused local
// functions.
namespace str { namespace details // YOU should be using sys::PlatformType
{
enum class PlatformType
{
Windows,
Linux,
// MacOS
};

#if _WIN32
constexpr auto Platform = PlatformType::Windows;
#else
constexpr auto Platform = PlatformType::Linux;
#endif
} }


namespace str
{
template <typename TReturn, typename TChar>
Expand All @@ -83,17 +62,6 @@ inline typename TBasicStringT::const_pointer c_str(const std::basic_string<TChar
enum class Windows1252_T : unsigned char { }; // https://en.cppreference.com/w/cpp/language/types
using W1252string = std::basic_string<Windows1252_T>; // https://en.cppreference.com/w/cpp/string

CODA_OSS_API coda_oss::u8string fromWindows1252(std::string::const_pointer, size_t); // std::string is Windows-1252 **ON ALL PLATFORMS**
inline coda_oss::u8string fromWindows1252(std::string::const_pointer s)
{
return fromWindows1252(s, gsl::narrow<size_t>(strlen(s)));
}
CODA_OSS_API coda_oss::u8string fromUtf8(std::string::const_pointer, size_t); // std::string is UTF-8 **ON ALL PLATFORMS**
inline coda_oss::u8string fromUtf8(std::string::const_pointer s)
{
return fromUtf8(s, gsl::narrow<size_t>(strlen(s)));
}

// With some older C++ compilers, uint16_t may be used instead of char16_t :-(
using ui16string = std::basic_string<uint16_t>; // ui = UInt16_t

Expand All @@ -110,76 +78,39 @@ static_assert(!std::is_same<wchar_t, int32_t>::value, "wchar_t should not be the

// When the encoding is important, we want to "traffic" in coda_oss::u8string (UTF-8), not
// str::W1252string (Windows-1252) or std::string (unknown). Make it easy to get those from other encodings.
CODA_OSS_API coda_oss::u8string to_u8string(std::string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform
CODA_OSS_API coda_oss::u8string to_u8string(str::W1252string::const_pointer, size_t);
inline coda_oss::u8string to_u8string(coda_oss::u8string::const_pointer s, size_t sz)
{
return coda_oss::u8string(s, sz);
}
CODA_OSS_API coda_oss::u8string to_u8string(std::wstring::const_pointer, size_t); // std::wstring is UTF-16 or UTF-32 depending on platform

// UTF-16 is typically uses on Windows (where it is std::wstring::value_type);
// Linux preferred UTF-32.
// UTF-16 is typically used on Windows (where it is std::wstring::value_type); Linux prefers UTF-32.
CODA_OSS_API coda_oss::u8string to_u8string(std::u16string::const_pointer, size_t);

CODA_OSS_API std::u16string to_u16string(coda_oss::u8string::const_pointer, size_t);
str::ui16string to_ui16string(coda_oss::u8string::const_pointer, size_t);
std::u16string to_u16string(str::W1252string::const_pointer, size_t);
str::ui16string to_ui16string(str::W1252string::const_pointer, size_t);

// UTF-32 is convenient because each code-point is a single 32-bit integer.
// It's typically std::wstring::value_type on Linux, but NOT Windows.
CODA_OSS_API coda_oss::u8string to_u8string(std::u32string::const_pointer, size_t);
CODA_OSS_API std::u32string to_u32string(coda_oss::u8string::const_pointer, size_t);
std::u32string to_u32string(str::W1252string::const_pointer, size_t);

template <typename TChar>
inline coda_oss::u8string to_u8string(const std::basic_string<TChar>& s)
{
return to_u8string(s.c_str(), s.size());
}
template <typename TChar>
inline std::u16string to_u16string(const std::basic_string<TChar>& s)
{
return to_u16string(s.c_str(), s.size());
}
template <typename TChar>
inline std::u32string to_u32string(const std::basic_string<TChar>& s)
{
return to_u32string(s.c_str(), s.size());
}

namespace details // YOU should use EncodedStringView
{
coda_oss::u8string to_u8string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */);
std::u16string to_u16string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */);
ui16string to_ui16string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); // older C++ compilers
std::u32string to_u32string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */);
std::wstring to_wstring(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */);

std::string& to_u8string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */, std::string&); // encoding is lost
std::string& to_u8string(std::u16string::const_pointer, size_t, std::string&); // encoding is lost
std::string& to_u8string(std::u32string::const_pointer, size_t, std::string&); // encoding is lost

str::W1252string to_w1252string(std::string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform
str::W1252string to_w1252string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */);
CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer, size_t);
inline str::W1252string to_w1252string(str::W1252string::const_pointer s, size_t sz)
{
return str::W1252string(s, sz);
}
CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer p, size_t sz);

std::string to_native(coda_oss::u8string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform
std::string to_native(str::W1252string::const_pointer s, size_t sz); // std::string is Windows-1252 or UTF-8 depending on platform
inline std::string to_native(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) // std::string is Windows-1252 or UTF-8 depending on platform
{
return is_utf8 ? to_native(cast<coda_oss::u8string::const_pointer>(s), sz)
: to_native(cast<str::W1252string::const_pointer>(s), sz);
}
inline std::string to_native(std::string::const_pointer s, size_t sz)
{
return std::string(s, sz);
}
template <typename TChar>
inline std::string to_native(const std::basic_string<TChar>& s)
namespace details // YOU should use EncodedStringView
{
return to_native(s.c_str(), s.size());
}
void w1252to8(str::W1252string::const_pointer p, size_t sz, std::string&); // encoding is lost
void utf16to8(std::u16string::const_pointer, size_t, std::string&); // encoding is lost
void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string&); // encoding is lost
}
}

Expand Down
25 changes: 19 additions & 6 deletions externals/coda-oss/modules/c++/str/source/EncodedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ void str::EncodedString::assign(coda_oss::u8string::const_pointer s)
using char_t = std::remove_pointer<decltype(s)>::type; // avoid copy-paste error
using string_t = std::basic_string<std::remove_const<char_t>::type>;
s_ = cast<std::string::const_pointer>(s); // copy
v_ = EncodedStringView(c_str<string_t>(s_));
v_ = EncodedStringView(str::c_str<string_t>(s_));
}

void str::EncodedString::assign(str::W1252string::const_pointer s)
{
using char_t = std::remove_pointer<decltype(s)>::type; // avoid copy-paste error
using string_t = std::basic_string<std::remove_const<char_t>::type>;
s_ = cast<std::string::const_pointer>(s); // copy
v_ = EncodedStringView(c_str<string_t>(s_)); // avoid copy-paste error
v_ = EncodedStringView(str::c_str<string_t>(s_)); // avoid copy-paste error
}

static str::EncodedStringView make_EncodedStringView(const std::string& s, bool isUtf8)
Expand All @@ -52,8 +52,9 @@ static str::EncodedStringView make_EncodedStringView(const std::string& s, bool
return str::EncodedStringView(str::c_str<str::W1252string>(s));
}

str::EncodedString::EncodedString(std::string::const_pointer s) : s_(s) /*copy*/, v_ (s_) { }
str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_ (s_) { }
str::EncodedString::EncodedString(std::string::const_pointer s) : s_(s) /*copy*/, v_(s_) { }
str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_(s_) { }
str::EncodedString::EncodedString() : EncodedString(""){ }

str::EncodedString::EncodedString(coda_oss::u8string::const_pointer s)
{
Expand All @@ -70,9 +71,21 @@ str::EncodedString::EncodedString(const str::W1252string& s) : EncodedString(s.c
str::EncodedString::EncodedString(const std::u16string& s) : EncodedString(to_u8string(s)) { }
str::EncodedString::EncodedString(const std::u32string& s) : EncodedString(to_u8string(s)) { }

str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string(s, wcslen(s))) { }
str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string(s)) { }
// Converts `sz` wide characters starting at `p_` to UTF-8 by forwarding to
// str::to_u8string().  std::wstring is UTF-16 or UTF-32 depending on platform.
static inline coda_oss::u8string to_u8string_(std::wstring::const_pointer p_, size_t sz)
{
    // The pointer type is picked by the preprocessor (not at run-time)
    // because str::cast() checks to be sure the sizes are correct.
#if _WIN32
    using wide_pointer_t = std::u16string::const_pointer; // std::wstring is UTF-16 on Windows
#else
    using wide_pointer_t = std::u32string::const_pointer; // std::wstring is UTF-32 on Linux
#endif
    const auto pWide = str::cast<wide_pointer_t>(p_);
    return str::to_u8string(pWide, sz);
}

str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string_(s, wcslen(s))) { }
str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string_(s.c_str(), s.size())) { }

// create from a view
str::EncodedString& str::EncodedString::operator=(const EncodedStringView& v)
Expand Down
Loading

0 comments on commit 2cd9bca

Please sign in to comment.