diff --git a/far/changelog b/far/changelog index a215e2846a..2d95ea290e 100644 --- a/far/changelog +++ b/far/changelog @@ -1,3 +1,8 @@ +-------------------------------------------------------------------------------- +drkns 2024-09-23 16:09:43+01:00 - build 6374 + +1. Continue 6371 & 6372. + -------------------------------------------------------------------------------- shmuel 2024-09-23 10:13:10+03:00 - build 6373 diff --git a/far/encoding.cpp b/far/encoding.cpp index c73337d58e..6c0cab4a3e 100644 --- a/far/encoding.cpp +++ b/far/encoding.cpp @@ -346,7 +346,7 @@ static size_t get_bytes_impl(uintptr_t const Codepage, string_view const Str, st return 0; if (Diagnostics) - *Diagnostics = {}; + Diagnostics->clear(); switch(Codepage) { @@ -380,6 +380,30 @@ static size_t get_bytes_impl(uintptr_t const Codepage, string_view const Str, st } } +encoding::diagnostics::diagnostics(unsigned Diagnostics): + EnabledDiagnostics(Diagnostics) +{ +} + +void encoding::diagnostics::clear() +{ + ErrorPosition.reset(); + PartialInput = {}; + PartialOutput = {}; + m_IsUtf8 = is_utf8::yes_ascii; +} + +void encoding::diagnostics::set_is_utf8(is_utf8 const IsUtf8) +{ + if (m_IsUtf8 == is_utf8::yes_ascii) + m_IsUtf8 = IsUtf8; +} + +encoding::is_utf8 encoding::diagnostics::get_is_utf8() const +{ + return m_IsUtf8; +} + size_t encoding::get_bytes(uintptr_t const Codepage, string_view const Str, std::span const Buffer, diagnostics* const Diagnostics) { const auto Result = get_bytes_impl(Codepage, Str, Buffer, Diagnostics); @@ -444,7 +468,7 @@ static size_t get_chars_impl(uintptr_t const Codepage, std::string_view Str, std return 0; if (Diagnostics) - *Diagnostics = {}; + Diagnostics->clear(); const auto validate_unicode = [&] { @@ -911,8 +935,7 @@ static size_t BytesToUnicode( if (LocalDiagnostics.ErrorPosition && !Diagnostics->ErrorPosition) Diagnostics->ErrorPosition = StrIterator - Str.begin() + *LocalDiagnostics.ErrorPosition; - if (LocalDiagnostics.SeenValidUtf8) - Diagnostics->SeenValidUtf8 = true; + Diagnostics->set_is_utf8(LocalDiagnostics.get_is_utf8()); } const auto StoreChar = [&](wchar_t Char) @@ -1094,6 +1117,7 @@ size_t Utf8::get_char( encoding::replace_char; Diagnostics.ErrorPosition = Position; + Diagnostics.set_is_utf8(encoding::is_utf8::no); return 1; }; @@ -1136,7 +1160,7 @@ size_t Utf8::get_char( // legal 2-byte First = utf8::extract(c1, c2); ++StrIterator; - Diagnostics.SeenValidUtf8 = true; + Diagnostics.set_is_utf8(encoding::is_utf8::yes); return 1; } @@ -1155,15 +1179,17 @@ size_t Utf8::get_char( // legal 3-byte First = utf8::extract(c1, c2, c3); - if constexpr (!utf8::support_unpaired_surrogates) + // invalid: surrogate area code + if (in_closed_range(utf16::surrogate_first, First, utf16::surrogate_last)) { - // invalid: surrogate area code - if (in_closed_range(utf16::surrogate_first, First, utf16::surrogate_last)) + Diagnostics.set_is_utf8(encoding::is_utf8::no); + + if constexpr (!utf8::support_unpaired_surrogates) return InvalidChar(c1, 2); } StrIterator += 2; - Diagnostics.SeenValidUtf8 = true; + Diagnostics.set_is_utf8(encoding::is_utf8::yes); return 1; } @@ -1180,7 +1206,7 @@ size_t Utf8::get_char( // legal 4-byte (produces 2 WCHARs) std::tie(First, Second) = encoding::utf16::to_surrogate(utf8::extract(c1, c2, c3, c4)); StrIterator += 3; - Diagnostics.SeenValidUtf8 = true; + Diagnostics.set_is_utf8(encoding::is_utf8::yes); return 2; } diff --git a/far/encoding.hpp b/far/encoding.hpp index 8c96b936a0..89574837f3 100644 --- a/far/encoding.hpp +++ b/far/encoding.hpp @@ -50,22 +50,38 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace encoding { - struct diagnostics + enum class is_utf8 + { + no, + yes, + yes_ascii + }; + + class diagnostics { + public: enum: unsigned { - no_translation = 0_bit, + no_translation = 0_bit, not_enough_data = 1_bit, all = ~0u }; - unsigned EnabledDiagnostics{ all }; + explicit diagnostics(unsigned Diagnostics = all); + + void clear(); + + unsigned EnabledDiagnostics; std::optional ErrorPosition; size_t PartialInput{}; size_t PartialOutput{}; - bool SeenValidUtf8{}; + void set_is_utf8(is_utf8 IsUtf8); + is_utf8 get_is_utf8() const; + + private: + is_utf8 m_IsUtf8{is_utf8::yes_ascii}; }; [[nodiscard]] size_t get_bytes(uintptr_t Codepage, string_view Str, std::span Buffer, diagnostics* Diagnostics = {}); @@ -197,13 +213,6 @@ namespace encoding bool m_IgnoreEncodingErrors; }; - enum class is_utf8 - { - no, - yes, - yes_ascii - }; - is_utf8 is_valid_utf8(std::string_view Str, bool PartialContent); inline constexpr wchar_t bom_char = L''; // Zero Length Space diff --git a/far/exception_handler.cpp b/far/exception_handler.cpp index 4ebe47e94b..3c638ee46a 100644 --- a/far/exception_handler.cpp +++ b/far/exception_handler.cpp @@ -1557,6 +1557,7 @@ static string collect_information( const auto Version = self_version(); const auto Compiler = build::compiler(); + const auto Library = build::library(); const auto PeTime = pe_timestamp(); const auto FileTime = file_timestamp(); const auto SystemTime = system_timestamp(); @@ -1594,6 +1595,7 @@ static string collect_information( { { L"Far: "sv, Version, }, { L"Compiler: "sv, Compiler, }, + { L"Library: "sv, Library, }, { L"PE time: "sv, PeTime, }, { L"File time:"sv, FileTime, }, { L"Time: "sv, SystemTime, }, diff --git a/far/filestr.cpp b/far/filestr.cpp index 5bcd4563f4..54a934ebfd 100644 --- a/far/filestr.cpp +++ b/far/filestr.cpp @@ -258,10 +258,10 @@ bool enum_lines::GetString(string_view& Str, eol& Eol) const const auto TryUtf8 = m_TryUtf8 && *m_TryUtf8 && !IsUtf8Cp; const auto Size = encoding::get_chars(TryUtf8? Utf8CP : m_CodePage, Data.m_Bytes, Data.m_wBuffer, &m_Diagnostics); - if (m_Diagnostics.SeenValidUtf8) - m_SeenValidUtf8 = true; + if (m_IsUtf8 == encoding::is_utf8::yes_ascii) + m_IsUtf8 = m_Diagnostics.get_is_utf8(); - if (TryUtf8 && m_Diagnostics.ErrorPosition && !m_SeenValidUtf8) + if (TryUtf8 && m_Diagnostics.ErrorPosition && m_IsUtf8 != encoding::is_utf8::yes) { *m_TryUtf8 = false; continue; diff --git a/far/filestr.hpp b/far/filestr.hpp index 02a22dd0c4..73334ce7f7 100644 --- a/far/filestr.hpp +++ b/far/filestr.hpp @@ -80,7 +80,7 @@ class [[nodiscard]] enum_lines: public enumerator size_t m_BeginPos; uintptr_t m_CodePage; bool* m_TryUtf8; - mutable bool m_SeenValidUtf8{}; + mutable encoding::is_utf8 m_IsUtf8{encoding::is_utf8::yes_ascii}; raw_eol m_Eol; mutable char_ptr m_Buffer; diff --git a/far/vbuild.m4 b/far/vbuild.m4 index a711420039..86849be51f 100644 --- a/far/vbuild.m4 +++ b/far/vbuild.m4 @@ -1 +1 @@ -6373 +6374