Skip to content

Commit

Permalink
Continue 6371 & 6372
Browse files Browse the repository at this point in the history
  • Loading branch information
alabuzhev committed Sep 23, 2024
1 parent 55e38fe commit f0a49d0
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 26 deletions.
5 changes: 5 additions & 0 deletions far/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
--------------------------------------------------------------------------------
drkns 2024-09-23 16:09:43+01:00 - build 6374

1. Continue 6371 & 6372.

--------------------------------------------------------------------------------
shmuel 2024-09-23 10:13:10+03:00 - build 6373

Expand Down
46 changes: 36 additions & 10 deletions far/encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ static size_t get_bytes_impl(uintptr_t const Codepage, string_view const Str, st
return 0;

if (Diagnostics)
*Diagnostics = {};
Diagnostics->clear();

switch(Codepage)
{
Expand Down Expand Up @@ -380,6 +380,30 @@ static size_t get_bytes_impl(uintptr_t const Codepage, string_view const Str, st
}
}

encoding::diagnostics::diagnostics(unsigned Diagnostics):
EnabledDiagnostics(Diagnostics)
{
}

// Resets the per-call results (error position, partial input/output counters
// and the UTF-8 state) to their initial values.
// EnabledDiagnostics is not modified here.
void encoding::diagnostics::clear()
{
	m_IsUtf8 = is_utf8::yes_ascii;
	PartialOutput = 0;
	PartialInput = 0;
	ErrorPosition.reset();
}

// Records a UTF-8 verdict.
// The stored state is only replaced while it is still is_utf8::yes_ascii;
// once it has become anything else, later calls are ignored, so the first
// non-ASCII verdict wins.
void encoding::diagnostics::set_is_utf8(is_utf8 const IsUtf8)
{
	if (m_IsUtf8 != is_utf8::yes_ascii)
		return;

	m_IsUtf8 = IsUtf8;
}

// Returns the UTF-8 state currently stored in this object
// (is_utf8::yes_ascii after construction or clear()).
encoding::is_utf8 encoding::diagnostics::get_is_utf8() const
{
	return this->m_IsUtf8;
}

size_t encoding::get_bytes(uintptr_t const Codepage, string_view const Str, std::span<char> const Buffer, diagnostics* const Diagnostics)
{
const auto Result = get_bytes_impl(Codepage, Str, Buffer, Diagnostics);
Expand Down Expand Up @@ -444,7 +468,7 @@ static size_t get_chars_impl(uintptr_t const Codepage, std::string_view Str, std
return 0;

if (Diagnostics)
*Diagnostics = {};
Diagnostics->clear();

const auto validate_unicode = [&]
{
Expand Down Expand Up @@ -911,8 +935,7 @@ static size_t BytesToUnicode(
if (LocalDiagnostics.ErrorPosition && !Diagnostics->ErrorPosition)
Diagnostics->ErrorPosition = StrIterator - Str.begin() + *LocalDiagnostics.ErrorPosition;

if (LocalDiagnostics.SeenValidUtf8)
Diagnostics->SeenValidUtf8 = true;
Diagnostics->set_is_utf8(LocalDiagnostics.get_is_utf8());
}

const auto StoreChar = [&](wchar_t Char)
Expand Down Expand Up @@ -1094,6 +1117,7 @@ size_t Utf8::get_char(
encoding::replace_char;

Diagnostics.ErrorPosition = Position;
Diagnostics.set_is_utf8(encoding::is_utf8::no);
return 1;
};

Expand Down Expand Up @@ -1136,7 +1160,7 @@ size_t Utf8::get_char(
// legal 2-byte
First = utf8::extract(c1, c2);
++StrIterator;
Diagnostics.SeenValidUtf8 = true;
Diagnostics.set_is_utf8(encoding::is_utf8::yes);
return 1;
}

Expand All @@ -1155,15 +1179,17 @@ size_t Utf8::get_char(
// legal 3-byte
First = utf8::extract(c1, c2, c3);

if constexpr (!utf8::support_unpaired_surrogates)
// invalid: surrogate area code
if (in_closed_range(utf16::surrogate_first, First, utf16::surrogate_last))
{
// invalid: surrogate area code
if (in_closed_range(utf16::surrogate_first, First, utf16::surrogate_last))
Diagnostics.set_is_utf8(encoding::is_utf8::no);

if constexpr (!utf8::support_unpaired_surrogates)
return InvalidChar(c1, 2);
}

StrIterator += 2;
Diagnostics.SeenValidUtf8 = true;
Diagnostics.set_is_utf8(encoding::is_utf8::yes);
return 1;
}

Expand All @@ -1180,7 +1206,7 @@ size_t Utf8::get_char(
// legal 4-byte (produces 2 WCHARs)
std::tie(First, Second) = encoding::utf16::to_surrogate(utf8::extract(c1, c2, c3, c4));
StrIterator += 3;
Diagnostics.SeenValidUtf8 = true;
Diagnostics.set_is_utf8(encoding::is_utf8::yes);
return 2;
}

Expand Down
31 changes: 20 additions & 11 deletions far/encoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,38 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

namespace encoding
{
struct diagnostics
// Tri-state UTF-8 verdict; returned by is_valid_utf8 and tracked by diagnostics.
enum class is_utf8
{
// Reported by Utf8::get_char for an invalid sequence or a surrogate-area code point.
no,
// Reported by Utf8::get_char after a legal 2-, 3- or 4-byte sequence.
yes,
// Initial / cleared state of diagnostics: no verdict other than the default has been recorded yet.
yes_ascii
};

// Carries the optional diagnostic results of an encoding conversion
// (passed by pointer to get_bytes and friends).
// NOTE(review): this listing appears to interleave pre- and post-change lines
// of a diff (duplicate enumerator / member declarations) - confirm against the real header.
class diagnostics
{
public:
// Bit flags accepted by the constructor and stored in EnabledDiagnostics.
enum: unsigned
{
no_translation = 0_bit,
no_translation = 0_bit,
not_enough_data = 1_bit,

all = ~0u
};

unsigned EnabledDiagnostics{ all };
// Stores Diagnostics in EnabledDiagnostics; defaults to all flags.
explicit diagnostics(unsigned Diagnostics = all);

// Resets ErrorPosition, PartialInput, PartialOutput and the UTF-8 state;
// EnabledDiagnostics is left as is.
void clear();

unsigned EnabledDiagnostics;
// Empty when no error position has been recorded.
std::optional<size_t> ErrorPosition;
size_t PartialInput{};
size_t PartialOutput{};

bool SeenValidUtf8{};
// Records a verdict; only takes effect while the stored state is still is_utf8::yes_ascii.
void set_is_utf8(is_utf8 IsUtf8);
// Returns the stored UTF-8 state.
is_utf8 get_is_utf8() const;

private:
// Starts as yes_ascii; changed only through set_is_utf8 and clear.
is_utf8 m_IsUtf8{is_utf8::yes_ascii};
};

[[nodiscard]] size_t get_bytes(uintptr_t Codepage, string_view Str, std::span<char> Buffer, diagnostics* Diagnostics = {});
Expand Down Expand Up @@ -197,13 +213,6 @@ namespace encoding
bool m_IgnoreEncodingErrors;
};

enum class is_utf8
{
no,
yes,
yes_ascii
};

is_utf8 is_valid_utf8(std::string_view Str, bool PartialContent);

inline constexpr wchar_t bom_char = L''; // Zero Length Space
Expand Down
2 changes: 2 additions & 0 deletions far/exception_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1557,6 +1557,7 @@ static string collect_information(

const auto Version = self_version();
const auto Compiler = build::compiler();
const auto Library = build::library();
const auto PeTime = pe_timestamp();
const auto FileTime = file_timestamp();
const auto SystemTime = system_timestamp();
Expand Down Expand Up @@ -1594,6 +1595,7 @@ static string collect_information(
{
{ L"Far: "sv, Version, },
{ L"Compiler: "sv, Compiler, },
{ L"Library: "sv, Library, },
{ L"PE time: "sv, PeTime, },
{ L"File time:"sv, FileTime, },
{ L"Time: "sv, SystemTime, },
Expand Down
6 changes: 3 additions & 3 deletions far/filestr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,10 @@ bool enum_lines::GetString(string_view& Str, eol& Eol) const
const auto TryUtf8 = m_TryUtf8 && *m_TryUtf8 && !IsUtf8Cp;
const auto Size = encoding::get_chars(TryUtf8? Utf8CP : m_CodePage, Data.m_Bytes, Data.m_wBuffer, &m_Diagnostics);

if (m_Diagnostics.SeenValidUtf8)
m_SeenValidUtf8 = true;
if (m_IsUtf8 == encoding::is_utf8::yes_ascii)
m_IsUtf8 = m_Diagnostics.get_is_utf8();

if (TryUtf8 && m_Diagnostics.ErrorPosition && !m_SeenValidUtf8)
if (TryUtf8 && m_Diagnostics.ErrorPosition && m_IsUtf8 != encoding::is_utf8::yes)
{
*m_TryUtf8 = false;
continue;
Expand Down
2 changes: 1 addition & 1 deletion far/filestr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class [[nodiscard]] enum_lines: public enumerator<enum_lines, file_line>
size_t m_BeginPos;
uintptr_t m_CodePage;
bool* m_TryUtf8;
mutable bool m_SeenValidUtf8{};
mutable encoding::is_utf8 m_IsUtf8{encoding::is_utf8::yes_ascii};
raw_eol m_Eol;

mutable char_ptr m_Buffer;
Expand Down
2 changes: 1 addition & 1 deletion far/vbuild.m4
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6373
6374

0 comments on commit f0a49d0

Please sign in to comment.