From e76cd6458f1bdafa5ce35d01933a6d7620c2f9f0 Mon Sep 17 00:00:00 2001 From: Alex Alabuzhev Date: Sun, 22 Sep 2024 14:59:12 +0100 Subject: [PATCH] Assume that hlf and lng files are in UTF-8 by default, even if there is no BOM Fallback to OEM only if decoding fails --- far/changelog | 6 ++++ far/filelist.cpp | 4 +-- far/filestr.cpp | 15 ++++++-- far/filestr.hpp | 3 +- far/help.cpp | 34 +++++++++--------- far/language.cpp | 91 ++++++++++++++++++++++++------------------------ far/language.hpp | 18 ++++++++-- far/vbuild.m4 | 2 +- 8 files changed, 101 insertions(+), 72 deletions(-) diff --git a/far/changelog b/far/changelog index 7e73edd2d2..0c80ccb851 100644 --- a/far/changelog +++ b/far/changelog @@ -1,3 +1,9 @@ +-------------------------------------------------------------------------------- +drkns 2024-09-22 14:58:10+01:00 - build 6371 + +1. Assume that hlf and lng files are in UTF-8 by default, even if there is no BOM. + Fallback to OEM only if decoding fails. + -------------------------------------------------------------------------------- drkns 2024-09-15 14:01:29+01:00 - build 6370 diff --git a/far/filelist.cpp b/far/filelist.cpp index 0f0ecf933c..1a301906dc 100644 --- a/far/filelist.cpp +++ b/far/filelist.cpp @@ -5536,8 +5536,8 @@ bool FileList::PluginPanelHelp(const plugin_panel* hPlugin) const { string_view strPath = hPlugin->plugin()->ModuleName(); CutToSlash(strPath); - const auto [File, Name, Codepage] = OpenLangFile(strPath, Global->HelpFileMask, Global->Opt->strHelpLanguage); - if (!File) + const auto HelpFile = OpenLangFile(strPath, Global->HelpFileMask, Global->Opt->strHelpLanguage); + if (!HelpFile) return false; help::show(help::make_link(strPath, L"Contents"sv)); diff --git a/far/filestr.cpp b/far/filestr.cpp index ab668c3e03..a2407fe52a 100644 --- a/far/filestr.cpp +++ b/far/filestr.cpp @@ -63,10 +63,11 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. const auto BufferSize = 65536; static_assert(BufferSize % sizeof(wchar_t) == 0); -enum_lines::enum_lines(std::istream& Stream, uintptr_t CodePage): +enum_lines::enum_lines(std::istream& Stream, uintptr_t const CodePage, bool* TryUtf8): m_Stream(Stream), m_BeginPos(m_Stream.tellg()), m_CodePage(CodePage), + m_TryUtf8(TryUtf8), m_Eol(m_CodePage), m_Buffer(BufferSize) { @@ -249,9 +250,19 @@ bool enum_lines::GetString(string_view& Str, eol& Eol) const if (Data.m_Bytes.size() > Data.m_wBuffer.size()) Data.m_wBuffer.reset(Data.m_Bytes.size()); + const auto Utf8CP = encoding::codepage::utf8(); + const auto IsUtf8Cp = m_CodePage == Utf8CP; + for (;;) { - const auto Size = encoding::get_chars(m_CodePage, Data.m_Bytes, Data.m_wBuffer, &m_Diagnostics); + const auto TryUtf8 = m_TryUtf8 && *m_TryUtf8 && !IsUtf8Cp; + const auto Size = encoding::get_chars(TryUtf8? Utf8CP : m_CodePage, Data.m_Bytes, Data.m_wBuffer, &m_Diagnostics); + if (TryUtf8 && m_Diagnostics.ErrorPosition) + { + *m_TryUtf8 = false; + continue; + } + if (Size <= Data.m_wBuffer.size()) { Data.m_Bytes.clear(); diff --git a/far/filestr.hpp b/far/filestr.hpp index 7367636543..9993f5308f 100644 --- a/far/filestr.hpp +++ b/far/filestr.hpp @@ -61,7 +61,7 @@ class [[nodiscard]] enum_lines: public enumerator IMPLEMENTS_ENUMERATOR(enum_lines); public: - enum_lines(std::istream& Stream, uintptr_t CodePage); + enum_lines(std::istream& Stream, uintptr_t CodePage, bool* TryUtf8 = {}); bool conversion_error() const { return m_Diagnostics.ErrorPosition.has_value(); } @@ -79,6 +79,7 @@ class [[nodiscard]] enum_lines: public enumerator std::istream& m_Stream; size_t m_BeginPos; uintptr_t m_CodePage; + bool* m_TryUtf8; raw_eol m_Eol; mutable char_ptr m_Buffer; diff --git a/far/help.cpp b/far/help.cpp index 4f96f663d3..4467bf0bdb 100644 --- a/far/help.cpp +++ b/far/help.cpp @@ -183,7 +183,7 @@ class Help final: public window bool GetTopic(int realX, int realY, string& strTopic) const; void MoveToReference(int Forward, int CurScreen); void ReadDocumentsHelp(int TypeIndex); - void Search(const os::fs::file& HelpFile, uintptr_t nCodePage); + void Search(lang_file& HelpFile); bool JumpTopic(string_view Topic); bool JumpTopic(); int CanvasHeight() const { return ObjHeight() - 1 - 1; } @@ -224,9 +224,9 @@ class Help final: public window SearchReplaceDlgParams m_SearchDlgParams; }; -static bool GetOptionsParam(const os::fs::file& LangFile, string_view const KeyName, string& Value, unsigned CodePage) +static bool GetOptionsParam(lang_file& LangFile, string_view const KeyName, string& Value) { - return GetLangParam(LangFile, L"Options "sv + KeyName, Value, CodePage); + return GetLangParam(LangFile, L"Options "sv + KeyName, Value); } Help::Help(private_tag): @@ -328,7 +328,7 @@ bool Help::ReadHelp(string_view const Mask) return true; } - const auto [HelpFile, Name, HelpFileCodePage] = OpenLangFile(strPath, Mask.empty()? Global->HelpFileMask : Mask, Global->Opt->strHelpLanguage); + auto HelpFile = OpenLangFile(strPath, Mask.empty()? Global->HelpFileMask : Mask, Global->Opt->strHelpLanguage); if (!HelpFile) { ErrorHelp = true; @@ -352,11 +352,11 @@ bool Help::ReadHelp(string_view const Mask) return false; } - strFullHelpPathName = HelpFile.GetName(); + strFullHelpPathName = HelpFile.File.GetName(); string strReadStr; - if (GetOptionsParam(HelpFile, L"TabSize"sv, strReadStr, HelpFileCodePage)) + if (GetOptionsParam(HelpFile, L"TabSize"sv, strReadStr)) { unsigned UserTabSize; if (from_string(strReadStr, UserTabSize)) @@ -376,12 +376,12 @@ bool Help::ReadHelp(string_view const Mask) } } - if (GetOptionsParam(HelpFile, L"CtrlColorChar"sv, strReadStr, HelpFileCodePage)) + if (GetOptionsParam(HelpFile, L"CtrlColorChar"sv, strReadStr)) m_CtrlColorChar = strReadStr.front(); else m_CtrlColorChar = 0; - if (GetOptionsParam(HelpFile, L"CtrlStartPosChar"sv, strReadStr, HelpFileCodePage)) + if (GetOptionsParam(HelpFile, L"CtrlStartPosChar"sv, strReadStr)) strCtrlStartPosChar = strReadStr; else strCtrlStartPosChar.clear(); @@ -389,7 +389,7 @@ bool Help::ReadHelp(string_view const Mask) /* $ 29.11.2001 DJ запомним, чего там написано в PluginContents */ - if (!GetLangParam(HelpFile, L"PluginContents"sv, strCurPluginContents, HelpFileCodePage)) + if (!GetLangParam(HelpFile, L"PluginContents"sv, strCurPluginContents)) strCurPluginContents.clear(); string strTabSpace(CtrlTabSize, L' '); @@ -398,7 +398,7 @@ bool Help::ReadHelp(string_view const Mask) if (StackData.strHelpTopic == FoundContents) { - Search(HelpFile, HelpFileCodePage); + Search(HelpFile); return true; } @@ -419,11 +419,11 @@ bool Help::ReadHelp(string_view const Mask) int MI=0; string strMacroArea; - os::fs::filebuf StreamBuffer(HelpFile, std::ios::in); + os::fs::filebuf StreamBuffer(HelpFile.File, std::ios::in); std::istream Stream(&StreamBuffer); Stream.exceptions(Stream.badbit | Stream.failbit); // BUGBUG, add try/catch - enum_lines EnumFileLines(Stream, HelpFileCodePage); + enum_lines EnumFileLines(Stream, HelpFile.Codepage, &HelpFile.TryUtf8); auto FileIterator = EnumFileLines.begin(); const size_t StartSizeKeyName = 20; size_t SizeKeyName = StartSizeKeyName; @@ -1944,7 +1944,7 @@ void Help::MoveToReference(int Forward,int CurScreen) FastShow(); } -void Help::Search(const os::fs::file& HelpFile,uintptr_t nCodePage) +void Help::Search(lang_file& HelpFile) { FixCount=1; StackData.TopStr=0; @@ -1980,11 +1980,11 @@ void Help::Search(const os::fs::file& HelpFile,uintptr_t nCodePage) searchers Searchers; const auto& Searcher = init_searcher(Searchers, m_SearchDlgParams.CaseSensitive.value(), m_SearchDlgParams.Fuzzy.value(), m_SearchDlgParams.SearchStr); - os::fs::filebuf StreamBuffer(HelpFile, std::ios::in); + os::fs::filebuf StreamBuffer(HelpFile.File, std::ios::in); std::istream Stream(&StreamBuffer); Stream.exceptions(Stream.badbit | Stream.failbit); // BUGBUG, add try/catch - for (const auto& i: enum_lines(Stream, nCodePage)) + for (const auto& i: enum_lines(Stream, HelpFile.Codepage, &HelpFile.TryUtf8)) { auto Str = trim_right(i.Str); @@ -2076,12 +2076,12 @@ void Help::ReadDocumentsHelp(int TypeIndex) { string_view Path = i->ModuleName(); CutToSlash(Path); - const auto [HelpFile, HelpLangName, HelpFileCodePage] = OpenLangFile(Path, Global->HelpFileMask, Global->Opt->strHelpLanguage); + auto HelpFile = OpenLangFile(Path, Global->HelpFileMask, Global->Opt->strHelpLanguage); if (!HelpFile) continue; string strEntryName; - if (!GetLangParam(HelpFile, ContentsName, strEntryName, HelpFileCodePage)) + if (!GetLangParam(HelpFile, ContentsName, strEntryName)) continue; AddLine(far::format(L" ~{}~@{}@"sv, strEntryName, help::make_link(Path, HelpContents))); diff --git a/far/language.cpp b/far/language.cpp index 33de4eea62..de1829862b 100644 --- a/far/language.cpp +++ b/far/language.cpp @@ -67,59 +67,64 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. static const auto LangFileMask = L"*.lng"sv; -std::tuple OpenLangFile(string_view const Path, string_view const Mask, string_view const Language) +static lang_file open_impl(string_view const FileName) { - FN_RETURN_TYPE(OpenLangFile) CurrentFileData, EnglishFileData; + lang_file Result; + if (!Result.File.Open(FileName, FILE_READ_DATA, os::fs::file_share_read, nullptr, OPEN_EXISTING)) + return {}; - for (const auto& FindData: os::fs::enum_files(path::join(Path, Mask))) - { - if (!os::fs::is_file(FindData)) - continue; + Result.Codepage = GetFileCodepage(Result.File, encoding::codepage::oem(), nullptr, false); + Result.TryUtf8 = !IsUtfCodePage(Result.Codepage); - const auto CurrentFileName = path::join(Path, FindData.FileName); + string Language; + if (!GetLangParam(Result, L"Language"sv, Language)) + return {}; - auto& [CurrentFile, CurrentLngName, CurrentCodepage] = CurrentFileData; + std::tie(Result.Name, Result.Description) = split(Language, L','); - CurrentFile = os::fs::file(CurrentFileName, FILE_READ_DATA, os::fs::file_share_read, nullptr, OPEN_EXISTING); - if (!CurrentFile) - continue; + return Result; +} - CurrentCodepage = GetFileCodepage(CurrentFile, encoding::codepage::oem(), nullptr, false); +lang_file OpenLangFile(string_view const Path, string_view const Mask, string_view const Language) +{ + lang_file CurrentFile, EnglishFile; - if (!GetLangParam(CurrentFile, L"Language"sv, CurrentLngName, CurrentCodepage)) + for (const auto& FindData: os::fs::enum_files(path::join(Path, Mask))) + { + if (!os::fs::is_file(FindData)) continue; - const auto [LngName, LngDescription] = split(CurrentLngName, L','); - if (!LngDescription.empty()) - CurrentLngName.resize(LngName.size()); + CurrentFile = open_impl(path::join(Path, FindData.FileName)); + if (!CurrentFile) + continue; - if (equal_icase(CurrentLngName, Language)) - return CurrentFileData; + if (equal_icase(CurrentFile.Name, Language)) + return CurrentFile; - if (equal_icase(CurrentLngName, L"English"sv)) + if (equal_icase(CurrentFile.Name, L"English"sv)) { - EnglishFileData = std::move(CurrentFileData); + EnglishFile = std::move(CurrentFile); } } - if (std::get<0>(EnglishFileData)) - return EnglishFileData; + if (EnglishFile) + return EnglishFile; - return CurrentFileData; + return CurrentFile; } -bool GetLangParam(const os::fs::file& LangFile, string_view const ParamName, string& Param, uintptr_t CodePage) +bool GetLangParam(lang_file& LangFile, string_view const ParamName, string& Param) { const auto strFullParamName = concat(L'.', ParamName); - const auto CurFilePos = LangFile.GetPointer(); - SCOPE_EXIT{ LangFile.SetPointer(CurFilePos, nullptr, FILE_BEGIN); }; + const auto CurFilePos = LangFile.File.GetPointer(); + SCOPE_EXIT{ LangFile.File.SetPointer(CurFilePos, nullptr, FILE_BEGIN); }; - os::fs::filebuf StreamBuffer(LangFile, std::ios::in); + os::fs::filebuf StreamBuffer(LangFile.File, std::ios::in); std::istream Stream(&StreamBuffer); Stream.exceptions(Stream.badbit | Stream.failbit); - for (const auto& i: enum_lines(Stream, CodePage)) + for (const auto& i: enum_lines(Stream, LangFile.Codepage, &LangFile.TryUtf8)) { if (starts_with_icase(i.Str, strFullParamName)) { @@ -157,32 +162,25 @@ static bool SelectLanguage(bool HelpLanguage, string& Dest) if (!os::fs::is_file(FindData)) continue; - const os::fs::file LangFile(path::join(Global->g_strFarPath, FindData.FileName), FILE_READ_DATA, os::fs::file_share_read, nullptr, OPEN_EXISTING); + auto LangFile = open_impl(path::join(Global->g_strFarPath, FindData.FileName)); if (!LangFile) continue; - const auto Codepage = GetFileCodepage(LangFile, encoding::codepage::oem(), nullptr, false); - - string LangParamValue; - if (!GetLangParam(LangFile, L"Language"sv, LangParamValue, Codepage)) - continue; - string strEntryName; if (HelpLanguage && ( - GetLangParam(LangFile, L"PluginContents"sv, strEntryName, Codepage) || - GetLangParam(LangFile, L"DocumentContents"sv, strEntryName, Codepage) + GetLangParam(LangFile, L"PluginContents"sv, strEntryName) || + GetLangParam(LangFile, L"DocumentContents"sv, strEntryName) )) continue; - const auto [LangName, LangDescription] = split(LangParamValue, L','); - MenuItemEx LangMenuItem(!LangDescription.empty()? LangDescription : LangName); + MenuItemEx LangMenuItem(!LangFile.Description.empty()? LangFile.Description: LangFile.Name); // No duplicate languages if (LangMenu->FindItem(0, LangMenuItem.Name, LIFIND_EXACTMATCH) != -1) continue; - LangMenuItem.SetSelect(equal_icase(Dest, LangName)); - LangMenuItem.ComplexUserData = string(LangName); + LangMenuItem.SetSelect(equal_icase(Dest, LangFile.Name)); + LangMenuItem.ComplexUserData = LangFile.Name; LangMenu->AddItem(LangMenuItem); } @@ -325,6 +323,7 @@ static void LoadCustomStrings(string_view const FileName, unordered_string_mapcreate(); - const auto [LangFile, LangFileName, LangFileCodePage] = OpenLangFile(Path, LangFileMask, Language); + auto LangFile = OpenLangFile(Path, LangFileMask, Language); if (!LangFile) { throw far_known_exception(far::format(L"Cannot find any language files in \"{}\""sv, Path)); } - Data->m_FileName = LangFile.GetName(); + Data->m_FileName = LangFile.File.GetName(); if (CountNeed != -1) { @@ -393,11 +392,11 @@ void language::load(string_view const Path, string_view const Language, int Coun string SavedLabel; - os::fs::filebuf StreamBuffer(LangFile, std::ios::in); + os::fs::filebuf StreamBuffer(LangFile.File, std::ios::in); std::istream Stream(&StreamBuffer); Stream.exceptions(Stream.badbit | Stream.failbit); - for (const auto& i: enum_lines(Stream, LangFileCodePage)) + for (const auto& i: enum_lines(Stream, LangFile.Codepage, &LangFile.TryUtf8)) { switch (auto Line = parse_lng_line(trim(i.Str), LoadLabels); Line.Type) { diff --git a/far/language.hpp b/far/language.hpp index 24530e8538..7ee7f20cca 100644 --- a/far/language.hpp +++ b/far/language.hpp @@ -102,9 +102,21 @@ class far_language final: private language, public singleton ~far_language() override = default; }; -// (file, name, codepage) -std::tuple OpenLangFile(string_view Path, string_view Mask, string_view Language); -bool GetLangParam(const os::fs::file& LangFile, string_view ParamName, string& Param, uintptr_t CodePage); +struct lang_file +{ + os::fs::file File; + string Name, Description; + uintptr_t Codepage{}; + bool TryUtf8{}; + + explicit operator bool() const + { + return File.operator bool(); + } +}; + +lang_file OpenLangFile(string_view Path, string_view Mask, string_view Language); +bool GetLangParam(lang_file& LangFile, string_view ParamName, string& Param); bool SelectInterfaceLanguage(string& Dest); bool SelectHelpLanguage(string& Dest); diff --git a/far/vbuild.m4 b/far/vbuild.m4 index e062cfdd05..bc7e89bcc5 100644 --- a/far/vbuild.m4 +++ b/far/vbuild.m4 @@ -1 +1 @@ -6370 +6371