Skip to content

Commit

Permalink
Assume that hlf and lng files are in UTF-8 by default, even if there is no BOM
Browse files Browse the repository at this point in the history

Fall back to OEM only if decoding fails
  • Loading branch information
alabuzhev committed Sep 22, 2024
1 parent 00d400c commit e76cd64
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 72 deletions.
6 changes: 6 additions & 0 deletions far/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
--------------------------------------------------------------------------------
drkns 2024-09-22 14:58:10+01:00 - build 6371

1. Assume that hlf and lng files are in UTF-8 by default, even if there is no BOM.
Fall back to OEM only if decoding fails.

--------------------------------------------------------------------------------
drkns 2024-09-15 14:01:29+01:00 - build 6370

Expand Down
4 changes: 2 additions & 2 deletions far/filelist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5536,8 +5536,8 @@ bool FileList::PluginPanelHelp(const plugin_panel* hPlugin) const
{
string_view strPath = hPlugin->plugin()->ModuleName();
CutToSlash(strPath);
const auto [File, Name, Codepage] = OpenLangFile(strPath, Global->HelpFileMask, Global->Opt->strHelpLanguage);
if (!File)
const auto HelpFile = OpenLangFile(strPath, Global->HelpFileMask, Global->Opt->strHelpLanguage);
if (!HelpFile)
return false;

help::show(help::make_link(strPath, L"Contents"sv));
Expand Down
15 changes: 13 additions & 2 deletions far/filestr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,11 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
const auto BufferSize = 65536;
static_assert(BufferSize % sizeof(wchar_t) == 0);

enum_lines::enum_lines(std::istream& Stream, uintptr_t CodePage):
enum_lines::enum_lines(std::istream& Stream, uintptr_t const CodePage, bool* TryUtf8):
m_Stream(Stream),
m_BeginPos(m_Stream.tellg()),
m_CodePage(CodePage),
m_TryUtf8(TryUtf8),
m_Eol(m_CodePage),
m_Buffer(BufferSize)
{
Expand Down Expand Up @@ -249,9 +250,19 @@ bool enum_lines::GetString(string_view& Str, eol& Eol) const
if (Data.m_Bytes.size() > Data.m_wBuffer.size())
Data.m_wBuffer.reset(Data.m_Bytes.size());

const auto Utf8CP = encoding::codepage::utf8();
const auto IsUtf8Cp = m_CodePage == Utf8CP;

for (;;)
{
const auto Size = encoding::get_chars(m_CodePage, Data.m_Bytes, Data.m_wBuffer, &m_Diagnostics);
const auto TryUtf8 = m_TryUtf8 && *m_TryUtf8 && !IsUtf8Cp;
const auto Size = encoding::get_chars(TryUtf8? Utf8CP : m_CodePage, Data.m_Bytes, Data.m_wBuffer, &m_Diagnostics);
if (TryUtf8 && m_Diagnostics.ErrorPosition)
{
*m_TryUtf8 = false;
continue;
}

if (Size <= Data.m_wBuffer.size())
{
Data.m_Bytes.clear();
Expand Down
3 changes: 2 additions & 1 deletion far/filestr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class [[nodiscard]] enum_lines: public enumerator<enum_lines, file_line>
IMPLEMENTS_ENUMERATOR(enum_lines);

public:
enum_lines(std::istream& Stream, uintptr_t CodePage);
enum_lines(std::istream& Stream, uintptr_t CodePage, bool* TryUtf8 = {});

bool conversion_error() const { return m_Diagnostics.ErrorPosition.has_value(); }

Expand All @@ -79,6 +79,7 @@ class [[nodiscard]] enum_lines: public enumerator<enum_lines, file_line>
std::istream& m_Stream;
size_t m_BeginPos;
uintptr_t m_CodePage;
bool* m_TryUtf8;
raw_eol m_Eol;

mutable char_ptr m_Buffer;
Expand Down
34 changes: 17 additions & 17 deletions far/help.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class Help final: public window
bool GetTopic(int realX, int realY, string& strTopic) const;
void MoveToReference(int Forward, int CurScreen);
void ReadDocumentsHelp(int TypeIndex);
void Search(const os::fs::file& HelpFile, uintptr_t nCodePage);
void Search(lang_file& HelpFile);
bool JumpTopic(string_view Topic);
bool JumpTopic();
int CanvasHeight() const { return ObjHeight() - 1 - 1; }
Expand Down Expand Up @@ -224,9 +224,9 @@ class Help final: public window
SearchReplaceDlgParams m_SearchDlgParams;
};

static bool GetOptionsParam(const os::fs::file& LangFile, string_view const KeyName, string& Value, unsigned CodePage)
static bool GetOptionsParam(lang_file& LangFile, string_view const KeyName, string& Value)
{
return GetLangParam(LangFile, L"Options "sv + KeyName, Value, CodePage);
return GetLangParam(LangFile, L"Options "sv + KeyName, Value);
}

Help::Help(private_tag):
Expand Down Expand Up @@ -328,7 +328,7 @@ bool Help::ReadHelp(string_view const Mask)
return true;
}

const auto [HelpFile, Name, HelpFileCodePage] = OpenLangFile(strPath, Mask.empty()? Global->HelpFileMask : Mask, Global->Opt->strHelpLanguage);
auto HelpFile = OpenLangFile(strPath, Mask.empty()? Global->HelpFileMask : Mask, Global->Opt->strHelpLanguage);
if (!HelpFile)
{
ErrorHelp = true;
Expand All @@ -352,11 +352,11 @@ bool Help::ReadHelp(string_view const Mask)
return false;
}

strFullHelpPathName = HelpFile.GetName();
strFullHelpPathName = HelpFile.File.GetName();

string strReadStr;

if (GetOptionsParam(HelpFile, L"TabSize"sv, strReadStr, HelpFileCodePage))
if (GetOptionsParam(HelpFile, L"TabSize"sv, strReadStr))
{
unsigned UserTabSize;
if (from_string(strReadStr, UserTabSize))
Expand All @@ -376,20 +376,20 @@ bool Help::ReadHelp(string_view const Mask)
}
}

if (GetOptionsParam(HelpFile, L"CtrlColorChar"sv, strReadStr, HelpFileCodePage))
if (GetOptionsParam(HelpFile, L"CtrlColorChar"sv, strReadStr))
m_CtrlColorChar = strReadStr.front();
else
m_CtrlColorChar = 0;

if (GetOptionsParam(HelpFile, L"CtrlStartPosChar"sv, strReadStr, HelpFileCodePage))
if (GetOptionsParam(HelpFile, L"CtrlStartPosChar"sv, strReadStr))
strCtrlStartPosChar = strReadStr;
else
strCtrlStartPosChar.clear();

/* $ 29.11.2001 DJ
запомним, чего там написано в PluginContents
*/
if (!GetLangParam(HelpFile, L"PluginContents"sv, strCurPluginContents, HelpFileCodePage))
if (!GetLangParam(HelpFile, L"PluginContents"sv, strCurPluginContents))
strCurPluginContents.clear();

string strTabSpace(CtrlTabSize, L' ');
Expand All @@ -398,7 +398,7 @@ bool Help::ReadHelp(string_view const Mask)

if (StackData.strHelpTopic == FoundContents)
{
Search(HelpFile, HelpFileCodePage);
Search(HelpFile);
return true;
}

Expand All @@ -419,11 +419,11 @@ bool Help::ReadHelp(string_view const Mask)
int MI=0;
string strMacroArea;

os::fs::filebuf StreamBuffer(HelpFile, std::ios::in);
os::fs::filebuf StreamBuffer(HelpFile.File, std::ios::in);
std::istream Stream(&StreamBuffer);
Stream.exceptions(Stream.badbit | Stream.failbit); // BUGBUG, add try/catch

enum_lines EnumFileLines(Stream, HelpFileCodePage);
enum_lines EnumFileLines(Stream, HelpFile.Codepage, &HelpFile.TryUtf8);
auto FileIterator = EnumFileLines.begin();
const size_t StartSizeKeyName = 20;
size_t SizeKeyName = StartSizeKeyName;
Expand Down Expand Up @@ -1944,7 +1944,7 @@ void Help::MoveToReference(int Forward,int CurScreen)
FastShow();
}

void Help::Search(const os::fs::file& HelpFile,uintptr_t nCodePage)
void Help::Search(lang_file& HelpFile)
{
FixCount=1;
StackData.TopStr=0;
Expand Down Expand Up @@ -1980,11 +1980,11 @@ void Help::Search(const os::fs::file& HelpFile,uintptr_t nCodePage)
searchers Searchers;
const auto& Searcher = init_searcher(Searchers, m_SearchDlgParams.CaseSensitive.value(), m_SearchDlgParams.Fuzzy.value(), m_SearchDlgParams.SearchStr);

os::fs::filebuf StreamBuffer(HelpFile, std::ios::in);
os::fs::filebuf StreamBuffer(HelpFile.File, std::ios::in);
std::istream Stream(&StreamBuffer);
Stream.exceptions(Stream.badbit | Stream.failbit); // BUGBUG, add try/catch

for (const auto& i: enum_lines(Stream, nCodePage))
for (const auto& i: enum_lines(Stream, HelpFile.Codepage, &HelpFile.TryUtf8))
{
auto Str = trim_right(i.Str);

Expand Down Expand Up @@ -2076,12 +2076,12 @@ void Help::ReadDocumentsHelp(int TypeIndex)
{
string_view Path = i->ModuleName();
CutToSlash(Path);
const auto [HelpFile, HelpLangName, HelpFileCodePage] = OpenLangFile(Path, Global->HelpFileMask, Global->Opt->strHelpLanguage);
auto HelpFile = OpenLangFile(Path, Global->HelpFileMask, Global->Opt->strHelpLanguage);
if (!HelpFile)
continue;

string strEntryName;
if (!GetLangParam(HelpFile, ContentsName, strEntryName, HelpFileCodePage))
if (!GetLangParam(HelpFile, ContentsName, strEntryName))
continue;

AddLine(far::format(L" ~{}~@{}@"sv, strEntryName, help::make_link(Path, HelpContents)));
Expand Down
91 changes: 45 additions & 46 deletions far/language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,59 +67,64 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

static const auto LangFileMask = L"*.lng"sv;

std::tuple<os::fs::file, string, uintptr_t> OpenLangFile(string_view const Path, string_view const Mask, string_view const Language)
static lang_file open_impl(string_view const FileName)
{
FN_RETURN_TYPE(OpenLangFile) CurrentFileData, EnglishFileData;
lang_file Result;
if (!Result.File.Open(FileName, FILE_READ_DATA, os::fs::file_share_read, nullptr, OPEN_EXISTING))
return {};

for (const auto& FindData: os::fs::enum_files(path::join(Path, Mask)))
{
if (!os::fs::is_file(FindData))
continue;
Result.Codepage = GetFileCodepage(Result.File, encoding::codepage::oem(), nullptr, false);
Result.TryUtf8 = !IsUtfCodePage(Result.Codepage);

const auto CurrentFileName = path::join(Path, FindData.FileName);
string Language;
if (!GetLangParam(Result, L"Language"sv, Language))
return {};

auto& [CurrentFile, CurrentLngName, CurrentCodepage] = CurrentFileData;
std::tie(Result.Name, Result.Description) = split(Language, L',');

CurrentFile = os::fs::file(CurrentFileName, FILE_READ_DATA, os::fs::file_share_read, nullptr, OPEN_EXISTING);
if (!CurrentFile)
continue;
return Result;
}

CurrentCodepage = GetFileCodepage(CurrentFile, encoding::codepage::oem(), nullptr, false);
lang_file OpenLangFile(string_view const Path, string_view const Mask, string_view const Language)
{
lang_file CurrentFile, EnglishFile;

if (!GetLangParam(CurrentFile, L"Language"sv, CurrentLngName, CurrentCodepage))
for (const auto& FindData: os::fs::enum_files(path::join(Path, Mask)))
{
if (!os::fs::is_file(FindData))
continue;

const auto [LngName, LngDescription] = split(CurrentLngName, L',');
if (!LngDescription.empty())
CurrentLngName.resize(LngName.size());
CurrentFile = open_impl(path::join(Path, FindData.FileName));
if (!CurrentFile)
continue;

if (equal_icase(CurrentLngName, Language))
return CurrentFileData;
if (equal_icase(CurrentFile.Name, Language))
return CurrentFile;

if (equal_icase(CurrentLngName, L"English"sv))
if (equal_icase(CurrentFile.Name, L"English"sv))
{
EnglishFileData = std::move(CurrentFileData);
EnglishFile = std::move(CurrentFile);
}
}

if (std::get<0>(EnglishFileData))
return EnglishFileData;
if (EnglishFile)
return EnglishFile;

return CurrentFileData;
return CurrentFile;
}


bool GetLangParam(const os::fs::file& LangFile, string_view const ParamName, string& Param, uintptr_t CodePage)
bool GetLangParam(lang_file& LangFile, string_view const ParamName, string& Param)
{
const auto strFullParamName = concat(L'.', ParamName);
const auto CurFilePos = LangFile.GetPointer();
SCOPE_EXIT{ LangFile.SetPointer(CurFilePos, nullptr, FILE_BEGIN); };
const auto CurFilePos = LangFile.File.GetPointer();
SCOPE_EXIT{ LangFile.File.SetPointer(CurFilePos, nullptr, FILE_BEGIN); };

os::fs::filebuf StreamBuffer(LangFile, std::ios::in);
os::fs::filebuf StreamBuffer(LangFile.File, std::ios::in);
std::istream Stream(&StreamBuffer);
Stream.exceptions(Stream.badbit | Stream.failbit);

for (const auto& i: enum_lines(Stream, CodePage))
for (const auto& i: enum_lines(Stream, LangFile.Codepage, &LangFile.TryUtf8))
{
if (starts_with_icase(i.Str, strFullParamName))
{
Expand Down Expand Up @@ -157,32 +162,25 @@ static bool SelectLanguage(bool HelpLanguage, string& Dest)
if (!os::fs::is_file(FindData))
continue;

const os::fs::file LangFile(path::join(Global->g_strFarPath, FindData.FileName), FILE_READ_DATA, os::fs::file_share_read, nullptr, OPEN_EXISTING);
auto LangFile = open_impl(path::join(Global->g_strFarPath, FindData.FileName));
if (!LangFile)
continue;

const auto Codepage = GetFileCodepage(LangFile, encoding::codepage::oem(), nullptr, false);

string LangParamValue;
if (!GetLangParam(LangFile, L"Language"sv, LangParamValue, Codepage))
continue;

string strEntryName;
if (HelpLanguage && (
GetLangParam(LangFile, L"PluginContents"sv, strEntryName, Codepage) ||
GetLangParam(LangFile, L"DocumentContents"sv, strEntryName, Codepage)
GetLangParam(LangFile, L"PluginContents"sv, strEntryName) ||
GetLangParam(LangFile, L"DocumentContents"sv, strEntryName)
))
continue;

const auto [LangName, LangDescription] = split(LangParamValue, L',');
MenuItemEx LangMenuItem(!LangDescription.empty()? LangDescription : LangName);
MenuItemEx LangMenuItem(!LangFile.Description.empty()? LangFile.Description: LangFile.Name);

// No duplicate languages
if (LangMenu->FindItem(0, LangMenuItem.Name, LIFIND_EXACTMATCH) != -1)
continue;

LangMenuItem.SetSelect(equal_icase(Dest, LangName));
LangMenuItem.ComplexUserData = string(LangName);
LangMenuItem.SetSelect(equal_icase(Dest, LangFile.Name));
LangMenuItem.ComplexUserData = LangFile.Name;
LangMenu->AddItem(LangMenuItem);
}

Expand Down Expand Up @@ -325,6 +323,7 @@ static void LoadCustomStrings(string_view const FileName, unordered_string_map<s
return;

const auto CustomFileCodepage = GetFileCodepage(CustomFile, encoding::codepage::oem(), nullptr, false);
auto TryUtf8 = !IsUtfCodePage(CustomFileCodepage);

string SavedLabel;

Expand All @@ -334,7 +333,7 @@ static void LoadCustomStrings(string_view const FileName, unordered_string_map<s

const auto LastSize = Strings.size();

for (const auto& i: enum_lines(Stream, CustomFileCodepage))
for (const auto& i: enum_lines(Stream, CustomFileCodepage, &TryUtf8))
{
switch (const auto Line = parse_lng_line(trim(i.Str), true); Line.Type)
{
Expand Down Expand Up @@ -365,13 +364,13 @@ void language::load(string_view const Path, string_view const Language, int Coun

auto Data = m_Data->create();

const auto [LangFile, LangFileName, LangFileCodePage] = OpenLangFile(Path, LangFileMask, Language);
auto LangFile = OpenLangFile(Path, LangFileMask, Language);
if (!LangFile)
{
throw far_known_exception(far::format(L"Cannot find any language files in \"{}\""sv, Path));
}

Data->m_FileName = LangFile.GetName();
Data->m_FileName = LangFile.File.GetName();

if (CountNeed != -1)
{
Expand All @@ -393,11 +392,11 @@ void language::load(string_view const Path, string_view const Language, int Coun

string SavedLabel;

os::fs::filebuf StreamBuffer(LangFile, std::ios::in);
os::fs::filebuf StreamBuffer(LangFile.File, std::ios::in);
std::istream Stream(&StreamBuffer);
Stream.exceptions(Stream.badbit | Stream.failbit);

for (const auto& i: enum_lines(Stream, LangFileCodePage))
for (const auto& i: enum_lines(Stream, LangFile.Codepage, &LangFile.TryUtf8))
{
switch (auto Line = parse_lng_line(trim(i.Str), LoadLabels); Line.Type)
{
Expand Down
Loading

0 comments on commit e76cd64

Please sign in to comment.