Skip to content

Commit

Permalink
Introduce til::hasher for sucessive hashing of structs (#11887)
Browse files Browse the repository at this point in the history
This commit serves two purposes:
* Simplify construction of hashes for non-trivial structs
  This is especially helpful for ActionArgs
* Improve hash quality by not needlessly throwing away entropy

`til::hasher` is modeled after Rust's `std::hash::Hasher` and works similar.
The idea is simple: A stateful hash function can hash multiple unrelated fields,
without loosing entropy by running a finalizer after hashing each interim field.
This is especially useful for modern hash functions, which often have a wider
internal state than the output width. Additionally this improves performance
for hash functions with complex finalizers.

Most of this is of course a bit moot right now, considering that `til::hasher`
is still based on STL's FNV1a algorithm, which offers a very poor hash quality.
But counterintuitively, FNV1a actually benefits most from this PR: Since it
lacks a finalizer entirely, this commit greatly improves hash quality as
it encodes more data into FNV's state and thus improves randomness.

## PR Checklist
* [x] I work here
* [x] Tests added/passed

## Validation Steps Performed
* No unusual behavior ✅
  • Loading branch information
lhecker authored Dec 7, 2021
1 parent 91ad17d commit 00fe2b0
Show file tree
Hide file tree
Showing 12 changed files with 316 additions and 178 deletions.
13 changes: 2 additions & 11 deletions .github/actions/spelling/expect/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ cacafire
callee
capslock
CARETBLINKINGENABLED
carlos
CARRIAGERETURN
cascadia
cassert
Expand Down Expand Up @@ -635,7 +634,7 @@ doskey
dotnet
doubleclick
downlevel
dpg
DPG
dpi
DPIAPI
DPICHANGE
Expand Down Expand Up @@ -793,6 +792,7 @@ FIXEDCONVERTED
FIXEDFILEINFO
Flg
flyout
fmix
fmodern
fmtarg
fmtid
Expand Down Expand Up @@ -1471,7 +1471,6 @@ msix
msrc
msvcrt
MSVCRTD
MSVS
msys
msysgit
MTSM
Expand Down Expand Up @@ -2174,7 +2173,6 @@ SHOWNOACTIVATE
SHOWNORMAL
SHOWWINDOW
SHRT
sid
sidebyside
SIF
SIGDN
Expand Down Expand Up @@ -2205,7 +2203,6 @@ somefile
SOURCEBRANCH
sourced
SOURCESDIRECTORY
SPACEBAR
spammy
spand
sprintf
Expand Down Expand Up @@ -2510,7 +2507,6 @@ unmark
UNORM
unparseable
unpause
Unregister
unregistering
untests
untextured
Expand All @@ -2523,7 +2519,6 @@ upvote
uri
url
urlencoded
Urxvt
USASCII
usebackq
USECALLBACK
Expand Down Expand Up @@ -2568,7 +2563,6 @@ Vcount
vcpkg
vcprintf
vcproj
vcvarsall
vcxitems
vcxproj
vec
Expand Down Expand Up @@ -2825,7 +2819,6 @@ xes
xff
XFile
XFORM
xIcon
XManifest
XMath
XMFLOAT
Expand Down Expand Up @@ -2858,15 +2851,13 @@ YCast
YCENTER
YCount
YDPI
yIcon
yml
YOffset
YPosition
YSize
YSubstantial
YVIRTUALSCREEN
YWalk
zamora
ZCmd
ZCtrl
zsh
Expand Down
11 changes: 5 additions & 6 deletions src/buffer/out/UnicodeStorage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ Author(s):

#pragma once

#include <vector>
#include <unordered_map>
#include <climits>
#include <vector>

#include <til/bit.h>
#include <til/hash.h>

// std::unordered_map needs help to know how to hash a COORD
namespace std
Expand All @@ -33,10 +35,7 @@ namespace std
// - the hashed coord
constexpr size_t operator()(const COORD& coord) const noexcept
{
size_t retVal = coord.Y;
const size_t xCoord = coord.X;
retVal |= xCoord << (sizeof(coord.Y) * CHAR_BIT);
return retVal;
return til::hash(til::bit_cast<uint32_t>(coord));
}
};
}
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/out/textBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2447,7 +2447,7 @@ uint16_t TextBuffer::GetHyperlinkId(std::wstring_view uri, std::wstring_view id)
// assign _currentHyperlinkId if the custom id does not already exist
std::wstring newId{ id };
// hash the URL and add it to the custom ID - GH#7698
newId += L"%" + std::to_wstring(std::hash<std::wstring_view>{}(uri));
newId += L"%" + std::to_wstring(til::hash(uri));
const auto result = _hyperlinkCustomIdMap.emplace(newId, _currentHyperlinkId);
if (result.second)
{
Expand Down
61 changes: 30 additions & 31 deletions src/cascadia/TerminalSettingsModel/ActionArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,27 +62,6 @@ private:
// * ActionEventArgs holds a single IActionArgs. For events that don't need
// additional args, this can be nullptr.

template<>
constexpr size_t Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(const winrt::Microsoft::Terminal::Settings::Model::IActionArgs& args)
{
return gsl::narrow_cast<size_t>(args.Hash());
}

template<>
constexpr size_t Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(const winrt::Microsoft::Terminal::Settings::Model::NewTerminalArgs& args)
{
return gsl::narrow_cast<size_t>(args.Hash());
}

// Retrieves the hash value for an empty-constructed object.
template<typename T>
static size_t EmptyHash()
{
// cache the value of the empty hash
static const size_t cachedHash = winrt::make_self<T>()->Hash();
return cachedHash;
}

////////////////////////////////////////////////////////////////////////////////
// BEGIN X-MACRO MADNESS
////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -324,9 +303,18 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
copy->_ColorScheme = _ColorScheme;
return *copy;
}
size_t Hash() const
size_t Hash(uint64_t hasherState) const
{
return ::Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(Commandline(), StartingDirectory(), TabTitle(), TabColor(), ProfileIndex(), Profile(), SuppressApplicationTitle(), ColorScheme());
til::hasher h{ gsl::narrow_cast<size_t>(hasherState) };
h.write(Commandline());
h.write(StartingDirectory());
h.write(TabTitle());
h.write(TabColor());
h.write(ProfileIndex());
h.write(Profile());
h.write(SuppressApplicationTitle());
h.write(ColorScheme());
return h.finalize();
}
};

Expand Down Expand Up @@ -375,9 +363,11 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
copy->_TerminalArgs = _TerminalArgs.Copy();
return *copy;
}
size_t Hash() const
size_t Hash(uint64_t hasherState) const
{
return ::Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(TerminalArgs());
til::hasher h{ gsl::narrow_cast<size_t>(hasherState) };
h.write(TerminalArgs());
return h.finalize();
}
};

Expand Down Expand Up @@ -459,9 +449,14 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
copy->_SplitSize = _SplitSize;
return *copy;
}
size_t Hash() const
size_t Hash(uint64_t hasherState) const
{
return ::Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(SplitDirection(), TerminalArgs(), SplitMode(), SplitSize());
til::hasher h{ gsl::narrow_cast<size_t>(hasherState) };
h.write(SplitDirection());
h.write(TerminalArgs());
h.write(SplitMode());
h.write(SplitSize());
return h.finalize();
}
};

Expand Down Expand Up @@ -506,9 +501,11 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
copy->_TerminalArgs = _TerminalArgs.Copy();
return *copy;
}
size_t Hash() const
size_t Hash(uint64_t hasherState) const
{
return ::Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(TerminalArgs());
til::hasher h{ gsl::narrow_cast<size_t>(hasherState) };
h.write(TerminalArgs());
return h.finalize();
}
};

Expand Down Expand Up @@ -625,9 +622,11 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation
copy->_Actions = _Actions;
return *copy;
}
size_t Hash() const
size_t Hash(uint64_t hasherState) const
{
return ::Microsoft::Terminal::Settings::Model::HashUtils::HashProperty(_Actions);
til::hasher h{ gsl::narrow_cast<size_t>(hasherState) };
h.write(winrt::get_abi(_Actions));
return h.finalize();
}
};
}
Expand Down
4 changes: 2 additions & 2 deletions src/cascadia/TerminalSettingsModel/ActionArgs.idl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Microsoft.Terminal.Settings.Model
Boolean Equals(IActionArgs other);
String GenerateName();
IActionArgs Copy();
UInt64 Hash();
UInt64 Hash(UInt64 hasher);
};

interface IActionEventArgs
Expand Down Expand Up @@ -128,7 +128,7 @@ namespace Microsoft.Terminal.Settings.Model
Boolean Equals(NewTerminalArgs other);
String GenerateName();
String ToCommandline();
UInt64 Hash();
UInt64 Hash(UInt64 hasher);
};

[default_interface] runtimeclass ActionEventArgs : IActionEventArgs
Expand Down
101 changes: 52 additions & 49 deletions src/cascadia/TerminalSettingsModel/ActionArgsMagic.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Author(s):
--*/

#pragma once

// MACRO HACKS
//
// We want to have code that looks like:
Expand Down Expand Up @@ -96,7 +98,7 @@ struct InitListPlaceholder
// definition of Hash() below, that's to deal with trailing commas (or in this
// case, leading.)
#define HASH_ARGS(type, name, jsonKey, required, ...) \
, name()
h.write(name());

// Use ACTION_ARGS_STRUCT when you've got no other customizing to do.
#define ACTION_ARGS_STRUCT(className, argsMacro) \
Expand All @@ -109,52 +111,53 @@ struct InitListPlaceholder
// * NewTerminalArgs has a ToCommandline method it needs to additionally declare.
// * GlobalSummonArgs has the QuakeModeFromJson helper

#define ACTION_ARG_BODY(className, argsMacro) \
className() = default; \
className( \
argsMacro(CTOR_PARAMS) InitListPlaceholder = {}) : \
argsMacro(CTOR_INIT) _placeholder{} {}; \
argsMacro(DECLARE_ARGS); \
\
private: \
InitListPlaceholder _placeholder; \
\
public: \
hstring GenerateName() const; \
bool Equals(const IActionArgs& other) \
{ \
auto otherAsUs = other.try_as<className>(); \
if (otherAsUs) \
{ \
return true argsMacro(EQUALS_ARGS); \
} \
return false; \
}; \
static FromJsonResult FromJson(const Json::Value& json) \
{ \
auto args = winrt::make_self<className>(); \
argsMacro(FROM_JSON_ARGS); \
return { *args, {} }; \
} \
static Json::Value ToJson(const IActionArgs& val) \
{ \
if (!val) \
{ \
return {}; \
} \
Json::Value json{ Json::ValueType::objectValue }; \
const auto args{ get_self<className>(val) }; \
argsMacro(TO_JSON_ARGS); \
return json; \
} \
IActionArgs Copy() const \
{ \
auto copy{ winrt::make_self<className>() }; \
argsMacro(COPY_ARGS); \
return *copy; \
} \
size_t Hash() const \
{ \
return ::Microsoft::Terminal::Settings::Model::HashUtils::HashProperty( \
0 argsMacro(HASH_ARGS)); \
#define ACTION_ARG_BODY(className, argsMacro) \
className() = default; \
className( \
argsMacro(CTOR_PARAMS) InitListPlaceholder = {}) : \
argsMacro(CTOR_INIT) _placeholder{} {}; \
argsMacro(DECLARE_ARGS); \
\
private: \
InitListPlaceholder _placeholder; \
\
public: \
hstring GenerateName() const; \
bool Equals(const IActionArgs& other) \
{ \
auto otherAsUs = other.try_as<className>(); \
if (otherAsUs) \
{ \
return true argsMacro(EQUALS_ARGS); \
} \
return false; \
}; \
static FromJsonResult FromJson(const Json::Value& json) \
{ \
auto args = winrt::make_self<className>(); \
argsMacro(FROM_JSON_ARGS); \
return { *args, {} }; \
} \
static Json::Value ToJson(const IActionArgs& val) \
{ \
if (!val) \
{ \
return {}; \
} \
Json::Value json{ Json::ValueType::objectValue }; \
const auto args{ get_self<className>(val) }; \
argsMacro(TO_JSON_ARGS); \
return json; \
} \
IActionArgs Copy() const \
{ \
auto copy{ winrt::make_self<className>() }; \
argsMacro(COPY_ARGS); \
return *copy; \
} \
size_t Hash(uint64_t hasherState) const \
{ \
til::hasher h{ gsl::narrow_cast<size_t>(hasherState) }; \
argsMacro(HASH_ARGS); \
return h.finalize(); \
}
Loading

0 comments on commit 00fe2b0

Please sign in to comment.